The patch titled
Subject: slub: track number of slabs irrespective of CONFIG_SLUB_DEBUG
has been removed from the -mm tree. Its filename was
slub-track-number-of-slabs-irrespective-of-config_slub_debug.patch
This patch was dropped because it is obsolete
------------------------------------------------------
From: Shakeel Butt <shakeelb(a)google.com>
Subject: slub: track number of slabs irrespective of CONFIG_SLUB_DEBUG
For !CONFIG_SLUB_DEBUG, SLUB does not maintain the number of slabs
allocated per node for a kmem_cache. Thus, slabs_node() in
__kmem_cache_empty(), __kmem_cache_shrink() and __kmem_cache_destroy()
will always return 0 for such a config. This is wrong and can cause
issues for all users of these functions.
In fact, in [1] Jason reported a system crash while using SLUB without
CONFIG_SLUB_DEBUG. The cause was the use of slabs_node() by
__kmem_cache_empty().
The right solution is to make slabs_node() work even for
!CONFIG_SLUB_DEBUG. Commit 0f389ec63077 ("slub: No need for per node
slab counters if !SLUB_DEBUG") put the per-node slab counter under
CONFIG_SLUB_DEBUG because it was only read through the sysfs API, which
is disabled on !CONFIG_SLUB_DEBUG. However, the users of the per-node
slab counter assumed that it would work in the absence of
CONFIG_SLUB_DEBUG. So, make the counter work for !CONFIG_SLUB_DEBUG.
Please note that f9e13c0a5a33 ("slab, slub: skip unnecessary
kasan_cache_shutdown()") exposed this issue, but it was present even
before that commit.
[1] http://lkml.kernel.org/r/CAHmME9rtoPwxUSnktxzKso14iuVCWT7BE_-_8PAC=pGw1iJnQ…
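For reference, here is (approximately) the upstream __kmem_cache_empty()
from mm/slub.c; with the old !CONFIG_SLUB_DEBUG stub of slabs_node()
hardwired to return 0, it reports a cache as empty even while full slabs
still hold live objects:

	bool __kmem_cache_empty(struct kmem_cache *s)
	{
		int node;
		struct kmem_cache_node *n;

		for_each_kmem_cache_node(s, node, n)
			if (n->nr_partial || slabs_node(s, node))
				return false;
		return true;
	}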
Link: http://lkml.kernel.org/r/20180620224147.23777-1-shakeelb@google.com
Fixes: f9e13c0a5a33 ("slab, slub: skip unnecessary kasan_cache_shutdown()")
Signed-off-by: Shakeel Butt <shakeelb(a)google.com>
Suggested-by: David Rientjes <rientjes(a)google.com>
Reported-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Cc: Christoph Lameter <cl(a)linux.com>
Cc: Pekka Enberg <penberg(a)kernel.org>
Cc: Joonsoo Kim <iamjoonsoo.kim(a)lge.com>
Cc: Andrey Ryabinin <aryabinin(a)virtuozzo.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/slab.h | 2 -
mm/slub.c | 80 ++++++++++++++++++++++++----------------------------
2 files changed, 38 insertions(+), 44 deletions(-)
diff -puN mm/slab.h~slub-track-number-of-slabs-irrespective-of-config_slub_debug mm/slab.h
--- a/mm/slab.h~slub-track-number-of-slabs-irrespective-of-config_slub_debug
+++ a/mm/slab.h
@@ -473,8 +473,8 @@ struct kmem_cache_node {
#ifdef CONFIG_SLUB
unsigned long nr_partial;
struct list_head partial;
-#ifdef CONFIG_SLUB_DEBUG
atomic_long_t nr_slabs;
+#ifdef CONFIG_SLUB_DEBUG
atomic_long_t total_objects;
struct list_head full;
#endif
diff -puN mm/slub.c~slub-track-number-of-slabs-irrespective-of-config_slub_debug mm/slub.c
--- a/mm/slub.c~slub-track-number-of-slabs-irrespective-of-config_slub_debug
+++ a/mm/slub.c
@@ -1030,42 +1030,6 @@ static void remove_full(struct kmem_cach
list_del(&page->lru);
}
-/* Tracking of the number of slabs for debugging purposes */
-static inline unsigned long slabs_node(struct kmem_cache *s, int node)
-{
- struct kmem_cache_node *n = get_node(s, node);
-
- return atomic_long_read(&n->nr_slabs);
-}
-
-static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
-{
- return atomic_long_read(&n->nr_slabs);
-}
-
-static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
-{
- struct kmem_cache_node *n = get_node(s, node);
-
- /*
- * May be called early in order to allocate a slab for the
- * kmem_cache_node structure. Solve the chicken-egg
- * dilemma by deferring the increment of the count during
- * bootstrap (see early_kmem_cache_node_alloc).
- */
- if (likely(n)) {
- atomic_long_inc(&n->nr_slabs);
- atomic_long_add(objects, &n->total_objects);
- }
-}
-static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
-{
- struct kmem_cache_node *n = get_node(s, node);
-
- atomic_long_dec(&n->nr_slabs);
- atomic_long_sub(objects, &n->total_objects);
-}
-
/* Object debug checks for alloc/free paths */
static void setup_object_debug(struct kmem_cache *s, struct page *page,
void *object)
@@ -1321,16 +1285,46 @@ slab_flags_t kmem_cache_flags(unsigned i
#define disable_higher_order_debug 0
+#endif /* CONFIG_SLUB_DEBUG */
+
static inline unsigned long slabs_node(struct kmem_cache *s, int node)
- { return 0; }
+{
+ struct kmem_cache_node *n = get_node(s, node);
+
+ return atomic_long_read(&n->nr_slabs);
+}
+
static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
- { return 0; }
-static inline void inc_slabs_node(struct kmem_cache *s, int node,
- int objects) {}
-static inline void dec_slabs_node(struct kmem_cache *s, int node,
- int objects) {}
+{
+ return atomic_long_read(&n->nr_slabs);
+}
-#endif /* CONFIG_SLUB_DEBUG */
+static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
+{
+ struct kmem_cache_node *n = get_node(s, node);
+
+ /*
+ * May be called early in order to allocate a slab for the
+ * kmem_cache_node structure. Solve the chicken-egg
+ * dilemma by deferring the increment of the count during
+ * bootstrap (see early_kmem_cache_node_alloc).
+ */
+ if (likely(n)) {
+ atomic_long_inc(&n->nr_slabs);
+#ifdef CONFIG_SLUB_DEBUG
+ atomic_long_add(objects, &n->total_objects);
+#endif
+ }
+}
+static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
+{
+ struct kmem_cache_node *n = get_node(s, node);
+
+ atomic_long_dec(&n->nr_slabs);
+#ifdef CONFIG_SLUB_DEBUG
+ atomic_long_sub(objects, &n->total_objects);
+#endif
+}
/*
* Hooks for other subsystems that check memory allocations. In a typical
_
Patches currently in -mm which might be from shakeelb(a)google.com are
kvm-mm-account-shadow-page-tables-to-kmemcg.patch
fs-fsnotify-account-fsnotify-metadata-to-kmemcg.patch
fs-fsnotify-account-fsnotify-metadata-to-kmemcg-fix.patch
fs-mm-account-buffer_head-to-kmemcg.patch
fs-mm-account-buffer_head-to-kmemcgpatchfix.patch
memcg-reduce-memcg-tree-traversals-for-stats-collection.patch
From: Snild Dolkow <snild(a)sony.com>
There is a window for racing when printing directly to task->comm,
allowing other threads to see a non-terminated string. The vsnprintf
function fills the buffer, counts the truncated chars, then finally
writes the \0 at the end.
            creator                     other
  vsnprintf:
    fill (not terminated)
    count the rest          trace_sched_waking(p):
    ...                       memcpy(comm, p->comm, TASK_COMM_LEN)
    write \0
The consequences depend on how 'other' uses the string. In our case,
it was copied into the tracing system's saved cmdlines, a buffer of
adjacent TASK_COMM_LEN-byte buffers (note the 'n' where 0 should be):
crash-arm64> x/1024s savedcmd->saved_cmdlines | grep 'evenk'
0xffffffd5b3818640: "irq/497-pwr_evenkworker/u16:12"
...and a strcpy out of there would cause stack corruption:
[224761.522292] Kernel panic - not syncing: stack-protector:
Kernel stack is corrupted in: ffffff9bf9783c78
crash-arm64> kbt | grep 'comm\|trace_print_context'
#6 0xffffff9bf9783c78 in trace_print_context+0x18c(+396)
comm (char [16]) = "irq/497-pwr_even"
crash-arm64> rd 0xffffffd4d0e17d14 8
ffffffd4d0e17d14: 2f71726900000000 5f7277702d373934 ....irq/497-pwr_
ffffffd4d0e17d24: 726f776b6e657665 3a3631752f72656b evenkworker/u16:
ffffffd4d0e17d34: f9780248ff003231 cede60e0ffffff9b 12..H.x......`..
ffffffd4d0e17d44: cede60c8ffffffd4 00000fffffffffd4 .....`..........
The workaround in e09e28671 (use strlcpy in __trace_find_cmdline) was
likely needed because of this same bug.
Solve this by vsnprintf()ing into a local buffer, then using
set_task_comm(). This way, there is never a window where comm is not
terminated.
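For reference, set_task_comm() ends up in __set_task_comm(), which is
approximately (fs/exec.c):

	void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
	{
		task_lock(tsk);
		trace_task_rename(tsk, buf);
		strlcpy(tsk->comm, buf, sizeof(tsk->comm));
		task_unlock(tsk);
		perf_event_comm(tsk, exec);
	}

The copy into comm is now a short, immediately-terminated strlcpy() from
an already-complete local string, instead of leaving comm unterminated
while vsnprintf() counts the rest of the format.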
Link: http://lkml.kernel.org/r/20180726071539.188015-1-snild@sony.com
Cc: stable(a)vger.kernel.org
Fixes: bc0c38d139ec7 ("ftrace: latency tracer infrastructure")
Reviewed-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
Signed-off-by: Snild Dolkow <snild(a)sony.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
---
kernel/kthread.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 750cb8082694..486dedbd9af5 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -325,8 +325,14 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
task = create->result;
if (!IS_ERR(task)) {
static const struct sched_param param = { .sched_priority = 0 };
+ char name[TASK_COMM_LEN];
- vsnprintf(task->comm, sizeof(task->comm), namefmt, args);
+ /*
+ * task is already visible to other tasks, so updating
+ * COMM must be protected.
+ */
+ vsnprintf(name, sizeof(name), namefmt, args);
+ set_task_comm(task, name);
/*
* root may have changed our (kthreadd's) priority or CPU mask.
* The kernel thread should not inherit these properties.
--
2.17.1
From: "Steven Rostedt (VMware)" <rostedt(a)goodmis.org>
Commit 57ea2a34adf4 ("tracing/kprobes: Fix trace_probe flags on
enable_trace_kprobe() failure") added an if statement that depends on
another if statement which gcc cannot see will initialize the "link"
variable, so it gives the warning:
"warning: 'link' may be used uninitialized in this function"
It is really a false positive, but to quiet the warning, and also to make
sure that it never actually is used uninitialized, initialize the "link"
variable to NULL and add an if (!WARN_ON_ONCE(!link)) where the compiler
thinks it could be used uninitialized.
Cc: stable(a)vger.kernel.org
Fixes: 57ea2a34adf4 ("tracing/kprobes: Fix trace_probe flags on enable_trace_kprobe() failure")
Reported-by: kbuild test robot <lkp(a)intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
---
kernel/trace/trace_kprobe.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 27ace4513c43..6b71860f3998 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -400,7 +400,7 @@ static struct trace_kprobe *find_trace_kprobe(const char *event,
static int
enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
{
- struct event_file_link *link;
+ struct event_file_link *link = NULL;
int ret = 0;
if (file) {
@@ -426,7 +426,9 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
if (ret) {
if (file) {
- list_del_rcu(&link->list);
+ /* Notice the if is true on not WARN() */
+ if (!WARN_ON_ONCE(!link))
+ list_del_rcu(&link->list);
kfree(link);
tk->tp.flags &= ~TP_FLAG_TRACE;
} else {
--
2.17.1
From: "Steven Rostedt (VMware)" <rostedt(a)goodmis.org>
There was a case that triggered a double free in event_trigger_callback()
due to the called reg() function freeing the trigger_data and then it
getting freed again by the error return by the caller. The solution there
was to up the trigger_data ref count.
Code inspection found that event_enable_trigger_func() has the same issue,
but is not as easy to trigger (requires harder to trigger failures). It
needs to be solved slightly different as it needs more to clean up when the
reg() function fails.
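For context, the ref counting relies on the existing trigger helpers,
approximately (kernel/trace/trace_events_trigger.c):

	int event_trigger_init(struct event_trigger_ops *ops,
			       struct event_trigger_data *data)
	{
		data->ref++;
		return 0;
	}

	void event_trigger_free(struct event_trigger_ops *ops,
				struct event_trigger_data *data)
	{
		if (WARN_ON_ONCE(data->ref <= 0))
			return;

		data->ref--;
		if (!data->ref)
			trigger_data_free(data);
	}

Taking a reference before calling reg() means a failing reg() that frees
the trigger internally only drops its own reference; the final
event_trigger_free() on the error path then performs the single real free.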
Link: http://lkml.kernel.org/r/20180725124008.7008e586@gandalf.local.home
Cc: stable(a)vger.kernel.org
Fixes: 7862ad1846e99 ("tracing: Add 'enable_event' and 'disable_event' event trigger commands")
Reviewed-by: Masami Hiramatsu <mhiramat(a)kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
---
kernel/trace/trace_events_trigger.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index d18ec0e58be2..5dea177cef53 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -1420,6 +1420,9 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
goto out;
}
+ /* Up the trigger_data count to make sure nothing frees it on failure */
+ event_trigger_init(trigger_ops, trigger_data);
+
if (trigger) {
number = strsep(&trigger, ":");
@@ -1470,6 +1473,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
goto out_disable;
/* Just return zero, not the number of enabled functions */
ret = 0;
+ event_trigger_free(trigger_ops, trigger_data);
out:
return ret;
@@ -1480,7 +1484,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
out_free:
if (cmd_ops->set_filter)
cmd_ops->set_filter(NULL, trigger_data, NULL);
- kfree(trigger_data);
+ event_trigger_free(trigger_ops, trigger_data);
kfree(enable_data);
goto out;
}
--
2.17.1
From: Masami Hiramatsu <mhiramat(a)kernel.org>
Maintain the tracing on/off setting of the ring_buffer when switching
to the trace buffer snapshot.
Taking a snapshot is done by swapping the backup ring buffer
(max_tr_buffer). But since the tracing on/off setting is defined
by the ring buffer, when swapping it, the tracing on/off setting
can also be changed. This causes a strange result like below:
/sys/kernel/debug/tracing # cat tracing_on
1
/sys/kernel/debug/tracing # echo 0 > tracing_on
/sys/kernel/debug/tracing # cat tracing_on
0
/sys/kernel/debug/tracing # echo 1 > snapshot
/sys/kernel/debug/tracing # cat tracing_on
1
/sys/kernel/debug/tracing # echo 1 > snapshot
/sys/kernel/debug/tracing # cat tracing_on
0
We don't touch tracing_on, but the snapshot changes the tracing_on
setting each time. This is an anomaly, because the user doesn't know
that each ring_buffer stores its own tracing-enable state and that the
snapshot is done by swapping ring buffers.
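With this patch applied, the expected behavior is that the tracing_on
setting survives snapshot swaps:

/sys/kernel/debug/tracing # echo 0 > tracing_on
/sys/kernel/debug/tracing # echo 1 > snapshot
/sys/kernel/debug/tracing # cat tracing_on
0
/sys/kernel/debug/tracing # echo 1 > snapshot
/sys/kernel/debug/tracing # cat tracing_on
0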
Link: http://lkml.kernel.org/r/153149929558.11274.11730609978254724394.stgit@devb…
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Tom Zanussi <tom.zanussi(a)linux.intel.com>
Cc: Hiraku Toyooka <hiraku.toyooka(a)cybertrust.co.jp>
Cc: stable(a)vger.kernel.org
Fixes: debdd57f5145 ("tracing: Make a snapshot feature available from userspace")
Signed-off-by: Masami Hiramatsu <mhiramat(a)kernel.org>
[ Updated commit log and comment in the code ]
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
---
include/linux/ring_buffer.h | 1 +
kernel/trace/ring_buffer.c | 16 ++++++++++++++++
kernel/trace/trace.c | 6 ++++++
3 files changed, 23 insertions(+)
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index b72ebdff0b77..003d09ab308d 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -165,6 +165,7 @@ void ring_buffer_record_enable(struct ring_buffer *buffer);
void ring_buffer_record_off(struct ring_buffer *buffer);
void ring_buffer_record_on(struct ring_buffer *buffer);
int ring_buffer_record_is_on(struct ring_buffer *buffer);
+int ring_buffer_record_is_set_on(struct ring_buffer *buffer);
void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu);
void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 6a46af21765c..0b0b688ea166 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3226,6 +3226,22 @@ int ring_buffer_record_is_on(struct ring_buffer *buffer)
return !atomic_read(&buffer->record_disabled);
}
+/**
+ * ring_buffer_record_is_set_on - return true if the ring buffer is set writable
+ * @buffer: The ring buffer to see if write is set enabled
+ *
+ * Returns true if the ring buffer is set writable by ring_buffer_record_on().
+ * Note that this does NOT mean it is in a writable state.
+ *
+ * It may return true when the ring buffer has been disabled by
+ * ring_buffer_record_disable(), as that is a temporary disabling of
+ * the ring buffer.
+ */
+int ring_buffer_record_is_set_on(struct ring_buffer *buffer)
+{
+ return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF);
+}
+
/**
* ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
* @buffer: The ring buffer to stop writes to.
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 87cf25171fb8..823687997b01 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1373,6 +1373,12 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
arch_spin_lock(&tr->max_lock);
+ /* Inherit the recordable setting from trace_buffer */
+ if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
+ ring_buffer_record_on(tr->max_buffer.buffer);
+ else
+ ring_buffer_record_off(tr->max_buffer.buffer);
+
swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
__update_max_tr(tr, tsk, cpu);
--
2.17.1
The patch below does not apply to the 4.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From eb493fbc150f4a28151ae1ee84f24395989f3600 Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude(a)redhat.com>
Date: Tue, 3 Jul 2018 16:31:41 -0400
Subject: [PATCH] drm/nouveau: Set DRIVER_ATOMIC cap earlier to fix debugfs
Currently nouveau doesn't actually expose the state debugfs file that's
usually provided for any modesetting driver that supports atomic, even
if nouveau is loaded with atomic=1. This is due to the fact that the
standard debugfs files that DRM creates for atomic drivers are set up
when drm_get_pci_dev() is called from nouveau_drm.c. This happens well
before we've initialized the display core, which is currently
responsible for setting the DRIVER_ATOMIC cap.
So, move the atomic option into nouveau_drm.c and just add the
DRIVER_ATOMIC cap whenever it's enabled on the kernel commandline. This
shouldn't cause any actual issues, as the atomic ioctl will still fail
as expected even if the display core doesn't disable it until later in
the init sequence. This also provides the added benefit of being able to
use the state debugfs file to check the current display state even if
clients aren't allowed to modify it through anything other than the
legacy ioctls.
Additionally, disable the DRIVER_ATOMIC cap in nv04's display core, as
this was already disabled there previously.
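As a usage note (device paths assumed, not part of the patch): with the
module loaded as 'nouveau atomic=1', the DRM core's atomic state file
should now be present and readable even before any atomic client runs,
e.g.:

	# modprobe nouveau atomic=1
	# cat /sys/kernel/debug/dri/0/state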
Signed-off-by: Lyude Paul <lyude(a)redhat.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Ben Skeggs <bskeggs(a)redhat.com>
diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c
index 501d2d290e9c..70dce544984e 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c
@@ -55,6 +55,9 @@ nv04_display_create(struct drm_device *dev)
nouveau_display(dev)->init = nv04_display_init;
nouveau_display(dev)->fini = nv04_display_fini;
+ /* Pre-nv50 doesn't support atomic, so don't expose the ioctls */
+ dev->driver->driver_features &= ~DRIVER_ATOMIC;
+
nouveau_hw_save_vga_fonts(dev, 1);
nv04_crtc_create(dev, 0);
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 31b12b4f321a..9bae4db84cfb 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -2126,10 +2126,6 @@ nv50_display_destroy(struct drm_device *dev)
kfree(disp);
}
-MODULE_PARM_DESC(atomic, "Expose atomic ioctl (default: disabled)");
-static int nouveau_atomic = 0;
-module_param_named(atomic, nouveau_atomic, int, 0400);
-
int
nv50_display_create(struct drm_device *dev)
{
@@ -2154,8 +2150,6 @@ nv50_display_create(struct drm_device *dev)
disp->disp = &nouveau_display(dev)->disp;
dev->mode_config.funcs = &nv50_disp_func;
dev->driver->driver_features |= DRIVER_PREFER_XBGR_30BPP;
- if (nouveau_atomic)
- dev->driver->driver_features |= DRIVER_ATOMIC;
/* small shared memory area we use for notifiers and semaphores */
ret = nouveau_bo_new(&drm->client, 4096, 0x1000, TTM_PL_FLAG_VRAM,
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 514903338782..f5d3158f0378 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -81,6 +81,10 @@ MODULE_PARM_DESC(modeset, "enable driver (default: auto, "
int nouveau_modeset = -1;
module_param_named(modeset, nouveau_modeset, int, 0400);
+MODULE_PARM_DESC(atomic, "Expose atomic ioctl (default: disabled)");
+static int nouveau_atomic = 0;
+module_param_named(atomic, nouveau_atomic, int, 0400);
+
MODULE_PARM_DESC(runpm, "disable (0), force enable (1), optimus only default (-1)");
static int nouveau_runtime_pm = -1;
module_param_named(runpm, nouveau_runtime_pm, int, 0400);
@@ -509,6 +513,9 @@ static int nouveau_drm_probe(struct pci_dev *pdev,
pci_set_master(pdev);
+ if (nouveau_atomic)
+ driver_pci.driver_features |= DRIVER_ATOMIC;
+
ret = drm_get_pci_dev(pdev, pent, &driver_pci);
if (ret) {
nvkm_device_del(&device);
Hi Stable Team,
On 13/06/2018 14:20, Neil Armstrong wrote:
> On Amlogic Meson GXBB & GXL platforms, the SCPI Cortex-M4 Co-Processor
> seems to be dependent on the FCLK_DIV2 to be operational.
>
> The issue has occurred since v4.17-rc1, freezing the kernel boot when
> the 'schedutil' cpufreq governor was selected as default:
>
> [ 12.071837] scpi_protocol scpi: SCP Protocol 0.0 Firmware 0.0.0 version
> domain-0 init dvfs: 4
> [ 12.087757] hctosys: unable to open rtc device (rtc0)
> [ 12.087907] cfg80211: Loading compiled-in X.509 certificates for regulatory database
> [ 12.102241] cfg80211: Loaded X.509 cert 'sforshee: 00b28ddf47aef9cea7'
>
> But when disabling the MMC driver, the boot finished but cpufreq failed to
> change the CPU frequency:
>
> [ 12.153045] cpufreq: __target_index: Failed to change cpu frequency: -5
>
> A bisect between v4.16 and v4.17-rc1 identified commit 05f814402d61 as
> the first bad commit.
> This commit added support for the missing clock gates before the fixed PLL
> fixed dividers (FCLK_DIVx) and the clock framework basically disabled
> all the unused fixed dividers, thus disabled a critical clock path for
> the SCPI Co-Processor.
>
> This patch simply sets the FCLK_DIV2 gate as critical to ensure
> nobody can disable it.
>
> Fixes: 05f814402d61 ("clk: meson: add fdiv clock gates")
> Signed-off-by: Neil Armstrong <narmstrong(a)baylibre.com>
This patch hit linux master with commit id c987ac6f1f088663b6dad39281071aeb31d450a8
Could this be backported to the next 4.17 stable release?
Thanks,
Neil
> ---
> drivers/clk/meson/gxbb.c | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c
> index b1e4d95..0e053c1 100644
> --- a/drivers/clk/meson/gxbb.c
> +++ b/drivers/clk/meson/gxbb.c
> @@ -511,6 +511,7 @@ static struct clk_regmap gxbb_fclk_div2 = {
> .ops = &clk_regmap_gate_ops,
> .parent_names = (const char *[]){ "fclk_div2_div" },
> .num_parents = 1,
> + .flags = CLK_IS_CRITICAL,
> },
> };
>
>
This is the start of the stable review cycle for the 4.4.139 release.
There are 105 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Tue Jul 3 15:31:30 UTC 2018.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.4.139-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.4.139-rc1
Szymon Janc <szymon.janc(a)codecoup.pl>
Bluetooth: Fix connection if directed advertising and privacy is used
Bjørn Mork <bjorn(a)mork.no>
cdc_ncm: avoid padding beyond end of skb
Mike Snitzer <snitzer(a)redhat.com>
dm thin: handle running out of data space vs concurrent discard
Keith Busch <keith.busch(a)intel.com>
block: Fix transfer when chunk sectors exceeds max
Maxime Chevallier <maxime.chevallier(a)bootlin.com>
spi: Fix scatterlist elements size in spi_map_buf
Liu Bo <bo.li.liu(a)oracle.com>
Btrfs: fix unexpected cow in run_delalloc_nocow
Takashi Iwai <tiwai(a)suse.de>
ALSA: hda/realtek - Add a quirk for FSC ESPRIMO U9210
KT Liao <kt.liao(a)emc.com.tw>
Input: elantech - fix V4 report decoding for module with middle key
Aaron Ma <aaron.ma(a)canonical.com>
Input: elantech - enable middle button of touchpads on ThinkPad P52
Ben Hutchings <ben.hutchings(a)codethink.co.uk>
Input: elan_i2c_smbus - fix more potential stack buffer overflows
Jan Kara <jack(a)suse.cz>
udf: Detect incorrect directory size
Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
xen: Remove unnecessary BUG_ON from __unbind_from_irq()
Alexandr Savca <alexandr.savca(a)saltedge.com>
Input: elan_i2c - add ELAN0618 (Lenovo v330 15IKB) ACPI ID
Kees Cook <keescook(a)chromium.org>
video: uvesafb: Fix integer overflow in allocation
Dave Wysochanski <dwysocha(a)redhat.com>
NFSv4: Fix possible 1-byte stack overflow in nfs_idmap_read_and_verify_message
Scott Mayhew <smayhew(a)redhat.com>
nfsd: restrict rd_maxcount to svc_max_payload in nfsd_encode_readdir
Mauro Carvalho Chehab <mchehab(a)s-opensource.com>
media: dvb_frontend: fix locking issues at dvb_frontend_get_event()
Kai-Heng Feng <kai.heng.feng(a)canonical.com>
media: cx231xx: Add support for AverMedia DVD EZMaker 7
Mauro Carvalho Chehab <mchehab(a)s-opensource.com>
media: v4l2-compat-ioctl32: prevent go past max size
Adrian Hunter <adrian.hunter(a)intel.com>
perf intel-pt: Fix packet decoding of CYC packets
Adrian Hunter <adrian.hunter(a)intel.com>
perf intel-pt: Fix "Unexpected indirect branch" error
Adrian Hunter <adrian.hunter(a)intel.com>
perf intel-pt: Fix MTC timing after overflow
Adrian Hunter <adrian.hunter(a)intel.com>
perf intel-pt: Fix decoding to accept CBR between FUP and corresponding TIP
Adrian Hunter <adrian.hunter(a)intel.com>
perf intel-pt: Fix sync_switch INTEL_PT_SS_NOT_TRACING
Adrian Hunter <adrian.hunter(a)intel.com>
perf tools: Fix symbol and object code resolution for vdso32 and vdsox32
Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
mfd: intel-lpss: Program REMAP register in PIO mode
Johan Hovold <johan(a)kernel.org>
backlight: tps65217_bl: Fix Device Tree node lookup
Johan Hovold <johan(a)kernel.org>
backlight: max8925_bl: Fix Device Tree node lookup
Johan Hovold <johan(a)kernel.org>
backlight: as3711_bl: Fix Device Tree node lookup
Florian Westphal <fw(a)strlen.de>
xfrm: skip policies marked as dead while rehashing
Tobias Brunner <tobias(a)strongswan.org>
xfrm: Ignore socket policies when rebuilding hash tables
Silvio Cesare <silvio.cesare(a)gmail.com>
UBIFS: Fix potential integer overflow in allocation
Richard Weinberger <richard(a)nod.at>
ubi: fastmap: Cancel work upon detach
NeilBrown <neilb(a)suse.com>
md: fix two problems with setting the "re-add" device state.
Robert Elliott <elliott(a)hpe.com>
libnvdimm, pmem: Preserve read-only setting for pmem devices
Steffen Maier <maier(a)linux.ibm.com>
scsi: zfcp: fix missing REC trigger trace on enqueue without ERP thread
Steffen Maier <maier(a)linux.ibm.com>
scsi: zfcp: fix missing REC trigger trace for all objects in ERP_FAILED
Steffen Maier <maier(a)linux.ibm.com>
scsi: zfcp: fix missing REC trigger trace on terminate_rport_io for ERP_FAILED
Steffen Maier <maier(a)linux.ibm.com>
scsi: zfcp: fix missing REC trigger trace on terminate_rport_io early return
Steffen Maier <maier(a)linux.ibm.com>
scsi: zfcp: fix misleading REC trigger trace where erp_action setup failed
Steffen Maier <maier(a)linux.ibm.com>
scsi: zfcp: fix missing SCSI trace for retry of abort / scsi_eh TMF
Steffen Maier <maier(a)linux.ibm.com>
scsi: zfcp: fix missing SCSI trace for result of eh_host_reset_handler
Himanshu Madhani <himanshu.madhani(a)cavium.com>
scsi: qla2xxx: Fix setting lower transfer speed if GPSC fails
Martin Kelly <mkelly(a)xevo.com>
iio:buffer: make length types match kfifo types
Omar Sandoval <osandov(a)fb.com>
Btrfs: fix clone vs chattr NODATASUM race
Geert Uytterhoeven <geert(a)linux-m68k.org>
time: Make sure jiffies_to_msecs() preserves non-zero time periods
Huacai Chen <chenhc(a)lemote.com>
MIPS: io: Add barrier after register read in inX()
Mika Westerberg <mika.westerberg(a)linux.intel.com>
PCI: pciehp: Clear Presence Detect and Data Link Layer Status Changed on resume
Tokunori Ikegami <ikegami(a)allied-telesis.co.jp>
MIPS: BCM47XX: Enable 74K Core ExternalSync for PCIe erratum
Joakim Tjernlund <joakim.tjernlund(a)infinera.com>
mtd: cfi_cmdset_0002: Avoid walking all chips when unlocking.
Joakim Tjernlund <joakim.tjernlund(a)infinera.com>
mtd: cfi_cmdset_0002: Fix unlocking requests crossing a chip boudary
Joakim Tjernlund <joakim.tjernlund(a)infinera.com>
mtd: cfi_cmdset_0002: fix SEGV unlocking multiple chips
Joakim Tjernlund <joakim.tjernlund(a)infinera.com>
mtd: cfi_cmdset_0002: Use right chip in do_ppb_xxlock()
Tokunori Ikegami <ikegami(a)allied-telesis.co.jp>
mtd: cfi_cmdset_0002: Change write buffer to check correct value
Leon Romanovsky <leonro(a)mellanox.com>
RDMA/mlx4: Discard unknown SQP work requests
Mike Marciniszyn <mike.marciniszyn(a)intel.com>
IB/qib: Fix DMA api warning with debug kernel
Stefan M Schaeckeler <sschaeck(a)cisco.com>
of: unittest: for strings, account for trailing \0 in property length field
David Rivshin <DRivshin(a)allworx.com>
ARM: 8764/1: kgdb: fix NUMREGBYTES so that gdb_regs[] is the correct size
Mahesh Salgaonkar <mahesh(a)linux.vnet.ibm.com>
powerpc/fadump: Unregister fadump on kexec down path.
Gautham R. Shenoy <ego(a)linux.vnet.ibm.com>
cpuidle: powernv: Fix promotion from snooze if next state disabled
Michael Neuling <mikey(a)neuling.org>
powerpc/ptrace: Fix enforcement of DAWR constraints
Michael Neuling <mikey(a)neuling.org>
powerpc/ptrace: Fix setting 512B aligned breakpoints with PTRACE_SET_DEBUGREG
Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
powerpc/mm/hash: Add missing isync prior to kernel stack SLB switch
Miklos Szeredi <mszeredi(a)redhat.com>
fuse: fix control dir setup and teardown
Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
fuse: don't keep dead fuse_conn at fuse_fill_super().
Miklos Szeredi <mszeredi(a)redhat.com>
fuse: atomic_o_trunc should truncate pagecache
Amit Pundir <amit.pundir(a)linaro.org>
Bluetooth: hci_qca: Avoid missing rampatch failure with userspace fw loader
Corey Minyard <cminyard(a)mvista.com>
ipmi:bt: Set the timeout before doing a capabilities check
Mikulas Patocka <mpatocka(a)redhat.com>
branch-check: fix long->int truncation when profiling branches
Matthias Schiffer <mschiffer(a)universe-factory.net>
mips: ftrace: fix static function graph tracing
Geert Uytterhoeven <geert+renesas(a)glider.be>
lib/vsprintf: Remove atomic-unsafe support for %pCr
Alexander Sverdlin <alexander.sverdlin(a)gmail.com>
ASoC: cirrus: i2s: Fix {TX|RX}LinCtrlData setup
Alexander Sverdlin <alexander.sverdlin(a)gmail.com>
ASoC: cirrus: i2s: Fix LRCLK configuration
Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
ASoC: dapm: delete dapm_kcontrol_data paths list before freeing it
Ingo Flaschberger <ingo.flaschberger(a)gmail.com>
1wire: family module autoload fails because of upper/lower case mismatch.
Maxim Moseychuk <franchesko.salias.hudro.pedros(a)gmail.com>
usb: do not reset if a low-speed or full-speed device timed out
Eric W. Biederman <ebiederm(a)xmission.com>
signal/xtensa: Consistenly use SIGBUS in do_unaligned_user
Daniel Wagner <daniel.wagner(a)siemens.com>
serial: sh-sci: Use spin_{try}lock_irqsave instead of open coding version
Michael Schmitz <schmitzmic(a)gmail.com>
m68k/mm: Adjust VM area to be unmapped by gap size for __iounmap()
Dan Williams <dan.j.williams(a)intel.com>
x86/spectre_v1: Disable compiler optimizations over array_index_mask_nospec()
Thadeu Lima de Souza Cascardo <cascardo(a)canonical.com>
fs/binfmt_misc.c: do not allow offset overflow
Stefan Potyra <Stefan.Potyra(a)elektrobit.com>
w1: mxc_w1: Enable clock before calling clk_get_rate() on it
Hans de Goede <hdegoede(a)redhat.com>
libata: Drop SanDisk SD7UB3Q*G1001 NOLPM quirk
Dan Carpenter <dan.carpenter(a)oracle.com>
libata: zpodd: small read overflow in eject_tray()
Colin Ian King <colin.king(a)canonical.com>
libata: zpodd: make arrays cdb static, reduces object code size
Tao Wang <kevin.wangtao(a)hisilicon.com>
cpufreq: Fix new policy initialization during limits updates via sysfs
Dennis Wassenberg <dennis.wassenberg(a)secunet.com>
ALSA: hda: add dock and led support for HP ProBook 640 G4
Dennis Wassenberg <dennis.wassenberg(a)secunet.com>
ALSA: hda: add dock and led support for HP EliteBook 830 G5
Bo Chen <chenbo(a)pdx.edu>
ALSA: hda - Handle kzalloc() failure in snd_hda_attach_pcm_stream()
Qu Wenruo <wqu(a)suse.com>
btrfs: scrub: Don't use inode pages for device replace
Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
driver core: Don't ignore class_dir_create_and_add() failure.
Jan Kara <jack(a)suse.cz>
ext4: fix fencepost error in check for inode count overflow during resize
Lukas Czerner <lczerner(a)redhat.com>
ext4: update mtime in ext4_punch_hole even if no blocks are released
Frank van der Linden <fllinden(a)amazon.com>
tcp: verify the checksum of the first data segment in a new connection
Xiangning Yu <yuxiangning(a)gmail.com>
bonding: re-evaluate force_primary when the primary slave name changes
Daniel Glöckner <dg(a)emlix.com>
usb: musb: fix remote wakeup racing with suspend
Liu Bo <bo.li.liu(a)oracle.com>
Btrfs: make raid6 rebuild retry more
Eric Dumazet <edumazet(a)google.com>
tcp: do not overshoot window_clamp in tcp_rcv_space_adjust()
Sasha Levin <Alexander.Levin(a)microsoft.com>
Revert "Btrfs: fix scrub to repair raid6 corruption"
Finn Thain <fthain(a)telegraphics.com.au>
net/sonic: Use dma_mapping_error()
Josh Hill <josh(a)joshuajhill.com>
net: qmi_wwan: Add Netgear Aircard 779S
Ivan Bornyakov <brnkv.i1(a)gmail.com>
atm: zatm: fix memcmp casting
Julian Anastasov <ja(a)ssi.bg>
ipvs: fix buffer overflow with sync daemon and service
Paolo Abeni <pabeni(a)redhat.com>
netfilter: ebtables: handle string from userspace with care
Eric Dumazet <edumazet(a)google.com>
xfrm6: avoid potential infinite loop in _decode_session6()
-------------
Diffstat:
Documentation/printk-formats.txt | 3 +-
Makefile | 4 +-
arch/arm/include/asm/kgdb.h | 2 +-
arch/m68k/mm/kmap.c | 3 +-
arch/mips/bcm47xx/setup.c | 6 +
arch/mips/include/asm/io.h | 2 +
arch/mips/include/asm/mipsregs.h | 3 +
arch/mips/kernel/mcount.S | 27 ++---
arch/powerpc/kernel/entry_64.S | 1 +
arch/powerpc/kernel/fadump.c | 3 +
arch/powerpc/kernel/hw_breakpoint.c | 4 +-
arch/powerpc/kernel/ptrace.c | 1 +
arch/x86/include/asm/barrier.h | 2 +-
arch/xtensa/kernel/traps.c | 2 +-
drivers/ata/libata-core.c | 3 -
drivers/ata/libata-zpodd.c | 4 +-
drivers/atm/zatm.c | 4 +-
drivers/base/core.c | 14 ++-
drivers/bluetooth/hci_qca.c | 6 +
drivers/char/ipmi/ipmi_bt_sm.c | 3 +-
drivers/cpufreq/cpufreq.c | 2 +
drivers/cpuidle/cpuidle-powernv.c | 32 +++++-
drivers/iio/buffer/kfifo_buf.c | 4 +-
drivers/infiniband/hw/mlx4/mad.c | 1 -
drivers/infiniband/hw/qib/qib.h | 3 +-
drivers/infiniband/hw/qib/qib_file_ops.c | 10 +-
drivers/infiniband/hw/qib/qib_user_pages.c | 20 ++--
drivers/input/mouse/elan_i2c.h | 2 +
drivers/input/mouse/elan_i2c_core.c | 3 +-
drivers/input/mouse/elan_i2c_smbus.c | 10 +-
drivers/input/mouse/elantech.c | 11 +-
drivers/md/dm-thin.c | 11 +-
drivers/md/md.c | 4 +-
drivers/media/dvb-core/dvb_frontend.c | 23 ++--
drivers/media/usb/cx231xx/cx231xx-cards.c | 3 +
drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 2 +-
drivers/mfd/intel-lpss.c | 4 +-
drivers/mtd/chips/cfi_cmdset_0002.c | 21 ++--
drivers/mtd/ubi/build.c | 3 +
drivers/mtd/ubi/wl.c | 4 +-
drivers/net/bonding/bond_options.c | 1 +
drivers/net/ethernet/natsemi/sonic.c | 2 +-
drivers/net/usb/cdc_ncm.c | 4 +-
drivers/net/usb/qmi_wwan.c | 1 +
drivers/nvdimm/bus.c | 14 ++-
drivers/of/unittest.c | 8 +-
drivers/pci/hotplug/pciehp.h | 2 +-
drivers/pci/hotplug/pciehp_core.c | 2 +-
drivers/pci/hotplug/pciehp_hpc.c | 13 ++-
drivers/s390/scsi/zfcp_dbf.c | 40 +++++++
drivers/s390/scsi/zfcp_erp.c | 123 ++++++++++++++++-----
drivers/s390/scsi/zfcp_ext.h | 5 +
drivers/s390/scsi/zfcp_scsi.c | 18 ++-
drivers/scsi/qla2xxx/qla_init.c | 3 +-
drivers/spi/spi.c | 10 +-
drivers/tty/serial/sh-sci.c | 8 +-
drivers/usb/core/hub.c | 4 +-
drivers/usb/musb/musb_host.c | 5 +-
drivers/usb/musb/musb_host.h | 7 +-
drivers/usb/musb/musb_virthub.c | 25 +++--
drivers/video/backlight/as3711_bl.c | 33 ++++--
drivers/video/backlight/max8925_bl.c | 4 +-
drivers/video/backlight/tps65217_bl.c | 4 +-
drivers/video/fbdev/uvesafb.c | 3 +-
drivers/w1/masters/mxc_w1.c | 20 ++--
drivers/w1/w1.c | 2 +-
drivers/xen/events/events_base.c | 2 -
fs/binfmt_misc.c | 12 +-
fs/btrfs/inode.c | 33 +++++-
fs/btrfs/ioctl.c | 12 +-
fs/btrfs/scrub.c | 2 +-
fs/ext4/inode.c | 36 +++---
fs/ext4/resize.c | 2 +-
fs/fuse/control.c | 13 ++-
fs/fuse/dir.c | 13 ++-
fs/fuse/inode.c | 1 +
fs/nfs/nfs4idmap.c | 5 +-
fs/nfsd/nfs4xdr.c | 5 +-
fs/ubifs/journal.c | 2 +-
fs/udf/directory.c | 3 +
include/linux/blkdev.h | 4 +-
include/linux/compiler.h | 2 +-
include/linux/iio/buffer.h | 6 +-
include/net/bluetooth/hci_core.h | 2 +-
kernel/time/time.c | 6 +-
lib/vsprintf.c | 3 -
net/bluetooth/hci_conn.c | 27 +++--
net/bluetooth/hci_event.c | 15 ++-
net/bridge/netfilter/ebtables.c | 3 +-
net/ipv4/tcp_input.c | 2 +-
net/ipv4/tcp_ipv4.c | 4 +
net/ipv6/tcp_ipv6.c | 4 +
net/ipv6/xfrm6_policy.c | 2 +-
net/netfilter/ipvs/ip_vs_ctl.c | 21 +++-
net/xfrm/xfrm_policy.c | 5 +
sound/pci/hda/hda_controller.c | 4 +-
sound/pci/hda/patch_conexant.c | 2 +
sound/pci/hda/patch_realtek.c | 1 +
sound/soc/cirrus/edb93xx.c | 2 +-
sound/soc/cirrus/ep93xx-i2s.c | 26 +++--
sound/soc/cirrus/snappercl15.c | 2 +-
sound/soc/soc-dapm.c | 2 +
tools/perf/util/dso.c | 2 +
.../perf/util/intel-pt-decoder/intel-pt-decoder.c | 23 +++-
.../perf/util/intel-pt-decoder/intel-pt-decoder.h | 9 ++
.../util/intel-pt-decoder/intel-pt-pkt-decoder.c | 2 +-
tools/perf/util/intel-pt.c | 5 +
107 files changed, 685 insertions(+), 273 deletions(-)
Hi Greg,
Please consider backporting commit 7ec916f82c48, which fixes an issue
with iwlwifi module loading in some cases. Fabio initially reported the
issue and confirmed reverting fixed the problem, and it has also been
reported by at least one Fedora user[0] as fixing the problem.
Thanks!
[0] https://bugzilla.redhat.com/show_bug.cgi?id=1607092
A VM which has:
- a DMA capable device passed through to it (eg. network card);
- running a malicious kernel that ignores H_PUT_TCE failure;
- capability of using IOMMU pages bigger that physical pages
can create an IOMMU mapping that exposes (for example) 16MB of
the host physical memory to the device when only 64K was allocated to the VM.
The remaining 16MB - 64K will be some other content of host memory, possibly
including pages of the VM, but also pages of host kernel memory, host
programs or other VMs.
The attacking VM does not control the location of the page it can map,
and is only allowed to map as many pages as it has pages of RAM.
We already have a check in drivers/vfio/vfio_iommu_spapr_tce.c that
an IOMMU page is contained in the physical page so the PCI hardware won't
get access to unassigned host memory; however this check is missing in
the KVM fastpath (the H_PUT_TCE accelerated code). We have been lucky so
far and have not hit this yet: the very first time the mapping happens,
tbl::it_userspace is not allocated yet, so we fall back to userspace,
which in turn calls the VFIO IOMMU driver; this fails and the guest does
not retry.
This stores the smallest preregistered page size in the preregistered
region descriptor and changes the mm_iommu_xxx API to check this against
the IOMMU page size.
This calculates the maximum page size as the minimum of the natural
region alignment and the compound page size. For the page shift this uses
the shift returned by find_linux_pte(), which indicates how the page is
mapped to the current userspace: if the page is huge and the shift is
non-zero, then it is a leaf pte and the page is mapped within the range.
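A worked example of the natural-alignment starting point (assuming 64K
base pages, i.e. PAGE_SHIFT = 16):

	ua = 0x1000000, entries = 256:
	    __ffs(0x1000000 | (256 << 16)) = __ffs(0x1000000) = 24
	    -> IOMMU pages up to 16MB allowed
	ua = 0x10000, entries = 16:
	    __ffs(0x10000 | (16 << 16)) = __ffs(0x110000) = 16
	    -> IOMMU pages capped at 64K

Each pinned page can then only lower mem->pageshift further via
min(mem->pageshift, pageshift).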
Fixes: 121f80ba68f1 ("KVM: PPC: VFIO: Add in-kernel acceleration for VFIO")
Cc: stable(a)vger.kernel.org # v4.12+
Signed-off-by: Alexey Kardashevskiy <aik(a)ozlabs.ru>
Reviewed-by: David Gibson <david(a)gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
(cherry picked from commit 76fa4975f3ed12d15762bc979ca44078598ed8ee)
Signed-off-by: Alexey Kardashevskiy <aik(a)ozlabs.ru>
---
The original patch did not apply because of fad953ce, which converted
all vmalloc calls to use array_size(), so the backport is pretty trivial
and applies to v4.17 stable as well.
---
arch/powerpc/include/asm/mmu_context.h | 4 ++--
arch/powerpc/kvm/book3s_64_vio.c | 2 +-
arch/powerpc/kvm/book3s_64_vio_hv.c | 6 ++++--
arch/powerpc/mm/mmu_context_iommu.c | 37 ++++++++++++++++++++++++++++++++--
drivers/vfio/vfio_iommu_spapr_tce.c | 2 +-
5 files changed, 43 insertions(+), 8 deletions(-)
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 44fdf47..6f67ff5 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -35,9 +35,9 @@ extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(
extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
unsigned long ua, unsigned long entries);
extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
- unsigned long ua, unsigned long *hpa);
+ unsigned long ua, unsigned int pageshift, unsigned long *hpa);
extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
- unsigned long ua, unsigned long *hpa);
+ unsigned long ua, unsigned int pageshift, unsigned long *hpa);
extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
#endif
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 4dffa61..e14cec6 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -433,7 +433,7 @@ long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
/* This only handles v2 IOMMU type, v1 is handled via ioctl() */
return H_TOO_HARD;
- if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa)))
+ if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa)))
return H_HARDWARE;
if (mm_iommu_mapped_inc(mem))
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index c32e9bfe..648cf6c 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -262,7 +262,8 @@ static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
if (!mem)
return H_TOO_HARD;
- if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa)))
+ if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, tbl->it_page_shift,
+ &hpa)))
return H_HARDWARE;
pua = (void *) vmalloc_to_phys(pua);
@@ -431,7 +432,8 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
if (mem)
- prereg = mm_iommu_ua_to_hpa_rm(mem, ua, &tces) == 0;
+ prereg = mm_iommu_ua_to_hpa_rm(mem, ua,
+ IOMMU_PAGE_SHIFT_4K, &tces) == 0;
}
if (!prereg) {
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index e0a2d8e..8160559 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -19,6 +19,7 @@
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <asm/mmu_context.h>
+#include <asm/pte-walk.h>
static DEFINE_MUTEX(mem_list_mutex);
@@ -27,6 +28,7 @@ struct mm_iommu_table_group_mem_t {
struct rcu_head rcu;
unsigned long used;
atomic64_t mapped;
+ unsigned int pageshift;
u64 ua; /* userspace address */
u64 entries; /* number of entries in hpas[] */
u64 *hpas; /* vmalloc'ed */
@@ -126,6 +128,8 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
{
struct mm_iommu_table_group_mem_t *mem;
long i, j, ret = 0, locked_entries = 0;
+ unsigned int pageshift;
+ unsigned long flags;
struct page *page = NULL;
mutex_lock(&mem_list_mutex);
@@ -160,6 +164,12 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
goto unlock_exit;
}
+ /*
+ * For a starting point for a maximum page size calculation
+ * we use @ua and @entries natural alignment to allow IOMMU pages
+ * smaller than huge pages but still bigger than PAGE_SIZE.
+ */
+ mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
mem->hpas = vzalloc(entries * sizeof(mem->hpas[0]));
if (!mem->hpas) {
kfree(mem);
@@ -200,6 +210,23 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
}
}
populate:
+ pageshift = PAGE_SHIFT;
+ if (PageCompound(page)) {
+ pte_t *pte;
+ struct page *head = compound_head(page);
+ unsigned int compshift = compound_order(head);
+
+ local_irq_save(flags); /* disables as well */
+ pte = find_linux_pte(mm->pgd, ua, NULL, &pageshift);
+ local_irq_restore(flags);
+
+ /* Double check it is still the same pinned page */
+ if (pte && pte_page(*pte) == head &&
+ pageshift == compshift)
+ pageshift = max_t(unsigned int, pageshift,
+ PAGE_SHIFT);
+ }
+ mem->pageshift = min(mem->pageshift, pageshift);
mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
}
@@ -350,7 +377,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
EXPORT_SYMBOL_GPL(mm_iommu_find);
long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
- unsigned long ua, unsigned long *hpa)
+ unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
u64 *va = &mem->hpas[entry];
@@ -358,6 +385,9 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
if (entry >= mem->entries)
return -EFAULT;
+ if (pageshift > mem->pageshift)
+ return -EFAULT;
+
*hpa = *va | (ua & ~PAGE_MASK);
return 0;
@@ -365,7 +395,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
- unsigned long ua, unsigned long *hpa)
+ unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
void *va = &mem->hpas[entry];
@@ -374,6 +404,9 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
if (entry >= mem->entries)
return -EFAULT;
+ if (pageshift > mem->pageshift)
+ return -EFAULT;
+
pa = (void *) vmalloc_to_phys(va);
if (!pa)
return -EFAULT;
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index b751dd6..b4c68f3 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -467,7 +467,7 @@ static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
if (!mem)
return -EINVAL;
- ret = mm_iommu_ua_to_hpa(mem, tce, phpa);
+ ret = mm_iommu_ua_to_hpa(mem, tce, shift, phpa);
if (ret)
return -EINVAL;
--
2.11.0
xen/PVH: Set up GS segment for stack canary
commit 98014068328c5574de9a4a30b604111fd9d8f901 upstream
A 32bit PVH Xen kernel with CONFIG_CC_STACKPROTECTOR_STRONG fails to
boot. Xen detects a triple fault and kills the domain. The IP was
xen_prepare_pvh+9 corresponding to:
mov %gs:0x14,%eax
The 32bit kernel hasn't set up %gs when calling into xen_prepare_pvh.
Curiously, 64bit was not affected. The requested patch sets up the
canary for PVH to boot successfully.
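An illustrative sketch of the failure (assuming the standard 32-bit
stack-protector layout; not taken from the patch): every instrumented
prologue loads the canary through %gs, so with %gs still unset the very
first protected function faults before any exception handlers exist:

	xen_prepare_pvh:
		push   %ebp
		mov    %esp,%ebp
		mov    %gs:0x14,%eax    # canary load; %gs not yet set up -> fault
		...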
This is applicable to and has been tested on 4.14. It is also
applicable to 4.17.
Thanks,
Jason
The patch
ASoC: zte: Fix incorrect PCM format bit usages
has been applied to the asoc tree at
https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git
All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.
You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.
If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.
Please add any relevant lists and maintainers to the CCs when replying
to this mail.
Thanks,
Mark
From c889a45d229938a94b50aadb819def8bb11a6a54 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai(a)suse.de>
Date: Wed, 25 Jul 2018 22:40:49 +0200
Subject: [PATCH] ASoC: zte: Fix incorrect PCM format bit usages
zx-tdm driver sets the DAI driver definitions with the format bits
wrongly set with SNDRV_PCM_FORMAT_*, instead of SNDRV_PCM_FMTBIT_*.
This patch corrects the definitions.
Spotted by a sparse warning:
sound/soc/zte/zx-tdm.c:363:35: warning: restricted snd_pcm_format_t degrades to integer
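The distinction (paraphrasing include/sound/pcm.h): SNDRV_PCM_FORMAT_*
are enum values, while SNDRV_PCM_FMTBIT_* are the corresponding bit
masks, roughly:

	#define _SNDRV_PCM_FMTBIT(fmt)  (1ULL << (int)SNDRV_PCM_FORMAT_##fmt)
	#define SNDRV_PCM_FMTBIT_MU_LAW _SNDRV_PCM_FMTBIT(MU_LAW)
	#define SNDRV_PCM_FMTBIT_A_LAW  _SNDRV_PCM_FMTBIT(A_LAW)

OR-ing the small FORMAT_* enum values into a formats bitmask sets
unrelated low-order bits instead of the intended mu-law/A-law bits.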
Fixes: 870e0ddc4345 ("ASoC: zx-tdm: add zte's tdm controller driver")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
Signed-off-by: Mark Brown <broonie(a)kernel.org>
---
sound/soc/zte/zx-tdm.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/sound/soc/zte/zx-tdm.c b/sound/soc/zte/zx-tdm.c
index dc955272f58b..389272eeba9a 100644
--- a/sound/soc/zte/zx-tdm.c
+++ b/sound/soc/zte/zx-tdm.c
@@ -144,8 +144,8 @@ static void zx_tdm_rx_dma_en(struct zx_tdm_info *tdm, bool on)
#define ZX_TDM_RATES (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000)
#define ZX_TDM_FMTBIT \
- (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FORMAT_MU_LAW | \
- SNDRV_PCM_FORMAT_A_LAW)
+ (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_MU_LAW | \
+ SNDRV_PCM_FMTBIT_A_LAW)
static int zx_tdm_dai_probe(struct snd_soc_dai *dai)
{
--
2.18.0
Commit b1092c9af9ed ("bcache: allow quick writeback when backing idle")
allows the writeback rate to be faster if there is no I/O request on a
bcache device. It works well if there is only one bcache device attached
to the cache set. If there are many bcache devices attached to a cache
set, it may introduce a performance regression because the multiple
faster writeback threads of the idle bcache devices will compete for the
btree level locks with the bcache device that has I/O requests coming in.
This patch fixes the above issue by only permitting fast writeback when
all bcache devices attached to the cache set are idle. If one of the
bcache devices gets a new I/O request, all writeback throughput is
minimized immediately and the PI controller __update_writeback_rate()
decides the upcoming writeback rate for each bcache device.
Also, when all bcache devices are idle, limiting the writeback rate to a
small number wastes throughput, especially when the backing devices are
slower non-rotational devices (e.g. SATA SSDs). This patch sets a max
writeback rate for each backing device if the whole cache set is idle. A
faster writeback rate in idle time means new I/Os may have more available
space for dirty data, and people may observe better write performance
then.
Please note bcache may change its cache mode at run time, and this patch
still works if the cache mode is switched away from writeback while there
is still dirty data on the cache.
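A rough worked example of the idle threshold (assuming the default
writeback_rate_update_seconds of 5): with 3 attached backing devices,

	threshold = attached_dev_nr * 6 = 18 idle_counter increments
	-> ~6 update rounds per device = ~30 seconds of cache-set-wide
	   idleness before every device jumps to the maximum rate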
Fixes: b1092c9af9ed ("bcache: allow quick writeback when backing idle")
Cc: stable(a)vger.kernel.org #4.16+
Signed-off-by: Coly Li <colyli(a)suse.de>
Tested-by: Kai Krakow <kai(a)kaishome.de>
Tested-by: Stefan Priebe <s.priebe(a)profihost.ag>
Cc: Michael Lyle <mlyle(a)lyle.org>
---
drivers/md/bcache/bcache.h | 10 ++--
drivers/md/bcache/request.c | 54 ++++++++++++++++++++-
drivers/md/bcache/super.c | 4 ++
drivers/md/bcache/sysfs.c | 15 ++++--
drivers/md/bcache/util.c | 2 +-
drivers/md/bcache/util.h | 2 +-
drivers/md/bcache/writeback.c | 91 +++++++++++++++++++++++------------
7 files changed, 134 insertions(+), 44 deletions(-)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 5f7082aab1b0..97489573dedc 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -328,13 +328,6 @@ struct cached_dev {
*/
atomic_t has_dirty;
- /*
- * Set to zero by things that touch the backing volume-- except
- * writeback. Incremented by writeback. Used to determine when to
- * accelerate idle writeback.
- */
- atomic_t backing_idle;
-
struct bch_ratelimit writeback_rate;
struct delayed_work writeback_rate_update;
@@ -515,6 +508,8 @@ struct cache_set {
struct cache_accounting accounting;
unsigned long flags;
+ atomic_t idle_counter;
+ atomic_t at_max_writeback_rate;
struct cache_sb sb;
@@ -524,6 +519,7 @@ struct cache_set {
struct bcache_device **devices;
unsigned devices_max_used;
+ atomic_t attached_dev_nr;
struct list_head cached_devs;
uint64_t cached_dev_sectors;
atomic_long_t flash_dev_dirty_sectors;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 91206f329971..86a977c2a176 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -1105,6 +1105,44 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
generic_make_request(bio);
}
+static void quit_max_writeback_rate(struct cache_set *c,
+ struct cached_dev *this_dc)
+{
+ int i;
+ struct bcache_device *d;
+ struct cached_dev *dc;
+
+ /*
+ * mutex bch_register_lock may compete with other parallel requesters,
+ * or attach/detach operations on other backing device. Waiting to
+ * the mutex lock may increase I/O request latency for seconds or more.
+ * To avoid such situation, if mutex_trylock() failed, only writeback
+ * rate of current cached device is set to 1, and __update_writeback_rate()
+ * will decide writeback rate of other cached devices (remember now
+ * c->idle_counter is 0 already).
+ */
+ if (mutex_trylock(&bch_register_lock)) {
+ for (i = 0; i < c->devices_max_used; i++) {
+ if (!c->devices[i])
+ continue;
+
+ if (UUID_FLASH_ONLY(&c->uuids[i]))
+ continue;
+
+ d = c->devices[i];
+ dc = container_of(d, struct cached_dev, disk);
+ /*
+ * set writeback rate to default minimum value,
+ * then let update_writeback_rate() to decide the
+ * upcoming rate.
+ */
+ atomic_long_set(&dc->writeback_rate.rate, 1);
+ }
+ mutex_unlock(&bch_register_lock);
+ } else
+ atomic_long_set(&this_dc->writeback_rate.rate, 1);
+}
+
/* Cached devices - read & write stuff */
static blk_qc_t cached_dev_make_request(struct request_queue *q,
@@ -1122,7 +1160,21 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
return BLK_QC_T_NONE;
}
- atomic_set(&dc->backing_idle, 0);
+ if (likely(d->c)) {
+ if (atomic_read(&d->c->idle_counter))
+ atomic_set(&d->c->idle_counter, 0);
+ /*
+ * If at_max_writeback_rate of cache set is true and new I/O
+ * comes, quit max writeback rate of all cached devices
+ * attached to this cache set, and set at_max_writeback_rate
+ * to false.
+ */
+ if (unlikely(atomic_read(&d->c->at_max_writeback_rate) == 1)) {
+ atomic_set(&d->c->at_max_writeback_rate, 0);
+ quit_max_writeback_rate(d->c, dc);
+ }
+ }
+
generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
bio_set_dev(bio, dc->bdev);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index f517d7d1fa10..32b95f3b9461 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -696,6 +696,8 @@ static void bcache_device_detach(struct bcache_device *d)
{
lockdep_assert_held(&bch_register_lock);
+ atomic_dec(&d->c->attached_dev_nr);
+
if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) {
struct uuid_entry *u = d->c->uuids + d->id;
@@ -1144,6 +1146,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
bch_cached_dev_run(dc);
bcache_device_link(&dc->disk, c, "bdev");
+ atomic_inc(&c->attached_dev_nr);
/* Allow the writeback thread to proceed */
up_write(&dc->writeback_lock);
@@ -1696,6 +1699,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
c->block_bits = ilog2(sb->block_size);
c->nr_uuids = bucket_bytes(c) / sizeof(struct uuid_entry);
c->devices_max_used = 0;
+ atomic_set(&c->attached_dev_nr, 0);
c->btree_pages = bucket_pages(c);
if (c->btree_pages > BTREE_MAX_PAGES)
c->btree_pages = max_t(int, c->btree_pages / 4,
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 3e9d3459a224..6e88142514fb 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -171,7 +171,8 @@ SHOW(__bch_cached_dev)
var_printf(writeback_running, "%i");
var_print(writeback_delay);
var_print(writeback_percent);
- sysfs_hprint(writeback_rate, wb ? dc->writeback_rate.rate << 9 : 0);
+ sysfs_hprint(writeback_rate,
+ wb ? atomic_long_read(&dc->writeback_rate.rate) << 9 : 0);
sysfs_hprint(io_errors, atomic_read(&dc->io_errors));
sysfs_printf(io_error_limit, "%i", dc->error_limit);
sysfs_printf(io_disable, "%i", dc->io_disable);
@@ -193,7 +194,9 @@ SHOW(__bch_cached_dev)
* Except for dirty and target, other values should
* be 0 if writeback is not running.
*/
- bch_hprint(rate, wb ? dc->writeback_rate.rate << 9 : 0);
+ bch_hprint(rate,
+ wb ? atomic_long_read(&dc->writeback_rate.rate) << 9
+ : 0);
bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9);
bch_hprint(target, dc->writeback_rate_target << 9);
bch_hprint(proportional,
@@ -261,8 +264,12 @@ STORE(__cached_dev)
sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40);
- sysfs_strtoul_clamp(writeback_rate,
- dc->writeback_rate.rate, 1, INT_MAX);
+ if (attr == &sysfs_writeback_rate) {
+ int v;
+
+ sysfs_strtoul_clamp(writeback_rate, v, 1, INT_MAX);
+ atomic_long_set(&dc->writeback_rate.rate, v);
+ }
sysfs_strtoul_clamp(writeback_rate_update_seconds,
dc->writeback_rate_update_seconds,
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index f912c372978c..c6a99dfa1ad9 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -200,7 +200,7 @@ uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
{
uint64_t now = local_clock();
- d->next += div_u64(done * NSEC_PER_SEC, d->rate);
+ d->next += div_u64(done * NSEC_PER_SEC, atomic_long_read(&d->rate));
/* Bound the time. Don't let us fall further than 2 seconds behind
* (this prevents unnecessary backlog that would make it impossible
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index a1579e28049f..5ff055f0a653 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -443,7 +443,7 @@ struct bch_ratelimit {
* Rate at which we want to do work, in units per second
* The units here correspond to the units passed to bch_next_delay()
*/
- uint32_t rate;
+ atomic_long_t rate;
};
static inline void bch_ratelimit_reset(struct bch_ratelimit *d)
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 912e969fedba..481d4cf38ac0 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -104,11 +104,56 @@ static void __update_writeback_rate(struct cached_dev *dc)
dc->writeback_rate_proportional = proportional_scaled;
dc->writeback_rate_integral_scaled = integral_scaled;
- dc->writeback_rate_change = new_rate - dc->writeback_rate.rate;
- dc->writeback_rate.rate = new_rate;
+ dc->writeback_rate_change = new_rate -
+ atomic_long_read(&dc->writeback_rate.rate);
+ atomic_long_set(&dc->writeback_rate.rate, new_rate);
dc->writeback_rate_target = target;
}
+static bool set_at_max_writeback_rate(struct cache_set *c,
+ struct cached_dev *dc)
+{
+ /*
+ * Idle_counter is increased every time update_writeback_rate() is
+ * called. If all backing devices attached to the same cache set have
+ * identical dc->writeback_rate_update_seconds values, it is about 6
+ * rounds of update_writeback_rate() on each backing device before
+ * c->at_max_writeback_rate is set to 1, and then the max writeback
+ * rate is set on each dc->writeback_rate.rate.
+ * In order to avoid extra locking cost for counting the exact number
+ * of dirty cached devices, c->attached_dev_nr is used to calculate the
+ * idle threshold. It might be bigger if not all cached devices are in
+ * writeback mode, but it still works well with limited extra rounds of
+ * update_writeback_rate().
+ */
+ if (atomic_inc_return(&c->idle_counter) <
+ atomic_read(&c->attached_dev_nr) * 6)
+ return false;
+
+ if (atomic_read(&c->at_max_writeback_rate) != 1)
+ atomic_set(&c->at_max_writeback_rate, 1);
+
+ atomic_long_set(&dc->writeback_rate.rate, INT_MAX);
+
+ /* keep writeback_rate_target as existing value */
+ dc->writeback_rate_proportional = 0;
+ dc->writeback_rate_integral_scaled = 0;
+ dc->writeback_rate_change = 0;
+
+ /*
+ * Check c->idle_counter and c->at_max_writeback_rate again in case
+ * new I/O arrives before set_at_max_writeback_rate() returns.
+ * Then the writeback rate is set to 1, and its new value should be
+ * decided via __update_writeback_rate().
+ */
+ if ((atomic_read(&c->idle_counter) <
+ atomic_read(&c->attached_dev_nr) * 6) ||
+ !atomic_read(&c->at_max_writeback_rate))
+ return false;
+
+ return true;
+}
+
static void update_writeback_rate(struct work_struct *work)
{
struct cached_dev *dc = container_of(to_delayed_work(work),
@@ -136,13 +181,20 @@ static void update_writeback_rate(struct work_struct *work)
return;
}
- down_read(&dc->writeback_lock);
-
- if (atomic_read(&dc->has_dirty) &&
- dc->writeback_percent)
- __update_writeback_rate(dc);
+ if (atomic_read(&dc->has_dirty) && dc->writeback_percent) {
+ /*
+ * If the whole cache set is idle, set_at_max_writeback_rate()
+ * will set writeback rate to a max number. Then it is
+ * unnecessary to update writeback rate for an idle cache set
+ * in maximum writeback rate number(s).
+ */
+ if (!set_at_max_writeback_rate(c, dc)) {
+ down_read(&dc->writeback_lock);
+ __update_writeback_rate(dc);
+ up_read(&dc->writeback_lock);
+ }
+ }
- up_read(&dc->writeback_lock);
/*
* CACHE_SET_IO_DISABLE might be set via sysfs interface,
@@ -422,27 +474,6 @@ static void read_dirty(struct cached_dev *dc)
delay = writeback_delay(dc, size);
- /* If the control system would wait for at least half a
- * second, and there's been no reqs hitting the backing disk
- * for awhile: use an alternate mode where we have at most
- * one contiguous set of writebacks in flight at a time. If
- * someone wants to do IO it will be quick, as it will only
- * have to contend with one operation in flight, and we'll
- * be round-tripping data to the backing disk as quickly as
- * it can accept it.
- */
- if (delay >= HZ / 2) {
- /* 3 means at least 1.5 seconds, up to 7.5 if we
- * have slowed way down.
- */
- if (atomic_inc_return(&dc->backing_idle) >= 3) {
- /* Wait for current I/Os to finish */
- closure_sync(&cl);
- /* And immediately launch a new set. */
- delay = 0;
- }
- }
-
while (!kthread_should_stop() &&
!test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
delay) {
@@ -741,7 +772,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
dc->writeback_running = true;
dc->writeback_percent = 10;
dc->writeback_delay = 30;
- dc->writeback_rate.rate = 1024;
+ atomic_long_set(&dc->writeback_rate.rate, 1024);
dc->writeback_rate_minimum = 8;
dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;
--
2.17.1
There is a window for racing when printing directly to task->comm,
allowing other threads to see a non-terminated string. The vsnprintf
function fills the buffer, counts the truncated chars, then finally
writes the \0 at the end.
   creator                      other
 vsnprintf:
   fill (not terminated)
   count the rest               trace_sched_waking(p):
   ...                            memcpy(comm, p->comm, TASK_COMM_LEN)
   write \0
The consequences depend on how 'other' uses the string. In our case,
it was copied into the tracing system's saved cmdlines, an array of
adjacent TASK_COMM_LEN-byte buffers (note the 'n' where 0 should be):
crash-arm64> x/1024s savedcmd->saved_cmdlines | grep 'evenk'
0xffffffd5b3818640: "irq/497-pwr_evenkworker/u16:12"
...and a strcpy out of there would cause stack corruption:
[224761.522292] Kernel panic - not syncing: stack-protector:
Kernel stack is corrupted in: ffffff9bf9783c78
crash-arm64> kbt | grep 'comm\|trace_print_context'
#6 0xffffff9bf9783c78 in trace_print_context+0x18c(+396)
comm (char [16]) = "irq/497-pwr_even"
crash-arm64> rd 0xffffffd4d0e17d14 8
ffffffd4d0e17d14: 2f71726900000000 5f7277702d373934 ....irq/497-pwr_
ffffffd4d0e17d24: 726f776b6e657665 3a3631752f72656b evenkworker/u16:
ffffffd4d0e17d34: f9780248ff003231 cede60e0ffffff9b 12..H.x......`..
ffffffd4d0e17d44: cede60c8ffffffd4 00000fffffffffd4 .....`..........
The workaround in e09e28671 (use strlcpy in __trace_find_cmdline) was
likely needed because of this same bug.
Solved by vsnprintf()'ing to a local buffer, then using set_task_comm().
This way, there won't be a window where comm is not terminated.
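For illustration, here is a minimal userspace sketch of the same pattern
(COMM_LEN, set_comm() and get_comm() are made-up names for this example;
in the kernel, set_task_comm() takes task_lock() to serialize writers).
The formatting happens in a private buffer, so the shared buffer never
holds the unterminated intermediate state:

/*
 * Minimal sketch, not kernel code: format into a private buffer first,
 * then publish the already-terminated string, so a concurrent reader
 * that snapshots the buffer never observes the unterminated state
 * vsnprintf() leaves behind while it is still counting.
 */
#include <pthread.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>

#define COMM_LEN 16	/* mirrors TASK_COMM_LEN */

static char comm[COMM_LEN] = "init";
static pthread_mutex_t comm_lock = PTHREAD_MUTEX_INITIALIZER;

static void set_comm(const char *fmt, ...)	/* plays set_task_comm() */
{
	char buf[COMM_LEN];
	va_list args;

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);	/* terminate privately */
	va_end(args);

	pthread_mutex_lock(&comm_lock);
	strcpy(comm, buf);	/* shared buffer only ever holds terminated data */
	pthread_mutex_unlock(&comm_lock);
}

static void get_comm(char *out)	/* plays the tracing-side memcpy() */
{
	pthread_mutex_lock(&comm_lock);
	memcpy(out, comm, COMM_LEN);
	pthread_mutex_unlock(&comm_lock);
}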
Cc: stable(a)vger.kernel.org
Fixes: bc0c38d139ec7 ("ftrace: latency tracer infrastructure")
Reviewed-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
Signed-off-by: Snild Dolkow <snild(a)sony.com>
---
kernel/kthread.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 481951bf091d..1a481ae12dec 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -319,8 +319,14 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
task = create->result;
if (!IS_ERR(task)) {
static const struct sched_param param = { .sched_priority = 0 };
+ char name[TASK_COMM_LEN];
- vsnprintf(task->comm, sizeof(task->comm), namefmt, args);
+ /*
+ * task is already visible to other tasks, so updating
+ * COMM must be protected.
+ */
+ vsnprintf(name, sizeof(name), namefmt, args);
+ set_task_comm(task, name);
/*
* root may have changed our (kthreadd's) priority or CPU mask.
* The kernel thread should not inherit these properties.
--
2.15.1
Commit b1092c9af9ed ("bcache: allow quick writeback when backing idle")
allows the writeback rate to be faster if there is no I/O request on a
bcache device. It works well if there is only one bcache device attached
to the cache set. If there are many bcache devices attached to a cache
set, it may introduce a performance regression because the multiple faster
writeback threads of the idle bcache devices will compete for the btree
level locks with the bcache devices that have I/O requests coming.
This patch fixes the above issue by only permitting fast writeback when
all bcache devices attached to the cache set are idle. If one of the
bcache devices has a new I/O request coming, all writeback throughput is
minimized immediately and the PI controller __update_writeback_rate()
decides the upcoming writeback rate for each bcache device.
Also, when all bcache devices are idle, limiting the writeback rate to a
small number is a waste of throughput, especially when the backing
devices are slower non-rotational devices (e.g. SATA SSD). This patch
sets a max writeback rate for each backing device if the whole cache set
is idle. A faster writeback rate in idle time means new I/Os may have
more available space for dirty data, and people may observe better write
performance then.
Please note that bcache may change its cache mode at run time, and this
patch still works if the cache mode is switched away from writeback mode
while there is still dirty data in the cache.
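The counting scheme can be summarized with a rough userspace model. The
names below mirror the patch, but this is only an illustration of the
heuristic, not the driver code: each rate-update round increments
idle_counter, any incoming I/O resets it, and only after about 6 rounds
per attached device does the cache set switch to the maximum rate.

#include <limits.h>
#include <stdatomic.h>
#include <stdbool.h>

atomic_int idle_counter;
atomic_int attached_dev_nr;
atomic_long writeback_rate;	/* one cached device shown for brevity */

/* Called from each update_writeback_rate() round. */
static bool try_set_max_writeback_rate(void)
{
	/* Not idle long enough yet: keep the PI-controlled rate. */
	if (atomic_fetch_add(&idle_counter, 1) + 1 <
	    atomic_load(&attached_dev_nr) * 6)
		return false;

	atomic_store(&writeback_rate, INT_MAX);	/* go full speed */

	/* Re-check: new I/O may have reset idle_counter meanwhile. */
	if (atomic_load(&idle_counter) <
	    atomic_load(&attached_dev_nr) * 6)
		return false;	/* caller falls back to the PI controller */

	return true;
}

/* Called on every incoming request to a cached device. */
static void on_new_io(void)
{
	atomic_store(&idle_counter, 0);	/* any request ends the idle state */
	atomic_store(&writeback_rate, 1);	/* quit max rate until recomputed */
}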
Fixes: b1092c9af9ed ("bcache: allow quick writeback when backing idle")
Cc: stable(a)vger.kernel.org #4.16+
Signed-off-by: Coly Li <colyli(a)suse.de>
Tested-by: Kai Krakow <kai(a)kaishome.de>
Cc: Michael Lyle <mlyle(a)lyle.org>
Cc: Stefan Priebe <s.priebe(a)profihost.ag>
---
Changelog:
v3, Do not acquire bch_register_lock in set_at_max_writeback_rate().
v2, Fix a deadlock reported by Stefan Priebe.
v1, Initial version.
drivers/md/bcache/bcache.h | 10 ++--
drivers/md/bcache/request.c | 54 ++++++++++++++++++++-
drivers/md/bcache/super.c | 4 ++
drivers/md/bcache/sysfs.c | 14 ++++--
drivers/md/bcache/util.c | 2 +-
drivers/md/bcache/util.h | 2 +-
drivers/md/bcache/writeback.c | 91 +++++++++++++++++++++++------------
7 files changed, 133 insertions(+), 44 deletions(-)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 872ef4d67711..13f908be42ba 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -328,13 +328,6 @@ struct cached_dev {
*/
atomic_t has_dirty;
- /*
- * Set to zero by things that touch the backing volume-- except
- * writeback. Incremented by writeback. Used to determine when to
- * accelerate idle writeback.
- */
- atomic_t backing_idle;
-
struct bch_ratelimit writeback_rate;
struct delayed_work writeback_rate_update;
@@ -515,6 +508,8 @@ struct cache_set {
struct cache_accounting accounting;
unsigned long flags;
+ atomic_t idle_counter;
+ atomic_t at_max_writeback_rate;
struct cache_sb sb;
@@ -524,6 +519,7 @@ struct cache_set {
struct bcache_device **devices;
unsigned devices_max_used;
+ atomic_t attached_dev_nr;
struct list_head cached_devs;
uint64_t cached_dev_sectors;
atomic_long_t flash_dev_dirty_sectors;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 8eece9ef9f46..26f97acde403 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -1105,6 +1105,44 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
generic_make_request(bio);
}
+static void quit_max_writeback_rate(struct cache_set *c,
+ struct cached_dev *this_dc)
+{
+ int i;
+ struct bcache_device *d;
+ struct cached_dev *dc;
+
+ /*
+ * mutex bch_register_lock may compete with other parallel requesters,
+ * or attach/detach operations on other backing devices. Waiting for
+ * the mutex lock may increase I/O request latency for seconds or more.
+ * To avoid such a situation, if mutex_trylock() fails, only the
+ * writeback rate of the current cached device is set to 1, and
+ * update_writeback_rate() will decide the writeback rate of the other
+ * cached devices (remember c->idle_counter is 0 already).
+ */
+ if (mutex_trylock(&bch_register_lock)) {
+ for (i = 0; i < c->devices_max_used; i++) {
+ if (!c->devices[i])
+ continue;
+
+ if (UUID_FLASH_ONLY(&c->uuids[i]))
+ continue;
+
+ d = c->devices[i];
+ dc = container_of(d, struct cached_dev, disk);
+ /*
+ * set writeback rate to default minimum value,
+ * then let update_writeback_rate() decide the
+ * upcoming rate.
+ */
+ atomic_long_set(&dc->writeback_rate.rate, 1);
+ }
+ mutex_unlock(&bch_register_lock);
+ } else
+ atomic_long_set(&this_dc->writeback_rate.rate, 1);
+}
+
/* Cached devices - read & write stuff */
static blk_qc_t cached_dev_make_request(struct request_queue *q,
@@ -1122,7 +1160,21 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
return BLK_QC_T_NONE;
}
- atomic_set(&dc->backing_idle, 0);
+ if (likely(d->c)) {
+ if (atomic_read(&d->c->idle_counter))
+ atomic_set(&d->c->idle_counter, 0);
+ /*
+ * If at_max_writeback_rate of cache set is true and new I/O
+ * comes, quit max writeback rate of all cached devices
+ * attached to this cache set, and set at_max_writeback_rate
+ * to false.
+ */
+ if (unlikely(atomic_read(&d->c->at_max_writeback_rate) == 1)) {
+ atomic_set(&d->c->at_max_writeback_rate, 0);
+ quit_max_writeback_rate(d->c, dc);
+ }
+ }
+
generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
bio_set_dev(bio, dc->bdev);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index e0a92104ca23..8db6696e2bff 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -696,6 +696,8 @@ static void bcache_device_detach(struct bcache_device *d)
{
lockdep_assert_held(&bch_register_lock);
+ atomic_dec(&d->c->attached_dev_nr);
+
if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) {
struct uuid_entry *u = d->c->uuids + d->id;
@@ -1144,6 +1146,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
bch_cached_dev_run(dc);
bcache_device_link(&dc->disk, c, "bdev");
+ atomic_inc(&c->attached_dev_nr);
/* Allow the writeback thread to proceed */
up_write(&dc->writeback_lock);
@@ -1695,6 +1698,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
c->block_bits = ilog2(sb->block_size);
c->nr_uuids = bucket_bytes(c) / sizeof(struct uuid_entry);
c->devices_max_used = 0;
+ atomic_set(&c->attached_dev_nr, 0);
c->btree_pages = bucket_pages(c);
if (c->btree_pages > BTREE_MAX_PAGES)
c->btree_pages = max_t(int, c->btree_pages / 4,
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 225b15aa0340..a56067e80b10 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -170,7 +170,8 @@ SHOW(__bch_cached_dev)
var_printf(writeback_running, "%i");
var_print(writeback_delay);
var_print(writeback_percent);
- sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9);
+ sysfs_hprint(writeback_rate,
+ atomic_long_read(&dc->writeback_rate.rate) << 9);
sysfs_hprint(io_errors, atomic_read(&dc->io_errors));
sysfs_printf(io_error_limit, "%i", dc->error_limit);
sysfs_printf(io_disable, "%i", dc->io_disable);
@@ -188,7 +189,8 @@ SHOW(__bch_cached_dev)
char change[20];
s64 next_io;
- bch_hprint(rate, dc->writeback_rate.rate << 9);
+ bch_hprint(rate,
+ atomic_long_read(&dc->writeback_rate.rate) << 9);
bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9);
bch_hprint(target, dc->writeback_rate_target << 9);
bch_hprint(proportional,dc->writeback_rate_proportional << 9);
@@ -255,8 +257,12 @@ STORE(__cached_dev)
sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40);
- sysfs_strtoul_clamp(writeback_rate,
- dc->writeback_rate.rate, 1, INT_MAX);
+ if (attr == &sysfs_writeback_rate) {
+ int v;
+
+ sysfs_strtoul_clamp(writeback_rate, v, 1, INT_MAX);
+ atomic_long_set(&dc->writeback_rate.rate, v);
+ }
sysfs_strtoul_clamp(writeback_rate_update_seconds,
dc->writeback_rate_update_seconds,
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index f912c372978c..c6a99dfa1ad9 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -200,7 +200,7 @@ uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
{
uint64_t now = local_clock();
- d->next += div_u64(done * NSEC_PER_SEC, d->rate);
+ d->next += div_u64(done * NSEC_PER_SEC, atomic_long_read(&d->rate));
/* Bound the time. Don't let us fall further than 2 seconds behind
* (this prevents unnecessary backlog that would make it impossible
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index a1579e28049f..5ff055f0a653 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -443,7 +443,7 @@ struct bch_ratelimit {
* Rate at which we want to do work, in units per second
* The units here correspond to the units passed to bch_next_delay()
*/
- uint32_t rate;
+ atomic_long_t rate;
};
static inline void bch_ratelimit_reset(struct bch_ratelimit *d)
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 912e969fedba..907fa6c0d192 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -104,11 +104,56 @@ static void __update_writeback_rate(struct cached_dev *dc)
dc->writeback_rate_proportional = proportional_scaled;
dc->writeback_rate_integral_scaled = integral_scaled;
- dc->writeback_rate_change = new_rate - dc->writeback_rate.rate;
- dc->writeback_rate.rate = new_rate;
+ dc->writeback_rate_change = new_rate -
+ atomic_long_read(&dc->writeback_rate.rate);
+ atomic_long_set(&dc->writeback_rate.rate, new_rate);
dc->writeback_rate_target = target;
}
+static bool set_at_max_writeback_rate(struct cache_set *c,
+ struct cached_dev *dc)
+{
+ /*
+ * Idle_counter is increased every time update_writeback_rate() is
+ * called. If all backing devices attached to the same cache set have
+ * identical dc->writeback_rate_update_seconds values, it is about 6
+ * rounds of update_writeback_rate() on each backing device before
+ * c->at_max_writeback_rate is set to 1, and then the max writeback
+ * rate is set on each dc->writeback_rate.rate.
+ * In order to avoid extra locking cost for counting the exact number
+ * of dirty cached devices, c->attached_dev_nr is used to calculate the
+ * idle threshold. It might be bigger if not all cached devices are in
+ * writeback mode, but it still works well with limited extra rounds of
+ * update_writeback_rate().
+ */
+ if (atomic_inc_return(&c->idle_counter) <
+ atomic_read(&c->attached_dev_nr) * 6)
+ return false;
+
+ if (atomic_read(&c->at_max_writeback_rate) != 1)
+ atomic_set(&c->at_max_writeback_rate, 1);
+
+ atomic_long_set(&dc->writeback_rate.rate, INT_MAX);
+
+ /* keep writeback_rate_target as existing value */
+ dc->writeback_rate_proportional = 0;
+ dc->writeback_rate_integral_scaled = 0;
+ dc->writeback_rate_change = 0;
+
+ /*
+ * Check c->idle_counter and c->at_max_writeback_rate again in case
+ * new I/O arrives before set_at_max_writeback_rate() returns.
+ * Then the writeback rate is set to 1, and its new value should be
+ * decided via __update_writeback_rate().
+ */
+ if ((atomic_read(&c->idle_counter) <
+ atomic_read(&c->attached_dev_nr) * 6) ||
+ !atomic_read(&c->at_max_writeback_rate))
+ return false;
+
+ return true;
+}
+
static void update_writeback_rate(struct work_struct *work)
{
struct cached_dev *dc = container_of(to_delayed_work(work),
@@ -136,13 +181,20 @@ static void update_writeback_rate(struct work_struct *work)
return;
}
- down_read(&dc->writeback_lock);
-
- if (atomic_read(&dc->has_dirty) &&
- dc->writeback_percent)
- __update_writeback_rate(dc);
+ if (atomic_read(&dc->has_dirty) && dc->writeback_percent) {
+ /*
+ * If the whole cache set is idle, set_at_max_writeback_rate()
+ * will set writeback rate to a max number. Then it is
+ * unnecessary to update writeback rate for an idle cache set
+ * in maximum writeback rate number(s).
+ */
+ if (!set_at_max_writeback_rate(c, dc)) {
+ down_read(&dc->writeback_lock);
+ __update_writeback_rate(dc);
+ up_read(&dc->writeback_lock);
+ }
+ }
- up_read(&dc->writeback_lock);
/*
* CACHE_SET_IO_DISABLE might be set via sysfs interface,
@@ -422,27 +474,6 @@ static void read_dirty(struct cached_dev *dc)
delay = writeback_delay(dc, size);
- /* If the control system would wait for at least half a
- * second, and there's been no reqs hitting the backing disk
- * for awhile: use an alternate mode where we have at most
- * one contiguous set of writebacks in flight at a time. If
- * someone wants to do IO it will be quick, as it will only
- * have to contend with one operation in flight, and we'll
- * be round-tripping data to the backing disk as quickly as
- * it can accept it.
- */
- if (delay >= HZ / 2) {
- /* 3 means at least 1.5 seconds, up to 7.5 if we
- * have slowed way down.
- */
- if (atomic_inc_return(&dc->backing_idle) >= 3) {
- /* Wait for current I/Os to finish */
- closure_sync(&cl);
- /* And immediately launch a new set. */
- delay = 0;
- }
- }
-
while (!kthread_should_stop() &&
!test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
delay) {
@@ -741,7 +772,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
dc->writeback_running = true;
dc->writeback_percent = 10;
dc->writeback_delay = 30;
- dc->writeback_rate.rate = 1024;
+ atomic_long_set(&dc->writeback_rate.rate, 1024);
dc->writeback_rate_minimum = 8;
dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;
--
2.17.1
Some of the MSRs returned by GET_MSR_INDEX_LIST currently cannot be sent back
to KVM_GET_MSR and/or KVM_SET_MSR; either they can never be sent back, or
they are only accepted under special conditions. This makes the API a pain to
use.
To avoid this pain, this patch makes it so that the result of the get-list
ioctl can always be used for host-initiated get and set. Since we don't have
a separate way to check for read-only MSRs, this means some Hyper-V MSRs are
ignored when written. Arguably they should not even be in the result of
GET_MSR_INDEX_LIST, but I am leaving there in case userspace is using the
outcome of GET_MSR_INDEX_LIST to derive the support for the corresponding
Hyper-V feature.
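The guarantee is easiest to see from the userspace side. Below is a
hedged sketch of the intended usage (error handling trimmed; vcpu_fd is
assumed to come from the usual KVM_CREATE_VM/KVM_CREATE_VCPU sequence,
and MAX_MSRS is an arbitrary capacity for the sketch): after this patch,
feeding every index returned by KVM_GET_MSR_INDEX_LIST into a
host-initiated KVM_GET_MSRS should succeed for all of them.

#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>

#define MAX_MSRS 256	/* arbitrary capacity for this sketch */

static void read_all_listed_msrs(int vcpu_fd)
{
	int kvm_fd = open("/dev/kvm", O_RDWR);
	struct kvm_msr_list *list;
	struct kvm_msrs *msrs;
	int n;

	/* Ask the module which MSRs it supports. */
	list = calloc(1, sizeof(*list) + MAX_MSRS * sizeof(__u32));
	list->nmsrs = MAX_MSRS;
	ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);

	/* Build a host-initiated read of every advertised MSR. */
	msrs = calloc(1, sizeof(*msrs) +
			 list->nmsrs * sizeof(struct kvm_msr_entry));
	msrs->nmsrs = list->nmsrs;
	for (__u32 i = 0; i < list->nmsrs; i++)
		msrs->entries[i].index = list->indices[i];

	/*
	 * With this patch the return value is expected to equal nmsrs,
	 * instead of stopping at the first MSR that rejects non-host
	 * accesses.
	 */
	n = ioctl(vcpu_fd, KVM_GET_MSRS, msrs);
	printf("read %d of %u MSRs\n", n, msrs->nmsrs);

	free(msrs);
	free(list);
	close(kvm_fd);
}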
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
---
arch/x86/kvm/hyperv.c | 27 ++++++++++++++++++++-------
arch/x86/kvm/hyperv.h | 2 +-
arch/x86/kvm/x86.c | 15 +++++++++------
3 files changed, 30 insertions(+), 14 deletions(-)
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index af8caf965baa..01d209ab5481 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -235,7 +235,7 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
int ret;
- if (!synic->active)
+ if (!synic->active && !host)
return 1;
trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);
@@ -295,11 +295,12 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
return ret;
}
-static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata)
+static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata,
+ bool host)
{
int ret;
- if (!synic->active)
+ if (!synic->active && !host)
return 1;
ret = 0;
@@ -1014,6 +1015,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
case HV_X64_MSR_TSC_EMULATION_STATUS:
hv->hv_tsc_emulation_status = data;
break;
+ case HV_X64_MSR_TIME_REF_COUNT:
+ /* read-only, but still ignore it if host-initiated */
+ if (!host)
+ return 1;
+ break;
default:
vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
msr, data);
@@ -1101,6 +1107,12 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
return stimer_set_count(vcpu_to_stimer(vcpu, timer_index),
data, host);
}
+ case HV_X64_MSR_TSC_FREQUENCY:
+ case HV_X64_MSR_APIC_FREQUENCY:
+ /* read-only, but still ignore it if host-initiated */
+ if (!host)
+ return 1;
+ break;
default:
vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
msr, data);
@@ -1156,7 +1168,8 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
return 0;
}
-static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
+ bool host)
{
u64 data = 0;
struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
@@ -1183,7 +1196,7 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case HV_X64_MSR_SIMP:
case HV_X64_MSR_EOM:
case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
- return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata);
+ return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata, host);
case HV_X64_MSR_STIMER0_CONFIG:
case HV_X64_MSR_STIMER1_CONFIG:
case HV_X64_MSR_STIMER2_CONFIG:
@@ -1229,7 +1242,7 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
return kvm_hv_set_msr(vcpu, msr, data, host);
}
-int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
{
if (kvm_hv_msr_partition_wide(msr)) {
int r;
@@ -1239,7 +1252,7 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
return r;
} else
- return kvm_hv_get_msr(vcpu, msr, pdata);
+ return kvm_hv_get_msr(vcpu, msr, pdata, host);
}
static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no)
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 837465d69c6d..d6aa969e20f1 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -48,7 +48,7 @@ static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic)
}
int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host);
-int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
+int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host);
bool kvm_hv_hypercall_enabled(struct kvm *kvm);
int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 153564db7980..f2876053e28b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2166,10 +2166,11 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu->arch.mcg_status = data;
break;
case MSR_IA32_MCG_CTL:
- if (!(mcg_cap & MCG_CTL_P))
+ if (!(mcg_cap & MCG_CTL_P) &&
+ (data || !msr_info->host_initiated))
return 1;
if (data != 0 && data != ~(u64)0)
- return -1;
+ return 1;
vcpu->arch.mcg_ctl = data;
break;
default:
@@ -2557,7 +2558,7 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
}
EXPORT_SYMBOL_GPL(kvm_get_msr);
-static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
{
u64 data;
u64 mcg_cap = vcpu->arch.mcg_cap;
@@ -2572,7 +2573,7 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
data = vcpu->arch.mcg_cap;
break;
case MSR_IA32_MCG_CTL:
- if (!(mcg_cap & MCG_CTL_P))
+ if (!(mcg_cap & MCG_CTL_P) && !host)
return 1;
data = vcpu->arch.mcg_ctl;
break;
@@ -2705,7 +2706,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
- return get_msr_mce(vcpu, msr_info->index, &msr_info->data);
+ return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
+ msr_info->host_initiated);
case MSR_K7_CLK_CTL:
/*
* Provide expected ramp-up count for K7. All other
@@ -2726,7 +2728,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case HV_X64_MSR_TSC_EMULATION_CONTROL:
case HV_X64_MSR_TSC_EMULATION_STATUS:
return kvm_hv_get_msr_common(vcpu,
- msr_info->index, &msr_info->data);
+ msr_info->index, &msr_info->data,
+ msr_info->host_initiated);
break;
case MSR_IA32_BBL_CR_CTL3:
/* This legacy MSR exists but isn't fully documented in current
--
2.17.1