March 2019 - Linux-stable-mirror

[PATCHES] Networking

by David Miller

Please queue up the following networking bug fixes for v4.19 and v5.0 -stable, respectively. Thanks!

6 years, 3 months

2
4
0 0

[patch 22/22] fs/proc/proc_sysctl.c: fix NULL pointer dereference in put_links

by akpm＠linux-foundation.org

From: YueHaibing <yuehaibing(a)huawei.com> Subject: fs/proc/proc_sysctl.c: fix NULL pointer dereference in put_links Syzkaller reports: kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI CPU: 1 PID: 5373 Comm: syz-executor.0 Not tainted 5.0.0-rc8+ #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:put_links+0x101/0x440 fs/proc/proc_sysctl.c:1599 Code: 00 0f 85 3a 03 00 00 48 8b 43 38 48 89 44 24 20 48 83 c0 38 48 89 c2 48 89 44 24 28 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 fe 02 00 00 48 8b 74 24 20 48 c7 c7 60 2a 9d 91 RSP: 0018:ffff8881d828f238 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: ffff8881e01b1140 RCX: ffffffff8ee98267 RDX: 0000000000000007 RSI: ffffc90001479000 RDI: ffff8881e01b1178 RBP: dffffc0000000000 R08: ffffed103ee27259 R09: ffffed103ee27259 R10: 0000000000000001 R11: ffffed103ee27258 R12: fffffffffffffff4 R13: 0000000000000006 R14: ffff8881f59838c0 R15: dffffc0000000000 FS: 00007f072254f700(0000) GS:ffff8881f7100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fff8b286668 CR3: 00000001f0542002 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: drop_sysctl_table+0x152/0x9f0 fs/proc/proc_sysctl.c:1629 get_subdir fs/proc/proc_sysctl.c:1022 [inline] __register_sysctl_table+0xd65/0x1090 fs/proc/proc_sysctl.c:1335 ? 0xffffffffc1a18000 br_netfilter_init+0xbc/0x1000 [br_netfilter] ? 0xffffffffc1a18000 ? 0xffffffffc1a18000 do_one_initcall+0xfa/0x5ca init/main.c:887 do_init_module+0x204/0x5f6 kernel/module.c:3460 load_module+0x66b2/0x8570 kernel/module.c:3808 __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f072254ec58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000020000280 RDI: 0000000000000003 RBP: 00007f072254ec70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f072254f6bc R13: 00000000004bcefa R14: 00000000006f6fb0 R15: 0000000000000004 Modules linked in: br_netfilter(+) dvb_usb_dibusb_mc_common dib3000mc dibx000_common dvb_usb_dibusb_common dvb_usb_dw2102 dvb_usb classmate_laptop palmas_regulator cn videobuf2_v4l2 v4l2_common snd_soc_bd28623 mptbase snd_usb_usx2y snd_usbmidi_lib snd_rawmidi wmi libnvdimm lockd sunrpc grace rc_kworld_pc150u rc_core rtc_da9063 sha1_ssse3 i2c_cros_ec_tunnel adxl34x_spi adxl34x nfnetlink lib80211 i5500_temp dvb_as102 dvb_core videobuf2_common videodev media videobuf2_vmalloc videobuf2_memops udc_core lnbp22 leds_lp3952 hid_roccat_ryos s1d13xxxfb mtd vport_geneve openvswitch nf_conncount nf_nat_ipv6 nsh geneve udp_tunnel ip6_udp_tunnel snd_soc_mt6351 sis_agp phylink snd_soc_adau1761_spi snd_soc_adau1761 snd_soc_adau17x1 snd_soc_core snd_pcm_dmaengine ac97_bus snd_compress snd_soc_adau_utils snd_soc_sigmadsp_regmap snd_soc_sigmadsp raid_class hid_roccat_konepure hid_roccat_common hid_roccat c2port_duramar2150 core mdio_bcm_unimac iptable_security iptable_raw iptable_mangle iptable_nat nf_nat_ipv4 nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr veth netdevsim devlink vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team bonding vcan bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel joydev mousedev ide_pci_generic piix aesni_intel aes_x86_64 ide_core crypto_simd atkbd cryptd glue_helper serio_raw ata_generic pata_acpi i2c_piix4 floppy sch_fq_codel ip_tables x_tables ipv6 [last unloaded: lm73] Dumping ftrace buffer: (ftrace buffer empty) ---[ end trace 770020de38961fd0 ]--- A new dir entry can be created in get_subdir and its 'header->parent' is set to NULL. Only after insert_header success, it will be set to 'dir', otherwise 'header->parent' is set to NULL and drop_sysctl_table is called. However in err handling path of get_subdir, drop_sysctl_table also be called on 'new->header' regardless its value of parent pointer. Then put_links is called, which triggers NULL-ptr deref when access member of header->parent. In fact we have multiple error paths which call drop_sysctl_table() there, upon failure on insert_links() we also call drop_sysctl_table().And even in the successful case on __register_sysctl_table() we still always call drop_sysctl_table().This patch fix it. Link: http://lkml.kernel.org/r/20190314085527.13244-1-yuehaibing@huawei.com Fixes: 0e47c99d7fe25 ("sysctl: Replace root_list with links between sysctl_table_sets") Signed-off-by: YueHaibing <yuehaibing(a)huawei.com> Reported-by: Hulk Robot <hulkci(a)huawei.com> Acked-by: Luis Chamberlain <mcgrof(a)kernel.org> Cc: Kees Cook <keescook(a)chromium.org> Cc: Alexey Dobriyan <adobriyan(a)gmail.com> Cc: Alexei Starovoitov <ast(a)kernel.org> Cc: Daniel Borkmann <daniel(a)iogearbox.net> Cc: Al Viro <viro(a)zeniv.linux.org.uk> Cc: Eric W. Biederman <ebiederm(a)xmission.com> Cc: <stable(a)vger.kernel.org> [3.4+] Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- fs/proc/proc_sysctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) --- a/fs/proc/proc_sysctl.c~proc-sysctl-fix-null-pointer-dereference-in-put_links +++ a/fs/proc/proc_sysctl.c @@ -1626,7 +1626,8 @@ static void drop_sysctl_table(struct ctl if (--header->nreg) return; - put_links(header); + if (parent) + put_links(header); start_unregistering(header); if (!--header->count) kfree_rcu(header, rcu); _

6 years, 3 months

1
0
0 0

[patch 19/22] mm/migrate.c: add missing flush_dcache_page for non-mapped page migrate

by akpm＠linux-foundation.org

From: Lars Persson <lars.persson(a)axis.com> Subject: mm/migrate.c: add missing flush_dcache_page for non-mapped page migrate Our MIPS 1004Kc SoCs were seeing random userspace crashes with SIGILL and SIGSEGV that could not be traced back to a userspace code bug. They had all the magic signs of an I/D cache coherency issue. Now recently we noticed that the /proc/sys/vm/compact_memory interface was quite efficient at provoking this class of userspace crashes. Studying the code in mm/migrate.c there is a distinction made between migrating a page that is mapped at the instant of migration and one that is not mapped. Our problem turned out to be the non-mapped pages. For the non-mapped page the code performs a copy of the page content and all relevant meta-data of the page without doing the required D-cache maintenance. This leaves dirty data in the D-cache of the CPU and on the 1004K cores this data is not visible to the I-cache. A subsequent page-fault that triggers a mapping of the page will happily serve the process with potentially stale code. What about ARM then, this bug should have seen greater exposure? Well ARM became immune to this flaw back in 2010, see commit c01778001a4f ("ARM: 6379/1: Assume new page cache pages have dirty D-cache"). My proposed fix moves the D-cache maintenance inside move_to_new_page to make it common for both cases. Link: http://lkml.kernel.org/r/20190315083502.11849-1-larper@axis.com Fixes: 97ee0524614 ("flush cache before installing new page at migraton") Signed-off-by: Lars Persson <larper(a)axis.com> Reviewed-by: Paul Burton <paul.burton(a)mips.com> Acked-by: Mel Gorman <mgorman(a)techsingularity.net> Cc: Ralf Baechle <ralf(a)linux-mips.org> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/migrate.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) --- a/mm/migrate.c~mm-migrate-add-missing-flush_dcache_page-for-non-mapped-page-migrate +++ a/mm/migrate.c @@ -248,10 +248,8 @@ static bool remove_migration_pte(struct pte = swp_entry_to_pte(entry); } else if (is_device_public_page(new)) { pte = pte_mkdevmap(pte); - flush_dcache_page(new); } - } else - flush_dcache_page(new); + } #ifdef CONFIG_HUGETLB_PAGE if (PageHuge(new)) { @@ -995,6 +993,13 @@ static int move_to_new_page(struct page */ if (!PageMappingFlags(page)) page->mapping = NULL; + + if (unlikely(is_zone_device_page(newpage))) { + if (is_device_public_page(newpage)) + flush_dcache_page(newpage); + } else + flush_dcache_page(newpage); + } out: return rc; _

6 years, 3 months

1
0
0 0

[patch 18/22] drivers/block/zram/zram_drv.c: fix idle/writeback string compare

by akpm＠linux-foundation.org

From: Minchan Kim <minchan(a)kernel.org> Subject: drivers/block/zram/zram_drv.c: fix idle/writeback string compare Makoto report a below KASAN error: zram does out-of-bounds read. Because strscpy copies from source up to count bytes unconditionally. It could cause out-of-bounds read on next object in slab To prevent it, use strlcpy which checks source's length automatically. [ 280.626730] c0 1314 ================================================================== [ 280.626855] c0 1314 BUG: KASAN: slab-out-of-bounds in strscpy+0x68/0x154 [ 280.626896] c0 1314 Read of size 8 at addr ffffffc0c3495a00 by task system_server/1314 [ 280.626921] c0 1314 .. [ 280.627041] c0 1314 Call trace: [ 280.627097] c0 1314 [<ffffff90080902b8>] dump_backtrace+0x0/0x6bc [ 280.627142] c0 1314 [<ffffff90080902ac>] show_stack+0x20/0x2c [ 280.627193] c0 1314 [<ffffff900871fa90>] dump_stack+0xfc/0x140 [ 280.627250] c0 1314 [<ffffff90083364ec>] print_address_description+0x80/0x2d8 [ 280.627294] c0 1314 [<ffffff9008336b48>] kasan_report_error+0x198/0x1fc [ 280.627335] c0 1314 [<ffffff90083369b0>] kasan_report_error+0x0/0x1fc [ 280.627376] c0 1314 [<ffffff9008335c1c>] __asan_load8+0x1b0/0x1b8 [ 280.627415] c0 1314 [<ffffff900872fb6c>] strscpy+0x68/0x154 [ 280.627465] c0 1314 [<ffffff9008ceca44>] idle_store+0xc4/0x34c [ 280.627511] c0 1314 [<ffffff9008c91a98>] dev_attr_store+0x50/0x6c [ 280.627558] c0 1314 [<ffffff900845602c>] sysfs_kf_write+0x98/0xb4 [ 280.627596] c0 1314 [<ffffff9008453d20>] kernfs_fop_write+0x198/0x260 [ 280.627642] c0 1314 [<ffffff90083578b4>] __vfs_write+0x10c/0x338 [ 280.627684] c0 1314 [<ffffff9008357dac>] vfs_write+0x114/0x238 [ 280.627726] c0 1314 [<ffffff9008358100>] SyS_write+0xc8/0x168 [ 280.627767] c0 1314 [<ffffff900808425c>] __sys_trace_return+0x0/0x4 [ 280.627791] c0 1314 [ 280.627824] c0 1314 Allocated by task 1314: [ 280.627866] c0 1314 kasan_kmalloc+0xe0/0x1ac [ 280.627903] c0 1314 __kmalloc+0x280/0x318 [ 280.627938] c0 1314 kernfs_fop_write+0xac/0x260 [ 280.627980] c0 1314 __vfs_write+0x10c/0x338 [ 280.628020] c0 1314 vfs_write+0x114/0x238 [ 280.628061] c0 1314 SyS_write+0xc8/0x168 [ 280.628098] c0 1314 __sys_trace_return+0x0/0x4 [ 280.628125] c0 1314 [ 280.628154] c0 1314 Freed by task 2855: [ 280.628194] c0 1314 kasan_slab_free+0xb8/0x194 [ 280.628229] c0 1314 kfree+0x138/0x630 [ 280.628266] c0 1314 kernfs_put_open_node+0x10c/0x124 [ 280.628302] c0 1314 kernfs_fop_release+0xd8/0x114 [ 280.628336] c0 1314 __fput+0x130/0x2a4 [ 280.628370] c0 1314 ____fput+0x1c/0x28 [ 280.628410] c0 1314 task_work_run+0x16c/0x1c8 [ 280.628449] c0 1314 do_notify_resume+0x2bc/0x107c [ 280.628483] c0 1314 work_pending+0x8/0x10 [ 280.628506] c0 1314 [ 280.628542] c0 1314 The buggy address belongs to the object at ffffffc0c3495a00 [ 280.628542] c0 1314 which belongs to the cache kmalloc-128 of size 128 [ 280.628597] c0 1314 The buggy address is located 0 bytes inside of [ 280.628597] c0 1314 128-byte region [ffffffc0c3495a00, ffffffc0c3495a80) [ 280.628642] c0 1314 The buggy address belongs to the page: [ 280.628680] c0 1314 page:ffffffbf030d2500 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 [ 280.628721] c0 1314 flags: 0x4000000000010200(slab|head) [ 280.628748] c0 1314 page dumped because: kasan: bad access detected [ 280.628772] c0 1314 [ 280.628797] c0 1314 Memory state around the buggy address: [ 280.628840] c0 1314 ffffffc0c3495900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 280.628874] c0 1314 ffffffc0c3495980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 280.628909] c0 1314 >ffffffc0c3495a00: 04 fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 280.628935] c0 1314 ^ [ 280.628969] c0 1314 ffffffc0c3495a80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 280.629005] c0 1314 ffffffc0c3495b00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Link: http://lkml.kernel.org/r/20190319231911.145968-1-minchan@kernel.org Cc: <stable(a)vger.kernel.org> [5.0] Signed-off-by: Minchan Kim <minchan(a)kernel.org> Reported-by: Makoto Wu <makotowu(a)google.com> Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky(a)gmail.com> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- drivers/block/zram/zram_drv.c | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) --- a/drivers/block/zram/zram_drv.c~zram-fix-idle-writeback-string-compare +++ a/drivers/block/zram/zram_drv.c @@ -290,18 +290,8 @@ static ssize_t idle_store(struct device struct zram *zram = dev_to_zram(dev); unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; int index; - char mode_buf[8]; - ssize_t sz; - sz = strscpy(mode_buf, buf, sizeof(mode_buf)); - if (sz <= 0) - return -EINVAL; - - /* ignore trailing new line */ - if (mode_buf[sz - 1] == '\n') - mode_buf[sz - 1] = 0x00; - - if (strcmp(mode_buf, "all")) + if (!sysfs_streq(buf, "all")) return -EINVAL; down_read(&zram->init_lock); @@ -635,25 +625,15 @@ static ssize_t writeback_store(struct de struct bio bio; struct bio_vec bio_vec; struct page *page; - ssize_t ret, sz; - char mode_buf[8]; - int mode = -1; + ssize_t ret; + int mode; unsigned long blk_idx = 0; - sz = strscpy(mode_buf, buf, sizeof(mode_buf)); - if (sz <= 0) - return -EINVAL; - - /* ignore trailing newline */ - if (mode_buf[sz - 1] == '\n') - mode_buf[sz - 1] = 0x00; - - if (!strcmp(mode_buf, "idle")) + if (sysfs_streq(buf, "idle")) mode = IDLE_WRITEBACK; - else if (!strcmp(mode_buf, "huge")) + else if (sysfs_streq(buf, "huge")) mode = HUGE_WRITEBACK; - - if (mode == -1) + else return -EINVAL; down_read(&zram->init_lock); _

6 years, 3 months

1
0
0 0

[patch 17/22] mm/page_isolation.c: fix a wrong flag in set_migratetype_isolate()

by akpm＠linux-foundation.org

From: Qian Cai <cai(a)lca.pw> Subject: mm/page_isolation.c: fix a wrong flag in set_migratetype_isolate() Due to has_unmovable_pages() taking an incorrect irqsave flag instead of the isolation flag in set_migratetype_isolate(), there are issues with HWPOSION and error reporting where dump_page() is not called when there is an unmovable page. Link: http://lkml.kernel.org/r/20190320204941.53731-1-cai@lca.pw Fixes: d381c54760dc ("mm: only report isolation failures when offlining memory") Acked-by: Michal Hocko <mhocko(a)suse.com> Reviewed-by: Oscar Salvador <osalvador(a)suse.de> Signed-off-by: Qian Cai <cai(a)lca.pw> Cc: <stable(a)vger.kernel.org> [5.0.x] Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/page_isolation.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) --- a/mm/page_isolation.c~mm-fix-a-wrong-flag-in-set_migratetype_isolate +++ a/mm/page_isolation.c @@ -59,7 +59,8 @@ static int set_migratetype_isolate(struc * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself. * We just check MOVABLE pages. */ - if (!has_unmovable_pages(zone, page, arg.pages_found, migratetype, flags)) + if (!has_unmovable_pages(zone, page, arg.pages_found, migratetype, + isol_flags)) ret = 0; /* _

6 years, 3 months

1
0
0 0

[patch 16/22] mm/memory_hotplug.c: fix notification in offline error path

by akpm＠linux-foundation.org

From: Qian Cai <cai(a)lca.pw> Subject: mm/memory_hotplug.c: fix notification in offline error path When start_isolate_page_range() returned -EBUSY in __offline_pages(), it calls memory_notify(MEM_CANCEL_OFFLINE, &arg) with an uninitialized "arg". As the result, it triggers warnings below. Also, it is only necessary to notify MEM_CANCEL_OFFLINE after MEM_GOING_OFFLINE. page:ffffea0001200000 count:1 mapcount:0 mapping:0000000000000000 index:0x0 flags: 0x3fffe000001000(reserved) raw: 003fffe000001000 ffffea0001200008 ffffea0001200008 0000000000000000 raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000 page dumped because: unmovable page WARNING: CPU: 25 PID: 1665 at mm/kasan/common.c:665 kasan_mem_notifier+0x34/0x23b CPU: 25 PID: 1665 Comm: bash Tainted: G W 5.0.0+ #94 Hardware name: HP ProLiant DL180 Gen9/ProLiant DL180 Gen9, BIOS U20 10/25/2017 RIP: 0010:kasan_mem_notifier+0x34/0x23b RSP: 0018:ffff8883ec737890 EFLAGS: 00010206 RAX: 0000000000000246 RBX: ff10f0f4435f1000 RCX: f887a7a21af88000 RDX: dffffc0000000000 RSI: 0000000000000020 RDI: ffff8881f221af88 RBP: ffff8883ec737898 R08: ffff888000000000 R09: ffffffffb0bddcd0 R10: ffffed103e857088 R11: ffff8881f42b8443 R12: dffffc0000000000 R13: 00000000fffffff9 R14: dffffc0000000000 R15: 0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000560fbd31d730 CR3: 00000004049c6003 CR4: 00000000001606a0 Call Trace: notifier_call_chain+0xbf/0x130 __blocking_notifier_call_chain+0x76/0xc0 blocking_notifier_call_chain+0x16/0x20 memory_notify+0x1b/0x20 __offline_pages+0x3e2/0x1210 offline_pages+0x11/0x20 memory_block_action+0x144/0x300 memory_subsys_offline+0xe5/0x170 device_offline+0x13f/0x1e0 state_store+0xeb/0x110 dev_attr_store+0x3f/0x70 sysfs_kf_write+0x104/0x150 kernfs_fop_write+0x25c/0x410 __vfs_write+0x66/0x120 vfs_write+0x15a/0x4f0 ksys_write+0xd2/0x1b0 __x64_sys_write+0x73/0xb0 do_syscall_64+0xeb/0xb78 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f14f75cc3b8 RSP: 002b:00007ffe84d01d68 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000008 RCX: 00007f14f75cc3b8 RDX: 0000000000000008 RSI: 0000563f8e433d70 RDI: 0000000000000001 RBP: 0000563f8e433d70 R08: 000000000000000a R09: 00007ffe84d018f0 R10: 000000000000000a R11: 0000000000000246 R12: 00007f14f789e780 R13: 0000000000000008 R14: 00007f14f7899740 R15: 0000000000000008 Link: http://lkml.kernel.org/r/20190320204255.53571-1-cai@lca.pw Fixes: 7960509329c2 ("mm, memory_hotplug: print reason for the offlining failure") Reviewed-by: Oscar Salvador <osalvador(a)suse.de> Acked-by: Michal Hocko <mhocko(a)suse.com> Signed-off-by: Qian Cai <cai(a)lca.pw> Reviewed-by: Andrew Morton <akpm(a)linux-foundation.org> Cc: <stable(a)vger.kernel.org> [5.0.x] Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/memory_hotplug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/mm/memory_hotplug.c~mm-hotplug-fix-notification-in-offline-error-path +++ a/mm/memory_hotplug.c @@ -1699,12 +1699,12 @@ static int __ref __offline_pages(unsigne failed_removal_isolated: undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); + memory_notify(MEM_CANCEL_OFFLINE, &arg); failed_removal: pr_debug("memory offlining [mem %#010llx-%#010llx] failed due to %s\n", (unsigned long long) start_pfn << PAGE_SHIFT, ((unsigned long long) end_pfn << PAGE_SHIFT) - 1, reason); - memory_notify(MEM_CANCEL_OFFLINE, &arg); /* pushback to free area */ mem_hotplug_done(); return ret; _

6 years, 3 months

1
0
0 0

[patch 12/22] mm/debug.c: fix __dump_page when mapping->host is not set

by akpm＠linux-foundation.org

From: Oscar Salvador <osalvador(a)suse.de> Subject: mm/debug.c: fix __dump_page when mapping->host is not set While debugging something, I added a dump_page() into do_swap_page(), and I got the splat from below. The issue happens when dereferencing mapping->host in __dump_page(): ... else if (mapping) { pr_warn("%ps ", mapping->a_ops); if (mapping->host->i_dentry.first) { struct dentry *dentry; dentry = container_of(mapping->host->i_dentry.first, struct dentry, d_u.d_alias); pr_warn("name:\"%pd\" ", dentry); } } ... Swap address space does not contain an inode information, and so mapping->host equals NULL. Although the dump_page() call was added artificially into do_swap_page(), I am not sure if we can hit this from any other path, so it looks worth fixing it. We can easily do that by checking mapping->host first. Link: http://lkml.kernel.org/r/20190318072931.29094-1-osalvador@suse.de Fixes: 1c6fb1d89e73c ("mm: print more information about mapping in __dump_page") Signed-off-by: Oscar Salvador <osalvador(a)suse.de> Acked-by: Michal Hocko <mhocko(a)suse.com> Acked-by: Hugh Dickins <hughd(a)google.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- a/mm/debug.c~mm-fix-__dump_page-when-mapping-host-is-not-set +++ a/mm/debug.c @@ -79,7 +79,7 @@ void __dump_page(struct page *page, cons pr_warn("ksm "); else if (mapping) { pr_warn("%ps ", mapping->a_ops); - if (mapping->host->i_dentry.first) { + if (mapping->host && mapping->host->i_dentry.first) { struct dentry *dentry; dentry = container_of(mapping->host->i_dentry.first, struct dentry, d_u.d_alias); pr_warn("name:\"%pd\" ", dentry); _

6 years, 3 months

1
0
0 0

[patch 11/22] mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified

by akpm＠linux-foundation.org

From: Yang Shi <yang.shi(a)linux.alibaba.com> Subject: mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified When MPOL_MF_STRICT was specified and an existing page was already on a node that does not follow the policy, mbind() should return -EIO. But 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()") broke the rule. And c8633798497c ("mm: mempolicy: mbind and migrate_pages support thp migration") didn't return the correct value for THP mbind() too. If MPOL_MF_STRICT is set, ignore vma_migratable() to make sure it reaches queue_pages_to_pte_range() or queue_pages_pmd() to check if an existing page was already on a node that does not follow the policy. And, non-migratable vma may be used, return -EIO too if MPOL_MF_MOVE or MPOL_MF_MOVE_ALL was specified. Tested with https://github.com/metan-ucw/ltp/blob/master/testcases/kernel/syscalls/mbin… [akpm(a)linux-foundation.org: tweak code comment] Link: http://lkml.kernel.org/r/1553020556-38583-1-git-send-email-yang.shi@linux.a… Fixes: 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()") Signed-off-by: Yang Shi <yang.shi(a)linux.alibaba.com> Signed-off-by: Oscar Salvador <osalvador(a)suse.de> Reported-by: Cyril Hrubis <chrubis(a)suse.cz> Suggested-by: Kirill A. Shutemov <kirill(a)shutemov.name> Acked-by: Rafael Aquini <aquini(a)redhat.com> Reviewed-by: Oscar Salvador <osalvador(a)suse.de> Acked-by: David Rientjes <rientjes(a)google.com> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/mempolicy.c | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) --- a/mm/mempolicy.c~mm-mempolicy-make-mbind-return-eio-when-mpol_mf_strict-is-specified +++ a/mm/mempolicy.c @@ -428,6 +428,13 @@ static inline bool queue_pages_required( return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT); } +/* + * queue_pages_pmd() has three possible return values: + * 1 - pages are placed on the right node or queued successfully. + * 0 - THP was split. + * -EIO - is migration entry or MPOL_MF_STRICT was specified and an existing + * page was already on a node that does not follow the policy. + */ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, unsigned long end, struct mm_walk *walk) { @@ -437,7 +444,7 @@ static int queue_pages_pmd(pmd_t *pmd, s unsigned long flags; if (unlikely(is_pmd_migration_entry(*pmd))) { - ret = 1; + ret = -EIO; goto unlock; } page = pmd_page(*pmd); @@ -454,8 +461,15 @@ static int queue_pages_pmd(pmd_t *pmd, s ret = 1; flags = qp->flags; /* go to thp migration */ - if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { + if (!vma_migratable(walk->vma)) { + ret = -EIO; + goto unlock; + } + migrate_page_add(page, qp->pagelist, flags); + } else + ret = -EIO; unlock: spin_unlock(ptl); out: @@ -480,8 +494,10 @@ static int queue_pages_pte_range(pmd_t * ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { ret = queue_pages_pmd(pmd, ptl, addr, end, walk); - if (ret) + if (ret > 0) return 0; + else if (ret < 0) + return ret; } if (pmd_trans_unstable(pmd)) @@ -502,11 +518,16 @@ static int queue_pages_pte_range(pmd_t * continue; if (!queue_pages_required(page, qp)) continue; - migrate_page_add(page, qp->pagelist, flags); + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { + if (!vma_migratable(vma)) + break; + migrate_page_add(page, qp->pagelist, flags); + } else + break; } pte_unmap_unlock(pte - 1, ptl); cond_resched(); - return 0; + return addr != end ? -EIO : 0; } static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask, @@ -576,7 +597,12 @@ static int queue_pages_test_walk(unsigne unsigned long endvma = vma->vm_end; unsigned long flags = qp->flags; - if (!vma_migratable(vma)) + /* + * Need check MPOL_MF_STRICT to return -EIO if possible + * regardless of vma_migratable + */ + if (!vma_migratable(vma) && + !(flags & MPOL_MF_STRICT)) return 1; if (endvma > end) @@ -603,7 +629,7 @@ static int queue_pages_test_walk(unsigne } /* queue pages from current vma */ - if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) + if (flags & MPOL_MF_VALID) return 0; return 1; } _

6 years, 3 months

1
0
0 0

[patch 09/22] iommu/io-pgtable-arm-v7s: request DMA32 memory, and improve debugging

by akpm＠linux-foundation.org

From: Nicolas Boichat <drinkcat(a)chromium.org> Subject: iommu/io-pgtable-arm-v7s: request DMA32 memory, and improve debugging IOMMUs using ARMv7 short-descriptor format require page tables (level 1 and 2) to be allocated within the first 4GB of RAM, even on 64-bit systems. For level 1/2 pages, ensure GFP_DMA32 is used if CONFIG_ZONE_DMA32 is defined (e.g. on arm64 platforms). For level 2 pages, allocate a slab cache in SLAB_CACHE_DMA32. Note that we do not explicitly pass GFP_DMA[32] to kmem_cache_zalloc, as this is not strictly necessary, and would cause a warning in mm/sl*b.c, as we did not update GFP_SLAB_BUG_MASK. Also, print an error when the physical address does not fit in 32-bit, to make debugging easier in the future. Link: http://lkml.kernel.org/r/20181210011504.122604-3-drinkcat@chromium.org Fixes: ad67f5a6545f ("arm64: replace ZONE_DMA with ZONE_DMA32") Signed-off-by: Nicolas Boichat <drinkcat(a)chromium.org> Acked-by: Will Deacon <will.deacon(a)arm.com> Cc: Christoph Hellwig <hch(a)infradead.org> Cc: Christoph Lameter <cl(a)linux.com> Cc: David Rientjes <rientjes(a)google.com> Cc: Hsin-Yi Wang <hsinyi(a)chromium.org> Cc: Huaisheng Ye <yehs1(a)lenovo.com> Cc: Joerg Roedel <joro(a)8bytes.org> Cc: Joonsoo Kim <iamjoonsoo.kim(a)lge.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: Matthias Brugger <matthias.bgg(a)gmail.com> Cc: Mel Gorman <mgorman(a)techsingularity.net> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Mike Rapoport <rppt(a)linux.vnet.ibm.com> Cc: Pekka Enberg <penberg(a)kernel.org> Cc: Robin Murphy <robin.murphy(a)arm.com> Cc: Sasha Levin <Alexander.Levin(a)microsoft.com> Cc: Tomasz Figa <tfiga(a)google.com> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: Yingjoe Chen <yingjoe.chen(a)mediatek.com> Cc: Yong Wu <yong.wu(a)mediatek.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- drivers/iommu/io-pgtable-arm-v7s.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) --- a/drivers/iommu/io-pgtable-arm-v7s.c~iommu-io-pgtable-arm-v7s-request-dma32-memory-and-improve-debugging +++ a/drivers/iommu/io-pgtable-arm-v7s.c @@ -160,6 +160,14 @@ #define ARM_V7S_TCR_PD1 BIT(5) +#ifdef CONFIG_ZONE_DMA32 +#define ARM_V7S_TABLE_GFP_DMA GFP_DMA32 +#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA32 +#else +#define ARM_V7S_TABLE_GFP_DMA GFP_DMA +#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA +#endif + typedef u32 arm_v7s_iopte; static bool selftest_running; @@ -197,13 +205,16 @@ static void *__arm_v7s_alloc_table(int l void *table = NULL; if (lvl == 1) - table = (void *)__get_dma_pages(__GFP_ZERO, get_order(size)); + table = (void *)__get_free_pages( + __GFP_ZERO | ARM_V7S_TABLE_GFP_DMA, get_order(size)); else if (lvl == 2) - table = kmem_cache_zalloc(data->l2_tables, gfp | GFP_DMA); + table = kmem_cache_zalloc(data->l2_tables, gfp); phys = virt_to_phys(table); - if (phys != (arm_v7s_iopte)phys) + if (phys != (arm_v7s_iopte)phys) { /* Doesn't fit in PTE */ + dev_err(dev, "Page table does not fit in PTE: %pa", &phys); goto out_free; + } if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { dma = dma_map_single(dev, table, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) @@ -733,7 +744,7 @@ static struct io_pgtable *arm_v7s_alloc_ data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2", ARM_V7S_TABLE_SIZE(2), ARM_V7S_TABLE_SIZE(2), - SLAB_CACHE_DMA, NULL); + ARM_V7S_TABLE_SLAB_FLAGS, NULL); if (!data->l2_tables) goto out_free_data; _

6 years, 3 months

1
0
0 0

[patch 07/22] ocfs2: fix inode bh swapping mixup in ocfs2_reflink_inodes_lock

by akpm＠linux-foundation.org

From: Darrick J. Wong <darrick.wong(a)oracle.com> Subject: ocfs2: fix inode bh swapping mixup in ocfs2_reflink_inodes_lock ocfs2_reflink_inodes_lock() can swap the inode1/inode2 variables so that we always grab cluster locks in order of increasing inode number. Unfortunately, we forget to swap the inode record buffer head pointers when we've done this, which leads to incorrect bookkeepping when we're trying to make the two inodes have the same refcount tree. This has the effect of causing filesystem shutdowns if you're trying to reflink data from inode 100 into inode 97, where inode 100 already has a refcount tree attached and inode 97 doesn't. The reflink code decides to copy the refcount tree pointer from 100 to 97, but uses inode 97's inode record to open the tree root (which it doesn't have) and blows up. This issue causes filesystem shutdowns and metadata corruption! Link: http://lkml.kernel.org/r/20190312214910.GK20533@magnolia Fixes: 29ac8e856cb369 ("ocfs2: implement the VFS clone_range, copy_range, and dedupe_range features") Signed-off-by: Darrick J. Wong <darrick.wong(a)oracle.com> Reviewed-by: Joseph Qi <jiangqi903(a)gmail.com> Cc: Mark Fasheh <mfasheh(a)versity.com> Cc: Joel Becker <jlbec(a)evilplan.org> Cc: Junxiao Bi <junxiao.bi(a)oracle.com> Cc: Joseph Qi <joseph.qi(a)huawei.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- fs/ocfs2/refcounttree.c | 42 +++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 18 deletions(-) --- a/fs/ocfs2/refcounttree.c~ocfs2-fix-inode-bh-swapping-mixup-in-ocfs2_reflink_inodes_lock +++ a/fs/ocfs2/refcounttree.c @@ -4719,22 +4719,23 @@ out: /* Lock an inode and grab a bh pointing to the inode. */ int ocfs2_reflink_inodes_lock(struct inode *s_inode, - struct buffer_head **bh1, + struct buffer_head **bh_s, struct inode *t_inode, - struct buffer_head **bh2) + struct buffer_head **bh_t) { - struct inode *inode1; - struct inode *inode2; + struct inode *inode1 = s_inode; + struct inode *inode2 = t_inode; struct ocfs2_inode_info *oi1; struct ocfs2_inode_info *oi2; + struct buffer_head *bh1 = NULL; + struct buffer_head *bh2 = NULL; bool same_inode = (s_inode == t_inode); + bool need_swap = (inode1->i_ino > inode2->i_ino); int status; /* First grab the VFS and rw locks. */ lock_two_nondirectories(s_inode, t_inode); - inode1 = s_inode; - inode2 = t_inode; - if (inode1->i_ino > inode2->i_ino) + if (need_swap) swap(inode1, inode2); status = ocfs2_rw_lock(inode1, 1); @@ -4757,17 +4758,13 @@ int ocfs2_reflink_inodes_lock(struct ino trace_ocfs2_double_lock((unsigned long long)oi1->ip_blkno, (unsigned long long)oi2->ip_blkno); - if (*bh1) - *bh1 = NULL; - if (*bh2) - *bh2 = NULL; - /* We always want to lock the one with the lower lockid first. */ if (oi1->ip_blkno > oi2->ip_blkno) mlog_errno(-ENOLCK); /* lock id1 */ - status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_REFLINK_TARGET); + status = ocfs2_inode_lock_nested(inode1, &bh1, 1, + OI_LS_REFLINK_TARGET); if (status < 0) { if (status != -ENOENT) mlog_errno(status); @@ -4776,15 +4773,25 @@ int ocfs2_reflink_inodes_lock(struct ino /* lock id2 */ if (!same_inode) { - status = ocfs2_inode_lock_nested(inode2, bh2, 1, + status = ocfs2_inode_lock_nested(inode2, &bh2, 1, OI_LS_REFLINK_TARGET); if (status < 0) { if (status != -ENOENT) mlog_errno(status); goto out_cl1; } - } else - *bh2 = *bh1; + } else { + bh2 = bh1; + } + + /* + * If we swapped inode order above, we have to swap the buffer heads + * before passing them back to the caller. + */ + if (need_swap) + swap(bh1, bh2); + *bh_s = bh1; + *bh_t = bh2; trace_ocfs2_double_lock_end( (unsigned long long)oi1->ip_blkno, @@ -4794,8 +4801,7 @@ int ocfs2_reflink_inodes_lock(struct ino out_cl1: ocfs2_inode_unlock(inode1, 1); - brelse(*bh1); - *bh1 = NULL; + brelse(bh1); out_rw2: ocfs2_rw_unlock(inode2, 1); out_i2: _

6 years, 3 months

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror March 2019