Since the addition of platform MSI support, there were two helpers
supposed to allocate/free IRQs for a device:
platform_msi_domain_alloc_irqs()
platform_msi_domain_free_irqs()
In these helpers, IRQ descriptors are allocated in the "alloc" routine
while they are freed in the "free" one.
Later, two other helpers have been added to handle IRQ domains on top
of MSI domains:
platform_msi_domain_alloc()
platform_msi_domain_free()
Seen from the outside, the logic is pretty close with the former
helpers and people used it with the same logic as before: a
platform_msi_domain_alloc() call should be balanced with a
platform_msi_domain_free() call. While this is probably what was
intended to do, the platform_msi_domain_free() does not remove/free
the IRQ descriptor(s) created/inserted in
platform_msi_domain_alloc().
One effect of such situation is that removing a module that requested
an IRQ will let one orphaned IRQ descriptor (with an allocated MSI
entry) in the device descriptors list. Next time the module will be
inserted back, one will observe that the allocation will happen twice
in the MSI domain, one time for the remaining descriptor, one time for
the new one. It also has the side effect to quickly overshoot the
maximum number of allocated MSI and then prevent any module requesting
an interrupt in the same domain to be inserted anymore.
This situation has been met with loops of insertion/removal of the
mvpp2.ko module (requesting 15 MSIs each time).
Fixes: 552c494a7666 ("platform-msi: Allow creation of a MSI-based stacked irq domain")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
---
drivers/base/platform-msi.c | 6 ++++--
include/linux/msi.h | 2 ++
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 60d6cc618f1c..6d54905c6263 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -366,14 +366,16 @@ void platform_msi_domain_free(struct irq_domain *domain, unsigned int virq,
unsigned int nvec)
{
struct platform_msi_priv_data *data = domain->host_data;
- struct msi_desc *desc;
- for_each_msi_entry(desc, data->dev) {
+ struct msi_desc *desc, *tmp;
+ for_each_msi_entry_safe(desc, tmp, data->dev) {
if (WARN_ON(!desc->irq || desc->nvec_used != 1))
return;
if (!(desc->irq >= virq && desc->irq < (virq + nvec)))
continue;
irq_domain_free_irqs_common(domain, desc->irq, 1);
+ list_del(&desc->list);
+ free_msi_entry(desc);
}
}
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 5839d8062dfc..be8ec813dbfb 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -116,6 +116,8 @@ struct msi_desc {
list_first_entry(dev_to_msi_list((dev)), struct msi_desc, list)
#define for_each_msi_entry(desc, dev) \
list_for_each_entry((desc), dev_to_msi_list((dev)), list)
+#define for_each_msi_entry_safe(desc, tmp, dev) \
+ list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)), list)
#ifdef CONFIG_PCI_MSI
#define first_pci_msi_entry(pdev) first_msi_entry(&(pdev)->dev)
--
2.17.1
Yongqin reported that /proc/zoneinfo format is broken in 4.14
due to commit 7aaf77272358 ("mm: don't show nr_indirectly_reclaimable
in /proc/vmstat")
Node 0, zone DMA
per-node stats
nr_inactive_anon 403
nr_active_anon 89123
nr_inactive_file 128887
nr_active_file 47377
nr_unevictable 2053
nr_slab_reclaimable 7510
nr_slab_unreclaimable 10775
nr_isolated_anon 0
nr_isolated_file 0
<...>
nr_vmscan_write 0
nr_vmscan_immediate_reclaim 0
nr_dirtied 6022
nr_written 5985
74240
^^^^^^^^^^
pages free 131656
The problem is caused by the nr_indirectly_reclaimable counter,
which is hidden from the /proc/vmstat, but not from the
/proc/zoneinfo. Let's fix this inconsistency and hide the
counter from /proc/zoneinfo exactly as from /proc/vmstat.
BTW, in 4.19+ the counter has been renamed and exported by
the commit b29940c1abd7 ("mm: rename and change semantics of
nr_indirectly_reclaimable_bytes"), so there is no such a problem
anymore.
Cc: <stable(a)vger.kernel.org> # 4.14.x-4.18.x
Fixes: 7aaf77272358 ("mm: don't show nr_indirectly_reclaimable in /proc/vmstat")
Reported-by: Yongqin Liu <yongqin.liu(a)linaro.org>
Signed-off-by: Roman Gushchin <guro(a)fb.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/vmstat.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 527ae727d547..6389e876c7a7 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1500,6 +1500,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
if (is_zone_first_populated(pgdat, zone)) {
seq_printf(m, "\n per-node stats");
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
+ /* Skip hidden vmstat items. */
+ if (*vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
+ NR_VM_NUMA_STAT_ITEMS] == '\0')
+ continue;
seq_printf(m, "\n %-12s %lu",
vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
NR_VM_NUMA_STAT_ITEMS],
--
2.17.2
Hi,
[This is an automated email]
This commit has been processed by the -stable helper bot and determined
to be a high probability candidate for -stable trees. (score: 46.9374)
The bot has tested the following trees: v4.16.1, v4.15.16, v4.14.33, v4.9.93, v4.4.127.
v4.16.1: Build OK!
v4.15.16: Build OK!
v4.14.33: Build OK!
v4.9.93: Failed to apply! Possible dependencies:
9648dc15772d ("recordmcount: arm: Implement make_nop")
v4.4.127: Failed to apply! Possible dependencies:
9648dc15772d ("recordmcount: arm: Implement make_nop")
Please let us know if you'd like to have this patch included in a stable tree.
--
Thanks,
Sasha
This is the start of the stable review cycle for the 4.14.71 release.
There are 126 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed Sep 19 21:16:12 UTC 2018.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.14.71-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.14.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.14.71-rc1
Linus Torvalds <torvalds(a)linux-foundation.org>
mm: get rid of vmacache_flush_all() entirely
Ian Kent <raven(a)themaw.net>
autofs: fix autofs_sbi() does not check super block type
Jason Wang <jasowang(a)redhat.com>
tuntap: fix use after free during release
Jason Wang <jasowang(a)redhat.com>
tun: fix use after free for ptr_ring
Wei Yongjun <weiyongjun1(a)huawei.com>
mtd: ubi: wl: Fix error return code in ubi_wl_init()
Taehee Yoo <ap420073(a)gmail.com>
ip: frags: fix crash in ip_do_fragment()
Peter Oskolkov <posk(a)google.com>
ip: process in-order fragments efficiently
Peter Oskolkov <posk(a)google.com>
ip: add helpers to process in-order fragments faster.
Dan Carpenter <dan.carpenter(a)oracle.com>
ipv4: frags: precedence bug in ip_expire()
Eric Dumazet <edumazet(a)google.com>
net: sk_buff rbnode reorg
Eric Dumazet <edumazet(a)google.com>
net: add rb_to_skb() and other rb tree helpers
Eric Dumazet <edumazet(a)google.com>
net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends
Florian Westphal <fw(a)strlen.de>
ipv6: defrag: drop non-last frags smaller than min mtu
Peter Oskolkov <posk(a)google.com>
net: modify skb_rbtree_purge to return the truesize of all purged skbs.
Eric Dumazet <edumazet(a)google.com>
net: speed up skb_rbtree_purge()
Peter Oskolkov <posk(a)google.com>
ip: discard IPv4 datagrams with overlapping segments.
Eric Dumazet <edumazet(a)google.com>
inet: frags: fix ip6frag_low_thresh boundary
Eric Dumazet <edumazet(a)google.com>
inet: frags: get rid of ipfrag_skb_cb/FRAG_CB
Eric Dumazet <edumazet(a)google.com>
inet: frags: reorganize struct netns_frags
Eric Dumazet <edumazet(a)google.com>
rhashtable: reorganize struct rhashtable layout
Eric Dumazet <edumazet(a)google.com>
ipv6: frags: rewrite ip6_expire_frag_queue()
Eric Dumazet <edumazet(a)google.com>
inet: frags: do not clone skb in ip_expire()
Eric Dumazet <edumazet(a)google.com>
inet: frags: break the 2GB limit for frags storage
Eric Dumazet <edumazet(a)google.com>
inet: frags: remove inet_frag_maybe_warn_overflow()
Eric Dumazet <edumazet(a)google.com>
inet: frags: get rif of inet_frag_evicting()
Eric Dumazet <edumazet(a)google.com>
inet: frags: remove some helpers
Eric Dumazet <edumazet(a)google.com>
inet: frags: use rhashtables for reassembly units
Eric Dumazet <edumazet(a)google.com>
rhashtable: add schedule points
Eric Dumazet <edumazet(a)google.com>
ipv6: export ip6 fragments sysctl to unprivileged users
Eric Dumazet <edumazet(a)google.com>
inet: frags: refactor lowpan_net_frag_init()
Eric Dumazet <edumazet(a)google.com>
inet: frags: refactor ipv6_frag_init()
Kees Cook <keescook(a)chromium.org>
inet: frags: Convert timers to use timer_setup()
Eric Dumazet <edumazet(a)google.com>
inet: frags: refactor ipfrag_init()
Eric Dumazet <edumazet(a)google.com>
inet: frags: add a pointer to struct netns_frags
Eric Dumazet <edumazet(a)google.com>
inet: frags: change inet_frags_init_net() return value
Jani Nikula <jani.nikula(a)intel.com>
drm/i915: set DP Main Stream Attribute for color range on DDI platforms
Parav Pandit <parav(a)mellanox.com>
RDMA/cma: Do not ignore net namespace for unbound cm_id
Paul Burton <paul.burton(a)mips.com>
MIPS: WARN_ON invalid DMA cache maintenance, not BUG_ON
Trond Myklebust <trond.myklebust(a)hammerspace.com>
NFSv4.1: Fix a potential layoutget/layoutrecall deadlock
Chao Yu <yuchao0(a)huawei.com>
f2fs: fix to do sanity check with {sit,nat}_ver_bitmap_bytesize
Zumeng Chen <zumeng.chen(a)gmail.com>
mfd: ti_am335x_tscadc: Fix struct clk memory leak
Geert Uytterhoeven <geert+renesas(a)glider.be>
iommu/ipmmu-vmsa: Fix allocation in atomic context
Dan Carpenter <dan.carpenter(a)oracle.com>
f2fs: Fix uninitialized return in f2fs_ioc_shutdown()
Chao Yu <yuchao0(a)huawei.com>
f2fs: fix to wait on page writeback before updating page
Katsuhiro Suzuki <suzuki.katsuhiro(a)socionext.com>
media: helene: fix xtal frequency setting at power on
Mauricio Faria de Oliveira <mfo(a)canonical.com>
partitions/aix: fix usage of uninitialized lv_info and lvname structures
Mauricio Faria de Oliveira <mfo(a)canonical.com>
partitions/aix: append null character to print data from disk
Sylwester Nawrocki <s.nawrocki(a)samsung.com>
media: s5p-mfc: Fix buffer look up in s5p_mfc_handle_frame_{new, copy_time} functions
Nick Dyer <nick.dyer(a)itdev.co.uk>
Input: atmel_mxt_ts - only use first T9 instance
John Pittman <jpittman(a)redhat.com>
dm cache: only allow a single io_mode cache feature to be requested
Petr Machata <petrm(a)mellanox.com>
net: dcb: For wild-card lookups, use priority -1, not 0
Nicholas Mc Guire <hofrat(a)osadl.org>
MIPS: generic: fix missing of_node_put()
Nicholas Mc Guire <hofrat(a)osadl.org>
MIPS: Octeon: add missing of_node_put()
Chao Yu <yuchao0(a)huawei.com>
f2fs: fix to do sanity check with reserved blkaddr of inline inode
Peter Rosin <peda(a)axentia.se>
tpm/tpm_i2c_infineon: switch to i2c_lock_bus(..., I2C_LOCK_SEGMENT)
Linus Walleij <linus.walleij(a)linaro.org>
tpm_tis_spi: Pass the SPI IRQ down to the driver
Chao Yu <yuchao0(a)huawei.com>
f2fs: fix to skip GC if type in SSA and SIT is inconsistent
Jinbum Park <jinb.park7(a)gmail.com>
pktcdvd: Fix possible Spectre-v1 for pkt_devs
Chao Yu <yuchao0(a)huawei.com>
f2fs: try grabbing node page lock aggressively in sync scenario
Yelena Krivosheev <yelena(a)marvell.com>
net: mvneta: fix mtu change on port without link
Daniel Kurtz <djkurtz(a)chromium.org>
pinctrl/amd: only handle irq if it is pending and unmasked
Anton Vasilyev <vasilyev(a)ispras.ru>
gpio: ml-ioh: Fix buffer underwrite on probe error path
Dan Carpenter <dan.carpenter(a)oracle.com>
pinctrl: imx: off by one in imx_pinconf_group_dbg_show()
Joerg Roedel <jroedel(a)suse.de>
x86/mm: Remove in_nmi() warning from vmalloc_fault()
Marcel Holtmann <marcel(a)holtmann.org>
Bluetooth: hidp: Fix handling of strncpy for hid->name information
Surabhi Vishnoi <svishnoi(a)codeaurora.org>
ath10k: disable bundle mgmt tx completion event support
Huaisheng Ye <yehs1(a)lenovo.com>
tools/testing/nvdimm: kaddr and pfn can be NULL to ->direct_access()
Anton Vasilyev <vasilyev(a)ispras.ru>
scsi: 3ware: fix return 0 on the error path of probe
Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
ata: libahci: Correct setting of DEVSLP register
Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
ata: libahci: Allow reconfigure of DEVSLP register
Paul Burton <paul.burton(a)mips.com>
MIPS: Fix ISA virt/bus conversion for non-zero PHYS_OFFSET
Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
rpmsg: core: add support to power domains for devices
Loic Poulain <loic.poulain(a)linaro.org>
wlcore: Set rx_status boottime_ns field on rx
Sven Eckelmann <sven.eckelmann(a)openmesh.com>
ath10k: prevent active scans on potential unusable channels
Felix Fietkau <nbd(a)nbd.name>
ath9k_hw: fix channel maximum power level test
Felix Fietkau <nbd(a)nbd.name>
ath9k: report tx status on EOSP
Finn Thain <fthain(a)telegraphics.com.au>
macintosh/via-pmu: Add missing mmio accessors
Kan Liang <kan.liang(a)linux.intel.com>
perf evlist: Fix error out while applying initial delay and LBR
Jiri Olsa <jolsa(a)kernel.org>
perf c2c report: Fix crash for empty browser
Olga Kornievskaia <kolga(a)netapp.com>
NFSv4.0 fix client reference leak in callback
Christophe Leroy <christophe.leroy(a)c-s.fr>
perf tools: Allow overriding MAX_NR_CPUS at compile time
Randy Dunlap <rdunlap(a)infradead.org>
f2fs: fix defined but not used build warnings
Yunlong Song <yunlong.song(a)huawei.com>
f2fs: do not set free of current section
Chao Yu <yuchao0(a)huawei.com>
f2fs: fix to active page in lru list for read path
Anton Vasilyev <vasilyev(a)ispras.ru>
tty: rocket: Fix possible buffer overwrite on register_PCI
Michael Kelley <mikelley(a)microsoft.com>
Drivers: hv: vmbus: Cleanup synic memory free path
Anton Vasilyev <vasilyev(a)ispras.ru>
firmware: vpd: Fix section enabled flag on vpd_section_destroy
Dan Carpenter <dan.carpenter(a)oracle.com>
uio: potential double frees if __uio_register_device() fails
Anton Vasilyev <vasilyev(a)ispras.ru>
misc: ti-st: Fix memory leak in the error path of probe()
Philipp Zabel <p.zabel(a)pengutronix.de>
gpu: ipu-v3: default to id 0 on missing OF alias
Todor Tomov <todor.tomov(a)linaro.org>
media: camss: csid: Configure data type and decode format properly
Gaurav Kohli <gkohli(a)codeaurora.org>
timers: Clear timer_base::must_forward_clk with timer_base::lock held
BingJing Chang <bingjingc(a)synology.com>
md/raid5: fix data corruption of replacements after originals dropped
Mike Christie <mchristi(a)redhat.com>
scsi: target: fix __transport_register_session locking
Ming Lei <ming.lei(a)redhat.com>
blk-mq: fix updating tags depth
Arun Parameswaran <arun.parameswaran(a)broadcom.com>
net: phy: Fix the register offsets in Broadcom iProc mdio mux driver
Anton Vasilyev <vasilyev(a)ispras.ru>
media: dw2102: Fix memleak on sequence of probes
Anton Vasilyev <vasilyev(a)ispras.ru>
media: davinci: vpif_display: Mix memory leak on probe error path
Roman Gushchin <guro(a)fb.com>
selftests/bpf: fix a typo in map in map test
Reza Arbab <arbab(a)linux.ibm.com>
powerpc/powernv: Fix concurrency issue with npu->mmio_atsd_usage
Dmitry Osipenko <digetx(a)gmail.com>
gpio: tegra: Move driver registration to subsys_init level
Johan Hedberg <johan.hedberg(a)intel.com>
Bluetooth: h5: Fix missing dependency on BT_HCIUART_SERDEV
Jae Hyun Yoo <jae.hyun.yoo(a)linux.intel.com>
i2c: aspeed: Add an explicit type casting for *get_clk_reg_val
Florian Fainelli <f.fainelli(a)gmail.com>
ethtool: Remove trailing semicolon for static inline
Dan Carpenter <dan.carpenter(a)oracle.com>
misc: mic: SCIF Fix scif_get_new_port() error handling
Alexey Brodkin <abrodkin(a)synopsys.com>
ARC: [plat-axs*]: Enable SWAP
Tomas Winkler <tomas.winkler(a)intel.com>
tpm: separate cmd_ready/go_idle from runtime_pm
Arnd Bergmann <arnd(a)arndb.de>
crypto: aes-generic - fix aes-generic regression on powerpc
Gustavo A. R. Silva <gustavo(a)embeddedor.com>
switchtec: Fix Spectre v1 vulnerability
Filippo Sironi <sironi(a)amazon.de>
x86/microcode: Update the new microcode revision unconditionally
Prarit Bhargava <prarit(a)redhat.com>
x86/microcode: Make sure boot_cpu_data.microcode is up-to-date
Thomas Gleixner <tglx(a)linutronix.de>
cpu/hotplug: Prevent state corruption on error rollback
Neeraj Upadhyay <neeraju(a)codeaurora.org>
cpu/hotplug: Adjust misplaced smb() in cpuhp_thread_fun()
Takashi Iwai <tiwai(a)suse.de>
ALSA: hda - Fix cancel_work_sync() stall from jackpoll work
Sean Christopherson <sean.j.christopherson(a)intel.com>
KVM: VMX: Do not allow reexecute_instruction() when skipping MMIO instr
Pierre Morel <pmorel(a)linux.ibm.com>
KVM: s390: vsie: copy wrapping keys to right place
Filipe Manana <fdmanana(a)suse.com>
Btrfs: fix data corruption when deduplicating between different files
Steve French <stfrench(a)microsoft.com>
smb3: check for and properly advertise directory lease support
Steve French <stfrench(a)microsoft.com>
SMB3: Backup intent flag missing for directory opens with backupuid mounts
Paul Burton <paul.burton(a)mips.com>
MIPS: VDSO: Match data page cache colouring when D$ aliases
Minchan Kim <minchan(a)kernel.org>
android: binder: fix the race mmap and alloc_new_buf_locked
Konstantin Khlebnikov <khlebnikov(a)yandex-team.ru>
block: bfq: swap puts in bfqg_and_blkg_put
Jens Axboe <axboe(a)kernel.dk>
nbd: don't allow invalid blocksize settings
James Smart <jsmart2021(a)gmail.com>
scsi: lpfc: Correct MDS diag and nvmet configuration
Felipe Balbi <felipe.balbi(a)linux.intel.com>
i2c: i801: fix DNV's SMBCTRL register offset
Shubhrajyoti Datta <shubhrajyoti.datta(a)xilinx.com>
i2c: xiic: Make the start and the byte count write atomic
-------------
Diffstat:
Documentation/networking/ip-sysctl.txt | 13 +-
Makefile | 4 +-
arch/arc/configs/axs101_defconfig | 1 -
arch/arc/configs/axs103_defconfig | 1 -
arch/arc/configs/axs103_smp_defconfig | 1 -
arch/mips/cavium-octeon/octeon-platform.c | 2 +
arch/mips/generic/init.c | 1 +
arch/mips/include/asm/io.h | 8 +-
arch/mips/kernel/vdso.c | 20 +
arch/mips/mm/c-r4k.c | 6 +-
arch/powerpc/platforms/powernv/npu-dma.c | 5 +-
arch/s390/kvm/vsie.c | 3 +-
arch/x86/kernel/cpu/microcode/amd.c | 24 +-
arch/x86/kernel/cpu/microcode/intel.c | 17 +-
arch/x86/kvm/vmx.c | 4 +-
arch/x86/mm/fault.c | 2 -
block/bfq-cgroup.c | 4 +-
block/blk-mq-tag.c | 8 +-
block/partitions/aix.c | 13 +-
crypto/Makefile | 2 +-
drivers/android/binder_alloc.c | 42 +-
drivers/ata/libahci.c | 20 +-
drivers/block/nbd.c | 3 +
drivers/block/pktcdvd.c | 4 +-
drivers/bluetooth/Kconfig | 1 +
drivers/char/tpm/tpm-interface.c | 50 +-
drivers/char/tpm/tpm.h | 12 +-
drivers/char/tpm/tpm2-space.c | 16 +-
drivers/char/tpm/tpm_crb.c | 101 +---
drivers/char/tpm/tpm_i2c_infineon.c | 8 +-
drivers/char/tpm/tpm_tis_spi.c | 9 +-
drivers/firmware/google/vpd.c | 5 +-
drivers/gpio/gpio-ml-ioh.c | 3 +-
drivers/gpio/gpio-tegra.c | 2 +-
drivers/gpu/drm/i915/i915_reg.h | 1 +
drivers/gpu/drm/i915/intel_ddi.c | 4 +
drivers/gpu/ipu-v3/ipu-common.c | 2 +
drivers/hv/hv.c | 14 +-
drivers/i2c/busses/i2c-aspeed.c | 2 +-
drivers/i2c/busses/i2c-i801.c | 7 +-
drivers/i2c/busses/i2c-xiic.c | 4 +
drivers/infiniband/core/cma.c | 13 +-
drivers/input/touchscreen/atmel_mxt_ts.c | 7 +-
drivers/iommu/ipmmu-vmsa.c | 9 +-
drivers/macintosh/via-pmu.c | 9 +-
drivers/md/dm-cache-target.c | 19 +-
drivers/md/raid5.c | 6 +
drivers/media/dvb-frontends/helene.c | 5 +-
drivers/media/platform/davinci/vpif_display.c | 24 +-
.../media/platform/qcom/camss-8x16/camss-csid.c | 16 +-
drivers/media/platform/s5p-mfc/s5p_mfc.c | 23 +-
drivers/media/usb/dvb-usb/dw2102.c | 19 +-
drivers/mfd/ti_am335x_tscadc.c | 3 +-
drivers/misc/mic/scif/scif_api.c | 20 +-
drivers/misc/ti-st/st_kim.c | 4 +-
drivers/mtd/ubi/wl.c | 8 +-
drivers/net/ethernet/marvell/mvneta.c | 1 -
drivers/net/phy/mdio-mux-bcm-iproc.c | 20 +-
drivers/net/tun.c | 21 +-
drivers/net/wireless/ath/ath10k/mac.c | 7 +
drivers/net/wireless/ath/ath10k/wmi-tlv.c | 5 +
drivers/net/wireless/ath/ath10k/wmi-tlv.h | 5 +
drivers/net/wireless/ath/ath9k/hw.c | 7 +-
drivers/net/wireless/ath/ath9k/xmit.c | 3 +-
drivers/net/wireless/ti/wlcore/rx.c | 8 +-
drivers/pci/switch/switchtec.c | 4 +
drivers/pinctrl/freescale/pinctrl-imx.c | 2 +-
drivers/pinctrl/pinctrl-amd.c | 3 +-
drivers/rpmsg/rpmsg_core.c | 7 +
drivers/scsi/3w-9xxx.c | 6 +-
drivers/scsi/3w-sas.c | 3 +
drivers/scsi/3w-xxxx.c | 2 +
drivers/scsi/lpfc/lpfc.h | 2 +-
drivers/target/target_core_transport.c | 5 +-
drivers/tty/rocket.c | 2 +-
drivers/uio/uio.c | 3 +-
fs/autofs4/autofs_i.h | 4 +-
fs/autofs4/inode.c | 1 -
fs/btrfs/ioctl.c | 19 +
fs/cifs/inode.c | 2 +
fs/cifs/smb2ops.c | 35 +-
fs/cifs/smb2pdu.c | 3 +
fs/f2fs/f2fs.h | 7 +-
fs/f2fs/file.c | 2 +-
fs/f2fs/gc.c | 8 +-
fs/f2fs/inline.c | 22 +
fs/f2fs/node.c | 4 +-
fs/f2fs/segment.h | 3 +
fs/f2fs/super.c | 21 +-
fs/f2fs/sysfs.c | 10 +-
fs/nfs/callback_proc.c | 4 +-
fs/nfs/callback_xdr.c | 11 +-
include/linux/mm_types.h | 2 +-
include/linux/mm_types_task.h | 2 +-
include/linux/rhashtable.h | 8 +-
include/linux/skbuff.h | 50 +-
include/linux/tpm.h | 2 +
include/linux/vm_event_item.h | 1 -
include/linux/vmacache.h | 5 -
include/net/inet_frag.h | 135 +++--
include/net/ip.h | 1 -
include/net/ipv6.h | 26 +-
include/uapi/linux/ethtool.h | 4 +-
include/uapi/linux/snmp.h | 1 +
kernel/cpu.c | 11 +-
kernel/time/timer.c | 29 +-
lib/rhashtable.c | 2 +
mm/debug.c | 4 +-
mm/vmacache.c | 38 --
net/bluetooth/hidp/core.c | 2 +-
net/core/skbuff.c | 31 +-
net/dcb/dcbnl.c | 11 +-
net/ieee802154/6lowpan/6lowpan_i.h | 26 +-
net/ieee802154/6lowpan/reassembly.c | 153 +++---
net/ipv4/inet_fragment.c | 378 +++-----------
net/ipv4/ip_fragment.c | 578 ++++++++++++---------
net/ipv4/proc.c | 7 +-
net/ipv4/tcp_fastopen.c | 8 +-
net/ipv4/tcp_input.c | 33 +-
net/ipv6/netfilter/nf_conntrack_reasm.c | 105 ++--
net/ipv6/proc.c | 5 +-
net/ipv6/reassembly.c | 217 ++++----
net/sched/sch_netem.c | 14 +-
sound/pci/hda/hda_codec.c | 3 +-
tools/perf/builtin-c2c.c | 3 +
tools/perf/perf.h | 2 +
tools/perf/util/evsel.c | 14 +
tools/testing/nvdimm/pmem-dax.c | 12 +-
tools/testing/selftests/bpf/test_verifier.c | 6 +-
129 files changed, 1473 insertions(+), 1362 deletions(-)
This is the start of the stable review cycle for the 4.9.134 release.
There are 71 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Thu Oct 18 17:05:18 UTC 2018.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.9.134-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.9.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.9.134-rc1
Dan Carpenter <dan.carpenter(a)oracle.com>
ipv4: frags: precedence bug in ip_expire()
Taehee Yoo <ap420073(a)gmail.com>
ip: frags: fix crash in ip_do_fragment()
Peter Oskolkov <posk(a)google.com>
ip: process in-order fragments efficiently
Peter Oskolkov <posk(a)google.com>
ip: add helpers to process in-order fragments faster.
Peter Oskolkov <posk(a)google.com>
ip: use rb trees for IP frag queue.
Eric Dumazet <edumazet(a)google.com>
net: add rb_to_skb() and other rb tree helpers
Eric Dumazet <edumazet(a)google.com>
net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends
Florian Westphal <fw(a)strlen.de>
ipv6: defrag: drop non-last frags smaller than min mtu
Peter Oskolkov <posk(a)google.com>
net: modify skb_rbtree_purge to return the truesize of all purged skbs.
Eric Dumazet <edumazet(a)google.com>
net: speed up skb_rbtree_purge()
Peter Oskolkov <posk(a)google.com>
ip: discard IPv4 datagrams with overlapping segments.
Eric Dumazet <edumazet(a)google.com>
inet: frags: fix ip6frag_low_thresh boundary
Eric Dumazet <edumazet(a)google.com>
inet: frags: get rid of ipfrag_skb_cb/FRAG_CB
Eric Dumazet <edumazet(a)google.com>
inet: frags: reorganize struct netns_frags
Eric Dumazet <edumazet(a)google.com>
rhashtable: reorganize struct rhashtable layout
Eric Dumazet <edumazet(a)google.com>
ipv6: frags: rewrite ip6_expire_frag_queue()
Eric Dumazet <edumazet(a)google.com>
inet: frags: do not clone skb in ip_expire()
Eric Dumazet <edumazet(a)google.com>
inet: frags: break the 2GB limit for frags storage
Eric Dumazet <edumazet(a)google.com>
inet: frags: remove inet_frag_maybe_warn_overflow()
Eric Dumazet <edumazet(a)google.com>
inet: frags: get rif of inet_frag_evicting()
Eric Dumazet <edumazet(a)google.com>
inet: frags: remove some helpers
Eric Dumazet <edumazet(a)google.com>
inet: frags: use rhashtables for reassembly units
Eric Dumazet <edumazet(a)google.com>
rhashtable: add schedule points
Eric Dumazet <edumazet(a)google.com>
ipv6: export ip6 fragments sysctl to unprivileged users
Eric Dumazet <edumazet(a)google.com>
inet: frags: refactor lowpan_net_frag_init()
Eric Dumazet <edumazet(a)google.com>
inet: frags: refactor ipv6_frag_init()
Eric Dumazet <edumazet(a)google.com>
inet: frags: refactor ipfrag_init()
Eric Dumazet <edumazet(a)google.com>
inet: frags: add a pointer to struct netns_frags
Eric Dumazet <edumazet(a)google.com>
inet: frags: change inet_frags_init_net() return value
Eric Dumazet <edumazet(a)google.com>
inet: make sure to grab rcu_read_lock before using ireq->ireq_opt
Eric Dumazet <edumazet(a)google.com>
tcp/dccp: fix lockdep issue when SYN is backlogged
Eric Dumazet <edumazet(a)google.com>
rtnl: limit IFLA_NUM_TX_QUEUES and IFLA_NUM_RX_QUEUES to 4096
Florian Fainelli <f.fainelli(a)gmail.com>
net: systemport: Fix wake-up interrupt race during resume
Maxime Chevallier <maxime.chevallier(a)bootlin.com>
net: mvpp2: Extract the correct ethtype from the skb for tx csum offload
Florian Fainelli <f.fainelli(a)gmail.com>
net: dsa: bcm_sf2: Fix unbind ordering
Ido Schimmel <idosch(a)mellanox.com>
team: Forbid enslaving team device to itself
Giacinto Cifelli <gciofono(a)gmail.com>
qmi_wwan: Added support for Gemalto's Cinterion ALASxx WWAN interface
Shahed Shaikh <shahed.shaikh(a)cavium.com>
qlcnic: fix Tx descriptor corruption on 82xx devices
Yu Zhao <yuzhao(a)google.com>
net/usb: cancel pending work when unbinding smsc75xx
Sean Tranchetti <stranche(a)codeaurora.org>
netlabel: check for IPV4MASK in addrinfo_get
Jeff Barnhill <0xeffeff(a)gmail.com>
net/ipv6: Display all addresses in output of /proc/net/if_inet6
Sabrina Dubroca <sd(a)queasysnail.net>
net: ipv4: update fnhe_pmtu when first hop's MTU changes
Yunsheng Lin <linyunsheng(a)huawei.com>
net: hns: fix for unmapping problem when SMMU is on
Florian Fainelli <f.fainelli(a)gmail.com>
net: dsa: bcm_sf2: Call setup during switch resume
Wei Wang <weiwan(a)google.com>
ipv6: take rcu lock in rawv6_send_hdrinc()
Eric Dumazet <edumazet(a)google.com>
ipv4: fix use-after-free in ip_cmsg_recv_dstaddr()
Paolo Abeni <pabeni(a)redhat.com>
ip_tunnel: be careful when accessing the inner header
Paolo Abeni <pabeni(a)redhat.com>
ip6_tunnel: be careful when accessing the inner header
Mahesh Bandewar <maheshb(a)google.com>
bonding: avoid possible dead-lock
Michael Chan <michael.chan(a)broadcom.com>
bnxt_en: Fix TX timeout during netpoll.
Mathias Nyman <mathias.nyman(a)linux.intel.com>
xhci: Don't print a warning when setting link state for disabled ports
Edgar Cherkasov <echerkasov(a)dev.rtsoft.ru>
i2c: i2c-scmi: fix for i2c_smbus_write_block_data
Jan Kara <jack(a)suse.cz>
mm: Preserve _PAGE_DEVMAP across mprotect() calls
Adrian Hunter <adrian.hunter(a)intel.com>
perf script python: Fix export-to-postgresql.py occasional failure
Mikulas Patocka <mpatocka(a)redhat.com>
mach64: detect the dot clock divider correctly on sparc
Paul Burton <paul.burton(a)mips.com>
MIPS: VDSO: Always map near top of user memory
Jann Horn <jannh(a)google.com>
mm/vmstat.c: fix outdated vmstat_text
Daniel Rosenberg <drosen(a)google.com>
ext4: Fix error code in ext4_xattr_set_entry()
Amber Lin <Amber.Lin(a)amd.com>
drm/amdgpu: Fix SDMA HQD destroy error on gfx_v7
Vitaly Kuznetsov <vkuznets(a)redhat.com>
x86/kvm/lapic: always disable MMIO interface in x2APIC mode
Nicolas Ferre <nicolas.ferre(a)microchip.com>
ARM: dts: at91: add new compatibility string for macb on sama5d3
Nicolas Ferre <nicolas.ferre(a)microchip.com>
net: macb: disable scatter-gather for macb on sama5d3
Jongsung Kim <neidhard.kim(a)lge.com>
stmmac: fix valid numbers of unicast filter entries
Yu Zhao <yuzhao(a)google.com>
sound: enable interrupt after dma buffer initialization
Dan Carpenter <dan.carpenter(a)oracle.com>
scsi: qla2xxx: Fix an endian bug in fcpcmd_is_corrupted()
Laura Abbott <labbott(a)redhat.com>
scsi: iscsi: target: Don't use stack buffer for scatterlist
Tony Lindgren <tony(a)atomide.com>
mfd: omap-usb-host: Fix dts probe of children
Lei Yang <Lei.Yang(a)windriver.com>
selftests: memory-hotplug: add required configs
Lei Yang <Lei.Yang(a)windriver.com>
selftests/efivarfs: add required kernel configs
Danny Smith <danny.smith(a)axis.com>
ASoC: sigmadsp: safeload should not have lower byte limit
Pierre-Louis Bossart <pierre-louis.bossart(a)linux.intel.com>
ASoC: wm8804: Add ACPI support
-------------
Diffstat:
Documentation/devicetree/bindings/net/macb.txt | 1 +
Documentation/networking/ip-sysctl.txt | 13 +-
Makefile | 4 +-
arch/arm/boot/dts/sama5d3_emac.dtsi | 2 +-
arch/mips/include/asm/processor.h | 10 +-
arch/mips/kernel/process.c | 25 +
arch/mips/kernel/vdso.c | 18 +-
arch/powerpc/include/asm/book3s/64/pgtable.h | 4 +-
arch/x86/include/asm/pgtable_types.h | 2 +-
arch/x86/include/uapi/asm/kvm.h | 1 +
arch/x86/kvm/lapic.c | 22 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 2 +-
drivers/i2c/busses/i2c-scmi.c | 1 +
drivers/mfd/omap-usb-host.c | 11 +-
drivers/net/bonding/bond_main.c | 43 +-
drivers/net/dsa/bcm_sf2.c | 12 +-
drivers/net/ethernet/broadcom/bcmsysport.c | 22 +-
drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 +-
drivers/net/ethernet/cadence/macb.c | 8 +
drivers/net/ethernet/hisilicon/hns/hnae.c | 2 +-
drivers/net/ethernet/hisilicon/hns/hns_enet.c | 30 +-
drivers/net/ethernet/marvell/mvpp2.c | 10 +-
drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 8 +-
.../net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 3 +-
.../net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h | 3 +-
drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h | 3 +-
drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 12 +-
.../net/ethernet/stmicro/stmmac/stmmac_platform.c | 5 +-
drivers/net/team/team.c | 5 +
drivers/net/usb/qmi_wwan.c | 1 +
drivers/net/usb/smsc75xx.c | 1 +
drivers/scsi/qla2xxx/qla_target.h | 4 +-
drivers/target/iscsi/iscsi_target.c | 22 +-
drivers/usb/host/xhci-hub.c | 18 +-
drivers/video/fbdev/aty/atyfb.h | 3 +-
drivers/video/fbdev/aty/atyfb_base.c | 7 +-
drivers/video/fbdev/aty/mach64_ct.c | 10 +-
fs/ext4/xattr.c | 2 +-
include/linux/netdevice.h | 7 +
include/linux/rhashtable.h | 4 +-
include/linux/skbuff.h | 34 +-
include/net/bonding.h | 7 +-
include/net/inet_frag.h | 133 +++--
include/net/inet_sock.h | 6 -
include/net/ip.h | 1 -
include/net/ip_fib.h | 1 +
include/net/ipv6.h | 26 +-
include/uapi/linux/snmp.h | 1 +
lib/rhashtable.c | 5 +-
mm/vmstat.c | 1 -
net/core/dev.c | 28 +-
net/core/rtnetlink.c | 6 +
net/core/skbuff.c | 31 +-
net/dccp/input.c | 4 +-
net/dccp/ipv4.c | 4 +-
net/ieee802154/6lowpan/6lowpan_i.h | 26 +-
net/ieee802154/6lowpan/reassembly.c | 148 +++---
net/ipv4/fib_frontend.c | 12 +-
net/ipv4/fib_semantics.c | 50 ++
net/ipv4/inet_connection_sock.c | 5 +-
net/ipv4/inet_fragment.c | 379 +++-----------
net/ipv4/ip_fragment.c | 573 ++++++++++++---------
net/ipv4/ip_sockglue.c | 3 +-
net/ipv4/ip_tunnel.c | 9 +
net/ipv4/proc.c | 7 +-
net/ipv4/tcp_input.c | 37 +-
net/ipv4/tcp_ipv4.c | 4 +-
net/ipv6/addrconf.c | 4 +-
net/ipv6/ip6_tunnel.c | 13 +-
net/ipv6/netfilter/nf_conntrack_reasm.c | 100 ++--
net/ipv6/proc.c | 5 +-
net/ipv6/raw.c | 29 +-
net/ipv6/reassembly.c | 212 ++++----
net/netlabel/netlabel_unlabeled.c | 3 +-
sound/hda/hdac_controller.c | 8 +-
sound/soc/codecs/sigmadsp.c | 3 +-
sound/soc/codecs/wm8804-i2c.c | 15 +-
tools/perf/scripts/python/export-to-postgresql.py | 9 +
tools/testing/selftests/efivarfs/config | 1 +
tools/testing/selftests/memory-hotplug/config | 1 +
80 files changed, 1185 insertions(+), 1133 deletions(-)
Hello,
Please picked up this patch for linux 4.4 (backported version).
Indeed, this code will be beneficial to the GNU/Linux distributions that use a longterm kernel.
Compiled/tested without problem.
Thank.
[ Upstream commit 30aba6656f61ed44cba445a3c0d38b296fa9e8f5 ]
From: Salvatore Mesoraca <s.mesoraca16(a)gmail.com>
Date: Thu, 23 Aug 2018 17:00:35 -0700
Subject: namei: allow restricted O_CREAT of FIFOs and regular files
Disallows open of FIFOs or regular files not owned by the user in world
writable sticky directories, unless the owner is the same as that of the
directory or the file is opened without the O_CREAT flag. The purpose
is to make data spoofing attacks harder. This protection can be turned
on and off separately for FIFOs and regular files via sysctl, just like
the symlinks/hardlinks protection. This patch is based on Openwall's
"HARDEN_FIFO" feature by Solar Designer.
This is a brief list of old vulnerabilities that could have been prevented
by this feature, some of them even allow for privilege escalation:
CVE-2000-1134
CVE-2007-3852
CVE-2008-0525
CVE-2009-0416
CVE-2011-4834
CVE-2015-1838
CVE-2015-7442
CVE-2016-7489
This list is not meant to be complete. It's difficult to track down all
vulnerabilities of this kind because they were often reported without any
mention of this particular attack vector. In fact, before
hardlinks/symlinks restrictions, fifos/regular files weren't the favorite
vehicle to exploit them.
[s.mesoraca16(a)gmail.com: fix bug reported by Dan Carpenter]
Link: https://lkml.kernel.org/r/20180426081456.GA7060@mwanda
Link: http://lkml.kernel.org/r/1524829819-11275-1-git-send-email-s.mesoraca16@gma…
[keescook(a)chromium.org: drop pr_warn_ratelimited() in favor of audit changes in the future]
[keescook(a)chromium.org: adjust commit subjet]
Link: http://lkml.kernel.org/r/20180416175918.GA13494@beast
Signed-off-by: Salvatore Mesoraca <s.mesoraca16(a)gmail.com>
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Suggested-by: Solar Designer <solar(a)openwall.com>
Suggested-by: Kees Cook <keescook(a)chromium.org>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Dan Carpenter <dan.carpenter(a)oracle.com>
[backported to 4.4 by Loic]
Cc: Loic <hackurx(a)opensec.fr>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
---
Documentation/sysctl/fs.txt | 36 ++++++++++++++++++++++++++++++
fs/namei.c | 53 ++++++++++++++++++++++++++++++++++++++++++---
include/linux/fs.h | 2 ++
kernel/sysctl.c | 18 +++++++++++++++
4 files changed, 106 insertions(+), 3 deletions(-)
diff -Nurp a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
--- a/Documentation/sysctl/fs.txt 2018-10-20 09:52:38.000000000 +0200
+++ b/Documentation/sysctl/fs.txt 2018-10-23 18:08:20.398649373 +0200
@@ -34,7 +34,9 @@ Currently, these files are in /proc/sys/
- overflowgid
- pipe-user-pages-hard
- pipe-user-pages-soft
+- protected_fifos
- protected_hardlinks
+- protected_regular
- protected_symlinks
- suid_dumpable
- super-max
@@ -182,6 +184,24 @@ applied.
==============================================================
+protected_fifos:
+
+The intent of this protection is to avoid unintentional writes to
+an attacker-controlled FIFO, where a program expected to create a regular
+file.
+
+When set to "0", writing to FIFOs is unrestricted.
+
+When set to "1" don't allow O_CREAT open on FIFOs that we don't own
+in world writable sticky directories, unless they are owned by the
+owner of the directory.
+
+When set to "2" it also applies to group writable sticky directories.
+
+This protection is based on the restrictions in Openwall.
+
+==============================================================
+
protected_hardlinks:
A long-standing class of security issues is the hardlink-based
@@ -202,6 +222,22 @@ This protection is based on the restrict
==============================================================
+protected_regular:
+
+This protection is similar to protected_fifos, but it
+avoids writes to an attacker-controlled regular file, where a program
+expected to create one.
+
+When set to "0", writing to regular files is unrestricted.
+
+When set to "1" don't allow O_CREAT open on regular files that we
+don't own in world writable sticky directories, unless they are
+owned by the owner of the directory.
+
+When set to "2" it also applies to group writable sticky directories.
+
+==============================================================
+
protected_symlinks:
A long-standing class of security issues is the symlink-based
diff -Nurp a/fs/namei.c b/fs/namei.c
--- a/fs/namei.c 2018-10-20 09:52:38.000000000 +0200
+++ b/fs/namei.c 2018-10-23 18:09:35.450879869 +0200
@@ -869,6 +869,8 @@ static inline void put_link(struct namei
int sysctl_protected_symlinks __read_mostly = 0;
int sysctl_protected_hardlinks __read_mostly = 0;
+int sysctl_protected_fifos __read_mostly;
+int sysctl_protected_regular __read_mostly;
/**
* may_follow_link - Check symlink following for unsafe situations
@@ -982,6 +984,45 @@ static int may_linkat(struct path *link)
return -EPERM;
}
+/**
+ * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory
+ * should be allowed, or not, on files that already
+ * exist.
+ * @dir: the sticky parent directory
+ * @inode: the inode of the file to open
+ *
+ * Block an O_CREAT open of a FIFO (or a regular file) when:
+ * - sysctl_protected_fifos (or sysctl_protected_regular) is enabled
+ * - the file already exists
+ * - we are in a sticky directory
+ * - we don't own the file
+ * - the owner of the directory doesn't own the file
+ * - the directory is world writable
+ * If the sysctl_protected_fifos (or sysctl_protected_regular) is set to 2
+ * the directory doesn't have to be world writable: being group writable will
+ * be enough.
+ *
+ * Returns 0 if the open is allowed, -ve on error.
+ */
+static int may_create_in_sticky(struct dentry * const dir,
+ struct inode * const inode)
+{
+ if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) ||
+ (!sysctl_protected_regular && S_ISREG(inode->i_mode)) ||
+ likely(!(dir->d_inode->i_mode & S_ISVTX)) ||
+ uid_eq(inode->i_uid, dir->d_inode->i_uid) ||
+ uid_eq(current_fsuid(), inode->i_uid))
+ return 0;
+
+ if (likely(dir->d_inode->i_mode & 0002) ||
+ (dir->d_inode->i_mode & 0020 &&
+ ((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) ||
+ (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) {
+ return -EACCES;
+ }
+ return 0;
+}
+
static __always_inline
const char *get_link(struct nameidata *nd)
{
@@ -3166,9 +3207,15 @@ finish_open:
error = -ELOOP;
goto out;
}
- error = -EISDIR;
- if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
- goto out;
+ if (open_flag & O_CREAT) {
+ error = -EISDIR;
+ if (d_is_dir(nd->path.dentry))
+ goto out;
+ error = may_create_in_sticky(dir,
+ d_backing_inode(nd->path.dentry));
+ if (unlikely(error))
+ goto out;
+ }
error = -ENOTDIR;
if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
goto out;
diff -Nurp a/include/linux/fs.h b/include/linux/fs.h
--- a/include/linux/fs.h 2018-10-20 09:52:38.000000000 +0200
+++ b/include/linux/fs.h 2018-10-23 18:08:20.402649386 +0200
@@ -65,6 +65,8 @@ extern struct inodes_stat_t inodes_stat;
extern int leases_enable, lease_break_time;
extern int sysctl_protected_symlinks;
extern int sysctl_protected_hardlinks;
+extern int sysctl_protected_fifos;
+extern int sysctl_protected_regular;
struct buffer_head;
typedef int (get_block_t)(struct inode *inode, sector_t iblock,
diff -Nurp a/kernel/sysctl.c b/kernel/sysctl.c
--- a/kernel/sysctl.c 2018-10-20 09:52:38.000000000 +0200
+++ b/kernel/sysctl.c 2018-10-23 18:08:20.402649386 +0200
@@ -1716,6 +1716,24 @@ static struct ctl_table fs_table[] = {
.extra2 = &one,
},
{
+ .procname = "protected_fifos",
+ .data = &sysctl_protected_fifos,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &two,
+ },
+ {
+ .procname = "protected_regular",
+ .data = &sysctl_protected_regular,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &two,
+ },
+ {
.procname = "suid_dumpable",
.data = &suid_dumpable,
.maxlen = sizeof(int),
Hello,
Please picked up this patch for linux 4.9 and 4.14 (linux 4.4 needs a small modification).
Indeed, this code will be beneficial to the GNU/Linux distributions that use a longterm kernel.
Compiled/tested without problem.
Thank.
[ Upstream commit 30aba6656f61ed44cba445a3c0d38b296fa9e8f5 ]
From: Salvatore Mesoraca <s.mesoraca16(a)gmail.com>
Date: Thu, 23 Aug 2018 17:00:35 -0700
Subject: namei: allow restricted O_CREAT of FIFOs and regular files
Disallows open of FIFOs or regular files not owned by the user in world
writable sticky directories, unless the owner is the same as that of the
directory or the file is opened without the O_CREAT flag. The purpose
is to make data spoofing attacks harder. This protection can be turned
on and off separately for FIFOs and regular files via sysctl, just like
the symlinks/hardlinks protection. This patch is based on Openwall's
"HARDEN_FIFO" feature by Solar Designer.
This is a brief list of old vulnerabilities that could have been prevented
by this feature, some of them even allow for privilege escalation:
CVE-2000-1134
CVE-2007-3852
CVE-2008-0525
CVE-2009-0416
CVE-2011-4834
CVE-2015-1838
CVE-2015-7442
CVE-2016-7489
This list is not meant to be complete. It's difficult to track down all
vulnerabilities of this kind because they were often reported without any
mention of this particular attack vector. In fact, before
hardlinks/symlinks restrictions, fifos/regular files weren't the favorite
vehicle to exploit them.
[s.mesoraca16(a)gmail.com: fix bug reported by Dan Carpenter]
Link: https://lkml.kernel.org/r/20180426081456.GA7060@mwanda
Link: http://lkml.kernel.org/r/1524829819-11275-1-git-send-email-s.mesoraca16@gma…
[keescook(a)chromium.org: drop pr_warn_ratelimited() in favor of audit changes in the future]
[keescook(a)chromium.org: adjust commit subjet]
Link: http://lkml.kernel.org/r/20180416175918.GA13494@beast
Signed-off-by: Salvatore Mesoraca <s.mesoraca16(a)gmail.com>
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Suggested-by: Solar Designer <solar(a)openwall.com>
Suggested-by: Kees Cook <keescook(a)chromium.org>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Dan Carpenter <dan.carpenter(a)oracle.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
---
Documentation/sysctl/fs.txt | 36 ++++++++++++++++++++++++++++++
fs/namei.c | 53 ++++++++++++++++++++++++++++++++++++++++++---
include/linux/fs.h | 2 ++
kernel/sysctl.c | 18 +++++++++++++++
4 files changed, 106 insertions(+), 3 deletions(-)
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index 6c00c1e2743f..819caf8ca05f 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -34,7 +34,9 @@ Currently, these files are in /proc/sys/fs:
- overflowgid
- pipe-user-pages-hard
- pipe-user-pages-soft
+- protected_fifos
- protected_hardlinks
+- protected_regular
- protected_symlinks
- suid_dumpable
- super-max
@@ -182,6 +184,24 @@ applied.
==============================================================
+protected_fifos:
+
+The intent of this protection is to avoid unintentional writes to
+an attacker-controlled FIFO, where a program expected to create a regular
+file.
+
+When set to "0", writing to FIFOs is unrestricted.
+
+When set to "1" don't allow O_CREAT open on FIFOs that we don't own
+in world writable sticky directories, unless they are owned by the
+owner of the directory.
+
+When set to "2" it also applies to group writable sticky directories.
+
+This protection is based on the restrictions in Openwall.
+
+==============================================================
+
protected_hardlinks:
A long-standing class of security issues is the hardlink-based
@@ -202,6 +222,22 @@ This protection is based on the restrictions in Openwall and grsecurity.
==============================================================
+protected_regular:
+
+This protection is similar to protected_fifos, but it
+avoids writes to an attacker-controlled regular file, where a program
+expected to create one.
+
+When set to "0", writing to regular files is unrestricted.
+
+When set to "1" don't allow O_CREAT open on regular files that we
+don't own in world writable sticky directories, unless they are
+owned by the owner of the directory.
+
+When set to "2" it also applies to group writable sticky directories.
+
+==============================================================
+
protected_symlinks:
A long-standing class of security issues is the symlink-based
diff --git a/fs/namei.c b/fs/namei.c
index ae6aa9ae757c..0cab6494978c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -887,6 +887,8 @@ static inline void put_link(struct nameidata *nd)
int sysctl_protected_symlinks __read_mostly = 0;
int sysctl_protected_hardlinks __read_mostly = 0;
+int sysctl_protected_fifos __read_mostly;
+int sysctl_protected_regular __read_mostly;
/**
* may_follow_link - Check symlink following for unsafe situations
@@ -1003,6 +1005,45 @@ static int may_linkat(struct path *link)
return -EPERM;
}
+/**
+ * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory
+ * should be allowed, or not, on files that already
+ * exist.
+ * @dir: the sticky parent directory
+ * @inode: the inode of the file to open
+ *
+ * Block an O_CREAT open of a FIFO (or a regular file) when:
+ * - sysctl_protected_fifos (or sysctl_protected_regular) is enabled
+ * - the file already exists
+ * - we are in a sticky directory
+ * - we don't own the file
+ * - the owner of the directory doesn't own the file
+ * - the directory is world writable
+ * If the sysctl_protected_fifos (or sysctl_protected_regular) is set to 2
+ * the directory doesn't have to be world writable: being group writable will
+ * be enough.
+ *
+ * Returns 0 if the open is allowed, -ve on error.
+ */
+static int may_create_in_sticky(struct dentry * const dir,
+ struct inode * const inode)
+{
+ if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) ||
+ (!sysctl_protected_regular && S_ISREG(inode->i_mode)) ||
+ likely(!(dir->d_inode->i_mode & S_ISVTX)) ||
+ uid_eq(inode->i_uid, dir->d_inode->i_uid) ||
+ uid_eq(current_fsuid(), inode->i_uid))
+ return 0;
+
+ if (likely(dir->d_inode->i_mode & 0002) ||
+ (dir->d_inode->i_mode & 0020 &&
+ ((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) ||
+ (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) {
+ return -EACCES;
+ }
+ return 0;
+}
+
static __always_inline
const char *get_link(struct nameidata *nd)
{
@@ -3348,9 +3389,15 @@ finish_open:
if (error)
return error;
audit_inode(nd->name, nd->path.dentry, 0);
- error = -EISDIR;
- if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
- goto out;
+ if (open_flag & O_CREAT) {
+ error = -EISDIR;
+ if (d_is_dir(nd->path.dentry))
+ goto out;
+ error = may_create_in_sticky(dir,
+ d_backing_inode(nd->path.dentry));
+ if (unlikely(error))
+ goto out;
+ }
error = -ENOTDIR;
if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
goto out;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e5710541183b..33322702c910 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -74,6 +74,8 @@ extern struct inodes_stat_t inodes_stat;
extern int leases_enable, lease_break_time;
extern int sysctl_protected_symlinks;
extern int sysctl_protected_hardlinks;
+extern int sysctl_protected_fifos;
+extern int sysctl_protected_regular;
typedef __kernel_rwf_t rwf_t;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 71ceb6c13c1a..cc02050fd0c4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1807,6 +1807,24 @@ static struct ctl_table fs_table[] = {
.extra1 = &zero,
.extra2 = &one,
},
+ {
+ .procname = "protected_fifos",
+ .data = &sysctl_protected_fifos,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &two,
+ },
+ {
+ .procname = "protected_regular",
+ .data = &sysctl_protected_regular,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &two,
+ },
{
.procname = "suid_dumpable",
.data = &suid_dumpable,
--
cgit 1.2-0.3.lf.el7
From: Parav Pandit <parav(a)mellanox.com>
commit b2bedfb39541a7e14798d066b6f8685d84c8fcf5 upstream
Currently qp->port stores the port number whenever IB_QP_PORT
QP attribute mask is set (during QP state transition to INIT state).
This port number should be stored for the real QP when XRC target QP
is used.
Follow the ib_modify_qp() implementation and hide the access to ->real_qp.
This commit is required for proper operation of commit 65be9cbe1224
("RDMA/uverbs: Expand primary and alt AV port checks") which was added
to 4.14.61.
Without this commit, XRC qp's do not work.
Fixes: a512c2fbef9c ("IB/core: Introduce modify QP operation with udata")
Signed-off-by: Jack Morgenstein <jackm(a)dev.mellanox.co.il>
Signed-off-by: Parav Pandit <parav(a)mellanox.com>
Reviewed-by: Daniel Jurgens <danielj(a)mellanox.com>
Signed-off-by: Leon Romanovsky <leon(a)kernel.org>
Signed-off-by: Jason Gunthorpe <jgg(a)mellanox.com>
---
drivers/infiniband/core/verbs.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index feb80db..6d59af0 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1285,7 +1285,7 @@ int ib_resolve_eth_dmac(struct ib_device *device,
/**
* ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
- * @qp: The QP to modify.
+ * @ib_qp: The QP to modify.
* @attr: On input, specifies the QP attributes to modify. On output,
* the current values of selected QP attributes are returned.
* @attr_mask: A bit-mask used to specify which attributes of the QP
@@ -1294,9 +1294,10 @@ int ib_resolve_eth_dmac(struct ib_device *device,
* are being modified.
* It returns 0 on success and returns appropriate error code on error.
*/
-int ib_modify_qp_with_udata(struct ib_qp *qp, struct ib_qp_attr *attr,
+int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
+ struct ib_qp *qp = ib_qp->real_qp;
int ret;
if (attr_mask & IB_QP_AV) {
--
1.8.1.2
From: Michael J. Ruhl <michael.j.ruhl(a)intel.com>
commit a0e0cb82804a6a21d9067022c2dfdf80d11da429 upstream
pq_update() can only be called in two places: from the completion
function when the complete (npkts) sequence of packets has been
submitted and processed, or from setup function if a subset of the
packets were submitted (i.e. the error path).
Currently both paths can call pq_update() if an error occurrs. This
race will cause the n_req value to go negative, hanging file_close(),
or cause a crash by freeing the txlist more than once.
Several variables are used to determine SDMA send state. Most of
these are unnecessary, and have code inspectible races between the
setup function and the completion function, in both the send path and
the error path.
The request 'status' value can be set by the setup or by the
completion function. This is code inspectibly racy. Since the status
is not needed in the completion code or by the caller it has been
removed.
The request 'done' value races between usage by the setup and the
completion function. The completion function does not need this.
When the number of processed packets matches npkts, it is done.
The 'has_error' value races between usage of the setup and the
completion function. This can cause incorrect error handling and leave
the n_req in an incorrect value (i.e. negative).
Simplify the code by removing all of the unneeded state checks and
variables.
Clean up iovs node when it is freed.
Eliminate race conditions in the error path:
If all packets are submitted, the completion handler will set the
completion status correctly (ok or aborted).
If all packets are not submitted, the caller must wait until the
submitted packets have completed, and then set the completion status.
These two change eliminate the race condition in the error path.
Cc: <stable(a)vger.kernel.org> # 4.14.0
Reviewed-by: Mitko Haralanov <mitko.haralanov(a)intel.com>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn(a)intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl(a)intel.com>
---
drivers/infiniband/hw/hfi1/user_sdma.c | 87 ++++++++++++++------------------
drivers/infiniband/hw/hfi1/user_sdma.h | 3 -
2 files changed, 39 insertions(+), 51 deletions(-)
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 8c954a0..c14ec04 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -328,7 +328,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
u8 opcode, sc, vl;
u16 pkey;
u32 slid;
- int req_queued = 0;
u16 dlid;
u32 selector;
@@ -392,7 +391,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->data_len = 0;
req->pq = pq;
req->cq = cq;
- req->status = -1;
req->ahg_idx = -1;
req->iov_idx = 0;
req->sent = 0;
@@ -400,12 +398,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->seqcomp = 0;
req->seqsubmitted = 0;
req->tids = NULL;
- req->done = 0;
req->has_error = 0;
INIT_LIST_HEAD(&req->txps);
memcpy(&req->info, &info, sizeof(info));
+ /* The request is initialized, count it */
+ atomic_inc(&pq->n_reqs);
+
if (req_opcode(info.ctrl) == EXPECTED) {
/* expected must have a TID info and at least one data vector */
if (req->data_iovs < 2) {
@@ -500,7 +500,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
ret = pin_vector_pages(req, &req->iovs[i]);
if (ret) {
req->data_iovs = i;
- req->status = ret;
goto free_req;
}
req->data_len += req->iovs[i].iov.iov_len;
@@ -561,14 +560,10 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->ahg_idx = sdma_ahg_alloc(req->sde);
set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
- atomic_inc(&pq->n_reqs);
- req_queued = 1;
/* Send the first N packets in the request to buy us some time */
ret = user_sdma_send_pkts(req, pcount);
- if (unlikely(ret < 0 && ret != -EBUSY)) {
- req->status = ret;
+ if (unlikely(ret < 0 && ret != -EBUSY))
goto free_req;
- }
/*
* It is possible that the SDMA engine would have processed all the
@@ -588,14 +583,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
while (req->seqsubmitted != req->info.npkts) {
ret = user_sdma_send_pkts(req, pcount);
if (ret < 0) {
- if (ret != -EBUSY) {
- req->status = ret;
- WRITE_ONCE(req->has_error, 1);
- if (ACCESS_ONCE(req->seqcomp) ==
- req->seqsubmitted - 1)
- goto free_req;
- return ret;
- }
+ if (ret != -EBUSY)
+ goto free_req;
wait_event_interruptible_timeout(
pq->busy.wait_dma,
(pq->state == SDMA_PKT_Q_ACTIVE),
@@ -606,10 +595,19 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
*count += idx;
return 0;
free_req:
- user_sdma_free_request(req, true);
- if (req_queued)
+ /*
+ * If the submitted seqsubmitted == npkts, the completion routine
+ * controls the final state. If sequbmitted < npkts, wait for any
+ * outstanding packets to finish before cleaning up.
+ */
+ if (req->seqsubmitted < req->info.npkts) {
+ if (req->seqsubmitted)
+ wait_event(pq->busy.wait_dma,
+ (req->seqcomp == req->seqsubmitted - 1));
+ user_sdma_free_request(req, true);
pq_update(pq);
- set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
+ set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
+ }
return ret;
}
@@ -917,7 +915,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
req->seqsubmitted += count;
if (req->seqsubmitted == req->info.npkts) {
- WRITE_ONCE(req->done, 1);
/*
* The txreq has already been submitted to the HW queue
* so we can free the AHG entry now. Corruption will not
@@ -1347,11 +1344,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
return diff;
}
-/*
- * SDMA tx request completion callback. Called when the SDMA progress
- * state machine gets notification that the SDMA descriptors for this
- * tx request have been processed by the DMA engine. Called in
- * interrupt context.
+/**
+ * user_sdma_txreq_cb() - SDMA tx request completion callback.
+ * @txreq: valid sdma tx request
+ * @status: success/failure of request
+ *
+ * Called when the SDMA progress state machine gets notification that
+ * the SDMA descriptors for this tx request have been processed by the
+ * DMA engine. Called in interrupt context.
+ * Only do work on completed sequences.
*/
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
{
@@ -1360,7 +1361,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
struct user_sdma_request *req;
struct hfi1_user_sdma_pkt_q *pq;
struct hfi1_user_sdma_comp_q *cq;
- u16 idx;
+ enum hfi1_sdma_comp_state state = COMPLETE;
if (!tx->req)
return;
@@ -1373,31 +1374,19 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
SDMA_DBG(req, "SDMA completion with error %d",
status);
WRITE_ONCE(req->has_error, 1);
+ state = ERROR;
}
req->seqcomp = tx->seqnum;
kmem_cache_free(pq->txreq_cache, tx);
- tx = NULL;
-
- idx = req->info.comp_idx;
- if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
- if (req->seqcomp == req->info.npkts - 1) {
- req->status = 0;
- user_sdma_free_request(req, false);
- pq_update(pq);
- set_comp_state(pq, cq, idx, COMPLETE, 0);
- }
- } else {
- if (status != SDMA_TXREQ_S_OK)
- req->status = status;
- if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) &&
- (READ_ONCE(req->done) ||
- READ_ONCE(req->has_error))) {
- user_sdma_free_request(req, false);
- pq_update(pq);
- set_comp_state(pq, cq, idx, ERROR, req->status);
- }
- }
+
+ /* sequence isn't complete? We are done */
+ if (req->seqcomp != req->info.npkts - 1)
+ return;
+
+ user_sdma_free_request(req, false);
+ set_comp_state(pq, cq, req->info.comp_idx, state, status);
+ pq_update(pq);
}
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
@@ -1430,6 +1419,8 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
if (!node)
continue;
+ req->iovs[i].node = NULL;
+
if (unpin)
hfi1_mmu_rb_remove(req->pq->handler,
&node->rb);
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index 9b8bb56..5af5233 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -196,8 +196,6 @@ struct user_sdma_request {
/* Writeable fields shared with interrupt */
u64 seqcomp ____cacheline_aligned_in_smp;
u64 seqsubmitted;
- /* status of the last txreq completed */
- int status;
/* Send side fields */
struct list_head txps ____cacheline_aligned_in_smp;
@@ -219,7 +217,6 @@ struct user_sdma_request {
u16 tididx;
/* progress index moving along the iovs array */
u8 iov_idx;
- u8 done;
u8 has_error;
struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];