I'm announcing the release of the 4.9.258 kernel.
All users of the 4.9 kernel series must upgrade.
The updated 4.9.y git tree can be found at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-4.9.y and can be browsed at the normal kernel.org git web browser: https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git%3Ba=summa...
thanks,
greg k-h
------------
Makefile | 9 arch/arm/boot/dts/lpc32xx.dtsi | 3 arch/arm/xen/p2m.c | 6 arch/h8300/kernel/asm-offsets.c | 3 arch/x86/Makefile | 6 arch/x86/xen/p2m.c | 15 - drivers/block/xen-blkback/blkback.c | 30 +- drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c | 3 drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 3 drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 5 drivers/net/xen-netback/netback.c | 4 drivers/net/xen-netback/rx.c | 9 drivers/remoteproc/qcom_q6v5_pil.c | 6 drivers/scsi/qla2xxx/qla_tmpl.c | 9 drivers/scsi/qla2xxx/qla_tmpl.h | 2 drivers/usb/dwc3/ulpi.c | 20 + drivers/xen/gntdev.c | 33 +- drivers/xen/xen-scsiback.c | 4 fs/fs-writeback.c | 2 fs/overlayfs/copy_up.c | 15 - fs/squashfs/export.c | 41 ++- fs/squashfs/id.c | 40 ++- fs/squashfs/squashfs_fs_sb.h | 1 fs/squashfs/super.c | 6 fs/squashfs/xattr.h | 10 fs/squashfs/xattr_id.c | 66 ++++- include/linux/backing-dev.h | 10 include/linux/ftrace.h | 4 include/linux/memcontrol.h | 33 ++ include/linux/netdevice.h | 2 include/linux/string.h | 4 include/linux/sunrpc/xdr.h | 3 include/trace/events/writeback.h | 35 +- include/xen/grant_table.h | 1 kernel/bpf/stackmap.c | 2 kernel/futex.c | 233 +++++++++++++++---- kernel/trace/ftrace.c | 2 kernel/trace/trace.c | 2 kernel/trace/trace_events.c | 3 lib/string.c | 47 +++ mm/backing-dev.c | 1 mm/memblock.c | 48 --- mm/memcontrol.c | 43 ++- mm/page-writeback.c | 14 - net/key/af_key.c | 6 net/netfilter/nf_conntrack_core.c | 3 net/netfilter/xt_recent.c | 12 net/sunrpc/auth_gss/auth_gss.c | 30 -- net/sunrpc/auth_gss/auth_gss_internal.h | 45 +++ net/sunrpc/auth_gss/gss_krb5_mech.c | 31 -- net/vmw_vsock/af_vsock.c | 13 - net/vmw_vsock/virtio_transport_common.c | 4 scripts/Makefile.build | 3 virt/kvm/kvm_main.c | 3 54 files changed, 680 insertions(+), 308 deletions(-)
Alexandre Belloni (1): ARM: dts: lpc32xx: Revert set default clock rate of HCLK PLL
Amir Goldstein (1): ovl: skip getxattr of security labels
Andi Kleen (1): trace: Use -mcount-record for dynamic ftrace
Arun Easi (1): scsi: qla2xxx: Fix crash during driver load on big endian machines
Borislav Petkov (1): x86/build: Disable CET instrumentation in the kernel for 32-bit too
Bui Quang Minh (1): bpf: Check for integer overflow when using roundup_pow_of_two()
Cong Wang (1): af_key: relax availability checks for skb size calculation
Dave Wysochanski (2): SUNRPC: Move simple_get_bytes and simple_get_netobj into private header SUNRPC: Handle 0 length opaque XDR object data properly
Edwin Peer (1): net: watchdog: hold device global xmit lock during tx disable
Emmanuel Grumbach (1): iwlwifi: pcie: add a NULL check in iwl_pcie_txq_unmap
Felipe Balbi (1): usb: dwc3: ulpi: fix checkpatch warning
Florian Westphal (1): netfilter: conntrack: skip identical origin tuple in same zone only
Greg Kroah-Hartman (1): Linux 4.9.258
Greg Thelen (1): tracing: Fix SKIP_STACK_VALIDATION=1 build due to bad merge with -mrecord-mcount
Jan Beulich (8): Xen/x86: don't bail early from clear_foreign_p2m_mapping() Xen/x86: also check kernel mapping in set_foreign_p2m_mapping() Xen/gntdev: correct dev_bus_addr handling in gntdev_map_grant_pages() Xen/gntdev: correct error checking in gntdev_map_grant_pages() xen-blkback: don't "handle" error by BUG() xen-netback: don't "handle" error by BUG() xen-scsiback: don't "handle" error by BUG() xen-blkback: fix error handling in xen_blkbk_map()
Johannes Berg (2): iwlwifi: mvm: take mutex for calling iwl_mvm_get_sync_time() iwlwifi: mvm: guard against device removal in reprobe
Johannes Weiner (1): mm: memcontrol: fix NULL pointer crash in test_clear_page_writeback()
Jozsef Kadlecsik (1): netfilter: xt_recent: Fix attempt to update deleted entry
Juergen Gross (1): xen/netback: avoid race in xenvif_rx_ring_slots_available()
Lai Jiangshan (1): kvm: check tlbs_dirty directly
Norbert Slusarek (1): net/vmw_vsock: improve locking in vsock_connect_timeout()
Peter Zijlstra (1): futex: Change locking rules
Phillip Lougher (3): squashfs: add more sanity checks in id lookup squashfs: add more sanity checks in inode lookup squashfs: add more sanity checks in xattr id lookup
Qian Cai (1): include/trace/events/writeback.h: fix -Wstringop-truncation warnings
Randy Dunlap (1): h8300: fix PREEMPTION build, TI_PRE_COUNT undefined
Roman Gushchin (1): memblock: do not start bottom-up allocations with kernel_end
Serge Semin (1): usb: dwc3: ulpi: Replace CPU-based busyloop with Protocol-based one
Sibi Sankar (1): remoteproc: qcom_q6v5_mss: Validate MBA firmware size before load
Stefano Garzarella (2): vsock/virtio: update credit only if socket is not closed vsock: fix locking in vsock_shutdown()
Stefano Stabellini (1): xen/arm: don't ignore return errors from set_phys_to_machine
Steven Rostedt (VMware) (3): fgraph: Initialize tracing_graph_pause at task creation tracing: Do not count ftrace events in top level enable output tracing: Check length before giving out the filter buffer
Theodore Ts'o (1): memcg: fix a crash in wb_workfn when a device disappears
Thomas Gleixner (2): futex: Ensure the correct return value from futex_lock_pi() futex: Cure exit race
Tobin C. Harding (1): lib/string: Add strscpy_pad() function
Vasily Gorbik (1): tracing: Avoid calling cc-option -mrecord-mcount for every Makefile
diff --git a/Makefile b/Makefile index e53096154f81..e5955f122ffd 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 257 +SUBLEVEL = 258 EXTRAVERSION = NAME = Roaring Lionus
@@ -762,6 +762,13 @@ ifdef CONFIG_FUNCTION_TRACER ifndef CC_FLAGS_FTRACE CC_FLAGS_FTRACE := -pg endif +ifdef CONFIG_FTRACE_MCOUNT_RECORD + # gcc 5 supports generating the mcount tables directly + ifeq ($(call cc-option-yn,-mrecord-mcount),y) + CC_FLAGS_FTRACE += -mrecord-mcount + export CC_USING_RECORD_MCOUNT := 1 + endif +endif export CC_FLAGS_FTRACE ifdef CONFIG_HAVE_FENTRY CC_USING_FENTRY := $(call cc-option, -mfentry -DCC_USING_FENTRY) diff --git a/arch/arm/boot/dts/lpc32xx.dtsi b/arch/arm/boot/dts/lpc32xx.dtsi index 2802c9565b6c..976a75a4eb2c 100644 --- a/arch/arm/boot/dts/lpc32xx.dtsi +++ b/arch/arm/boot/dts/lpc32xx.dtsi @@ -323,9 +323,6 @@
clocks = <&xtal_32k>, <&xtal>; clock-names = "xtal_32k", "xtal"; - - assigned-clocks = <&clk LPC32XX_CLK_HCLK_PLL>; - assigned-clock-rates = <208000000>; }; };
diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index 0ed01f2d5ee4..02579e6569f0 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -93,8 +93,10 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, for (i = 0; i < count; i++) { if (map_ops[i].status) continue; - set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT, - map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT); + if (unlikely(!set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT, + map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT))) { + return -ENOMEM; + } }
return 0; diff --git a/arch/h8300/kernel/asm-offsets.c b/arch/h8300/kernel/asm-offsets.c index dc2d16ce8a0d..3e33a9844d99 100644 --- a/arch/h8300/kernel/asm-offsets.c +++ b/arch/h8300/kernel/asm-offsets.c @@ -62,6 +62,9 @@ int main(void) OFFSET(TI_FLAGS, thread_info, flags); OFFSET(TI_CPU, thread_info, cpu); OFFSET(TI_PRE, thread_info, preempt_count); +#ifdef CONFIG_PREEMPTION + DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); +#endif
return 0; } diff --git a/arch/x86/Makefile b/arch/x86/Makefile index a95d414663b1..9f0099c46c88 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -61,6 +61,9 @@ endif KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
+# Intel CET isn't enabled in the kernel +KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) + ifeq ($(CONFIG_X86_32),y) BITS := 32 UTS_MACHINE := i386 @@ -137,9 +140,6 @@ else KBUILD_CFLAGS += -mno-red-zone KBUILD_CFLAGS += -mcmodel=kernel
- # Intel CET isn't enabled in the kernel - KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) - # -funit-at-a-time shrinks the kernel .text considerably # unfortunately it makes reading oopses harder. KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 37129db76d33..fbf8508e558a 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -725,7 +725,8 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, unsigned long mfn, pfn;
/* Do not add to override if the map failed. */ - if (map_ops[i].status) + if (map_ops[i].status != GNTST_okay || + (kmap_ops && kmap_ops[i].status != GNTST_okay)) continue;
if (map_ops[i].flags & GNTMAP_contains_pte) { @@ -763,17 +764,15 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i])); unsigned long pfn = page_to_pfn(pages[i]);
- if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) { + if (mfn != INVALID_P2M_ENTRY && (mfn & FOREIGN_FRAME_BIT)) + set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + else ret = -EINVAL; - goto out; - } - - set_phys_to_machine(pfn, INVALID_P2M_ENTRY); } if (kunmap_ops) ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, - kunmap_ops, count); -out: + kunmap_ops, count) ?: ret; + return ret; } EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 4f643a87f9c7..2b739ba841b1 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -843,8 +843,11 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring, pages[i]->page = persistent_gnt->page; pages[i]->persistent_gnt = persistent_gnt; } else { - if (get_free_page(ring, &pages[i]->page)) - goto out_of_memory; + if (get_free_page(ring, &pages[i]->page)) { + put_free_pages(ring, pages_to_gnt, segs_to_map); + ret = -ENOMEM; + goto out; + } addr = vaddr(pages[i]->page); pages_to_gnt[segs_to_map] = pages[i]->page; pages[i]->persistent_gnt = NULL; @@ -860,10 +863,8 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring, break; }
- if (segs_to_map) { + if (segs_to_map) ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map); - BUG_ON(ret); - }
/* * Now swizzle the MFN in our domain with the MFN from the other domain @@ -878,7 +879,7 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring, pr_debug("invalid buffer -- could not remap it\n"); put_free_pages(ring, &pages[seg_idx]->page, 1); pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE; - ret |= 1; + ret |= !ret; goto next; } pages[seg_idx]->handle = map[new_map_idx].handle; @@ -930,17 +931,18 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring, } segs_to_map = 0; last_map = map_until; - if (map_until != num) + if (!ret && map_until != num) goto again;
- return ret; - -out_of_memory: - pr_alert("%s: out of memory\n", __func__); - put_free_pages(ring, pages_to_gnt, segs_to_map); - for (i = last_map; i < num; i++) +out: + for (i = last_map; i < num; i++) { + /* Don't zap current batch's valid persistent grants. */ + if(i >= last_map + segs_to_map) + pages[i]->persistent_gnt = NULL; pages[i]->handle = BLKBACK_INVALID_HANDLE; - return -ENOMEM; + } + + return ret; }
static int xen_blkbk_map_seg(struct pending_req *pending_req) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c index f4d75ffe3d8a..7f01fb91ea66 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c @@ -518,7 +518,10 @@ static ssize_t iwl_dbgfs_os_device_timediff_read(struct file *file, const size_t bufsz = sizeof(buf); int pos = 0;
+ mutex_lock(&mvm->mutex); iwl_mvm_get_sync_time(mvm, &curr_gp2, &curr_os); + mutex_unlock(&mvm->mutex); + do_div(curr_os, NSEC_PER_USEC); diff = curr_os - curr_gp2; pos += scnprintf(buf + pos, bufsz - pos, "diff=%lld\n", diff); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 6d38eec3f9d3..a78aaf17116e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -1104,6 +1104,7 @@ static void iwl_mvm_reprobe_wk(struct work_struct *wk) reprobe = container_of(wk, struct iwl_mvm_reprobe, work); if (device_reprobe(reprobe->dev)) dev_err(reprobe->dev, "reprobe failed!\n"); + put_device(reprobe->dev); kfree(reprobe); module_put(THIS_MODULE); } @@ -1202,7 +1203,7 @@ void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error) module_put(THIS_MODULE); return; } - reprobe->dev = mvm->trans->dev; + reprobe->dev = get_device(mvm->trans->dev); INIT_WORK(&reprobe->work, iwl_mvm_reprobe_wk); schedule_work(&reprobe->work); } else if (mvm->cur_ucode == IWL_UCODE_REGULAR) { diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 174e45d78c46..ff564198d2ce 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -676,6 +676,11 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id) struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq = &trans_pcie->txq[txq_id];
+ if (!txq) { + IWL_ERR(trans, "Trying to free a queue that wasn't allocated?\n"); + return; + } + spin_lock_bh(&txq->lock); while (txq->write_ptr != txq->read_ptr) { IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n", diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index fd2ac6cd0c69..0024200c30ce 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1328,13 +1328,11 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget) return 0;
gnttab_batch_copy(queue->tx_copy_ops, nr_cops); - if (nr_mops != 0) { + if (nr_mops != 0) ret = gnttab_map_refs(queue->tx_map_ops, NULL, queue->pages_to_map, nr_mops); - BUG_ON(ret); - }
work_done = xenvif_tx_submit(queue);
diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index f152246c7dfb..ddfb1cfa2dd9 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -38,10 +38,15 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) RING_IDX prod, cons; struct sk_buff *skb; int needed; + unsigned long flags; + + spin_lock_irqsave(&queue->rx_queue.lock, flags);
skb = skb_peek(&queue->rx_queue); - if (!skb) + if (!skb) { + spin_unlock_irqrestore(&queue->rx_queue.lock, flags); return false; + }
needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE); if (skb_is_gso(skb)) @@ -49,6 +54,8 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) if (skb->sw_hash) needed++;
+ spin_unlock_irqrestore(&queue->rx_queue.lock, flags); + do { prod = queue->rx.sring->req_prod; cons = queue->rx.req_cons; diff --git a/drivers/remoteproc/qcom_q6v5_pil.c b/drivers/remoteproc/qcom_q6v5_pil.c index 2e0caaaa766a..72fc33bba99c 100644 --- a/drivers/remoteproc/qcom_q6v5_pil.c +++ b/drivers/remoteproc/qcom_q6v5_pil.c @@ -193,6 +193,12 @@ static int q6v5_load(struct rproc *rproc, const struct firmware *fw) { struct q6v5 *qproc = rproc->priv;
+ /* MBA is restricted to a maximum size of 1M */ + if (fw->size > qproc->mba_size || fw->size > SZ_1M) { + dev_err(qproc->dev, "MBA firmware load failed\n"); + return -EINVAL; + } + memcpy(qproc->mba_region, fw->data, fw->size);
return 0; diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c index 9c2c7fe61280..ba83c36b76bd 100644 --- a/drivers/scsi/qla2xxx/qla_tmpl.c +++ b/drivers/scsi/qla2xxx/qla_tmpl.c @@ -878,7 +878,8 @@ qla27xx_template_checksum(void *p, ulong size) static inline int qla27xx_verify_template_checksum(struct qla27xx_fwdt_template *tmp) { - return qla27xx_template_checksum(tmp, tmp->template_size) == 0; + return qla27xx_template_checksum(tmp, + le32_to_cpu(tmp->template_size)) == 0; }
static inline int @@ -894,7 +895,7 @@ qla27xx_execute_fwdt_template(struct scsi_qla_host *vha) ulong len;
if (qla27xx_fwdt_template_valid(tmp)) { - len = tmp->template_size; + len = le32_to_cpu(tmp->template_size); tmp = memcpy(vha->hw->fw_dump, tmp, len); ql27xx_edit_template(vha, tmp); qla27xx_walk_template(vha, tmp, tmp, &len); @@ -910,7 +911,7 @@ qla27xx_fwdt_calculate_dump_size(struct scsi_qla_host *vha) ulong len = 0;
if (qla27xx_fwdt_template_valid(tmp)) { - len = tmp->template_size; + len = le32_to_cpu(tmp->template_size); qla27xx_walk_template(vha, tmp, NULL, &len); }
@@ -922,7 +923,7 @@ qla27xx_fwdt_template_size(void *p) { struct qla27xx_fwdt_template *tmp = p;
- return tmp->template_size; + return le32_to_cpu(tmp->template_size); }
ulong diff --git a/drivers/scsi/qla2xxx/qla_tmpl.h b/drivers/scsi/qla2xxx/qla_tmpl.h index 141c1c5e73f4..2d3e1a8349b3 100644 --- a/drivers/scsi/qla2xxx/qla_tmpl.h +++ b/drivers/scsi/qla2xxx/qla_tmpl.h @@ -13,7 +13,7 @@ struct __packed qla27xx_fwdt_template { uint32_t template_type; uint32_t entry_offset; - uint32_t template_size; + __le32 template_size; uint32_t reserved_1;
uint32_t entry_count; diff --git a/drivers/usb/dwc3/ulpi.c b/drivers/usb/dwc3/ulpi.c index bd86f84f3790..3862edf59f7d 100644 --- a/drivers/usb/dwc3/ulpi.c +++ b/drivers/usb/dwc3/ulpi.c @@ -10,6 +10,8 @@ * published by the Free Software Foundation. */
+#include <linux/delay.h> +#include <linux/time64.h> #include <linux/ulpi/regs.h>
#include "core.h" @@ -20,12 +22,22 @@ DWC3_GUSB2PHYACC_ADDR(ULPI_ACCESS_EXTENDED) | \ DWC3_GUSB2PHYACC_EXTEND_ADDR(a) : DWC3_GUSB2PHYACC_ADDR(a))
-static int dwc3_ulpi_busyloop(struct dwc3 *dwc) +#define DWC3_ULPI_BASE_DELAY DIV_ROUND_UP(NSEC_PER_SEC, 60000000L) + +static int dwc3_ulpi_busyloop(struct dwc3 *dwc, u8 addr, bool read) { - unsigned count = 1000; + unsigned long ns = 5L * DWC3_ULPI_BASE_DELAY; + unsigned int count = 1000; u32 reg;
+ if (addr >= ULPI_EXT_VENDOR_SPECIFIC) + ns += DWC3_ULPI_BASE_DELAY; + + if (read) + ns += DWC3_ULPI_BASE_DELAY; + while (count--) { + ndelay(ns); reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYACC(0)); if (!(reg & DWC3_GUSB2PHYACC_BUSY)) return 0; @@ -44,7 +56,7 @@ static int dwc3_ulpi_read(struct device *dev, u8 addr) reg = DWC3_GUSB2PHYACC_NEWREGREQ | DWC3_ULPI_ADDR(addr); dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg);
- ret = dwc3_ulpi_busyloop(dwc); + ret = dwc3_ulpi_busyloop(dwc, addr, true); if (ret) return ret;
@@ -62,7 +74,7 @@ static int dwc3_ulpi_write(struct device *dev, u8 addr, u8 val) reg |= DWC3_GUSB2PHYACC_WRITE | val; dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg);
- return dwc3_ulpi_busyloop(dwc); + return dwc3_ulpi_busyloop(dwc, addr, false); }
static const struct ulpi_ops dwc3_ulpi_ops = { diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 910b5d40c6e9..69d59102ff1b 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -293,36 +293,47 @@ static int map_grant_pages(struct grant_map *map) * to the kernel linear addresses of the struct pages. * These ptes are completely different from the user ptes dealt * with find_grant_ptes. + * Note that GNTMAP_device_map isn't needed here: The + * dev_bus_addr output field gets consumed only from ->map_ops, + * and by not requesting it when mapping we also avoid needing + * to mirror dev_bus_addr into ->unmap_ops (and holding an extra + * reference to the page in the hypervisor). */ + unsigned int flags = (map->flags & ~GNTMAP_device_map) | + GNTMAP_host_map; + for (i = 0; i < map->count; i++) { unsigned long address = (unsigned long) pfn_to_kaddr(page_to_pfn(map->pages[i])); BUG_ON(PageHighMem(map->pages[i]));
- gnttab_set_map_op(&map->kmap_ops[i], address, - map->flags | GNTMAP_host_map, + gnttab_set_map_op(&map->kmap_ops[i], address, flags, map->grants[i].ref, map->grants[i].domid); gnttab_set_unmap_op(&map->kunmap_ops[i], address, - map->flags | GNTMAP_host_map, -1); + flags, -1); } }
pr_debug("map %d+%d\n", map->index, map->count); err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL, map->pages, map->count); - if (err) - return err;
for (i = 0; i < map->count; i++) { - if (map->map_ops[i].status) { + if (map->map_ops[i].status == GNTST_okay) + map->unmap_ops[i].handle = map->map_ops[i].handle; + else if (!err) err = -EINVAL; - continue; - }
- map->unmap_ops[i].handle = map->map_ops[i].handle; - if (use_ptemod) - map->kunmap_ops[i].handle = map->kmap_ops[i].handle; + if (map->flags & GNTMAP_device_map) + map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr; + + if (use_ptemod) { + if (map->kmap_ops[i].status == GNTST_okay) + map->kunmap_ops[i].handle = map->kmap_ops[i].handle; + else if (!err) + err = -EINVAL; + } } return err; } diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 3243d917651a..4bba877ef547 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -423,12 +423,12 @@ static int scsiback_gnttab_data_map_batch(struct gnttab_map_grant_ref *map, return 0;
err = gnttab_map_refs(map, NULL, pg, cnt); - BUG_ON(err); for (i = 0; i < cnt; i++) { if (unlikely(map[i].status != GNTST_okay)) { pr_err("invalid buffer -- could not remap it\n"); map[i].handle = SCSIBACK_INVALID_HANDLE; - err = -ENOMEM; + if (!err) + err = -ENOMEM; } else { get_page(pg[i]); } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index f978ae2bb846..2de656ecc48b 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1971,7 +1971,7 @@ void wb_workfn(struct work_struct *work) struct bdi_writeback, dwork); long pages_written;
- set_worker_desc("flush-%s", dev_name(wb->bdi->dev)); + set_worker_desc("flush-%s", bdi_dev_name(wb->bdi)); current->flags |= PF_SWAPWRITE;
if (likely(!current_is_workqueue_rescuer() || diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 299dbf59f28f..3a583aa1fafe 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -92,6 +92,14 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
if (ovl_is_private_xattr(name)) continue; + + error = security_inode_copy_up_xattr(name); + if (error < 0 && error != -EOPNOTSUPP) + break; + if (error == 1) { + error = 0; + continue; /* Discard */ + } retry: size = vfs_getxattr(old, name, value, value_size); if (size == -ERANGE) @@ -115,13 +123,6 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new) goto retry; }
- error = security_inode_copy_up_xattr(name); - if (error < 0 && error != -EOPNOTSUPP) - break; - if (error == 1) { - error = 0; - continue; /* Discard */ - } error = vfs_setxattr(new, name, value, size, 0); if (error) break; diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c index 8073b6532cf0..d2a806416c3a 100644 --- a/fs/squashfs/export.c +++ b/fs/squashfs/export.c @@ -54,12 +54,17 @@ static long long squashfs_inode_lookup(struct super_block *sb, int ino_num) struct squashfs_sb_info *msblk = sb->s_fs_info; int blk = SQUASHFS_LOOKUP_BLOCK(ino_num - 1); int offset = SQUASHFS_LOOKUP_BLOCK_OFFSET(ino_num - 1); - u64 start = le64_to_cpu(msblk->inode_lookup_table[blk]); + u64 start; __le64 ino; int err;
TRACE("Entered squashfs_inode_lookup, inode_number = %d\n", ino_num);
+ if (ino_num == 0 || (ino_num - 1) >= msblk->inodes) + return -EINVAL; + + start = le64_to_cpu(msblk->inode_lookup_table[blk]); + err = squashfs_read_metadata(sb, &ino, &start, &offset, sizeof(ino)); if (err < 0) return err; @@ -124,7 +129,10 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb, u64 lookup_table_start, u64 next_table, unsigned int inodes) { unsigned int length = SQUASHFS_LOOKUP_BLOCK_BYTES(inodes); + unsigned int indexes = SQUASHFS_LOOKUP_BLOCKS(inodes); + int n; __le64 *table; + u64 start, end;
TRACE("In read_inode_lookup_table, length %d\n", length);
@@ -134,20 +142,37 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb, if (inodes == 0) return ERR_PTR(-EINVAL);
- /* length bytes should not extend into the next table - this check - * also traps instances where lookup_table_start is incorrectly larger - * than the next table start + /* + * The computed size of the lookup table (length bytes) should exactly + * match the table start and end points */ - if (lookup_table_start + length > next_table) + if (length != (next_table - lookup_table_start)) return ERR_PTR(-EINVAL);
table = squashfs_read_table(sb, lookup_table_start, length); + if (IS_ERR(table)) + return table;
/* - * table[0] points to the first inode lookup table metadata block, - * this should be less than lookup_table_start + * table0], table[1], ... table[indexes - 1] store the locations + * of the compressed inode lookup blocks. Each entry should be + * less than the next (i.e. table[0] < table[1]), and the difference + * between them should be SQUASHFS_METADATA_SIZE or less. + * table[indexes - 1] should be less than lookup_table_start, and + * again the difference should be SQUASHFS_METADATA_SIZE or less */ - if (!IS_ERR(table) && le64_to_cpu(table[0]) >= lookup_table_start) { + for (n = 0; n < (indexes - 1); n++) { + start = le64_to_cpu(table[n]); + end = le64_to_cpu(table[n + 1]); + + if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) { + kfree(table); + return ERR_PTR(-EINVAL); + } + } + + start = le64_to_cpu(table[indexes - 1]); + if (start >= lookup_table_start || (lookup_table_start - start) > SQUASHFS_METADATA_SIZE) { kfree(table); return ERR_PTR(-EINVAL); } diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c index d38ea3dab951..8ccc0e3f6ea5 100644 --- a/fs/squashfs/id.c +++ b/fs/squashfs/id.c @@ -48,10 +48,15 @@ int squashfs_get_id(struct super_block *sb, unsigned int index, struct squashfs_sb_info *msblk = sb->s_fs_info; int block = SQUASHFS_ID_BLOCK(index); int offset = SQUASHFS_ID_BLOCK_OFFSET(index); - u64 start_block = le64_to_cpu(msblk->id_table[block]); + u64 start_block; __le32 disk_id; int err;
+ if (index >= msblk->ids) + return -EINVAL; + + start_block = le64_to_cpu(msblk->id_table[block]); + err = squashfs_read_metadata(sb, &disk_id, &start_block, &offset, sizeof(disk_id)); if (err < 0) @@ -69,7 +74,10 @@ __le64 *squashfs_read_id_index_table(struct super_block *sb, u64 id_table_start, u64 next_table, unsigned short no_ids) { unsigned int length = SQUASHFS_ID_BLOCK_BYTES(no_ids); + unsigned int indexes = SQUASHFS_ID_BLOCKS(no_ids); + int n; __le64 *table; + u64 start, end;
TRACE("In read_id_index_table, length %d\n", length);
@@ -80,20 +88,36 @@ __le64 *squashfs_read_id_index_table(struct super_block *sb, return ERR_PTR(-EINVAL);
/* - * length bytes should not extend into the next table - this check - * also traps instances where id_table_start is incorrectly larger - * than the next table start + * The computed size of the index table (length bytes) should exactly + * match the table start and end points */ - if (id_table_start + length > next_table) + if (length != (next_table - id_table_start)) return ERR_PTR(-EINVAL);
table = squashfs_read_table(sb, id_table_start, length); + if (IS_ERR(table)) + return table;
/* - * table[0] points to the first id lookup table metadata block, this - * should be less than id_table_start + * table[0], table[1], ... table[indexes - 1] store the locations + * of the compressed id blocks. Each entry should be less than + * the next (i.e. table[0] < table[1]), and the difference between them + * should be SQUASHFS_METADATA_SIZE or less. table[indexes - 1] + * should be less than id_table_start, and again the difference + * should be SQUASHFS_METADATA_SIZE or less */ - if (!IS_ERR(table) && le64_to_cpu(table[0]) >= id_table_start) { + for (n = 0; n < (indexes - 1); n++) { + start = le64_to_cpu(table[n]); + end = le64_to_cpu(table[n + 1]); + + if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) { + kfree(table); + return ERR_PTR(-EINVAL); + } + } + + start = le64_to_cpu(table[indexes - 1]); + if (start >= id_table_start || (id_table_start - start) > SQUASHFS_METADATA_SIZE) { kfree(table); return ERR_PTR(-EINVAL); } diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index ef69c31947bf..5234c19a0eab 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -77,5 +77,6 @@ struct squashfs_sb_info { unsigned int inodes; unsigned int fragments; int xattr_ids; + unsigned int ids; }; #endif diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 1516bb779b8d..5abc9d03397c 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -176,6 +176,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) msblk->directory_table = le64_to_cpu(sblk->directory_table_start); msblk->inodes = le32_to_cpu(sblk->inodes); msblk->fragments = le32_to_cpu(sblk->fragments); + msblk->ids = le16_to_cpu(sblk->no_ids); flags = le16_to_cpu(sblk->flags);
TRACE("Found valid superblock on %pg\n", sb->s_bdev); @@ -187,7 +188,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) TRACE("Block size %d\n", msblk->block_size); TRACE("Number of inodes %d\n", msblk->inodes); TRACE("Number of fragments %d\n", msblk->fragments); - TRACE("Number of ids %d\n", le16_to_cpu(sblk->no_ids)); + TRACE("Number of ids %d\n", msblk->ids); TRACE("sblk->inode_table_start %llx\n", msblk->inode_table); TRACE("sblk->directory_table_start %llx\n", msblk->directory_table); TRACE("sblk->fragment_table_start %llx\n", @@ -244,8 +245,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) allocate_id_index_table: /* Allocate and read id index table */ msblk->id_table = squashfs_read_id_index_table(sb, - le64_to_cpu(sblk->id_table_start), next_table, - le16_to_cpu(sblk->no_ids)); + le64_to_cpu(sblk->id_table_start), next_table, msblk->ids); if (IS_ERR(msblk->id_table)) { ERROR("unable to read id index table\n"); err = PTR_ERR(msblk->id_table); diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h index afe70f815e3d..86b0a0073e51 100644 --- a/fs/squashfs/xattr.h +++ b/fs/squashfs/xattr.h @@ -30,8 +30,16 @@ extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *, static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 start, u64 *xattr_table_start, int *xattr_ids) { + struct squashfs_xattr_id_table *id_table; + + id_table = squashfs_read_table(sb, start, sizeof(*id_table)); + if (IS_ERR(id_table)) + return (__le64 *) id_table; + + *xattr_table_start = le64_to_cpu(id_table->xattr_table_start); + kfree(id_table); + ERROR("Xattrs in filesystem, these will be ignored\n"); - *xattr_table_start = start; return ERR_PTR(-ENOTSUPP); }
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c index c89607d690c4..3a655d879600 100644 --- a/fs/squashfs/xattr_id.c +++ b/fs/squashfs/xattr_id.c @@ -44,10 +44,15 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index, struct squashfs_sb_info *msblk = sb->s_fs_info; int block = SQUASHFS_XATTR_BLOCK(index); int offset = SQUASHFS_XATTR_BLOCK_OFFSET(index); - u64 start_block = le64_to_cpu(msblk->xattr_id_table[block]); + u64 start_block; struct squashfs_xattr_id id; int err;
+ if (index >= msblk->xattr_ids) + return -EINVAL; + + start_block = le64_to_cpu(msblk->xattr_id_table[block]); + err = squashfs_read_metadata(sb, &id, &start_block, &offset, sizeof(id)); if (err < 0) @@ -63,13 +68,17 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index, /* * Read uncompressed xattr id lookup table indexes from disk into memory */ -__le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 start, +__le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start, u64 *xattr_table_start, int *xattr_ids) { - unsigned int len; + struct squashfs_sb_info *msblk = sb->s_fs_info; + unsigned int len, indexes; struct squashfs_xattr_id_table *id_table; + __le64 *table; + u64 start, end; + int n;
- id_table = squashfs_read_table(sb, start, sizeof(*id_table)); + id_table = squashfs_read_table(sb, table_start, sizeof(*id_table)); if (IS_ERR(id_table)) return (__le64 *) id_table;
@@ -83,13 +92,52 @@ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 start, if (*xattr_ids == 0) return ERR_PTR(-EINVAL);
- /* xattr_table should be less than start */ - if (*xattr_table_start >= start) + len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids); + indexes = SQUASHFS_XATTR_BLOCKS(*xattr_ids); + + /* + * The computed size of the index table (len bytes) should exactly + * match the table start and end points + */ + start = table_start + sizeof(*id_table); + end = msblk->bytes_used; + + if (len != (end - start)) return ERR_PTR(-EINVAL);
- len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids); + table = squashfs_read_table(sb, start, len); + if (IS_ERR(table)) + return table; + + /* table[0], table[1], ... table[indexes - 1] store the locations + * of the compressed xattr id blocks. Each entry should be less than + * the next (i.e. table[0] < table[1]), and the difference between them + * should be SQUASHFS_METADATA_SIZE or less. table[indexes - 1] + * should be less than table_start, and again the difference + * shouls be SQUASHFS_METADATA_SIZE or less. + * + * Finally xattr_table_start should be less than table[0]. + */ + for (n = 0; n < (indexes - 1); n++) { + start = le64_to_cpu(table[n]); + end = le64_to_cpu(table[n + 1]); + + if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) { + kfree(table); + return ERR_PTR(-EINVAL); + } + } + + start = le64_to_cpu(table[indexes - 1]); + if (start >= table_start || (table_start - start) > SQUASHFS_METADATA_SIZE) { + kfree(table); + return ERR_PTR(-EINVAL); + }
- TRACE("In read_xattr_index_table, length %d\n", len); + if (*xattr_table_start >= le64_to_cpu(table[0])) { + kfree(table); + return ERR_PTR(-EINVAL); + }
- return squashfs_read_table(sb, start + sizeof(*id_table), len); + return table; } diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 63f17b106a4a..57db558c9a61 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -12,6 +12,7 @@ #include <linux/fs.h> #include <linux/sched.h> #include <linux/blkdev.h> +#include <linux/device.h> #include <linux/writeback.h> #include <linux/blk-cgroup.h> #include <linux/backing-dev-defs.h> @@ -517,4 +518,13 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) (1 << WB_async_congested)); }
+extern const char *bdi_unknown_name; + +static inline const char *bdi_dev_name(struct backing_dev_info *bdi) +{ + if (!bdi || !bdi->dev) + return bdi_unknown_name; + return dev_name(bdi->dev); +} + #endif /* _LINUX_BACKING_DEV_H */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b3d34d3e0e7e..9b8b014d13af 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -783,7 +783,9 @@ typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
/* for init task */ -#define INIT_FTRACE_GRAPH .ret_stack = NULL, +#define INIT_FTRACE_GRAPH \ + .ret_stack = NULL, \ + .tracing_graph_pause = ATOMIC_INIT(0),
/* * Stack of return addresses for functions diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8b35bdbdc214..fd77f8303ab9 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -490,9 +490,21 @@ bool mem_cgroup_oom_synchronize(bool wait); extern int do_swap_account; #endif
-void lock_page_memcg(struct page *page); +struct mem_cgroup *lock_page_memcg(struct page *page); +void __unlock_page_memcg(struct mem_cgroup *memcg); void unlock_page_memcg(struct page *page);
+static inline void __mem_cgroup_update_page_stat(struct page *page, + struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx, + int val) +{ + VM_BUG_ON(!(rcu_read_lock_held() || PageLocked(page))); + + if (memcg && memcg->stat) + this_cpu_add(memcg->stat->count[idx], val); +} + /** * mem_cgroup_update_page_stat - update page state statistics * @page: the page @@ -508,13 +520,12 @@ void unlock_page_memcg(struct page *page); * mem_cgroup_update_page_stat(page, state, -1); * unlock_page(page) or unlock_page_memcg(page) */ + static inline void mem_cgroup_update_page_stat(struct page *page, enum mem_cgroup_stat_index idx, int val) { - VM_BUG_ON(!(rcu_read_lock_held() || PageLocked(page)));
- if (page->mem_cgroup) - this_cpu_add(page->mem_cgroup->stat->count[idx], val); + __mem_cgroup_update_page_stat(page, page->mem_cgroup, idx, val); }
static inline void mem_cgroup_inc_page_stat(struct page *page, @@ -709,7 +720,12 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { }
-static inline void lock_page_memcg(struct page *page) +static inline struct mem_cgroup *lock_page_memcg(struct page *page) +{ + return NULL; +} + +static inline void __unlock_page_memcg(struct mem_cgroup *memcg) { }
@@ -745,6 +761,13 @@ static inline void mem_cgroup_update_page_stat(struct page *page, { }
+static inline void __mem_cgroup_update_page_stat(struct page *page, + struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx, + int nr) +{ +} + static inline void mem_cgroup_inc_page_stat(struct page *page, enum mem_cgroup_stat_index idx) { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4d1b1056ac97..2aacafe2bce5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3701,6 +3701,7 @@ static inline void netif_tx_disable(struct net_device *dev)
local_bh_disable(); cpu = smp_processor_id(); + spin_lock(&dev->tx_global_lock); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
@@ -3708,6 +3709,7 @@ static inline void netif_tx_disable(struct net_device *dev) netif_tx_stop_queue(txq); __netif_tx_unlock(txq); } + spin_unlock(&dev->tx_global_lock); local_bh_enable(); }
diff --git a/include/linux/string.h b/include/linux/string.h index 42eed573ebb6..66a91f5a3449 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -29,6 +29,10 @@ size_t strlcpy(char *, const char *, size_t); #ifndef __HAVE_ARCH_STRSCPY ssize_t strscpy(char *, const char *, size_t); #endif + +/* Wraps calls to strscpy()/memset(), no arch specific code required */ +ssize_t strscpy_pad(char *dest, const char *src, size_t count); + #ifndef __HAVE_ARCH_STRCAT extern char * strcat(char *, const char *); #endif diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 56c48c884a24..bf0db32b40aa 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -23,8 +23,7 @@ #define XDR_QUADLEN(l) (((l) + 3) >> 2)
/* - * Generic opaque `network object.' At the kernel level, this type - * is used only by lockd. + * Generic opaque `network object.' */ #define XDR_MAX_NETOBJ 1024 struct xdr_netobj { diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index ec964a924cd2..49a72adc7135 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -65,8 +65,9 @@ TRACE_EVENT(writeback_dirty_page, ),
TP_fast_assign( - strncpy(__entry->name, - mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)", 32); + strscpy_pad(__entry->name, + bdi_dev_name(mapping ? inode_to_bdi(mapping->host) : + NULL), 32); __entry->ino = mapping ? mapping->host->i_ino : 0; __entry->index = page->index; ), @@ -95,8 +96,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template, struct backing_dev_info *bdi = inode_to_bdi(inode);
/* may be called for files on pseudo FSes w/ unregistered bdi */ - strncpy(__entry->name, - bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32); + strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->flags = flags; @@ -175,8 +175,8 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template, ),
TP_fast_assign( - strncpy(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + strscpy_pad(__entry->name, + bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->sync_mode = wbc->sync_mode; __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); @@ -219,8 +219,7 @@ DECLARE_EVENT_CLASS(writeback_work_class, __field(unsigned int, cgroup_ino) ), TP_fast_assign( - strncpy(__entry->name, - wb->bdi->dev ? dev_name(wb->bdi->dev) : "(unknown)", 32); + strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->nr_pages = work->nr_pages; __entry->sb_dev = work->sb ? work->sb->s_dev : 0; __entry->sync_mode = work->sync_mode; @@ -273,7 +272,7 @@ DECLARE_EVENT_CLASS(writeback_class, __field(unsigned int, cgroup_ino) ), TP_fast_assign( - strncpy(__entry->name, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: cgroup_ino=%u", @@ -296,7 +295,7 @@ TRACE_EVENT(writeback_bdi_register, __array(char, name, 32) ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); ), TP_printk("bdi %s", __entry->name @@ -321,7 +320,7 @@ DECLARE_EVENT_CLASS(wbc_class, ),
TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); __entry->nr_to_write = wbc->nr_to_write; __entry->pages_skipped = wbc->pages_skipped; __entry->sync_mode = wbc->sync_mode; @@ -372,7 +371,7 @@ TRACE_EVENT(writeback_queue_io, __field(unsigned int, cgroup_ino) ), TP_fast_assign( - strncpy(__entry->name, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->older = dirtied_before; __entry->age = (jiffies - dirtied_before) * 1000 / HZ; __entry->moved = moved; @@ -457,7 +456,7 @@ TRACE_EVENT(bdi_dirty_ratelimit, ),
TP_fast_assign( - strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32); __entry->write_bw = KBps(wb->write_bandwidth); __entry->avg_write_bw = KBps(wb->avg_write_bandwidth); __entry->dirty_rate = KBps(dirty_rate); @@ -522,7 +521,7 @@ TRACE_EVENT(balance_dirty_pages,
TP_fast_assign( unsigned long freerun = (thresh + bg_thresh) / 2; - strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32);
__entry->limit = global_wb_domain.dirty_limit; __entry->setpoint = (global_wb_domain.dirty_limit + @@ -582,8 +581,8 @@ TRACE_EVENT(writeback_sb_inodes_requeue, ),
TP_fast_assign( - strncpy(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + strscpy_pad(__entry->name, + bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; @@ -656,8 +655,8 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, ),
TP_fast_assign( - strncpy(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + strscpy_pad(__entry->name, + bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 34b1379f9777..f9d8aac170fb 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -157,6 +157,7 @@ gnttab_set_map_op(struct gnttab_map_grant_ref *map, phys_addr_t addr, map->flags = flags; map->ref = ref; map->dom = domid; + map->status = 1; /* arbitrary positive value */ }
static inline void diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index a2a232dec236..2fdf6f96f976 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -70,6 +70,8 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
/* hash table size must be power of 2 */ n_buckets = roundup_pow_of_two(attr->max_entries); + if (!n_buckets) + return ERR_PTR(-E2BIG);
cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); if (cost >= U32_MAX - PAGE_SIZE) diff --git a/kernel/futex.c b/kernel/futex.c index 83db5787c67e..b65dbb5d60bb 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1019,6 +1019,39 @@ static void exit_pi_state_list(struct task_struct *curr) * [10] There is no transient state which leaves owner and user space * TID out of sync. Except one error case where the kernel is denied * write access to the user address, see fixup_pi_state_owner(). + * + * + * Serialization and lifetime rules: + * + * hb->lock: + * + * hb -> futex_q, relation + * futex_q -> pi_state, relation + * + * (cannot be raw because hb can contain arbitrary amount + * of futex_q's) + * + * pi_mutex->wait_lock: + * + * {uval, pi_state} + * + * (and pi_mutex 'obviously') + * + * p->pi_lock: + * + * p->pi_state_list -> pi_state->list, relation + * + * pi_state->refcount: + * + * pi_state lifetime + * + * + * Lock order: + * + * hb->lock + * pi_mutex->wait_lock + * p->pi_lock + * */
/* @@ -1026,10 +1059,12 @@ static void exit_pi_state_list(struct task_struct *curr) * the pi_state against the user space value. If correct, attach to * it. */ -static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, +static int attach_to_pi_state(u32 __user *uaddr, u32 uval, + struct futex_pi_state *pi_state, struct futex_pi_state **ps) { pid_t pid = uval & FUTEX_TID_MASK; + int ret, uval2;
/* * Userspace might have messed up non-PI and PI futexes [3] @@ -1037,8 +1072,33 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, if (unlikely(!pi_state)) return -EINVAL;
+ /* + * We get here with hb->lock held, and having found a + * futex_top_waiter(). This means that futex_lock_pi() of said futex_q + * has dropped the hb->lock in between queue_me() and unqueue_me_pi(), + * which in turn means that futex_lock_pi() still has a reference on + * our pi_state. + */ WARN_ON(!atomic_read(&pi_state->refcount));
+ /* + * Now that we have a pi_state, we can acquire wait_lock + * and do the state validation. + */ + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); + + /* + * Since {uval, pi_state} is serialized by wait_lock, and our current + * uval was read without holding it, it can have changed. Verify it + * still is what we expect it to be, otherwise retry the entire + * operation. + */ + if (get_futex_value_locked(&uval2, uaddr)) + goto out_efault; + + if (uval != uval2) + goto out_eagain; + /* * Handle the owner died case: */ @@ -1054,11 +1114,11 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, * is not 0. Inconsistent state. [5] */ if (pid) - return -EINVAL; + goto out_einval; /* * Take a ref on the state and return success. [4] */ - goto out_state; + goto out_attach; }
/* @@ -1070,14 +1130,14 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, * Take a ref on the state and return success. [6] */ if (!pid) - goto out_state; + goto out_attach; } else { /* * If the owner died bit is not set, then the pi_state * must have an owner. [7] */ if (!pi_state->owner) - return -EINVAL; + goto out_einval; }
/* @@ -1086,11 +1146,29 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, * user space TID. [9/10] */ if (pid != task_pid_vnr(pi_state->owner)) - return -EINVAL; -out_state: + goto out_einval; + +out_attach: atomic_inc(&pi_state->refcount); + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); *ps = pi_state; return 0; + +out_einval: + ret = -EINVAL; + goto out_error; + +out_eagain: + ret = -EAGAIN; + goto out_error; + +out_efault: + ret = -EFAULT; + goto out_error; + +out_error: + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + return ret; }
/** @@ -1123,11 +1201,67 @@ static void wait_for_owner_exiting(int ret, struct task_struct *exiting) put_task_struct(exiting); }
+static int handle_exit_race(u32 __user *uaddr, u32 uval, + struct task_struct *tsk) +{ + u32 uval2; + + /* + * If the futex exit state is not yet FUTEX_STATE_DEAD, wait + * for it to finish. + */ + if (tsk && tsk->futex_state != FUTEX_STATE_DEAD) + return -EAGAIN; + + /* + * Reread the user space value to handle the following situation: + * + * CPU0 CPU1 + * + * sys_exit() sys_futex() + * do_exit() futex_lock_pi() + * futex_lock_pi_atomic() + * exit_signals(tsk) No waiters: + * tsk->flags |= PF_EXITING; *uaddr == 0x00000PID + * mm_release(tsk) Set waiter bit + * exit_robust_list(tsk) { *uaddr = 0x80000PID; + * Set owner died attach_to_pi_owner() { + * *uaddr = 0xC0000000; tsk = get_task(PID); + * } if (!tsk->flags & PF_EXITING) { + * ... attach(); + * tsk->futex_state = } else { + * FUTEX_STATE_DEAD; if (tsk->futex_state != + * FUTEX_STATE_DEAD) + * return -EAGAIN; + * return -ESRCH; <--- FAIL + * } + * + * Returning ESRCH unconditionally is wrong here because the + * user space value has been changed by the exiting task. + * + * The same logic applies to the case where the exiting task is + * already gone. + */ + if (get_futex_value_locked(&uval2, uaddr)) + return -EFAULT; + + /* If the user space value has changed, try again. */ + if (uval2 != uval) + return -EAGAIN; + + /* + * The exiting task did not have a robust list, the robust list was + * corrupted or the user space value in *uaddr is simply bogus. + * Give up and tell user space. + */ + return -ESRCH; +} + /* * Lookup the task for the TID provided from user space and attach to * it after doing proper sanity checks. */ -static int attach_to_pi_owner(u32 uval, union futex_key *key, +static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, struct futex_pi_state **ps, struct task_struct **exiting) { @@ -1138,12 +1272,15 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key, /* * We are the first waiter - try to look up the real owner and attach * the new pi_state to it, but bail out when TID = 0 [1] + * + * The !pid check is paranoid. None of the call sites should end up + * with pid == 0, but better safe than sorry. Let the caller retry */ if (!pid) - return -ESRCH; + return -EAGAIN; p = futex_find_get_task(pid); if (!p) - return -ESRCH; + return handle_exit_race(uaddr, uval, NULL);
if (unlikely(p->flags & PF_KTHREAD)) { put_task_struct(p); @@ -1162,7 +1299,7 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key, * FUTEX_STATE_DEAD, we know that the task has finished * the cleanup: */ - int ret = (p->futex_state = FUTEX_STATE_DEAD) ? -ESRCH : -EAGAIN; + int ret = handle_exit_race(uaddr, uval, p);
raw_spin_unlock_irq(&p->pi_lock); /* @@ -1183,6 +1320,9 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
/* * No existing pi state. First waiter. [2] + * + * This creates pi_state, we have hb->lock held, this means nothing can + * observe this state, wait_lock is irrelevant. */ pi_state = alloc_pi_state();
@@ -1207,7 +1347,8 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key, return 0; }
-static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, +static int lookup_pi_state(u32 __user *uaddr, u32 uval, + struct futex_hash_bucket *hb, union futex_key *key, struct futex_pi_state **ps, struct task_struct **exiting) { @@ -1218,13 +1359,13 @@ static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, * attach to the pi_state when the validation succeeds. */ if (match) - return attach_to_pi_state(uval, match->pi_state, ps); + return attach_to_pi_state(uaddr, uval, match->pi_state, ps);
/* * We are the first waiter - try to look up the owner based on * @uval and attach to it. */ - return attach_to_pi_owner(uval, key, ps, exiting); + return attach_to_pi_owner(uaddr, uval, key, ps, exiting); }
static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) @@ -1237,7 +1378,7 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))) return -EFAULT;
- /*If user space value changed, let the caller retry */ + /* If user space value changed, let the caller retry */ return curval != uval ? -EAGAIN : 0; }
@@ -1301,7 +1442,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, */ match = futex_top_waiter(hb, key); if (match) - return attach_to_pi_state(uval, match->pi_state, ps); + return attach_to_pi_state(uaddr, uval, match->pi_state, ps);
/* * No waiter and user TID is 0. We are here because the @@ -1340,7 +1481,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, * attach to the owner. If that fails, no harm done, we only * set the FUTEX_WAITERS bit in the user space variable. */ - return attach_to_pi_owner(uval, key, ps, exiting); + return attach_to_pi_owner(uaddr, newval, key, ps, exiting); }
/** @@ -1441,6 +1582,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) { ret = -EFAULT; + } else if (curval != uval) { /* * If a unconditional UNLOCK_PI operation (user space did not @@ -1977,7 +2119,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, * If that call succeeds then we have pi_state and an * initial refcount on it. */ - ret = lookup_pi_state(ret, hb2, &key2, + ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state, &exiting); }
@@ -2282,7 +2424,6 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, int err = 0;
oldowner = pi_state->owner; - /* Owner died? */ if (!pi_state->owner) newtid |= FUTEX_OWNER_DIED; @@ -2305,11 +2446,10 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, * because we can fault here. Imagine swapped out pages or a fork * that marked all the anonymous memory readonly for cow. * - * Modifying pi_state _before_ the user space value would - * leave the pi_state in an inconsistent state when we fault - * here, because we need to drop the hash bucket lock to - * handle the fault. This might be observed in the PID check - * in lookup_pi_state. + * Modifying pi_state _before_ the user space value would leave the + * pi_state in an inconsistent state when we fault here, because we + * need to drop the locks to handle the fault. This might be observed + * in the PID check in lookup_pi_state. */ retry: if (!argowner) { @@ -2322,7 +2462,7 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, }
if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { - /* We got the lock after all, nothing to fix. */ + /* We got the lock. pi_state is correct. Tell caller. */ return 1; }
@@ -2364,24 +2504,29 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, */ pi_state_update_owner(pi_state, newowner);
- return 0; + return argowner == current;
/* - * To handle the page fault we need to drop the hash bucket - * lock here. That gives the other task (either the highest priority - * waiter itself or the task which stole the rtmutex) the - * chance to try the fixup of the pi_state. So once we are - * back from handling the fault we need to check the pi_state - * after reacquiring the hash bucket lock and before trying to - * do another fixup. When the fixup has been done already we - * simply return. + * To handle the page fault we need to drop the locks here. That gives + * the other task (either the highest priority waiter itself or the + * task which stole the rtmutex) the chance to try the fixup of the + * pi_state. So once we are back from handling the fault we need to + * check the pi_state after reacquiring the locks and before trying to + * do another fixup. When the fixup has been done already we simply + * return. + * + * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely + * drop hb->lock since the caller owns the hb -> futex_q relation. + * Dropping the pi_mutex->wait_lock requires the state revalidate. */ handle_fault: + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); spin_unlock(q->lock_ptr);
err = fault_in_user_writeable(uaddr);
spin_lock(q->lock_ptr); + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
/* * Check if someone else fixed it for us: @@ -2447,8 +2592,6 @@ static long futex_wait_restart(struct restart_block *restart); */ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) { - int ret = 0; - if (locked) { /* * Got the lock. We might not be the anticipated owner if we @@ -2459,8 +2602,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) * stable state, anything else needs more attention. */ if (q->pi_state->owner != current) - ret = fixup_pi_state_owner(uaddr, q, current); - goto out; + return fixup_pi_state_owner(uaddr, q, current); + return 1; }
/* @@ -2471,10 +2614,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) * Another speculative read; pi_state->owner == current is unstable * but needs our attention. */ - if (q->pi_state->owner == current) { - ret = fixup_pi_state_owner(uaddr, q, NULL); - goto out; - } + if (q->pi_state->owner == current) + return fixup_pi_state_owner(uaddr, q, NULL);
/* * Paranoia check. If we did not take the lock, then we should not be @@ -2483,8 +2624,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current)) return fixup_pi_state_owner(uaddr, q, current);
-out: - return ret ? ret : locked; + return 0; }
/** @@ -3106,6 +3246,11 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, */ put_pi_state(q.pi_state); spin_unlock(q.lock_ptr); + /* + * Adjust the return value. It's either -EFAULT or + * success (1) but the caller expects 0 for success. + */ + ret = ret < 0 ? ret : 0; } } else { struct rt_mutex *pi_mutex; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 16ca877745f6..ce49b62b0834 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5803,7 +5803,6 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) }
if (t->ret_stack == NULL) { - atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); t->curr_ret_stack = -1; /* Make sure the tasks see the -1 first: */ @@ -6015,7 +6014,6 @@ static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack); static void graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack) { - atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); t->ftrace_timestamp = 0; /* make curr_ret_stack visible before we add the ret_stack */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 005929d13c7d..b87ab105fa22 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2090,7 +2090,7 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, (entry = this_cpu_read(trace_buffered_event))) { /* Try to use the per cpu buffer first */ val = this_cpu_inc_return(trace_buffered_event_cnt); - if (val == 1) { + if ((len < (PAGE_SIZE - sizeof(*entry))) && val == 1) { trace_event_setup(entry, type, flags, pc); entry->array[0] = len; return entry; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 5bf072e437c4..1499b2c2799c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1105,7 +1105,8 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, mutex_lock(&event_mutex); list_for_each_entry(file, &tr->events, list) { call = file->event_call; - if (!trace_event_name(call) || !call->class || !call->class->reg) + if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) || + !trace_event_name(call) || !call->class || !call->class->reg) continue;
if (system && strcmp(call->class->system, system->name) != 0) diff --git a/lib/string.c b/lib/string.c index d099762a9bd6..8fe13371aed7 100644 --- a/lib/string.c +++ b/lib/string.c @@ -157,11 +157,9 @@ EXPORT_SYMBOL(strlcpy); * @src: Where to copy the string from * @count: Size of destination buffer * - * Copy the string, or as much of it as fits, into the dest buffer. - * The routine returns the number of characters copied (not including - * the trailing NUL) or -E2BIG if the destination buffer wasn't big enough. - * The behavior is undefined if the string buffers overlap. - * The destination buffer is always NUL terminated, unless it's zero-sized. + * Copy the string, or as much of it as fits, into the dest buffer. The + * behavior is undefined if the string buffers overlap. The destination + * buffer is always NUL terminated, unless it's zero-sized. * * Preferred to strlcpy() since the API doesn't require reading memory * from the src string beyond the specified "count" bytes, and since @@ -171,8 +169,10 @@ EXPORT_SYMBOL(strlcpy); * * Preferred to strncpy() since it always returns a valid string, and * doesn't unnecessarily force the tail of the destination buffer to be - * zeroed. If the zeroing is desired, it's likely cleaner to use strscpy() - * with an overflow test, then just memset() the tail of the dest buffer. + * zeroed. If zeroing is desired please use strscpy_pad(). + * + * Return: The number of characters copied (not including the trailing + * %NUL) or -E2BIG if the destination buffer wasn't big enough. */ ssize_t strscpy(char *dest, const char *src, size_t count) { @@ -259,6 +259,39 @@ char *stpcpy(char *__restrict__ dest, const char *__restrict__ src) } EXPORT_SYMBOL(stpcpy);
+/** + * strscpy_pad() - Copy a C-string into a sized buffer + * @dest: Where to copy the string to + * @src: Where to copy the string from + * @count: Size of destination buffer + * + * Copy the string, or as much of it as fits, into the dest buffer. The + * behavior is undefined if the string buffers overlap. The destination + * buffer is always %NUL terminated, unless it's zero-sized. + * + * If the source string is shorter than the destination buffer, zeros + * the tail of the destination buffer. + * + * For full explanation of why you may want to consider using the + * 'strscpy' functions please see the function docstring for strscpy(). + * + * Return: The number of characters copied (not including the trailing + * %NUL) or -E2BIG if the destination buffer wasn't big enough. + */ +ssize_t strscpy_pad(char *dest, const char *src, size_t count) +{ + ssize_t written; + + written = strscpy(dest, src, count); + if (written < 0 || written == count - 1) + return written; + + memset(dest + written + 1, 0, count - written - 1); + + return written; +} +EXPORT_SYMBOL(strscpy_pad); + #ifndef __HAVE_ARCH_STRCAT /** * strcat - Append one %NUL-terminated string to another diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 113b7d317079..aad61d0175a1 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -21,6 +21,7 @@ struct backing_dev_info noop_backing_dev_info = { EXPORT_SYMBOL_GPL(noop_backing_dev_info);
static struct class *bdi_class; +const char *bdi_unknown_name = "(unknown)";
/* * bdi_lock protects updates to bdi_list. bdi_list has RCU reader side diff --git a/mm/memblock.c b/mm/memblock.c index 42b98af6a415..e43065b13c08 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -186,14 +186,6 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end, * * Find @size free area aligned to @align in the specified range and node. * - * When allocation direction is bottom-up, the @start should be greater - * than the end of the kernel image. Otherwise, it will be trimmed. The - * reason is that we want the bottom-up allocation just near the kernel - * image so it is highly likely that the allocated memory and the kernel - * will reside in the same node. - * - * If bottom-up allocation failed, will try to allocate memory top-down. - * * RETURNS: * Found address on success, 0 on failure. */ @@ -201,8 +193,6 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end, int nid, ulong flags) { - phys_addr_t kernel_end, ret; - /* pump up @end */ if (end == MEMBLOCK_ALLOC_ACCESSIBLE) end = memblock.current_limit; @@ -210,39 +200,13 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, /* avoid allocating the first page */ start = max_t(phys_addr_t, start, PAGE_SIZE); end = max(start, end); - kernel_end = __pa_symbol(_end); - - /* - * try bottom-up allocation only when bottom-up mode - * is set and @end is above the kernel image. - */ - if (memblock_bottom_up() && end > kernel_end) { - phys_addr_t bottom_up_start; - - /* make sure we will allocate above the kernel */ - bottom_up_start = max(start, kernel_end);
- /* ok, try bottom-up allocation first */ - ret = __memblock_find_range_bottom_up(bottom_up_start, end, - size, align, nid, flags); - if (ret) - return ret; - - /* - * we always limit bottom-up allocation above the kernel, - * but top-down allocation doesn't have the limit, so - * retrying top-down allocation may succeed when bottom-up - * allocation failed. - * - * bottom-up allocation is expected to be fail very rarely, - * so we use WARN_ONCE() here to see the stack trace if - * fail happens. - */ - WARN_ONCE(1, "memblock: bottom-up allocation failed, memory hotunplug may be affected\n"); - } - - return __memblock_find_range_top_down(start, end, size, align, nid, - flags); + if (memblock_bottom_up()) + return __memblock_find_range_bottom_up(start, end, size, align, + nid, flags); + else + return __memblock_find_range_top_down(start, end, size, align, + nid, flags); }
/** diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d4232744c59f..27b0b4f03fcd 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1638,9 +1638,13 @@ bool mem_cgroup_oom_synchronize(bool handle) * @page: the page * * This function protects unlocked LRU pages from being moved to - * another cgroup and stabilizes their page->mem_cgroup binding. + * another cgroup. + * + * It ensures lifetime of the returned memcg. Caller is responsible + * for the lifetime of the page; __unlock_page_memcg() is available + * when @page might get freed inside the locked section. */ -void lock_page_memcg(struct page *page) +struct mem_cgroup *lock_page_memcg(struct page *page) { struct mem_cgroup *memcg; unsigned long flags; @@ -1649,18 +1653,24 @@ void lock_page_memcg(struct page *page) * The RCU lock is held throughout the transaction. The fast * path can get away without acquiring the memcg->move_lock * because page moving starts with an RCU grace period. - */ + * + * The RCU lock also protects the memcg from being freed when + * the page state that is going to change is the only thing + * preventing the page itself from being freed. E.g. writeback + * doesn't hold a page reference and relies on PG_writeback to + * keep off truncation, migration and so forth. + */ rcu_read_lock();
if (mem_cgroup_disabled()) - return; + return NULL; again: memcg = page->mem_cgroup; if (unlikely(!memcg)) - return; + return NULL;
if (atomic_read(&memcg->moving_account) <= 0) - return; + return memcg;
spin_lock_irqsave(&memcg->move_lock, flags); if (memcg != page->mem_cgroup) { @@ -1676,18 +1686,18 @@ void lock_page_memcg(struct page *page) memcg->move_lock_task = current; memcg->move_lock_flags = flags;
- return; + return memcg; } EXPORT_SYMBOL(lock_page_memcg);
/** - * unlock_page_memcg - unlock a page->mem_cgroup binding - * @page: the page + * __unlock_page_memcg - unlock and unpin a memcg + * @memcg: the memcg + * + * Unlock and unpin a memcg returned by lock_page_memcg(). */ -void unlock_page_memcg(struct page *page) +void __unlock_page_memcg(struct mem_cgroup *memcg) { - struct mem_cgroup *memcg = page->mem_cgroup; - if (memcg && memcg->move_lock_task == current) { unsigned long flags = memcg->move_lock_flags;
@@ -1699,6 +1709,15 @@ void unlock_page_memcg(struct page *page)
rcu_read_unlock(); } + +/** + * unlock_page_memcg - unlock a page->mem_cgroup binding + * @page: the page + */ +void unlock_page_memcg(struct page *page) +{ + __unlock_page_memcg(page->mem_cgroup); +} EXPORT_SYMBOL(unlock_page_memcg);
/* diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 462c778b9fb5..498c924f2fcd 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2717,9 +2717,10 @@ EXPORT_SYMBOL(clear_page_dirty_for_io); int test_clear_page_writeback(struct page *page) { struct address_space *mapping = page_mapping(page); + struct mem_cgroup *memcg; int ret;
- lock_page_memcg(page); + memcg = lock_page_memcg(page); if (mapping && mapping_use_writeback_tags(mapping)) { struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); @@ -2747,13 +2748,20 @@ int test_clear_page_writeback(struct page *page) } else { ret = TestClearPageWriteback(page); } + /* + * NOTE: Page might be free now! Writeback doesn't hold a page + * reference on its own, it relies on truncation to wait for + * the clearing of PG_writeback. The below can only access + * page state that is static across allocation cycles. + */ if (ret) { - mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); + __mem_cgroup_update_page_stat(page, memcg, + MEM_CGROUP_STAT_WRITEBACK, -1); dec_node_page_state(page, NR_WRITEBACK); dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); inc_node_page_state(page, NR_WRITTEN); } - unlock_page_memcg(page); + __unlock_page_memcg(memcg); return ret; }
diff --git a/net/key/af_key.c b/net/key/af_key.c index 76a008b1cbe5..adc93329e6aa 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2933,7 +2933,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t) break; if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg) && aalg->available) + if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } return sz + sizeof(struct sadb_prop); @@ -2951,7 +2951,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!ealg->pfkey_supported) continue;
- if (!(ealg_tmpl_set(t, ealg) && ealg->available)) + if (!(ealg_tmpl_set(t, ealg))) continue;
for (k = 1; ; k++) { @@ -2962,7 +2962,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!aalg->pfkey_supported) continue;
- if (aalg_tmpl_set(t, aalg) && aalg->available) + if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index d507d0fc7858..ddd90a3820d3 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -903,7 +903,8 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, * Let nf_ct_resolve_clash() deal with this later. */ if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple) && + nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) continue;
NF_CT_STAT_INC_ATOMIC(net, found); diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 79d7ad621a80..03c8bd854e56 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -155,7 +155,8 @@ static void recent_entry_remove(struct recent_table *t, struct recent_entry *e) /* * Drop entries with timestamps older then 'time'. */ -static void recent_entry_reap(struct recent_table *t, unsigned long time) +static void recent_entry_reap(struct recent_table *t, unsigned long time, + struct recent_entry *working, bool update) { struct recent_entry *e;
@@ -164,6 +165,12 @@ static void recent_entry_reap(struct recent_table *t, unsigned long time) */ e = list_entry(t->lru_list.next, struct recent_entry, lru_list);
+ /* + * Do not reap the entry which are going to be updated. + */ + if (e == working && update) + return; + /* * The last time stamp is the most recent. */ @@ -306,7 +313,8 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
/* info->seconds must be non-zero */ if (info->check_set & XT_RECENT_REAP) - recent_entry_reap(t, time); + recent_entry_reap(t, time, e, + info->check_set & XT_RECENT_UPDATE && ret); }
if (info->check_set & XT_RECENT_SET || diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 591d378d1a18..1d00f49dfe48 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -53,6 +53,7 @@ #include <asm/uaccess.h> #include <linux/hashtable.h>
+#include "auth_gss_internal.h" #include "../netns.h"
static const struct rpc_authops authgss_ops; @@ -147,35 +148,6 @@ gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags); }
-static const void * -simple_get_bytes(const void *p, const void *end, void *res, size_t len) -{ - const void *q = (const void *)((const char *)p + len); - if (unlikely(q > end || q < p)) - return ERR_PTR(-EFAULT); - memcpy(res, p, len); - return q; -} - -static inline const void * -simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) -{ - const void *q; - unsigned int len; - - p = simple_get_bytes(p, end, &len, sizeof(len)); - if (IS_ERR(p)) - return p; - q = (const void *)((const char *)p + len); - if (unlikely(q > end || q < p)) - return ERR_PTR(-EFAULT); - dest->data = kmemdup(p, len, GFP_NOFS); - if (unlikely(dest->data == NULL)) - return ERR_PTR(-ENOMEM); - dest->len = len; - return q; -} - static struct gss_cl_ctx * gss_cred_get_ctx(struct rpc_cred *cred) { diff --git a/net/sunrpc/auth_gss/auth_gss_internal.h b/net/sunrpc/auth_gss/auth_gss_internal.h new file mode 100644 index 000000000000..f6d9631bd9d0 --- /dev/null +++ b/net/sunrpc/auth_gss/auth_gss_internal.h @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* + * linux/net/sunrpc/auth_gss/auth_gss_internal.h + * + * Internal definitions for RPCSEC_GSS client authentication + * + * Copyright (c) 2000 The Regents of the University of Michigan. + * All rights reserved. + * + */ +#include <linux/err.h> +#include <linux/string.h> +#include <linux/sunrpc/xdr.h> + +static inline const void * +simple_get_bytes(const void *p, const void *end, void *res, size_t len) +{ + const void *q = (const void *)((const char *)p + len); + if (unlikely(q > end || q < p)) + return ERR_PTR(-EFAULT); + memcpy(res, p, len); + return q; +} + +static inline const void * +simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) +{ + const void *q; + unsigned int len; + + p = simple_get_bytes(p, end, &len, sizeof(len)); + if (IS_ERR(p)) + return p; + q = (const void *)((const char *)p + len); + if (unlikely(q > end || q < p)) + return ERR_PTR(-EFAULT); + if (len) { + dest->data = kmemdup(p, len, GFP_NOFS); + if (unlikely(dest->data == NULL)) + return ERR_PTR(-ENOMEM); + } else + dest->data = NULL; + dest->len = len; + return q; +} diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 60595835317a..ea2f6022b3d5 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -46,6 +46,8 @@ #include <linux/sunrpc/xdr.h> #include <linux/sunrpc/gss_krb5_enctypes.h>
+#include "auth_gss_internal.h" + #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif @@ -187,35 +189,6 @@ get_gss_krb5_enctype(int etype) return NULL; }
-static const void * -simple_get_bytes(const void *p, const void *end, void *res, int len) -{ - const void *q = (const void *)((const char *)p + len); - if (unlikely(q > end || q < p)) - return ERR_PTR(-EFAULT); - memcpy(res, p, len); - return q; -} - -static const void * -simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) -{ - const void *q; - unsigned int len; - - p = simple_get_bytes(p, end, &len, sizeof(len)); - if (IS_ERR(p)) - return p; - q = (const void *)((const char *)p + len); - if (unlikely(q > end || q < p)) - return ERR_PTR(-EFAULT); - res->data = kmemdup(p, len, GFP_NOFS); - if (unlikely(res->data == NULL)) - return ERR_PTR(-ENOMEM); - res->len = len; - return q; -} - static inline const void * get_key(const void *p, const void *end, struct krb5_ctx *ctx, struct crypto_skcipher **res) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 3a2543b9701a..bd3a5ef8e59b 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -830,10 +830,12 @@ static int vsock_shutdown(struct socket *sock, int mode) */
sk = sock->sk; + + lock_sock(sk); if (sock->state == SS_UNCONNECTED) { err = -ENOTCONN; if (sk->sk_type == SOCK_STREAM) - return err; + goto out; } else { sock->state = SS_DISCONNECTING; err = 0; @@ -842,10 +844,8 @@ static int vsock_shutdown(struct socket *sock, int mode) /* Receive and send shutdowns are treated alike. */ mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN); if (mode) { - lock_sock(sk); sk->sk_shutdown |= mode; sk->sk_state_change(sk); - release_sock(sk);
if (sk->sk_type == SOCK_STREAM) { sock_reset_flag(sk, SOCK_DONE); @@ -853,6 +853,8 @@ static int vsock_shutdown(struct socket *sock, int mode) } }
+out: + release_sock(sk); return err; }
@@ -1121,7 +1123,6 @@ static void vsock_connect_timeout(struct work_struct *work) { struct sock *sk; struct vsock_sock *vsk; - int cancel = 0;
vsk = container_of(work, struct vsock_sock, connect_work.work); sk = sk_vsock(vsk); @@ -1132,11 +1133,9 @@ static void vsock_connect_timeout(struct work_struct *work) sk->sk_state = SS_UNCONNECTED; sk->sk_err = ETIMEDOUT; sk->sk_error_report(sk); - cancel = 1; + vsock_transport_cancel_pkt(vsk); } release_sock(sk); - if (cancel) - vsock_transport_cancel_pkt(vsk);
sock_put(sk); } diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index aa9d1c7780c3..5f7bcc7da460 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -959,10 +959,10 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt)
vsk = vsock_sk(sk);
- space_available = virtio_transport_space_update(sk, pkt); - lock_sock(sk);
+ space_available = virtio_transport_space_update(sk, pkt); + /* Update CID in case it has changed after a transport reset event */ vsk->local_addr.svm_cid = dst.svm_cid;
diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 6228a83156ea..f8ee4e33a085 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -222,6 +222,8 @@ cmd_modversions_c = \ endif
ifdef CONFIG_FTRACE_MCOUNT_RECORD +ifndef CC_USING_RECORD_MCOUNT +# compiler will not generate __mcount_loc use recordmcount or recordmcount.pl ifdef BUILD_C_RECORDMCOUNT ifeq ("$(origin RECORDMCOUNT_WARN)", "command line") RECORDMCOUNT_FLAGS = -w @@ -250,6 +252,7 @@ cmd_record_mcount = \ "$(CC_FLAGS_FTRACE)" ]; then \ $(sub_cmd_record_mcount) \ fi; +endif # CC_USING_RECORD_MCOUNT endif
ifdef CONFIG_STACK_VALIDATION diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5bddabb3de7c..db859b595dba 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -382,9 +382,8 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, */ kvm->mmu_notifier_count++; need_tlb_flush = kvm_unmap_hva_range(kvm, start, end); - need_tlb_flush |= kvm->tlbs_dirty; /* we've to flush the tlb before the pages can be freed */ - if (need_tlb_flush) + if (need_tlb_flush || kvm->tlbs_dirty) kvm_flush_remote_tlbs(kvm);
spin_unlock(&kvm->mmu_lock);
On Tue, 2021-02-23 at 15:00 +0100, Greg Kroah-Hartman wrote:
I'm announcing the release of the 4.9.258 kernel.
All users of the 4.9 kernel series must upgrade.
The updated 4.9.y git tree can be found at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-4.9.y and can be browsed at the normal kernel.org git web browser:
The backported futex fixes are still incomplete/broken in this version. If I enable lockdep and run the futex self-tests (from 5.10):
- on 4.9.246, they pass with no lockdep output - on 4.9.257 and 4.9.258, they pass but futex_requeue_pi trigers a lockdep splat
I have a local branch that essentially updates futex and rtmutex in 4.9-stable to match 4.14-stable. With this, the tests pass and lockdep is happy.
Unfortunately, that branch has about another 60 commits. Further, the more we change futex in 4.9, the more difficult it is going to be to update the 4.9-rt branch. But I don't see any better option available at the moment.
Thoughts?
Ben.
On Mon, Mar 01, 2021 at 01:13:08AM +0100, Ben Hutchings wrote:
On Tue, 2021-02-23 at 15:00 +0100, Greg Kroah-Hartman wrote:
I'm announcing the release of the 4.9.258 kernel.
All users of the 4.9 kernel series must upgrade.
The updated 4.9.y git tree can be found at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-4.9.y and can be browsed at the normal kernel.org git web browser:
The backported futex fixes are still incomplete/broken in this version. If I enable lockdep and run the futex self-tests (from 5.10):
- on 4.9.246, they pass with no lockdep output
- on 4.9.257 and 4.9.258, they pass but futex_requeue_pi trigers a lockdep splat
I have a local branch that essentially updates futex and rtmutex in 4.9-stable to match 4.14-stable. With this, the tests pass and lockdep is happy.
Unfortunately, that branch has about another 60 commits. Further, the more we change futex in 4.9, the more difficult it is going to be to update the 4.9-rt branch. But I don't see any better option available at the moment.
Thoughts?
There were some posted futex fixes for 4.9 (and 4.4) on the stable list that I have not gotten to yet.
Hopefully after these are merged (this week), these issues will be resolved.
If not, then yes, they need to be fixed and any help you can provide would be appreciated.
As for "difficulty", yes, it's rough, but the changes backported were required, for obvious reasons :(
thanks,
greg k-h
On Mon, 01 Mar 2021, Greg Kroah-Hartman wrote:
On Mon, Mar 01, 2021 at 01:13:08AM +0100, Ben Hutchings wrote:
On Tue, 2021-02-23 at 15:00 +0100, Greg Kroah-Hartman wrote:
I'm announcing the release of the 4.9.258 kernel.
All users of the 4.9 kernel series must upgrade.
The updated 4.9.y git tree can be found at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-4.9.y and can be browsed at the normal kernel.org git web browser:
The backported futex fixes are still incomplete/broken in this version. If I enable lockdep and run the futex self-tests (from 5.10):
- on 4.9.246, they pass with no lockdep output
- on 4.9.257 and 4.9.258, they pass but futex_requeue_pi trigers a lockdep splat
I have a local branch that essentially updates futex and rtmutex in 4.9-stable to match 4.14-stable. With this, the tests pass and lockdep is happy.
Unfortunately, that branch has about another 60 commits. Further, the more we change futex in 4.9, the more difficult it is going to be to update the 4.9-rt branch. But I don't see any better option available at the moment.
Thoughts?
There were some posted futex fixes for 4.9 (and 4.4) on the stable list that I have not gotten to yet.
Hopefully after these are merged (this week), these issues will be resolved.
If not, then yes, they need to be fixed and any help you can provide would be appreciated.
As for "difficulty", yes, it's rough, but the changes backported were required, for obvious reasons :(
Apologies for the fuss.
The back-port become more complex the further back it was taken..
Had I known about the self-tests, I would have ensured those were passing too, as well as the the build/boot/auto-builder tests actually carried out.
Let me know if there's anything further I can do to help.
On Mon, Mar 01, 2021 at 09:07:03AM +0100, Greg Kroah-Hartman wrote:
On Mon, Mar 01, 2021 at 01:13:08AM +0100, Ben Hutchings wrote:
On Tue, 2021-02-23 at 15:00 +0100, Greg Kroah-Hartman wrote:
I'm announcing the release of the 4.9.258 kernel.
All users of the 4.9 kernel series must upgrade.
The updated 4.9.y git tree can be found at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-4.9.y and can be browsed at the normal kernel.org git web browser:
The backported futex fixes are still incomplete/broken in this version. If I enable lockdep and run the futex self-tests (from 5.10):
- on 4.9.246, they pass with no lockdep output
- on 4.9.257 and 4.9.258, they pass but futex_requeue_pi trigers a lockdep splat
I have a local branch that essentially updates futex and rtmutex in 4.9-stable to match 4.14-stable. With this, the tests pass and lockdep is happy.
Unfortunately, that branch has about another 60 commits.
I have now rebased that on top of 4.9.258, and there are "only" 39 commits.
Further, the more we change futex in 4.9, the more difficult it is going to be to update the 4.9-rt branch. But I don't see any better option available at the moment.
Thoughts?
There were some posted futex fixes for 4.9 (and 4.4) on the stable list that I have not gotten to yet.
Hopefully after these are merged (this week), these issues will be resolved.
I'm afraid they are not sufficient.
If not, then yes, they need to be fixed and any help you can provide would be appreciated.
As for "difficulty", yes, it's rough, but the changes backported were required, for obvious reasons :(
I had another look at the locking bug and I was able to make a series of 7 commits (on top of the 2 already queued) that is sufficient to make lockdep happy. But I am not very confident that there won't be other regressions. I'll send that over shortly.
Ben.
On Mon, 2021-03-01 at 18:29 +0100, Ben Hutchings wrote:
On Mon, Mar 01, 2021 at 09:07:03AM +0100, Greg Kroah-Hartman wrote:
On Mon, Mar 01, 2021 at 01:13:08AM +0100, Ben Hutchings wrote:
On Tue, 2021-02-23 at 15:00 +0100, Greg Kroah-Hartman wrote:
I'm announcing the release of the 4.9.258 kernel.
All users of the 4.9 kernel series must upgrade.
The updated 4.9.y git tree can be found at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-4.9.y and can be browsed at the normal kernel.org git web browser:
The backported futex fixes are still incomplete/broken in this version. If I enable lockdep and run the futex self-tests (from 5.10):
- on 4.9.246, they pass with no lockdep output
- on 4.9.257 and 4.9.258, they pass but futex_requeue_pi trigers a lockdep splat
I have a local branch that essentially updates futex and rtmutex in 4.9-stable to match 4.14-stable. With this, the tests pass and lockdep is happy.
Unfortunately, that branch has about another 60 commits.
I have now rebased that on top of 4.9.258, and there are "only" 39 commits.
Further, the more we change futex in 4.9, the more difficult it is going to be to update the 4.9-rt branch. But I don't see any better option available at the moment.
Thoughts?
There were some posted futex fixes for 4.9 (and 4.4) on the stable list that I have not gotten to yet.
Hopefully after these are merged (this week), these issues will be resolved.
I'm afraid they are not sufficient.
If not, then yes, they need to be fixed and any help you can provide would be appreciated.
As for "difficulty", yes, it's rough, but the changes backported were required, for obvious reasons :(
I had another look at the locking bug and I was able to make a series of 7 commits (on top of the 2 already queued) that is sufficient to make lockdep happy. But I am not very confident that there won't be other regressions. I'll send that over shortly.
This is all I had to do to make 4.4-stable a happy camper again.
futex: fix 4.4-stable 34c8e1c2c025 backport inspired lockdep complaint
1. 34c8e1c2c025 "futex: Provide and use pi_state_update_owner()" was backported to stable, leading to the therein assertion that pi_state->pi_mutex.wait_lock be held triggering in 4.4-stable. Fixing that leads to lockdep moan part 2.
2: b4abf91047cf "rtmutex: Make wait_lock irq safe" is absent in 4.4-stable, but wake_futex_pi() nonetheless managed to acquire an unbalanced raw_spin_lock() raw_spin_inlock_irq() pair, which inspires lockdep to moan after aforementioned assert has been appeased.
With this applied, futex tests pass, and no longer inspire lockdep gripeage.
Not-Signed-off-by: Mike Galbraith efault@gmx.de --- kernel/futex.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
--- a/kernel/futex.c +++ b/kernel/futex.c @@ -874,8 +874,12 @@ static void free_pi_state(struct futex_p * and has cleaned up the pi_state already */ if (pi_state->owner) { + unsigned long flags; + + raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags); pi_state_update_owner(pi_state, NULL); rt_mutex_proxy_unlock(&pi_state->pi_mutex); + raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags); }
if (current->pi_state_cache) @@ -1406,7 +1410,7 @@ static int wake_futex_pi(u32 __user *uad if (pi_state->owner != current) return -EINVAL;
- raw_spin_lock(&pi_state->pi_mutex.wait_lock); + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
/*
On Thu, Mar 04, 2021 at 10:12:56AM +0100, Mike Galbraith wrote:
On Mon, 2021-03-01 at 18:29 +0100, Ben Hutchings wrote:
On Mon, Mar 01, 2021 at 09:07:03AM +0100, Greg Kroah-Hartman wrote:
On Mon, Mar 01, 2021 at 01:13:08AM +0100, Ben Hutchings wrote:
On Tue, 2021-02-23 at 15:00 +0100, Greg Kroah-Hartman wrote:
I'm announcing the release of the 4.9.258 kernel.
All users of the 4.9 kernel series must upgrade.
The updated 4.9.y git tree can be found at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-4.9.y and can be browsed at the normal kernel.org git web browser:
The backported futex fixes are still incomplete/broken in this version. If I enable lockdep and run the futex self-tests (from 5.10):
- on 4.9.246, they pass with no lockdep output
- on 4.9.257 and 4.9.258, they pass but futex_requeue_pi trigers a lockdep splat
I have a local branch that essentially updates futex and rtmutex in 4.9-stable to match 4.14-stable. With this, the tests pass and lockdep is happy.
Unfortunately, that branch has about another 60 commits.
I have now rebased that on top of 4.9.258, and there are "only" 39 commits.
Further, the more we change futex in 4.9, the more difficult it is going to be to update the 4.9-rt branch. But I don't see any better option available at the moment.
Thoughts?
There were some posted futex fixes for 4.9 (and 4.4) on the stable list that I have not gotten to yet.
Hopefully after these are merged (this week), these issues will be resolved.
I'm afraid they are not sufficient.
If not, then yes, they need to be fixed and any help you can provide would be appreciated.
As for "difficulty", yes, it's rough, but the changes backported were required, for obvious reasons :(
I had another look at the locking bug and I was able to make a series of 7 commits (on top of the 2 already queued) that is sufficient to make lockdep happy. But I am not very confident that there won't be other regressions. I'll send that over shortly.
This is all I had to do to make 4.4-stable a happy camper again.
futex: fix 4.4-stable 34c8e1c2c025 backport inspired lockdep complaint
- 34c8e1c2c025 "futex: Provide and use pi_state_update_owner()" was backported
to stable, leading to the therein assertion that pi_state->pi_mutex.wait_lock be held triggering in 4.4-stable. Fixing that leads to lockdep moan part 2.
2: b4abf91047cf "rtmutex: Make wait_lock irq safe" is absent in 4.4-stable, but wake_futex_pi() nonetheless managed to acquire an unbalanced raw_spin_lock() raw_spin_inlock_irq() pair, which inspires lockdep to moan after aforementioned assert has been appeased.
With this applied, futex tests pass, and no longer inspire lockdep gripeage.
Not-Signed-off-by: Mike Galbraith efault@gmx.de
kernel/futex.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
--- a/kernel/futex.c +++ b/kernel/futex.c @@ -874,8 +874,12 @@ static void free_pi_state(struct futex_p * and has cleaned up the pi_state already */ if (pi_state->owner) {
unsigned long flags;
raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
pi_state_update_owner(pi_state, NULL); rt_mutex_proxy_unlock(&pi_state->pi_mutex);
raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
}
if (current->pi_state_cache)
@@ -1406,7 +1410,7 @@ static int wake_futex_pi(u32 __user *uad if (pi_state->owner != current) return -EINVAL;
- raw_spin_lock(&pi_state->pi_mutex.wait_lock);
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
/*
Care to sign-off on it so that if this is correct, I can apply it? :)
thanks,
greg k-h
On Thu, 2021-03-04 at 14:11 +0100, Greg Kroah-Hartman wrote:
On Thu, Mar 04, 2021 at 10:12:56AM +0100, Mike Galbraith wrote:
futex: fix 4.4-stable 34c8e1c2c025 backport inspired lockdep complaint
- 34c8e1c2c025 "futex: Provide and use pi_state_update_owner()" was backported
to stable, leading to the therein assertion that pi_state->pi_mutex.wait_lock be held triggering in 4.4-stable. Fixing that leads to lockdep moan part 2.
2: b4abf91047cf "rtmutex: Make wait_lock irq safe" is absent in 4.4-stable, but wake_futex_pi() nonetheless managed to acquire an unbalanced raw_spin_lock() raw_spin_inlock_irq() pair, which inspires lockdep to moan after aforementioned assert has been appeased.
With this applied, futex tests pass, and no longer inspire lockdep gripeage.
Not-Signed-off-by: Mike Galbraith efault@gmx.de
kernel/futex.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
--- a/kernel/futex.c +++ b/kernel/futex.c @@ -874,8 +874,12 @@ static void free_pi_state(struct futex_p * and has cleaned up the pi_state already */ if (pi_state->owner) {
unsigned long flags;
raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
pi_state_update_owner(pi_state, NULL); rt_mutex_proxy_unlock(&pi_state->pi_mutex);
raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
}
if (current->pi_state_cache)
@@ -1406,7 +1410,7 @@ static int wake_futex_pi(u32 __user *uad if (pi_state->owner != current) return -EINVAL;
- raw_spin_lock(&pi_state->pi_mutex.wait_lock);
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
/*
Care to sign-off on it so that if this is correct, I can apply it? :)
Consider it signed off iff Thomas acks it. I think it's correct.. just like the guys who have installed every other bug in the damn things, just a wee bit less over-confident :)
-Mike
On Thu, Mar 04 2021 at 10:12, Mike Galbraith wrote:
On Mon, 2021-03-01 at 18:29 +0100, Ben Hutchings wrote:
--- a/kernel/futex.c +++ b/kernel/futex.c @@ -874,8 +874,12 @@ static void free_pi_state(struct futex_p * and has cleaned up the pi_state already */ if (pi_state->owner) {
unsigned long flags;
pi_state_update_owner(pi_state, NULL); rt_mutex_proxy_unlock(&pi_state->pi_mutex);raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
This hunk is missing in 4.9 as well.
}
if (current->pi_state_cache) @@ -1406,7 +1410,7 @@ static int wake_futex_pi(u32 __user *uad if (pi_state->owner != current) return -EINVAL;
- raw_spin_lock(&pi_state->pi_mutex.wait_lock);
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
/*
That looks correct.
Thanks,
tglx
On Thu, 2021-03-04 at 19:47 +0100, Thomas Gleixner wrote:
On Thu, Mar 04 2021 at 10:12, Mike Galbraith wrote:
On Mon, 2021-03-01 at 18:29 +0100, Ben Hutchings wrote:
--- a/kernel/futex.c +++ b/kernel/futex.c @@ -874,8 +874,12 @@ static void free_pi_state(struct futex_p * and has cleaned up the pi_state already */ if (pi_state->owner) {
unsigned long flags;
pi_state_update_owner(pi_state, NULL); rt_mutex_proxy_unlock(&pi_state->pi_mutex);raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
This hunk is missing in 4.9 as well.
}
if (current->pi_state_cache) @@ -1406,7 +1410,7 @@ static int wake_futex_pi(u32 __user *uad if (pi_state->owner != current) return -EINVAL;
- raw_spin_lock(&pi_state->pi_mutex.wait_lock);
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
/*
That looks correct.
Thanks for the eyeballs. FWIW, updated 4.4.259-rt214 on top of that is a happy camper as well.
-Mike
linux-stable-mirror@lists.linaro.org