The patch titled
Subject: Revert "zram: remove double compression logic"
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
revert-zram-remove-double-compression-logic.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Jiri Slaby <jslaby(a)suse.cz>
Subject: Revert "zram: remove double compression logic"
Date: Wed, 10 Aug 2022 09:06:09 +0200
This reverts commit e7be8d1dd983156b ("zram: remove double compression
logic") as it causes zram failures. It does not revert cleanly, PTR_ERR
handling was introduced in the meantime. This is handled by appropriate
IS_ERR.
When under memory pressure, zs_malloc() can fail. Before the above
commit, the allocation was retried with direct reclaim enabled (GFP_NOIO).
After the commit, it is not -- only __GFP_KSWAPD_RECLAIM is tried.
So when the failure occurs under memory pressure, the filesystem on top of
zram, such as ext2 (mounted by the ext4 module in this case), can emit
failures, making the (file)system unusable:
EXT4-fs warning (device zram0): ext4_end_bio:343: I/O error 10 writing to inode 16386 starting block 159744)
Buffer I/O error on device zram0, logical block 159744
With direct reclaim, memory is really reclaimed and allocation succeeds,
eventually. In the worst case, the OOM killer is invoked, which is the proper
outcome if the user sets up zram too large (in comparison to available RAM).
This very diff doesn't apply to 5.19 (stable) cleanly (see PTR_ERR note
above). Use revert of e7be8d1dd983 directly.
Link: https://bugzilla.suse.com/show_bug.cgi?id=1202203
Link: https://lkml.kernel.org/r/20220810070609.14402-1-jslaby@suse.cz
Fixes: e7be8d1dd983 ("zram: remove double compression logic")
Signed-off-by: Jiri Slaby <jslaby(a)suse.cz>
Reviewed-by: Sergey Senozhatsky <senozhatsky(a)chromium.org>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Nitin Gupta <ngupta(a)vflare.org>
Cc: Alexey Romanov <avromanov(a)sberdevices.ru>
Cc: Dmitry Rokosov <ddrokosov(a)sberdevices.ru>
Cc: Lukas Czerner <lczerner(a)redhat.com>
Cc: <stable(a)vger.kernel.org> [5.19]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
drivers/block/zram/zram_drv.c | 42 ++++++++++++++++++++++++--------
drivers/block/zram/zram_drv.h | 1
2 files changed, 33 insertions(+), 10 deletions(-)
--- a/drivers/block/zram/zram_drv.c~revert-zram-remove-double-compression-logic
+++ a/drivers/block/zram/zram_drv.c
@@ -1146,14 +1146,15 @@ static ssize_t bd_stat_show(struct devic
static ssize_t debug_stat_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- int version = 2;
+ int version = 1;
struct zram *zram = dev_to_zram(dev);
ssize_t ret;
down_read(&zram->init_lock);
ret = scnprintf(buf, PAGE_SIZE,
- "version: %d\n%8llu\n",
+ "version: %d\n%8llu %8llu\n",
version,
+ (u64)atomic64_read(&zram->stats.writestall),
(u64)atomic64_read(&zram->stats.miss_free));
up_read(&zram->init_lock);
@@ -1351,7 +1352,7 @@ static int __zram_bvec_write(struct zram
{
int ret = 0;
unsigned long alloced_pages;
- unsigned long handle = 0;
+ unsigned long handle = -ENOMEM;
unsigned int comp_len = 0;
void *src, *dst, *mem;
struct zcomp_strm *zstrm;
@@ -1369,6 +1370,7 @@ static int __zram_bvec_write(struct zram
}
kunmap_atomic(mem);
+compress_again:
zstrm = zcomp_stream_get(zram->comp);
src = kmap_atomic(page);
ret = zcomp_compress(zstrm, src, &comp_len);
@@ -1377,20 +1379,39 @@ static int __zram_bvec_write(struct zram
if (unlikely(ret)) {
zcomp_stream_put(zram->comp);
pr_err("Compression failed! err=%d\n", ret);
+ zs_free(zram->mem_pool, handle);
return ret;
}
if (comp_len >= huge_class_size)
comp_len = PAGE_SIZE;
-
- handle = zs_malloc(zram->mem_pool, comp_len,
- __GFP_KSWAPD_RECLAIM |
- __GFP_NOWARN |
- __GFP_HIGHMEM |
- __GFP_MOVABLE);
-
+ /*
+ * handle allocation has 2 paths:
+ * a) fast path is executed with preemption disabled (for
+ * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
+ * since we can't sleep;
+ * b) slow path enables preemption and attempts to allocate
+ * the page with __GFP_DIRECT_RECLAIM bit set. we have to
+ * put per-cpu compression stream and, thus, to re-do
+ * the compression once handle is allocated.
+ *
+ * if we have a 'non-null' handle here then we are coming
+ * from the slow path and handle has already been allocated.
+ */
+ if (IS_ERR((void *)handle))
+ handle = zs_malloc(zram->mem_pool, comp_len,
+ __GFP_KSWAPD_RECLAIM |
+ __GFP_NOWARN |
+ __GFP_HIGHMEM |
+ __GFP_MOVABLE);
if (IS_ERR((void *)handle)) {
zcomp_stream_put(zram->comp);
+ atomic64_inc(&zram->stats.writestall);
+ handle = zs_malloc(zram->mem_pool, comp_len,
+ GFP_NOIO | __GFP_HIGHMEM |
+ __GFP_MOVABLE);
+ if (!IS_ERR((void *)handle))
+ goto compress_again;
return PTR_ERR((void *)handle);
}
@@ -1948,6 +1969,7 @@ static int zram_add(void)
if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
+ blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
if (ret)
goto out_cleanup_disk;
--- a/drivers/block/zram/zram_drv.h~revert-zram-remove-double-compression-logic
+++ a/drivers/block/zram/zram_drv.h
@@ -81,6 +81,7 @@ struct zram_stats {
atomic64_t huge_pages_since; /* no. of huge pages since zram set up */
atomic64_t pages_stored; /* no. of pages currently stored */
atomic_long_t max_used_pages; /* no. of maximum pages stored */
+ atomic64_t writestall; /* no. of write slow paths */
atomic64_t miss_free; /* no. of missed free */
#ifdef CONFIG_ZRAM_WRITEBACK
atomic64_t bd_count; /* no. of pages in backing device */
_
Patches currently in -mm which might be from jslaby(a)suse.cz are
revert-zram-remove-double-compression-logic.patch
This is the start of the stable review cycle for the 5.15.60 release.
There are 30 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Thu, 11 Aug 2022 17:55:02 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.15.60-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.15.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.15.60-rc1
Pawan Gupta <pawan.kumar.gupta(a)linux.intel.com>
x86/speculation: Add LFENCE to RSB fill sequence
Daniel Sneddon <daniel.sneddon(a)linux.intel.com>
x86/speculation: Add RSB VM Exit protections
Ning Qiang <sohu0106(a)126.com>
macintosh/adb: fix oob read in do_adb_query() function
Hilda Wu <hildawu(a)realtek.com>
Bluetooth: btusb: Add Realtek RTL8852C support ID 0x13D3:0x3586
Hilda Wu <hildawu(a)realtek.com>
Bluetooth: btusb: Add Realtek RTL8852C support ID 0x13D3:0x3587
Hilda Wu <hildawu(a)realtek.com>
Bluetooth: btusb: Add Realtek RTL8852C support ID 0x0CB8:0xC558
Hilda Wu <hildawu(a)realtek.com>
Bluetooth: btusb: Add Realtek RTL8852C support ID 0x04C5:0x1675
Hilda Wu <hildawu(a)realtek.com>
Bluetooth: btusb: Add Realtek RTL8852C support ID 0x04CA:0x4007
Aaron Ma <aaron.ma(a)canonical.com>
Bluetooth: btusb: Add support of IMC Networks PID 0x3568
Ahmad Fatoum <a.fatoum(a)pengutronix.de>
dt-bindings: bluetooth: broadcom: Add BCM4349B1 DT binding
Hakan Jansson <hakan.jansson(a)infineon.com>
Bluetooth: hci_bcm: Add DT compatible for CYW55572
Ahmad Fatoum <a.fatoum(a)pengutronix.de>
Bluetooth: hci_bcm: Add BCM4349B1 variant
Naohiro Aota <naohiro.aota(a)wdc.com>
btrfs: zoned: fix critical section of relocation inode writeback
Naohiro Aota <naohiro.aota(a)wdc.com>
btrfs: zoned: prevent allocation from previous data relocation BG
Peter Collingbourne <pcc(a)google.com>
arm64: set UXN on swapper page tables
Mingwei Zhang <mizhang(a)google.com>
KVM: x86/svm: add __GFP_ACCOUNT to __sev_dbg_{en,de}crypt_user()
Raghavendra Rao Ananta <rananta(a)google.com>
selftests: KVM: Handle compiler optimizations in ucall
Dmitry Klochkov <kdmitry556(a)gmail.com>
tools/kvm_stat: fix display of error when multiple processes are found
Vitaly Kuznetsov <vkuznets(a)redhat.com>
KVM: selftests: Make hyperv_clock selftest more stable
Paolo Bonzini <pbonzini(a)redhat.com>
KVM: x86: do not set st->preempted when going back to user space
Paolo Bonzini <pbonzini(a)redhat.com>
KVM: x86: do not report a vCPU as preempted outside instruction boundaries
GUO Zihua <guozihua(a)huawei.com>
crypto: arm64/poly1305 - fix a read out-of-bound
Tony Luck <tony.luck(a)intel.com>
ACPI: APEI: Better fix to avoid spamming the console with old error logs
Werner Sembach <wse(a)tuxedocomputers.com>
ACPI: video: Shortening quirk list by identifying Clevo by board_name only
Werner Sembach <wse(a)tuxedocomputers.com>
ACPI: video: Force backlight native for some TongFang devices
Stéphane Graber <stgraber(a)ubuntu.com>
tools/vm/slabinfo: Handle files in debugfs
Jan Kara <jack(a)suse.cz>
block: fix default IO priority handling again
Jakub Sitnicki <jakub(a)cloudflare.com>
selftests/bpf: Check dst_port only on the client socket
Jakub Sitnicki <jakub(a)cloudflare.com>
selftests/bpf: Extend verifier and bpf_sock tests for dst_port loads
Ben Hutchings <ben(a)decadent.org.uk>
x86/speculation: Make all RETbleed mitigations 64-bit only
-------------
Diffstat:
Documentation/admin-guide/hw-vuln/spectre.rst | 8 ++
.../bindings/net/broadcom-bluetooth.yaml | 1 +
Makefile | 4 +-
arch/arm64/crypto/poly1305-glue.c | 2 +-
arch/arm64/include/asm/kernel-pgtable.h | 4 +-
arch/arm64/kernel/head.S | 2 +-
arch/x86/Kconfig | 8 +-
arch/x86/include/asm/cpufeatures.h | 2 +
arch/x86/include/asm/kvm_host.h | 3 +
arch/x86/include/asm/msr-index.h | 4 +
arch/x86/include/asm/nospec-branch.h | 21 +++++-
arch/x86/kernel/cpu/bugs.c | 86 ++++++++++++++++------
arch/x86/kernel/cpu/common.c | 12 ++-
arch/x86/kvm/svm/sev.c | 4 +-
arch/x86/kvm/svm/svm.c | 2 +
arch/x86/kvm/vmx/vmenter.S | 8 +-
arch/x86/kvm/vmx/vmx.c | 1 +
arch/x86/kvm/x86.c | 48 +++++++++---
arch/x86/kvm/xen.h | 6 +-
block/blk-ioc.c | 1 +
block/ioprio.c | 4 +-
drivers/acpi/apei/bert.c | 31 ++++++--
drivers/acpi/video_detect.c | 55 +++++++++-----
drivers/bluetooth/btbcm.c | 2 +
drivers/bluetooth/btusb.c | 15 ++++
drivers/bluetooth/hci_bcm.c | 2 +
drivers/macintosh/adb.c | 2 +-
fs/btrfs/block-group.h | 1 +
fs/btrfs/extent-tree.c | 20 ++++-
fs/btrfs/extent_io.c | 3 +-
fs/btrfs/inode.c | 2 +
fs/btrfs/zoned.c | 27 +++++++
fs/btrfs/zoned.h | 5 ++
include/linux/ioprio.h | 2 +-
tools/arch/x86/include/asm/cpufeatures.h | 1 +
tools/arch/x86/include/asm/msr-index.h | 4 +
tools/include/uapi/linux/bpf.h | 3 +-
tools/kvm/kvm_stat/kvm_stat | 3 +-
.../testing/selftests/bpf/prog_tests/sock_fields.c | 58 ++++++++++-----
.../testing/selftests/bpf/progs/test_sock_fields.c | 45 +++++++++++
tools/testing/selftests/bpf/verifier/sock.c | 81 +++++++++++++++++++-
tools/testing/selftests/kvm/lib/aarch64/ucall.c | 9 +--
tools/testing/selftests/kvm/x86_64/hyperv_clock.c | 10 ++-
tools/vm/slabinfo.c | 26 ++++++-
44 files changed, 515 insertions(+), 123 deletions(-)
This is the start of the stable review cycle for the 4.19.255 release.
There are 32 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Thu, 11 Aug 2022 17:55:02 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.19.255-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.19.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.19.255-rc1
Pawan Gupta <pawan.kumar.gupta(a)linux.intel.com>
x86/speculation: Add LFENCE to RSB fill sequence
Daniel Sneddon <daniel.sneddon(a)linux.intel.com>
x86/speculation: Add RSB VM Exit protections
Ning Qiang <sohu0106(a)126.com>
macintosh/adb: fix oob read in do_adb_query() function
Werner Sembach <wse(a)tuxedocomputers.com>
ACPI: video: Shortening quirk list by identifying Clevo by board_name only
Werner Sembach <wse(a)tuxedocomputers.com>
ACPI: video: Force backlight native for some TongFang devices
Ming Lei <ming.lei(a)redhat.com>
scsi: core: Fix race between handling STS_RESOURCE and completion
Wei Mingzhi <whistler(a)member.fsf.org>
mt7601u: add USB device ID for some versions of XiaoDu WiFi Dongle.
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
ARM: crypto: comment out gcc warning that breaks clang builds
Leo Yan <leo.yan(a)linaro.org>
perf symbol: Correct address for bss symbols
Florian Westphal <fw(a)strlen.de>
netfilter: nf_queue: do not allow packet truncation below transport header offset
Duoming Zhou <duoming(a)zju.edu.cn>
sctp: fix sleep in atomic context bug in timer handlers
Michal Maloszewski <michal.maloszewski(a)intel.com>
i40e: Fix interface init with MSI interrupts (no MSI-X)
Kuniyuki Iwashima <kuniyu(a)amazon.com>
tcp: Fix a data-race around sysctl_tcp_comp_sack_nr.
Kuniyuki Iwashima <kuniyu(a)amazon.com>
tcp: Fix a data-race around sysctl_tcp_comp_sack_delay_ns.
Xin Long <lucien.xin(a)gmail.com>
Documentation: fix sctp_wmem in ip-sysctl.rst
Kuniyuki Iwashima <kuniyu(a)amazon.com>
tcp: Fix a data-race around sysctl_tcp_invalid_ratelimit.
Kuniyuki Iwashima <kuniyu(a)amazon.com>
tcp: Fix a data-race around sysctl_tcp_autocorking.
Kuniyuki Iwashima <kuniyu(a)amazon.com>
tcp: Fix a data-race around sysctl_tcp_min_rtt_wlen.
Kuniyuki Iwashima <kuniyu(a)amazon.com>
tcp: Fix a data-race around sysctl_tcp_min_tso_segs.
Liang He <windhl(a)126.com>
net: sungem_phy: Add of_node_put() for reference returned by of_get_parent()
Kuniyuki Iwashima <kuniyu(a)amazon.com>
igmp: Fix data-races around sysctl_igmp_qrv.
Kuniyuki Iwashima <kuniyu(a)amazon.com>
net: ping6: Fix memleak in ipv6_renew_options().
Kuniyuki Iwashima <kuniyu(a)amazon.com>
tcp: Fix a data-race around sysctl_tcp_challenge_ack_limit.
Liang He <windhl(a)126.com>
scsi: ufs: host: Hold reference returned by of_parse_phandle()
Kuniyuki Iwashima <kuniyu(a)amazon.com>
tcp: Fix a data-race around sysctl_tcp_nometrics_save.
Kuniyuki Iwashima <kuniyu(a)amazon.com>
tcp: Fix a data-race around sysctl_tcp_frto.
Kuniyuki Iwashima <kuniyu(a)amazon.com>
tcp: Fix a data-race around sysctl_tcp_adv_win_scale.
Kuniyuki Iwashima <kuniyu(a)amazon.com>
tcp: Fix a data-race around sysctl_tcp_app_win.
Kuniyuki Iwashima <kuniyu(a)amazon.com>
tcp: Fix data-races around sysctl_tcp_dsack.
Harald Freudenberger <freude(a)linux.ibm.com>
s390/archrandom: prevent CPACF trng invocations in interrupt context
ChenXiaoSong <chenxiaosong2(a)huawei.com>
ntfs: fix use-after-free in ntfs_ucsncmp()
Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
Bluetooth: L2CAP: Fix use-after-free caused by l2cap_chan_put
-------------
Diffstat:
Documentation/admin-guide/hw-vuln/spectre.rst | 8 ++++
Documentation/networking/ip-sysctl.txt | 9 +++-
Makefile | 4 +-
arch/arm/lib/xor-neon.c | 3 +-
arch/s390/include/asm/archrandom.h | 9 ++--
arch/x86/include/asm/cpufeatures.h | 2 +
arch/x86/include/asm/msr-index.h | 4 ++
arch/x86/include/asm/nospec-branch.h | 19 ++++++++-
arch/x86/kernel/cpu/bugs.c | 61 ++++++++++++++++++++++++++-
arch/x86/kernel/cpu/common.c | 12 +++++-
arch/x86/kvm/vmx.c | 6 +--
drivers/acpi/video_detect.c | 55 +++++++++++++++---------
drivers/macintosh/adb.c | 2 +-
drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++
drivers/net/sungem_phy.c | 1 +
drivers/net/wireless/mediatek/mt7601u/usb.c | 1 +
drivers/scsi/scsi_lib.c | 3 +-
drivers/scsi/ufs/ufshcd-pltfrm.c | 15 ++++++-
fs/ntfs/attrib.c | 8 +++-
include/net/bluetooth/l2cap.h | 1 +
include/net/tcp.h | 2 +-
net/bluetooth/l2cap_core.c | 61 +++++++++++++++++++++------
net/ipv4/igmp.c | 24 ++++++-----
net/ipv4/tcp.c | 2 +-
net/ipv4/tcp_input.c | 20 +++++----
net/ipv4/tcp_metrics.c | 2 +-
net/ipv4/tcp_output.c | 2 +-
net/ipv6/ping.c | 6 +++
net/netfilter/nfnetlink_queue.c | 7 ++-
net/sctp/stream_sched.c | 2 +-
tools/perf/util/symbol-elf.c | 45 ++++++++++++++++++--
31 files changed, 316 insertions(+), 84 deletions(-)
This is the start of the stable review cycle for the 5.4.210 release.
There are 15 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Thu, 11 Aug 2022 17:55:02 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.210-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.4.210-rc1
Pawan Gupta <pawan.kumar.gupta(a)linux.intel.com>
x86/speculation: Add LFENCE to RSB fill sequence
Daniel Sneddon <daniel.sneddon(a)linux.intel.com>
x86/speculation: Add RSB VM Exit protections
Ning Qiang <sohu0106(a)126.com>
macintosh/adb: fix oob read in do_adb_query() function
Chen-Yu Tsai <wenst(a)chromium.org>
media: v4l2-mem2mem: Apply DST_QUEUE_OFF_BASE on MMAP buffers across ioctls
Raghavendra Rao Ananta <rananta(a)google.com>
selftests: KVM: Handle compiler optimizations in ucall
Alexey Kardashevskiy <aik(a)ozlabs.ru>
KVM: Don't null dereference ops->destroy
Jean-Philippe Brucker <jean-philippe(a)linaro.org>
selftests/bpf: Fix "dubious pointer arithmetic" test
Stanislav Fomichev <sdf(a)google.com>
selftests/bpf: Fix test_align verifier log patterns
John Fastabend <john.fastabend(a)gmail.com>
bpf: Test_verifier, #70 error message updates for 32-bit right shift
Jakub Sitnicki <jakub(a)cloudflare.com>
selftests/bpf: Extend verifier and bpf_sock tests for dst_port loads
John Fastabend <john.fastabend(a)gmail.com>
bpf: Verifer, adjust_scalar_min_max_vals to always call update_reg_bounds()
Tony Luck <tony.luck(a)intel.com>
ACPI: APEI: Better fix to avoid spamming the console with old error logs
Werner Sembach <wse(a)tuxedocomputers.com>
ACPI: video: Shortening quirk list by identifying Clevo by board_name only
Werner Sembach <wse(a)tuxedocomputers.com>
ACPI: video: Force backlight native for some TongFang devices
Subbaraman Narayanamurthy <quic_subbaram(a)quicinc.com>
thermal: Fix NULL pointer dereferences in of_thermal_ functions
-------------
Diffstat:
Documentation/admin-guide/hw-vuln/spectre.rst | 8 +++
Makefile | 4 +-
arch/x86/include/asm/cpufeatures.h | 2 +
arch/x86/include/asm/msr-index.h | 4 ++
arch/x86/include/asm/nospec-branch.h | 19 +++++-
arch/x86/kernel/cpu/bugs.c | 61 ++++++++++++++++++-
arch/x86/kernel/cpu/common.c | 12 +++-
arch/x86/kvm/vmx/vmenter.S | 1 +
drivers/acpi/apei/bert.c | 31 +++++++---
drivers/acpi/video_detect.c | 55 +++++++++++------
drivers/macintosh/adb.c | 2 +-
drivers/media/v4l2-core/v4l2-mem2mem.c | 60 +++++++++++++-----
drivers/thermal/of-thermal.c | 9 ++-
kernel/bpf/verifier.c | 1 +
tools/arch/x86/include/asm/cpufeatures.h | 1 +
tools/include/uapi/linux/bpf.h | 3 +-
tools/testing/selftests/bpf/test_align.c | 41 +++++++------
tools/testing/selftests/bpf/verifier/bounds.c | 6 +-
tools/testing/selftests/bpf/verifier/sock.c | 81 ++++++++++++++++++++++++-
tools/testing/selftests/kvm/lib/aarch64/ucall.c | 9 ++-
virt/kvm/kvm_main.c | 5 +-
21 files changed, 329 insertions(+), 86 deletions(-)
This is the start of the stable review cycle for the 5.10.136 release.
There are 23 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Thu, 11 Aug 2022 17:55:02 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.10.136-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.10.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.10.136-rc1
Pawan Gupta <pawan.kumar.gupta(a)linux.intel.com>
x86/speculation: Add LFENCE to RSB fill sequence
Daniel Sneddon <daniel.sneddon(a)linux.intel.com>
x86/speculation: Add RSB VM Exit protections
Ning Qiang <sohu0106(a)126.com>
macintosh/adb: fix oob read in do_adb_query() function
Hilda Wu <hildawu(a)realtek.com>
Bluetooth: btusb: Add Realtek RTL8852C support ID 0x13D3:0x3586
Hilda Wu <hildawu(a)realtek.com>
Bluetooth: btusb: Add Realtek RTL8852C support ID 0x13D3:0x3587
Hilda Wu <hildawu(a)realtek.com>
Bluetooth: btusb: Add Realtek RTL8852C support ID 0x0CB8:0xC558
Hilda Wu <hildawu(a)realtek.com>
Bluetooth: btusb: Add Realtek RTL8852C support ID 0x04C5:0x1675
Hilda Wu <hildawu(a)realtek.com>
Bluetooth: btusb: Add Realtek RTL8852C support ID 0x04CA:0x4007
Aaron Ma <aaron.ma(a)canonical.com>
Bluetooth: btusb: Add support of IMC Networks PID 0x3568
Hakan Jansson <hakan.jansson(a)infineon.com>
Bluetooth: hci_bcm: Add DT compatible for CYW55572
Ahmad Fatoum <a.fatoum(a)pengutronix.de>
Bluetooth: hci_bcm: Add BCM4349B1 variant
Raghavendra Rao Ananta <rananta(a)google.com>
selftests: KVM: Handle compiler optimizations in ucall
Dmitry Klochkov <kdmitry556(a)gmail.com>
tools/kvm_stat: fix display of error when multiple processes are found
GUO Zihua <guozihua(a)huawei.com>
crypto: arm64/poly1305 - fix a read out-of-bound
Tony Luck <tony.luck(a)intel.com>
ACPI: APEI: Better fix to avoid spamming the console with old error logs
Werner Sembach <wse(a)tuxedocomputers.com>
ACPI: video: Shortening quirk list by identifying Clevo by board_name only
Werner Sembach <wse(a)tuxedocomputers.com>
ACPI: video: Force backlight native for some TongFang devices
George Kennedy <george.kennedy(a)oracle.com>
tun: avoid double free in tun_free_netdev
Jakub Sitnicki <jakub(a)cloudflare.com>
selftests/bpf: Check dst_port only on the client socket
Jakub Sitnicki <jakub(a)cloudflare.com>
selftests/bpf: Extend verifier and bpf_sock tests for dst_port loads
Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
ath9k_htc: fix NULL pointer dereference at ath9k_htc_tx_get_packet()
Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
ath9k_htc: fix NULL pointer dereference at ath9k_htc_rxep()
Ben Hutchings <ben(a)decadent.org.uk>
x86/speculation: Make all RETbleed mitigations 64-bit only
-------------
Diffstat:
Documentation/admin-guide/hw-vuln/spectre.rst | 8 ++
Makefile | 4 +-
arch/arm64/crypto/poly1305-glue.c | 2 +-
arch/x86/Kconfig | 8 +-
arch/x86/include/asm/cpufeatures.h | 2 +
arch/x86/include/asm/msr-index.h | 4 +
arch/x86/include/asm/nospec-branch.h | 21 +++-
arch/x86/kernel/cpu/bugs.c | 86 +++++++++++-----
arch/x86/kernel/cpu/common.c | 12 ++-
arch/x86/kvm/vmx/vmenter.S | 8 +-
drivers/acpi/apei/bert.c | 31 ++++--
drivers/acpi/video_detect.c | 55 ++++++----
drivers/bluetooth/btbcm.c | 2 +
drivers/bluetooth/btusb.c | 15 +++
drivers/bluetooth/hci_bcm.c | 2 +
drivers/macintosh/adb.c | 2 +-
drivers/net/tun.c | 114 +++++++++++----------
drivers/net/wireless/ath/ath9k/htc.h | 2 +
drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 13 +++
drivers/net/wireless/ath/ath9k/wmi.c | 4 +
tools/arch/x86/include/asm/cpufeatures.h | 1 +
tools/arch/x86/include/asm/msr-index.h | 4 +
tools/include/uapi/linux/bpf.h | 3 +-
tools/kvm/kvm_stat/kvm_stat | 3 +-
.../testing/selftests/bpf/prog_tests/sock_fields.c | 60 +++++++----
.../testing/selftests/bpf/progs/test_sock_fields.c | 45 ++++++++
tools/testing/selftests/bpf/verifier/sock.c | 81 ++++++++++++++-
tools/testing/selftests/kvm/lib/aarch64/ucall.c | 9 +-
28 files changed, 451 insertions(+), 150 deletions(-)
This is the start of the stable review cycle for the 5.18.17 release.
There are 35 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Thu, 11 Aug 2022 17:55:02 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.18.17-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.18.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.18.17-rc1
Pawan Gupta <pawan.kumar.gupta(a)linux.intel.com>
x86/speculation: Add LFENCE to RSB fill sequence
Daniel Sneddon <daniel.sneddon(a)linux.intel.com>
x86/speculation: Add RSB VM Exit protections
Ning Qiang <sohu0106(a)126.com>
macintosh/adb: fix oob read in do_adb_query() function
Hilda Wu <hildawu(a)realtek.com>
Bluetooth: btusb: Add Realtek RTL8852C support ID 0x13D3:0x3586
Hilda Wu <hildawu(a)realtek.com>
Bluetooth: btusb: Add Realtek RTL8852C support ID 0x13D3:0x3587
Hilda Wu <hildawu(a)realtek.com>
Bluetooth: btusb: Add Realtek RTL8852C support ID 0x0CB8:0xC558
Hilda Wu <hildawu(a)realtek.com>
Bluetooth: btusb: Add Realtek RTL8852C support ID 0x04C5:0x1675
Hilda Wu <hildawu(a)realtek.com>
Bluetooth: btusb: Add Realtek RTL8852C support ID 0x04CA:0x4007
Aaron Ma <aaron.ma(a)canonical.com>
Bluetooth: btusb: Add support of IMC Networks PID 0x3568
Ahmad Fatoum <a.fatoum(a)pengutronix.de>
dt-bindings: bluetooth: broadcom: Add BCM4349B1 DT binding
Hakan Jansson <hakan.jansson(a)infineon.com>
Bluetooth: hci_bcm: Add DT compatible for CYW55572
Ahmad Fatoum <a.fatoum(a)pengutronix.de>
Bluetooth: hci_bcm: Add BCM4349B1 variant
Sai Teja Aluvala <quic_saluvala(a)quicinc.com>
Bluetooth: hci_qca: Return wakeup for qca_wakeup
Naohiro Aota <naohiro.aota(a)wdc.com>
btrfs: zoned: drop optimization of zone finish
Naohiro Aota <naohiro.aota(a)wdc.com>
btrfs: zoned: fix critical section of relocation inode writeback
Naohiro Aota <naohiro.aota(a)wdc.com>
btrfs: zoned: prevent allocation from previous data relocation BG
Peter Collingbourne <pcc(a)google.com>
arm64: set UXN on swapper page tables
Mingwei Zhang <mizhang(a)google.com>
KVM: x86/svm: add __GFP_ACCOUNT to __sev_dbg_{en,de}crypt_user()
Raghavendra Rao Ananta <rananta(a)google.com>
selftests: KVM: Handle compiler optimizations in ucall
Dmitry Klochkov <kdmitry556(a)gmail.com>
tools/kvm_stat: fix display of error when multiple processes are found
David Matlack <dmatlack(a)google.com>
KVM: selftests: Restrict test region to 48-bit physical addresses when using nested
Maxim Levitsky <mlevitsk(a)redhat.com>
KVM: x86: disable preemption around the call to kvm_arch_vcpu_{un|}blocking
Maxim Levitsky <mlevitsk(a)redhat.com>
KVM: x86: disable preemption while updating apicv inhibition
Seth Forshee <sforshee(a)digitalocean.com>
entry/kvm: Exit to user mode when TIF_NOTIFY_SIGNAL is set
Ben Gardon <bgardon(a)google.com>
KVM: x86/MMU: Zap non-leaf SPTEs when disabling dirty logging
Vitaly Kuznetsov <vkuznets(a)redhat.com>
KVM: selftests: Make hyperv_clock selftest more stable
Paolo Bonzini <pbonzini(a)redhat.com>
KVM: x86: do not set st->preempted when going back to user space
Paolo Bonzini <pbonzini(a)redhat.com>
KVM: x86: do not report a vCPU as preempted outside instruction boundaries
GUO Zihua <guozihua(a)huawei.com>
crypto: arm64/poly1305 - fix a read out-of-bound
Tony Luck <tony.luck(a)intel.com>
ACPI: APEI: Better fix to avoid spamming the console with old error logs
Werner Sembach <wse(a)tuxedocomputers.com>
ACPI: video: Shortening quirk list by identifying Clevo by board_name only
Werner Sembach <wse(a)tuxedocomputers.com>
ACPI: video: Force backlight native for some TongFang devices
Stéphane Graber <stgraber(a)ubuntu.com>
tools/vm/slabinfo: Handle files in debugfs
Jan Kara <jack(a)suse.cz>
block: fix default IO priority handling again
Ben Hutchings <ben(a)decadent.org.uk>
x86/speculation: Make all RETbleed mitigations 64-bit only
-------------
Diffstat:
Documentation/admin-guide/hw-vuln/spectre.rst | 8 ++
.../bindings/net/broadcom-bluetooth.yaml | 1 +
Makefile | 4 +-
arch/arm64/crypto/poly1305-glue.c | 2 +-
arch/arm64/include/asm/kernel-pgtable.h | 4 +-
arch/arm64/kernel/head.S | 2 +-
arch/x86/Kconfig | 8 +-
arch/x86/include/asm/cpufeatures.h | 2 +
arch/x86/include/asm/kvm_host.h | 3 +
arch/x86/include/asm/msr-index.h | 4 +
arch/x86/include/asm/nospec-branch.h | 21 +++++-
arch/x86/kernel/cpu/bugs.c | 86 ++++++++++++++++------
arch/x86/kernel/cpu/common.c | 12 ++-
arch/x86/kvm/mmu/tdp_iter.c | 9 +++
arch/x86/kvm/mmu/tdp_iter.h | 1 +
arch/x86/kvm/mmu/tdp_mmu.c | 38 ++++++++--
arch/x86/kvm/svm/sev.c | 4 +-
arch/x86/kvm/svm/svm.c | 2 +
arch/x86/kvm/vmx/vmenter.S | 8 +-
arch/x86/kvm/vmx/vmx.c | 1 +
arch/x86/kvm/x86.c | 50 ++++++++++---
arch/x86/kvm/xen.h | 6 +-
block/blk-ioc.c | 2 +
block/ioprio.c | 4 +-
drivers/acpi/apei/bert.c | 31 ++++++--
drivers/acpi/video_detect.c | 55 +++++++++-----
drivers/bluetooth/btbcm.c | 2 +
drivers/bluetooth/btusb.c | 15 ++++
drivers/bluetooth/hci_bcm.c | 2 +
drivers/bluetooth/hci_qca.c | 2 +-
drivers/macintosh/adb.c | 2 +-
fs/btrfs/block-group.h | 1 +
fs/btrfs/extent-tree.c | 20 ++++-
fs/btrfs/extent_io.c | 3 +-
fs/btrfs/inode.c | 2 +
fs/btrfs/zoned.c | 50 +++++++++++--
fs/btrfs/zoned.h | 5 ++
include/linux/ioprio.h | 2 +-
kernel/entry/kvm.c | 6 --
tools/arch/x86/include/asm/cpufeatures.h | 1 +
tools/arch/x86/include/asm/msr-index.h | 4 +
tools/kvm/kvm_stat/kvm_stat | 3 +-
tools/testing/selftests/kvm/lib/aarch64/ucall.c | 9 +--
tools/testing/selftests/kvm/lib/perf_test_util.c | 18 ++++-
tools/testing/selftests/kvm/x86_64/hyperv_clock.c | 10 ++-
tools/vm/slabinfo.c | 26 ++++++-
virt/kvm/kvm_main.c | 8 +-
47 files changed, 434 insertions(+), 125 deletions(-)
Hi Greg,
This backport series contains small fixes from v5.15 release.
From this point on, 5.10.y xfs can follow and pick changes
posted to 5.15.y.
I still have a backlog of fixes from v5.17 that are already applied to
5.15.y, but not yet submitted to 5.10.y - those will be included
in my next batch.
Thanks,
Amir.
Changes from [v1]:
- Drop backport that disallows disabling of quota accounting
on a mounted xfs (Darrick)
- Added Acked-by Darrick
- CC stable
[v1] https://lore.kernel.org/linux-xfs/20220809111708.92768-1-amir73il@gmail.com/
Darrick J. Wong (1):
xfs: only set IOMAP_F_SHARED when providing a srcmap to a write
Dave Chinner (2):
mm: Add kvrealloc()
xfs: fix I_DONTCACHE
fs/xfs/xfs_icache.c | 3 ++-
fs/xfs/xfs_iomap.c | 8 ++++----
fs/xfs/xfs_iops.c | 2 +-
fs/xfs/xfs_log_recover.c | 4 +++-
include/linux/mm.h | 2 ++
mm/util.c | 15 +++++++++++++++
6 files changed, 27 insertions(+), 7 deletions(-)
--
2.25.1
Staring at hugetlb_wp(), one might wonder where all the logic for shared
mappings is when stumbling over a write-protected page in a shared
mapping. In fact, there is none, and so far we thought we could get
away with that because e.g., mprotect() should always do the right thing
and map all pages directly writable.
Looks like we were wrong:
--------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <sys/mman.h>
#define HUGETLB_SIZE (2 * 1024 * 1024u)
/*
 * Write the control string "4" to /proc/self/clear_refs.
 *
 * Judging by the function name this is the request that resets soft-dirty
 * tracking for the calling process. Any failure to open the file, or a write
 * that does not transfer the whole control string, is fatal: a message goes
 * to stderr and the process exits with status 1.
 */
static void clear_softdirty(void)
{
	static const char request[] = "4";
	const size_t request_len = strlen(request);
	int written;
	int refs_fd;

	refs_fd = open("/proc/self/clear_refs", O_WRONLY);
	if (refs_fd < 0) {
		fprintf(stderr, "open(clear_refs) failed\n");
		exit(1);
	}

	/* A short write or an error (-1) both count as failure. */
	written = write(refs_fd, request, request_len);
	if (written != request_len) {
		fprintf(stderr, "write(clear_refs) failed\n");
		exit(1);
	}

	close(refs_fd);
}
/*
 * Reproducer for write faults on a write-protected page in a shared hugetlb
 * mapping.
 *
 * Sequence: map one huge page of /dev/hugepages/tmp MAP_SHARED, touch it,
 * make the mapping read-only, call clear_softdirty(), make the mapping
 * writable again and touch it once more.
 *
 * Returns 0 when every step succeeds, or -errno of the failing call.
 */
int main(int argc, char **argv)
{
	char *map;
	int fd;

	/* O_CREAT requires an explicit mode argument for the created file. */
	fd = open("/dev/hugepages/tmp", O_RDWR | O_CREAT, 0600);
	/* open() reports failure with -1; 0 is a perfectly valid descriptor. */
	if (fd < 0) {
		fprintf(stderr, "open() failed\n");
		return -errno;
	}

	if (ftruncate(fd, HUGETLB_SIZE)) {
		fprintf(stderr, "ftruncate() failed\n");
		return -errno;
	}

	map = mmap(NULL, HUGETLB_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED) {
		fprintf(stderr, "mmap() failed\n");
		return -errno;
	}

	/* First write: populate the huge page while the mapping is writable. */
	*map = 0;

	if (mprotect(map, HUGETLB_SIZE, PROT_READ)) {
		fprintf(stderr, "mprotect() failed\n");
		return -errno;
	}

	clear_softdirty();

	if (mprotect(map, HUGETLB_SIZE, PROT_READ|PROT_WRITE)) {
		fprintf(stderr, "mprotect() failed\n");
		return -errno;
	}

	/* Second write: this is the access that exercises the write fault. */
	*map = 0;

	return 0;
}
--------------------------------------------------------------------------
Above test fails with SIGBUS when there is only a single free hugetlb page.
# echo 1 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
# ./test
Bus error (core dumped)
And worse, with sufficient free hugetlb pages it will map an anonymous page
into a shared mapping, for example, messing up accounting during unmap
and breaking MAP_SHARED semantics:
# echo 2 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
# ./test
# cat /proc/meminfo | grep HugePages_
HugePages_Total: 2
HugePages_Free: 1
HugePages_Rsvd: 18446744073709551615
HugePages_Surp: 0
Reason in this particular case is that vma_wants_writenotify() will
return "true", removing VM_SHARED in vma_set_page_prot() to map pages
write-protected. Let's teach vma_wants_writenotify() that hugetlb does not
support write-notify, including softdirty tracking.
Fixes: 64e455079e1b ("mm: softdirty: enable write notifications on VMAs after VM_SOFTDIRTY cleared")
Cc: <stable(a)vger.kernel.org> # v3.18+
Signed-off-by: David Hildenbrand <david(a)redhat.com>
---
mm/mmap.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/mm/mmap.c b/mm/mmap.c
index 61e6135c54ef..462a6b0344ac 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1683,6 +1683,13 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
return 0;
+ /*
+ * Hugetlb does not require/support writenotify; especially, it does not
+ * support softdirty tracking.
+ */
+ if (is_vm_hugetlb_page(vma))
+ return 0;
+
/* The backer wishes to know when pages are first written to? */
if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
return 1;
--
2.35.3
Ever since the Dirty COW (CVE-2016-5195) security issue happened, we know
that FOLL_FORCE can be possibly dangerous, especially if there are races
that can be exploited by user space.
Right now, it would be sufficient to have some code that sets a PTE of
a R/O-mapped shared page dirty, in order for it to erroneously become
writable by FOLL_FORCE. The implications of setting a write-protected PTE
dirty might not be immediately obvious to everyone.
And in fact ever since commit 9ae0f87d009c ("mm/shmem: unconditionally set
pte dirty in mfill_atomic_install_pte"), we can use UFFDIO_CONTINUE to map
a shmem page R/O while marking the pte dirty. This can be used by
unprivileged user space to modify tmpfs/shmem file content even if the user
does not have write permissions to the file, and to bypass memfd write
sealing -- Dirty COW restricted to tmpfs/shmem (CVE-2022-2590).
To fix such security issues for good, the insight is that we really only
need that fancy retry logic (FOLL_COW) for COW mappings that are not
writable (!VM_WRITE). And in a COW mapping, we really only broke COW if
we have an exclusive anonymous page mapped. If we have something else
mapped, or the mapped anonymous page might be shared (!PageAnonExclusive),
we have to trigger a write fault to break COW. If we don't find an
exclusive anonymous page when we retry, we have to trigger COW breaking
once again because something intervened.
Let's move away from this mandatory-retry + dirty handling and rely on
our PageAnonExclusive() flag for making a similar decision, to use the
same COW logic as in other kernel parts here as well. In case we stumble
over a PTE in a COW mapping that does not map an exclusive anonymous page,
COW was not properly broken and we have to trigger a fake write-fault to
break COW.
Just like we do in can_change_pte_writable() added via
commit 64fe24a3e05e ("mm/mprotect: try avoiding write faults for exclusive
anonymous pages when changing protection") and commit 76aefad628aa
("mm/mprotect: fix soft-dirty check in can_change_pte_writable()"), take
care of softdirty and uffd-wp manually.
For example, a write() via /proc/self/mem to a uffd-wp-protected range has
to fail instead of silently granting write access and bypassing the
userspace fault handler. Note that FOLL_FORCE is not only used for debug
access, but also triggered by applications without debug intentions, for
example, when pinning pages via RDMA.
This fixes CVE-2022-2590. Note that only x86_64 and aarch64 are
affected, because only those support CONFIG_HAVE_ARCH_USERFAULTFD_MINOR.
Fortunately, FOLL_COW is no longer required to handle FOLL_FORCE. So
let's just get rid of it.
Thanks to Nadav Amit for pointing out that the pte_dirty() check in
FOLL_FORCE code is problematic and might be exploitable.
Note 1: We don't check for the PTE being dirty because it doesn't matter
for making a "was COWed" decision anymore, and whoever modifies the
page has to set the page dirty either way.
Note 2: Kernels before extended uffd-wp support and before
PageAnonExclusive (< 5.19) can simply revert the problematic
commit instead and be safe regarding UFFDIO_CONTINUE. A backport to
v5.19 requires minor adjustments due to lack of
vma_soft_dirty_enabled().
Fixes: 9ae0f87d009c ("mm/shmem: unconditionally set pte dirty in mfill_atomic_install_pte")
Cc: <stable(a)vger.kernel.org> # 5.16+
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Axel Rasmussen <axelrasmussen(a)google.com>
Cc: Nadav Amit <nadav.amit(a)gmail.com>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Jason Gunthorpe <jgg(a)nvidia.com>
Signed-off-by: David Hildenbrand <david(a)redhat.com>
---
v1 -> v2:
- Make the code easier to digest and even less error prone by performing
more explicit checks, just failing gracefully and adding better comments.
- Avoid introducing new VM_BUG_ON().
- Mention Nadav's participation in the description
- Mention that we can bypass memfd write sealing
---
include/linux/mm.h | 1 -
mm/gup.c | 68 +++++++++++++++++++++++++++++++---------------
mm/huge_memory.c | 64 +++++++++++++++++++++++++++++--------------
3 files changed, 89 insertions(+), 44 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 18e01474cf6b..2222ed598112 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2885,7 +2885,6 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */
#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */
#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */
-#define FOLL_COW 0x4000 /* internal GUP flag */
#define FOLL_ANON 0x8000 /* don't do file mappings */
#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */
#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */
diff --git a/mm/gup.c b/mm/gup.c
index 732825157430..5abdaf487460 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -478,14 +478,42 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
return -EEXIST;
}
-/*
- * FOLL_FORCE can write to even unwritable pte's, but only
- * after we've gone through a COW cycle and they are dirty.
- */
-static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
+/* FOLL_FORCE can write to even unwritable PTEs in COW mappings. */
+static inline bool can_follow_write_pte(pte_t pte, struct page *page,
+ struct vm_area_struct *vma,
+ unsigned int flags)
{
- return pte_write(pte) ||
- ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
+ /* If the pte is writable, we can write to the page. */
+ if (pte_write(pte))
+ return true;
+
+ /* Maybe FOLL_FORCE is set to override it? */
+ if (!(flags & FOLL_FORCE))
+ return false;
+
+ /* But FOLL_FORCE has no effect on shared mappings */
+ if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
+ return false;
+
+ /* ... or read-only private ones */
+ if (!(vma->vm_flags & VM_MAYWRITE))
+ return false;
+
+ /* ... or already writable ones that just need to take a write fault */
+ if (vma->vm_flags & VM_WRITE)
+ return false;
+
+ /*
+ * See can_change_pte_writable(): we broke COW and could map the page
+ * writable if we have an exclusive anonymous page ...
+ */
+ if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+ return false;
+
+ /* ... and a write-fault isn't required for other reasons. */
+ if (vma_soft_dirty_enabled(vma) && !pte_soft_dirty(pte))
+ return false;
+ return !userfaultfd_pte_wp(vma, pte);
}
static struct page *follow_page_pte(struct vm_area_struct *vma,
@@ -528,12 +556,19 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
}
if ((flags & FOLL_NUMA) && pte_protnone(pte))
goto no_page;
- if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
- pte_unmap_unlock(ptep, ptl);
- return NULL;
- }
page = vm_normal_page(vma, address, pte);
+
+ /*
+ * We only care about anon pages in can_follow_write_pte() and don't
+ * have to worry about pte_devmap() because they are never anon.
+ */
+ if ((flags & FOLL_WRITE) &&
+ !can_follow_write_pte(pte, page, vma, flags)) {
+ page = NULL;
+ goto out;
+ }
+
if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) {
/*
* Only return device mapping pages in the FOLL_GET or FOLL_PIN
@@ -986,17 +1021,6 @@ static int faultin_page(struct vm_area_struct *vma,
return -EBUSY;
}
- /*
- * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when
- * necessary, even if maybe_mkwrite decided not to set pte_write. We
- * can thus safely do subsequent page lookups as if they were reads.
- * But only do so when looping for pte_write is futile: in some cases
- * userspace may also be wanting to write to the gotten user page,
- * which a read fault here might prevent (a readonly page might get
- * reCOWed by userspace write).
- */
- if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
- *flags |= FOLL_COW;
return 0;
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8a7c1b344abe..e9414ee57c5b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1040,12 +1040,6 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
assert_spin_locked(pmd_lockptr(mm, pmd));
- /*
- * When we COW a devmap PMD entry, we split it into PTEs, so we should
- * not be in this function with `flags & FOLL_COW` set.
- */
- WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");
-
/* FOLL_GET and FOLL_PIN are mutually exclusive. */
if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
(FOLL_PIN | FOLL_GET)))
@@ -1395,14 +1389,42 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
return VM_FAULT_FALLBACK;
}
-/*
- * FOLL_FORCE can write to even unwritable pmd's, but only
- * after we've gone through a COW cycle and they are dirty.
- */
-static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
+/* FOLL_FORCE can write to even unwritable PMDs in COW mappings. */
+static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page,
+ struct vm_area_struct *vma,
+ unsigned int flags)
{
- return pmd_write(pmd) ||
- ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
+ /* If the pmd is writable, we can write to the page. */
+ if (pmd_write(pmd))
+ return true;
+
+ /* Maybe FOLL_FORCE is set to override it? */
+ if (!(flags & FOLL_FORCE))
+ return false;
+
+ /* But FOLL_FORCE has no effect on shared mappings */
+ if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
+ return false;
+
+ /* ... or read-only private ones */
+ if (!(vma->vm_flags & VM_MAYWRITE))
+ return false;
+
+ /* ... or already writable ones that just need to take a write fault */
+ if (vma->vm_flags & VM_WRITE)
+ return false;
+
+ /*
+ * See can_change_pte_writable(): we broke COW and could map the page
+ * writable if we have an exclusive anonymous page ...
+ */
+ if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+ return false;
+
+ /* ... and a write-fault isn't required for other reasons. */
+ if (vma_soft_dirty_enabled(vma) && !pmd_soft_dirty(pmd))
+ return false;
+ return !userfaultfd_huge_pmd_wp(vma, pmd);
}
struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
@@ -1411,12 +1433,16 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
unsigned int flags)
{
struct mm_struct *mm = vma->vm_mm;
- struct page *page = NULL;
+ struct page *page;
assert_spin_locked(pmd_lockptr(mm, pmd));
- if (flags & FOLL_WRITE && !can_follow_write_pmd(*pmd, flags))
- goto out;
+ page = pmd_page(*pmd);
+ VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
+
+ if ((flags & FOLL_WRITE) &&
+ !can_follow_write_pmd(*pmd, page, vma, flags))
+ return NULL;
/* Avoid dumping huge zero page */
if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
@@ -1424,10 +1450,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
/* Full NUMA hinting faults to serialise migration in fault paths */
if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
- goto out;
-
- page = pmd_page(*pmd);
- VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
+ return NULL;
if (!pmd_write(*pmd) && gup_must_unshare(flags, page))
return ERR_PTR(-EMLINK);
@@ -1444,7 +1467,6 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page);
-out:
return page;
}
base-commit: 1612c382ffbdf1f673caec76502b1c00e6d35363
--
2.35.3
This reverts commit e7be8d1dd983156bbdd22c0319b71119a8fbb697 as it
causes zram failures. It does not revert cleanly, PTR_ERR handling was
introduced in the meantime. This is handled by appropriate IS_ERR.
When under memory pressure, zs_malloc() can fail. Before the above
commit, the allocation was retried with direct reclaim enabled
(GFP_NOIO). After the commit, it is not -- only __GFP_KSWAPD_RECLAIM is
tried.
So when the failure occurs under memory pressure, the overlaying
filesystem such as ext2 (mounted by ext4 module in this case) can emit
failures, making the (file)system unusable:
EXT4-fs warning (device zram0): ext4_end_bio:343: I/O error 10 writing to inode 16386 starting block 159744)
Buffer I/O error on device zram0, logical block 159744
With direct reclaim, memory is really reclaimed and allocation succeeds,
eventually. In the worst case, the oom killer is invoked, which is the
proper outcome if the user sets up zram too large (in comparison to
available RAM).
This very diff doesn't apply to 5.19 (stable) cleanly (see PTR_ERR note
above). Use revert of e7be8d1dd983 directly.
Link: https://bugzilla.suse.com/show_bug.cgi?id=1202203
Fixes: e7be8d1dd983 ("zram: remove double compression logic")
Cc: stable(a)vger.kernel.org # 5.19
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Nitin Gupta <ngupta(a)vflare.org>
Cc: Sergey Senozhatsky <senozhatsky(a)chromium.org>
Cc: Alexey Romanov <avromanov(a)sberdevices.ru>
Cc: Dmitry Rokosov <ddrokosov(a)sberdevices.ru>
Cc: Lukas Czerner <lczerner(a)redhat.com>
Cc: Ext4 Developers List <linux-ext4(a)vger.kernel.org>
Signed-off-by: Jiri Slaby <jslaby(a)suse.cz>
---
drivers/block/zram/zram_drv.c | 42 ++++++++++++++++++++++++++---------
drivers/block/zram/zram_drv.h | 1 +
2 files changed, 33 insertions(+), 10 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 92cb929a45b7..226ea76cc819 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1146,14 +1146,15 @@ static ssize_t bd_stat_show(struct device *dev,
static ssize_t debug_stat_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- int version = 2;
+ int version = 1;
struct zram *zram = dev_to_zram(dev);
ssize_t ret;
down_read(&zram->init_lock);
ret = scnprintf(buf, PAGE_SIZE,
- "version: %d\n%8llu\n",
+ "version: %d\n%8llu %8llu\n",
version,
+ (u64)atomic64_read(&zram->stats.writestall),
(u64)atomic64_read(&zram->stats.miss_free));
up_read(&zram->init_lock);
@@ -1351,7 +1352,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
{
int ret = 0;
unsigned long alloced_pages;
- unsigned long handle = 0;
+ unsigned long handle = -ENOMEM;
unsigned int comp_len = 0;
void *src, *dst, *mem;
struct zcomp_strm *zstrm;
@@ -1369,6 +1370,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
}
kunmap_atomic(mem);
+compress_again:
zstrm = zcomp_stream_get(zram->comp);
src = kmap_atomic(page);
ret = zcomp_compress(zstrm, src, &comp_len);
@@ -1377,20 +1379,39 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
if (unlikely(ret)) {
zcomp_stream_put(zram->comp);
pr_err("Compression failed! err=%d\n", ret);
+ zs_free(zram->mem_pool, handle);
return ret;
}
if (comp_len >= huge_class_size)
comp_len = PAGE_SIZE;
-
- handle = zs_malloc(zram->mem_pool, comp_len,
- __GFP_KSWAPD_RECLAIM |
- __GFP_NOWARN |
- __GFP_HIGHMEM |
- __GFP_MOVABLE);
-
+ /*
+ * handle allocation has 2 paths:
+ * a) fast path is executed with preemption disabled (for
+ * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
+ * since we can't sleep;
+ * b) slow path enables preemption and attempts to allocate
+ * the page with __GFP_DIRECT_RECLAIM bit set. we have to
+ * put per-cpu compression stream and, thus, to re-do
+ * the compression once handle is allocated.
+ *
+ * if we have a 'non-null' handle here then we are coming
+ * from the slow path and handle has already been allocated.
+ */
+ if (IS_ERR((void *)handle))
+ handle = zs_malloc(zram->mem_pool, comp_len,
+ __GFP_KSWAPD_RECLAIM |
+ __GFP_NOWARN |
+ __GFP_HIGHMEM |
+ __GFP_MOVABLE);
if (IS_ERR((void *)handle)) {
zcomp_stream_put(zram->comp);
+ atomic64_inc(&zram->stats.writestall);
+ handle = zs_malloc(zram->mem_pool, comp_len,
+ GFP_NOIO | __GFP_HIGHMEM |
+ __GFP_MOVABLE);
+ if (!IS_ERR((void *)handle))
+ goto compress_again;
return PTR_ERR((void *)handle);
}
@@ -1948,6 +1969,7 @@ static int zram_add(void)
if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
+ blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
if (ret)
goto out_cleanup_disk;
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 158c91e54850..80c3b43b4828 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -81,6 +81,7 @@ struct zram_stats {
atomic64_t huge_pages_since; /* no. of huge pages since zram set up */
atomic64_t pages_stored; /* no. of pages currently stored */
atomic_long_t max_used_pages; /* no. of maximum pages stored */
+ atomic64_t writestall; /* no. of write slow paths */
atomic64_t miss_free; /* no. of missed free */
#ifdef CONFIG_ZRAM_WRITEBACK
atomic64_t bd_count; /* no. of pages in backing device */
--
2.37.1
--
Hello,
We the Board Directors believe you are in good health, doing great and
with the hope that this mail will meet you in good condition, We are
privileged and delighted to reach you via email" And we are urgently
waiting to hear from you. and again your number is not connecting.
My regards,
Dr. Yakubu Abubakar..
Sincerely,
Prof. Chin Guang
Commit 55e8c8eb2c7b ("posix-cpu-timers: Store a reference to a pid not a
task") started looking up tasks by PID when deleting a CPU timer.
When a non-leader thread calls execve, it will switch PIDs with the leader
process. Then, as it calls exit_itimers, posix_cpu_timer_del cannot find
the task because the timer still points to the old PID.
That means that armed timers won't be disarmed, that is, they won't be
removed from the timerqueue_list. exit_itimers will still release their
memory, and when that list is later processed, it leads to a
use-after-free.
Clean up the timers from the de-threaded task before freeing them. This
prevents a reported use-after-free.
Fixes: 55e8c8eb2c7b ("posix-cpu-timers: Store a reference to a pid not a task")
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo(a)canonical.com>
Reviewed-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: "Eric W. Biederman" <ebiederm(a)xmission.com>
Cc: <stable(a)vger.kernel.org>
---
fs/exec.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/fs/exec.c b/fs/exec.c
index 778123259e42..1c6b477dad69 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1301,6 +1301,9 @@ int begin_new_exec(struct linux_binprm * bprm)
bprm->mm = NULL;
#ifdef CONFIG_POSIX_TIMERS
+ spin_lock_irq(&me->sighand->siglock);
+ posix_cpu_timers_exit(me);
+ spin_unlock_irq(&me->sighand->siglock);
exit_itimers(me);
flush_itimer_signals();
#endif
--
2.34.1
When doing lookups for sets on the same batch by using its ID, a set from a
different table can be used.
Then, when the table is removed, a reference to the set may be kept after
the set is freed, leading to a potential use-after-free.
When looking for sets by ID, use the table that was used for the lookup by
name, and only return sets belonging to that same table.
This fixes CVE-2022-2586, also reported as ZDI-CAN-17470.
Reported-by: Team Orca of Sea Security (@seasecresponse)
Fixes: 958bee14d071 ("netfilter: nf_tables: use new transaction infrastructure to handle sets")
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo(a)canonical.com>
Cc: <stable(a)vger.kernel.org>
---
net/netfilter/nf_tables_api.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 9f976b11d896..86fae065f1d2 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3842,6 +3842,7 @@ static struct nft_set *nft_set_lookup_byhandle(const struct nft_table *table,
}
static struct nft_set *nft_set_lookup_byid(const struct net *net,
+ const struct nft_table *table,
const struct nlattr *nla, u8 genmask)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -3853,6 +3854,7 @@ static struct nft_set *nft_set_lookup_byid(const struct net *net,
struct nft_set *set = nft_trans_set(trans);
if (id == nft_trans_set_id(trans) &&
+ set->table == table &&
nft_active_genmask(set, genmask))
return set;
}
@@ -3873,7 +3875,7 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
if (!nla_set_id)
return set;
- set = nft_set_lookup_byid(net, nla_set_id, genmask);
+ set = nft_set_lookup_byid(net, table, nla_set_id, genmask);
}
return set;
}
--
2.34.1
From: Wyes Karny <wyes.karny(a)amd.com>
[ Upstream commit 8bcedb4ce04750e1ccc9a6b6433387f6a9166a56 ]
When kernel is booted with idle=nomwait do not use MWAIT as the
default idle state.
If the user boots the kernel with idle=nomwait, it is a clear
direction to not use mwait as the default idle state.
However, the current code does not take this into consideration
while selecting the default idle state on x86.
Fix it by checking for the idle=nomwait boot option in
prefer_mwait_c1_over_halt().
Also update the documentation around idle=nomwait appropriately.
[ dhansen: tweak commit message ]
Signed-off-by: Wyes Karny <wyes.karny(a)amd.com>
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Tested-by: Zhang Rui <rui.zhang(a)intel.com>
Link: https://lkml.kernel.org/r/fdc2dc2d0a1bc21c2f53d989ea2d2ee3ccbc0dbe.16545383…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
Documentation/admin-guide/pm/cpuidle.rst | 15 +++++++++------
arch/x86/kernel/process.c | 9 ++++++---
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst
index aec2cd2aaea7..19754beb5a4e 100644
--- a/Documentation/admin-guide/pm/cpuidle.rst
+++ b/Documentation/admin-guide/pm/cpuidle.rst
@@ -612,8 +612,8 @@ the ``menu`` governor to be used on the systems that use the ``ladder`` governor
by default this way, for example.
The other kernel command line parameters controlling CPU idle time management
-described below are only relevant for the *x86* architecture and some of
-them affect Intel processors only.
+described below are only relevant for the *x86* architecture and references
+to ``intel_idle`` affect Intel processors only.
The *x86* architecture support code recognizes three kernel command line
options related to CPU idle time management: ``idle=poll``, ``idle=halt``,
@@ -635,10 +635,13 @@ idle, so it very well may hurt single-thread computations performance as well as
energy-efficiency. Thus using it for performance reasons may not be a good idea
at all.]
-The ``idle=nomwait`` option disables the ``intel_idle`` driver and causes
-``acpi_idle`` to be used (as long as all of the information needed by it is
-there in the system's ACPI tables), but it is not allowed to use the
-``MWAIT`` instruction of the CPUs to ask the hardware to enter idle states.
+The ``idle=nomwait`` option prevents the use of ``MWAIT`` instruction of
+the CPU to enter idle states. When this option is used, the ``acpi_idle``
+driver will use the ``HLT`` instruction instead of ``MWAIT``. On systems
+running Intel processors, this option disables the ``intel_idle`` driver
+and forces the use of the ``acpi_idle`` driver instead. Note that in either
+case, ``acpi_idle`` driver will function only if all the information needed
+by it is in the system's ACPI tables.
In addition to the architecture-level kernel command line options affecting CPU
idle time management, there are parameters affecting individual ``CPUIdle``
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index d456ce21c255..9346c95e8879 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -821,6 +821,10 @@ static void amd_e400_idle(void)
*/
static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
{
+ /* User has disallowed the use of MWAIT. Fallback to HALT */
+ if (boot_option_idle_override == IDLE_NOMWAIT)
+ return 0;
+
if (c->x86_vendor != X86_VENDOR_INTEL)
return 0;
@@ -932,9 +936,8 @@ static int __init idle_setup(char *str)
} else if (!strcmp(str, "nomwait")) {
/*
* If the boot option of "idle=nomwait" is added,
- * it means that mwait will be disabled for CPU C2/C3
- * states. In such case it won't touch the variable
- * of boot_option_idle_override.
+ * it means that mwait will be disabled for CPU C1/C2/C3
+ * states.
*/
boot_option_idle_override = IDLE_NOMWAIT;
} else
--
2.35.1
Hello ,
It is my pleasure to communicate with you, I know that this message
will be a surprise to you my name is Mrs. Sophia Erick, I am diagnosed
with ovarian cancer which my doctor have confirmed that I have only
some weeks to live so I have decided you handover the sum of($
11,000,000.00) in my account to you for help of the orphanage homes
and the needy once
Please kindly reply me here as soon as possible to enable me give
you more information but before handing over my details to you please
assure me that you will only take 30% of the money and share the rest
to the poor orphanage home and the needy once, thank you am waiting to
hear from you
Mrs Sophia Erick.
Hi Greg and Sasha,
These two patches are backports for stable branches from v4.9 to v5.4.
These two patches are reducing the chance of destructive RMW cycle,
where btrfs can use corrupted data to generate new P/Q, thus making some
repairable data unrepairable.
Those patches are more important than what I initially thought, thus
unfortunately they are not CCed to stable by themselves.
Furthermore, due to recent refactors/renames, quite a few member names
changed in the code those patches touch, thus they have to be manually backported.
One of the fastest ways to verify the behavior is the existing btrfs/125
test case from fstests (not in the auto group AFAIK).
Qu Wenruo (2):
btrfs: only write the sectors in the vertical stripe which has data
stripes
btrfs: raid56: don't trust any cached sector in
__raid56_parity_recover()
fs/btrfs/raid56.c | 74 ++++++++++++++++++++++++++++++++++++-----------
1 file changed, 57 insertions(+), 17 deletions(-)
--
2.37.0
--
Congratulation! Your delayed payment has been released this morning.
Kindly reconfirm your details for payment to you.
Nigel Higgins, (Group Chairman),
Barclays Bank Plc,
Registered number: 1026167,
1 Churchill Place, London, ENG E14 5HP,
SWIFT Code: BARCGB21,
Direct Telephone: +44 770 000 8965,
WhatsApp, SMS Number: + 44 787 229 9022
www.barclays.co.uk
Sehr geehrter E-Mail-Besitzer
Bitte bestätigen Sie den Besitz Ihrer E-Mail. Es wurde nach dem Zufallsprinzip nach einer elektronischen Computer-Spin-Ball-Ziehung ausgewählt, um eine Spende von 2.500.000,00 USD von der Kristine Wellenstein Foundation zu erhalten. Senden Sie für weitere Einzelheiten eine Bestätigungs-E-Mail an wellensteinfoundation(a)gmail.com
Kristine Wellenstein.
Gründer;
Kristine Wellenstein Stiftung.
If we allocate a new page, we need to make sure that our folio matches
that new page. If we don't, we store the wrong folio in the shmem page
cache which will lead to data corruption. This problem will be solved
by changing shmem_replace_page() to shmem_replace_folio(), but this
patch is the minimal fix.
Fixes: da08e9b79323 ("mm/shmem: convert shmem_swapin_page() to shmem_swapin_folio()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Reviewed-by: William Kucharski <william.kucharski(a)oracle.com>
---
mm/shmem.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/mm/shmem.c b/mm/shmem.c
index e975fcd9d2e1..4ae43cffeda3 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1780,6 +1780,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
if (shmem_should_replace_folio(folio, gfp)) {
error = shmem_replace_page(&page, gfp, info, index);
+ folio = page_folio(page);
if (error)
goto failed;
}
--
2.35.1
Stop Xen timer (if it's running) prior to changing the IRQ vector and
potentially (re)starting the timer. Changing the IRQ vector while the
timer is still running can result in KVM injecting a garbage event, e.g.
vm_xen_inject_timer_irqs() could see a non-zero xen.timer_pending from
a previous timer but inject the new xen.timer_virq.
Fixes: 536395260582 ("KVM: x86/xen: handle PV timers oneshot mode")
Cc: stable(a)vger.kernel.org
Link: https://syzkaller.appspot.com/bug?id=8234a9dfd3aafbf092cc5a7cd9842e3ebc45fc…
Reported-by: syzbot+e54f930ed78eb0f85281(a)syzkaller.appspotmail.com
Signed-off-by: Coleman Dietsch <dietschc(a)csp.edu>
---
arch/x86/kvm/xen.c | 35 +++++++++++++++++------------------
1 file changed, 17 insertions(+), 18 deletions(-)
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 6e554041e862..280cb5dc7341 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -707,26 +707,25 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
break;
case KVM_XEN_VCPU_ATTR_TYPE_TIMER:
- if (data->u.timer.port) {
- if (data->u.timer.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) {
- r = -EINVAL;
- break;
- }
- vcpu->arch.xen.timer_virq = data->u.timer.port;
-
- if (!vcpu->arch.xen.timer.function)
- kvm_xen_init_timer(vcpu);
-
- /* Restart the timer if it's set */
- if (data->u.timer.expires_ns)
- kvm_xen_start_timer(vcpu, data->u.timer.expires_ns,
- data->u.timer.expires_ns -
- get_kvmclock_ns(vcpu->kvm));
- } else if (kvm_xen_timer_enabled(vcpu)) {
- kvm_xen_stop_timer(vcpu);
- vcpu->arch.xen.timer_virq = 0;
+ if (data->u.timer.port &&
+ data->u.timer.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) {
+ r = -EINVAL;
+ break;
}
+ if (!vcpu->arch.xen.timer.function)
+ kvm_xen_init_timer(vcpu);
+
+ /* Stop the timer (if it's running) before changing the vector */
+ kvm_xen_stop_timer(vcpu);
+ vcpu->arch.xen.timer_virq = data->u.timer.port;
+
+ /* Start the timer if the new value has a valid vector+expiry. */
+ if (data->u.timer.port && data->u.timer.expires_ns)
+ kvm_xen_start_timer(vcpu, data->u.timer.expires_ns,
+ data->u.timer.expires_ns -
+ get_kvmclock_ns(vcpu->kvm));
+
r = 0;
break;
--
2.34.1
From: Wyes Karny <wyes.karny(a)amd.com>
[ Upstream commit 8bcedb4ce04750e1ccc9a6b6433387f6a9166a56 ]
When kernel is booted with idle=nomwait do not use MWAIT as the
default idle state.
If the user boots the kernel with idle=nomwait, it is a clear
direction to not use mwait as the default idle state.
However, the current code does not take this into consideration
while selecting the default idle state on x86.
Fix it by checking for the idle=nomwait boot option in
prefer_mwait_c1_over_halt().
Also update the documentation around idle=nomwait appropriately.
[ dhansen: tweak commit message ]
Signed-off-by: Wyes Karny <wyes.karny(a)amd.com>
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Tested-by: Zhang Rui <rui.zhang(a)intel.com>
Link: https://lkml.kernel.org/r/fdc2dc2d0a1bc21c2f53d989ea2d2ee3ccbc0dbe.16545383…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
Documentation/admin-guide/pm/cpuidle.rst | 15 +++++++++------
arch/x86/kernel/process.c | 9 ++++++---
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst
index aec2cd2aaea7..19754beb5a4e 100644
--- a/Documentation/admin-guide/pm/cpuidle.rst
+++ b/Documentation/admin-guide/pm/cpuidle.rst
@@ -612,8 +612,8 @@ the ``menu`` governor to be used on the systems that use the ``ladder`` governor
by default this way, for example.
The other kernel command line parameters controlling CPU idle time management
-described below are only relevant for the *x86* architecture and some of
-them affect Intel processors only.
+described below are only relevant for the *x86* architecture and references
+to ``intel_idle`` affect Intel processors only.
The *x86* architecture support code recognizes three kernel command line
options related to CPU idle time management: ``idle=poll``, ``idle=halt``,
@@ -635,10 +635,13 @@ idle, so it very well may hurt single-thread computations performance as well as
energy-efficiency. Thus using it for performance reasons may not be a good idea
at all.]
-The ``idle=nomwait`` option disables the ``intel_idle`` driver and causes
-``acpi_idle`` to be used (as long as all of the information needed by it is
-there in the system's ACPI tables), but it is not allowed to use the
-``MWAIT`` instruction of the CPUs to ask the hardware to enter idle states.
+The ``idle=nomwait`` option prevents the use of ``MWAIT`` instruction of
+the CPU to enter idle states. When this option is used, the ``acpi_idle``
+driver will use the ``HLT`` instruction instead of ``MWAIT``. On systems
+running Intel processors, this option disables the ``intel_idle`` driver
+and forces the use of the ``acpi_idle`` driver instead. Note that in either
+case, ``acpi_idle`` driver will function only if all the information needed
+by it is in the system's ACPI tables.
In addition to the architecture-level kernel command line options affecting CPU
idle time management, there are parameters affecting individual ``CPUIdle``
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 8d9d72fc27a2..707376453525 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -805,6 +805,10 @@ static void amd_e400_idle(void)
*/
static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
{
+ /* User has disallowed the use of MWAIT. Fallback to HALT */
+ if (boot_option_idle_override == IDLE_NOMWAIT)
+ return 0;
+
if (c->x86_vendor != X86_VENDOR_INTEL)
return 0;
@@ -913,9 +917,8 @@ static int __init idle_setup(char *str)
} else if (!strcmp(str, "nomwait")) {
/*
* If the boot option of "idle=nomwait" is added,
- * it means that mwait will be disabled for CPU C2/C3
- * states. In such case it won't touch the variable
- * of boot_option_idle_override.
+ * it means that mwait will be disabled for CPU C1/C2/C3
+ * states.
*/
boot_option_idle_override = IDLE_NOMWAIT;
} else
--
2.35.1
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: e6cfcdda8cbe81eaf821c897369a65fec987b404
Gitweb: https://git.kernel.org/tip/e6cfcdda8cbe81eaf821c897369a65fec987b404
Author: Kim Phillips <kim.phillips(a)amd.com>
AuthorDate: Mon, 08 Aug 2022 09:32:33 -05:00
Committer: Borislav Petkov <bp(a)suse.de>
CommitterDate: Mon, 08 Aug 2022 19:12:17 +02:00
x86/bugs: Enable STIBP for IBPB mitigated RETBleed
AMD's "Technical Guidance for Mitigating Branch Type Confusion,
Rev. 1.0 2022-07-12" whitepaper, under section 6.1.2 "IBPB On
Privileged Mode Entry / SMT Safety" says:
Similar to the Jmp2Ret mitigation, if the code on the sibling thread
cannot be trusted, software should set STIBP to 1 or disable SMT to
ensure SMT safety when using this mitigation.
So, like already being done for retbleed=unret, and now also for
retbleed=ibpb, force STIBP on machines that have it, and report its SMT
vulnerability status accordingly.
[ bp: Remove the "we" and remove "[AMD]" applicability parameter which
doesn't work here. ]
Fixes: 3ebc17006888 ("x86/bugs: Add retbleed=ibpb")
Signed-off-by: Kim Phillips <kim.phillips(a)amd.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Cc: stable(a)vger.kernel.org # 5.10, 5.15, 5.19
Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537
Link: https://lore.kernel.org/r/20220804192201.439596-1-kim.phillips@amd.com
---
Documentation/admin-guide/kernel-parameters.txt | 29 +++++++++++-----
arch/x86/kernel/cpu/bugs.c | 10 +++---
2 files changed, 27 insertions(+), 12 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 5e9147f..523b196 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5209,20 +5209,33 @@
Speculative Code Execution with Return Instructions)
vulnerability.
+ AMD-based UNRET and IBPB mitigations alone do not stop
+ sibling threads from influencing the predictions of other
+ sibling threads. For that reason, STIBP is used on pro-
+ cessors that support it, and mitigate SMT on processors
+ that don't.
+
off - no mitigation
auto - automatically select a migitation
auto,nosmt - automatically select a mitigation,
disabling SMT if necessary for
the full mitigation (only on Zen1
and older without STIBP).
- ibpb - mitigate short speculation windows on
- basic block boundaries too. Safe, highest
- perf impact.
- unret - force enable untrained return thunks,
- only effective on AMD f15h-f17h
- based systems.
- unret,nosmt - like unret, will disable SMT when STIBP
- is not available.
+ ibpb - On AMD, mitigate short speculation
+ windows on basic block boundaries too.
+ Safe, highest perf impact. It also
+ enables STIBP if present. Not suitable
+ on Intel.
+ ibpb,nosmt - Like "ibpb" above but will disable SMT
+ when STIBP is not available. This is
+ the alternative for systems which do not
+ have STIBP.
+ unret - Force enable untrained return thunks,
+ only effective on AMD f15h-f17h based
+ systems.
+ unret,nosmt - Like unret, but will disable SMT when STIBP
+ is not available. This is the alternative for
+ systems which do not have STIBP.
Selecting 'auto' will choose a mitigation method at run
time according to the CPU.
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 6761668..d50686c 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -152,7 +152,7 @@ void __init check_bugs(void)
/*
* spectre_v2_user_select_mitigation() relies on the state set by
* retbleed_select_mitigation(); specifically the STIBP selection is
- * forced for UNRET.
+ * forced for UNRET or IBPB.
*/
spectre_v2_user_select_mitigation();
ssb_select_mitigation();
@@ -1179,7 +1179,8 @@ spectre_v2_user_select_mitigation(void)
boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
mode = SPECTRE_V2_USER_STRICT_PREFERRED;
- if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) {
+ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET ||
+ retbleed_mitigation == RETBLEED_MITIGATION_IBPB) {
if (mode != SPECTRE_V2_USER_STRICT &&
mode != SPECTRE_V2_USER_STRICT_PREFERRED)
pr_info("Selecting STIBP always-on mode to complement retbleed mitigation\n");
@@ -2320,10 +2321,11 @@ static ssize_t srbds_show_state(char *buf)
static ssize_t retbleed_show_state(char *buf)
{
- if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) {
+ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET ||
+ retbleed_mitigation == RETBLEED_MITIGATION_IBPB) {
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
- return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n");
+ return sprintf(buf, "Vulnerable: untrained return thunk / IBPB on non-AMD based uarch\n");
return sprintf(buf, "%s; SMT %s\n",
retbleed_strings[retbleed_mitigation],
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1dd498e5e26ad71e3e9130daf72cfb6a693fee03 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse(a)arm.com>
Date: Thu, 27 Jan 2022 12:20:52 +0000
Subject: [PATCH] KVM: arm64: Workaround Cortex-A510's single-step and PAC trap
errata
Cortex-A510's erratum #2077057 causes SPSR_EL2 to be corrupted when
single-stepping authenticated ERET instructions. A single step is
expected, but a pointer authentication trap is taken instead. The
erratum causes SPSR_EL1 to be copied to SPSR_EL2, which could allow
EL1 to cause a return to EL2 with a guest controlled ELR_EL2.
Because the conditions require an ERET into active-not-pending state,
this is only a problem for the EL2 when EL2 is stepping EL1. In this case
the previous SPSR_EL2 value is preserved in struct kvm_vcpu, and can be
restored.
Cc: stable(a)vger.kernel.org # 53960faf2b73: arm64: Add Cortex-A510 CPU part definition
Cc: stable(a)vger.kernel.org
Signed-off-by: James Morse <james.morse(a)arm.com>
[maz: fixup cpucaps ordering]
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20220127122052.1584324-5-james.morse@arm.com
diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index 0ec7b7f1524b..ea281dd75517 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -100,6 +100,8 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A510 | #2051678 | ARM64_ERRATUM_2051678 |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A510 | #2077057 | ARM64_ERRATUM_2077057 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f2b5a4abef21..cbcd42decb2a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -680,6 +680,22 @@ config ARM64_ERRATUM_2051678
If unsure, say Y.
+config ARM64_ERRATUM_2077057
+ bool "Cortex-A510: 2077057: workaround software-step corrupting SPSR_EL2"
+ help
+ This option adds the workaround for ARM Cortex-A510 erratum 2077057.
+ Affected Cortex-A510 may corrupt SPSR_EL2 when the a step exception is
+ expected, but a Pointer Authentication trap is taken instead. The
+ erratum causes SPSR_EL1 to be copied to SPSR_EL2, which could allow
+ EL1 to cause a return to EL2 with a guest controlled ELR_EL2.
+
+ This can only happen when EL2 is stepping EL1.
+
+ When these conditions occur, the SPSR_EL2 value is unchanged from the
+ previous guest entry, and can be restored from the in-memory copy.
+
+ If unsure, say Y.
+
config ARM64_ERRATUM_2119858
bool "Cortex-A710/X2: 2119858: workaround TRBE overwriting trace data in FILL mode"
default y
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 066098198c24..b217941713a8 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -600,6 +600,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus),
},
#endif
+#ifdef CONFIG_ARM64_ERRATUM_2077057
+ {
+ .desc = "ARM erratum 2077057",
+ .capability = ARM64_WORKAROUND_2077057,
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2),
+ },
+#endif
#ifdef CONFIG_ARM64_ERRATUM_2064142
{
.desc = "ARM erratum 2064142",
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 331dd10821df..701cfb964905 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -402,6 +402,24 @@ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
return false;
}
+static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ /*
+ * Check for the conditions of Cortex-A510's #2077057. When these occur
+ * SPSR_EL2 can't be trusted, but isn't needed either as it is
+ * unchanged from the value in vcpu_gp_regs(vcpu)->pstate.
+ * Are we single-stepping the guest, and took a PAC exception from the
+ * active-not-pending state?
+ */
+ if (cpus_have_final_cap(ARM64_WORKAROUND_2077057) &&
+ vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
+ *vcpu_cpsr(vcpu) & DBG_SPSR_SS &&
+ ESR_ELx_EC(read_sysreg_el2(SYS_ESR)) == ESR_ELx_EC_PAC)
+ write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
+
+ vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
+}
+
/*
* Return true when we were able to fixup the guest exit and should return to
* the guest, false when we should restore the host state and return to the
@@ -413,7 +431,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
* Save PSTATE early so that we can evaluate the vcpu mode
* early on.
*/
- vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
+ synchronize_vcpu_pstate(vcpu, exit_code);
/*
* Check whether we want to repaint the state one way or
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index e7719e8f18de..9c65b1e25a96 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -55,9 +55,10 @@ WORKAROUND_1418040
WORKAROUND_1463225
WORKAROUND_1508412
WORKAROUND_1542419
-WORKAROUND_2064142
-WORKAROUND_2038923
WORKAROUND_1902691
+WORKAROUND_2038923
+WORKAROUND_2064142
+WORKAROUND_2077057
WORKAROUND_TRBE_OVERWRITE_FILL_MODE
WORKAROUND_TSB_FLUSH_FAILURE
WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
AMD's "Technical Guidance for Mitigating Branch Type Confusion,
Rev. 1.0 2022-07-12" whitepaper, under section 6.1.2 "IBPB On
Privileged Mode Entry / SMT Safety" says:
"Similar to the Jmp2Ret mitigation, if the code on the sibling thread
cannot be trusted, software should set STIBP to 1 or disable SMT to
ensure SMT safety when using this mitigation."
So, like already being done for retbleed=unret, and now also for
retbleed=ibpb, force STIBP on machines that have it, and report
its SMT vulnerability status accordingly.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537
Fixes: 3ebc17006888 ("x86/bugs: Add retbleed=ibpb")
Cc: stable(a)vger.kernel.org # 5.10, 5.15, 5.19
Signed-off-by: Kim Phillips <kim.phillips(a)amd.com>
---
v4: Cc: stable (Greg K-H)
v3: "unret and ibpb mitigations" -> "UNRET and IBPB mitigations" (Mingo)
v2: Justify and explain STIBP's role with IBPB (Boris)
.../admin-guide/kernel-parameters.txt | 20 ++++++++++++++-----
arch/x86/kernel/cpu/bugs.c | 10 ++++++----
2 files changed, 21 insertions(+), 9 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index bab2b0bf5988..ed6a19ae0dd6 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5260,20 +5260,30 @@
Speculative Code Execution with Return Instructions)
vulnerability.
+ AMD-based UNRET and IBPB mitigations alone do not stop
+ sibling threads influencing the predictions of other sibling
+ threads. For that reason, we use STIBP on processors
+ that support it, and mitigate SMT on processors that don't.
+
off - no mitigation
auto - automatically select a migitation
auto,nosmt - automatically select a mitigation,
disabling SMT if necessary for
the full mitigation (only on Zen1
and older without STIBP).
- ibpb - mitigate short speculation windows on
+ ibpb - [AMD] Mitigate short speculation windows on
basic block boundaries too. Safe, highest
- perf impact.
- unret - force enable untrained return thunks,
+ perf impact. It also enables STIBP if
+ present.
+ ibpb,nosmt - [AMD] Like ibpb, but will disable SMT when STIBP
+ is not available. This is the alternative for
+ systems which do not have STIBP.
+ unret - [AMD] Force enable untrained return thunks,
only effective on AMD f15h-f17h
based systems.
- unret,nosmt - like unret, will disable SMT when STIBP
- is not available.
+ unret,nosmt - [AMD] Like unret, but will disable SMT when STIBP
+ is not available. This is the alternative for
+ systems which do not have STIBP.
Selecting 'auto' will choose a mitigation method at run
time according to the CPU.
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 6761668100b9..d50686ca5870 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -152,7 +152,7 @@ void __init check_bugs(void)
/*
* spectre_v2_user_select_mitigation() relies on the state set by
* retbleed_select_mitigation(); specifically the STIBP selection is
- * forced for UNRET.
+ * forced for UNRET or IBPB.
*/
spectre_v2_user_select_mitigation();
ssb_select_mitigation();
@@ -1179,7 +1179,8 @@ spectre_v2_user_select_mitigation(void)
boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
mode = SPECTRE_V2_USER_STRICT_PREFERRED;
- if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) {
+ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET ||
+ retbleed_mitigation == RETBLEED_MITIGATION_IBPB) {
if (mode != SPECTRE_V2_USER_STRICT &&
mode != SPECTRE_V2_USER_STRICT_PREFERRED)
pr_info("Selecting STIBP always-on mode to complement retbleed mitigation\n");
@@ -2320,10 +2321,11 @@ static ssize_t srbds_show_state(char *buf)
static ssize_t retbleed_show_state(char *buf)
{
- if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) {
+ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET ||
+ retbleed_mitigation == RETBLEED_MITIGATION_IBPB) {
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
- return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n");
+ return sprintf(buf, "Vulnerable: untrained return thunk / IBPB on non-AMD based uarch\n");
return sprintf(buf, "%s; SMT %s\n",
retbleed_strings[retbleed_mitigation],
--
2.34.1
These patches are backports for the 5.18 branch.
They all fix zoned-mode-related issues in btrfs.
Patch 3 looks different from upstream commit b3a3b0255797 ("btrfs:
zoned: drop optimization of zone finish") as a refactoring patch is not
picked into the stable branch. But, essentially, they do the same thing,
which is to always zone-finish the zones after a (nearly) full write.
v2 just amends a line to add a missing variable declaration.
Naohiro Aota (3):
btrfs: zoned: prevent allocation from previous data relocation BG
btrfs: zoned: fix critical section of relocation inode writeback
btrfs: zoned: drop optimization of zone finish
fs/btrfs/block-group.h | 1 +
fs/btrfs/extent-tree.c | 20 +++++++++++++++--
fs/btrfs/extent_io.c | 3 ++-
fs/btrfs/inode.c | 2 ++
fs/btrfs/zoned.c | 50 +++++++++++++++++++++++++++++++++++++-----
fs/btrfs/zoned.h | 5 +++++
6 files changed, 73 insertions(+), 8 deletions(-)
--
2.35.1
Backport summary:
-----------------
The return logic in v4l2_m2m_qbuf() was adjusted manually so that it matches
the logic in the original commit (v4l2_m2m_adjust_mem_offset() being called
only if !ret and before the v4l2_m2m_try_schedule() call):
@@ -500,10 +510,16 @@ int v4l2_m2m_qbuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
return -EPERM;
}
ret = vb2_qbuf(vq, vdev->v4l2_dev->mdev, buf);
- if (!ret && !(buf->flags & V4L2_BUF_FLAG_IN_REQUEST))
+ if (ret)
+ return ret;
+
+ /* Adjust MMAP memory offsets for the CAPTURE queue */
+ v4l2_m2m_adjust_mem_offset(vq, buf);
+
+ if (!(buf->flags & V4L2_BUF_FLAG_IN_REQUEST))
v4l2_m2m_try_schedule(m2m_ctx);
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(v4l2_m2m_qbuf);
Build tested only.
Chen-Yu Tsai (1):
media: v4l2-mem2mem: Apply DST_QUEUE_OFF_BASE on MMAP buffers across
ioctls
drivers/media/v4l2-core/v4l2-mem2mem.c | 60 ++++++++++++++++++++------
1 file changed, 46 insertions(+), 14 deletions(-)
--
2.37.1
Commit 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time
/ preempted status", 2021-11-11) open coded the previous call to
kvm_map_gfn, but in doing so it dropped the comparison between the cached
guest physical address and the one in the MSR. This causes an incorrect
cache hit if the guest modifies the steal time address while the memslots
remain the same. This can happen with kexec, in which case the steal
time data is written at the address used by the old kernel instead of
the new one.
While at it, rename the variable from gfn to gpa since it is a plain
physical address and not a right-shifted one.
Reported-by: Dave Young <ruyang(a)redhat.com>
Reported-by: Xiaoying Yan <yiyan(a)redhat.com>
Analyzed-by: Dr. David Alan Gilbert <dgilbert(a)redhat.com>
Cc: David Woodhouse <dwmw(a)amazon.co.uk>
Cc: stable(a)vger.kernel.org
Fixes: 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time / preempted status")
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
---
arch/x86/kvm/x86.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e5fa335a4ea7..36dcf18b04bf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3380,6 +3380,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
struct kvm_steal_time __user *st;
struct kvm_memslots *slots;
+ gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
u64 steal;
u32 version;
@@ -3397,13 +3398,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
slots = kvm_memslots(vcpu->kvm);
if (unlikely(slots->generation != ghc->generation ||
+ gpa != ghc->gpa ||
kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
- gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
-
/* We rely on the fact that it fits in a single page. */
BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);
- if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) ||
+ if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st)) ||
kvm_is_error_hva(ghc->hva) || !ghc->memslot)
return;
}
--
2.37.1
These patches are backports for the 5.18 branch.
They all fix zoned-mode-related issues in btrfs.
Patch 3 looks different from upstream commit b3a3b0255797 ("btrfs:
zoned: drop optimization of zone finish") as a refactoring patch is not
picked into the stable branch. But, essentially, they do the same thing,
which is to always zone-finish the zones after a (nearly) full write.
Naohiro Aota (3):
btrfs: zoned: prevent allocation from previous data relocation BG
btrfs: zoned: fix critical section of relocation inode writeback
btrfs: zoned: drop optimization of zone finish
fs/btrfs/block-group.h | 1 +
fs/btrfs/extent-tree.c | 20 +++++++++++++++--
fs/btrfs/extent_io.c | 3 ++-
fs/btrfs/inode.c | 2 ++
fs/btrfs/zoned.c | 49 +++++++++++++++++++++++++++++++++++++-----
fs/btrfs/zoned.h | 5 +++++
6 files changed, 72 insertions(+), 8 deletions(-)
--
2.35.1
From: Wyes Karny <wyes.karny(a)amd.com>
[ Upstream commit 8bcedb4ce04750e1ccc9a6b6433387f6a9166a56 ]
When kernel is booted with idle=nomwait do not use MWAIT as the
default idle state.
If the user boots the kernel with idle=nomwait, it is a clear
direction to not use mwait as the default idle state.
However, the current code does not take this into consideration
while selecting the default idle state on x86.
Fix it by checking for the idle=nomwait boot option in
prefer_mwait_c1_over_halt().
Also update the documentation around idle=nomwait appropriately.
[ dhansen: tweak commit message ]
Signed-off-by: Wyes Karny <wyes.karny(a)amd.com>
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Tested-by: Zhang Rui <rui.zhang(a)intel.com>
Link: https://lkml.kernel.org/r/fdc2dc2d0a1bc21c2f53d989ea2d2ee3ccbc0dbe.16545383…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
Documentation/admin-guide/pm/cpuidle.rst | 15 +++++++++------
arch/x86/kernel/process.c | 9 ++++++---
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst
index e70b365dbc60..80cf2ef2a506 100644
--- a/Documentation/admin-guide/pm/cpuidle.rst
+++ b/Documentation/admin-guide/pm/cpuidle.rst
@@ -676,8 +676,8 @@ the ``menu`` governor to be used on the systems that use the ``ladder`` governor
by default this way, for example.
The other kernel command line parameters controlling CPU idle time management
-described below are only relevant for the *x86* architecture and some of
-them affect Intel processors only.
+described below are only relevant for the *x86* architecture and references
+to ``intel_idle`` affect Intel processors only.
The *x86* architecture support code recognizes three kernel command line
options related to CPU idle time management: ``idle=poll``, ``idle=halt``,
@@ -699,10 +699,13 @@ idle, so it very well may hurt single-thread computations performance as well as
energy-efficiency. Thus using it for performance reasons may not be a good idea
at all.]
-The ``idle=nomwait`` option disables the ``intel_idle`` driver and causes
-``acpi_idle`` to be used (as long as all of the information needed by it is
-there in the system's ACPI tables), but it is not allowed to use the
-``MWAIT`` instruction of the CPUs to ask the hardware to enter idle states.
+The ``idle=nomwait`` option prevents the use of ``MWAIT`` instruction of
+the CPU to enter idle states. When this option is used, the ``acpi_idle``
+driver will use the ``HLT`` instruction instead of ``MWAIT``. On systems
+running Intel processors, this option disables the ``intel_idle`` driver
+and forces the use of the ``acpi_idle`` driver instead. Note that in either
+case, ``acpi_idle`` driver will function only if all the information needed
+by it is in the system's ACPI tables.
In addition to the architecture-level kernel command line options affecting CPU
idle time management, there are parameters affecting individual ``CPUIdle``
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 571e38c9ee1d..068715a52ac1 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -659,6 +659,10 @@ static void amd_e400_idle(void)
*/
static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
{
+ /* User has disallowed the use of MWAIT. Fallback to HALT */
+ if (boot_option_idle_override == IDLE_NOMWAIT)
+ return 0;
+
if (c->x86_vendor != X86_VENDOR_INTEL)
return 0;
@@ -769,9 +773,8 @@ static int __init idle_setup(char *str)
} else if (!strcmp(str, "nomwait")) {
/*
* If the boot option of "idle=nomwait" is added,
- * it means that mwait will be disabled for CPU C2/C3
- * states. In such case it won't touch the variable
- * of boot_option_idle_override.
+ * it means that mwait will be disabled for CPU C1/C2/C3
+ * states.
*/
boot_option_idle_override = IDLE_NOMWAIT;
} else
--
2.35.1
From: Wyes Karny <wyes.karny(a)amd.com>
[ Upstream commit 8bcedb4ce04750e1ccc9a6b6433387f6a9166a56 ]
When kernel is booted with idle=nomwait do not use MWAIT as the
default idle state.
If the user boots the kernel with idle=nomwait, it is a clear
direction to not use mwait as the default idle state.
However, the current code does not take this into consideration
while selecting the default idle state on x86.
Fix it by checking for the idle=nomwait boot option in
prefer_mwait_c1_over_halt().
Also update the documentation around idle=nomwait appropriately.
[ dhansen: tweak commit message ]
Signed-off-by: Wyes Karny <wyes.karny(a)amd.com>
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Tested-by: Zhang Rui <rui.zhang(a)intel.com>
Link: https://lkml.kernel.org/r/fdc2dc2d0a1bc21c2f53d989ea2d2ee3ccbc0dbe.16545383…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
Documentation/admin-guide/pm/cpuidle.rst | 15 +++++++++------
arch/x86/kernel/process.c | 9 ++++++---
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst
index 10fde58d0869..3596e3714ec1 100644
--- a/Documentation/admin-guide/pm/cpuidle.rst
+++ b/Documentation/admin-guide/pm/cpuidle.rst
@@ -685,8 +685,8 @@ the ``menu`` governor to be used on the systems that use the ``ladder`` governor
by default this way, for example.
The other kernel command line parameters controlling CPU idle time management
-described below are only relevant for the *x86* architecture and some of
-them affect Intel processors only.
+described below are only relevant for the *x86* architecture and references
+to ``intel_idle`` affect Intel processors only.
The *x86* architecture support code recognizes three kernel command line
options related to CPU idle time management: ``idle=poll``, ``idle=halt``,
@@ -708,10 +708,13 @@ idle, so it very well may hurt single-thread computations performance as well as
energy-efficiency. Thus using it for performance reasons may not be a good idea
at all.]
-The ``idle=nomwait`` option disables the ``intel_idle`` driver and causes
-``acpi_idle`` to be used (as long as all of the information needed by it is
-there in the system's ACPI tables), but it is not allowed to use the
-``MWAIT`` instruction of the CPUs to ask the hardware to enter idle states.
+The ``idle=nomwait`` option prevents the use of ``MWAIT`` instruction of
+the CPU to enter idle states. When this option is used, the ``acpi_idle``
+driver will use the ``HLT`` instruction instead of ``MWAIT``. On systems
+running Intel processors, this option disables the ``intel_idle`` driver
+and forces the use of the ``acpi_idle`` driver instead. Note that in either
+case, ``acpi_idle`` driver will function only if all the information needed
+by it is in the system's ACPI tables.
In addition to the architecture-level kernel command line options affecting CPU
idle time management, there are parameters affecting individual ``CPUIdle``
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index a2823682d64e..4505d845daba 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -777,6 +777,10 @@ static void amd_e400_idle(void)
*/
static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
{
+ /* User has disallowed the use of MWAIT. Fallback to HALT */
+ if (boot_option_idle_override == IDLE_NOMWAIT)
+ return 0;
+
if (c->x86_vendor != X86_VENDOR_INTEL)
return 0;
@@ -885,9 +889,8 @@ static int __init idle_setup(char *str)
} else if (!strcmp(str, "nomwait")) {
/*
* If the boot option of "idle=nomwait" is added,
- * it means that mwait will be disabled for CPU C2/C3
- * states. In such case it won't touch the variable
- * of boot_option_idle_override.
+ * it means that mwait will be disabled for CPU C1/C2/C3
+ * states.
*/
boot_option_idle_override = IDLE_NOMWAIT;
} else
--
2.35.1
From: Wyes Karny <wyes.karny(a)amd.com>
[ Upstream commit 8bcedb4ce04750e1ccc9a6b6433387f6a9166a56 ]
When kernel is booted with idle=nomwait do not use MWAIT as the
default idle state.
If the user boots the kernel with idle=nomwait, it is a clear
direction to not use mwait as the default idle state.
However, the current code does not take this into consideration
while selecting the default idle state on x86.
Fix it by checking for the idle=nomwait boot option in
prefer_mwait_c1_over_halt().
Also update the documentation around idle=nomwait appropriately.
[ dhansen: tweak commit message ]
Signed-off-by: Wyes Karny <wyes.karny(a)amd.com>
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Tested-by: Zhang Rui <rui.zhang(a)intel.com>
Link: https://lkml.kernel.org/r/fdc2dc2d0a1bc21c2f53d989ea2d2ee3ccbc0dbe.16545383…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
Documentation/admin-guide/pm/cpuidle.rst | 15 +++++++++------
arch/x86/kernel/process.c | 9 ++++++---
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst
index aec2cd2aaea7..19754beb5a4e 100644
--- a/Documentation/admin-guide/pm/cpuidle.rst
+++ b/Documentation/admin-guide/pm/cpuidle.rst
@@ -612,8 +612,8 @@ the ``menu`` governor to be used on the systems that use the ``ladder`` governor
by default this way, for example.
The other kernel command line parameters controlling CPU idle time management
-described below are only relevant for the *x86* architecture and some of
-them affect Intel processors only.
+described below are only relevant for the *x86* architecture and references
+to ``intel_idle`` affect Intel processors only.
The *x86* architecture support code recognizes three kernel command line
options related to CPU idle time management: ``idle=poll``, ``idle=halt``,
@@ -635,10 +635,13 @@ idle, so it very well may hurt single-thread computations performance as well as
energy-efficiency. Thus using it for performance reasons may not be a good idea
at all.]
-The ``idle=nomwait`` option disables the ``intel_idle`` driver and causes
-``acpi_idle`` to be used (as long as all of the information needed by it is
-there in the system's ACPI tables), but it is not allowed to use the
-``MWAIT`` instruction of the CPUs to ask the hardware to enter idle states.
+The ``idle=nomwait`` option prevents the use of ``MWAIT`` instruction of
+the CPU to enter idle states. When this option is used, the ``acpi_idle``
+driver will use the ``HLT`` instruction instead of ``MWAIT``. On systems
+running Intel processors, this option disables the ``intel_idle`` driver
+and forces the use of the ``acpi_idle`` driver instead. Note that in either
+case, ``acpi_idle`` driver will function only if all the information needed
+by it is in the system's ACPI tables.
In addition to the architecture-level kernel command line options affecting CPU
idle time management, there are parameters affecting individual ``CPUIdle``
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 622dc3673c37..8011536ba5c4 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -824,6 +824,10 @@ static void amd_e400_idle(void)
*/
static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
{
+ /* User has disallowed the use of MWAIT. Fallback to HALT */
+ if (boot_option_idle_override == IDLE_NOMWAIT)
+ return 0;
+
if (c->x86_vendor != X86_VENDOR_INTEL)
return 0;
@@ -932,9 +936,8 @@ static int __init idle_setup(char *str)
} else if (!strcmp(str, "nomwait")) {
/*
* If the boot option of "idle=nomwait" is added,
- * it means that mwait will be disabled for CPU C2/C3
- * states. In such case it won't touch the variable
- * of boot_option_idle_override.
+ * it means that mwait will be disabled for CPU C1/C2/C3
+ * states.
*/
boot_option_idle_override = IDLE_NOMWAIT;
} else
--
2.35.1
Hello,
I'm MacKenzie Scott Ex-wife of Amazon CEO and founder, I'm donating $ 4 billion Dollars to charities, individuals, colleges across the Globe from Scott's foundation, to provide immediate support to people suffering economically from COVID-19 pandemic and you're one of the lucky winners, i have a donation grant worth $100,800,000.00 Dollars for you, you can contact me for more information if you're interested.
Regards,
MacKenzie Scott.
The Qualcomm dwc3 runtime-PM implementation checks the xhci
platform-device pointer in the wakeup-interrupt handler to determine
whether the controller is in host mode and if so triggers a resume.
After a role switch in OTG mode the xhci platform-device would have been
freed and the next wakeup from runtime suspend would access the freed
memory.
Note that role switching is executed from a freezable workqueue, which
guarantees that the pointer is stable during suspend.
Also note that runtime PM has been broken since commit 2664deb09306
("usb: dwc3: qcom: Honor wakeup enabled/disabled state"), which
incidentally also prevents this issue from being triggered.
Fixes: a4333c3a6ba9 ("usb: dwc3: Add Qualcomm DWC3 glue driver")
Cc: stable(a)vger.kernel.org # 4.18
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
Changes in v2
- new patch
drivers/usb/dwc3/dwc3-qcom.c | 14 +++++++++++++-
drivers/usb/dwc3/host.c | 1 +
2 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c
index e9364141661b..6884026b9fad 100644
--- a/drivers/usb/dwc3/dwc3-qcom.c
+++ b/drivers/usb/dwc3/dwc3-qcom.c
@@ -298,6 +298,14 @@ static void dwc3_qcom_interconnect_exit(struct dwc3_qcom *qcom)
icc_put(qcom->icc_path_apps);
}
+/* Only usable in contexts where the role can not change. */
+static bool dwc3_qcom_is_host(struct dwc3_qcom *qcom)
+{
+ struct dwc3 *dwc = platform_get_drvdata(qcom->dwc3);
+
+ return dwc->xhci;
+}
+
static enum usb_device_speed dwc3_qcom_read_usb2_speed(struct dwc3_qcom *qcom)
{
struct dwc3 *dwc = platform_get_drvdata(qcom->dwc3);
@@ -460,7 +468,11 @@ static irqreturn_t qcom_dwc3_resume_irq(int irq, void *data)
if (qcom->pm_suspended)
return IRQ_HANDLED;
- if (dwc->xhci)
+ /*
+ * This is safe as role switching is done from a freezable workqueue
+ * and the wakeup interrupts are disabled as part of resume.
+ */
+ if (dwc3_qcom_is_host(qcom))
pm_runtime_resume(&dwc->xhci->dev);
return IRQ_HANDLED;
diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c
index f56c30cf151e..f6f13e7f1ba1 100644
--- a/drivers/usb/dwc3/host.c
+++ b/drivers/usb/dwc3/host.c
@@ -135,4 +135,5 @@ int dwc3_host_init(struct dwc3 *dwc)
void dwc3_host_exit(struct dwc3 *dwc)
{
platform_device_unregister(dwc->xhci);
+ dwc->xhci = NULL;
}
--
2.35.1
From: Paolo Bonzini <pbonzini(a)redhat.com>
[ Upstream commit 6cd88243c7e03845a450795e134b488fc2afb736 ]
If a vCPU is outside guest mode and is scheduled out, it might be in the
process of making a memory access. A problem occurs if another vCPU uses
the PV TLB flush feature during the period when the vCPU is scheduled
out, and a virtual address has already been translated but has not yet
been accessed, because this is equivalent to using a stale TLB entry.
To avoid this, only report a vCPU as preempted if sure that the guest
is at an instruction boundary. A rescheduling request will be delivered
to the host physical CPU as an external interrupt, so for simplicity
consider any vmexit *not* instruction boundary except for external
interrupts.
It would in principle be okay to report the vCPU as preempted also
if it is sleeping in kvm_vcpu_block(): a TLB flush IPI will incur the
vmentry/vmexit overhead unnecessarily, and optimistic spinning is
also unlikely to succeed. However, leave it for later because right
now kvm_vcpu_check_block() is doing memory accesses. Even
though the TLB flush issue only applies to virtual memory addresses,
it's very much preferable to be conservative.
Reported-by: Jann Horn <jannh(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
arch/x86/include/asm/kvm_host.h | 3 +++
arch/x86/kvm/svm/svm.c | 2 ++
arch/x86/kvm/vmx/vmx.c | 1 +
arch/x86/kvm/x86.c | 22 ++++++++++++++++++++++
4 files changed, 28 insertions(+)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4ff36610af6a..9fdaa847d4b6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -651,6 +651,7 @@ struct kvm_vcpu_arch {
u64 ia32_misc_enable_msr;
u64 smbase;
u64 smi_count;
+ bool at_instruction_boundary;
bool tpr_access_reporting;
bool xsaves_enabled;
bool xfd_no_write_intercept;
@@ -1289,6 +1290,8 @@ struct kvm_vcpu_stat {
u64 nested_run;
u64 directed_yield_attempted;
u64 directed_yield_successful;
+ u64 preemption_reported;
+ u64 preemption_other;
u64 guest_mode;
};
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 7e45d03cd018..5842abf1eac4 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4165,6 +4165,8 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
{
+ if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_INTR)
+ vcpu->arch.at_instruction_boundary = true;
}
static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 982df9c000d3..c44f8e1d30c8 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6549,6 +6549,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
return;
handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
+ vcpu->arch.at_instruction_boundary = true;
}
static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 39c571224ac2..36453517e847 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -291,6 +291,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, nested_run),
STATS_DESC_COUNTER(VCPU, directed_yield_attempted),
STATS_DESC_COUNTER(VCPU, directed_yield_successful),
+ STATS_DESC_COUNTER(VCPU, preemption_reported),
+ STATS_DESC_COUNTER(VCPU, preemption_other),
STATS_DESC_ICOUNTER(VCPU, guest_mode)
};
@@ -4604,6 +4606,19 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
struct kvm_memslots *slots;
static const u8 preempted = KVM_VCPU_PREEMPTED;
+ /*
+ * The vCPU can be marked preempted if and only if the VM-Exit was on
+ * an instruction boundary and will not trigger guest emulation of any
+ * kind (see vcpu_run). Vendor specific code controls (conservatively)
+ * when this is true, for example allowing the vCPU to be marked
+ * preempted if and only if the VM-Exit was due to a host interrupt.
+ */
+ if (!vcpu->arch.at_instruction_boundary) {
+ vcpu->stat.preemption_other++;
+ return;
+ }
+
+ vcpu->stat.preemption_reported++;
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
@@ -10358,6 +10373,13 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.l1tf_flush_l1d = true;
for (;;) {
+ /*
+ * If another guest vCPU requests a PV TLB flush in the middle
+ * of instruction emulation, the rest of the emulation could
+ * use a stale page translation. Assume that any code after
+ * this point can start executing an instruction.
+ */
+ vcpu->arch.at_instruction_boundary = false;
if (kvm_vcpu_running(vcpu)) {
r = vcpu_enter_guest(vcpu);
} else {
--
2.35.1
From: Kyle Huey <me(a)kylehuey.com>
When management of the PKRU register was moved away from XSTATE, emulation
of PKRU's existence in XSTATE was added for APIs that read XSTATE, but not
for APIs that write XSTATE. This can be seen by running gdb and executing
`p $pkru`, `set $pkru = 42`, and `p $pkru`. On affected kernels (5.14+) the
write to the PKRU register (which gdb performs through ptrace) is ignored.
There are three relevant APIs: PTRACE_SETREGSET with NT_X86_XSTATE,
sigreturn, and KVM_SET_XSAVE. KVM_SET_XSAVE has its own special handling to
make PKRU writes take effect (in fpu_copy_uabi_to_guest_fpstate). Push that
down into copy_uabi_to_xstate and have PTRACE_SETREGSET with NT_X86_XSTATE
and sigreturn pass in pointers to the appropriate PKRU value.
This also adds code to initialize the PKRU value to the hardware init value
(namely 0) if the PKRU bit is not set in the XSTATE header to match XRSTOR.
This is a change to the current KVM_SET_XSAVE behavior.
Changelog since v3:
- The v3 patch is now part 1 of 2.
- Adds a selftest in part 2 of 2.
Changelog since v2:
- Removed now unused variables in fpu_copy_uabi_to_guest_fpstate
Changelog since v1:
- Handles the error case of copy_from_buffer().
Signed-off-by: Kyle Huey <me(a)kylehuey.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Borislav Petkov <bp(a)suse.de>
Cc: kvm(a)vger.kernel.org # For edge case behavior of KVM_SET_XSAVE
Cc: stable(a)vger.kernel.org # 5.14+
Fixes: e84ba47e313d ("x86/fpu: Hook up PKRU into ptrace()")
---
arch/x86/kernel/fpu/core.c | 13 +------------
arch/x86/kernel/fpu/regset.c | 2 +-
arch/x86/kernel/fpu/signal.c | 2 +-
arch/x86/kernel/fpu/xstate.c | 28 +++++++++++++++++++++++-----
arch/x86/kernel/fpu/xstate.h | 4 ++--
5 files changed, 28 insertions(+), 21 deletions(-)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 3b28c5b25e12..46b935bc87c8 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -391,8 +391,6 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf,
{
struct fpstate *kstate = gfpu->fpstate;
const union fpregs_state *ustate = buf;
- struct pkru_state *xpkru;
- int ret;
if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) {
if (ustate->xsave.header.xfeatures & ~XFEATURE_MASK_FPSSE)
@@ -406,16 +404,7 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf,
if (ustate->xsave.header.xfeatures & ~xcr0)
return -EINVAL;
- ret = copy_uabi_from_kernel_to_xstate(kstate, ustate);
- if (ret)
- return ret;
-
- /* Retrieve PKRU if not in init state */
- if (kstate->regs.xsave.header.xfeatures & XFEATURE_MASK_PKRU) {
- xpkru = get_xsave_addr(&kstate->regs.xsave, XFEATURE_PKRU);
- *vpkru = xpkru->pkru;
- }
- return 0;
+ return copy_uabi_from_kernel_to_xstate(kstate, ustate, vpkru);
}
EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate);
#endif /* CONFIG_KVM */
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 75ffaef8c299..6d056b68f4ed 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -167,7 +167,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
}
fpu_force_restore(fpu);
- ret = copy_uabi_from_kernel_to_xstate(fpu->fpstate, kbuf ?: tmpbuf);
+ ret = copy_uabi_from_kernel_to_xstate(fpu->fpstate, kbuf ?: tmpbuf, &target->thread.pkru);
out:
vfree(tmpbuf);
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 91d4b6de58ab..558076dbde5b 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -396,7 +396,7 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
fpregs = &fpu->fpstate->regs;
if (use_xsave() && !fx_only) {
- if (copy_sigframe_from_user_to_xstate(fpu->fpstate, buf_fx))
+ if (copy_sigframe_from_user_to_xstate(tsk, buf_fx))
return false;
} else {
if (__copy_from_user(&fpregs->fxsave, buf_fx,
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c8340156bfd2..e01d3514ae68 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1197,7 +1197,7 @@ static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
- const void __user *ubuf)
+ const void __user *ubuf, u32 *pkru)
{
struct xregs_state *xsave = &fpstate->regs.xsave;
unsigned int offset, size;
@@ -1235,6 +1235,24 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
for (i = 0; i < XFEATURE_MAX; i++) {
mask = BIT_ULL(i);
+ if (i == XFEATURE_PKRU) {
+ /*
+ * Retrieve PKRU if not in init state, otherwise
+ * initialize it.
+ */
+ if (hdr.xfeatures & mask) {
+ struct pkru_state xpkru = {0};
+
+ if (copy_from_buffer(&xpkru, xstate_offsets[i],
+ sizeof(xpkru), kbuf, ubuf))
+ return -EFAULT;
+
+ *pkru = xpkru.pkru;
+ } else {
+ *pkru = 0;
+ }
+ }
+
if (hdr.xfeatures & mask) {
void *dst = __raw_xsave_addr(xsave, i);
@@ -1264,9 +1282,9 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
* Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
* format and copy to the target thread. Used by ptrace and KVM.
*/
-int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf)
+int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru)
{
- return copy_uabi_to_xstate(fpstate, kbuf, NULL);
+ return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru);
}
/*
@@ -1274,10 +1292,10 @@ int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf)
* XSAVE[S] format and copy to the target thread. This is called from the
* sigreturn() and rt_sigreturn() system calls.
*/
-int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate,
+int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
const void __user *ubuf)
{
- return copy_uabi_to_xstate(fpstate, NULL, ubuf);
+ return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru);
}
static bool validate_independent_components(u64 mask)
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 5ad47031383b..a4ecb04d8d64 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -46,8 +46,8 @@ extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
u32 pkru_val, enum xstate_copy_mode copy_mode);
extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode mode);
-extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf);
-extern int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate, const void __user *ubuf);
+extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru);
+extern int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf);
extern void fpu__init_cpu_xstate(void);
--
2.37.0
From: Kyle Huey <me(a)kylehuey.com>
When management of the PKRU register was moved away from XSTATE, emulation
of PKRU's existence in XSTATE was added for APIs that read XSTATE, but not
for APIs that write XSTATE. This can be seen by running gdb and executing
`p $pkru`, `set $pkru = 42`, and `p $pkru`. On affected kernels (5.14+) the
write to the PKRU register (which gdb performs through ptrace) is ignored.
There are three relevant APIs: PTRACE_SETREGSET with NT_X86_XSTATE,
sigreturn, and KVM_SET_XSAVE. KVM_SET_XSAVE has its own special handling to
make PKRU writes take effect (in fpu_copy_uabi_to_guest_fpstate). Push that
down into copy_uabi_to_xstate and have PTRACE_SETREGSET with NT_X86_XSTATE
and sigreturn pass in pointers to the appropriate PKRU value.
This also adds code to initialize the PKRU value to the hardware init value
(namely 0) if the PKRU bit is not set in the XSTATE header to match XRSTOR.
This is a change to the current KVM_SET_XSAVE behavior.
Changelog since v1:
- Handles the error case of copy_from_buffer().
Signed-off-by: Kyle Huey <me(a)kylehuey.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Borislav Petkov <bp(a)suse.de>
Cc: kvm(a)vger.kernel.org # For edge case behavior of KVM_SET_XSAVE
Cc: stable(a)vger.kernel.org # 5.14+
Fixes: e84ba47e313d ("x86/fpu: Hook up PKRU into ptrace()")
---
arch/x86/kernel/fpu/core.c | 11 +----------
arch/x86/kernel/fpu/regset.c | 2 +-
arch/x86/kernel/fpu/signal.c | 2 +-
arch/x86/kernel/fpu/xstate.c | 28 +++++++++++++++++++++++-----
arch/x86/kernel/fpu/xstate.h | 4 ++--
5 files changed, 28 insertions(+), 19 deletions(-)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 0531d6a06df5..dfb79e2ee81f 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -406,16 +406,7 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf,
if (ustate->xsave.header.xfeatures & ~xcr0)
return -EINVAL;
- ret = copy_uabi_from_kernel_to_xstate(kstate, ustate);
- if (ret)
- return ret;
-
- /* Retrieve PKRU if not in init state */
- if (kstate->regs.xsave.header.xfeatures & XFEATURE_MASK_PKRU) {
- xpkru = get_xsave_addr(&kstate->regs.xsave, XFEATURE_PKRU);
- *vpkru = xpkru->pkru;
- }
- return 0;
+ return copy_uabi_from_kernel_to_xstate(kstate, ustate, vpkru);
}
EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate);
#endif /* CONFIG_KVM */
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 75ffaef8c299..6d056b68f4ed 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -167,7 +167,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
}
fpu_force_restore(fpu);
- ret = copy_uabi_from_kernel_to_xstate(fpu->fpstate, kbuf ?: tmpbuf);
+ ret = copy_uabi_from_kernel_to_xstate(fpu->fpstate, kbuf ?: tmpbuf, &target->thread.pkru);
out:
vfree(tmpbuf);
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 91d4b6de58ab..558076dbde5b 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -396,7 +396,7 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
fpregs = &fpu->fpstate->regs;
if (use_xsave() && !fx_only) {
- if (copy_sigframe_from_user_to_xstate(fpu->fpstate, buf_fx))
+ if (copy_sigframe_from_user_to_xstate(tsk, buf_fx))
return false;
} else {
if (__copy_from_user(&fpregs->fxsave, buf_fx,
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c8340156bfd2..e01d3514ae68 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1197,7 +1197,7 @@ static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
- const void __user *ubuf)
+ const void __user *ubuf, u32 *pkru)
{
struct xregs_state *xsave = &fpstate->regs.xsave;
unsigned int offset, size;
@@ -1235,6 +1235,24 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
for (i = 0; i < XFEATURE_MAX; i++) {
mask = BIT_ULL(i);
+ if (i == XFEATURE_PKRU) {
+ /*
+ * Retrieve PKRU if not in init state, otherwise
+ * initialize it.
+ */
+ if (hdr.xfeatures & mask) {
+ struct pkru_state xpkru = {0};
+
+ if (copy_from_buffer(&xpkru, xstate_offsets[i],
+ sizeof(xpkru), kbuf, ubuf))
+ return -EFAULT;
+
+ *pkru = xpkru.pkru;
+ } else {
+ *pkru = 0;
+ }
+ }
+
if (hdr.xfeatures & mask) {
void *dst = __raw_xsave_addr(xsave, i);
@@ -1264,9 +1282,9 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
* Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
* format and copy to the target thread. Used by ptrace and KVM.
*/
-int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf)
+int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru)
{
- return copy_uabi_to_xstate(fpstate, kbuf, NULL);
+ return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru);
}
/*
@@ -1274,10 +1292,10 @@ int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf)
* XSAVE[S] format and copy to the target thread. This is called from the
* sigreturn() and rt_sigreturn() system calls.
*/
-int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate,
+int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
const void __user *ubuf)
{
- return copy_uabi_to_xstate(fpstate, NULL, ubuf);
+ return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru);
}
static bool validate_independent_components(u64 mask)
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 5ad47031383b..a4ecb04d8d64 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -46,8 +46,8 @@ extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
u32 pkru_val, enum xstate_copy_mode copy_mode);
extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode mode);
-extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf);
-extern int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate, const void __user *ubuf);
+extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru);
+extern int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf);
extern void fpu__init_cpu_xstate(void);
--
2.37.0
From: Aurabindo Pillai <aurabindo.pillai(a)amd.com>
[Why & How]
eng_id for DCN303 cannot be more than 1, since we have only two
instances of stream encoders.
Check the correct boundary condition for engine ID for DCN303 to prevent
the potential out of bounds access.
Fixes: cd6d421e3d1a ("drm/amd/display: Initial DC support for Beige Goby")
Reported-by: Dan Carpenter <dan.carpenter(a)oracle.com>
Cc: <Stable(a)vger.kernel.org>
Reviewed-by: Chris Park <Chris.Park(a)amd.com>
Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira(a)amd.com>
Acked-by: Tom Chung <chiahsuan.chung(a)amd.com>
Signed-off-by: Aurabindo Pillai <aurabindo.pillai(a)amd.com>
---
drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
index 0a67f8a5656d..d97076648acb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
@@ -372,7 +372,7 @@ static struct stream_encoder *dcn303_stream_encoder_create(enum engine_id eng_id
int afmt_inst;
/* Mapping of VPG, AFMT, DME register blocks to DIO block instance */
- if (eng_id <= ENGINE_ID_DIGE) {
+ if (eng_id <= ENGINE_ID_DIGB) {
vpg_inst = eng_id;
afmt_inst = eng_id;
} else
--
2.25.1
Please revert the commits.
The usage of FLAG_SEND_ZLP causes problems with other firmware/hardware
versions that otherwise have no issues.
The patch needs reworking because using FLAG_SEND_ZLP in this context is not safe.
See:
https://patchwork.ozlabs.org/project/netdev/patch/1270599787.8900.8.camel@L…
reported by:
Ronald Wahl <ronald.wahl(a)raritan.com>
https://bugzilla.kernel.org/show_bug.cgi?id=216327
https://bugs.archlinux.org/task/75491
Signed-off-by: Jose Alonso <joalonsof(a)gmail.com>
--
net: usb: ax88179_178a needs FLAG_SEND_ZLP
commit 36a15e1cb134c0395261ba1940762703f778438c
stable tree:
v4.9.x
commit 1b1cf809a7e606230ac7015bf90259ac74c414f3
v4.14.x
commit 631b2b75867cb67a9e8dfa865f5b4e2037b6dd8b
v4.19.x
commit c46cc6297180a889449ebf1724ff2fa231532b89
v5.4.x
commit f88d8c18822963d75a2b85d0ea6bb5d791dd95bf
v5.10.x
commit f7c1fc0dec97b882c105a0887bef585471cf1262
v5.15.x
commit b34229f4b212367196d787170b02be6f31802622
v5.18.x
commit f8bfce2177cdd64357dd57b8f43154f236d25f51
From: Kyle Huey <me(a)kylehuey.com>
When management of the PKRU register was moved away from XSTATE, emulation
of PKRU's existence in XSTATE was added for APIs that read XSTATE, but not
for APIs that write XSTATE. This can be seen by running gdb and executing
`p $pkru`, `set $pkru = 42`, and `p $pkru`. On affected kernels (5.14+) the
write to the PKRU register (which gdb performs through ptrace) is ignored.
There are three relevant APIs: PTRACE_SETREGSET with NT_X86_XSTATE,
sigreturn, and KVM_SET_XSAVE. KVM_SET_XSAVE has its own special handling to
make PKRU writes take effect (in fpu_copy_uabi_to_guest_fpstate). Push that
down into copy_uabi_to_xstate and have PTRACE_SETREGSET with NT_X86_XSTATE
and sigreturn pass in pointers to the appropriate PKRU value.
This also adds code to initialize the PKRU value to the hardware init value
(namely 0) if the PKRU bit is not set in the XSTATE header to match XRSTOR.
This is a change to the current KVM_SET_XSAVE behavior.
Changelog since v2:
- Removed now unused variables in fpu_copy_uabi_to_guest_fpstate
Changelog since v1:
- Handles the error case of copy_from_buffer().
Signed-off-by: Kyle Huey <me(a)kylehuey.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Borislav Petkov <bp(a)suse.de>
Cc: kvm(a)vger.kernel.org # For edge case behavior of KVM_SET_XSAVE
Cc: stable(a)vger.kernel.org # 5.14+
Fixes: e84ba47e313d ("x86/fpu: Hook up PKRU into ptrace()")
---
arch/x86/kernel/fpu/core.c | 13 +------------
arch/x86/kernel/fpu/regset.c | 2 +-
arch/x86/kernel/fpu/signal.c | 2 +-
arch/x86/kernel/fpu/xstate.c | 28 +++++++++++++++++++++++-----
arch/x86/kernel/fpu/xstate.h | 4 ++--
5 files changed, 28 insertions(+), 21 deletions(-)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 3b28c5b25e12..46b935bc87c8 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -391,8 +391,6 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf,
{
struct fpstate *kstate = gfpu->fpstate;
const union fpregs_state *ustate = buf;
- struct pkru_state *xpkru;
- int ret;
if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) {
if (ustate->xsave.header.xfeatures & ~XFEATURE_MASK_FPSSE)
@@ -406,16 +404,7 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf,
if (ustate->xsave.header.xfeatures & ~xcr0)
return -EINVAL;
- ret = copy_uabi_from_kernel_to_xstate(kstate, ustate);
- if (ret)
- return ret;
-
- /* Retrieve PKRU if not in init state */
- if (kstate->regs.xsave.header.xfeatures & XFEATURE_MASK_PKRU) {
- xpkru = get_xsave_addr(&kstate->regs.xsave, XFEATURE_PKRU);
- *vpkru = xpkru->pkru;
- }
- return 0;
+ return copy_uabi_from_kernel_to_xstate(kstate, ustate, vpkru);
}
EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate);
#endif /* CONFIG_KVM */
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 75ffaef8c299..6d056b68f4ed 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -167,7 +167,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
}
fpu_force_restore(fpu);
- ret = copy_uabi_from_kernel_to_xstate(fpu->fpstate, kbuf ?: tmpbuf);
+ ret = copy_uabi_from_kernel_to_xstate(fpu->fpstate, kbuf ?: tmpbuf, &target->thread.pkru);
out:
vfree(tmpbuf);
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 91d4b6de58ab..558076dbde5b 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -396,7 +396,7 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
fpregs = &fpu->fpstate->regs;
if (use_xsave() && !fx_only) {
- if (copy_sigframe_from_user_to_xstate(fpu->fpstate, buf_fx))
+ if (copy_sigframe_from_user_to_xstate(tsk, buf_fx))
return false;
} else {
if (__copy_from_user(&fpregs->fxsave, buf_fx,
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c8340156bfd2..e01d3514ae68 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1197,7 +1197,7 @@ static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
- const void __user *ubuf)
+ const void __user *ubuf, u32 *pkru)
{
struct xregs_state *xsave = &fpstate->regs.xsave;
unsigned int offset, size;
@@ -1235,6 +1235,24 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
for (i = 0; i < XFEATURE_MAX; i++) {
mask = BIT_ULL(i);
+ if (i == XFEATURE_PKRU) {
+ /*
+ * Retrieve PKRU if not in init state, otherwise
+ * initialize it.
+ */
+ if (hdr.xfeatures & mask) {
+ struct pkru_state xpkru = {0};
+
+ if (copy_from_buffer(&xpkru, xstate_offsets[i],
+ sizeof(xpkru), kbuf, ubuf))
+ return -EFAULT;
+
+ *pkru = xpkru.pkru;
+ } else {
+ *pkru = 0;
+ }
+ }
+
if (hdr.xfeatures & mask) {
void *dst = __raw_xsave_addr(xsave, i);
@@ -1264,9 +1282,9 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
* Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
* format and copy to the target thread. Used by ptrace and KVM.
*/
-int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf)
+int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru)
{
- return copy_uabi_to_xstate(fpstate, kbuf, NULL);
+ return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru);
}
/*
@@ -1274,10 +1292,10 @@ int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf)
* XSAVE[S] format and copy to the target thread. This is called from the
* sigreturn() and rt_sigreturn() system calls.
*/
-int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate,
+int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
const void __user *ubuf)
{
- return copy_uabi_to_xstate(fpstate, NULL, ubuf);
+ return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru);
}
static bool validate_independent_components(u64 mask)
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 5ad47031383b..a4ecb04d8d64 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -46,8 +46,8 @@ extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
u32 pkru_val, enum xstate_copy_mode copy_mode);
extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode mode);
-extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf);
-extern int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate, const void __user *ubuf);
+extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru);
+extern int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf);
extern void fpu__init_cpu_xstate(void);
--
2.37.0
Hello.
Thanks for reading my message.
The page https://docs.kernel.org/admin-guide/reporting-issues.html has
information about sending messages to stable(a)vger.kernel.org.
I see a problem with BTRFS when using the dm-writecache module.
I wish to report that bug.
Where is the correct place to report bugs in kernel modules?
If stable(a)vger.kernel.org is not the correct address for my
message, please excuse me.
Have a good day, and thanks for your reply.
[CHANGELOG]
v2:
- Fix a crash in btrfs/158 caused by uninitialized
btrfs_raid_bio::fs_info
This is another change in the code base which needs manual handling.
Hi Greg and Sasha,
These two patches are backports for v5.15 and v5.10 (for v5.10 conflicts
can be auto resolved) stable branches.
(For older branches from v4.9 to v5.4, due to some naming change,
although the patches can be applied with auto-resolve, they won't compile).
These two patches are reducing the chance of destructive RMW cycle,
where btrfs can use corrupted data to generate new P/Q, thus making some
repairable data unrepairable.
Those patches are more important than what I initially thought, thus
unfortunately they are not CCed to stable by themselves.
Furthermore, due to recent refactors/renames, there are quite a few member
changes related to those patches, so they have to be manually backported.
One of the fastest ways to verify the behavior is the existing btrfs/125
test case from fstests. (not in auto group AFAIK).
Qu Wenruo (2):
btrfs: only write the sectors in the vertical stripe which has data
stripes
btrfs: raid56: don't trust any cached sector in
__raid56_parity_recover()
fs/btrfs/raid56.c | 74 ++++++++++++++++++++++++++++++++++++-----------
1 file changed, 57 insertions(+), 17 deletions(-)
--
2.37.0
Commit 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time
/ preempted status", 2021-11-11) open coded the previous call to
kvm_map_gfn, but in doing so it dropped the comparison between the cached
guest physical address and the one in the MSR. This causes an incorrect
cache hit if the guest modifies the steal time address while the memslots
remain the same. This can happen with kexec, in which case the preempted
bit is written at the address used by the old kernel instead of
the new one.
Cc: David Woodhouse <dwmw(a)amazon.co.uk>
Cc: stable(a)vger.kernel.org
Fixes: 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time / preempted status")
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
---
arch/x86/kvm/x86.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0f3c2e034740..8ee4698cb90a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4715,6 +4715,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
struct kvm_steal_time __user *st;
struct kvm_memslots *slots;
static const u8 preempted = KVM_VCPU_PREEMPTED;
+ gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
/*
* The vCPU can be marked preempted if and only if the VM-Exit was on
@@ -4742,6 +4743,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
slots = kvm_memslots(vcpu->kvm);
if (unlikely(slots->generation != ghc->generation ||
+ gpa != ghc->gpa ||
kvm_is_error_hva(ghc->hva) || !ghc->memslot))
return;
--
2.31.1
Hi,
Recently i forwarded you an important message and i have expecting
your response, please confirm my previous message and get back to me.
Mrs. Linda Harakan.
Hi Greg and Sasha,
These two patches are backports for v5.15 and v5.10 (for v5.10 conflicts
can be auto resolved) stable branches.
(For older branches from v4.9 to v5.4, due to some naming change,
although the patches can be applied with auto-resolve, they won't compile).
These two patches are reducing the chance of destructive RMW cycle,
where btrfs can use corrupted data to generate new P/Q, thus making some
repairable data unrepairable.
Those patches are more important than what I initially thought, thus
unfortunately they are not CCed to stable by themselves.
Furthermore, due to recent refactors/renames, there are quite a few member
changes related to those patches, so they have to be manually backported.
One of the fastest ways to verify the behavior is the existing btrfs/125
test case from fstests. (not in auto group AFAIK).
Qu Wenruo (2):
btrfs: only write the sectors in the vertical stripe which has data
stripes
btrfs: raid56: don't trust any cached sector in
__raid56_parity_recover()
fs/btrfs/raid56.c | 74 ++++++++++++++++++++++++++++++++++++-----------
1 file changed, 57 insertions(+), 17 deletions(-)
--
2.37.0
https://lore.kernel.org/stable/20220729031140.21806-1-chenjun102@huawei.com/
will make the build fail if CONFIG_FB=m.
The error is:
ERROR: "is_console_locked" [drivers/video/fbdev/core/fb.ko] undefined!
To fix it, backport d48de54a9dab ("printk: Export is_console_locked").
Hans de Goede (1):
printk: Export is_console_locked
kernel/printk/printk.c | 1 +
1 file changed, 1 insertion(+)
--
2.17.1
refer to https://lore.kernel.org/all/20220706150253.2186-1-deller@gmx.de/
3 patches are provided to fix CVE-2021-33655 (when malicious data is sent
to the kernel via the FBIOPUT_VSCREENINFO ioctl, the kernel will write memory
out of bounds. https://nvd.nist.gov/vuln/detail/CVE-2021-33655) in mainline.
The problem exists in 4.9
static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg)
fb_set_var(info, &var);
fb_notifier_call_chain(evnt, &event); // evnt = FB_EVENT_MODE_CHANGE
static int fbcon_event_notify(struct notifier_block *self,
unsigned long action, void *data)
fbcon_modechanged(info);
updatescrollmode(p, info, vc);
...
p->vrows = vyres/fh;
if (yres > (fh * (vc->vc_rows + 1)))
p->vrows -= (yres - (fh * vc->vc_rows)) / fh;
if ((yres % fh) && (vyres % fh < yres % fh))
p->vrows--; [1]
[1]: p->vrows could be -1, like what CVE-2021-33655 described.
To fix it, backport the three patches.
d48de54a9dab ("printk: Export is_console_locked") should also be backported
to avoid a build failure if CONFIG_FB=m.
Hans de Goede (1):
printk: Export is_console_locked
Helge Deller (3):
fbmem: Check virtual screen sizes in fb_set_var()
fbcon: Disallow setting font bigger than screen size
fbcon: Prevent that screen size is smaller than font size
drivers/video/console/fbcon.c | 33 ++++++++++++++++++++++++++++++++
drivers/video/fbdev/core/fbmem.c | 20 ++++++++++++++++---
include/linux/fbcon.h | 12 ++++++++++++
kernel/printk/printk.c | 1 +
4 files changed, 63 insertions(+), 3 deletions(-)
create mode 100644 include/linux/fbcon.h
--
2.17.1
From: Pablo Sun <pablo.sun(a)mediatek.com>
From: Pablo Sun <pablo.sun(a)mediatek.com>
Fix incorrect pin assignment values when connecting to a monitor with
Type-C receptacle instead of a plug.
According to the specification, a UFP_D receptacle's pin assignment
should come from the UFP_D pin assignments field (bit 23:16), while
a UFP_D plug's assignments are described in the DFP_D pin assignments
(bit 15:8) during Mode Discovery.
For example the LG 27 UL850-W is a monitor with Type-C receptacle.
The monitor responds to MODE DISCOVERY command with following
DisplayPort Capability flag:
dp->alt->vdo=0x140045
The existing logic only takes care of the UFP_D plug case,
and would take bits 15:8 for this 0x140045 case.
This results in a non-existent pin assignment 0x0 in
dp_altmode_configure.
To fix this problem, a new set of macros is introduced
to take plug/receptacle differences into consideration.
Co-developed-by: Pablo Sun <pablo.sun(a)mediatek.com>
Signed-off-by: Pablo Sun <pablo.sun(a)mediatek.com>
Co-developed-by: Macpaul Lin <macpaul.lin(a)mediatek.com>
Signed-off-by: Macpaul Lin <macpaul.lin(a)mediatek.com>
Reviewed-by: Guillaume Ranquet <granquet(a)baylibre.com>
Cc: stable(a)vger.kernel.org
---
drivers/usb/typec/altmodes/displayport.c | 4 ++--
include/linux/usb/typec_dp.h | 5 +++++
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c
index 9360ca177c7d..8dd0e505ef99 100644
--- a/drivers/usb/typec/altmodes/displayport.c
+++ b/drivers/usb/typec/altmodes/displayport.c
@@ -98,8 +98,8 @@ static int dp_altmode_configure(struct dp_altmode *dp, u8 con)
case DP_STATUS_CON_UFP_D:
case DP_STATUS_CON_BOTH: /* NOTE: First acting as DP source */
conf |= DP_CONF_UFP_U_AS_UFP_D;
- pin_assign = DP_CAP_DFP_D_PIN_ASSIGN(dp->alt->vdo) &
- DP_CAP_UFP_D_PIN_ASSIGN(dp->port->vdo);
+ pin_assign = DP_CAP_PIN_ASSIGN_UFP_D(dp->alt->vdo) &
+ DP_CAP_PIN_ASSIGN_DFP_D(dp->port->vdo);
break;
default:
break;
diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h
index cfb916cccd31..8d09c2f0a9b8 100644
--- a/include/linux/usb/typec_dp.h
+++ b/include/linux/usb/typec_dp.h
@@ -73,6 +73,11 @@ enum {
#define DP_CAP_USB BIT(7)
#define DP_CAP_DFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(15, 8)) >> 8)
#define DP_CAP_UFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(23, 16)) >> 16)
+/* Get pin assignment taking plug & receptacle into consideration */
+#define DP_CAP_PIN_ASSIGN_UFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \
+ DP_CAP_UFP_D_PIN_ASSIGN(_cap_) : DP_CAP_DFP_D_PIN_ASSIGN(_cap_))
+#define DP_CAP_PIN_ASSIGN_DFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \
+ DP_CAP_DFP_D_PIN_ASSIGN(_cap_) : DP_CAP_UFP_D_PIN_ASSIGN(_cap_))
/* DisplayPort Status Update VDO bits */
#define DP_STATUS_CONNECTION(_status_) ((_status_) & 3)
--
2.18.0
Hello, Good Morning,
My name is Jean-Jacque Kwame, an Ivorian. I would like to talk to you about a business opportunity. Please get back to me for more details.
Jean-Jacque Kwame
From: Kyle Huey <me(a)kylehuey.com>
When management of the PKRU register was moved away from XSTATE, emulation
of PKRU's existence in XSTATE was added for APIs that read XSTATE, but not
for APIs that write XSTATE. This can be seen by running gdb and executing
`p $pkru`, `set $pkru = 42`, and `p $pkru`. On affected kernels (5.14+) the
write to the PKRU register (which gdb performs through ptrace) is ignored.
There are three relevant APIs: PTRACE_SETREGSET with NT_X86_XSTATE,
sigreturn, and KVM_SET_XSAVE. KVM_SET_XSAVE has its own special handling to
make PKRU writes take effect (in fpu_copy_uabi_to_guest_fpstate). Push that
down into copy_uabi_to_xstate and have PTRACE_SETREGSET with NT_X86_XSTATE
and sigreturn pass in pointers to the appropriate PKRU value.
This also adds code to initialize the PKRU value to the hardware init value
(namely 0) if the PKRU bit is not set in the XSTATE header to match XRSTOR.
This is a change to the current KVM_SET_XSAVE behavior.
Signed-off-by: Kyle Huey <me(a)kylehuey.com>
Cc: kvm(a)vger.kernel.org # For edge case behavior of KVM_SET_XSAVE
Cc: stable(a)vger.kernel.org # 5.14+
Fixes: e84ba47e313dbc097bf859bb6e4f9219883d5f78
---
arch/x86/kernel/fpu/core.c | 11 +----------
arch/x86/kernel/fpu/regset.c | 2 +-
arch/x86/kernel/fpu/signal.c | 2 +-
arch/x86/kernel/fpu/xstate.c | 26 +++++++++++++++++++++-----
arch/x86/kernel/fpu/xstate.h | 4 ++--
5 files changed, 26 insertions(+), 19 deletions(-)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 0531d6a06df5..dfb79e2ee81f 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -406,16 +406,7 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf,
if (ustate->xsave.header.xfeatures & ~xcr0)
return -EINVAL;
- ret = copy_uabi_from_kernel_to_xstate(kstate, ustate);
- if (ret)
- return ret;
-
- /* Retrieve PKRU if not in init state */
- if (kstate->regs.xsave.header.xfeatures & XFEATURE_MASK_PKRU) {
- xpkru = get_xsave_addr(&kstate->regs.xsave, XFEATURE_PKRU);
- *vpkru = xpkru->pkru;
- }
- return 0;
+ return copy_uabi_from_kernel_to_xstate(kstate, ustate, vpkru);
}
EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate);
#endif /* CONFIG_KVM */
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 75ffaef8c299..6d056b68f4ed 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -167,7 +167,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
}
fpu_force_restore(fpu);
- ret = copy_uabi_from_kernel_to_xstate(fpu->fpstate, kbuf ?: tmpbuf);
+ ret = copy_uabi_from_kernel_to_xstate(fpu->fpstate, kbuf ?: tmpbuf, &target->thread.pkru);
out:
vfree(tmpbuf);
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 91d4b6de58ab..558076dbde5b 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -396,7 +396,7 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
fpregs = &fpu->fpstate->regs;
if (use_xsave() && !fx_only) {
- if (copy_sigframe_from_user_to_xstate(fpu->fpstate, buf_fx))
+ if (copy_sigframe_from_user_to_xstate(tsk, buf_fx))
return false;
} else {
if (__copy_from_user(&fpregs->fxsave, buf_fx,
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c8340156bfd2..1eea7af4afd9 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1197,7 +1197,7 @@ static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
- const void __user *ubuf)
+ const void __user *ubuf, u32 *pkru)
{
struct xregs_state *xsave = &fpstate->regs.xsave;
unsigned int offset, size;
@@ -1235,6 +1235,22 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
for (i = 0; i < XFEATURE_MAX; i++) {
mask = BIT_ULL(i);
+ if (i == XFEATURE_PKRU) {
+ /*
+ * Retrieve PKRU if not in init state, otherwise
+ * initialize it.
+ */
+ if (hdr.xfeatures & mask) {
+ struct pkru_state xpkru = {0};
+
+ copy_from_buffer(&xpkru, xstate_offsets[i],
+ sizeof(xpkru), kbuf, ubuf);
+ *pkru = xpkru.pkru;
+ } else {
+ *pkru = 0;
+ }
+ }
+
if (hdr.xfeatures & mask) {
void *dst = __raw_xsave_addr(xsave, i);
@@ -1264,9 +1280,9 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
* Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
* format and copy to the target thread. Used by ptrace and KVM.
*/
-int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf)
+int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru)
{
- return copy_uabi_to_xstate(fpstate, kbuf, NULL);
+ return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru);
}
/*
@@ -1274,10 +1290,10 @@ int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf)
* XSAVE[S] format and copy to the target thread. This is called from the
* sigreturn() and rt_sigreturn() system calls.
*/
-int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate,
+int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
const void __user *ubuf)
{
- return copy_uabi_to_xstate(fpstate, NULL, ubuf);
+ return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru);
}
static bool validate_independent_components(u64 mask)
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 5ad47031383b..a4ecb04d8d64 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -46,8 +46,8 @@ extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
u32 pkru_val, enum xstate_copy_mode copy_mode);
extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode mode);
-extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf);
-extern int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate, const void __user *ubuf);
+extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru);
+extern int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf);
extern void fpu__init_cpu_xstate(void);
--
2.37.0
The patch titled
Subject: mm: vmscan: fix extreme overreclaim and swap floods
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-vmscan-fix-extreme-overreclaim-and-swap-floods.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Johannes Weiner <hannes(a)cmpxchg.org>
Subject: mm: vmscan: fix extreme overreclaim and swap floods
Date: Tue, 2 Aug 2022 12:28:11 -0400
During proactive reclaim, we sometimes observe severe overreclaim, with
several thousand times more pages reclaimed than requested.
This trace was obtained from shrink_lruvec() during such an instance:
prio:0 anon_cost:1141521 file_cost:7767
nr_reclaimed:4387406 nr_to_reclaim:1047 (or_factor:4190)
nr=[7161123 345 578 1111]
While the reclaimer requested 4M, vmscan reclaimed close to 16G, most of it
by swapping. These requests take over a minute, during which the write()
to memory.reclaim is unkillably stuck inside the kernel.
Digging into the source, this is caused by the proportional reclaim
bailout logic. This code tries to resolve a fundamental conflict: to
reclaim roughly what was requested, while also aging all LRUs fairly and
in accordance to their size, swappiness, refault rates etc. The way it
attempts fairness is that once the reclaim goal has been reached, it stops
scanning the LRUs with the smaller remaining scan targets, and adjusts the
remainder of the bigger LRUs according to how much of the smaller LRUs was
scanned. It then finishes scanning that remainder regardless of the
reclaim goal.
This works fine if priority levels are low and the LRU lists are
comparable in size. However, in this instance, the cgroup that is
targeted by proactive reclaim has almost no files left - they've already
been squeezed out by proactive reclaim earlier - and the remaining anon
pages are hot. Anon rotations cause the priority level to drop to 0,
which results in reclaim targeting all of anon (a lot) and all of file
(almost nothing). By the time reclaim decides to bail, it has scanned
most or all of the file target, and therefore must also scan most or all of
the enormous anon target. This target is thousands of times larger than
the reclaim goal, thus causing the overreclaim.
The bailout code hasn't changed in years, why is this failing now? The
most likely explanations are two other recent changes in anon reclaim:
1. Before the series starting with commit 5df741963d52 ("mm: fix LRU
balancing effect of new transparent huge pages"), the VM was
overall relatively reluctant to swap at all, even if swap was
configured. This means the LRU balancing code didn't come into play
as often as it does now, and mostly in high pressure situations
where pronounced swap activity wouldn't be as surprising.
2. For historic reasons, shrink_lruvec() loops on the scan targets of
all LRU lists except the active anon one, meaning it would bail if
the only remaining pages to scan were active anon - even if there
were a lot of them.
Before the series starting with commit ccc5dc67340c ("mm/vmscan:
make active/inactive ratio as 1:1 for anon lru"), most anon pages
would live on the active LRU; the inactive one would contain only a
handful of preselected reclaim candidates. After the series, anon
gets aged similarly to file, and the inactive list is the default
for new anon pages as well, making it often the much bigger list.
As a result, the VM is now more likely to actually finish large
anon targets than before.
Change the code such that only one SWAP_CLUSTER_MAX-sized nudge toward the
larger LRU lists is made before bailing out on a met reclaim goal.
This fixes the extreme overreclaim problem.
Fairness is more subtle and harder to evaluate. No obvious misbehavior
was observed on the test workload, in any case. Conceptually, fairness
should primarily be a cumulative effect from regular, lower priority
scans. Once the VM is in trouble and needs to escalate scan targets to
make forward progress, fairness needs to take a backseat. This is also
acknowledged by the myriad exceptions in get_scan_count(). This patch
makes fairness decrease gradually, as it keeps fairness work static over
increasing priority levels with growing scan targets. This should make
more sense - although we may have to re-visit the exact values.
Link: https://lkml.kernel.org/r/20220802162811.39216-1-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Mel Gorman <mgorman(a)techsingularity.net>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim(a)lge.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/vmscan.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
--- a/mm/vmscan.c~mm-vmscan-fix-extreme-overreclaim-and-swap-floods
+++ a/mm/vmscan.c
@@ -2897,8 +2897,8 @@ static void shrink_lruvec(struct lruvec
enum lru_list lru;
unsigned long nr_reclaimed = 0;
unsigned long nr_to_reclaim = sc->nr_to_reclaim;
+ bool proportional_reclaim;
struct blk_plug plug;
- bool scan_adjusted;
get_scan_count(lruvec, sc, nr);
@@ -2916,8 +2916,8 @@ static void shrink_lruvec(struct lruvec
* abort proportional reclaim if either the file or anon lru has already
* dropped to zero at the first pass.
*/
- scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
- sc->priority == DEF_PRIORITY);
+ proportional_reclaim = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
+ sc->priority == DEF_PRIORITY);
blk_start_plug(&plug);
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -2937,7 +2937,7 @@ static void shrink_lruvec(struct lruvec
cond_resched();
- if (nr_reclaimed < nr_to_reclaim || scan_adjusted)
+ if (nr_reclaimed < nr_to_reclaim || proportional_reclaim)
continue;
/*
@@ -2988,8 +2988,6 @@ static void shrink_lruvec(struct lruvec
nr_scanned = targets[lru] - nr[lru];
nr[lru] = targets[lru] * (100 - percentage) / 100;
nr[lru] -= min(nr[lru], nr_scanned);
-
- scan_adjusted = true;
}
blk_finish_plug(&plug);
sc->nr_reclaimed += nr_reclaimed;
_
Patches currently in -mm which might be from hannes(a)cmpxchg.org are
mm-vmscan-fix-extreme-overreclaim-and-swap-floods.patch
This reverts commit c814153c83a892dfd42026eaa661ae2c1f298792.
The commit c814153c83a8 "nvme-fc: fold t fc_update_appid into fc_appid_store"
changed the userspace interface, because the code that decrements "count"
to remove a trailing '\n' in the parsing results in the decremented value
incorrectly being returned from the sysfs write. Fix this by reverting the commit.
Cc: stable(a)vger.kernel.org
Signed-off-by: Ewan D. Milne <emilne(a)redhat.com>
---
drivers/nvme/host/fc.c | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 9987797620b6..27f6dfad5d3b 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3873,10 +3873,12 @@ static int fc_parse_cgrpid(const char *buf, u64 *id)
}
/*
- * Parse and update the appid in the blkcg associated with the cgroupid.
+ * fc_update_appid: Parse and update the appid in the blkcg associated with
+ * cgroupid.
+ * @buf: buf contains both cgrpid and appid info
+ * @count: size of the buffer
*/
-static ssize_t fc_appid_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t count)
+static int fc_update_appid(const char *buf, size_t count)
{
u64 cgrp_id;
int appid_len = 0;
@@ -3904,6 +3906,17 @@ static ssize_t fc_appid_store(struct device *dev,
return ret;
return count;
}
+
+static ssize_t fc_appid_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ int ret = 0;
+
+ ret = fc_update_appid(buf, count);
+ if (ret < 0)
+ return -EINVAL;
+ return count;
+}
static DEVICE_ATTR(appid_store, 0200, NULL, fc_appid_store);
#endif /* CONFIG_BLK_CGROUP_FC_APPID */
--
2.20.1
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b648ab487f31bc4c38941bc770ea97fe394304bb Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben(a)decadent.org.uk>
Date: Sat, 23 Jul 2022 17:22:47 +0200
Subject: [PATCH] x86/speculation: Make all RETbleed mitigations 64-bit only
The mitigations for RETBleed are currently ineffective on x86_32 since
entry_32.S does not use the required macros. However, for an x86_32
target, the kconfig symbols for them are still enabled by default and
/sys/devices/system/cpu/vulnerabilities/retbleed will wrongly report
that mitigations are in place.
Make all of these symbols depend on X86_64, and only enable RETHUNK by
default on X86_64.
Fixes: f43b9876e857 ("x86/retbleed: Add fine grained Kconfig knobs")
Signed-off-by: Ben Hutchings <ben(a)decadent.org.uk>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/YtwSR3NNsWp1ohfV@decadent.org.uk
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e58798f636d4..1670a3fed263 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2473,7 +2473,7 @@ config RETHUNK
bool "Enable return-thunks"
depends on RETPOLINE && CC_HAS_RETURN_THUNK
select OBJTOOL if HAVE_OBJTOOL
- default y
+ default y if X86_64
help
Compile the kernel with the return-thunks compiler option to guard
against kernel-to-user data leaks by avoiding return speculation.
@@ -2482,21 +2482,21 @@ config RETHUNK
config CPU_UNRET_ENTRY
bool "Enable UNRET on kernel entry"
- depends on CPU_SUP_AMD && RETHUNK
+ depends on CPU_SUP_AMD && RETHUNK && X86_64
default y
help
Compile the kernel with support for the retbleed=unret mitigation.
config CPU_IBPB_ENTRY
bool "Enable IBPB on kernel entry"
- depends on CPU_SUP_AMD
+ depends on CPU_SUP_AMD && X86_64
default y
help
Compile the kernel with support for the retbleed=ibpb mitigation.
config CPU_IBRS_ENTRY
bool "Enable IBRS on kernel entry"
- depends on CPU_SUP_INTEL
+ depends on CPU_SUP_INTEL && X86_64
default y
help
Compile the kernel with support for the spectre_v2=ibrs mitigation.
I'm announcing the release of the 5.15.59 kernel.
All users of the 5.15 kernel series must upgrade.
The updated 5.15.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-5.15.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Documentation/admin-guide/kernel-parameters.txt | 2
Documentation/networking/ip-sysctl.rst | 9 +
Makefile | 2
arch/arm/include/asm/dma.h | 2
arch/arm/lib/xor-neon.c | 3
arch/s390/include/asm/archrandom.h | 9 +
arch/x86/kernel/cpu/bugs.c | 1
drivers/edac/ghes_edac.c | 11 +
drivers/gpu/drm/nouveau/nouveau_dmem.c | 6 -
drivers/gpu/drm/tiny/simpledrm.c | 2
drivers/net/ethernet/intel/i40e/i40e_main.c | 4
drivers/net/ethernet/intel/ice/ice_ethtool.c | 3
drivers/net/ethernet/intel/ice/ice_main.c | 8 -
drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c | 106 +++++++++++++------
drivers/net/ethernet/sfc/ptp.c | 22 +++
drivers/net/macsec.c | 33 +++--
drivers/net/pcs/pcs-xpcs.c | 2
drivers/net/sungem_phy.c | 1
drivers/net/virtio_net.c | 37 ++++++
drivers/scsi/mpt3sas/mpt3sas_scsih.c | 1
drivers/scsi/scsi_ioctl.c | 2
drivers/scsi/ufs/ufshcd-pltfrm.c | 15 ++
fs/ntfs/attrib.c | 8 +
fs/ocfs2/ocfs2.h | 4
fs/ocfs2/slot_map.c | 46 +++-----
fs/ocfs2/super.c | 21 ---
fs/read_write.c | 3
include/asm-generic/io.h | 2
include/net/addrconf.h | 3
include/net/bluetooth/l2cap.h | 1
include/net/inet_connection_sock.h | 10 -
include/net/sock.h | 8 -
include/net/tcp.h | 2
kernel/locking/rwsem.c | 30 +++--
kernel/watch_queue.c | 58 ++++++----
mm/hmm.c | 19 +--
mm/hugetlb.c | 1
mm/memory.c | 7 -
mm/page_alloc.c | 12 +-
mm/secretmem.c | 33 ++++-
net/bluetooth/l2cap_core.c | 61 ++++++++--
net/decnet/af_decnet.c | 4
net/ipv4/fib_trie.c | 7 -
net/ipv4/igmp.c | 24 ++--
net/ipv4/tcp.c | 8 -
net/ipv4/tcp_input.c | 41 +++----
net/ipv4/tcp_ipv4.c | 4
net/ipv4/tcp_metrics.c | 10 -
net/ipv4/tcp_output.c | 21 +--
net/ipv6/mcast.c | 14 +-
net/ipv6/ping.c | 6 +
net/ipv6/tcp_ipv6.c | 4
net/mptcp/protocol.c | 8 -
net/netfilter/nfnetlink_queue.c | 7 +
net/sctp/associola.c | 5
net/sctp/stream.c | 19 ---
net/sctp/stream_sched.c | 2
net/tipc/socket.c | 2
net/tls/tls_device.c | 7 +
tools/perf/util/symbol-elf.c | 45 +++++++-
60 files changed, 546 insertions(+), 302 deletions(-)
Alejandro Lucero (1):
sfc: disable softirqs for ptp TX
Alistair Popple (1):
nouveau/svm: Fix to migrate all requested pages
Andrei Vagin (1):
fs: sendfile handles O_NONBLOCK of out_fd
ChenXiaoSong (1):
ntfs: fix use-after-free in ntfs_ucsncmp()
David Howells (1):
watch_queue: Fix missing rcu annotation
David Jeffery (1):
scsi: mpt3sas: Stop fw fault watchdog work item during system shutdown
Duoming Zhou (1):
sctp: fix sleep in atomic context bug in timer handlers
Eiichi Tsukata (1):
docs/kernel-parameters: Update descriptions for "mitigations=" param with retbleed
Florian Fainelli (1):
ARM: 9216/1: Fix MAX_DMA_ADDRESS overflow
Florian Westphal (1):
netfilter: nf_queue: do not allow packet truncation below transport header offset
Greg Kroah-Hartman (2):
ARM: crypto: comment out gcc warning that breaks clang builds
Linux 5.15.59
Harald Freudenberger (1):
s390/archrandom: prevent CPACF trng invocations in interrupt context
Jaewon Kim (1):
page_alloc: fix invalid watermark check on a negative value
Jason Wang (1):
virtio-net: fix the race between refill work and close
Jason Yan (1):
scsi: core: Fix warning in scsi_alloc_sgtables()
Jianglei Nie (1):
net: macsec: fix potential resource leak in macsec_add_rxsa() and macsec_add_txsa()
Josef Bacik (1):
mm: fix page leak with multiple threads mapping the same page
Junxiao Bi (1):
Revert "ocfs2: mount shared volume without ha stack"
Kuniyuki Iwashima (22):
tcp: Fix data-races around sysctl_tcp_dsack.
tcp: Fix a data-race around sysctl_tcp_app_win.
tcp: Fix a data-race around sysctl_tcp_adv_win_scale.
tcp: Fix a data-race around sysctl_tcp_frto.
tcp: Fix a data-race around sysctl_tcp_nometrics_save.
tcp: Fix data-races around sysctl_tcp_no_ssthresh_metrics_save.
tcp: Fix data-races around sysctl_tcp_moderate_rcvbuf.
tcp: Fix a data-race around sysctl_tcp_limit_output_bytes.
tcp: Fix a data-race around sysctl_tcp_challenge_ack_limit.
net: ping6: Fix memleak in ipv6_renew_options().
igmp: Fix data-races around sysctl_igmp_qrv.
tcp: Fix a data-race around sysctl_tcp_min_tso_segs.
tcp: Fix a data-race around sysctl_tcp_min_rtt_wlen.
tcp: Fix a data-race around sysctl_tcp_autocorking.
tcp: Fix a data-race around sysctl_tcp_invalid_ratelimit.
tcp: Fix data-races around sk_pacing_rate.
net: Fix data-races around sysctl_[rw]mem(_offset)?.
tcp: Fix a data-race around sysctl_tcp_comp_sack_delay_ns.
tcp: Fix a data-race around sysctl_tcp_comp_sack_slack_ns.
tcp: Fix a data-race around sysctl_tcp_comp_sack_nr.
tcp: Fix data-races around sysctl_tcp_reflect_tos.
ipv4: Fix data-races around sysctl_fib_notify_on_flag_change.
Leo Yan (1):
perf symbol: Correct address for bss symbols
Liang He (2):
scsi: ufs: host: Hold reference returned by of_parse_phandle()
net: sungem_phy: Add of_node_put() for reference returned by of_get_parent()
Linus Torvalds (1):
watch_queue: Fix missing locking in add_watch_to_object()
Luiz Augusto von Dentz (1):
Bluetooth: L2CAP: Fix use-after-free caused by l2cap_chan_put
Lukas Bulwahn (1):
asm-generic: remove a broken and needless ifdef conditional
Maciej Fijalkowski (2):
ice: check (DD | EOF) bits on Rx descriptor rather than (EOP | RS)
ice: do not setup vlan for loopback VSI
Maxim Mikityanskiy (1):
net/tls: Remove the context from the list in tls_device_down
Miaohe Lin (1):
hugetlb: fix memoryleak in hugetlb_mcopy_atomic_pte
Michal Maloszewski (1):
i40e: Fix interface init with MSI interrupts (no MSI-X)
Mike Rapoport (1):
secretmem: fix unhandled fault in truncate
Nathan Chancellor (1):
drm/simpledrm: Fix return type of simpledrm_simple_display_pipe_mode_valid()
Ralph Campbell (1):
mm/hmm: fault non-owner device private entries
Sabrina Dubroca (4):
macsec: fix NULL deref in macsec_add_rxsa
macsec: fix error message in macsec_add_rxsa and _txsa
macsec: limit replay window size with XPN
macsec: always read MACSEC_SA_ATTR_PN as a u64
Subbaraya Sundeep (1):
octeontx2-pf: Fix UDP/TCP src and dst port tc filters
Sunil Goutham (1):
octeontx2-pf: cn10k: Fix egress ratelimit configuration
Taehee Yoo (1):
net: mld: fix reference count leak in mld_{query | report}_work()
Thadeu Lima de Souza Cascardo (1):
x86/bugs: Do not enable IBPB at firmware entry when IBPB is not available
Toshi Kani (1):
EDAC/ghes: Set the DIMM label unconditionally
Vladimir Oltean (1):
net: pcs: xpcs: propagate xpcs_read error to xpcs_get_state_c37_sgmii
Waiman Long (1):
locking/rwsem: Allow slowpath writer to ignore handoff bit if not set by first waiter
Wei Wang (1):
Revert "tcp: change pingpong threshold to 3"
Xin Long (2):
Documentation: fix sctp_wmem in ip-sysctl.rst
sctp: leave the err path free in sctp_stream_init to sctp_stream_free
Ziyang Xuan (1):
ipv6/addrconf: fix a null-ptr-deref bug for ip6_ptr
I'm announcing the release of the 5.10.135 kernel.
All users of the 5.10 kernel series must upgrade.
The updated 5.10.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-5.10.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Documentation/admin-guide/kernel-parameters.txt | 2
Documentation/networking/ip-sysctl.rst | 9
Makefile | 2
arch/arm/include/asm/dma.h | 2
arch/arm/lib/xor-neon.c | 3
arch/s390/include/asm/archrandom.h | 9
arch/x86/kernel/cpu/bugs.c | 1
drivers/edac/ghes_edac.c | 11
drivers/gpu/drm/nouveau/nouveau_dmem.c | 6
drivers/net/ethernet/intel/i40e/i40e_main.c | 4
drivers/net/ethernet/intel/ice/ice_ethtool.c | 3
drivers/net/ethernet/intel/ice/ice_main.c | 8
drivers/net/ethernet/sfc/ptp.c | 22 +
drivers/net/macsec.c | 33 +-
drivers/net/sungem_phy.c | 1
drivers/net/virtio_net.c | 37 ++
drivers/net/wireless/mediatek/mt7601u/usb.c | 1
drivers/scsi/ufs/ufshcd-pltfrm.c | 15 +
fs/ntfs/attrib.c | 8
fs/ocfs2/ocfs2.h | 4
fs/ocfs2/slot_map.c | 46 +--
fs/ocfs2/super.c | 21 -
fs/xfs/libxfs/xfs_log_format.h | 11
fs/xfs/libxfs/xfs_types.h | 1
fs/xfs/xfs_buf_item.c | 60 +---
fs/xfs/xfs_buf_item_recover.c | 1
fs/xfs/xfs_dquot_item.c | 2
fs/xfs/xfs_file.c | 81 +++---
fs/xfs/xfs_inode.c | 10
fs/xfs/xfs_inode_item.c | 4
fs/xfs/xfs_inode_item.h | 2
fs/xfs/xfs_inode_item_recover.c | 39 ++-
fs/xfs/xfs_log.c | 30 +-
fs/xfs/xfs_log.h | 4
fs/xfs/xfs_log_cil.c | 32 --
fs/xfs/xfs_log_priv.h | 15 -
fs/xfs/xfs_log_recover.c | 5
fs/xfs/xfs_mount.c | 10
fs/xfs/xfs_trans.c | 6
fs/xfs/xfs_trans.h | 4
include/linux/bpf.h | 10
include/net/addrconf.h | 3
include/net/bluetooth/l2cap.h | 1
include/net/inet_connection_sock.h | 10
include/net/tcp.h | 2
include/uapi/linux/bpf.h | 5
kernel/watch_queue.c | 58 ++--
mm/page_alloc.c | 12
net/bluetooth/l2cap_core.c | 61 +++-
net/bpf/test_run.c | 243 ++++++++++++++-----
net/core/filter.c | 1
net/ipv4/igmp.c | 24 +
net/ipv4/tcp.c | 2
net/ipv4/tcp_input.c | 24 +
net/ipv4/tcp_ipv4.c | 4
net/ipv4/tcp_metrics.c | 10
net/ipv4/tcp_output.c | 19 -
net/ipv6/ping.c | 6
net/ipv6/tcp_ipv6.c | 4
net/mptcp/protocol.c | 2
net/netfilter/nfnetlink_queue.c | 7
net/sctp/associola.c | 5
net/sctp/stream.c | 19 -
net/sctp/stream_sched.c | 2
net/tls/tls_device.c | 7
tools/include/uapi/linux/bpf.h | 5
tools/perf/util/symbol-elf.c | 45 +++
tools/testing/selftests/bpf/test_verifier.c | 4
tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c | 1
69 files changed, 754 insertions(+), 407 deletions(-)
Alejandro Lucero (1):
sfc: disable softirqs for ptp TX
Alistair Popple (1):
nouveau/svm: Fix to migrate all requested pages
Brian Foster (2):
xfs: hold buffer across unpin and potential shutdown processing
xfs: remove dead stale buf unpin handling code
ChenXiaoSong (1):
ntfs: fix use-after-free in ntfs_ucsncmp()
Christoph Hellwig (1):
xfs: refactor xfs_file_fsync
Darrick J. Wong (3):
xfs: prevent UAF in xfs_log_item_in_current_chkpt
xfs: fix log intent recovery ENOSPC shutdowns when inactivating inodes
xfs: force the log offline when log intent item recovery fails
Dave Chinner (3):
xfs: xfs_log_force_lsn isn't passed a LSN
xfs: logging the on disk inode LSN can make it go backwards
xfs: Enforce attr3 buffer recovery order
David Howells (1):
watch_queue: Fix missing rcu annotation
Duoming Zhou (1):
sctp: fix sleep in atomic context bug in timer handlers
Eiichi Tsukata (1):
docs/kernel-parameters: Update descriptions for "mitigations=" param with retbleed
Florian Fainelli (1):
ARM: 9216/1: Fix MAX_DMA_ADDRESS overflow
Florian Westphal (1):
netfilter: nf_queue: do not allow packet truncation below transport header offset
Greg Kroah-Hartman (2):
ARM: crypto: comment out gcc warning that breaks clang builds
Linux 5.10.135
Harald Freudenberger (1):
s390/archrandom: prevent CPACF trng invocations in interrupt context
Jaewon Kim (1):
page_alloc: fix invalid watermark check on a negative value
Jason Wang (1):
virtio-net: fix the race between refill work and close
Jianglei Nie (1):
net: macsec: fix potential resource leak in macsec_add_rxsa() and macsec_add_txsa()
Junxiao Bi (1):
Revert "ocfs2: mount shared volume without ha stack"
Kuniyuki Iwashima (19):
tcp: Fix data-races around sysctl_tcp_dsack.
tcp: Fix a data-race around sysctl_tcp_app_win.
tcp: Fix a data-race around sysctl_tcp_adv_win_scale.
tcp: Fix a data-race around sysctl_tcp_frto.
tcp: Fix a data-race around sysctl_tcp_nometrics_save.
tcp: Fix data-races around sysctl_tcp_no_ssthresh_metrics_save.
tcp: Fix data-races around sysctl_tcp_moderate_rcvbuf.
tcp: Fix a data-race around sysctl_tcp_limit_output_bytes.
tcp: Fix a data-race around sysctl_tcp_challenge_ack_limit.
net: ping6: Fix memleak in ipv6_renew_options().
igmp: Fix data-races around sysctl_igmp_qrv.
tcp: Fix a data-race around sysctl_tcp_min_tso_segs.
tcp: Fix a data-race around sysctl_tcp_min_rtt_wlen.
tcp: Fix a data-race around sysctl_tcp_autocorking.
tcp: Fix a data-race around sysctl_tcp_invalid_ratelimit.
tcp: Fix a data-race around sysctl_tcp_comp_sack_delay_ns.
tcp: Fix a data-race around sysctl_tcp_comp_sack_slack_ns.
tcp: Fix a data-race around sysctl_tcp_comp_sack_nr.
tcp: Fix data-races around sysctl_tcp_reflect_tos.
Leo Yan (1):
perf symbol: Correct address for bss symbols
Liang He (2):
scsi: ufs: host: Hold reference returned by of_parse_phandle()
net: sungem_phy: Add of_node_put() for reference returned by of_get_parent()
Linus Torvalds (1):
watch_queue: Fix missing locking in add_watch_to_object()
Lorenz Bauer (3):
bpf: Consolidate shared test timing code
bpf: Add PROG_TEST_RUN support for sk_lookup programs
selftests: bpf: Don't run sk_lookup in verifier tests
Luiz Augusto von Dentz (1):
Bluetooth: L2CAP: Fix use-after-free caused by l2cap_chan_put
Maciej Fijalkowski (2):
ice: check (DD | EOF) bits on Rx descriptor rather than (EOP | RS)
ice: do not setup vlan for loopback VSI
Maxim Mikityanskiy (1):
net/tls: Remove the context from the list in tls_device_down
Michal Maloszewski (1):
i40e: Fix interface init with MSI interrupts (no MSI-X)
Sabrina Dubroca (4):
macsec: fix NULL deref in macsec_add_rxsa
macsec: fix error message in macsec_add_rxsa and _txsa
macsec: limit replay window size with XPN
macsec: always read MACSEC_SA_ATTR_PN as a u64
Thadeu Lima de Souza Cascardo (1):
x86/bugs: Do not enable IBPB at firmware entry when IBPB is not available
Toshi Kani (1):
EDAC/ghes: Set the DIMM label unconditionally
Wei Mingzhi (1):
mt7601u: add USB device ID for some versions of XiaoDu WiFi Dongle.
Wei Wang (1):
Revert "tcp: change pingpong threshold to 3"
Xin Long (2):
Documentation: fix sctp_wmem in ip-sysctl.rst
sctp: leave the err path free in sctp_stream_init to sctp_stream_free
Ziyang Xuan (1):
ipv6/addrconf: fix a null-ptr-deref bug for ip6_ptr
I'm announcing the release of the 5.4.209 kernel.
All users of the 5.4 kernel series must upgrade.
The updated 5.4.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-5.4.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Documentation/networking/ip-sysctl.txt | 9 +++
Makefile | 2
arch/arm/lib/xor-neon.c | 3 -
arch/s390/include/asm/archrandom.h | 9 ++-
drivers/net/ethernet/intel/i40e/i40e_main.c | 4 +
drivers/net/ethernet/intel/ice/ice_ethtool.c | 3 -
drivers/net/ethernet/intel/ice/ice_main.c | 8 ++-
drivers/net/ethernet/sfc/ptp.c | 22 +++++++++
drivers/net/sungem_phy.c | 1
drivers/net/virtio_net.c | 37 +++++++++++++++-
drivers/net/wireless/mediatek/mt7601u/usb.c | 1
drivers/scsi/scsi_lib.c | 3 -
drivers/scsi/ufs/ufshcd-pltfrm.c | 15 +++++-
fs/ntfs/attrib.c | 8 ++-
include/net/addrconf.h | 3 +
include/net/bluetooth/l2cap.h | 1
include/net/tcp.h | 2
net/bluetooth/l2cap_core.c | 61 +++++++++++++++++++++------
net/ipv4/igmp.c | 24 +++++-----
net/ipv4/tcp.c | 2
net/ipv4/tcp_input.c | 20 ++++----
net/ipv4/tcp_metrics.c | 2
net/ipv4/tcp_output.c | 4 -
net/ipv6/ping.c | 6 ++
net/netfilter/nfnetlink_queue.c | 7 ++-
net/sctp/associola.c | 5 --
net/sctp/stream.c | 19 +-------
net/sctp/stream_sched.c | 2
tools/perf/util/symbol-elf.c | 45 ++++++++++++++++++-
29 files changed, 246 insertions(+), 82 deletions(-)
Alejandro Lucero (1):
sfc: disable softirqs for ptp TX
ChenXiaoSong (1):
ntfs: fix use-after-free in ntfs_ucsncmp()
Duoming Zhou (1):
sctp: fix sleep in atomic context bug in timer handlers
Florian Westphal (1):
netfilter: nf_queue: do not allow packet truncation below transport header offset
Greg Kroah-Hartman (2):
ARM: crypto: comment out gcc warning that breaks clang builds
Linux 5.4.209
Harald Freudenberger (1):
s390/archrandom: prevent CPACF trng invocations in interrupt context
Jason Wang (1):
virtio-net: fix the race between refill work and close
Kuniyuki Iwashima (15):
tcp: Fix data-races around sysctl_tcp_dsack.
tcp: Fix a data-race around sysctl_tcp_app_win.
tcp: Fix a data-race around sysctl_tcp_adv_win_scale.
tcp: Fix a data-race around sysctl_tcp_frto.
tcp: Fix a data-race around sysctl_tcp_nometrics_save.
tcp: Fix a data-race around sysctl_tcp_limit_output_bytes.
tcp: Fix a data-race around sysctl_tcp_challenge_ack_limit.
net: ping6: Fix memleak in ipv6_renew_options().
igmp: Fix data-races around sysctl_igmp_qrv.
tcp: Fix a data-race around sysctl_tcp_min_tso_segs.
tcp: Fix a data-race around sysctl_tcp_min_rtt_wlen.
tcp: Fix a data-race around sysctl_tcp_autocorking.
tcp: Fix a data-race around sysctl_tcp_invalid_ratelimit.
tcp: Fix a data-race around sysctl_tcp_comp_sack_delay_ns.
tcp: Fix a data-race around sysctl_tcp_comp_sack_nr.
Leo Yan (1):
perf symbol: Correct address for bss symbols
Liang He (2):
scsi: ufs: host: Hold reference returned by of_parse_phandle()
net: sungem_phy: Add of_node_put() for reference returned by of_get_parent()
Luiz Augusto von Dentz (1):
Bluetooth: L2CAP: Fix use-after-free caused by l2cap_chan_put
Maciej Fijalkowski (2):
ice: check (DD | EOF) bits on Rx descriptor rather than (EOP | RS)
ice: do not setup vlan for loopback VSI
Michal Maloszewski (1):
i40e: Fix interface init with MSI interrupts (no MSI-X)
Ming Lei (1):
scsi: core: Fix race between handling STS_RESOURCE and completion
Wei Mingzhi (1):
mt7601u: add USB device ID for some versions of XiaoDu WiFi Dongle.
Xin Long (2):
Documentation: fix sctp_wmem in ip-sysctl.rst
sctp: leave the err path free in sctp_stream_init to sctp_stream_free
Ziyang Xuan (1):
ipv6/addrconf: fix a null-ptr-deref bug for ip6_ptr
Hallo,
Wir haben nicht die Absicht, Ihre Zeit zu verschwenden.
Wir vergeben Kredite an Privatpersonen und Unternehmen. Der Zinssatz beträgt 2% jährlich. Wir haben die Kapazität, Projekte in diesen Bereichen zu finanzieren; Immobilienentwicklung, Bau, Bergbau, Öl und Gas, Fluggesellschaft und verschiedene Handelsgeschäfte. Bei Interesse bitte folgendes angeben:
1- Vollständiger Name:
2- Erforderlicher Kreditbetrag:
3- Amortisationsdauer (1 – 30 Jahre):
4- Telefonnummer / WhatsApp-Nummer:
Ich werde Ihnen die Kreditbedingungen zusenden, sobald ich die oben genannten Informationen erhalten habe.
Ihr Rückerstattungsplan beginnt nach 6 Monaten (6-Monats-Nachfrist).
Die Finanzierung dauert ungefähr 7 Bankarbeitstage ab dem Tag, an dem Sie Ihren Kreditantrag stellen.
Antworten Sie uns für weitere Informationen.
Mit freundlichen Grüßen
Uwe Wanner
--
This email has been checked for viruses by Avast antivirus software.
https://www.avast.com/antivirus
The patch titled
Subject: writeback: avoid use-after-free after removing device
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
writeback-avoid-use-after-free-after-removing-device.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/writeback-avoid-use-after-free-after-removing-device.patch
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Khazhismel Kumykov <khazhy(a)chromium.org>
Subject: writeback: avoid use-after-free after removing device
Date: Mon, 1 Aug 2022 08:50:34 -0700
When a disk is removed, bdi_unregister gets called to stop further
writeback and wait for associated delayed work to complete. However,
wb_inode_writeback_end() may schedule bandwidth estimation dwork after
this has completed, which can result in the timer attempting to access the
just freed bdi_writeback.
Fix this by checking if the bdi_writeback is alive, similar to when
scheduling writeback work.
Since this requires wb->work_lock, and wb_inode_writeback_end() may get
called from interrupt, switch wb->work_lock to an irqsafe lock.
Link: https://lkml.kernel.org/r/20220801155034.3772543-1-khazhy@google.com
Fixes: 45a2966fd641 ("writeback: fix bandwidth estimate for spiky workload")
Signed-off-by: Khazhismel Kumykov <khazhy(a)google.com>
Cc: Jan Kara <jack(a)suse.cz>
Cc: Michael Stapelberg <stapelberg+linux(a)google.com>
Cc: Wu Fengguang <fengguang.wu(a)intel.com>
Cc: Alexander Viro <viro(a)zeniv.linux.org.uk>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/fs-writeback.c | 12 ++++++------
mm/backing-dev.c | 10 +++++-----
mm/page-writeback.c | 6 +++++-
3 files changed, 16 insertions(+), 12 deletions(-)
--- a/fs/fs-writeback.c~writeback-avoid-use-after-free-after-removing-device
+++ a/fs/fs-writeback.c
@@ -134,10 +134,10 @@ static bool inode_io_list_move_locked(st
static void wb_wakeup(struct bdi_writeback *wb)
{
- spin_lock_bh(&wb->work_lock);
+ spin_lock_irq(&wb->work_lock);
if (test_bit(WB_registered, &wb->state))
mod_delayed_work(bdi_wq, &wb->dwork, 0);
- spin_unlock_bh(&wb->work_lock);
+ spin_unlock_irq(&wb->work_lock);
}
static void finish_writeback_work(struct bdi_writeback *wb,
@@ -164,7 +164,7 @@ static void wb_queue_work(struct bdi_wri
if (work->done)
atomic_inc(&work->done->cnt);
- spin_lock_bh(&wb->work_lock);
+ spin_lock_irq(&wb->work_lock);
if (test_bit(WB_registered, &wb->state)) {
list_add_tail(&work->list, &wb->work_list);
@@ -172,7 +172,7 @@ static void wb_queue_work(struct bdi_wri
} else
finish_writeback_work(wb, work);
- spin_unlock_bh(&wb->work_lock);
+ spin_unlock_irq(&wb->work_lock);
}
/**
@@ -2082,13 +2082,13 @@ static struct wb_writeback_work *get_nex
{
struct wb_writeback_work *work = NULL;
- spin_lock_bh(&wb->work_lock);
+ spin_lock_irq(&wb->work_lock);
if (!list_empty(&wb->work_list)) {
work = list_entry(wb->work_list.next,
struct wb_writeback_work, list);
list_del_init(&work->list);
}
- spin_unlock_bh(&wb->work_lock);
+ spin_unlock_irq(&wb->work_lock);
return work;
}
--- a/mm/backing-dev.c~writeback-avoid-use-after-free-after-removing-device
+++ a/mm/backing-dev.c
@@ -260,10 +260,10 @@ void wb_wakeup_delayed(struct bdi_writeb
unsigned long timeout;
timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
- spin_lock_bh(&wb->work_lock);
+ spin_lock_irq(&wb->work_lock);
if (test_bit(WB_registered, &wb->state))
queue_delayed_work(bdi_wq, &wb->dwork, timeout);
- spin_unlock_bh(&wb->work_lock);
+ spin_unlock_irq(&wb->work_lock);
}
static void wb_update_bandwidth_workfn(struct work_struct *work)
@@ -334,12 +334,12 @@ static void cgwb_remove_from_bdi_list(st
static void wb_shutdown(struct bdi_writeback *wb)
{
/* Make sure nobody queues further work */
- spin_lock_bh(&wb->work_lock);
+ spin_lock_irq(&wb->work_lock);
if (!test_and_clear_bit(WB_registered, &wb->state)) {
- spin_unlock_bh(&wb->work_lock);
+ spin_unlock_irq(&wb->work_lock);
return;
}
- spin_unlock_bh(&wb->work_lock);
+ spin_unlock_irq(&wb->work_lock);
cgwb_remove_from_bdi_list(wb);
/*
--- a/mm/page-writeback.c~writeback-avoid-use-after-free-after-removing-device
+++ a/mm/page-writeback.c
@@ -2867,6 +2867,7 @@ static void wb_inode_writeback_start(str
static void wb_inode_writeback_end(struct bdi_writeback *wb)
{
+ unsigned long flags;
atomic_dec(&wb->writeback_inodes);
/*
* Make sure estimate of writeback throughput gets updated after
@@ -2875,7 +2876,10 @@ static void wb_inode_writeback_end(struc
* that if multiple inodes end writeback at a similar time, they get
* batched into one bandwidth update.
*/
- queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
+ spin_lock_irqsave(&wb->work_lock, flags);
+ if (test_bit(WB_registered, &wb->state))
+ queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
+ spin_unlock_irqrestore(&wb->work_lock, flags);
}
bool __folio_end_writeback(struct folio *folio)
_
Patches currently in -mm which might be from khazhy(a)chromium.org are
writeback-avoid-use-after-free-after-removing-device.patch