lwt xmit hook does not expect positive return values in function
ip_finish_output2 and ip6_finish_output. However, BPF programs can
directly return positive statuses such like NET_XMIT_DROP, NET_RX_DROP,
and etc to the caller. Such return values would make the kernel continue
processing already freed skbs and eventually panic.
This set fixes the return values from BPF ops to unexpected continue
processing, and checks strictly on the correct continue condition for
future proof. In addition, add missing selftests for BPF_REDIRECT
and BPF_REROUTE cases for BPF-CI.
v4: https://lore.kernel.org/bpf/ZMD1sFTW8SFiex+x@debian.debian/T/
v3: https://lore.kernel.org/bpf/cover.1690255889.git.yan@cloudflare.com/
v2: https://lore.kernel.org/netdev/ZLdY6JkWRccunvu0@debian.debian/
v1: https://lore.kernel.org/bpf/ZLbYdpWC8zt9EJtq@debian.debian/
changes since v4:
* fixed same error on BPF_REROUTE path
* re-implemented selftests under BPF-CI requirement
changes since v3:
* minor change in commit message and changelogs
* tested by Jakub Sitnicki
changes since v2:
* subject name changed
* also covered redirect to ingress case
* added selftests
changes since v1:
* minor code style changes
Yan Zhai (4):
lwt: fix return values of BPF ops
lwt: check LWTUNNEL_XMIT_CONTINUE strictly
selftests/bpf: add lwt_xmit tests for BPF_REDIRECT
selftests/bpf: add lwt_xmit tests for BPF_REROUTE
include/net/lwtunnel.h | 5 +-
net/core/lwt_bpf.c | 7 +-
net/ipv4/ip_output.c | 2 +-
net/ipv6/ip6_output.c | 2 +-
.../selftests/bpf/prog_tests/lwt_helpers.h | 139 ++++++++
.../selftests/bpf/prog_tests/lwt_redirect.c | 319 ++++++++++++++++++
.../selftests/bpf/prog_tests/lwt_reroute.c | 256 ++++++++++++++
.../selftests/bpf/progs/test_lwt_redirect.c | 58 ++++
.../selftests/bpf/progs/test_lwt_reroute.c | 36 ++
9 files changed, 817 insertions(+), 7 deletions(-)
create mode 100644 tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
create mode 100644 tools/testing/selftests/bpf/prog_tests/lwt_redirect.c
create mode 100644 tools/testing/selftests/bpf/prog_tests/lwt_reroute.c
create mode 100644 tools/testing/selftests/bpf/progs/test_lwt_redirect.c
create mode 100644 tools/testing/selftests/bpf/progs/test_lwt_reroute.c
--
2.30.2
A recent change in clang allows it to consider more expressions as
compile time constants, which causes it to point out an implicit
conversion in the scanf tests:
lib/test_scanf.c:661:2: warning: implicit conversion from 'int' to 'unsigned char' changes value from -168 to 88 [-Wconstant-conversion]
661 | test_number_prefix(unsigned char, "0xA7", "%2hhx%hhx", 0, 0xa7, 2, check_uchar);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
lib/test_scanf.c:609:29: note: expanded from macro 'test_number_prefix'
609 | T result[2] = {~expect[0], ~expect[1]}; \
| ~ ^~~~~~~~~~
1 warning generated.
The result of the bitwise negation is the type of the operand after
going through the integer promotion rules, so this truncation is
expected but harmless, as the initial values in the result array get
overwritten by _test() anyways. Add an explicit cast to the expected
type in test_number_prefix() to silence the warning. There is no
functional change, as all the tests still pass with GCC 13.1.0 and clang
18.0.0.
Cc: stable(a)vger.kernel.org
Closes: https://github.com/ClangBuiltLinux/linux/issues/1899
Link: https://github.com/llvm/llvm-project/commit/610ec954e1f81c0e8fcadedcd25afe6…
Suggested-by: Nick Desaulniers <ndesaulniers(a)google.com>
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
---
Changes in v2:
- Add spaces in initializer to match result (Andy)
- Add 'Cc: stable', as builds with CONFIG_WERROR will be broken, such as
allmodconfig
- Link to v1: https://lore.kernel.org/r/20230803-test_scanf-wconstant-conversion-v1-1-74d…
---
lib/test_scanf.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/test_scanf.c b/lib/test_scanf.c
index b620cf7de503..a2707af2951a 100644
--- a/lib/test_scanf.c
+++ b/lib/test_scanf.c
@@ -606,7 +606,7 @@ static void __init numbers_slice(void)
#define test_number_prefix(T, str, scan_fmt, expect0, expect1, n_args, fn) \
do { \
const T expect[2] = { expect0, expect1 }; \
- T result[2] = {~expect[0], ~expect[1]}; \
+ T result[2] = { (T)~expect[0], (T)~expect[1] }; \
\
_test(fn, &expect, str, scan_fmt, n_args, &result[0], &result[1]); \
} while (0)
---
base-commit: 5d0c230f1de8c7515b6567d9afba1f196fb4e2f4
change-id: 20230803-test_scanf-wconstant-conversion-d97efbf3bb5d
Best regards,
--
Nathan Chancellor <nathan(a)kernel.org>
This is the start of the stable review cycle for the 5.4.254 release.
There are 39 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Tue, 15 Aug 2023 21:16:53 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.254-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.4.254-rc1
Eric Dumazet <edumazet(a)google.com>
sch_netem: fix issues in netem_change() vs get_dist_table()
Masahiro Yamada <masahiroy(a)kernel.org>
alpha: remove __init annotation from exported page_is_ram()
Zhu Wang <wangzhu9(a)huawei.com>
scsi: core: Fix possible memory leak if device_add() fails
Zhu Wang <wangzhu9(a)huawei.com>
scsi: snic: Fix possible memory leak if device_add() fails
Alexandra Diupina <adiupina(a)astralinux.ru>
scsi: 53c700: Check that command slot is not NULL
Michael Kelley <mikelley(a)microsoft.com>
scsi: storvsc: Fix handling of virtual Fibre Channel timeouts
Tony Battersby <tonyb(a)cybernetics.com>
scsi: core: Fix legacy /proc parsing buffer overflow
Pablo Neira Ayuso <pablo(a)netfilter.org>
netfilter: nf_tables: report use refcount overflow
Ming Lei <ming.lei(a)redhat.com>
nvme-rdma: fix potential unbalanced freeze & unfreeze
Ming Lei <ming.lei(a)redhat.com>
nvme-tcp: fix potential unbalanced freeze & unfreeze
Josef Bacik <josef(a)toxicpanda.com>
btrfs: set cache_block_group_error if we find an error
Christoph Hellwig <hch(a)lst.de>
btrfs: don't stop integrity writeback too early
Nick Child <nnac123(a)linux.ibm.com>
ibmvnic: Handle DMA unmapping of login buffs in release functions
Daniel Jurgens <danielj(a)nvidia.com>
net/mlx5: Allow 0 for total host VFs
Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
dmaengine: mcf-edma: Fix a potential un-allocated memory access
Felix Fietkau <nbd(a)nbd.name>
wifi: cfg80211: fix sband iftype data lookup for AP_VLAN
Douglas Miller <doug.miller(a)cornelisnetworks.com>
IB/hfi1: Fix possible panic during hotplug remove
Andrew Kanner <andrew.kanner(a)gmail.com>
drivers: net: prevent tun_build_skb() to exceed the packet size limit
Eric Dumazet <edumazet(a)google.com>
dccp: fix data-race around dp->dccps_mss_cache
Ziyang Xuan <william.xuanziyang(a)huawei.com>
bonding: Fix incorrect deletion of ETH_P_8021AD protocol vid from slaves
Eric Dumazet <edumazet(a)google.com>
net/packet: annotate data-races around tp->status
Nathan Chancellor <nathan(a)kernel.org>
mISDN: Update parameter type of dsp_cmx_send()
Mark Brown <broonie(a)kernel.org>
selftests/rseq: Fix build with undefined __weak
Karol Herbst <kherbst(a)redhat.com>
drm/nouveau/disp: Revert a NULL check inside nouveau_connector_get_modes
Arnd Bergmann <arnd(a)arndb.de>
x86: Move gds_ucode_mitigated() declaration to header
Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
x86/mm: Fix VDSO and VVAR placement on 5-level paging machines
Cristian Ciocaltea <cristian.ciocaltea(a)collabora.com>
x86/cpu/amd: Enable Zenbleed fix for AMD Custom APU 0405
Prashanth K <quic_prashk(a)quicinc.com>
usb: common: usb-conn-gpio: Prevent bailing out if initial role is none
Elson Roy Serrao <quic_eserrao(a)quicinc.com>
usb: dwc3: Properly handle processing of pending events
Alan Stern <stern(a)rowland.harvard.edu>
usb-storage: alauda: Fix uninit-value in alauda_check_media()
Qi Zheng <zhengqi.arch(a)bytedance.com>
binder: fix memory leak in binder_init()
Yiyuan Guo <yguoaz(a)gmail.com>
iio: cros_ec: Fix the allocation size for cros_ec_command
Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
nilfs2: fix use-after-free of nilfs_root in dirtying inodes via iput
Thomas Gleixner <tglx(a)linutronix.de>
x86/pkeys: Revert a5eff7259790 ("x86/pkeys: Add PKRU value to init_fpstate")
Colin Ian King <colin.i.king(a)gmail.com>
radix tree test suite: fix incorrect allocation size for pthreads
Karol Herbst <kherbst(a)redhat.com>
drm/nouveau/gr: enable memory loads on helper invocation on all channels
Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
dmaengine: pl330: Return DMA_PAUSED when transaction is paused
Maciej Żenczykowski <maze(a)google.com>
ipv6: adjust ndisc_is_useropt() to also return true for PIO
Sergei Antonov <saproj(a)gmail.com>
mmc: moxart: read scr register without changing byte order
-------------
Diffstat:
Makefile | 4 +-
arch/alpha/kernel/setup.c | 3 +-
arch/x86/entry/vdso/vma.c | 4 +-
arch/x86/include/asm/processor.h | 2 +
arch/x86/kernel/cpu/amd.c | 1 +
arch/x86/kernel/cpu/common.c | 5 -
arch/x86/kvm/x86.c | 2 -
arch/x86/mm/pkeys.c | 6 -
drivers/android/binder.c | 1 +
drivers/android/binder_alloc.c | 6 +
drivers/android/binder_alloc.h | 1 +
drivers/dma/mcf-edma.c | 13 +-
drivers/dma/pl330.c | 18 ++-
drivers/gpu/drm/nouveau/nouveau_connector.c | 2 +-
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h | 1 +
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c | 4 +-
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c | 10 ++
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c | 1 +
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c | 1 +
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c | 1 +
.../common/cros_ec_sensors/cros_ec_sensors_core.c | 2 +-
drivers/infiniband/hw/hfi1/chip.c | 1 +
drivers/isdn/mISDN/dsp.h | 2 +-
drivers/isdn/mISDN/dsp_cmx.c | 2 +-
drivers/isdn/mISDN/dsp_core.c | 2 +-
drivers/mmc/host/moxart-mmc.c | 8 +-
drivers/net/bonding/bond_main.c | 4 +-
drivers/net/ethernet/ibm/ibmvnic.c | 15 +-
drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 3 +-
drivers/net/tun.c | 2 +-
drivers/nvme/host/rdma.c | 3 +-
drivers/nvme/host/tcp.c | 3 +-
drivers/scsi/53c700.c | 2 +-
drivers/scsi/raid_class.c | 1 +
drivers/scsi/scsi_proc.c | 30 ++--
drivers/scsi/snic/snic_disc.c | 1 +
drivers/scsi/storvsc_drv.c | 4 -
drivers/usb/common/usb-conn-gpio.c | 6 +-
drivers/usb/dwc3/gadget.c | 9 +-
drivers/usb/storage/alauda.c | 9 +-
fs/btrfs/extent-tree.c | 5 +-
fs/btrfs/extent_io.c | 7 +-
fs/nilfs2/inode.c | 8 +
fs/nilfs2/segment.c | 2 +
fs/nilfs2/the_nilfs.h | 2 +
include/net/cfg80211.h | 3 +
include/net/netfilter/nf_tables.h | 31 +++-
net/dccp/output.c | 2 +-
net/dccp/proto.c | 10 +-
net/ipv6/ndisc.c | 3 +-
net/netfilter/nf_tables_api.c | 166 +++++++++++++--------
net/netfilter/nft_flow_offload.c | 6 +-
net/netfilter/nft_objref.c | 8 +-
net/packet/af_packet.c | 16 +-
net/sched/sch_netem.c | 59 ++++----
tools/testing/radix-tree/regression1.c | 2 +-
tools/testing/selftests/rseq/Makefile | 4 +-
tools/testing/selftests/rseq/rseq.c | 2 +
58 files changed, 337 insertions(+), 194 deletions(-)
This is the start of the stable review cycle for the 4.19.292 release.
There are 33 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Tue, 15 Aug 2023 21:16:53 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.19.292-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.19.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.19.292-rc1
Eric Dumazet <edumazet(a)google.com>
sch_netem: fix issues in netem_change() vs get_dist_table()
Masahiro Yamada <masahiroy(a)kernel.org>
alpha: remove __init annotation from exported page_is_ram()
Zhu Wang <wangzhu9(a)huawei.com>
scsi: core: Fix possible memory leak if device_add() fails
Zhu Wang <wangzhu9(a)huawei.com>
scsi: snic: Fix possible memory leak if device_add() fails
Alexandra Diupina <adiupina(a)astralinux.ru>
scsi: 53c700: Check that command slot is not NULL
Michael Kelley <mikelley(a)microsoft.com>
scsi: storvsc: Fix handling of virtual Fibre Channel timeouts
Tony Battersby <tonyb(a)cybernetics.com>
scsi: core: Fix legacy /proc parsing buffer overflow
Pablo Neira Ayuso <pablo(a)netfilter.org>
netfilter: nf_tables: report use refcount overflow
Pablo Neira Ayuso <pablo(a)netfilter.org>
netfilter: nf_tables: bogus EBUSY when deleting flowtable after flush
Christoph Hellwig <hch(a)lst.de>
btrfs: don't stop integrity writeback too early
Nick Child <nnac123(a)linux.ibm.com>
ibmvnic: Handle DMA unmapping of login buffs in release functions
Felix Fietkau <nbd(a)nbd.name>
wifi: cfg80211: fix sband iftype data lookup for AP_VLAN
Douglas Miller <doug.miller(a)cornelisnetworks.com>
IB/hfi1: Fix possible panic during hotplug remove
Andrew Kanner <andrew.kanner(a)gmail.com>
drivers: net: prevent tun_build_skb() to exceed the packet size limit
Eric Dumazet <edumazet(a)google.com>
dccp: fix data-race around dp->dccps_mss_cache
Ziyang Xuan <william.xuanziyang(a)huawei.com>
bonding: Fix incorrect deletion of ETH_P_8021AD protocol vid from slaves
Eric Dumazet <edumazet(a)google.com>
net/packet: annotate data-races around tp->status
Nathan Chancellor <nathan(a)kernel.org>
mISDN: Update parameter type of dsp_cmx_send()
Karol Herbst <kherbst(a)redhat.com>
drm/nouveau/disp: Revert a NULL check inside nouveau_connector_get_modes
Arnd Bergmann <arnd(a)arndb.de>
x86: Move gds_ucode_mitigated() declaration to header
Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
x86/mm: Fix VDSO and VVAR placement on 5-level paging machines
Cristian Ciocaltea <cristian.ciocaltea(a)collabora.com>
x86/cpu/amd: Enable Zenbleed fix for AMD Custom APU 0405
Elson Roy Serrao <quic_eserrao(a)quicinc.com>
usb: dwc3: Properly handle processing of pending events
Alan Stern <stern(a)rowland.harvard.edu>
usb-storage: alauda: Fix uninit-value in alauda_check_media()
Qi Zheng <zhengqi.arch(a)bytedance.com>
binder: fix memory leak in binder_init()
Yiyuan Guo <yguoaz(a)gmail.com>
iio: cros_ec: Fix the allocation size for cros_ec_command
Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
nilfs2: fix use-after-free of nilfs_root in dirtying inodes via iput
Colin Ian King <colin.i.king(a)gmail.com>
radix tree test suite: fix incorrect allocation size for pthreads
Karol Herbst <kherbst(a)redhat.com>
drm/nouveau/gr: enable memory loads on helper invocation on all channels
Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
dmaengine: pl330: Return DMA_PAUSED when transaction is paused
Maciej Żenczykowski <maze(a)google.com>
ipv6: adjust ndisc_is_useropt() to also return true for PIO
Sergei Antonov <saproj(a)gmail.com>
mmc: moxart: read scr register without changing byte order
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
sparc: fix up arch_cpu_finalize_init() build breakage.
-------------
Diffstat:
Makefile | 4 +-
arch/alpha/kernel/setup.c | 3 +-
arch/sparc/Kconfig | 2 +-
arch/x86/entry/vdso/vma.c | 4 +-
arch/x86/include/asm/processor.h | 2 +
arch/x86/kernel/cpu/amd.c | 1 +
arch/x86/kvm/x86.c | 2 -
drivers/android/binder.c | 1 +
drivers/android/binder_alloc.c | 6 +
drivers/android/binder_alloc.h | 1 +
drivers/dma/pl330.c | 18 ++-
drivers/gpu/drm/nouveau/nouveau_connector.c | 2 +-
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h | 1 +
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c | 4 +-
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c | 10 ++
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c | 1 +
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c | 1 +
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c | 1 +
.../common/cros_ec_sensors/cros_ec_sensors_core.c | 2 +-
drivers/infiniband/hw/hfi1/chip.c | 1 +
drivers/isdn/mISDN/dsp.h | 2 +-
drivers/isdn/mISDN/dsp_cmx.c | 2 +-
drivers/isdn/mISDN/dsp_core.c | 2 +-
drivers/mmc/host/moxart-mmc.c | 8 +-
drivers/net/bonding/bond_main.c | 4 +-
drivers/net/ethernet/ibm/ibmvnic.c | 15 +-
drivers/net/tun.c | 2 +-
drivers/scsi/53c700.c | 2 +-
drivers/scsi/raid_class.c | 1 +
drivers/scsi/scsi_proc.c | 30 ++--
drivers/scsi/snic/snic_disc.c | 1 +
drivers/scsi/storvsc_drv.c | 4 -
drivers/usb/dwc3/gadget.c | 9 +-
drivers/usb/storage/alauda.c | 9 +-
fs/btrfs/extent_io.c | 7 +-
fs/nilfs2/inode.c | 8 +
fs/nilfs2/segment.c | 2 +
fs/nilfs2/the_nilfs.h | 2 +
include/net/cfg80211.h | 3 +
include/net/netfilter/nf_tables.h | 35 +++-
net/dccp/output.c | 2 +-
net/dccp/proto.c | 10 +-
net/ipv6/ndisc.c | 3 +-
net/netfilter/nf_tables_api.c | 180 ++++++++++++++-------
net/netfilter/nft_flow_offload.c | 23 ++-
net/netfilter/nft_objref.c | 8 +-
net/packet/af_packet.c | 16 +-
net/sched/sch_netem.c | 59 +++----
tools/testing/radix-tree/regression1.c | 2 +-
49 files changed, 349 insertions(+), 169 deletions(-)
Some usb hubs will negotiate DisplayPort Alt mode with the device
but will then negotiate a data role swap after entering the alt
mode. The data role swap causes the device to unregister all alt
modes, however the usb hub will still send Attention messages
even after failing to reregister the Alt Mode. type_altmode_attention
currently does not verify whether or not a device's altmode partner
exists, which results in a NULL pointer error when dereferencing
the typec_altmode and typec_altmode_ops belonging to the altmode
partner.
Verify the presence of a device's altmode partner before sending
the Attention message to the Alt Mode driver.
Fixes: 8a37d87d72f0 ("usb: typec: Bus type for alternate modes")
Cc: stable(a)vger.kernel.org
Signed-off-by: RD Babiera <rdbabiera(a)google.com>
---
Changes since v1:
* Only assigns pdev if altmode partner exists in typec_altmode_attention
* Removed error return in typec_altmode_attention if Alt Mode does
not implement Attention messages.
* Changed tcpm_log message to indicate that altmode partner does not exist,
as it only logs in that case.
---
Changes since v2:
* Changed tcpm_log message to accurately reflect error
* Revised commit message
---
Changes since v3:
* Fixed nits
---
drivers/usb/typec/bus.c | 12 ++++++++++--
drivers/usb/typec/tcpm/tcpm.c | 3 ++-
include/linux/usb/typec_altmode.h | 2 +-
3 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/drivers/usb/typec/bus.c b/drivers/usb/typec/bus.c
index fe5b9a2e61f5..e95ec7e382bb 100644
--- a/drivers/usb/typec/bus.c
+++ b/drivers/usb/typec/bus.c
@@ -183,12 +183,20 @@ EXPORT_SYMBOL_GPL(typec_altmode_exit);
*
* Notifies the partner of @adev about Attention command.
*/
-void typec_altmode_attention(struct typec_altmode *adev, u32 vdo)
+int typec_altmode_attention(struct typec_altmode *adev, u32 vdo)
{
- struct typec_altmode *pdev = &to_altmode(adev)->partner->adev;
+ struct altmode *partner = to_altmode(adev)->partner;
+ struct typec_altmode *pdev;
+
+ if (!partner)
+ return -ENODEV;
+
+ pdev = &partner->adev;
if (pdev->ops && pdev->ops->attention)
pdev->ops->attention(pdev, vdo);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(typec_altmode_attention);
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 5a7d8cc04628..77fe16190766 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -1877,7 +1877,8 @@ static void tcpm_handle_vdm_request(struct tcpm_port *port,
}
break;
case ADEV_ATTENTION:
- typec_altmode_attention(adev, p[1]);
+ if (typec_altmode_attention(adev, p[1]))
+ tcpm_log(port, "typec_altmode_attention no port partner altmode");
break;
}
}
diff --git a/include/linux/usb/typec_altmode.h b/include/linux/usb/typec_altmode.h
index 350d49012659..28aeef8f9e7b 100644
--- a/include/linux/usb/typec_altmode.h
+++ b/include/linux/usb/typec_altmode.h
@@ -67,7 +67,7 @@ struct typec_altmode_ops {
int typec_altmode_enter(struct typec_altmode *altmode, u32 *vdo);
int typec_altmode_exit(struct typec_altmode *altmode);
-void typec_altmode_attention(struct typec_altmode *altmode, u32 vdo);
+int typec_altmode_attention(struct typec_altmode *altmode, u32 vdo);
int typec_altmode_vdm(struct typec_altmode *altmode,
const u32 header, const u32 *vdo, int count);
int typec_altmode_notify(struct typec_altmode *altmode, unsigned long conf,
base-commit: f176638af476c6d46257cc3303f5c7cf47d5967d
--
2.41.0.694.ge786442a9b-goog
A KVM guest using SEV-ES or SEV-SNP with multiple vCPUs can trigger
a double fetch race condition vulnerability and invoke the VMGEXIT
handler recursively.
sev_handle_vmgexit() maps the GHCB page using kvm_vcpu_map() and then
fetches the exit code using ghcb_get_sw_exit_code(). Soon after,
sev_es_validate_vmgexit() fetches the exit code again. Since the GHCB
page is shared with the guest, the guest is able to quickly swap the
values with another vCPU and hence bypass the validation. One vmexit code
that can be rejected by sev_es_validate_vmgexit() is SVM_EXIT_VMGEXIT;
if sev_handle_vmgexit() observes it in the second fetch, the call
to svm_invoke_exit_handler() will invoke sev_handle_vmgexit() again
recursively.
To avoid the race, always fetch the GHCB data from the places where
sev_es_sync_from_ghcb stores it.
Exploiting recursions on linux kernel has been proven feasible
in the past, but the impact is mitigated by stack guard pages
(CONFIG_VMAP_STACK). Still, if an attacker manages to call the handler
multiple times, they can theoretically trigger a stack overflow and
cause a denial-of-service, or potentially guest-to-host escape in kernel
configurations without stack guard pages.
Note that winning the race reliably in every iteration is very tricky
due to the very tight window of the fetches; depending on the compiler
settings, they are often consecutive because of optimization and inlining.
Tested by booting an SEV-ES RHEL9 guest.
Fixes: CVE-2023-4155
Fixes: 291bd20d5d88 ("KVM: SVM: Add initial support for a VMGEXIT VMEXIT")
Cc: stable(a)vger.kernel.org
Reported-by: Andy Nguyen <theflow(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
---
arch/x86/kvm/svm/sev.c | 25 ++++++++++++++-----------
1 file changed, 14 insertions(+), 11 deletions(-)
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index e898f0b2b0ba..ca4ba5fe9a01 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2445,9 +2445,15 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}
+static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control)
+{
+ return (((u64)control->exit_code_hi) << 32) | control->exit_code;
+}
+
static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
{
- struct kvm_vcpu *vcpu;
+ struct vmcb_control_area *control = &svm->vmcb->control;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
struct ghcb *ghcb;
u64 exit_code;
u64 reason;
@@ -2458,7 +2464,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
* Retrieve the exit code now even though it may not be marked valid
* as it could help with debugging.
*/
- exit_code = ghcb_get_sw_exit_code(ghcb);
+ exit_code = kvm_ghcb_get_sw_exit_code(control);
/* Only GHCB Usage code 0 is supported */
if (ghcb->ghcb_usage) {
@@ -2473,7 +2479,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
!kvm_ghcb_sw_exit_info_2_is_valid(svm))
goto vmgexit_err;
- switch (ghcb_get_sw_exit_code(ghcb)) {
+ switch (exit_code) {
case SVM_EXIT_READ_DR7:
break;
case SVM_EXIT_WRITE_DR7:
@@ -2490,18 +2496,18 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
if (!kvm_ghcb_rax_is_valid(svm) ||
!kvm_ghcb_rcx_is_valid(svm))
goto vmgexit_err;
- if (ghcb_get_rax(ghcb) == 0xd)
+ if (vcpu->arch.regs[VCPU_REGS_RAX] == 0xd)
if (!kvm_ghcb_xcr0_is_valid(svm))
goto vmgexit_err;
break;
case SVM_EXIT_INVD:
break;
case SVM_EXIT_IOIO:
- if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) {
+ if (control->exit_info_1 & SVM_IOIO_STR_MASK) {
if (!kvm_ghcb_sw_scratch_is_valid(svm))
goto vmgexit_err;
} else {
- if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
+ if (!(control->exit_info_1 & SVM_IOIO_TYPE_MASK))
if (!kvm_ghcb_rax_is_valid(svm))
goto vmgexit_err;
}
@@ -2509,7 +2515,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
case SVM_EXIT_MSR:
if (!kvm_ghcb_rcx_is_valid(svm))
goto vmgexit_err;
- if (ghcb_get_sw_exit_info_1(ghcb)) {
+ if (control->exit_info_1) {
if (!kvm_ghcb_rax_is_valid(svm) ||
!kvm_ghcb_rdx_is_valid(svm))
goto vmgexit_err;
@@ -2553,8 +2559,6 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
return 0;
vmgexit_err:
- vcpu = &svm->vcpu;
-
if (reason == GHCB_ERR_INVALID_USAGE) {
vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
ghcb->ghcb_usage);
@@ -2852,8 +2856,6 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
- exit_code = ghcb_get_sw_exit_code(ghcb);
-
sev_es_sync_from_ghcb(svm);
ret = sev_es_validate_vmgexit(svm);
if (ret)
@@ -2862,6 +2864,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
ghcb_set_sw_exit_info_1(ghcb, 0);
ghcb_set_sw_exit_info_2(ghcb, 0);
+ exit_code = kvm_ghcb_get_sw_exit_code(control);
switch (exit_code) {
case SVM_VMGEXIT_MMIO_READ:
ret = setup_vmgexit_scratch(svm, true, control->exit_info_2);
--
2.39.0