From: Jerome Brunet jbrunet@baylibre.com
[ Upstream commit e39f9dd8206ad66992ac0e6218ef1ba746f2cce9 ]
If a bias is enabled on a pin of an Amlogic SoC, calling .pin_config_set() with PIN_CONFIG_BIAS_DISABLE will not disable the bias. Instead it will force a pull-down bias on the pin.
Instead of the pull type register bank, the driver should access the pull enable register bank.
Fixes: 6ac730951104 ("pinctrl: add driver for Amlogic Meson SoCs") Signed-off-by: Jerome Brunet jbrunet@baylibre.com Acked-by: Neil Armstrong narmstrong@baylibre.com Signed-off-by: Linus Walleij linus.walleij@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/pinctrl/meson/pinctrl-meson.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index 66ed70c12733..6c43322dbb97 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -273,7 +273,7 @@ static int meson_pinconf_set(struct pinctrl_dev *pcdev, unsigned int pin, dev_dbg(pc->dev, "pin %u: disable bias\n", pin);
meson_calc_reg_and_bit(bank, pin, REG_PULL, ®, &bit); - ret = regmap_update_bits(pc->reg_pull, reg, + ret = regmap_update_bits(pc->reg_pullen, reg, BIT(bit), 0); if (ret) return ret;
From: Finn Thain fthain@telegraphics.com.au
[ Upstream commit 96edebd6bb995f2acb7694bed6e01bf6f5a7b634 ]
I overlooked this statement when I recently converted the function result type from struct scsi_cmnd * to bool. No change to object code.
Signed-off-by: Finn Thain fthain@telegraphics.com.au Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/NCR5380.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index 777b0222d021..f2783441b830 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -1190,7 +1190,7 @@ static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance,
out: if (!hostdata->selecting) - return NULL; + return false; hostdata->selecting = NULL; return cmd; }
From: Scott Wood oss@buserror.net
[ Upstream commit 28c5bcf74fa07c25d5bd118d1271920f51ce2a98 ]
TRACE_INCLUDE_PATH and TRACE_INCLUDE_FILE are used by <trace/define_trace.h>, so like that #include, they should be outside #ifdef protection.
They also need to be #undefed before defining, in case multiple trace headers are included by the same C file. This became the case on book3e after commit cf4a6085151a ("powerpc/mm: Add missing tracepoint for tlbie"), leading to the following build error:
CC arch/powerpc/kvm/powerpc.o In file included from arch/powerpc/kvm/powerpc.c:51:0: arch/powerpc/kvm/trace.h:9:0: error: "TRACE_INCLUDE_PATH" redefined [-Werror] #define TRACE_INCLUDE_PATH . ^ In file included from arch/powerpc/kvm/../mm/mmu_decl.h:25:0, from arch/powerpc/kvm/powerpc.c:48: ./arch/powerpc/include/asm/trace.h:224:0: note: this is the location of the previous definition #define TRACE_INCLUDE_PATH asm ^ cc1: all warnings being treated as errors
Reported-by: Christian Zigotzky chzigotzky@xenosoft.de Signed-off-by: Scott Wood oss@buserror.net Signed-off-by: Michael Ellerman mpe@ellerman.id.au Signed-off-by: Sasha Levin sashal@kernel.org --- arch/powerpc/kvm/trace.h | 8 ++++++-- arch/powerpc/kvm/trace_booke.h | 9 +++++++-- arch/powerpc/kvm/trace_hv.h | 9 +++++++-- arch/powerpc/kvm/trace_pr.h | 9 +++++++-- 4 files changed, 27 insertions(+), 8 deletions(-)
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index 491b0f715d6b..ea1d7c808319 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -6,8 +6,6 @@
#undef TRACE_SYSTEM #define TRACE_SYSTEM kvm -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace
/* * Tracepoint for guest mode entry. @@ -120,4 +118,10 @@ TRACE_EVENT(kvm_check_requests, #endif /* _TRACE_KVM_H */
/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace + #include <trace/define_trace.h> diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h index ac640e81fdc5..3837842986aa 100644 --- a/arch/powerpc/kvm/trace_booke.h +++ b/arch/powerpc/kvm/trace_booke.h @@ -6,8 +6,6 @@
#undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_booke -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace_booke
#define kvm_trace_symbol_exit \ {0, "CRITICAL"}, \ @@ -218,4 +216,11 @@ TRACE_EVENT(kvm_booke_queue_irqprio, #endif
/* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_booke + #include <trace/define_trace.h> diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index bcfe8a987f6a..8a1e3b0047f1 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -9,8 +9,6 @@
#undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_hv -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace_hv
#define kvm_trace_symbol_hcall \ {H_REMOVE, "H_REMOVE"}, \ @@ -497,4 +495,11 @@ TRACE_EVENT(kvmppc_run_vcpu_exit, #endif /* _TRACE_KVM_HV_H */
/* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_hv + #include <trace/define_trace.h> diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h index 85785a370c0e..256530eb1354 100644 --- a/arch/powerpc/kvm/trace_pr.h +++ b/arch/powerpc/kvm/trace_pr.h @@ -8,8 +8,6 @@
#undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_pr -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace_pr
TRACE_EVENT(kvm_book3s_reenter, TP_PROTO(int r, struct kvm_vcpu *vcpu), @@ -272,4 +270,11 @@ TRACE_EVENT(kvm_unmap_hva, #endif /* _TRACE_KVM_H */
/* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_pr + #include <trace/define_trace.h>
From: Anson Huang anson.huang@nxp.com
[ Upstream commit 6ef28a04d1ccf718eee069b72132ce4aa1e52ab9 ]
Add return value check for voltage scale when ARM clock rate change fail.
Signed-off-by: Anson Huang Anson.Huang@nxp.com Acked-by: Viresh Kumar viresh.kumar@linaro.org Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/cpufreq/imx6q-cpufreq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 14466a9b01c0..63d28323a29c 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -135,8 +135,13 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index) /* Ensure the arm clock divider is what we expect */ ret = clk_set_rate(arm_clk, new_freq * 1000); if (ret) { + int ret1; + dev_err(cpu_dev, "failed to set clock rate: %d\n", ret); - regulator_set_voltage_tol(arm_reg, volt_old, 0); + ret1 = regulator_set_voltage_tol(arm_reg, volt_old, 0); + if (ret1) + dev_warn(cpu_dev, + "failed to restore vddarm voltage: %d\n", ret1); return ret; }
From: Xulin Sun xulin.sun@windriver.com
[ Upstream commit 9bde0afb7a906f1dabdba37162551565740b862d ]
pcf2127_i2c_gather_write() allocates memory as local variable for i2c_master_send(), after finishing the master transfer, the allocated memory should be freed. The kmemleak is reported:
unreferenced object 0xffff80231e7dba80 (size 64): comm "hwclock", pid 27762, jiffies 4296880075 (age 356.944s) hex dump (first 32 bytes): 03 00 12 03 19 02 11 13 00 80 98 18 00 00 ff ff ................ 00 50 00 00 00 00 00 00 02 00 00 00 00 00 00 00 .P.............. backtrace: [<ffff000008221398>] create_object+0xf8/0x278 [<ffff000008a96264>] kmemleak_alloc+0x74/0xa0 [<ffff00000821070c>] __kmalloc+0x1ac/0x348 [<ffff0000087ed1dc>] pcf2127_i2c_gather_write+0x54/0xf8 [<ffff0000085fd9d4>] _regmap_raw_write+0x464/0x850 [<ffff0000085fe3f4>] regmap_bulk_write+0x1a4/0x348 [<ffff0000087ed32c>] pcf2127_rtc_set_time+0xac/0xe8 [<ffff0000087eaad8>] rtc_set_time+0x80/0x138 [<ffff0000087ebfb0>] rtc_dev_ioctl+0x398/0x610 [<ffff00000823f2c0>] do_vfs_ioctl+0xb0/0x848 [<ffff00000823fae4>] SyS_ioctl+0x8c/0xa8 [<ffff000008083ac0>] el0_svc_naked+0x34/0x38 [<ffffffffffffffff>] 0xffffffffffffffff
Signed-off-by: Xulin Sun xulin.sun@windriver.com Signed-off-by: Alexandre Belloni alexandre.belloni@bootlin.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/rtc/rtc-pcf2127.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index f33447c5db85..9f1b14bf91ae 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -248,6 +248,9 @@ static int pcf2127_i2c_gather_write(void *context, memcpy(buf + 1, val, val_size);
ret = i2c_master_send(client, buf, val_size + 1); + + kfree(buf); + if (ret != val_size + 1) return ret < 0 ? ret : -EIO;
From: Ard Biesheuvel ard.biesheuvel@linaro.org
[ Upstream commit 508a1c4df085a547187eed346f1bfe5e381797f1 ]
The simd wrapper's skcipher request context structure consists of a single subrequest whose size is taken from the subordinate skcipher. However, in simd_skcipher_init(), the reqsize that is retrieved is not from the subordinate skcipher but from the cryptd request structure, whose size is completely unrelated to the actual wrapped skcipher.
Reported-by: Qian Cai cai@gmx.us Signed-off-by: Ard Biesheuvel ard.biesheuvel@linaro.org Tested-by: Qian Cai cai@gmx.us Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Sasha Levin sashal@kernel.org --- crypto/simd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/crypto/simd.c b/crypto/simd.c index 88203370a62f..894c62944106 100644 --- a/crypto/simd.c +++ b/crypto/simd.c @@ -126,8 +126,9 @@ static int simd_skcipher_init(struct crypto_skcipher *tfm)
ctx->cryptd_tfm = cryptd_tfm;
- reqsize = sizeof(struct skcipher_request); - reqsize += crypto_skcipher_reqsize(&cryptd_tfm->base); + reqsize = crypto_skcipher_reqsize(cryptd_skcipher_child(cryptd_tfm)); + reqsize = max(reqsize, crypto_skcipher_reqsize(&cryptd_tfm->base)); + reqsize += sizeof(struct skcipher_request);
crypto_skcipher_set_reqsize(tfm, reqsize);
From: Jens Axboe axboe@kernel.dk
[ Upstream commit de7b75d82f70c5469675b99ad632983c50b6f7e7 ]
LKP recently reported a hang at bootup in the floppy code:
[ 245.678853] INFO: task mount:580 blocked for more than 120 seconds. [ 245.679906] Tainted: G T 4.19.0-rc6-00172-ga9f38e1 #1 [ 245.680959] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 245.682181] mount D 6372 580 1 0x00000004 [ 245.683023] Call Trace: [ 245.683425] __schedule+0x2df/0x570 [ 245.683975] schedule+0x2d/0x80 [ 245.684476] schedule_timeout+0x19d/0x330 [ 245.685090] ? wait_for_common+0xa5/0x170 [ 245.685735] wait_for_common+0xac/0x170 [ 245.686339] ? do_sched_yield+0x90/0x90 [ 245.686935] wait_for_completion+0x12/0x20 [ 245.687571] __floppy_read_block_0+0xfb/0x150 [ 245.688244] ? floppy_resume+0x40/0x40 [ 245.688844] floppy_revalidate+0x20f/0x240 [ 245.689486] check_disk_change+0x43/0x60 [ 245.690087] floppy_open+0x1ea/0x360 [ 245.690653] __blkdev_get+0xb4/0x4d0 [ 245.691212] ? blkdev_get+0x1db/0x370 [ 245.691777] blkdev_get+0x1f3/0x370 [ 245.692351] ? path_put+0x15/0x20 [ 245.692871] ? lookup_bdev+0x4b/0x90 [ 245.693539] blkdev_get_by_path+0x3d/0x80 [ 245.694165] mount_bdev+0x2a/0x190 [ 245.694695] squashfs_mount+0x10/0x20 [ 245.695271] ? squashfs_alloc_inode+0x30/0x30 [ 245.695960] mount_fs+0xf/0x90 [ 245.696451] vfs_kern_mount+0x43/0x130 [ 245.697036] do_mount+0x187/0xc40 [ 245.697563] ? memdup_user+0x28/0x50 [ 245.698124] ksys_mount+0x60/0xc0 [ 245.698639] sys_mount+0x19/0x20 [ 245.699167] do_int80_syscall_32+0x61/0x130 [ 245.699813] entry_INT80_32+0xc7/0xc7
showing that we never complete that read request. The reason is that the completion setup is racy - it initializes the completion event AFTER submitting the IO, which means that the IO could complete before/during the init. If it does, we are passing garbage to complete() and we may sleep forever waiting for the event to occur.
Fixes: 7b7b68bba5ef ("floppy: bail out in open() if drive is not responding to block0 read") Reviewed-by: Omar Sandoval osandov@fb.com Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/block/floppy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 3d0287e212fe..a7f212ea17bf 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4146,10 +4146,11 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive) bio.bi_end_io = floppy_rb0_cb; bio_set_op_attrs(&bio, REQ_OP_READ, 0);
+ init_completion(&cbdata.complete); + submit_bio(&bio); process_fd_request();
- init_completion(&cbdata.complete); wait_for_completion(&cbdata.complete);
__free_page(page);
From: Michael Ellerman mpe@ellerman.id.au
[ Upstream commit 43c6494fa1499912c8177e71450c0279041152a6 ]
Back in 2006 Ben added some workarounds for a misbehaviour in the Spider IO bridge used on early Cell machines, see commit 014da7ff47b5 ("[POWERPC] Cell "Spider" MMIO workarounds"). Later these were made to be generic, ie. not tied specifically to Spider.
The code stashes a token in the high bits (59-48) of virtual addresses used for IO (eg. returned from ioremap()). This works fine when using the Hash MMU, but when we're using the Radix MMU the bits used for the token overlap with some of the bits of the virtual address.
This is because the maximum virtual address is larger with Radix, up to c00fffffffffffff, and in fact we use that high part of the address range for ioremap(), see RADIX_KERN_IO_START.
As it happens the bits that are used overlap with the bits that differentiate an IO address vs a linear map address. If the resulting address lies outside the linear mapping we will crash (see below), if not we just corrupt memory.
virtio-pci 0000:00:00.0: Using 64-bit direct DMA at offset 800000000000000 Unable to handle kernel paging request for data at address 0xc000000080000014 ... CFAR: c000000000626b98 DAR: c000000080000014 DSISR: 42000000 IRQMASK: 0 GPR00: c0000000006c54fc c00000003e523378 c0000000016de600 0000000000000000 GPR04: c00c000080000014 0000000000000007 0fffffff000affff 0000000000000030 ^^^^ ... NIP [c000000000626c5c] .iowrite8+0xec/0x100 LR [c0000000006c992c] .vp_reset+0x2c/0x90 Call Trace: .pci_bus_read_config_dword+0xc4/0x120 (unreliable) .register_virtio_device+0x13c/0x1c0 .virtio_pci_probe+0x148/0x1f0 .local_pci_probe+0x68/0x140 .pci_device_probe+0x164/0x220 .really_probe+0x274/0x3b0 .driver_probe_device+0x80/0x170 .__driver_attach+0x14c/0x150 .bus_for_each_dev+0xb8/0x130 .driver_attach+0x34/0x50 .bus_add_driver+0x178/0x2f0 .driver_register+0x90/0x1a0 .__pci_register_driver+0x6c/0x90 .virtio_pci_driver_init+0x2c/0x40 .do_one_initcall+0x64/0x280 .kernel_init_freeable+0x36c/0x474 .kernel_init+0x24/0x160 .ret_from_kernel_thread+0x58/0x7c
This hasn't been a problem because CONFIG_PPC_IO_WORKAROUNDS which enables this code is usually not enabled. It is only enabled when it's selected by PPC_CELL_NATIVE which is only selected by PPC_IBM_CELL_BLADE and that in turn depends on BIG_ENDIAN. So in order to hit the bug you need to build a big endian kernel, with IBM Cell Blade support enabled, as well as Radix MMU support, and then boot that on Power9 using Radix MMU.
Still we can fix the bug, so let's do that. We simply use fewer bits for the token, taking the union of the restrictions on the address from both Hash and Radix, we end up with 8 bits we can use for the token. The only user of the token is iowa_mem_find_bus() which only supports 8 token values, so 8 bits is plenty for that.
Fixes: 566ca99af026 ("powerpc/mm/radix: Add dummy radix_enabled()") Signed-off-by: Michael Ellerman mpe@ellerman.id.au Signed-off-by: Sasha Levin sashal@kernel.org --- arch/powerpc/include/asm/io.h | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-)
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 422f99cf9924..e6d33eed8202 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -287,19 +287,13 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src, * their hooks, a bitfield is reserved for use by the platform near the * top of MMIO addresses (not PIO, those have to cope the hard way). * - * This bit field is 12 bits and is at the top of the IO virtual - * addresses PCI_IO_INDIRECT_TOKEN_MASK. + * The highest address in the kernel virtual space are: * - * The kernel virtual space is thus: + * d0003fffffffffff # with Hash MMU + * c00fffffffffffff # with Radix MMU * - * 0xD000000000000000 : vmalloc - * 0xD000080000000000 : PCI PHB IO space - * 0xD000080080000000 : ioremap - * 0xD0000fffffffffff : end of ioremap region - * - * Since the top 4 bits are reserved as the region ID, we use thus - * the next 12 bits and keep 4 bits available for the future if the - * virtual address space is ever to be extended. + * The top 4 bits are reserved as the region ID on hash, leaving us 8 bits + * that can be used for the field. * * The direct IO mapping operations will then mask off those bits * before doing the actual access, though that only happen when @@ -311,8 +305,8 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src, */
#ifdef CONFIG_PPC_INDIRECT_MMIO -#define PCI_IO_IND_TOKEN_MASK 0x0fff000000000000ul -#define PCI_IO_IND_TOKEN_SHIFT 48 +#define PCI_IO_IND_TOKEN_SHIFT 52 +#define PCI_IO_IND_TOKEN_MASK (0xfful << PCI_IO_IND_TOKEN_SHIFT) #define PCI_FIX_ADDR(addr) \ ((PCI_IO_ADDR)(((unsigned long)(addr)) & ~PCI_IO_IND_TOKEN_MASK)) #define PCI_GET_ADDR_TOKEN(addr) \
From: Kan Liang kan.liang@linux.intel.com
[ Upstream commit c10a8de0d32e95b0b8c7c17b6dc09baea5a5a899 ]
KabyLake and CoffeeLake CPUs have the same client uncore events as SkyLake.
Add the PCI IDs for the KabyLake Y, U, S processor lines and CoffeeLake U, H, S processor lines.
Signed-off-by: Kan Liang kan.liang@linux.intel.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Cc: Alexander Shishkin alexander.shishkin@linux.intel.com Cc: Arnaldo Carvalho de Melo acme@redhat.com Cc: Jiri Olsa jolsa@redhat.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Peter Zijlstra peterz@infradead.org Cc: Stephane Eranian eranian@google.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Vince Weaver vincent.weaver@maine.edu Link: http://lkml.kernel.org/r/20181019170419.378-1-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/events/intel/uncore_snb.c | 115 ++++++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 1 deletion(-)
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index aee5e8496be4..aa4e6f4e6a01 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -15,6 +15,25 @@ #define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 #define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f #define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f +#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c +#define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904 +#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914 +#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f +#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f +#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc +#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0 +#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10 +#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4 +#define PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC 0x3e0f +#define PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC 0x3e1f +#define PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC 0x3ec2 +#define PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC 0x3e30 +#define PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC 0x3e18 +#define PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC 0x3ec6 +#define PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC 0x3e31 +#define PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC 0x3e33 +#define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC 0x3eca +#define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC 0x3e32
/* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -632,7 +651,82 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, - + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_Y_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_UQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4H_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6H_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, };
@@ -681,6 +775,25 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Quad Core */ IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Dual Core */ IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Quad Core */ + IMC_DEV(KBL_Y_IMC, &skl_uncore_pci_driver), /* 7th Gen Core Y */ + IMC_DEV(KBL_U_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U */ + IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U Quad Core */ + IMC_DEV(KBL_SD_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Dual Core */ + IMC_DEV(KBL_SQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Quad Core */ + IMC_DEV(CFL_2U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 2 Cores */ + IMC_DEV(CFL_4U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 4 Cores */ + IMC_DEV(CFL_4H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 4 Cores */ + IMC_DEV(CFL_6H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 6 Cores */ + IMC_DEV(CFL_2S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 2 Cores Desktop */ + IMC_DEV(CFL_4S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Desktop */ + IMC_DEV(CFL_6S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Desktop */ + IMC_DEV(CFL_8S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Desktop */ + IMC_DEV(CFL_4S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Work Station */ + IMC_DEV(CFL_6S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Work Station */ + IMC_DEV(CFL_8S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Work Station */ + IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Server */ + IMC_DEV(CFL_6S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Server */ + IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Server */ { /* end marker */ } };
From: Russell King rmk+kernel@armlinux.org.uk
[ Upstream commit 899a42f836678a595f7d2bc36a5a0c2b03d08cbc ]
Move lookup_processor_type() out of the __init section so it is callable from (eg) the secondary startup code during hotplug.
Reviewed-by: Julien Thierry julien.thierry@arm.com Signed-off-by: Russell King rmk+kernel@armlinux.org.uk Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/kernel/head-common.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 8733012d231f..7e662bdd5cb3 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -122,6 +122,9 @@ __mmap_switched_data: .long init_thread_union + THREAD_START_SP @ sp .size __mmap_switched_data, . - __mmap_switched_data
+ __FINIT + .text + /* * This provides a C-API version of __lookup_processor_type */ @@ -133,9 +136,6 @@ ENTRY(lookup_processor_type) ldmfd sp!, {r4 - r6, r9, pc} ENDPROC(lookup_processor_type)
- __FINIT - .text - /* * Read processor ID register (CP#15, CR0), and look up in the linker-built * supported processor list. Note that we can't use the absolute addresses
Hi Sasha,
We need to keep track of which Spectre patches have been backported and which haven't. David Long has been doing the backport work, which doesn't include all the patches in my present spectre branch. You've picked up the last five, meaning there's a bunch in the middle of the entire series which haven't yet been considered - for example, the Spectre variant 1.1 patches.
The entire series of 32-bit ARM Spectre patches can be listed in mainline via:
$ git log v4.16..383fb3ee8024
where almost all of those need to be applied to stable kernels, with possible manual back-porting where they don't apply.
On Thu, Nov 22, 2018 at 02:54:41PM -0500, Sasha Levin wrote:
From: Russell King rmk+kernel@armlinux.org.uk
[ Upstream commit 899a42f836678a595f7d2bc36a5a0c2b03d08cbc ]
Move lookup_processor_type() out of the __init section so it is callable from (eg) the secondary startup code during hotplug.
Reviewed-by: Julien Thierry julien.thierry@arm.com Signed-off-by: Russell King rmk+kernel@armlinux.org.uk Signed-off-by: Sasha Levin sashal@kernel.org
arch/arm/kernel/head-common.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 8733012d231f..7e662bdd5cb3 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -122,6 +122,9 @@ __mmap_switched_data: .long init_thread_union + THREAD_START_SP @ sp .size __mmap_switched_data, . - __mmap_switched_data
- __FINIT
- .text
/*
- This provides a C-API version of __lookup_processor_type
*/ @@ -133,9 +136,6 @@ ENTRY(lookup_processor_type) ldmfd sp!, {r4 - r6, r9, pc} ENDPROC(lookup_processor_type)
- __FINIT
- .text
/*
- Read processor ID register (CP#15, CR0), and look up in the linker-built
- supported processor list. Note that we can't use the absolute addresses
-- 2.17.1
On Fri, Nov 23, 2018 at 12:02:54AM +0000, Russell King - ARM Linux wrote:
Hi Sasha,
We need to keep track of which Spectre patches have been backported and which haven't. David Long has been doing the backport work, which doesn't include all the patches in my present spectre branch. You've picked up the last five, meaning there's a bunch in the middle of the entire series which haven't yet been considered - for example, the Spectre variant 1.1 patches.
I'll drop all the ones you pointed out from my branches. Sometimes it's difficult to tell if a certain patch is part of the spectre fix.
The entire series of 32-bit ARM Spectre patches can be listed in mainline via:
$ git log v4.16..383fb3ee8024
where almost all of those need to be applied to stable kernels, with possible manual back-porting where they don't apply.
Is it something that will be backported at some point?
-- Thanks, Sasha
On Wed, Nov 28, 2018 at 09:12:52AM -0500, Sasha Levin wrote:
On Fri, Nov 23, 2018 at 12:02:54AM +0000, Russell King - ARM Linux wrote:
Hi Sasha,
We need to keep track of which Spectre patches have been backported and which haven't. David Long has been doing the backport work, which doesn't include all the patches in my present spectre branch. You've picked up the last five, meaning there's a bunch in the middle of the entire series which haven't yet been considered - for example, the Spectre variant 1.1 patches.
I'll drop all the ones you pointed out from my branches. Sometimes it's difficult to tell if a certain patch is part of the spectre fix.
Given that all the 32-bit ARM Spectre patches are sequential in a branch, it shouldn't be too difficult as it's possible to list all the commit IDs using:
$ git rev-list v4.16..383fb3ee8024
where 383fb3ee8024 is the current head. I'm not anticipating at this time anything further.
The entire series of 32-bit ARM Spectre patches can be listed in mainline via:
$ git log v4.16..383fb3ee8024
where almost all of those need to be applied to stable kernels, with possible manual back-porting where they don't apply.
Is it something that will be backported at some point?
Yes, as I mentioned, this is work that David Long is undertaking, with a separate review process before submitting the backported patches for inclusion in the appropriate stable tree.
From: Russell King rmk+kernel@armlinux.org.uk
[ Upstream commit 65987a8553061515b5851b472081aedb9837a391 ]
Split out the lookup of the processor type and associated error handling from the rest of setup_processor() - we will need to use this in the secondary CPU bringup path for big.Little Spectre variant 2 mitigation.
Reviewed-by: Julien Thierry julien.thierry@arm.com Signed-off-by: Russell King rmk+kernel@armlinux.org.uk Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/include/asm/cputype.h | 1 + arch/arm/kernel/setup.c | 31 +++++++++++++++++++------------ 2 files changed, 20 insertions(+), 12 deletions(-)
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 3379c2c684c2..25d523185c6a 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -107,6 +107,7 @@ #define ARM_CPU_PART_SCORPION 0x510002d0
extern unsigned int processor_id; +struct proc_info_list *lookup_processor(u32 midr);
#ifdef CONFIG_CPU_CP15 #define read_cpuid(reg) \ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 8e9a3e40d949..1a041ad4881e 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -667,22 +667,29 @@ static void __init smp_build_mpidr_hash(void) } #endif
-static void __init setup_processor(void) +/* + * locate processor in the list of supported processor types. The linker + * builds this table for us from the entries in arch/arm/mm/proc-*.S + */ +struct proc_info_list *lookup_processor(u32 midr) { - struct proc_info_list *list; + struct proc_info_list *list = lookup_processor_type(midr);
- /* - * locate processor in the list of supported processor - * types. The linker builds this table for us from the - * entries in arch/arm/mm/proc-*.S - */ - list = lookup_processor_type(read_cpuid_id()); if (!list) { - pr_err("CPU configuration botched (ID %08x), unable to continue.\n", - read_cpuid_id()); - while (1); + pr_err("CPU%u: configuration botched (ID %08x), CPU halted\n", + smp_processor_id(), midr); + while (1) + /* can't use cpu_relax() here as it may require MMU setup */; }
+ return list; +} + +static void __init setup_processor(void) +{ + unsigned int midr = read_cpuid_id(); + struct proc_info_list *list = lookup_processor(midr); + cpu_name = list->cpu_name; __cpu_architecture = __get_cpu_architecture();
@@ -700,7 +707,7 @@ static void __init setup_processor(void) #endif
pr_info("CPU: %s [%08x] revision %d (ARMv%s), cr=%08lx\n", - cpu_name, read_cpuid_id(), read_cpuid_id() & 15, + list->cpu_name, midr, midr & 15, proc_arch[cpu_architecture()], get_cr());
snprintf(init_utsname()->machine, __NEW_UTS_LEN + 1, "%s%c",
From: Russell King rmk+kernel@armlinux.org.uk
[ Upstream commit 945aceb1db8885d3a35790cf2e810f681db52756 ]
Call the per-processor type check_bugs() method in the same way as we do other per-processor functions - move the "processor." detail into proc-fns.h.
Reviewed-by: Julien Thierry julien.thierry@arm.com Signed-off-by: Russell King rmk+kernel@armlinux.org.uk Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/include/asm/proc-fns.h | 1 + arch/arm/kernel/bugs.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index e25f4392e1b2..30c499146320 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -99,6 +99,7 @@ extern void cpu_do_suspend(void *); extern void cpu_do_resume(void *); #else #define cpu_proc_init processor._proc_init +#define cpu_check_bugs processor.check_bugs #define cpu_proc_fin processor._proc_fin #define cpu_reset processor.reset #define cpu_do_idle processor._do_idle diff --git a/arch/arm/kernel/bugs.c b/arch/arm/kernel/bugs.c index 7be511310191..d41d3598e5e5 100644 --- a/arch/arm/kernel/bugs.c +++ b/arch/arm/kernel/bugs.c @@ -6,8 +6,8 @@ void check_other_bugs(void) { #ifdef MULTI_CPU - if (processor.check_bugs) - processor.check_bugs(); + if (cpu_check_bugs) + cpu_check_bugs(); #endif }
From: Russell King rmk+kernel@armlinux.org.uk
[ Upstream commit e209950fdd065d2cc46e6338e47e52841b830cba ]
Allow the way we access members of the processor vtable to be changed at compile time. We will need to move to per-CPU vtables to fix the Spectre variant 2 issues on big.Little systems.
However, we have a couple of calls that do not need the vtable treatment, and indeed cause a kernel warning due to the (later) use of smp_processor_id(), so also introduce the PROC_TABLE macro for these which always use CPU 0's function pointers.
Reviewed-by: Julien Thierry julien.thierry@arm.com Signed-off-by: Russell King rmk+kernel@armlinux.org.uk Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/include/asm/proc-fns.h | 39 ++++++++++++++++++++++----------- arch/arm/kernel/setup.c | 4 +--- 2 files changed, 27 insertions(+), 16 deletions(-)
diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index 30c499146320..c259cc49c641 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -23,7 +23,7 @@ struct mm_struct; /* * Don't change this structure - ASM code relies on it. */ -extern struct processor { +struct processor { /* MISC * get data abort address/flags */ @@ -79,9 +79,13 @@ extern struct processor { unsigned int suspend_size; void (*do_suspend)(void *); void (*do_resume)(void *); -} processor; +};
#ifndef MULTI_CPU +static inline void init_proc_vtable(const struct processor *p) +{ +} + extern void cpu_proc_init(void); extern void cpu_proc_fin(void); extern int cpu_do_idle(void); @@ -98,18 +102,27 @@ extern void cpu_reset(unsigned long addr, bool hvc) __attribute__((noreturn)); extern void cpu_do_suspend(void *); extern void cpu_do_resume(void *); #else -#define cpu_proc_init processor._proc_init -#define cpu_check_bugs processor.check_bugs -#define cpu_proc_fin processor._proc_fin -#define cpu_reset processor.reset -#define cpu_do_idle processor._do_idle -#define cpu_dcache_clean_area processor.dcache_clean_area -#define cpu_set_pte_ext processor.set_pte_ext -#define cpu_do_switch_mm processor.switch_mm
-/* These three are private to arch/arm/kernel/suspend.c */ -#define cpu_do_suspend processor.do_suspend -#define cpu_do_resume processor.do_resume +extern struct processor processor; +#define PROC_VTABLE(f) processor.f +#define PROC_TABLE(f) processor.f +static inline void init_proc_vtable(const struct processor *p) +{ + processor = *p; +} + +#define cpu_proc_init PROC_VTABLE(_proc_init) +#define cpu_check_bugs PROC_VTABLE(check_bugs) +#define cpu_proc_fin PROC_VTABLE(_proc_fin) +#define cpu_reset PROC_VTABLE(reset) +#define cpu_do_idle PROC_VTABLE(_do_idle) +#define cpu_dcache_clean_area PROC_TABLE(dcache_clean_area) +#define cpu_set_pte_ext PROC_TABLE(set_pte_ext) +#define cpu_do_switch_mm PROC_VTABLE(switch_mm) + +/* These two are private to arch/arm/kernel/suspend.c */ +#define cpu_do_suspend PROC_VTABLE(do_suspend) +#define cpu_do_resume PROC_VTABLE(do_resume) #endif
extern void cpu_resume(void); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 1a041ad4881e..c1588e31aa83 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -693,9 +693,7 @@ static void __init setup_processor(void) cpu_name = list->cpu_name; __cpu_architecture = __get_cpu_architecture();
-#ifdef MULTI_CPU - processor = *list->proc; -#endif + init_proc_vtable(list->proc); #ifdef MULTI_TLB cpu_tlb = *list->tlb; #endif
From: Russell King rmk+kernel@armlinux.org.uk
[ Upstream commit 383fb3ee8024d596f488d2dbaf45e572897acbdb ]
In big.Little systems, some CPUs require the Spectre workarounds in paths such as the context switch, but other CPUs do not. In order to handle these differences, we need per-CPU vtables.
We are unable to use the kernel's per-CPU variables to support this as per-CPU is not initialised at times when we need access to the vtables, so we have to use an array indexed by logical CPU number.
We use an array-of-pointers to avoid having function pointers in the kernel's read/write .data section.
Reviewed-by: Julien Thierry julien.thierry@arm.com Signed-off-by: Russell King rmk+kernel@armlinux.org.uk Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/include/asm/proc-fns.h | 23 +++++++++++++++++++++++ arch/arm/kernel/setup.c | 5 +++++ arch/arm/kernel/smp.c | 31 +++++++++++++++++++++++++++++++ arch/arm/mm/proc-v7-bugs.c | 17 ++--------------- 4 files changed, 61 insertions(+), 15 deletions(-)
diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index c259cc49c641..e1b6f280ab08 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -104,12 +104,35 @@ extern void cpu_do_resume(void *); #else
extern struct processor processor; +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) +#include <linux/smp.h> +/* + * This can't be a per-cpu variable because we need to access it before + * per-cpu has been initialised. We have a couple of functions that are + * called in a pre-emptible context, and so can't use smp_processor_id() + * there, hence PROC_TABLE(). We insist in init_proc_vtable() that the + * function pointers for these are identical across all CPUs. + */ +extern struct processor *cpu_vtable[]; +#define PROC_VTABLE(f) cpu_vtable[smp_processor_id()]->f +#define PROC_TABLE(f) cpu_vtable[0]->f +static inline void init_proc_vtable(const struct processor *p) +{ + unsigned int cpu = smp_processor_id(); + *cpu_vtable[cpu] = *p; + WARN_ON_ONCE(cpu_vtable[cpu]->dcache_clean_area != + cpu_vtable[0]->dcache_clean_area); + WARN_ON_ONCE(cpu_vtable[cpu]->set_pte_ext != + cpu_vtable[0]->set_pte_ext); +} +#else #define PROC_VTABLE(f) processor.f #define PROC_TABLE(f) processor.f static inline void init_proc_vtable(const struct processor *p) { processor = *p; } +#endif
#define cpu_proc_init PROC_VTABLE(_proc_init) #define cpu_check_bugs PROC_VTABLE(check_bugs) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index c1588e31aa83..a6d27284105a 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -115,6 +115,11 @@ EXPORT_SYMBOL(elf_hwcap2);
#ifdef MULTI_CPU struct processor processor __ro_after_init; +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) +struct processor *cpu_vtable[NR_CPUS] = { + [0] = &processor, +}; +#endif #endif #ifdef MULTI_TLB struct cpu_tlb_fns cpu_tlb __ro_after_init; diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index e61af0600133..f6b1c9d2e178 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -42,6 +42,7 @@ #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> +#include <asm/procinfo.h> #include <asm/processor.h> #include <asm/sections.h> #include <asm/tlbflush.h> @@ -102,6 +103,30 @@ static unsigned long get_arch_pgd(pgd_t *pgd) #endif }
+#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) +static int secondary_biglittle_prepare(unsigned int cpu) +{ + if (!cpu_vtable[cpu]) + cpu_vtable[cpu] = kzalloc(sizeof(*cpu_vtable[cpu]), GFP_KERNEL); + + return cpu_vtable[cpu] ? 0 : -ENOMEM; +} + +static void secondary_biglittle_init(void) +{ + init_proc_vtable(lookup_processor(read_cpuid_id())->proc); +} +#else +static int secondary_biglittle_prepare(unsigned int cpu) +{ + return 0; +} + +static void secondary_biglittle_init(void) +{ +} +#endif + int __cpu_up(unsigned int cpu, struct task_struct *idle) { int ret; @@ -109,6 +134,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) if (!smp_ops.smp_boot_secondary) return -ENOSYS;
+ ret = secondary_biglittle_prepare(cpu); + if (ret) + return ret; + /* * We need to tell the secondary core where to find * its stack and the page tables. @@ -360,6 +389,8 @@ asmlinkage void secondary_start_kernel(void) struct mm_struct *mm = &init_mm; unsigned int cpu;
+ secondary_biglittle_init(); + /* * The identity mapping is uncached (strongly ordered), so * switch away from it before attempting any exclusive accesses. diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 5544b82a2e7a..9a07916af8dd 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -52,8 +52,6 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A17: case ARM_CPU_PART_CORTEX_A73: case ARM_CPU_PART_CORTEX_A75: - if (processor.switch_mm != cpu_v7_bpiall_switch_mm) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = harden_branch_predictor_bpiall; spectre_v2_method = "BPIALL"; @@ -61,8 +59,6 @@ static void cpu_v7_spectre_init(void)
case ARM_CPU_PART_CORTEX_A15: case ARM_CPU_PART_BRAHMA_B15: - if (processor.switch_mm != cpu_v7_iciallu_switch_mm) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = harden_branch_predictor_iciallu; spectre_v2_method = "ICIALLU"; @@ -88,11 +84,9 @@ static void cpu_v7_spectre_init(void) ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 != 0) break; - if (processor.switch_mm != cpu_v7_hvc_switch_mm && cpu) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = call_hvc_arch_workaround_1; - processor.switch_mm = cpu_v7_hvc_switch_mm; + cpu_do_switch_mm = cpu_v7_hvc_switch_mm; spectre_v2_method = "hypervisor"; break;
@@ -101,11 +95,9 @@ static void cpu_v7_spectre_init(void) ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 != 0) break; - if (processor.switch_mm != cpu_v7_smc_switch_mm && cpu) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = call_smc_arch_workaround_1; - processor.switch_mm = cpu_v7_smc_switch_mm; + cpu_do_switch_mm = cpu_v7_smc_switch_mm; spectre_v2_method = "firmware"; break;
@@ -119,11 +111,6 @@ static void cpu_v7_spectre_init(void) if (spectre_v2_method) pr_info("CPU%u: Spectre v2: using %s workaround\n", smp_processor_id(), spectre_v2_method); - return; - -bl_error: - pr_err("CPU%u: Spectre v2: incorrect context switching function, system vulnerable\n", - cpu); } #else static void cpu_v7_spectre_init(void)
From: Trond Myklebust trond.myklebust@hammerspace.com
[ Upstream commit e3d5e573a54dabdc0f9f3cb039d799323372b251 ]
Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Sasha Levin sashal@kernel.org --- net/sunrpc/auth_generic.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-)
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index f1df9837f1ac..1ac08dcbf85d 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -281,13 +281,7 @@ static bool generic_key_to_expire(struct rpc_cred *cred) { struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred; - bool ret; - - get_rpccred(cred); - ret = test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags); - put_rpccred(cred); - - return ret; + return test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags); }
static const struct rpc_credops generic_credops = {
From: Prarit Bhargava prarit@redhat.com
[ Upstream commit c2b94c72d93d0929f48157eef128c4f9d2e603ce ]
gcc 8.1.0 warns with:
kernel/debug/kdb/kdb_support.c: In function ‘kallsyms_symbol_next’: kernel/debug/kdb/kdb_support.c:239:4: warning: ‘strncpy’ specified bound depends on the length of the source argument [-Wstringop-overflow=] strncpy(prefix_name, name, strlen(name)+1); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/debug/kdb/kdb_support.c:239:31: note: length computed here
Use strscpy() with the destination buffer size, and use ellipses when displaying truncated symbols.
v2: Use strscpy()
Signed-off-by: Prarit Bhargava prarit@redhat.com Cc: Jonathan Toppins jtoppins@redhat.com Cc: Jason Wessel jason.wessel@windriver.com Cc: Daniel Thompson daniel.thompson@linaro.org Cc: kgdb-bugreport@lists.sourceforge.net Reviewed-by: Daniel Thompson daniel.thompson@linaro.org Signed-off-by: Daniel Thompson daniel.thompson@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/debug/kdb/kdb_io.c | 15 +++++++++------ kernel/debug/kdb/kdb_private.h | 2 +- kernel/debug/kdb/kdb_support.c | 10 +++++----- 3 files changed, 15 insertions(+), 12 deletions(-)
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index ed5d34925ad0..6a4b41484afe 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -216,7 +216,7 @@ static char *kdb_read(char *buffer, size_t bufsize) int count; int i; int diag, dtab_count; - int key; + int key, buf_size, ret;
diag = kdbgetintenv("DTABCOUNT", &dtab_count); @@ -336,9 +336,8 @@ static char *kdb_read(char *buffer, size_t bufsize) else p_tmp = tmpbuffer; len = strlen(p_tmp); - count = kallsyms_symbol_complete(p_tmp, - sizeof(tmpbuffer) - - (p_tmp - tmpbuffer)); + buf_size = sizeof(tmpbuffer) - (p_tmp - tmpbuffer); + count = kallsyms_symbol_complete(p_tmp, buf_size); if (tab == 2 && count > 0) { kdb_printf("\n%d symbols are found.", count); if (count > dtab_count) { @@ -350,9 +349,13 @@ static char *kdb_read(char *buffer, size_t bufsize) } kdb_printf("\n"); for (i = 0; i < count; i++) { - if (WARN_ON(!kallsyms_symbol_next(p_tmp, i))) + ret = kallsyms_symbol_next(p_tmp, i, buf_size); + if (WARN_ON(!ret)) break; - kdb_printf("%s ", p_tmp); + if (ret != -E2BIG) + kdb_printf("%s ", p_tmp); + else + kdb_printf("%s... ", p_tmp); *(p_tmp + len) = '\0'; } if (i >= dtab_count) diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index fc224fbcf954..f2158e463a0f 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -83,7 +83,7 @@ typedef struct __ksymtab { unsigned long sym_start; unsigned long sym_end; } kdb_symtab_t; -extern int kallsyms_symbol_next(char *prefix_name, int flag); +extern int kallsyms_symbol_next(char *prefix_name, int flag, int buf_size); extern int kallsyms_symbol_complete(char *prefix_name, int max_len);
/* Exported Symbols for kernel loadable modules to use. */ diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index d35cc2d3a4cc..2aed4a33521b 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -221,11 +221,13 @@ int kallsyms_symbol_complete(char *prefix_name, int max_len) * Parameters: * prefix_name prefix of a symbol name to lookup * flag 0 means search from the head, 1 means continue search. + * buf_size maximum length that can be written to prefix_name + * buffer * Returns: * 1 if a symbol matches the given prefix. * 0 if no string found */ -int kallsyms_symbol_next(char *prefix_name, int flag) +int kallsyms_symbol_next(char *prefix_name, int flag, int buf_size) { int prefix_len = strlen(prefix_name); static loff_t pos; @@ -235,10 +237,8 @@ int kallsyms_symbol_next(char *prefix_name, int flag) pos = 0;
while ((name = kdb_walk_kallsyms(&pos))) { - if (strncmp(name, prefix_name, prefix_len) == 0) { - strncpy(prefix_name, name, strlen(name)+1); - return 1; - } + if (!strncmp(name, prefix_name, prefix_len)) + return strscpy(prefix_name, name, buf_size); } return 0; }
From: Satheesh Rajendran sathnaga@linux.vnet.ibm.com
[ Upstream commit 437ccdc8ce629470babdda1a7086e2f477048cbd ]
When VPHN function is not supported and during cpu hotplug event, kernel prints message 'VPHN function not supported. Disabling polling...'. Currently it prints on every hotplug event, it floods dmesg when a KVM guest tries to hotplug huge number of vcpus, let's just print once and suppress further kernel prints.
Signed-off-by: Satheesh Rajendran sathnaga@linux.vnet.ibm.com Signed-off-by: Michael Ellerman mpe@ellerman.id.au Signed-off-by: Sasha Levin sashal@kernel.org --- arch/powerpc/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 9fead0796364..40fb9a8835fe 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1261,7 +1261,7 @@ static long vphn_get_associativity(unsigned long cpu,
switch (rc) { case H_FUNCTION: - printk(KERN_INFO + printk_once(KERN_INFO "VPHN is not supported. Disabling polling...\n"); stop_topology_update(); break;
From: Ard Biesheuvel ard.biesheuvel@linaro.org
[ Upstream commit 33412b8673135b18ea42beb7f5117ed0091798b6 ]
Commit:
3ea86495aef2 ("efi/arm: preserve early mapping of UEFI memory map longer for BGRT")
deferred the unmap of the early mapping of the UEFI memory map to accommodate the ACPI BGRT code, which looks up the memory type that backs the BGRT table to validate it against the requirements of the UEFI spec.
Unfortunately, this causes problems on ARM, which does not permit early mappings to persist after paging_init() is called, resulting in a WARN() splat. Since we don't support the BGRT table on ARM anway, let's revert ARM to the old behaviour, which is to take down the early mapping at the end of efi_init().
Signed-off-by: Ard Biesheuvel ard.biesheuvel@linaro.org Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: linux-efi@vger.kernel.org Fixes: 3ea86495aef2 ("efi/arm: preserve early mapping of UEFI memory ...") Link: http://lkml.kernel.org/r/20181114175544.12860-3-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/firmware/efi/arm-init.c | 4 ++++ drivers/firmware/efi/arm-runtime.c | 2 +- drivers/firmware/efi/memmap.c | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index a7c522eac640..312f9f32e168 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -265,6 +265,10 @@ void __init efi_init(void) (params.mmap & ~PAGE_MASK)));
init_screen_info(); + + /* ARM does not permit early mappings to persist across paging_init() */ + if (IS_ENABLED(CONFIG_ARM)) + efi_memmap_unmap(); }
static int __init register_gop_device(void) diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 8995a48bd067..ad1530aff633 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -122,7 +122,7 @@ static int __init arm_enable_runtime_services(void) { u64 mapsize;
- if (!efi_enabled(EFI_BOOT) || !efi_enabled(EFI_MEMMAP)) { + if (!efi_enabled(EFI_BOOT)) { pr_info("EFI services will not be available.\n"); return 0; } diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c index 5fc70520e04c..1907db2b38d8 100644 --- a/drivers/firmware/efi/memmap.c +++ b/drivers/firmware/efi/memmap.c @@ -118,6 +118,9 @@ int __init efi_memmap_init_early(struct efi_memory_map_data *data)
void __init efi_memmap_unmap(void) { + if (!efi_enabled(EFI_MEMMAP)) + return; + if (!efi.memmap.late) { unsigned long size;
From: Vitaly Wool vitalywool@gmail.com
[ Upstream commit ca0246bb97c23da9d267c2107c07fb77e38205c9 ]
Reclaim and free can race on an object which is basically fine but in order for reclaim to be able to map "freed" object we need to encode object length in the handle. handle_to_chunks() is then introduced to extract object length from a handle and use it during mapping.
Moreover, to avoid racing on a z3fold "headless" page release, we should not try to free that page in z3fold_free() if the reclaim bit is set. Also, in the unlikely case of trying to reclaim a page being freed, we should not proceed with that page.
While at it, fix the page accounting in reclaim function.
This patch supersedes "[PATCH] z3fold: fix reclaim lock-ups".
Link: http://lkml.kernel.org/r/20181105162225.74e8837d03583a9b707cf559@gmail.com Signed-off-by: Vitaly Wool vitaly.vul@sony.com Signed-off-by: Jongseok Kim ks77sj@gmail.com Reported-by-by: Jongseok Kim ks77sj@gmail.com Reviewed-by: Snild Dolkow snild@sony.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- mm/z3fold.c | 101 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 62 insertions(+), 39 deletions(-)
diff --git a/mm/z3fold.c b/mm/z3fold.c index f33403d718ac..2813cdfa46b9 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -99,6 +99,7 @@ struct z3fold_header { #define NCHUNKS ((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT)
#define BUDDY_MASK (0x3) +#define BUDDY_SHIFT 2
/** * struct z3fold_pool - stores metadata for each z3fold pool @@ -145,7 +146,7 @@ enum z3fold_page_flags { MIDDLE_CHUNK_MAPPED, NEEDS_COMPACTING, PAGE_STALE, - UNDER_RECLAIM + PAGE_CLAIMED, /* by either reclaim or free */ };
/***************** @@ -174,7 +175,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page, clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); clear_bit(NEEDS_COMPACTING, &page->private); clear_bit(PAGE_STALE, &page->private); - clear_bit(UNDER_RECLAIM, &page->private); + clear_bit(PAGE_CLAIMED, &page->private);
spin_lock_init(&zhdr->page_lock); kref_init(&zhdr->refcount); @@ -223,8 +224,11 @@ static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud) unsigned long handle;
handle = (unsigned long)zhdr; - if (bud != HEADLESS) - handle += (bud + zhdr->first_num) & BUDDY_MASK; + if (bud != HEADLESS) { + handle |= (bud + zhdr->first_num) & BUDDY_MASK; + if (bud == LAST) + handle |= (zhdr->last_chunks << BUDDY_SHIFT); + } return handle; }
@@ -234,6 +238,12 @@ static struct z3fold_header *handle_to_z3fold_header(unsigned long handle) return (struct z3fold_header *)(handle & PAGE_MASK); }
+/* only for LAST bud, returns zero otherwise */ +static unsigned short handle_to_chunks(unsigned long handle) +{ + return (handle & ~PAGE_MASK) >> BUDDY_SHIFT; +} + /* * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle * but that doesn't matter. because the masking will result in the @@ -717,37 +727,39 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) page = virt_to_page(zhdr);
if (test_bit(PAGE_HEADLESS, &page->private)) { - /* HEADLESS page stored */ - bud = HEADLESS; - } else { - z3fold_page_lock(zhdr); - bud = handle_to_buddy(handle); - - switch (bud) { - case FIRST: - zhdr->first_chunks = 0; - break; - case MIDDLE: - zhdr->middle_chunks = 0; - zhdr->start_middle = 0; - break; - case LAST: - zhdr->last_chunks = 0; - break; - default: - pr_err("%s: unknown bud %d\n", __func__, bud); - WARN_ON(1); - z3fold_page_unlock(zhdr); - return; + /* if a headless page is under reclaim, just leave. + * NB: we use test_and_set_bit for a reason: if the bit + * has not been set before, we release this page + * immediately so we don't care about its value any more. + */ + if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) { + spin_lock(&pool->lock); + list_del(&page->lru); + spin_unlock(&pool->lock); + free_z3fold_page(page); + atomic64_dec(&pool->pages_nr); } + return; }
- if (bud == HEADLESS) { - spin_lock(&pool->lock); - list_del(&page->lru); - spin_unlock(&pool->lock); - free_z3fold_page(page); - atomic64_dec(&pool->pages_nr); + /* Non-headless case */ + z3fold_page_lock(zhdr); + bud = handle_to_buddy(handle); + + switch (bud) { + case FIRST: + zhdr->first_chunks = 0; + break; + case MIDDLE: + zhdr->middle_chunks = 0; + break; + case LAST: + zhdr->last_chunks = 0; + break; + default: + pr_err("%s: unknown bud %d\n", __func__, bud); + WARN_ON(1); + z3fold_page_unlock(zhdr); return; }
@@ -755,7 +767,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) atomic64_dec(&pool->pages_nr); return; } - if (test_bit(UNDER_RECLAIM, &page->private)) { + if (test_bit(PAGE_CLAIMED, &page->private)) { z3fold_page_unlock(zhdr); return; } @@ -833,20 +845,30 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) } list_for_each_prev(pos, &pool->lru) { page = list_entry(pos, struct page, lru); + + /* this bit could have been set by free, in which case + * we pass over to the next page in the pool. + */ + if (test_and_set_bit(PAGE_CLAIMED, &page->private)) + continue; + + zhdr = page_address(page); if (test_bit(PAGE_HEADLESS, &page->private)) - /* candidate found */ break;
- zhdr = page_address(page); - if (!z3fold_page_trylock(zhdr)) + if (!z3fold_page_trylock(zhdr)) { + zhdr = NULL; continue; /* can't evict at this point */ + } kref_get(&zhdr->refcount); list_del_init(&zhdr->buddy); zhdr->cpu = -1; - set_bit(UNDER_RECLAIM, &page->private); break; }
+ if (!zhdr) + break; + list_del_init(&page->lru); spin_unlock(&pool->lock);
@@ -895,6 +917,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) if (test_bit(PAGE_HEADLESS, &page->private)) { if (ret == 0) { free_z3fold_page(page); + atomic64_dec(&pool->pages_nr); return 0; } spin_lock(&pool->lock); @@ -902,7 +925,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) spin_unlock(&pool->lock); } else { z3fold_page_lock(zhdr); - clear_bit(UNDER_RECLAIM, &page->private); + clear_bit(PAGE_CLAIMED, &page->private); if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) { atomic64_dec(&pool->pages_nr); @@ -961,7 +984,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle) set_bit(MIDDLE_CHUNK_MAPPED, &page->private); break; case LAST: - addr += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT); + addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT); break; default: pr_err("unknown buddy id %d\n", buddy);
From: Yufen Yu yuyufen@huawei.com
[ Upstream commit 1a413646931cb14442065cfc17561e50f5b5bb44 ]
Other filesystems such as ext4, f2fs and ubifs all return ENXIO when lseek (SEEK_DATA or SEEK_HOLE) requests a negative offset.
man 2 lseek says
: EINVAL whence is not valid. Or: the resulting file offset would be : negative, or beyond the end of a seekable device. : : ENXIO whence is SEEK_DATA or SEEK_HOLE, and the file offset is beyond : the end of the file.
Make tmpfs return ENXIO under these circumstances as well. After this, tmpfs also passes xfstests's generic/448.
[akpm@linux-foundation.org: rewrite changelog] Link: http://lkml.kernel.org/r/1540434176-14349-1-git-send-email-yuyufen@huawei.co... Signed-off-by: Yufen Yu yuyufen@huawei.com Reviewed-by: Andrew Morton akpm@linux-foundation.org Cc: Al Viro viro@zeniv.linux.org.uk Cc: Hugh Dickins hughd@google.com Cc: William Kucharski william.kucharski@oracle.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- mm/shmem.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c index ea786a504e1b..fa08f56fd5e5 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2590,9 +2590,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence) inode_lock(inode); /* We're holding i_mutex so we can access i_size directly */
- if (offset < 0) - offset = -EINVAL; - else if (offset >= inode->i_size) + if (offset < 0 || offset >= inode->i_size) offset = -ENXIO; else { start = offset >> PAGE_SHIFT;
From: Michal Hocko mhocko@suse.com
[ Upstream commit c63ae43ba53bc432b414fd73dd5f4b01fcb1ab43 ]
Konstantin has noticed that kvmalloc might trigger the following warning:
WARNING: CPU: 0 PID: 6676 at mm/vmstat.c:986 __fragmentation_index+0x54/0x60 [...] Call Trace: fragmentation_index+0x76/0x90 compaction_suitable+0x4f/0xf0 shrink_node+0x295/0x310 node_reclaim+0x205/0x250 get_page_from_freelist+0x649/0xad0 __alloc_pages_nodemask+0x12a/0x2a0 kmalloc_large_node+0x47/0x90 __kmalloc_node+0x22b/0x2e0 kvmalloc_node+0x3e/0x70 xt_alloc_table_info+0x3a/0x80 [x_tables] do_ip6t_set_ctl+0xcd/0x1c0 [ip6_tables] nf_setsockopt+0x44/0x60 SyS_setsockopt+0x6f/0xc0 do_syscall_64+0x67/0x120 entry_SYSCALL_64_after_hwframe+0x3d/0xa2
the problem is that we only check for an out of bound order in the slow path and the node reclaim might happen from the fast path already. This is fixable by making sure that kvmalloc doesn't ever use kmalloc for requests that are larger than KMALLOC_MAX_SIZE but this also shows that the code is rather fragile. A recent UBSAN report just underlines that by the following report
UBSAN: Undefined behaviour in mm/page_alloc.c:3117:19 shift exponent 51 is too large for 32-bit type 'int' CPU: 0 PID: 6520 Comm: syz-executor1 Not tainted 4.19.0-rc2 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xd2/0x148 lib/dump_stack.c:113 ubsan_epilogue+0x12/0x94 lib/ubsan.c:159 __ubsan_handle_shift_out_of_bounds+0x2b6/0x30b lib/ubsan.c:425 __zone_watermark_ok+0x2c7/0x400 mm/page_alloc.c:3117 zone_watermark_fast mm/page_alloc.c:3216 [inline] get_page_from_freelist+0xc49/0x44c0 mm/page_alloc.c:3300 __alloc_pages_nodemask+0x21e/0x640 mm/page_alloc.c:4370 alloc_pages_current+0xcc/0x210 mm/mempolicy.c:2093 alloc_pages include/linux/gfp.h:509 [inline] __get_free_pages+0x12/0x60 mm/page_alloc.c:4414 dma_mem_alloc+0x36/0x50 arch/x86/include/asm/floppy.h:156 raw_cmd_copyin drivers/block/floppy.c:3159 [inline] raw_cmd_ioctl drivers/block/floppy.c:3206 [inline] fd_locked_ioctl+0xa00/0x2c10 drivers/block/floppy.c:3544 fd_ioctl+0x40/0x60 drivers/block/floppy.c:3571 __blkdev_driver_ioctl block/ioctl.c:303 [inline] blkdev_ioctl+0xb3c/0x1a30 block/ioctl.c:601 block_ioctl+0x105/0x150 fs/block_dev.c:1883 vfs_ioctl fs/ioctl.c:46 [inline] do_vfs_ioctl+0x1c0/0x1150 fs/ioctl.c:687 ksys_ioctl+0x9e/0xb0 fs/ioctl.c:702 __do_sys_ioctl fs/ioctl.c:709 [inline] __se_sys_ioctl fs/ioctl.c:707 [inline] __x64_sys_ioctl+0x7e/0xc0 fs/ioctl.c:707 do_syscall_64+0xc4/0x510 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe
Note that this is not a kvmalloc path. It is just that the fast path really depends on having sanitzed order as well. Therefore move the order check to the fast path.
Link: http://lkml.kernel.org/r/20181113094305.GM15120@dhcp22.suse.cz Signed-off-by: Michal Hocko mhocko@suse.com Reported-by: Konstantin Khlebnikov khlebnikov@yandex-team.ru Reported-by: Kyungtae Kim kt0755@gmail.com Acked-by: Vlastimil Babka vbabka@suse.cz Cc: Balbir Singh bsingharora@gmail.com Cc: Mel Gorman mgorman@techsingularity.net Cc: Pavel Tatashin pavel.tatashin@microsoft.com Cc: Oscar Salvador osalvador@suse.de Cc: Mike Rapoport rppt@linux.vnet.ibm.com Cc: Aaron Lu aaron.lu@intel.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Byoungyoung Lee lifeasageek@gmail.com Cc: "Dae R. Jeong" threeearcat@gmail.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- mm/page_alloc.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a604b5da6755..2074f424dabf 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3867,17 +3867,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, unsigned int cpuset_mems_cookie; int reserve_flags;
- /* - * In the slowpath, we sanity check order to avoid ever trying to - * reclaim >= MAX_ORDER areas which will never succeed. Callers may - * be using allocators in order of preference for an area that is - * too large. - */ - if (order >= MAX_ORDER) { - WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN)); - return NULL; - } - /* * We also sanity check to catch abuse of atomic reserves being used by * callers that are not in atomic context. @@ -4179,6 +4168,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */ struct alloc_context ac = { };
+ /* + * There are several places where we assume that the order value is sane + * so bail out early if the request is out of bound. + */ + if (unlikely(order >= MAX_ORDER)) { + WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN)); + return NULL; + } + gfp_mask &= gfp_allowed_mask; alloc_mask = gfp_mask; if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
linux-stable-mirror@lists.linaro.org