[ Upstream commit 8782fb61cc848364e1e1599d76d3c9dd58a1cc06 ]
The mmap lock protects the page walker from changes to the page tables
during the walk. However, a read lock is insufficient to protect those
areas which don't have a VMA, as munmap() detaches the VMAs before
downgrading to a read lock and actually tearing down PTEs/page tables.
For users of walk_page_range() the solution is simply to call pte_hole()
immediately, without checking the actual page tables, when a VMA is not
present. We now never call __walk_page_range() without a valid vma.
For walk_page_range_novma() the locking requirements are tightened to
require the mmap write lock to be taken, and then walking the pgd
directly with 'no_vma' set.
This in turn means that all page walkers either have a valid vma, or
it's that special 'novma' case for page table debugging. As a result,
all the odd '(!walk->vma && !walk->no_vma)' tests can be removed.
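For context, a minimal sketch of the caller pattern this change assumes (the
walker ops and callbacks below are hypothetical, shown only for illustration;
on the 5.4 backport target the lock helpers are down_read()/up_read() on
mm->mmap_sem rather than mmap_read_lock()/mmap_read_unlock()):

	static const struct mm_walk_ops demo_ops = {
		.pmd_entry	= demo_pmd_entry,	/* hypothetical callback */
		.pte_hole	= demo_pte_hole,	/* hypothetical callback */
	};

	mmap_read_lock(mm);		/* read lock is enough for the walk */
	err = walk_page_range(mm, start, end, &demo_ops, NULL);
	mmap_read_unlock(mm);

With this fix, any part of [start, end) that has no VMA is reported through
pte_hole() without the walker touching the page tables, so the walk cannot
race with munmap() freeing those tables after it has downgraded to the read
lock.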
Fixes: dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap")
Reported-by: Jann Horn <jannh(a)google.com>
Signed-off-by: Steven Price <steven.price(a)arm.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i(a)gmail.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
[manually backported. backport note: walk_page_range_novma() does not exist in
5.4, so I'm omitting it from the backport]
Signed-off-by: Jann Horn <jannh(a)google.com>
---
mm/pagewalk.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 4eb09e0898817..ec41e7552f37c 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -38,7 +38,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
do {
again:
next = pmd_addr_end(addr, end);
- if (pmd_none(*pmd) || !walk->vma) {
+ if (pmd_none(*pmd)) {
if (ops->pte_hole)
err = ops->pte_hole(addr, next, walk);
if (err)
@@ -84,7 +84,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
do {
again:
next = pud_addr_end(addr, end);
- if (pud_none(*pud) || !walk->vma) {
+ if (pud_none(*pud)) {
if (ops->pte_hole)
err = ops->pte_hole(addr, next, walk);
if (err)
@@ -254,7 +254,7 @@ static int __walk_page_range(unsigned long start, unsigned long end,
int err = 0;
struct vm_area_struct *vma = walk->vma;
- if (vma && is_vm_hugetlb_page(vma)) {
+ if (is_vm_hugetlb_page(vma)) {
if (walk->ops->hugetlb_entry)
err = walk_hugetlb_range(start, end, walk);
} else
@@ -324,9 +324,13 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
if (!vma) { /* after the last vma */
walk.vma = NULL;
next = end;
+ if (ops->pte_hole)
+ err = ops->pte_hole(start, next, &walk);
} else if (start < vma->vm_start) { /* outside vma */
walk.vma = NULL;
next = min(end, vma->vm_start);
+ if (ops->pte_hole)
+ err = ops->pte_hole(start, next, &walk);
} else { /* inside vma */
walk.vma = vma;
next = min(end, vma->vm_end);
@@ -344,9 +348,8 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
}
if (err < 0)
break;
- }
- if (walk.vma || walk.ops->pte_hole)
err = __walk_page_range(start, next, &walk);
+ }
if (err)
break;
} while (start = next, start < end);
base-commit: f28b7414ab715e6069e72a7bbe2f1354b2524beb
--
2.38.0.rc1.362.ged0d419d3c-goog
tl;dr: The existing mitigation for eIBRS PBRSB predictions uses an INT3 to
ensure a call instruction retires before a following unbalanced RET. Replace
this with a serialising WRMSR instruction, which has a lower performance
penalty.
== Background ==
eIBRS (enhanced indirect branch restricted speculation) is used to prevent
predictor addresses from one privilege domain from being used for prediction
in a higher privilege domain.
== Problem ==
On processors with eIBRS protections there can be a case where, upon VM exit,
a guest address may be used as an RSB prediction for an unbalanced RET if a
CALL instruction hasn't yet been retired. This is termed PBRSB (Post-Barrier
Return Stack Buffer).
A mitigation for this was introduced in commit
2b1299322016731d56807aa49254a5ea3080b6b3 ("x86/speculation: Add RSB VM Exit
protections").
This mitigation [1] has a ~1% performance impact on VM exit compared to without
it [2].
== Solution ==
The WRMSR instruction can be used as a speculation barrier and a serialising
instruction. Use this on the VM exit path instead to ensure that a CALL
instruction (in this case the call to vmx_spec_ctrl_restore_host) has retired
before the prediction of a following unbalanced RET.
This mitigation [3] has a negligible performance impact.
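To make the intended ordering concrete, here is a rough sketch of the VM-exit
path after this change (simplified from the hunks below; not a verbatim copy
of vmenter.S):

	FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
						/* stuff the RSB */
	...
	call vmx_spec_ctrl_restore_host		/* the CALL that must retire */
		/* -> native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval): the
		 *    serialising WRMSR forces the CALL above to retire */
	...
	RET					/* first unbalanced RET, now safe */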
== Testing ==
Ran the outl_to_kernel kvm-unit-tests test 200 times per configuration; it
counts the cycles for an exit to kernel mode.
[1] With existing mitigation:
Average: 2026 cycles
[2] With no mitigation:
Average: 2008 cycles
[3] With proposed mitigation:
Average: 2008 cycles
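(For reference: 2026 vs. 2008 cycles is roughly a 0.9% overhead for the
existing mitigation, while the proposed mitigation measures the same as no
mitigation within the resolution of this test.)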
Signed-off-by: Suraj Jitindar Singh <surajjs(a)amazon.com>
Cc: stable(a)vger.kernel.org
---
arch/x86/include/asm/nospec-branch.h | 7 +++----
arch/x86/kvm/vmx/vmenter.S | 3 +--
arch/x86/kvm/vmx/vmx.c | 5 +++++
3 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index c936ce9f0c47..e5723e024b47 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -159,10 +159,9 @@
* A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
* monstrosity above, manually.
*/
-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
- ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
- __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
- __stringify(__FILL_ONE_RETURN), \ftr2
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+ ALTERNATIVE "jmp .Lskip_rsb_\@", \
+ __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr
.Lskip_rsb_\@:
.endm
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 6de96b943804..eb82797bd7bf 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -231,8 +231,7 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
* single call to retire, before the first unbalanced RET.
*/
- FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
- X86_FEATURE_RSB_VMEXIT_LITE
+ FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
pop %_ASM_ARG2 /* @flags */
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c9b49a09e6b5..fdcd8e10c2ab 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7049,8 +7049,13 @@ void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
* For legacy IBRS, the IBRS bit always needs to be written after
* transitioning from a less privileged predictor mode, regardless of
* whether the guest/host values differ.
+ *
+ * For eIBRS affected by Post Barrier RSB Predictions a serialising
+ * instruction (wrmsr) must be executed to ensure a call instruction has
+ * retired before the prediction of a following unbalanced ret.
*/
if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) ||
+ cpu_feature_enabled(X86_FEATURE_RSB_VMEXIT_LITE) ||
vmx->spec_ctrl != hostval)
native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
--
2.17.1
From: Fangzhi Zuo <Jerry.Zuo(a)amd.com>
Before a new CRTC is enabled, the stream_count in dc_state is not in sync
with that in drm_atomic_state. Validating DSC in that case leaves the newly
added stream out of the joint DSC optimization with the existing streams; it
simply keeps its default-initialized VCPI, which leads to a wrong DSC
decision.
Consider the scenario where one 4k60 display is connected to a dock in dp-alt
mode. Since dp-alt mode is a 2-lane setup, stream 1 consumes 63 slots with
DSC needed. Then a second 4k60 display is hooked up to the dock. Stream 2
comes up with 65 slots initialized by default, without DSC, because the DSC
pre-validation does not jointly optimize stream 2 with stream 1 before CRTC 2
is added into the dc_state. As a result stream 2 never gets DSC optimization,
and atomic_check fails every time, since 65 exceeds the 63-slot limit.
After all new CRTCs have been added into the state, stream_count in dc_state
correctly reflects that in drm_atomic_state, which yields the correct DSC
decision.
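In other words, the hunks below move the DSC pre-validation from before the
CRTC loop to after it, roughly like this (simplified sketch; function names
taken from the diff):

	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
		...	/* add/update every CRTC so dc_state sees all streams */
	}
	...
	/* only now, with every stream in the state, run the joint DSC check */
	if (dc_resource_is_dsc_encoding_supported(dc)) {
		if (!pre_validate_dsc(state, &dm_state, vars)) {
			ret = -EINVAL;
			goto fail;
		}
	}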
Fixes: 71be4b16d39a ("drm/amd/display: dsc validate fail not pass to atomic check")
Reviewed-by: Roman Li <Roman.Li(a)amd.com>
Acked-by: Qingqing Zhuo <qingqing.zhuo(a)amd.com>
Signed-off-by: Fangzhi Zuo <Jerry.Zuo(a)amd.com>
Tested-by: Mark Broadworth <mark.broadworth(a)amd.com>
Cc: stable(a)vger.kernel.org
---
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 17c3daac837a..63f076a46260 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -9408,10 +9408,6 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
}
}
}
- if (!pre_validate_dsc(state, &dm_state, vars)) {
- ret = -EINVAL;
- goto fail;
- }
}
#endif
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
@@ -9545,6 +9541,15 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
}
}
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+ if (dc_resource_is_dsc_encoding_supported(dc)) {
+ if (!pre_validate_dsc(state, &dm_state, vars)) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ }
+#endif
+
/* Run this here since we want to validate the streams we created */
ret = drm_atomic_helper_check_planes(dev, state);
if (ret) {
--
2.25.1
[CCing regression and stable lists, to make sure they are aware of the
regression]
On 05.10.22 17:47, Hamza Mahfooz wrote:
> This reverts commit 10b6e91bd1ee9cd237ffbc244ad9c25b5fd3e167.
/me can't find that id and wonders what he did wrong -- or is this not
meant to refer to Linus' tree?
And isn't this reverting both 66f99628eb24409cb8feb5061f78283c8b65f820
and abbc7a3dafb91b9d4ec56b70ec9a7520f8e13334 in one go?
> Unfortunately, this commit causes performance regressions on non-PSR
> setups. So, just revert it until FB_DAMAGE_CLIPS support can be added.
>
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2189
> Signed-off-by: Hamza Mahfooz <hamza.mahfooz(a)amd.com>
This seems to be missing a Reported-by tag, a CC: stable tag (needed to
ensure backporting), and a Fixes: tag.
But the reason why I started writing this mail is totally different from
the comments above:
In case you are not aware of it, that patch apparently broke amdgpu for
some users of 5.4.215:
https://bugzilla.kernel.org/show_bug.cgi?id=216554
So more Link: and Reported-by: tags would be nice.
Ciao, Thorsten
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 14 ++------------
> 1 file changed, 2 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> index 23998f727c7f..1a06b8d724f3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> @@ -38,8 +38,6 @@
> #include <linux/pci.h>
> #include <linux/pm_runtime.h>
> #include <drm/drm_crtc_helper.h>
> -#include <drm/drm_damage_helper.h>
> -#include <drm/drm_drv.h>
> #include <drm/drm_edid.h>
> #include <drm/drm_gem_framebuffer_helper.h>
> #include <drm/drm_fb_helper.h>
> @@ -500,12 +498,6 @@ static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
> .create_handle = drm_gem_fb_create_handle,
> };
>
> -static const struct drm_framebuffer_funcs amdgpu_fb_funcs_atomic = {
> - .destroy = drm_gem_fb_destroy,
> - .create_handle = drm_gem_fb_create_handle,
> - .dirty = drm_atomic_helper_dirtyfb,
> -};
> -
> uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
> uint64_t bo_flags)
> {
> @@ -1108,10 +1100,8 @@ static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev,
> if (ret)
> goto err;
>
> - if (drm_drv_uses_atomic_modeset(dev))
> - ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs_atomic);
> - else
> - ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
> + ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
> +
> if (ret)
> goto err;
>
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
The ftrace_boot_snapshot and alloc_snapshot cmdline options allocate the
snapshot buffer at boot up for use later. The ftrace_boot_snapshot option in
particular requires the snapshot to be allocated because it will take a
snapshot at the end of boot up, allowing the traces that happened during boot
to be seen so that they are not lost when user space takes over.
When a tracer is registered (started) there's a path that checks if it
requires the snapshot buffer or not, and if it does not and the buffer was
allocated, it will do a synchronization and then free the snapshot buffer.
That synchronization is only required if the previous tracer was using the
snapshot buffer for "max latency" snapshots (like the irqsoff tracer and
friends), as it needs to make sure all max snapshots are complete before
freeing. But it does not make sense to free the buffer if the previous tracer
was not using it and the snapshot was allocated by the cmdline parameters.
That basically takes away the point of allocating it in the first place!
Note, the allocated snapshot worked fine for just trace events, but fails
when a tracer is enabled on the cmdline. On further investigation, this goes
back even further and does not require a tracer on the cmdline to fail.
Simply enable snapshots and then enable a tracer, and it will remove the
snapshot.
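As a concrete example (assuming tracefs is mounted at the usual
/sys/kernel/tracing): writing 1 to the 'snapshot' file allocates the snapshot
buffer, and then writing a non-latency tracer such as 'function' to
'current_tracer' would, before this fix, free that buffer again as part of
the tracer switch.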
Link: https://lkml.kernel.org/r/20221005113757.041df7fe@gandalf.local.home
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: stable(a)vger.kernel.org
Fixes: 45ad21ca5530 ("tracing: Have trace_array keep track if snapshot buffer is allocated")
Reported-by: Ross Zwisler <zwisler(a)kernel.org>
Tested-by: Ross Zwisler <zwisler(a)kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/trace.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index def721de68a0..47a44b055a1d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6428,12 +6428,12 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
if (tr->current_trace->reset)
tr->current_trace->reset(tr);
+#ifdef CONFIG_TRACER_MAX_TRACE
+ had_max_tr = tr->current_trace->use_max_tr;
+
/* Current trace needs to be nop_trace before synchronize_rcu */
tr->current_trace = &nop_trace;
-#ifdef CONFIG_TRACER_MAX_TRACE
- had_max_tr = tr->allocated_snapshot;
-
if (had_max_tr && !t->use_max_tr) {
/*
* We need to make sure that the update_max_tr sees that
@@ -6446,11 +6446,13 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
free_snapshot(tr);
}
- if (t->use_max_tr && !had_max_tr) {
+ if (t->use_max_tr && !tr->allocated_snapshot) {
ret = tracing_alloc_snapshot_instance(tr);
if (ret < 0)
goto out;
}
+#else
+ tr->current_trace = &nop_trace;
#endif
if (t->init) {
--
2.35.1
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
Weak functions started causing havoc as they showed up in
"available_filter_functions", and this confused people as to why some
functions marked as "notrace" were listed but did nothing when enabled. This
was because weak functions can still have fentry calls, and these addresses
get added to the "available_filter_functions" file. kallsyms is what converts
those addresses to names, and since the weak functions are not listed in
kallsyms, it would just pick the function before that address.
To solve this, there was a trick to detect weak functions listed, and
these records would be marked as DISABLED so that they do not get enabled
and are mostly ignored. As the processing of the list of all functions to
figure out what is weak or not can take a long time, this process is put
off into a kernel thread and run in parallel with the rest of start up.
Now the issue happens when function tracing is enabled via the kernel
command line. As it starts very early in boot up, it can be enabled before
the records that are weak are marked to be disabled. This causes an issue in
the accounting, as the weak records are enabled by the command line function
tracing, but after boot up, they are not disabled.
The ftrace records have several accounting flags and a ref count. The
DISABLED flag is just one. If the record is enabled before it is marked
DISABLED, it will get an ENABLED flag and also have its ref counter
incremented. After it is marked as DISABLED, neither the ENABLED flag nor
the ref counter is cleared. There are sanity checks on the records that are
performed after an ftrace function is registered or unregistered, and these
detected that there were records marked as ENABLED with a ref count that
should not have been.
Note, the module loading code uses the DISABLED flag as well to keep its
functions from being modified while the module is being loaded, and some of
these flags may get set in this process. So changing the verification code to
ignore DISABLED records is a no go, as it still needs to verify that the
module records are working too.
Also, the weak functions are still calling a trampoline. Even though they
should never be called, it is dangerous to leave these weak functions calling
a trampoline that has been freed, so they should still be set back to nops.
There are two places that need to not skip records that have both the ENABLED
and the DISABLED flags set. That is where the ftrace_ops is processed and
sets the records' ref counts, and then later when the function itself is to
be updated and the ENABLED flag gets removed. Add a helper function
"skip_record()" that returns true if the record has the DISABLED flag set
but not the ENABLED flag.
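As a note on the hunk below: replacing 'rec->flags = 0' with
'rec->flags &= FTRACE_FL_DISABLED' keeps only the DISABLED bit when the last
user goes away, so the record stays marked DISABLED while the update path
still converts its call site back to a nop, matching the requirement above
that weak functions not be left calling a freed trampoline.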
Link: https://lkml.kernel.org/r/20221005003809.27d2b97b@gandalf.local.home
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: stable(a)vger.kernel.org
Fixes: b39181f7c6907 ("ftrace: Add FTRACE_MCOUNT_MAX_OFFSET to avoid adding weak function")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/ftrace.c | 20 ++++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 406d0597c409..83362a155791 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1644,6 +1644,18 @@ ftrace_find_tramp_ops_any_other(struct dyn_ftrace *rec, struct ftrace_ops *op_ex
static struct ftrace_ops *
ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops);
+static bool skip_record(struct dyn_ftrace *rec)
+{
+ /*
+ * At boot up, weak functions are set to disable. Function tracing
+ * can be enabled before they are, and they still need to be disabled now.
+ * If the record is disabled, still continue if it is marked as already
+ * enabled (this is needed to keep the accounting working).
+ */
+ return rec->flags & FTRACE_FL_DISABLED &&
+ !(rec->flags & FTRACE_FL_ENABLED);
+}
+
static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
int filter_hash,
bool inc)
@@ -1693,7 +1705,7 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
int in_hash = 0;
int match = 0;
- if (rec->flags & FTRACE_FL_DISABLED)
+ if (skip_record(rec))
continue;
if (all) {
@@ -2126,7 +2138,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
ftrace_bug_type = FTRACE_BUG_UNKNOWN;
- if (rec->flags & FTRACE_FL_DISABLED)
+ if (skip_record(rec))
return FTRACE_UPDATE_IGNORE;
/*
@@ -2241,7 +2253,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
if (update) {
/* If there's no more users, clear all flags */
if (!ftrace_rec_count(rec))
- rec->flags = 0;
+ rec->flags &= FTRACE_FL_DISABLED;
else
/*
* Just disable the record, but keep the ops TRAMP
@@ -2634,7 +2646,7 @@ void __weak ftrace_replace_code(int mod_flags)
do_for_each_ftrace_rec(pg, rec) {
- if (rec->flags & FTRACE_FL_DISABLED)
+ if (skip_record(rec))
continue;
failed = __ftrace_replace_code(rec, enable);
--
2.35.1