The patch titled
Subject: mm: initialize deferred pages with interrupts enabled
has been added to the -mm tree. Its filename is
mm-initialize-deferred-pages-with-interrupts-enabled.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-initialize-deferred-pages-with-…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-initialize-deferred-pages-with-…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Pavel Tatashin <pasha.tatashin(a)soleen.com>
Subject: mm: initialize deferred pages with interrupts enabled
Initializing struct pages is a long task and keeping interrupts disabled
for the duration of this operation introduces a number of problems.
1. jiffies are not updated for long period of time, and thus incorrect time
is reported. See proposed solution and discussion here:
lkml/20200311123848.118638-1-shile.zhang(a)linux.alibaba.com
2. It prevents farther improving deferred page initialization by allowing
intra-node multi-threading.
We are keeping interrupts disabled to solve a rather theoretical problem
that was never observed in real world (See 3a2d7fa8a3d5).
Let's keep interrupts enabled. In case we ever encounter a scenario where
an interrupt thread wants to allocate large amount of memory this early in
boot we can deal with that by growing zone (see deferred_grow_zone()) by
the needed amount before starting deferred_init_memmap() threads.
Before:
[ 1.232459] node 0 initialised, 12058412 pages in 1ms
After:
[ 1.632580] node 0 initialised, 12051227 pages in 436ms
Link: http://lkml.kernel.org/r/20200403140952.17177-3-pasha.tatashin@soleen.com
Fixes: 3a2d7fa8a3d5 ("mm: disable interrupts while initializing deferred pages")
Reported-by: Shile Zhang <shile.zhang(a)linux.alibaba.com>
Signed-off-by: Pavel Tatashin <pasha.tatashin(a)soleen.com>
Reviewed-by: Daniel Jordan <daniel.m.jordan(a)oracle.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: James Morris <jmorris(a)namei.org>
Cc: Kirill Tkhai <ktkhai(a)virtuozzo.com>
Cc: Sasha Levin <sashal(a)kernel.org>
Cc: Yiqian Wei <yiwei(a)redhat.com>
Cc: <stable(a)vger.kernel.org> [4.17+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/mmzone.h | 2 ++
mm/page_alloc.c | 20 +++++++-------------
2 files changed, 9 insertions(+), 13 deletions(-)
--- a/include/linux/mmzone.h~mm-initialize-deferred-pages-with-interrupts-enabled
+++ a/include/linux/mmzone.h
@@ -678,6 +678,8 @@ typedef struct pglist_data {
/*
* Must be held any time you expect node_start_pfn,
* node_present_pages, node_spanned_pages or nr_zones to stay constant.
+ * Also synchronizes pgdat->first_deferred_pfn during deferred page
+ * init.
*
* pgdat_resize_lock() and pgdat_resize_unlock() are provided to
* manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG
--- a/mm/page_alloc.c~mm-initialize-deferred-pages-with-interrupts-enabled
+++ a/mm/page_alloc.c
@@ -1843,6 +1843,13 @@ static int __init deferred_init_memmap(v
BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat));
pgdat->first_deferred_pfn = ULONG_MAX;
+ /*
+ * Once we unlock here, the zone cannot be grown anymore, thus if an
+ * interrupt thread must allocate this early in boot, zone must be
+ * pre-grown prior to start of deferred page initialization.
+ */
+ pgdat_resize_unlock(pgdat, &flags);
+
/* Only the highest zone is deferred so find it */
for (zid = 0; zid < MAX_NR_ZONES; zid++) {
zone = pgdat->node_zones + zid;
@@ -1865,8 +1872,6 @@ static int __init deferred_init_memmap(v
touch_nmi_watchdog();
}
zone_empty:
- pgdat_resize_unlock(pgdat, &flags);
-
/* Sanity check that the next zone really is unpopulated */
WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
@@ -1909,17 +1914,6 @@ deferred_grow_zone(struct zone *zone, un
pgdat_resize_lock(pgdat, &flags);
/*
- * If deferred pages have been initialized while we were waiting for
- * the lock, return true, as the zone was grown. The caller will retry
- * this zone. We won't return to this function since the caller also
- * has this static branch.
- */
- if (!static_branch_unlikely(&deferred_pages)) {
- pgdat_resize_unlock(pgdat, &flags);
- return true;
- }
-
- /*
* If someone grew this zone while we were waiting for spinlock, return
* true, as there might be enough pages already.
*/
_
Patches currently in -mm which might be from pasha.tatashin(a)soleen.com are
mm-initialize-deferred-pages-with-interrupts-enabled.patch
mm-call-cond_resched-from-deferred_init_memmap.patch
The patch titled
Subject: mm/pagealloc.c: call touch_nmi_watchdog() on max order boundaries in deferred init
has been added to the -mm tree. Its filename is
mm-call-touch_nmi_watchdog-on-max-order-boundaries-in-deferred-init.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-call-touch_nmi_watchdog-on-max-…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-call-touch_nmi_watchdog-on-max-…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Daniel Jordan <daniel.m.jordan(a)oracle.com>
Subject: mm/pagealloc.c: call touch_nmi_watchdog() on max order boundaries in deferred init
Patch series "initialize deferred pages with interrupts enabled", v4.
Keep interrupts enabled during deferred page initialization in order to
make code more modular and allow jiffies to update.
Original approach, and discussion can be found here:
http://lkml.kernel.org/r/20200311123848.118638-1-shile.zhang@linux.alibaba.…
This patch (of 3):
deferred_init_memmap() disables interrupts the entire time, so it calls
touch_nmi_watchdog() periodically to avoid soft lockup splats. Soon it
will run with interrupts enabled, at which point cond_resched() should be
used instead.
deferred_grow_zone() makes the same watchdog calls through code shared
with deferred init but will continue to run with interrupts disabled, so
it can't call cond_resched().
Pull the watchdog calls up to these two places to allow the first to be
changed later, independently of the second. The frequency reduces from
twice per pageblock (init and free) to once per max order block.
Link: http://lkml.kernel.org/r/20200403140952.17177-2-pasha.tatashin@soleen.com
Fixes: 3a2d7fa8a3d5 ("mm: disable interrupts while initializing deferred pages")
Signed-off-by: Daniel Jordan <daniel.m.jordan(a)oracle.com>
Signed-off-by: Pavel Tatashin <pasha.tatashin(a)soleen.com>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Shile Zhang <shile.zhang(a)linux.alibaba.com>
Cc: Kirill Tkhai <ktkhai(a)virtuozzo.com>
Cc: James Morris <jmorris(a)namei.org>
Cc: Sasha Levin <sashal(a)kernel.org>
Cc: Yiqian Wei <yiwei(a)redhat.com>
Cc: <stable(a)vger.kernel.org> [4.17+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_alloc.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
--- a/mm/page_alloc.c~mm-call-touch_nmi_watchdog-on-max-order-boundaries-in-deferred-init
+++ a/mm/page_alloc.c
@@ -1692,7 +1692,6 @@ static void __init deferred_free_pages(u
} else if (!(pfn & nr_pgmask)) {
deferred_free_range(pfn - nr_free, nr_free);
nr_free = 1;
- touch_nmi_watchdog();
} else {
nr_free++;
}
@@ -1722,7 +1721,6 @@ static unsigned long __init deferred_in
continue;
} else if (!page || !(pfn & nr_pgmask)) {
page = pfn_to_page(pfn);
- touch_nmi_watchdog();
} else {
page++;
}
@@ -1862,8 +1860,10 @@ static int __init deferred_init_memmap(v
* that we can avoid introducing any issues with the buddy
* allocator.
*/
- while (spfn < epfn)
+ while (spfn < epfn) {
nr_pages += deferred_init_maxorder(&i, zone, &spfn, &epfn);
+ touch_nmi_watchdog();
+ }
zone_empty:
pgdat_resize_unlock(pgdat, &flags);
@@ -1947,6 +1947,7 @@ deferred_grow_zone(struct zone *zone, un
first_deferred_pfn = spfn;
nr_pages += deferred_init_maxorder(&i, zone, &spfn, &epfn);
+ touch_nmi_watchdog();
/* We should only stop along section boundaries */
if ((first_deferred_pfn ^ spfn) < PAGES_PER_SECTION)
_
Patches currently in -mm which might be from daniel.m.jordan(a)oracle.com are
mm-call-touch_nmi_watchdog-on-max-order-boundaries-in-deferred-init.patch
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 6a13a0d7b4d1171ef9b80ad69abc37e1daa941b3 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat(a)kernel.org>
Date: Tue, 24 Mar 2020 16:34:48 +0900
Subject: [PATCH] ftrace/kprobe: Show the maxactive number on kprobe_events
Show maxactive parameter on kprobe_events.
This allows user to save the current configuration and
restore it without losing maxactive parameter.
Link: http://lkml.kernel.org/r/4762764a-6df7-bc93-ed60-e336146dce1f@gmail.com
Link: http://lkml.kernel.org/r/158503528846.22706.5549974121212526020.stgit@devno…
Cc: stable(a)vger.kernel.org
Fixes: 696ced4fb1d76 ("tracing/kprobes: expose maxactive for kretprobe in kprobe_events")
Reported-by: Taeung Song <treeze.taeung(a)gmail.com>
Signed-off-by: Masami Hiramatsu <mhiramat(a)kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 362cca52f5de..d0568af4a0ef 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1078,6 +1078,8 @@ static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev)
int i;
seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p');
+ if (trace_kprobe_is_return(tk) && tk->rp.maxactive)
+ seq_printf(m, "%d", tk->rp.maxactive);
seq_printf(m, ":%s/%s", trace_probe_group_name(&tk->tp),
trace_probe_name(&tk->tp));
The patch below does not apply to the 5.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 33238c50451596be86db1505ab65fee5172844d0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz(a)infradead.org>
Date: Wed, 18 Mar 2020 20:33:37 +0100
Subject: [PATCH] perf/core: Fix event cgroup tracking
Song reports that installing cgroup events is broken since:
db0503e4f675 ("perf/core: Optimize perf_install_in_event()")
The problem being that cgroup events try to track cpuctx->cgrp even
for disabled events, which is pointless and actively harmful since the
above commit. Rework the code to have explicit enable/disable hooks
for cgroup events, such that we can limit cgroup tracking to active
events.
More specifically, since the above commit disabled events are no
longer added to their context from the 'right' CPU, and we can't
access things like the current cgroup for a remote CPU.
Cc: <stable(a)vger.kernel.org> # v5.5+
Fixes: db0503e4f675 ("perf/core: Optimize perf_install_in_event()")
Reported-by: Song Liu <songliubraving(a)fb.com>
Tested-by: Song Liu <songliubraving(a)fb.com>
Reviewed-by: Song Liu <songliubraving(a)fb.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Link: https://lkml.kernel.org/r/20200318193337.GB20760@hirez.programming.kicks-as…
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 55e44417f66d..7afd0b503406 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -983,16 +983,10 @@ perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
event->shadow_ctx_time = now - t->timestamp;
}
-/*
- * Update cpuctx->cgrp so that it is set when first cgroup event is added and
- * cleared when last cgroup event is removed.
- */
static inline void
-list_update_cgroup_event(struct perf_event *event,
- struct perf_event_context *ctx, bool add)
+perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
{
struct perf_cpu_context *cpuctx;
- struct list_head *cpuctx_entry;
if (!is_cgroup_event(event))
return;
@@ -1009,28 +1003,41 @@ list_update_cgroup_event(struct perf_event *event,
* because if the first would mismatch, the second would not try again
* and we would leave cpuctx->cgrp unset.
*/
- if (add && !cpuctx->cgrp) {
+ if (ctx->is_active && !cpuctx->cgrp) {
struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx);
if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
cpuctx->cgrp = cgrp;
}
- if (add && ctx->nr_cgroups++)
+ if (ctx->nr_cgroups++)
return;
- else if (!add && --ctx->nr_cgroups)
+
+ list_add(&cpuctx->cgrp_cpuctx_entry,
+ per_cpu_ptr(&cgrp_cpuctx_list, event->cpu));
+}
+
+static inline void
+perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx)
+{
+ struct perf_cpu_context *cpuctx;
+
+ if (!is_cgroup_event(event))
return;
- /* no cgroup running */
- if (!add)
+ /*
+ * Because cgroup events are always per-cpu events,
+ * @ctx == &cpuctx->ctx.
+ */
+ cpuctx = container_of(ctx, struct perf_cpu_context, ctx);
+
+ if (--ctx->nr_cgroups)
+ return;
+
+ if (ctx->is_active && cpuctx->cgrp)
cpuctx->cgrp = NULL;
- cpuctx_entry = &cpuctx->cgrp_cpuctx_entry;
- if (add)
- list_add(cpuctx_entry,
- per_cpu_ptr(&cgrp_cpuctx_list, event->cpu));
- else
- list_del(cpuctx_entry);
+ list_del(&cpuctx->cgrp_cpuctx_entry);
}
#else /* !CONFIG_CGROUP_PERF */
@@ -1096,11 +1103,14 @@ static inline u64 perf_cgroup_event_time(struct perf_event *event)
}
static inline void
-list_update_cgroup_event(struct perf_event *event,
- struct perf_event_context *ctx, bool add)
+perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
{
}
+static inline void
+perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx)
+{
+}
#endif
/*
@@ -1791,13 +1801,14 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
add_event_to_groups(event, ctx);
}
- list_update_cgroup_event(event, ctx, true);
-
list_add_rcu(&event->event_entry, &ctx->event_list);
ctx->nr_events++;
if (event->attr.inherit_stat)
ctx->nr_stat++;
+ if (event->state > PERF_EVENT_STATE_OFF)
+ perf_cgroup_event_enable(event, ctx);
+
ctx->generation++;
}
@@ -1976,8 +1987,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
event->attach_state &= ~PERF_ATTACH_CONTEXT;
- list_update_cgroup_event(event, ctx, false);
-
ctx->nr_events--;
if (event->attr.inherit_stat)
ctx->nr_stat--;
@@ -1994,8 +2003,10 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
* of error state is by explicit re-enabling
* of the event
*/
- if (event->state > PERF_EVENT_STATE_OFF)
+ if (event->state > PERF_EVENT_STATE_OFF) {
+ perf_cgroup_event_disable(event, ctx);
perf_event_set_state(event, PERF_EVENT_STATE_OFF);
+ }
ctx->generation++;
}
@@ -2226,6 +2237,7 @@ event_sched_out(struct perf_event *event,
if (READ_ONCE(event->pending_disable) >= 0) {
WRITE_ONCE(event->pending_disable, -1);
+ perf_cgroup_event_disable(event, ctx);
state = PERF_EVENT_STATE_OFF;
}
perf_event_set_state(event, state);
@@ -2363,6 +2375,7 @@ static void __perf_event_disable(struct perf_event *event,
event_sched_out(event, cpuctx, ctx);
perf_event_set_state(event, PERF_EVENT_STATE_OFF);
+ perf_cgroup_event_disable(event, ctx);
}
/*
@@ -2746,7 +2759,7 @@ static int __perf_install_in_context(void *info)
}
#ifdef CONFIG_CGROUP_PERF
- if (is_cgroup_event(event)) {
+ if (event->state > PERF_EVENT_STATE_OFF && is_cgroup_event(event)) {
/*
* If the current cgroup doesn't match the event's
* cgroup, we should not try to schedule it.
@@ -2906,6 +2919,7 @@ static void __perf_event_enable(struct perf_event *event,
ctx_sched_out(ctx, cpuctx, EVENT_TIME);
perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
+ perf_cgroup_event_enable(event, ctx);
if (!ctx->is_active)
return;
@@ -3616,8 +3630,10 @@ static int merge_sched_in(struct perf_event *event, void *data)
}
if (event->state == PERF_EVENT_STATE_INACTIVE) {
- if (event->attr.pinned)
+ if (event->attr.pinned) {
+ perf_cgroup_event_disable(event, ctx);
perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
+ }
*can_add_hw = 0;
ctx->rotate_necessary = 1;
The patch below does not apply to the 5.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 0b72a251bf92ca2378530fa1f9b35a71830ab51c Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Tue, 31 Mar 2020 16:23:48 +0100
Subject: [PATCH] drm/i915/gt: Fill all the unused space in the GGTT
When we allocate space in the GGTT we may have to allocate a larger
region than will be populated by the object to accommodate fencing. Make
sure that this space beyond the end of the buffer points safely into
scratch space, in case the HW tries to access it anyway (e.g. fenced
access to the last tile row).
v2: Preemptively / conservatively guard gen6 ggtt as well.
Reported-by: Imre Deak <imre.deak(a)intel.com>
References: https://gitlab.freedesktop.org/drm/intel/-/issues/1554
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld(a)intel.com>
Cc: Imre Deak <imre.deak(a)intel.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Matthew Auld <matthew.auld(a)intel.com>
Reviewed-by: Imre Deak <imre.deak(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200331152348.26946-1-chris@…
(cherry picked from commit 4d6c18590870fbac1e65dde5e01e621c8e0ca096)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index aed498a0d032..4c5a209cb669 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -191,10 +191,11 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
enum i915_cache_level level,
u32 flags)
{
- struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
- struct sgt_iter sgt_iter;
- gen8_pte_t __iomem *gtt_entries;
const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0);
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ gen8_pte_t __iomem *gte;
+ gen8_pte_t __iomem *end;
+ struct sgt_iter iter;
dma_addr_t addr;
/*
@@ -202,10 +203,17 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
* not to allow the user to override access to a read only page.
*/
- gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
- gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE;
- for_each_sgt_daddr(addr, sgt_iter, vma->pages)
- gen8_set_pte(gtt_entries++, pte_encode | addr);
+ gte = (gen8_pte_t __iomem *)ggtt->gsm;
+ gte += vma->node.start / I915_GTT_PAGE_SIZE;
+ end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
+
+ for_each_sgt_daddr(addr, iter, vma->pages)
+ gen8_set_pte(gte++, pte_encode | addr);
+ GEM_BUG_ON(gte > end);
+
+ /* Fill the allocated but "unused" space beyond the end of the buffer */
+ while (gte < end)
+ gen8_set_pte(gte++, vm->scratch[0].encode);
/*
* We want to flush the TLBs only after we're certain all the PTE
@@ -241,13 +249,22 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
u32 flags)
{
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
- gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
- unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE;
+ gen6_pte_t __iomem *gte;
+ gen6_pte_t __iomem *end;
struct sgt_iter iter;
dma_addr_t addr;
+ gte = (gen6_pte_t __iomem *)ggtt->gsm;
+ gte += vma->node.start / I915_GTT_PAGE_SIZE;
+ end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
+
for_each_sgt_daddr(addr, iter, vma->pages)
- iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
+ iowrite32(vm->pte_encode(addr, level, flags), gte++);
+ GEM_BUG_ON(gte > end);
+
+ /* Fill the allocated but "unused" space beyond the end of the buffer */
+ while (gte < end)
+ iowrite32(vm->scratch[0].encode, gte++);
/*
* We want to flush the TLBs only after we're certain all the PTE
The patch below does not apply to the 5.5-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 6e8a36c13382b7165d23928caee8d91c1b301142 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak(a)intel.com>
Date: Mon, 30 Mar 2020 18:22:44 +0300
Subject: [PATCH] drm/i915/icl+: Don't enable DDI IO power on a TypeC port in
TBT mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The DDI IO power well must not be enabled for a TypeC port in TBT mode,
ensure this during driver loading/system resume.
This gets rid of error messages like
[drm] *ERROR* power well DDI E TC2 IO state mismatch (refcount 1/enabled 0)
and avoids leaking the power ref when disabling the output.
Cc: <stable(a)vger.kernel.org> # v5.4+
Signed-off-by: Imre Deak <imre.deak(a)intel.com>
Reviewed-by: José Roberto de Souza <jose.souza(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200330152244.11316-1-imre.d…
(cherry picked from commit f77a2db27f26c3ccba0681f7e89fef083718f07f)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 73d0f4648c06..5202fdec8e0a 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -1869,7 +1869,11 @@ static void intel_ddi_get_power_domains(struct intel_encoder *encoder,
return;
dig_port = enc_to_dig_port(encoder);
- intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain);
+
+ if (!intel_phy_is_tc(dev_priv, phy) ||
+ dig_port->tc_mode != TC_PORT_TBT_ALT)
+ intel_display_power_get(dev_priv,
+ dig_port->ddi_io_power_domain);
/*
* AUX power is only needed for (e)DP mode, and for HDMI mode on TC
The patch below does not apply to the 5.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 487eca11a321ef33bcf4ca5adb3c0c4954db1b58 Mon Sep 17 00:00:00 2001
From: Prike Liang <Prike.Liang(a)amd.com>
Date: Tue, 7 Apr 2020 20:21:26 +0800
Subject: [PATCH] drm/amdgpu: fix gfx hang during suspend with video playback
(v2)
The system will be hang up during S3 suspend because of SMU is pending
for GC not respose the register CP_HQD_ACTIVE access request.This issue
root cause of accessing the GC register under enter GFX CGGPG and can
be fixed by disable GFX CGPG before perform suspend.
v2: Use disable the GFX CGPG instead of RLC safe mode guard.
Signed-off-by: Prike Liang <Prike.Liang(a)amd.com>
Tested-by: Mengbing Wang <Mengbing.Wang(a)amd.com>
Reviewed-by: Huang Rui <ray.huang(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index faa3e7102156..559dc24ef436 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2340,8 +2340,6 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
{
int i, r;
- amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
- amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
@@ -3356,6 +3354,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
}
}
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
+
amdgpu_amdkfd_suspend(adev, !fbcon);
amdgpu_ras_suspend(adev);
The patch below does not apply to the 5.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 8732fe46b20c951493bfc4dba0ad08efdf41de81 Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude(a)redhat.com>
Date: Wed, 22 Jan 2020 14:43:20 -0500
Subject: [PATCH] drm/dp_mst: Fix clearing payload state on topology disable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The issues caused by:
commit 64e62bdf04ab ("drm/dp_mst: Remove VCPI while disabling topology
mgr")
Prompted me to take a closer look at how we clear the payload state in
general when disabling the topology, and it turns out there's actually
two subtle issues here.
The first is that we're not grabbing &mgr.payload_lock when clearing the
payloads in drm_dp_mst_topology_mgr_set_mst(). Seeing as the canonical
lock order is &mgr.payload_lock -> &mgr.lock (because we always want
&mgr.lock to be the inner-most lock so topology validation always
works), this makes perfect sense. It also means that -technically- there
could be racing between someone calling
drm_dp_mst_topology_mgr_set_mst() to disable the topology, along with a
modeset occurring that's modifying the payload state at the same time.
The second is the more obvious issue that Wayne Lin discovered, that
we're not clearing proposed_payloads when disabling the topology.
I actually can't see any obvious places where the racing caused by the
first issue would break something, and it could be that some of our
higher-level locks already prevent this by happenstance, but better safe
then sorry. So, let's make it so that drm_dp_mst_topology_mgr_set_mst()
first grabs &mgr.payload_lock followed by &mgr.lock so that we never
race when modifying the payload state. Then, we also clear
proposed_payloads to fix the original issue of enabling a new topology
with a dirty payload state. This doesn't clear any of the drm_dp_vcpi
structures, but those are getting destroyed along with the ports anyway.
Changes since v1:
* Use sizeof(mgr->payloads[0])/sizeof(mgr->proposed_vcpis[0]) instead -
vsyrjala
Cc: Sean Paul <sean(a)poorly.run>
Cc: Wayne Lin <Wayne.Lin(a)amd.com>
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Cc: stable(a)vger.kernel.org # v4.4+
Signed-off-by: Lyude Paul <lyude(a)redhat.com>
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200122194321.14953-1-lyude@…
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index e78c73e975ed..4104f15f4594 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -3447,6 +3447,7 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms
int ret = 0;
struct drm_dp_mst_branch *mstb = NULL;
+ mutex_lock(&mgr->payload_lock);
mutex_lock(&mgr->lock);
if (mst_state == mgr->mst_state)
goto out_unlock;
@@ -3505,7 +3506,10 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms
/* this can fail if the device is gone */
drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, 0);
ret = 0;
- memset(mgr->payloads, 0, mgr->max_payloads * sizeof(struct drm_dp_payload));
+ memset(mgr->payloads, 0,
+ mgr->max_payloads * sizeof(mgr->payloads[0]));
+ memset(mgr->proposed_vcpis, 0,
+ mgr->max_payloads * sizeof(mgr->proposed_vcpis[0]));
mgr->payload_mask = 0;
set_bit(0, &mgr->payload_mask);
mgr->vcpi_mask = 0;
@@ -3514,6 +3518,7 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms
out_unlock:
mutex_unlock(&mgr->lock);
+ mutex_unlock(&mgr->payload_lock);
if (mstb)
drm_dp_mst_topology_put_mstb(mstb);
return ret;
The patch below does not apply to the 5.5-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 3e138a63d6674a4567a018a31e467567c40b14d5 Mon Sep 17 00:00:00 2001
From: Torsten Duwe <duwe(a)lst.de>
Date: Tue, 18 Feb 2020 16:57:44 +0100
Subject: [PATCH] drm/bridge: analogix-anx78xx: Fix drm_dp_link helper removal
drm_dp_link_rate_to_bw_code and ...bw_code_to_link_rate simply divide by
and multiply with 27000, respectively. Avoid an overflow in the u8 dpcd[0]
and the multiply+divide alltogether.
Signed-off-by: Torsten Duwe <duwe(a)lst.de>
Fixes: ff1e8fb68ea0 ("drm/bridge: analogix-anx78xx: Avoid drm_dp_link helpers")
Cc: Thierry Reding <treding(a)nvidia.com>
Cc: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: Andrzej Hajda <a.hajda(a)samsung.com>
Cc: Neil Armstrong <narmstrong(a)baylibre.com>
Cc: Laurent Pinchart <Laurent.pinchart(a)ideasonboard.com>
Cc: Jonas Karlman <jonas(a)kwiboo.se>
Cc: Jernej Skrabec <jernej.skrabec(a)siol.net>
Cc: <stable(a)vger.kernel.org> # v5.5+
Reviewed-by: Thierry Reding <treding(a)nvidia.com>
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20200218155744.9675368BE1@ver…
diff --git a/drivers/gpu/drm/bridge/analogix/analogix-anx78xx.c b/drivers/gpu/drm/bridge/analogix/analogix-anx78xx.c
index 41867be03751..864423f59d66 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix-anx78xx.c
+++ b/drivers/gpu/drm/bridge/analogix/analogix-anx78xx.c
@@ -722,10 +722,9 @@ static int anx78xx_dp_link_training(struct anx78xx *anx78xx)
if (err)
return err;
- dpcd[0] = drm_dp_max_link_rate(anx78xx->dpcd);
- dpcd[0] = drm_dp_link_rate_to_bw_code(dpcd[0]);
err = regmap_write(anx78xx->map[I2C_IDX_TX_P0],
- SP_DP_MAIN_LINK_BW_SET_REG, dpcd[0]);
+ SP_DP_MAIN_LINK_BW_SET_REG,
+ anx78xx->dpcd[DP_MAX_LINK_RATE]);
if (err)
return err;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 835214f5d5f516a38069bc077c879c7da00d6108 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021(a)gmail.com>
Date: Mon, 27 Jan 2020 16:23:03 -0800
Subject: [PATCH] scsi: lpfc: Fix broken Credit Recovery after driver load
When driver is set to enable bb credit recovery, the switch displayed the
setting as inactive. If the link bounces, it switches to Active.
During link up processing, the driver currently does a MBX_READ_SPARAM
followed by a MBX_CONFIG_LINK. These mbox commands are queued to be
executed, one at a time and the completion is processed by the worker
thread. Since the MBX_READ_SPARAM is done BEFORE the MBX_CONFIG_LINK, the
BB_SC_N bit is never set the the returned values. BB Credit recovery status
only gets set after the driver requests the feature in CONFIG_LINK, which
is done after the link up. Thus the ordering of READ_SPARAM needs to follow
the CONFIG_LINK.
Fix by reordering so that READ_SPARAM is done after CONFIG_LINK. Added a
HBA_DEFER_FLOGI flag so that any FLOGI handling waits until after the
READ_SPARAM is done so that the proper BB credit value is set in the FLOGI
payload.
Fixes: 6bfb16208298 ("scsi: lpfc: Fix configuration of BB credit recovery in service parameters")
Cc: <stable(a)vger.kernel.org> # v5.4+
Link: https://lore.kernel.org/r/20200128002312.16346-4-jsmart2021@gmail.com
Signed-off-by: Dick Kennedy <dick.kennedy(a)broadcom.com>
Signed-off-by: James Smart <jsmart2021(a)gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 04d73e2be373..3f2cb17c4574 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -749,6 +749,7 @@ struct lpfc_hba {
* capability
*/
#define HBA_FLOGI_ISSUED 0x100000 /* FLOGI was issued */
+#define HBA_DEFER_FLOGI 0x800000 /* Defer FLOGI till read_sparm cmpl */
uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/
struct lpfc_dmabuf slim2p;
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index dcc8999c6a68..6a2bdae0e52a 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -1163,13 +1163,16 @@ lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
}
/* Start discovery by sending a FLOGI. port_state is identically
- * LPFC_FLOGI while waiting for FLOGI cmpl
+ * LPFC_FLOGI while waiting for FLOGI cmpl. Check if sending
+ * the FLOGI is being deferred till after MBX_READ_SPARAM completes.
*/
- if (vport->port_state != LPFC_FLOGI)
- lpfc_initial_flogi(vport);
- else if (vport->fc_flag & FC_PT2PT)
- lpfc_disc_start(vport);
-
+ if (vport->port_state != LPFC_FLOGI) {
+ if (!(phba->hba_flag & HBA_DEFER_FLOGI))
+ lpfc_initial_flogi(vport);
+ } else {
+ if (vport->fc_flag & FC_PT2PT)
+ lpfc_disc_start(vport);
+ }
return;
out:
@@ -3094,6 +3097,14 @@ lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
mempool_free(pmb, phba->mbox_mem_pool);
+
+ /* Check if sending the FLOGI is being deferred to after we get
+ * up to date CSPs from MBX_READ_SPARAM.
+ */
+ if (phba->hba_flag & HBA_DEFER_FLOGI) {
+ lpfc_initial_flogi(vport);
+ phba->hba_flag &= ~HBA_DEFER_FLOGI;
+ }
return;
out:
@@ -3224,6 +3235,23 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la)
}
lpfc_linkup(phba);
+ sparam_mbox = NULL;
+
+ if (!(phba->hba_flag & HBA_FCOE_MODE)) {
+ cfglink_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+ if (!cfglink_mbox)
+ goto out;
+ vport->port_state = LPFC_LOCAL_CFG_LINK;
+ lpfc_config_link(phba, cfglink_mbox);
+ cfglink_mbox->vport = vport;
+ cfglink_mbox->mbox_cmpl = lpfc_mbx_cmpl_local_config_link;
+ rc = lpfc_sli_issue_mbox(phba, cfglink_mbox, MBX_NOWAIT);
+ if (rc == MBX_NOT_FINISHED) {
+ mempool_free(cfglink_mbox, phba->mbox_mem_pool);
+ goto out;
+ }
+ }
+
sparam_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
if (!sparam_mbox)
goto out;
@@ -3244,20 +3272,7 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la)
goto out;
}
- if (!(phba->hba_flag & HBA_FCOE_MODE)) {
- cfglink_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
- if (!cfglink_mbox)
- goto out;
- vport->port_state = LPFC_LOCAL_CFG_LINK;
- lpfc_config_link(phba, cfglink_mbox);
- cfglink_mbox->vport = vport;
- cfglink_mbox->mbox_cmpl = lpfc_mbx_cmpl_local_config_link;
- rc = lpfc_sli_issue_mbox(phba, cfglink_mbox, MBX_NOWAIT);
- if (rc == MBX_NOT_FINISHED) {
- mempool_free(cfglink_mbox, phba->mbox_mem_pool);
- goto out;
- }
- } else {
+ if (phba->hba_flag & HBA_FCOE_MODE) {
vport->port_state = LPFC_VPORT_UNKNOWN;
/*
* Add the driver's default FCF record at FCF index 0 now. This
@@ -3314,6 +3329,10 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la)
}
/* Reset FCF roundrobin bmask for new discovery */
lpfc_sli4_clear_fcf_rr_bmask(phba);
+ } else {
+ if (phba->bbcredit_support && phba->cfg_enable_bbcr &&
+ !(phba->link_flag & LS_LOOPBACK_MODE))
+ phba->hba_flag |= HBA_DEFER_FLOGI;
}
/* Prepare for LINK up registrations */