When introducing support for irqchip in userspace we needed a way to
mask the timer signal to prevent the guest continuously exiting due to a
screaming timer.
We did this by disabling the corresponding percpu interrupt on the
host interrupt controller, because we cannot rely on the host system
having a GIC, and therefore cannot make any assumptions about having an
active state to hide the timer signal.
Unfortunately, when introducing this feature, it became entirely
possible that a VCPU which belongs to a VM that has a userspace irqchip
can disable the vtimer irq on the host on some physical CPU, and then go
away without ever enabling the vtimer irq on that physical CPU again.
This means that using irqchips in userspace on a system that also
supports running VMs with an in-kernel GIC can prevent forward progress
from in-kernel GIC VMs.
Later on, when we started taking virtual timer interrupts in the arch
timer code, we would also leave this timer state active for userspace
irqchip VMs, because we leave it up to a VGIC-enabled guest to
deactivate the hardware IRQ using the HW bit in the LR.
Both issues are solved by only using the enable/disable trick on systems
that do not have a host GIC which supports the active state, because all
VMs on such systems must use irqchips in userspace. Systems that have a
working GIC with support for an active state use the active state to
mask the timer signal for both userspace and in-kernel irqchips.
Cc: Alexander Graf <agraf(a)suse.de>
Cc: <stable(a)vger.kernel.org> # v4.12+
Fixes: d9e139778376 ("KVM: arm/arm64: Support arch timers with a userspace gic")
Signed-off-by: Christoffer Dall <christoffer.dall(a)linaro.org>
---
This conflicts horribly with everything when applied to either
kvmarm/queue or kvmarm/master. Therefore, this patch is written for
(and applies to) v4.15 with kvmarm/queue merged and should therefore
apply cleanly after v4.16-rc1. An example with this patch applied can
be found on kvmarm/temp-for-v4.16-rc2. I plan on sending this along
with any other potential fixes post v4.16-rc1.
Changes since v1:
- Added userspace_irqchip() wrapper to simplify logic
- Changed has_gic_active_state to a static key
- Fixed typos in commentary and commit message
- Clear the active state on sync for userspace irqchips on systems
with a working GIC.
- Get rid of __timer_snapshot_state() in unmaks_vtimer_irq_user()
because kvm_timer_should_fire() has already been reworked in other
patches to look at the timer state when it's loaded onto the
hardware. As a result, we don't need the
__timer_snapshot_state() indirection anymore and this logic is
now inlined in vtimer_save_state().
virt/kvm/arm/arch_timer.c | 116 +++++++++++++++++++++++++---------------------
1 file changed, 64 insertions(+), 52 deletions(-)
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 70268c0bec79..70f4c30918eb 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -36,6 +36,8 @@ static struct timecounter *timecounter;
static unsigned int host_vtimer_irq;
static u32 host_vtimer_irq_flags;
+static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
+
static const struct kvm_irq_level default_ptimer_irq = {
.irq = 30,
.level = 1,
@@ -56,6 +58,12 @@ u64 kvm_phys_timer_read(void)
return timecounter->cc->read(timecounter->cc);
}
+static inline bool userspace_irqchip(struct kvm *kvm)
+{
+ return static_branch_unlikely(&userspace_irqchip_in_use) &&
+ unlikely(!irqchip_in_kernel(kvm));
+}
+
static void soft_timer_start(struct hrtimer *hrt, u64 ns)
{
hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
@@ -69,25 +77,6 @@ static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work)
cancel_work_sync(work);
}
-static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu)
-{
- struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-
- /*
- * When using a userspace irqchip with the architected timers, we must
- * prevent continuously exiting from the guest, and therefore mask the
- * physical interrupt by disabling it on the host interrupt controller
- * when the virtual level is high, such that the guest can make
- * forward progress. Once we detect the output level being
- * de-asserted, we unmask the interrupt again so that we exit from the
- * guest when the timer fires.
- */
- if (vtimer->irq.level)
- disable_percpu_irq(host_vtimer_irq);
- else
- enable_percpu_irq(host_vtimer_irq, 0);
-}
-
static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
@@ -106,9 +95,9 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
if (kvm_timer_should_fire(vtimer))
kvm_timer_update_irq(vcpu, true, vtimer);
- if (static_branch_unlikely(&userspace_irqchip_in_use) &&
- unlikely(!irqchip_in_kernel(vcpu->kvm)))
- kvm_vtimer_update_mask_user(vcpu);
+ if (userspace_irqchip(vcpu->kvm) &&
+ !static_branch_unlikely(&has_gic_active_state))
+ disable_percpu_irq(host_vtimer_irq);
return IRQ_HANDLED;
}
@@ -290,8 +279,7 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
timer_ctx->irq.level);
- if (!static_branch_unlikely(&userspace_irqchip_in_use) ||
- likely(irqchip_in_kernel(vcpu->kvm))) {
+ if (!userspace_irqchip(vcpu->kvm)) {
ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
timer_ctx->irq.irq,
timer_ctx->irq.level,
@@ -350,12 +338,6 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
phys_timer_emulate(vcpu);
}
-static void __timer_snapshot_state(struct arch_timer_context *timer)
-{
- timer->cnt_ctl = read_sysreg_el0(cntv_ctl);
- timer->cnt_cval = read_sysreg_el0(cntv_cval);
-}
-
static void vtimer_save_state(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -367,8 +349,10 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu)
if (!vtimer->loaded)
goto out;
- if (timer->enabled)
- __timer_snapshot_state(vtimer);
+ if (timer->enabled) {
+ vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+ vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
+ }
/* Disable the virtual timer */
write_sysreg_el0(0, cntv_ctl);
@@ -460,23 +444,43 @@ static void set_cntvoff(u64 cntvoff)
kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
}
-static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu)
+static inline void set_vtimer_irq_phys_active(struct kvm_vcpu *vcpu, bool active)
+{
+ int r;
+ r = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, active);
+ WARN_ON(r);
+}
+
+static void kvm_timer_vcpu_load_gic(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
bool phys_active;
- int ret;
- phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
-
- ret = irq_set_irqchip_state(host_vtimer_irq,
- IRQCHIP_STATE_ACTIVE,
- phys_active);
- WARN_ON(ret);
+ if (irqchip_in_kernel(vcpu->kvm))
+ phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
+ else
+ phys_active = vtimer->irq.level;
+ set_vtimer_irq_phys_active(vcpu, phys_active);
}
-static void kvm_timer_vcpu_load_user(struct kvm_vcpu *vcpu)
+static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
{
- kvm_vtimer_update_mask_user(vcpu);
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+
+ /*
+ * When using a userspace irqchip with the architected timers and a
+ * host interrupt controller that doesn't support an active state, we
+ * must still prevent continuously exiting from the guest, and
+ * therefore mask the physical interrupt by disabling it on the host
+ * interrupt controller when the virtual level is high, such that the
+ * guest can make forward progress. Once we detect the output level
+ * being de-asserted, we unmask the interrupt again so that we exit
+ * from the guest when the timer fires.
+ */
+ if (vtimer->irq.level)
+ disable_percpu_irq(host_vtimer_irq);
+ else
+ enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}
void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
@@ -487,10 +491,10 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
if (unlikely(!timer->enabled))
return;
- if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
- kvm_timer_vcpu_load_user(vcpu);
+ if (static_branch_likely(&has_gic_active_state))
+ kvm_timer_vcpu_load_gic(vcpu);
else
- kvm_timer_vcpu_load_vgic(vcpu);
+ kvm_timer_vcpu_load_nogic(vcpu);
set_cntvoff(vtimer->cntvoff);
@@ -555,18 +559,24 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
- if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
- __timer_snapshot_state(vtimer);
- if (!kvm_timer_should_fire(vtimer)) {
- kvm_timer_update_irq(vcpu, false, vtimer);
- kvm_vtimer_update_mask_user(vcpu);
- }
+ if (!kvm_timer_should_fire(vtimer)) {
+ kvm_timer_update_irq(vcpu, false, vtimer);
+ if (static_branch_likely(&has_gic_active_state))
+ set_vtimer_irq_phys_active(vcpu, false);
+ else
+ enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}
}
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
{
- unmask_vtimer_irq_user(vcpu);
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+ if (unlikely(!timer->enabled))
+ return;
+
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+ unmask_vtimer_irq_user(vcpu);
}
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -753,6 +763,8 @@ int kvm_timer_hyp_init(bool has_gic)
kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
goto out_free_irq;
}
+
+ static_branch_enable(&has_gic_active_state);
}
kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
--
2.14.2
This is a note to let you know that I've just added the patch titled
um: Stop abusing __KERNEL__
to the 3.18-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
um-stop-abusing-__kernel__.patch
and it can be found in the queue-3.18 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 298e20ba8c197e8d429a6c8671550c41c7919033 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard(a)nod.at>
Date: Sun, 31 May 2015 19:50:57 +0200
Subject: um: Stop abusing __KERNEL__
From: Richard Weinberger <richard(a)nod.at>
commit 298e20ba8c197e8d429a6c8671550c41c7919033 upstream.
Currently UML is abusing __KERNEL__ to distinguish between
kernel and host code (os-Linux). It is better to use a custom
define such that existing users of __KERNEL__ don't get confused.
Signed-off-by: Richard Weinberger <richard(a)nod.at>
Cc: Greg Hackmann <ghackmann(a)google.com>
Cc: Bernie Innocenti <codewiz(a)google.com>
Cc: Lorenzo Colitti <lorenzo(a)google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/um/Makefile | 7 ++++---
arch/um/drivers/mconsole.h | 2 +-
arch/um/include/shared/init.h | 4 ++--
arch/um/include/shared/user.h | 2 +-
arch/x86/um/shared/sysdep/tls.h | 6 +++---
5 files changed, 11 insertions(+), 10 deletions(-)
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -68,9 +68,10 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -
KBUILD_AFLAGS += $(ARCH_INCLUDE)
-USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -D__KERNEL__,,\
- $(patsubst -I%,,$(KBUILD_CFLAGS)))) $(ARCH_INCLUDE) $(MODE_INCLUDE) \
- $(filter -I%,$(CFLAGS)) -D_FILE_OFFSET_BITS=64 -idirafter include
+USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -I%,,$(KBUILD_CFLAGS))) \
+ $(ARCH_INCLUDE) $(MODE_INCLUDE) $(filter -I%,$(CFLAGS)) \
+ -D_FILE_OFFSET_BITS=64 -idirafter include \
+ -D__KERNEL__ -D__UM_HOST__
#This will adjust *FLAGS accordingly to the platform.
include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS)
--- a/arch/um/drivers/mconsole.h
+++ b/arch/um/drivers/mconsole.h
@@ -7,7 +7,7 @@
#ifndef __MCONSOLE_H__
#define __MCONSOLE_H__
-#ifndef __KERNEL__
+#ifdef __UM_HOST__
#include <stdint.h>
#define u32 uint32_t
#endif
--- a/arch/um/include/shared/init.h
+++ b/arch/um/include/shared/init.h
@@ -40,7 +40,7 @@
typedef int (*initcall_t)(void);
typedef void (*exitcall_t)(void);
-#ifndef __KERNEL__
+#ifdef __UM_HOST__
#ifndef __section
# define __section(S) __attribute__ ((__section__(#S)))
#endif
@@ -131,7 +131,7 @@ extern struct uml_param __uml_setup_star
#define __uml_postsetup_call __used __section(.uml.postsetup.init)
#define __uml_exit_call __used __section(.uml.exitcall.exit)
-#ifndef __KERNEL__
+#ifdef __UM_HOST__
#define __define_initcall(level,fn) \
static initcall_t __initcall_##fn __used \
--- a/arch/um/include/shared/user.h
+++ b/arch/um/include/shared/user.h
@@ -17,7 +17,7 @@
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* This is to get size_t */
-#ifdef __KERNEL__
+#ifndef __UM_HOST__
#include <linux/types.h>
#else
#include <stddef.h>
--- a/arch/x86/um/shared/sysdep/tls.h
+++ b/arch/x86/um/shared/sysdep/tls.h
@@ -1,7 +1,7 @@
#ifndef _SYSDEP_TLS_H
#define _SYSDEP_TLS_H
-# ifndef __KERNEL__
+#ifdef __UM_HOST__
/* Change name to avoid conflicts with the original one from <asm/ldt.h>, which
* may be named user_desc (but in 2.4 and in header matching its API was named
@@ -22,11 +22,11 @@ typedef struct um_dup_user_desc {
#endif
} user_desc_t;
-# else /* __KERNEL__ */
+#else /* __UM_HOST__ */
typedef struct user_desc user_desc_t;
-# endif /* __KERNEL__ */
+#endif /* __UM_HOST__ */
extern int os_set_thread_area(user_desc_t *info, int pid);
extern int os_get_thread_area(user_desc_t *info, int pid);
Patches currently in stable-queue which might be from richard(a)nod.at are
queue-3.18/um-stop-abusing-__kernel__.patch
queue-3.18/um-link-vmlinux-with-no-pie.patch
queue-3.18/um-remove-copy-paste-code-from-init.h.patch
This is a note to let you know that I've just added the patch titled
um: Remove copy&paste code from init.h
to the 3.18-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
um-remove-copy-paste-code-from-init.h.patch
and it can be found in the queue-3.18 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 30b11ee9ae23d78de66b9ae315880af17a64ba83 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard(a)nod.at>
Date: Sun, 31 May 2015 22:15:58 +0200
Subject: um: Remove copy&paste code from init.h
From: Richard Weinberger <richard(a)nod.at>
commit 30b11ee9ae23d78de66b9ae315880af17a64ba83 upstream.
As we got rid of the __KERNEL__ abuse, we can directly
include linux/compiler.h now.
This also allows gcc 5 to build UML.
Reported-by: Hans-Werner Hilse <hwhilse(a)gmail.com>
Signed-off-by: Richard Weinberger <richard(a)nod.at>
Cc: Greg Hackmann <ghackmann(a)google.com>
Cc: Bernie Innocenti <codewiz(a)google.com>
Cc: Lorenzo Colitti <lorenzo(a)google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/um/include/shared/init.h | 22 +---------------------
1 file changed, 1 insertion(+), 21 deletions(-)
--- a/arch/um/include/shared/init.h
+++ b/arch/um/include/shared/init.h
@@ -40,28 +40,8 @@
typedef int (*initcall_t)(void);
typedef void (*exitcall_t)(void);
-#ifdef __UM_HOST__
-#ifndef __section
-# define __section(S) __attribute__ ((__section__(#S)))
-#endif
-
-#if __GNUC__ == 3
-
-#if __GNUC_MINOR__ >= 3
-# define __used __attribute__((__used__))
-#else
-# define __used __attribute__((__unused__))
-#endif
-
-#else
-#if __GNUC__ == 4
-# define __used __attribute__((__used__))
-#endif
-#endif
-
-#else
#include <linux/compiler.h>
-#endif
+
/* These are for everybody (although not all archs will actually
discard it in modules) */
#define __init __section(.init.text)
Patches currently in stable-queue which might be from richard(a)nod.at are
queue-3.18/um-stop-abusing-__kernel__.patch
queue-3.18/um-link-vmlinux-with-no-pie.patch
queue-3.18/um-remove-copy-paste-code-from-init.h.patch
This is a note to let you know that I've just added the patch titled
um: link vmlinux with -no-pie
to the 3.18-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
um-link-vmlinux-with-no-pie.patch
and it can be found in the queue-3.18 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 883354afbc109c57f925ccc19840055193da0cc0 Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas(a)m3y3r.de>
Date: Sun, 20 Aug 2017 13:26:04 +0200
Subject: um: link vmlinux with -no-pie
From: Thomas Meyer <thomas(a)m3y3r.de>
commit 883354afbc109c57f925ccc19840055193da0cc0 upstream.
Debian's gcc defaults to pie. The global Makefile already defines the -fno-pie option.
Link UML dynamic kernel image also with -no-pie to fix the build.
Signed-off-by: Thomas Meyer <thomas(a)m3y3r.de>
Signed-off-by: Richard Weinberger <richard(a)nod.at>
Cc: Bernie Innocenti <codewiz(a)google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/um/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -116,7 +116,7 @@ archheaders:
archprepare: include/generated/user_constants.h
LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
-LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib
+LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib $(call cc-option, -no-pie)
CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \
$(call cc-option, -fno-stack-protector,) \
Patches currently in stable-queue which might be from thomas(a)m3y3r.de are
queue-3.18/um-link-vmlinux-with-no-pie.patch
This is a note to let you know that I've just added the patch titled
Input: do not emit unneeded EV_SYN when suspending
to the 3.18-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
input-do-not-emit-unneeded-ev_syn-when-suspending.patch
and it can be found in the queue-3.18 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 00159f19a5057cb779146afce1cceede692af346 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov(a)gmail.com>
Date: Thu, 6 Aug 2015 19:15:30 -0700
Subject: Input: do not emit unneeded EV_SYN when suspending
From: Dmitry Torokhov <dmitry.torokhov(a)gmail.com>
commit 00159f19a5057cb779146afce1cceede692af346 upstream.
Do not emit EV_SYN/SYN_REPORT on suspend if there were no keys that are
still pressed as we are suspending the device (and in all other cases when
input core is forcibly releasing keys via input_dev_release_keys() call).
Reviewed-by: Benson Leung <bleung(a)chromium.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov(a)gmail.com>
Signed-off-by: Bo Hu <bohu(a)google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 78d24990a816..5391abd28b27 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -674,13 +674,19 @@ EXPORT_SYMBOL(input_close_device);
*/
static void input_dev_release_keys(struct input_dev *dev)
{
+ bool need_sync = false;
int code;
if (is_event_supported(EV_KEY, dev->evbit, EV_MAX)) {
- for_each_set_bit(code, dev->key, KEY_CNT)
+ for_each_set_bit(code, dev->key, KEY_CNT) {
input_pass_event(dev, EV_KEY, code, 0);
+ need_sync = true;
+ }
+
+ if (need_sync)
+ input_pass_event(dev, EV_SYN, SYN_REPORT, 1);
+
memset(dev->key, 0, sizeof(dev->key));
- input_pass_event(dev, EV_SYN, SYN_REPORT, 1);
}
}
Patches currently in stable-queue which might be from dmitry.torokhov(a)gmail.com are
queue-3.18/input-do-not-emit-unneeded-ev_syn-when-suspending.patch
Hi Marcin,
Since it's been a week, could you confirm the patch is ok as-is or do
you think some comment(s) from James should be incorporated ?
On Tue, Jan 23, 2018 at 3:17 PM, James Hogan <jhogan(a)kernel.org> wrote:
> On Thu, Dec 21, 2017 at 10:00:59PM +0100, Mathieu Malaterre wrote:
>> From: Marcin Nowakowski <marcin.nowakowski(a)mips.com>
>>
>> Change 73fbc1eba7ff added a fix to ensure that the memory range between
>
> Please refer to commits with e.g. commit 73fbc1eba7ff ("MIPS: fix
> mem=X@Y commandline processing").
>
>> PHYS_OFFSET and low memory address specified by mem= cmdline argument is
>> not later processed by free_all_bootmem.
>> This change was incorrect for systems where the commandline specifies
>> more than 1 mem argument, as it will cause all memory between
>> PHYS_OFFSET and each of the memory offsets to be marked as reserved,
>> which results in parts of the RAM marked as reserved (Creator CI20's
>> u-boot has a default commandline argument 'mem=256M@0x0
>> mem=768M@0x30000000').
>>
>> Change the behaviour to ensure that only the range between PHYS_OFFSET
>> and the lowest start address of the memories is marked as protected.
>>
>> This change also ensures that the range is marked protected even if it's
>> only defined through the devicetree and not only via commandline
>> arguments.
>>
>> Reported-by: Mathieu Malaterre <mathieu.malaterre(a)gmail.com>
>> Signed-off-by: Marcin Nowakowski <marcin.nowakowski(a)mips.com>
>> Fixes: 73fbc1eba7ff ("MIPS: fix mem=X@Y commandline processing")
>> Cc: <stable(a)vger.kernel.org> # v4.11
>
> I'm guessing that should technically be v4.11+
My fault, if this is the only change, I can re-submit.
>> ---
>> v2: Use updated email adress, add tag for stable.
>> arch/mips/kernel/setup.c | 19 ++++++++++++++++---
>> 1 file changed, 16 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
>> index 702c678de116..f19d61224c71 100644
>> --- a/arch/mips/kernel/setup.c
>> +++ b/arch/mips/kernel/setup.c
>> @@ -375,6 +375,7 @@ static void __init bootmem_init(void)
>> unsigned long reserved_end;
>> unsigned long mapstart = ~0UL;
>> unsigned long bootmap_size;
>> + phys_addr_t ramstart = ~0UL;
>
> Although practically it might not matter, technically phys_addr_t may be
> 64-bits (CONFIG_PHYS_ADDR_T_64BIT) even on a 32-bit kernels, in which
> case ~0UL may not be sufficiently large.
>
> Maybe that should be ~(phys_addr_t)0, or perhaps (phys_addr_t)ULLONG_MAX
> to match add_memory_region().
>
>> bool bootmap_valid = false;
>> int i;
>>
>> @@ -395,6 +396,21 @@ static void __init bootmem_init(void)
>> max_low_pfn = 0;
>>
>> /*
>> + * Reserve any memory between the start of RAM and PHYS_OFFSET
>> + */
>> + for (i = 0; i < boot_mem_map.nr_map; i++) {
>> + if (boot_mem_map.map[i].type != BOOT_MEM_RAM)
>> + continue;
>> +
>> + ramstart = min(ramstart, boot_mem_map.map[i].addr);
>
> Is it worth incorporating this into the existing loop below ...
>
>> + }
>> +
>> + if (ramstart > PHYS_OFFSET)
>> + add_memory_region(PHYS_OFFSET, ramstart - PHYS_OFFSET,
>> + BOOT_MEM_RESERVED);
>
> ... and this then placed below that loop?
>
> Otherwise I can't find fault with this patch, though i'm not intimately
> familiar with bootmem.
>
> Cheers
> James
>
>> +
>> +
>> + /*
>> * Find the highest page frame number we have available.
>> */
>> for (i = 0; i < boot_mem_map.nr_map; i++) {
>> @@ -664,9 +680,6 @@ static int __init early_parse_mem(char *p)
>>
>> add_memory_region(start, size, BOOT_MEM_RAM);
>>
>> - if (start && start > PHYS_OFFSET)
>> - add_memory_region(PHYS_OFFSET, start - PHYS_OFFSET,
>> - BOOT_MEM_RESERVED);
>> return 0;
>> }
>> early_param("mem", early_parse_mem);
>> --
>> 2.11.0
>>
When introducing support for irqchip in userspace we needed a way to
mask the timer signal to prevent the guest continuously exiting due to a
screaming timer.
We did this by disabling the corresponding percpu interrupt on the
host interrupt controller, because we cannot rely on the host system
having a GIC, and therefore cannot make any assumptions about having an
active state to hide the timer signal.
Unfortunately, when introducing this feature, it became entirely
possible that a VCPU which belongs to a VM that has a userspace irqchip
can disable the vtimer irq on the host on some physical CPU, and then go
away without ever enabling the vimter irq on that physical CPU again.
This means that using irqchips in userspace on a system that also
supports running VMs with an in-kernel GIC can prevent forward progress
from in-kernel GIC VMs.
Later on, when we started taking virtual timer interrupts in the arch
timer code, we would also leave this timer state active for userspace
irqchip VMs, because we leave it up to a VGIC-enabled guest to
deactivate the hardware IRQ using the HW bit in the LR.
Both issues are solved by only using the enable/disable trick on systems
that do not have a host GIC which supports the active state, because all
VMs on such systems must use irqchips in userspace. Systems that have a
working GIC with support for an active state use the active state to
mask the timer signal for both userspace an in-kernel irqchips.
Cc: Alexander Graf <agraf(a)suse.de>
Cc: <stable(a)vger.kernel.org> # v4.12+
Fixes: d9e139778376 ("KVM: arm/arm64: Support arch timers with a userspace gic")
Signed-off-by: Christoffer Dall <christoffer.dall(a)linaro.org>
---
This conflicts horribly with everything when applied to either
kvmarm/queue or kvmarm/master. Therefore, this patch is written for
(and applies to) v4.15 with kvmarm/queue merged and should therefore
apply cleanly after v4.16-rc1. An example with this patch applied can
be found on kvmarm/temp-for-v4.16-rc2. I plan on sending this along
with any other potential fixes post v4.16-rc1.
virt/kvm/arm/arch_timer.c | 77 ++++++++++++++++++++++++++---------------------
1 file changed, 42 insertions(+), 35 deletions(-)
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 70268c0bec79..228906ceb722 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -35,6 +35,7 @@
static struct timecounter *timecounter;
static unsigned int host_vtimer_irq;
static u32 host_vtimer_irq_flags;
+static bool has_gic_active_state;
static const struct kvm_irq_level default_ptimer_irq = {
.irq = 30,
@@ -69,25 +70,6 @@ static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work)
cancel_work_sync(work);
}
-static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu)
-{
- struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-
- /*
- * When using a userspace irqchip with the architected timers, we must
- * prevent continuously exiting from the guest, and therefore mask the
- * physical interrupt by disabling it on the host interrupt controller
- * when the virtual level is high, such that the guest can make
- * forward progress. Once we detect the output level being
- * de-asserted, we unmask the interrupt again so that we exit from the
- * guest when the timer fires.
- */
- if (vtimer->irq.level)
- disable_percpu_irq(host_vtimer_irq);
- else
- enable_percpu_irq(host_vtimer_irq, 0);
-}
-
static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
@@ -107,8 +89,8 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
kvm_timer_update_irq(vcpu, true, vtimer);
if (static_branch_unlikely(&userspace_irqchip_in_use) &&
- unlikely(!irqchip_in_kernel(vcpu->kvm)))
- kvm_vtimer_update_mask_user(vcpu);
+ unlikely(!irqchip_in_kernel(vcpu->kvm)) && !has_gic_active_state)
+ disable_percpu_irq(host_vtimer_irq);
return IRQ_HANDLED;
}
@@ -460,13 +442,16 @@ static void set_cntvoff(u64 cntvoff)
kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
}
-static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu)
+static void kvm_timer_vcpu_load_gic(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
bool phys_active;
int ret;
- phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
+ if (irqchip_in_kernel(vcpu->kvm))
+ phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
+ else
+ phys_active = vtimer->irq.level;
ret = irq_set_irqchip_state(host_vtimer_irq,
IRQCHIP_STATE_ACTIVE,
@@ -474,9 +459,24 @@ static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu)
WARN_ON(ret);
}
-static void kvm_timer_vcpu_load_user(struct kvm_vcpu *vcpu)
+static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
{
- kvm_vtimer_update_mask_user(vcpu);
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+
+ /*
+ * When using a userspace irqchip with the architected timers and a
+ * host interrupt controller that doesn't support an active state, we
+ * must still we must prevent continuously exiting from the guest, and
+ * therefore mask the physical interrupt by disabling it on the host
+ * interrupt controller when the virtual level is high, such that the
+ * guest can make forward progress. Once we detect the output level
+ * being de-asserted, we unmask the interrupt again so that we exit
+ * from the guest when the timer fires.
+ */
+ if (vtimer->irq.level)
+ disable_percpu_irq(host_vtimer_irq);
+ else
+ enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}
void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
@@ -487,10 +487,10 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
if (unlikely(!timer->enabled))
return;
- if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
- kvm_timer_vcpu_load_user(vcpu);
+ if (has_gic_active_state)
+ kvm_timer_vcpu_load_gic(vcpu);
else
- kvm_timer_vcpu_load_vgic(vcpu);
+ kvm_timer_vcpu_load_nogic(vcpu);
set_cntvoff(vtimer->cntvoff);
@@ -555,18 +555,23 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
- if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
- __timer_snapshot_state(vtimer);
- if (!kvm_timer_should_fire(vtimer)) {
- kvm_timer_update_irq(vcpu, false, vtimer);
- kvm_vtimer_update_mask_user(vcpu);
- }
+ __timer_snapshot_state(vtimer);
+ if (!kvm_timer_should_fire(vtimer)) {
+ kvm_timer_update_irq(vcpu, false, vtimer);
+ if (!has_gic_active_state)
+ enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}
}
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
{
- unmask_vtimer_irq_user(vcpu);
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+ if (unlikely(!timer->enabled))
+ return;
+
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+ unmask_vtimer_irq_user(vcpu);
}
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -753,6 +758,8 @@ int kvm_timer_hyp_init(bool has_gic)
kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
goto out_free_irq;
}
+
+ has_gic_active_state = true;
}
kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
--
2.14.2
In the past the ast driver relied upon the fbdev emulation helpers to
call ->load_lut at boot-up. But since
commit b8e2b0199cc377617dc238f5106352c06dcd3fa2
Author: Peter Rosin <peda(a)axentia.se>
Date: Tue Jul 4 12:36:57 2017 +0200
drm/fb-helper: factor out pseudo-palette
that's cleaned up and drivers are expected to boot into a consistent
lut state. This patch fixes that.
Fixes: b8e2b0199cc3 ("drm/fb-helper: factor out pseudo-palette")
Cc: Peter Rosin <peda(a)axenita.se>
Cc: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: <stable(a)vger.kernel.org> # v4.14+
References: https://bugzilla.kernel.org/show_bug.cgi?id=198123
Signed-off-by: Daniel Vetter <daniel.vetter(a)intel.com>
---
drivers/gpu/drm/cirrus/cirrus_mode.c | 40 +++++++++++++++++++++---------------
1 file changed, 23 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/cirrus/cirrus_mode.c b/drivers/gpu/drm/cirrus/cirrus_mode.c
index cd23b1b28259..c91b9b054e3f 100644
--- a/drivers/gpu/drm/cirrus/cirrus_mode.c
+++ b/drivers/gpu/drm/cirrus/cirrus_mode.c
@@ -294,22 +294,7 @@ static void cirrus_crtc_prepare(struct drm_crtc *crtc)
{
}
-/*
- * This is called after a mode is programmed. It should reverse anything done
- * by the prepare function
- */
-static void cirrus_crtc_commit(struct drm_crtc *crtc)
-{
-}
-
-/*
- * The core can pass us a set of gamma values to program. We actually only
- * use this for 8-bit mode so can't perform smooth fades on deeper modes,
- * but it's a requirement that we provide the function
- */
-static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
- u16 *blue, uint32_t size,
- struct drm_modeset_acquire_ctx *ctx)
+static void cirrus_crtc_load_lut(struct drm_crtc *crtc)
{
struct drm_device *dev = crtc->dev;
struct cirrus_device *cdev = dev->dev_private;
@@ -317,7 +302,7 @@ static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
int i;
if (!crtc->enabled)
- return 0;
+ return;
r = crtc->gamma_store;
g = r + crtc->gamma_size;
@@ -330,6 +315,27 @@ static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
WREG8(PALETTE_DATA, *g++ >> 8);
WREG8(PALETTE_DATA, *b++ >> 8);
}
+}
+
+/*
+ * This is called after a mode is programmed. It should reverse anything done
+ * by the prepare function
+ */
+static void cirrus_crtc_commit(struct drm_crtc *crtc)
+{
+ cirrus_crtc_load_lut(crtc);
+}
+
+/*
+ * The core can pass us a set of gamma values to program. We actually only
+ * use this for 8-bit mode so can't perform smooth fades on deeper modes,
+ * but it's a requirement that we provide the function
+ */
+static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
+ u16 *blue, uint32_t size,
+ struct drm_modeset_acquire_ctx *ctx)
+{
+ cirrus_crtc_load_lut(crtc);
return 0;
}
--
2.15.1
In the past the ast driver relied upon the fbdev emulation helpers to
call ->load_lut at boot-up. But since
commit b8e2b0199cc377617dc238f5106352c06dcd3fa2
Author: Peter Rosin <peda(a)axentia.se>
Date: Tue Jul 4 12:36:57 2017 +0200
drm/fb-helper: factor out pseudo-palette
that's cleaned up and drivers are expected to boot into a consistent
lut state. This patch fixes that.
Fixes: b8e2b0199cc3 ("drm/fb-helper: factor out pseudo-palette")
Cc: Peter Rosin <peda(a)axenita.se>
Cc: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: <stable(a)vger.kernel.org> # v4.14+
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=198123
Cc: Bill Fraser <bill.fraser(a)gmail.com>
Reported-and-Tested-by: Bill Fraser <bill.fraser(a)gmail.com>
Tested-by: Konstantin Khlebnikov <koct9i(a)gmail.com>
Tested-by: Paul Tobias <tobias.pal(a)gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter(a)intel.com>
---
drivers/gpu/drm/ast/ast_mode.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index 9555a3542022..831b73392d82 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c
@@ -644,6 +644,7 @@ static void ast_crtc_commit(struct drm_crtc *crtc)
{
struct ast_private *ast = crtc->dev->dev_private;
ast_set_index_reg_mask(ast, AST_IO_SEQ_PORT, 0x1, 0xdf, 0);
+ ast_crtc_load_lut(crtc);
}
--
2.15.1
The bounce buffer is gone from the MMC core, and now we found out
that there are some (crippled) i.MX boards out there that have broken
ADMA (cannot do scatter-gather), and also broken PIO so they must
use SDMA. Closer examination shows a less significant slowdown
also on SDMA-only capable Laptop hosts.
SDMA sets down the number of segments to one, so that each segment
gets turned into a singular request that ping-pongs to the block
layer before the next request/segment is issued.
Apparently it happens a lot that the block layer send requests
that include a lot of physically discontiguous segments. My guess
is that this phenomenon is coming from the file system.
These devices that cannot handle scatterlists in hardware can see
major benefits from a DMA-contiguous bounce buffer.
This patch accumulates those fragmented scatterlists in a physically
contiguous bounce buffer so that we can issue bigger DMA data chunks
to/from the card.
When tested with a PCI-integrated host (1217:8221) that
only supports SDMA:
0b:00.0 SD Host controller: O2 Micro, Inc. OZ600FJ0/OZ900FJ0/OZ600FJS
SD/MMC Card Reader Controller (rev 05)
This patch gave ~1Mbyte/s improved throughput on large reads and
writes when testing using iozone than without the patch.
dmesg:
sdhci-pci 0000:0b:00.0: SDHCI controller found [1217:8221] (rev 5)
mmc0 bounce up to 128 segments into one, max segment size 65536 bytes
mmc0: SDHCI controller on PCI [0000:0b:00.0] using DMA
On the i.MX SDHCI controllers on the crippled i.MX 25 and i.MX 35
the patch restores the performance to what it was before we removed
the bounce buffers.
Cc: Pierre Ossman <pierre(a)ossman.eu>
Cc: Benoît Thébaudeau <benoit(a)wsystem.com>
Cc: Fabio Estevam <fabio.estevam(a)nxp.com>
Cc: Benjamin Beckmeyer <beckmeyer.b(a)rittal.de>
Cc: stable(a)vger.kernel.org # v4.14+
Fixes: de3ee99b097d ("mmc: Delete bounce buffer handling")
Tested-by: Benjamin Beckmeyer <beckmeyer.b(a)rittal.de>
Acked-by: Adrian Hunter <adrian.hunter(a)intel.com>
Signed-off-by: Linus Walleij <linus.walleij(a)linaro.org>
---
ChangeLog v7->v8:
- Fixed bad information and spelling mistakes in the commit
message.
- Use sdhci_sdma_address() in one more spot identified by Adrian.
- Collected Adrian's ACK.
ChangeLog v6->v7:
- Fix the directions on dma_sync_for[device|cpu]() so the
ownership of the buffer gets swapped properly and in the right
direction for every transfer. Didn't see this because x86 PCI is
DMA coherent...
- Tested and greelighted on i.MX 25.
- Also tested on the PCI version.
ChangeLog v5->v6:
- Again switch back to explicit sync of buffers. I want to get this
solution to work because it gives more control and it's more
elegant.
- Update host->max_req_size as noted by Adrian, hopefully this
fixes the i.MX. I was just lucky on my Intel laptop I guess:
the block stack never requested anything bigger than 64KB and
that was why it worked even if max_req_size was bigger than
what would fit in the bounce buffer.
- Copy the number of bytes in the mmc_data instead of the number
of bytes in the bounce buffer. For RX this is blksize * blocks
and for TX this is bytes_xfered.
- Break out a sdhci_sdma_address() for getting the DMA address
for either the raw sglist or the bounce buffer depending on
configuration.
- Add some explicit bounds check for the data so that we do not
attempt to copy more than the bounce buffer size even if the
block layer is erroneously configured.
- Move allocation of bounce buffer out to its own function.
- Use pr_[info|err] throughout so all debug prints from the
driver come out in the same manner and style.
- Use unsigned int for the bounce buffer size.
- Re-tested with iozone: we still get the same nice performance
improvements.
- Request a text on i.MX (hi Benjamin)
ChangeLog v4->v5:
- Go back to dma_alloc_coherent() as this apparently works better.
- Keep the other changes, cap for 64KB, fall back to single segments.
- Requesting a test of this on i.MX. (Sorry Benjamin.)
ChangeLog v3->v4:
- Cap the bounce buffer to 64KB instead of the biggest segment
as we experience diminishing returns with buffers > 64KB.
- Instead of using dma_alloc_coherent(), use good old devm_kmalloc()
and issue dma_sync_single_for*() to explicitly switch
ownership between CPU and the device. This way we exercise the
cache better and may consume less CPU.
- Bail out with single segments if we cannot allocate a bounce
buffer.
- Tested on the PCI SDHCI on my laptop: requesting a new test
on i.MX from Benjamin. (Please!)
ChangeLog v2->v3:
- Rewrite the commit message a bit
- Add Benjamin's Tested-by
- Add Fixes and stable tags
ChangeLog v1->v2:
- Skip the remapping and fiddling with the buffer, instead use
dma_alloc_coherent() and use a simple, coherent bounce buffer.
- Couple kernel messages to ->parent of the mmc_host as it relates
to the hardware characteristics.
---
drivers/mmc/host/sdhci.c | 164 ++++++++++++++++++++++++++++++++++++++++++++---
drivers/mmc/host/sdhci.h | 3 +
2 files changed, 159 insertions(+), 8 deletions(-)
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index e9290a3439d5..d24306b2b839 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -21,6 +21,7 @@
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/scatterlist.h>
+#include <linux/sizes.h>
#include <linux/swiotlb.h>
#include <linux/regulator/consumer.h>
#include <linux/pm_runtime.h>
@@ -502,8 +503,35 @@ static int sdhci_pre_dma_transfer(struct sdhci_host *host,
if (data->host_cookie == COOKIE_PRE_MAPPED)
return data->sg_count;
- sg_count = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
- mmc_get_dma_dir(data));
+ /* Bounce write requests to the bounce buffer */
+ if (host->bounce_buffer) {
+ unsigned int length = data->blksz * data->blocks;
+
+ if (length > host->bounce_buffer_size) {
+ pr_err("%s: asked for transfer of %u bytes exceeds bounce buffer %u bytes\n",
+ mmc_hostname(host->mmc), length,
+ host->bounce_buffer_size);
+ return -EIO;
+ }
+ if (mmc_get_dma_dir(data) == DMA_TO_DEVICE) {
+ /* Copy the data to the bounce buffer */
+ sg_copy_to_buffer(data->sg, data->sg_len,
+ host->bounce_buffer,
+ length);
+ }
+ /* Switch ownership to the DMA */
+ dma_sync_single_for_device(host->mmc->parent,
+ host->bounce_addr,
+ host->bounce_buffer_size,
+ mmc_get_dma_dir(data));
+ /* Just a dummy value */
+ sg_count = 1;
+ } else {
+ /* Just access the data directly from memory */
+ sg_count = dma_map_sg(mmc_dev(host->mmc),
+ data->sg, data->sg_len,
+ mmc_get_dma_dir(data));
+ }
if (sg_count == 0)
return -ENOSPC;
@@ -673,6 +701,14 @@ static void sdhci_adma_table_post(struct sdhci_host *host,
}
}
+static u32 sdhci_sdma_address(struct sdhci_host *host)
+{
+ if (host->bounce_buffer)
+ return host->bounce_addr;
+ else
+ return sg_dma_address(host->data->sg);
+}
+
static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd)
{
u8 count;
@@ -858,8 +894,8 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd)
SDHCI_ADMA_ADDRESS_HI);
} else {
WARN_ON(sg_cnt != 1);
- sdhci_writel(host, sg_dma_address(data->sg),
- SDHCI_DMA_ADDRESS);
+ sdhci_writel(host, sdhci_sdma_address(host),
+ SDHCI_DMA_ADDRESS);
}
}
@@ -2248,7 +2284,12 @@ static void sdhci_pre_req(struct mmc_host *mmc, struct mmc_request *mrq)
mrq->data->host_cookie = COOKIE_UNMAPPED;
- if (host->flags & SDHCI_REQ_USE_DMA)
+ /*
+ * No pre-mapping in the pre hook if we're using the bounce buffer,
+ * for that we would need two bounce buffers since one buffer is
+ * in flight when this is getting called.
+ */
+ if (host->flags & SDHCI_REQ_USE_DMA && !host->bounce_buffer)
sdhci_pre_dma_transfer(host, mrq->data, COOKIE_PRE_MAPPED);
}
@@ -2352,8 +2393,45 @@ static bool sdhci_request_done(struct sdhci_host *host)
struct mmc_data *data = mrq->data;
if (data && data->host_cookie == COOKIE_MAPPED) {
- dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
- mmc_get_dma_dir(data));
+ if (host->bounce_buffer) {
+ /*
+ * On reads, copy the bounced data into the
+ * sglist
+ */
+ if (mmc_get_dma_dir(data) == DMA_FROM_DEVICE) {
+ unsigned int length = data->bytes_xfered;
+
+ if (length > host->bounce_buffer_size) {
+ pr_err("%s: bounce buffer is %u bytes but DMA claims to have transferred %u bytes\n",
+ mmc_hostname(host->mmc),
+ host->bounce_buffer_size,
+ data->bytes_xfered);
+ /* Cap it down and continue */
+ length = host->bounce_buffer_size;
+ }
+ dma_sync_single_for_cpu(
+ host->mmc->parent,
+ host->bounce_addr,
+ host->bounce_buffer_size,
+ DMA_FROM_DEVICE);
+ sg_copy_from_buffer(data->sg,
+ data->sg_len,
+ host->bounce_buffer,
+ length);
+ } else {
+ /* No copying, just switch ownership */
+ dma_sync_single_for_cpu(
+ host->mmc->parent,
+ host->bounce_addr,
+ host->bounce_buffer_size,
+ mmc_get_dma_dir(data));
+ }
+ } else {
+ /* Unmap the raw data */
+ dma_unmap_sg(mmc_dev(host->mmc), data->sg,
+ data->sg_len,
+ mmc_get_dma_dir(data));
+ }
data->host_cookie = COOKIE_UNMAPPED;
}
}
@@ -2636,7 +2714,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
*/
if (intmask & SDHCI_INT_DMA_END) {
u32 dmastart, dmanow;
- dmastart = sg_dma_address(host->data->sg);
+
+ dmastart = sdhci_sdma_address(host);
dmanow = dmastart + host->data->bytes_xfered;
/*
* Force update to the next DMA block boundary.
@@ -3217,6 +3296,68 @@ void __sdhci_read_caps(struct sdhci_host *host, u16 *ver, u32 *caps, u32 *caps1)
}
EXPORT_SYMBOL_GPL(__sdhci_read_caps);
+static int sdhci_allocate_bounce_buffer(struct sdhci_host *host)
+{
+ struct mmc_host *mmc = host->mmc;
+ unsigned int max_blocks;
+ unsigned int bounce_size;
+ int ret;
+
+ /*
+ * Cap the bounce buffer at 64KB. Using a bigger bounce buffer
+ * has diminishing returns, this is probably because SD/MMC
+ * cards are usually optimized to handle this size of requests.
+ */
+ bounce_size = SZ_64K;
+ /*
+ * Adjust downwards to maximum request size if this is less
+ * than our segment size, else hammer down the maximum
+ * request size to the maximum buffer size.
+ */
+ if (mmc->max_req_size < bounce_size)
+ bounce_size = mmc->max_req_size;
+ max_blocks = bounce_size / 512;
+
+ /*
+ * When we just support one segment, we can get significant
+ * speedups by the help of a bounce buffer to group scattered
+ * reads/writes together.
+ */
+ host->bounce_buffer = devm_kmalloc(mmc->parent,
+ bounce_size,
+ GFP_KERNEL);
+ if (!host->bounce_buffer) {
+ pr_err("%s: failed to allocate %u bytes for bounce buffer, falling back to single segments\n",
+ mmc_hostname(mmc),
+ bounce_size);
+ /*
+ * Exiting with zero here makes sure we proceed with
+ * mmc->max_segs == 1.
+ */
+ return 0;
+ }
+
+ host->bounce_addr = dma_map_single(mmc->parent,
+ host->bounce_buffer,
+ bounce_size,
+ DMA_BIDIRECTIONAL);
+ ret = dma_mapping_error(mmc->parent, host->bounce_addr);
+ if (ret)
+ /* Again fall back to max_segs == 1 */
+ return 0;
+ host->bounce_buffer_size = bounce_size;
+
+ /* Lie about this since we're bouncing */
+ mmc->max_segs = max_blocks;
+ mmc->max_seg_size = bounce_size;
+ mmc->max_req_size = bounce_size;
+
+ pr_info("%s bounce up to %u segments into one, max segment size %u bytes\n",
+ mmc_hostname(mmc), max_blocks, bounce_size);
+
+ return 0;
+}
+
int sdhci_setup_host(struct sdhci_host *host)
{
struct mmc_host *mmc;
@@ -3713,6 +3854,13 @@ int sdhci_setup_host(struct sdhci_host *host)
*/
mmc->max_blk_count = (host->quirks & SDHCI_QUIRK_NO_MULTIBLOCK) ? 1 : 65535;
+ if (mmc->max_segs == 1) {
+ /* This may alter mmc->*_blk_* parameters */
+ ret = sdhci_allocate_bounce_buffer(host);
+ if (ret)
+ return ret;
+ }
+
return 0;
unreg:
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 54bc444c317f..1d7d61e25dbf 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -440,6 +440,9 @@ struct sdhci_host {
int irq; /* Device IRQ */
void __iomem *ioaddr; /* Mapped address */
+ char *bounce_buffer; /* For packing SDMA reads/writes */
+ dma_addr_t bounce_addr;
+ unsigned int bounce_buffer_size;
const struct sdhci_ops *ops; /* Low level hw interface */
--
2.14.3