The default kernel_fpu_begin() doesn't work on systems that support XMM but haven't yet enabled CR4.OSFXSR. This causes crashes when _mmx_memcpy() is called too early because LDMXCSR generates #UD when the aforementioned bit is clear.
Fix it by using kernel_fpu_begin_mask(KFPU_387) explicitly.
Fixes: 7ad816762f9b ("x86/fpu: Reset MXCSR to default in kernel_fpu_begin()")
Cc: <stable@vger.kernel.org>
Reported-by: Krzysztof Mazur <krzysiek@podlesie.net>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/lib/mmx_32.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c index 4321fa02e18d..ad1dabce931e 100644 --- a/arch/x86/lib/mmx_32.c +++ b/arch/x86/lib/mmx_32.c @@ -26,6 +26,16 @@ #include <asm/fpu/api.h> #include <asm/asm.h>
+/* + * Use KFPU_387. MMX instructions are not affected by MXCSR, + * but both AMD and Intel documentation states that even integer MMX + * operations will result in #MF if an exception is pending in FCW. + * + * EMMS is not needed afterwards because, after we call kernel_fpu_end(), + * any subsequent user of the 387 stack will reinitialize it using + * KFPU_387. + */ + void *_mmx_memcpy(void *to, const void *from, size_t len) { void *p; @@ -37,7 +47,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len) p = to; i = len >> 6; /* len/64 */
- kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387);
__asm__ __volatile__ ( "1: prefetch (%0)\n" /* This set is 28 bytes */ @@ -127,7 +137,7 @@ static void fast_clear_page(void *page) { int i;
- kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387);
__asm__ __volatile__ ( " pxor %%mm0, %%mm0\n" : : @@ -160,7 +170,7 @@ static void fast_copy_page(void *to, void *from) { int i;
- kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387);
/* * maybe the prefetch stuff can go before the expensive fnsave... @@ -247,7 +257,7 @@ static void fast_clear_page(void *page) { int i;
- kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387);
__asm__ __volatile__ ( " pxor %%mm0, %%mm0\n" : : @@ -282,7 +292,7 @@ static void fast_copy_page(void *to, void *from) { int i;
- kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387);
__asm__ __volatile__ ( "1: prefetch (%0)\n"
On Wed, Jan 20, 2021 at 09:09:49PM -0800, Andy Lutomirski wrote:
> The default kernel_fpu_begin() doesn't work on systems that support XMM but haven't yet enabled CR4.OSFXSR. This causes crashes when _mmx_memcpy() is called too early because LDMXCSR generates #UD when the aforementioned bit is clear.
>
> Fix it by using kernel_fpu_begin_mask(KFPU_387) explicitly.
>
> Fixes: 7ad816762f9b ("x86/fpu: Reset MXCSR to default in kernel_fpu_begin()")
> Cc: <stable@vger.kernel.org>
> Reported-by: Krzysztof Mazur <krzysiek@podlesie.net>
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
>
>  arch/x86/lib/mmx_32.c | 20 +++++++++++++++-----
>  1 file changed, 15 insertions(+), 5 deletions(-)
Thanks, 5.10 + this patch series boots on K7 with SSE.
Tested-by: Krzysztof Mazur <krzysiek@podlesie.net>
Regards, Krzysiek
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID:     67de8dca50c027ca0fa3b62a488ee5035036a0da
Gitweb:        https://git.kernel.org/tip/67de8dca50c027ca0fa3b62a488ee5035036a0da
Author:        Andy Lutomirski <luto@kernel.org>
AuthorDate:    Wed, 20 Jan 2021 21:09:49 -08:00
Committer:     Borislav Petkov <bp@suse.de>
CommitterDate: Thu, 21 Jan 2021 13:39:36 +01:00
x86/mmx: Use KFPU_387 for MMX string operations
The default kernel_fpu_begin() doesn't work on systems that support XMM but haven't yet enabled CR4.OSFXSR. This causes crashes when _mmx_memcpy() is called too early because LDMXCSR generates #UD when the aforementioned bit is clear.
Fix it by using kernel_fpu_begin_mask(KFPU_387) explicitly.
Fixes: 7ad816762f9b ("x86/fpu: Reset MXCSR to default in kernel_fpu_begin()")
Reported-by: Krzysztof Mazur <krzysiek@podlesie.net>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Krzysztof Piotr Olędzki <ole@ans.pl>
Tested-by: Krzysztof Mazur <krzysiek@podlesie.net>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/e7bf21855fe99e5f3baa27446e32623358f69e8d.161120569...
---
 arch/x86/lib/mmx_32.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c index 4321fa0..419365c 100644 --- a/arch/x86/lib/mmx_32.c +++ b/arch/x86/lib/mmx_32.c @@ -26,6 +26,16 @@ #include <asm/fpu/api.h> #include <asm/asm.h>
+/* + * Use KFPU_387. MMX instructions are not affected by MXCSR, + * but both AMD and Intel documentation states that even integer MMX + * operations will result in #MF if an exception is pending in FCW. + * + * EMMS is not needed afterwards because, after calling kernel_fpu_end(), + * any subsequent user of the 387 stack will reinitialize it using + * KFPU_387. + */ + void *_mmx_memcpy(void *to, const void *from, size_t len) { void *p; @@ -37,7 +47,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len) p = to; i = len >> 6; /* len/64 */
- kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387);
__asm__ __volatile__ ( "1: prefetch (%0)\n" /* This set is 28 bytes */ @@ -127,7 +137,7 @@ static void fast_clear_page(void *page) { int i;
- kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387);
__asm__ __volatile__ ( " pxor %%mm0, %%mm0\n" : : @@ -160,7 +170,7 @@ static void fast_copy_page(void *to, void *from) { int i;
- kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387);
/* * maybe the prefetch stuff can go before the expensive fnsave... @@ -247,7 +257,7 @@ static void fast_clear_page(void *page) { int i;
- kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387);
__asm__ __volatile__ ( " pxor %%mm0, %%mm0\n" : : @@ -282,7 +292,7 @@ static void fast_copy_page(void *to, void *from) { int i;
- kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387);
__asm__ __volatile__ ( "1: prefetch (%0)\n"
linux-stable-mirror@lists.linaro.org