This patch updates dma_direct_unmap_sg() to mark each scatter/gather
entry invalid, after it's unmapped. This fixes two issues:
1. It makes the unmapping code able to tolerate a double unmap.
2. It prevents the NVMe driver from erroneously treating an unmapped DMA
address as mapped.
The bug that motivated this patch was the following sequence, which
occurred within the NVMe driver, with the kernel flag `swiotlb=force`.
* NVMe driver calls dma_direct_map_sg()
* dma_direct_map_sg() fails part way through the scatter gather/list
* dma_direct_map_sg() calls dma_direct_unmap_sg() to unmap any entries
succeeded.
* NVMe driver calls dma_direct_unmap_sg(), redundantly, leading to a
double unmap, which is a bug.
With this patch, a hadoop workload running on a cluster of three AMD
SEV VMs, is able to succeed. Without the patch, the hadoop workload
suffers application-level and even VM-level failures.
Tested-by: Jianxiong Gao <jxgao(a)google.com>
Tested-by: Marc Orr <marcorr(a)google.com>
Reviewed-by: Jianxiong Gao <jxgao(a)google.com>
Signed-off-by: Marc Orr <marcorr(a)google.com>
---
kernel/dma/direct.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 0a4881e59aa7..3d9b17fe5771 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -374,9 +374,11 @@ void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
struct scatterlist *sg;
int i;
- for_each_sg(sgl, sg, nents, i)
+ for_each_sg(sgl, sg, nents, i) {
dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir,
attrs);
+ sg->dma_address = DMA_MAPPING_ERROR;
+ }
}
EXPORT_SYMBOL(dma_direct_unmap_sg);
#endif
--
2.30.0.284.gd98b1dd5eaa7-goog
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 98bf2d3f4970179c702ef64db658e0553bc6ef3a Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Date: Tue, 22 Dec 2020 07:11:18 +0000
Subject: [PATCH] powerpc/32s: Fix RTAS machine check with VMAP stack
When we have VMAP stack, exception prolog 1 sets r1, not r11.
When it is not an RTAS machine check, don't trash r1 because it is
needed by prolog 1.
Fixes: da7bb43ab9da ("powerpc/32: Fix vmap stack - Properly set r1 before activating MMU")
Fixes: d2e006036082 ("powerpc/32: Use SPRN_SPRG_SCRATCH2 in exception prologs")
Cc: stable(a)vger.kernel.org # v5.10+
Signed-off-by: Christophe Leroy <christophe.leroy(a)csgroup.eu>
[mpe: Squash in fixup for RTAS machine check from Christophe]
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/bc77d61d1c18940e456a2dee464f1e2eda65a3f0.16086210…
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 349bf3f0c3af..858fbc8b19f3 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -260,10 +260,19 @@ __secondary_hold_acknowledge:
MachineCheck:
EXCEPTION_PROLOG_0
#ifdef CONFIG_PPC_CHRP
+#ifdef CONFIG_VMAP_STACK
+ mtspr SPRN_SPRG_SCRATCH2,r1
+ mfspr r1, SPRN_SPRG_THREAD
+ lwz r1, RTAS_SP(r1)
+ cmpwi cr1, r1, 0
+ bne cr1, 7f
+ mfspr r1, SPRN_SPRG_SCRATCH2
+#else
mfspr r11, SPRN_SPRG_THREAD
lwz r11, RTAS_SP(r11)
cmpwi cr1, r11, 0
bne cr1, 7f
+#endif
#endif /* CONFIG_PPC_CHRP */
EXCEPTION_PROLOG_1 for_rtas=1
7: EXCEPTION_PROLOG_2
From: Muhammed Fazal <mfazale(a)nvidia.com>
Ignore I2C_M_DMA_SAFE flag as it does not make a difference
for bpmp-i2c, but causes -EINVAL to be returned for valid
transactions.
Signed-off-by: Muhammed Fazal <mfazale(a)nvidia.com>
Cc: stable(a)vger.kernel.org # v4.19+
Signed-off-by: Mikko Perttunen <mperttunen(a)nvidia.com>
---
This fixes failures seen with PMIC probing tools on
Tegra186+ boards.
drivers/i2c/busses/i2c-tegra-bpmp.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/i2c/busses/i2c-tegra-bpmp.c b/drivers/i2c/busses/i2c-tegra-bpmp.c
index ec7a7e917edd..998d4b21fb59 100644
--- a/drivers/i2c/busses/i2c-tegra-bpmp.c
+++ b/drivers/i2c/busses/i2c-tegra-bpmp.c
@@ -80,6 +80,9 @@ static int tegra_bpmp_xlate_flags(u16 flags, u16 *out)
flags &= ~I2C_M_RECV_LEN;
}
+ if (flags & I2C_M_DMA_SAFE)
+ flags &= ~I2C_M_DMA_SAFE;
+
return (flags != 0) ? -EINVAL : 0;
}
--
2.30.0
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 5efc1f4b454c6179d35e7b0c3eda0ad5763a00fc Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher(a)amd.com>
Date: Tue, 5 Jan 2021 11:42:08 -0500
Subject: [PATCH] Revert "drm/amd/display: Fix memory leaks in S3 resume"
This reverts commit a135a1b4c4db1f3b8cbed9676a40ede39feb3362.
This leads to blank screens on some boards after replugging a
display. Revert until we understand the root cause and can
fix both the leak and the blank screen after replug.
Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211033
Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1427
Cc: Stylon Wang <stylon.wang(a)amd.com>
Cc: Harry Wentland <harry.wentland(a)amd.com>
Cc: Nicholas Kazlauskas <nicholas.kazlauskas(a)amd.com>
Cc: Andre Tomt <andre(a)tomt.net>
Cc: Oleksandr Natalenko <oleksandr(a)natalenko.name>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 3fb6baf9b0ba..146486071d01 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2386,8 +2386,7 @@ void amdgpu_dm_update_connector_after_detect(
drm_connector_update_edid_property(connector,
aconnector->edid);
- aconnector->num_modes = drm_add_edid_modes(connector, aconnector->edid);
- drm_connector_list_update(connector);
+ drm_add_edid_modes(connector, aconnector->edid);
if (aconnector->dc_link->aux_mode)
drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux,
From: Eric Biggers <ebiggers(a)google.com>
When lazytime is enabled and an inode is being written due to its
in-memory updated timestamps having expired, either due to a sync() or
syncfs() system call or due to dirtytime_expire_interval having elapsed,
the VFS needs to inform the filesystem so that the filesystem can copy
the inode's timestamps out to the on-disk data structures.
This is done by __writeback_single_inode() calling
mark_inode_dirty_sync(), which then calls ->dirty_inode(I_DIRTY_SYNC).
However, this occurs after __writeback_single_inode() has already
cleared the dirty flags from ->i_state. This causes two bugs:
- mark_inode_dirty_sync() redirties the inode, causing it to remain
dirty. This wastefully causes the inode to be written twice. But
more importantly, it breaks cases where sync_filesystem() is expected
to clean dirty inodes. This includes the FS_IOC_REMOVE_ENCRYPTION_KEY
ioctl (as reported at
https://lore.kernel.org/r/20200306004555.GB225345@gmail.com), as well
as possibly filesystem freezing (freeze_super()).
- Since ->i_state doesn't contain I_DIRTY_TIME when ->dirty_inode() is
called from __writeback_single_inode() for lazytime expiration,
xfs_fs_dirty_inode() ignores the notification. (XFS only cares about
lazytime expirations, and it assumes that I_DIRTY_TIME will contain
i_state during those.) Therefore, lazy timestamps aren't persisted by
sync(), syncfs(), or dirtytime_expire_interval on XFS.
Fix this by moving the call to mark_inode_dirty_sync() to earlier in
__writeback_single_inode(), before the dirty flags are cleared from
i_state. This makes filesystems be properly notified of the timestamp
expiration, and it avoids incorrectly redirtying the inode.
This fixes xfstest generic/580 (which tests
FS_IOC_REMOVE_ENCRYPTION_KEY) when run on ext4 or f2fs with lazytime
enabled. It also fixes the new lazytime xfstest I've proposed, which
reproduces the above-mentioned XFS bug
(https://lore.kernel.org/r/20210105005818.92978-1-ebiggers@kernel.org).
Alternatively, we could call ->dirty_inode(I_DIRTY_SYNC) directly. But
due to the introduction of I_SYNC_QUEUED, mark_inode_dirty_sync() is the
right thing to do because mark_inode_dirty_sync() now knows not to move
the inode to a writeback list if it is currently queued for sync.
Fixes: 0ae45f63d4ef ("vfs: add support for a lazytime mount option")
Cc: stable(a)vger.kernel.org
Depends-on: 5afced3bf281 ("writeback: Avoid skipping inode writeback")
Suggested-by: Jan Kara <jack(a)suse.cz>
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
---
fs/fs-writeback.c | 24 +++++++++++++-----------
1 file changed, 13 insertions(+), 11 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index acfb55834af23..c41cb887eb7d3 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1474,21 +1474,25 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
}
/*
- * Some filesystems may redirty the inode during the writeback
- * due to delalloc, clear dirty metadata flags right before
- * write_inode()
+ * If the inode has dirty timestamps and we need to write them, call
+ * mark_inode_dirty_sync() to notify the filesystem about it and to
+ * change I_DIRTY_TIME into I_DIRTY_SYNC.
*/
- spin_lock(&inode->i_lock);
-
- dirty = inode->i_state & I_DIRTY;
if ((inode->i_state & I_DIRTY_TIME) &&
- ((dirty & I_DIRTY_INODE) ||
- wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync ||
+ (wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync ||
time_after(jiffies, inode->dirtied_time_when +
dirtytime_expire_interval * HZ))) {
- dirty |= I_DIRTY_TIME;
trace_writeback_lazytime(inode);
+ mark_inode_dirty_sync(inode);
}
+
+ /*
+ * Some filesystems may redirty the inode during the writeback
+ * due to delalloc, clear dirty metadata flags right before
+ * write_inode()
+ */
+ spin_lock(&inode->i_lock);
+ dirty = inode->i_state & I_DIRTY;
inode->i_state &= ~dirty;
/*
@@ -1509,8 +1513,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
spin_unlock(&inode->i_lock);
- if (dirty & I_DIRTY_TIME)
- mark_inode_dirty_sync(inode);
/* Don't write the inode if only I_DIRTY_PAGES was set */
if (dirty & ~I_DIRTY_PAGES) {
int err = write_inode(inode, wbc);
--
2.30.0