The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7b40066c97ec66a44e388f82fcf694987451768f Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Date: Thu, 5 Aug 2021 15:29:54 -0400
Subject: [PATCH] tracepoint: Use rcu get state and cond sync for static call
updates
State transitions of the callback count from 1->0->1 and from N->2->1 require
RCU synchronization. Rather than performing the RCU synchronization every
time such a state change occurs, which is quite slow when many tracepoints
are registered in batch, keep a snapshot of the RCU state at the most recent
transitions that belong to a chain, and conditionally wait for a grace period
on the last transition of the chain only if no grace period has elapsed since
the last snapshot.
This applies to both RCU and SRCU.
This brings the performance back to what it was before the regression
introduced by commit 231264d6927f ("Fix: tracepoint: static call function
vs data state mismatch").
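For readers unfamiliar with the polled grace-period API, here is a minimal
sketch of the snapshot/conditional-sync pattern described above (illustrative
only: the helper and variable names are hypothetical, while
get_state_synchronize_rcu() and cond_synchronize_rcu() are the upstream RCU
APIs; the actual patch below applies the same idea to SRCU via
start_poll_synchronize_srcu() and poll_state_synchronize_srcu()):

#include <linux/rcupdate.h>

static unsigned long rcu_cookie;	/* snapshot taken at the start of a chain */
static bool snapshot_ongoing;

/* Record the current RCU grace-period state instead of waiting right away. */
static void snapshot_rcu_state(void)
{
	rcu_cookie = get_state_synchronize_rcu();
	snapshot_ongoing = true;
}

/* At the end of the chain, block only if no grace period has elapsed since the snapshot. */
static void cond_wait_for_rcu(void)
{
	if (!snapshot_ongoing)
		return;
	cond_synchronize_rcu(rcu_cookie);
	snapshot_ongoing = false;
}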
Before this commit:
# trace-cmd start -e all
# time trace-cmd start -p nop
real 0m10.593s
user 0m0.017s
sys 0m0.259s
After this commit:
# trace-cmd start -e all
# time trace-cmd start -p nop
real 0m0.878s
user 0m0.000s
sys 0m0.103s
Link: https://lkml.kernel.org/r/20210805192954.30688-1-mathieu.desnoyers@efficios…
Link: https://lore.kernel.org/io-uring/4ebea8f0-58c9-e571-fd30-0ce4f6f09c70@samba…
Cc: stable(a)vger.kernel.org
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: "Paul E. McKenney" <paulmck(a)kernel.org>
Cc: Stefan Metzmacher <metze(a)samba.org>
Fixes: 231264d6927f ("Fix: tracepoint: static call function vs data state mismatch")
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Reviewed-by: Paul E. McKenney <paulmck(a)kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 8d772bd6894d..efd14c79fab4 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -28,6 +28,44 @@ extern tracepoint_ptr_t __stop___tracepoints_ptrs[];
DEFINE_SRCU(tracepoint_srcu);
EXPORT_SYMBOL_GPL(tracepoint_srcu);
+enum tp_transition_sync {
+ TP_TRANSITION_SYNC_1_0_1,
+ TP_TRANSITION_SYNC_N_2_1,
+
+ _NR_TP_TRANSITION_SYNC,
+};
+
+struct tp_transition_snapshot {
+ unsigned long rcu;
+ unsigned long srcu;
+ bool ongoing;
+};
+
+/* Protected by tracepoints_mutex */
+static struct tp_transition_snapshot tp_transition_snapshot[_NR_TP_TRANSITION_SYNC];
+
+static void tp_rcu_get_state(enum tp_transition_sync sync)
+{
+ struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];
+
+ /* Keep the latest get_state snapshot. */
+ snapshot->rcu = get_state_synchronize_rcu();
+ snapshot->srcu = start_poll_synchronize_srcu(&tracepoint_srcu);
+ snapshot->ongoing = true;
+}
+
+static void tp_rcu_cond_sync(enum tp_transition_sync sync)
+{
+ struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];
+
+ if (!snapshot->ongoing)
+ return;
+ cond_synchronize_rcu(snapshot->rcu);
+ if (!poll_state_synchronize_srcu(&tracepoint_srcu, snapshot->srcu))
+ synchronize_srcu(&tracepoint_srcu);
+ snapshot->ongoing = false;
+}
+
/* Set to 1 to enable tracepoint debug output */
static const int tracepoint_debug;
@@ -311,6 +349,11 @@ static int tracepoint_add_func(struct tracepoint *tp,
*/
switch (nr_func_state(tp_funcs)) {
case TP_FUNC_1: /* 0->1 */
+ /*
+ * Make sure new static func never uses old data after a
+ * 1->0->1 transition sequence.
+ */
+ tp_rcu_cond_sync(TP_TRANSITION_SYNC_1_0_1);
/* Set static call to first function */
tracepoint_update_call(tp, tp_funcs);
/* Both iterator and static call handle NULL tp->funcs */
@@ -325,10 +368,15 @@ static int tracepoint_add_func(struct tracepoint *tp,
* Requires ordering between RCU assign/dereference and
* static call update/call.
*/
- rcu_assign_pointer(tp->funcs, tp_funcs);
- break;
+ fallthrough;
case TP_FUNC_N: /* N->N+1 (N>1) */
rcu_assign_pointer(tp->funcs, tp_funcs);
+ /*
+ * Make sure static func never uses incorrect data after a
+ * N->...->2->1 (N>1) transition sequence.
+ */
+ if (tp_funcs[0].data != old[0].data)
+ tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1);
break;
default:
WARN_ON_ONCE(1);
@@ -372,24 +420,23 @@ static int tracepoint_remove_func(struct tracepoint *tp,
/* Both iterator and static call handle NULL tp->funcs */
rcu_assign_pointer(tp->funcs, NULL);
/*
- * Make sure new func never uses old data after a 1->0->1
- * transition sequence.
- * Considering that transition 0->1 is the common case
- * and don't have rcu-sync, issue rcu-sync after
- * transition 1->0 to break that sequence by waiting for
- * readers to be quiescent.
+ * Make sure new static func never uses old data after a
+ * 1->0->1 transition sequence.
*/
- tracepoint_synchronize_unregister();
+ tp_rcu_get_state(TP_TRANSITION_SYNC_1_0_1);
break;
case TP_FUNC_1: /* 2->1 */
rcu_assign_pointer(tp->funcs, tp_funcs);
/*
- * On 2->1 transition, RCU sync is needed before setting
- * static call to first callback, because the observer
- * may have loaded any prior tp->funcs after the last one
- * associated with an rcu-sync.
+ * Make sure static func never uses incorrect data after a
+ * N->...->2->1 (N>2) transition sequence. If the first
+ * element's data has changed, then force the synchronization
+ * to prevent current readers that have loaded the old data
+ * from calling the new function.
*/
- tracepoint_synchronize_unregister();
+ if (tp_funcs[0].data != old[0].data)
+ tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1);
+ tp_rcu_cond_sync(TP_TRANSITION_SYNC_N_2_1);
/* Set static call to first function */
tracepoint_update_call(tp, tp_funcs);
break;
@@ -397,6 +444,12 @@ static int tracepoint_remove_func(struct tracepoint *tp,
fallthrough;
case TP_FUNC_N:
rcu_assign_pointer(tp->funcs, tp_funcs);
+ /*
+ * Make sure static func never uses incorrect data after a
+ * N->...->2->1 (N>2) transition sequence.
+ */
+ if (tp_funcs[0].data != old[0].data)
+ tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1);
break;
default:
WARN_ON_ONCE(1);
One cannot ftrace functions that are used to set up ftrace itself. Doing so
leads to a racy kernel panic, as observed by users on SiFive HiFive Unmatched
boards with Ubuntu kernels.
This has been debugged and fixed in v5.12 kernels by ensuring that all
sbi functions are excluded from ftrace.
Link: https://forums.sifive.com/t/u-boot-says-unhandled-exception-illegal-instruc…
BugLink: https://bugs.launchpad.net/bugs/1934548
Guo Ren (2):
riscv: Fixup wrong ftrace remove cflag
riscv: Fixup patch_text panic in ftrace
arch/riscv/kernel/Makefile | 5 +++--
arch/riscv/mm/Makefile | 3 ++-
2 files changed, 5 insertions(+), 3 deletions(-)
--
2.30.2
During tracee-ebpf regression tests, it was discovered that a CO-RE capable
eBPF program that relied on a kconfig BTF extern could not be loaded, failing
with the following error:
libbpf: prog 'tracepoint__raw_syscalls__sys_enter': failed to attach to raw
tracepoint 'sys_enter': Invalid argument
That happened because the CONFIG_ARCH_HAS_SYSCALL_WRAPPER variable had the
wrong value despite the kconfig map existing, misleading the eBPF program's
execution (the program would then use different pointers, which the verifier
rejects at load time).
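For context, a CO-RE program consumes such a kconfig value through a
libbpf-resolved extern; a minimal sketch follows (the config symbol, program
name and tracepoint match the report above, but the program body and printout
are illustrative assumptions, not tracee's actual code):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* Resolved by libbpf at load time from the running kernel's config/BTF data. */
extern bool CONFIG_ARCH_HAS_SYSCALL_WRAPPER __kconfig __weak;

SEC("raw_tracepoint/sys_enter")
int tracepoint__raw_syscalls__sys_enter(struct bpf_raw_tracepoint_args *ctx)
{
	/*
	 * On kernels built with syscall wrappers, args[0] is a struct pt_regs
	 * pointer; otherwise the syscall arguments are laid out differently,
	 * so the program has to branch on the kconfig value.
	 */
	if (CONFIG_ARCH_HAS_SYSCALL_WRAPPER)
		bpf_printk("pt_regs at %lx", (unsigned long)ctx->args[0]);

	return 0;
}

char LICENSE[] SEC("license") = "GPL";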
I got the patch proposed here by bisecting the upstream tree with the testcase
just described. I kindly ask you to include this patch in the LTS v5.4.x
series so that CO-RE (Compile Once - Run Everywhere) eBPF programs relying on
kconfig settings can be correctly loaded on the v5.4 kernel series.
Link: https://github.com/aquasecurity/tracee/issues/851#issuecomment-903074596
I have tested the latest 5.4 stable tree with this patch and it fixes the issue.
-rafaeldtinoco
A bad "Fixes" SHA in mainline 7387a72c5f8 references a non-public
SHA, instead of referencing f8dd60de1948 -- see:
https://lore.kernel.org/lkml/20210817075644.0b5123d2@canb.auug.org.au/
This matters to -stable since the broken commit is here:
stable-queue$ git grep -l f8dd60de1948
releases/5.10.56/tipc-fix-implicit-connect-for-syn.patch
releases/5.13.8/tipc-fix-implicit-connect-for-syn.patch
and hence those releases will need mainline 7387a72c5f8 applied, but
I suspect automatic "Fixes:" parsing won't "see" it.
Thanks,
Paul.
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4e9655763b82a91e4c341835bb504a2b1590f984 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu(a)suse.com>
Date: Wed, 25 Aug 2021 13:41:42 +0800
Subject: [PATCH] Revert "btrfs: compression: don't try to compress if we don't
have enough pages"
This reverts commit f2165627319ffd33a6217275e5690b1ab5c45763.
[BUG]
It's no longer possible to create compressed inline extent after commit
f2165627319f ("btrfs: compression: don't try to compress if we don't
have enough pages").
[CAUSE]
For the compression code, there are several possible reasons why we may have
a range that needs to be compressed even though it is no more than one page:
- Compressed inline write
The data is always smaller than one sector and the test lacks the
condition to properly recognize a non-inline extent.
- Compressed subpage write
For the incoming subpage compressed write support, we require page
alignment of the delalloc range.
And for 64K page size, we can compress just one page into smaller
sectors.
For those reasons, the requirement for the data to be more than one page
is not correct, and it is already causing a regression for compressed inline
data writeback. The idea of skipping compression for a single page to avoid
wasting CPU time could be revisited in the future.
[FIX]
Fix it by reverting the offending commit.
Reported-by: Zygo Blaxell <ce3g8jdj(a)umail.furryterror.org>
Link: https://lore.kernel.org/linux-btrfs/afa2742.c084f5d6.17b6b08dffc@tnonline.n…
Fixes: f2165627319f ("btrfs: compression: don't try to compress if we don't have enough pages")
CC: stable(a)vger.kernel.org # 4.4+
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 06f9f167222b..bd5689fa290e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -629,7 +629,7 @@ static noinline int compress_file_range(struct async_chunk *async_chunk)
* inode has not been flagged as nocompress. This flag can
* change at any time if we discover bad compression ratios.
*/
- if (nr_pages > 1 && inode_need_compress(BTRFS_I(inode), start, end)) {
+ if (inode_need_compress(BTRFS_I(inode), start, end)) {
WARN_ON(pages);
pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
if (!pages) {
From: Frieder Schrempf <frieder.schrempf(a)kontron.de>
The new generic NAND ECC framework stores the configuration and
requirements in separate places since commit 93ef92f6f422 ("mtd: nand: Use
the new generic ECC object"). In 5.10.x, the SPI NAND layer still uses only
the requirements to track the ECC properties. This mismatch leads to
values of zero being used for ECC strength and step_size in the SPI NAND
layer wherever nanddev_get_ecc_conf() is used, and therefore breaks SPI
NAND on-die ECC support in 5.10.x.
By using nanddev_get_ecc_requirements() instead of nanddev_get_ecc_conf()
for SPI NAND, we make sure that the correct parameters for the detected
chip are used. In later versions (5.11.x) this is fixed anyway with the
implementation of the SPI NAND on-die ECC engine.
Cc: stable(a)vger.kernel.org # 5.10.x
Reported-by: voice INTER connect GmbH <developer(a)voiceinterconnect.de>
Signed-off-by: Frieder Schrempf <frieder.schrempf(a)kontron.de>
Acked-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
---
Changes in v2:
* Fix checkpatch error/warnings for commit message style
* Add Miquel's A-b tag
---
drivers/mtd/nand/spi/core.c | 6 +++---
drivers/mtd/nand/spi/macronix.c | 6 +++---
drivers/mtd/nand/spi/toshiba.c | 6 +++---
3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
index 558d8a14810b..8794a1f6eacd 100644
--- a/drivers/mtd/nand/spi/core.c
+++ b/drivers/mtd/nand/spi/core.c
@@ -419,7 +419,7 @@ static int spinand_check_ecc_status(struct spinand_device *spinand, u8 status)
* fixed, so let's return the maximum possible value so that
* wear-leveling layers move the data immediately.
*/
- return nanddev_get_ecc_conf(nand)->strength;
+ return nanddev_get_ecc_requirements(nand)->strength;
case STATUS_ECC_UNCOR_ERROR:
return -EBADMSG;
@@ -1090,8 +1090,8 @@ static int spinand_init(struct spinand_device *spinand)
mtd->oobavail = ret;
/* Propagate ECC information to mtd_info */
- mtd->ecc_strength = nanddev_get_ecc_conf(nand)->strength;
- mtd->ecc_step_size = nanddev_get_ecc_conf(nand)->step_size;
+ mtd->ecc_strength = nanddev_get_ecc_requirements(nand)->strength;
+ mtd->ecc_step_size = nanddev_get_ecc_requirements(nand)->step_size;
return 0;
diff --git a/drivers/mtd/nand/spi/macronix.c b/drivers/mtd/nand/spi/macronix.c
index 8e801e4c3a00..cd7a9cacc3fb 100644
--- a/drivers/mtd/nand/spi/macronix.c
+++ b/drivers/mtd/nand/spi/macronix.c
@@ -84,11 +84,11 @@ static int mx35lf1ge4ab_ecc_get_status(struct spinand_device *spinand,
* data around if it's not necessary.
*/
if (mx35lf1ge4ab_get_eccsr(spinand, &eccsr))
- return nanddev_get_ecc_conf(nand)->strength;
+ return nanddev_get_ecc_requirements(nand)->strength;
- if (WARN_ON(eccsr > nanddev_get_ecc_conf(nand)->strength ||
+ if (WARN_ON(eccsr > nanddev_get_ecc_requirements(nand)->strength ||
!eccsr))
- return nanddev_get_ecc_conf(nand)->strength;
+ return nanddev_get_ecc_requirements(nand)->strength;
return eccsr;
diff --git a/drivers/mtd/nand/spi/toshiba.c b/drivers/mtd/nand/spi/toshiba.c
index 21fde2875674..6fe7bd2a94d2 100644
--- a/drivers/mtd/nand/spi/toshiba.c
+++ b/drivers/mtd/nand/spi/toshiba.c
@@ -90,12 +90,12 @@ static int tx58cxgxsxraix_ecc_get_status(struct spinand_device *spinand,
* data around if it's not necessary.
*/
if (spi_mem_exec_op(spinand->spimem, &op))
- return nanddev_get_ecc_conf(nand)->strength;
+ return nanddev_get_ecc_requirements(nand)->strength;
mbf >>= 4;
- if (WARN_ON(mbf > nanddev_get_ecc_conf(nand)->strength || !mbf))
- return nanddev_get_ecc_conf(nand)->strength;
+ if (WARN_ON(mbf > nanddev_get_ecc_requirements(nand)->strength || !mbf))
+ return nanddev_get_ecc_requirements(nand)->strength;
return mbf;
--
2.32.0
From: Filipe Manana <fdmanana(a)suse.com>
commit bc0939fcfab0d7efb2ed12896b1af3d819954a14 upstream.
We have a race between marking that an inode needs to be logged, either
at btrfs_set_inode_last_trans() or at btrfs_page_mkwrite(), and between
btrfs_sync_log(). The following steps describe how the race happens.
1) We are at transaction N;
2) Inode I was previously fsynced in the current transaction so it has:
inode->logged_trans set to N;
3) The inode's root currently has:
root->log_transid set to 1
root->last_log_commit set to 0
Which means only one log transaction was committed so far, log
transaction 0. When a log tree is created we set ->log_transid and
->last_log_commit of its parent root to 0 (at btrfs_add_log_tree());
4) One more range of pages is dirtied in inode I;
5) Some task A starts an fsync against some other inode J (same root), and
so it joins log transaction 1.
Before task A calls btrfs_sync_log()...
6) Task B starts an fsync against inode I, which currently has the full
sync flag set, so it starts delalloc and waits for the ordered extent
to complete before calling btrfs_inode_in_log() at btrfs_sync_file();
7) During ordered extent completion we have btrfs_update_inode() called
against inode I, which in turn calls btrfs_set_inode_last_trans(),
which does the following:
spin_lock(&inode->lock);
inode->last_trans = trans->transaction->transid;
inode->last_sub_trans = inode->root->log_transid;
inode->last_log_commit = inode->root->last_log_commit;
spin_unlock(&inode->lock);
So ->last_trans is set to N and ->last_sub_trans set to 1.
But before setting ->last_log_commit...
8) Task A is at btrfs_sync_log():
- it increments root->log_transid to 2
- starts writeback for all log tree extent buffers
- waits for the writeback to complete
- writes the super blocks
- updates root->last_log_commit to 1
There are a lot of slow steps between updating root->log_transid and
root->last_log_commit;
9) The task doing the ordered extent completion, currently at
btrfs_set_inode_last_trans(), then finally runs:
inode->last_log_commit = inode->root->last_log_commit;
spin_unlock(&inode->lock);
Which results in inode->last_log_commit being set to 1.
The ordered extent completes;
10) Task B is resumed, and it calls btrfs_inode_in_log() which returns
true because we have all the following conditions met:
inode->logged_trans == N which matches fs_info->generation &&
inode->last_sub_trans (1) <= inode->last_log_commit (1) &&
inode->last_sub_trans (1) <= root->last_log_commit (1) &&
list inode->extent_tree.modified_extents is empty
And as a consequence we return without logging the inode, so the
existing logged version of the inode does not point to the extent
that was written after the previous fsync.
It should be impossible in practice for one task to make so much
progress in btrfs_sync_log() while another task is at
btrfs_set_inode_last_trans(), right after it reads root->log_transid and
before it reads root->last_log_commit. Even if kernel preemption is enabled,
we know the task at btrfs_set_inode_last_trans() cannot be preempted
because it is holding the inode's spinlock.
However there is another place where we do the same without holding the
spinlock, which is in the memory mapped write path at:
vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
{
(...)
BTRFS_I(inode)->last_trans = fs_info->generation;
BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
(...)
So with preemption happening after setting ->last_sub_trans and before
setting ->last_log_commit, it is less of a stretch to have another task
make enough progress at btrfs_sync_log() that the task doing the memory
mapped write ends up with ->last_sub_trans and ->last_log_commit set to
the same value. It is still a big stretch to get there, as the task doing
btrfs_sync_log() has to start writeback, wait for its completion and write
the super blocks.
So fix this in two different ways:
1) For btrfs_set_inode_last_trans(), simply set ->last_log_commit to the
value of ->last_sub_trans minus 1;
2) For btrfs_page_mkwrite() only set the inode's ->last_sub_trans, just
like we do for buffered and direct writes at btrfs_file_write_iter(),
which is all we need to make sure multiple writes and fsyncs to an
inode in the same transaction never result in an fsync missing that
the inode changed and needs to be logged. Turn this into a helper
function and use it both at btrfs_page_mkwrite() and at
btrfs_file_write_iter() - this also fixes the problem that at
btrfs_page_mkwrite() we were setting those fields without the
protection of the inode's spinlock.
This race is extremely unlikely to happen in practice.
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: Anand Jain <anand.jain(a)oracle.com>
---
fs/btrfs/btrfs_inode.h | 15 +++++++++++++++
fs/btrfs/file.c | 10 ++--------
fs/btrfs/inode.c | 4 +---
fs/btrfs/transaction.h | 2 +-
4 files changed, 19 insertions(+), 12 deletions(-)
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index f853835c409c..f3ff57b93158 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -268,6 +268,21 @@ static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
mod);
}
+/*
+ * Called every time after doing a buffered, direct IO or memory mapped write.
+ *
+ * This is to ensure that if we write to a file that was previously fsynced in
+ * the current transaction, then try to fsync it again in the same transaction,
+ * we will know that there were changes in the file and that it needs to be
+ * logged.
+ */
+static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode)
+{
+ spin_lock(&inode->lock);
+ inode->last_sub_trans = inode->root->log_transid;
+ spin_unlock(&inode->lock);
+}
+
static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
{
int ret = 0;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 400b0717b9d4..1279359ed172 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2004,14 +2004,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
inode_unlock(inode);
- /*
- * We also have to set last_sub_trans to the current log transid,
- * otherwise subsequent syncs to a file that's been synced in this
- * transaction will appear to have already occurred.
- */
- spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->last_sub_trans = root->log_transid;
- spin_unlock(&BTRFS_I(inode)->lock);
+ btrfs_set_inode_last_sub_trans(BTRFS_I(inode));
+
if (num_written > 0)
num_written = generic_write_sync(iocb, num_written);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b044b1d910de..1117335374ff 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9250,9 +9250,7 @@ vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
set_page_dirty(page);
SetPageUptodate(page);
- BTRFS_I(inode)->last_trans = fs_info->generation;
- BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
- BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
+ btrfs_set_inode_last_sub_trans(BTRFS_I(inode));
unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index d8a7d460e436..cbede328bda5 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -160,7 +160,7 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->last_trans = trans->transaction->transid;
BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
- BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
+ BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans - 1;
spin_unlock(&BTRFS_I(inode)->lock);
}
--
2.31.1
Joe reports that using a statically allocated buffer for converting CPER
error records into human-readable text is probably a bad idea. Even
though we are not aware of any actual issues, a stack buffer is clearly
a better choice here anyway, so let's move the buffer into the stack
frames of the two functions that refer to it.
Cc: <stable(a)vger.kernel.org>
Reported-by: Joe Perches <joe(a)perches.com>
Signed-off-by: Ard Biesheuvel <ardb(a)kernel.org>
---
drivers/firmware/efi/cper.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index 73bdbd207e7a..6ec8edec6329 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -25,8 +25,6 @@
#include <acpi/ghes.h>
#include <ras/ras_event.h>
-static char rcd_decode_str[CPER_REC_LEN];
-
/*
* CPER record ID need to be unique even after reboot, because record
* ID is used as index for ERST storage, while CPER records from
@@ -312,6 +310,7 @@ const char *cper_mem_err_unpack(struct trace_seq *p,
struct cper_mem_err_compact *cmem)
{
const char *ret = trace_seq_buffer_ptr(p);
+ char rcd_decode_str[CPER_REC_LEN];
if (cper_mem_err_location(cmem, rcd_decode_str))
trace_seq_printf(p, "%s", rcd_decode_str);
@@ -326,6 +325,7 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem,
int len)
{
struct cper_mem_err_compact cmem;
+ char rcd_decode_str[CPER_REC_LEN];
/* Don't trust UEFI 2.1/2.2 structure with bad validation bits */
if (len == sizeof(struct cper_sec_mem_err_old) &&
--
2.30.2