In case of the COW file, new updates and GC writes are already
separated to page caches of the atomic file and COW file. As some cases
that use the meta inode for GC, there are some race issues between a
foreground thread and GC thread.
To handle them, we need to take care when to invalidate and wait
writeback of GC pages in COW files as the case of using the meta inode.
Also, a pointer from the COW inode to the original inode is required to
check the state of original pages.
For the former, we can solve the problem by using the meta inode for GC
of COW files. Then let's get a page from the original inode in
move_data_block when GCing the COW file to avoid race condition.
Fixes: 3db1de0e582c ("f2fs: change the current atomic write way")
Cc: stable(a)vger.kernel.org #v5.19+
Reviewed-by: Sungjong Seo <sj1557.seo(a)samsung.com>
Reviewed-by: Yeongjin Gil <youngjin.gil(a)samsung.com>
Signed-off-by: Sunmin Jeong <s_min.jeong(a)samsung.com>
---
fs/f2fs/data.c | 2 +-
fs/f2fs/f2fs.h | 7 ++++++-
fs/f2fs/file.c | 3 +++
fs/f2fs/gc.c | 12 ++++++++++--
fs/f2fs/inline.c | 2 +-
fs/f2fs/inode.c | 3 ++-
6 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 05158f89ef32..90ff0f6f7f7f 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2651,7 +2651,7 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
return true;
if (IS_NOQUOTA(inode))
return true;
- if (f2fs_is_atomic_file(inode))
+ if (f2fs_used_in_atomic_write(inode))
return true;
/* rewrite low ratio compress data w/ OPU mode to avoid fragmentation */
if (f2fs_compressed_file(inode) &&
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 59c5117e54b1..4f9fd1c1d024 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -4267,9 +4267,14 @@ static inline bool f2fs_post_read_required(struct inode *inode)
f2fs_compressed_file(inode);
}
+static inline bool f2fs_used_in_atomic_write(struct inode *inode)
+{
+ return f2fs_is_atomic_file(inode) || f2fs_is_cow_file(inode);
+}
+
static inline bool f2fs_meta_inode_gc_required(struct inode *inode)
{
- return f2fs_post_read_required(inode) || f2fs_is_atomic_file(inode);
+ return f2fs_post_read_required(inode) || f2fs_used_in_atomic_write(inode);
}
/*
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 25b119cf3499..c9f0ba658cfd 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2116,6 +2116,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
set_inode_flag(fi->cow_inode, FI_COW_FILE);
clear_inode_flag(fi->cow_inode, FI_INLINE_DATA);
+
+ /* Set the COW inode's cow_inode to the atomic inode */
+ F2FS_I(fi->cow_inode)->cow_inode = inode;
} else {
/* Reuse the already created COW inode */
ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 136b9e8180a3..76854e732b35 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1188,7 +1188,11 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
};
int err;
- page = f2fs_grab_cache_page(mapping, index, true);
+ if (f2fs_is_cow_file(inode))
+ page = f2fs_grab_cache_page(F2FS_I(inode)->cow_inode->i_mapping,
+ index, true);
+ else
+ page = f2fs_grab_cache_page(mapping, index, true);
if (!page)
return -ENOMEM;
@@ -1287,7 +1291,11 @@ static int move_data_block(struct inode *inode, block_t bidx,
CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA;
/* do not read out */
- page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
+ if (f2fs_is_cow_file(inode))
+ page = f2fs_grab_cache_page(F2FS_I(inode)->cow_inode->i_mapping,
+ bidx, false);
+ else
+ page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
if (!page)
return -ENOMEM;
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index ac00423f117b..0186ec049db6 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -16,7 +16,7 @@
static bool support_inline_data(struct inode *inode)
{
- if (f2fs_is_atomic_file(inode))
+ if (f2fs_used_in_atomic_write(inode))
return false;
if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))
return false;
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index c26effdce9aa..c810304e2681 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -807,8 +807,9 @@ void f2fs_evict_inode(struct inode *inode)
f2fs_abort_atomic_write(inode, true);
- if (fi->cow_inode) {
+ if (fi->cow_inode && f2fs_is_cow_file(fi->cow_inode)) {
clear_inode_flag(fi->cow_inode, FI_COW_FILE);
+ F2FS_I(fi->cow_inode)->cow_inode = NULL;
iput(fi->cow_inode);
fi->cow_inode = NULL;
}
--
2.25.1
The page cache of the atomic file keeps new data pages which will be
stored in the COW file. It can also keep old data pages when GCing the
atomic file. In this case, new data can be overwritten by old data if a
GC thread sets the old data page as dirty after new data page was
evicted.
Also, since all writes to the atomic file are redirected to COW inodes,
GC for the atomic file is not working well as below.
f2fs_gc(gc_type=FG_GC)
- select A as a victim segment
do_garbage_collect
- iget atomic file's inode for block B
move_data_page
f2fs_do_write_data_page
- use dn of cow inode
- set fio->old_blkaddr from cow inode
- seg_freed is 0 since block B is still valid
- goto gc_more and A is selected as victim again
To solve the problem, let's separate GC writes and updates in the atomic
file by using the meta inode for GC writes.
Fixes: 3db1de0e582c ("f2fs: change the current atomic write way")
Cc: stable(a)vger.kernel.org #v5.19+
Reviewed-by: Sungjong Seo <sj1557.seo(a)samsung.com>
Reviewed-by: Yeongjin Gil <youngjin.gil(a)samsung.com>
Signed-off-by: Sunmin Jeong <s_min.jeong(a)samsung.com>
---
fs/f2fs/f2fs.h | 5 +++++
fs/f2fs/gc.c | 6 +++---
fs/f2fs/segment.c | 4 ++--
3 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index a000cb024dbe..59c5117e54b1 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -4267,6 +4267,11 @@ static inline bool f2fs_post_read_required(struct inode *inode)
f2fs_compressed_file(inode);
}
+static inline bool f2fs_meta_inode_gc_required(struct inode *inode)
+{
+ return f2fs_post_read_required(inode) || f2fs_is_atomic_file(inode);
+}
+
/*
* compress.c
*/
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index a079eebfb080..136b9e8180a3 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1580,7 +1580,7 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
start_bidx = f2fs_start_bidx_of_node(nofs, inode) +
ofs_in_node;
- if (f2fs_post_read_required(inode)) {
+ if (f2fs_meta_inode_gc_required(inode)) {
int err = ra_data_block(inode, start_bidx);
f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -1631,7 +1631,7 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
start_bidx = f2fs_start_bidx_of_node(nofs, inode)
+ ofs_in_node;
- if (f2fs_post_read_required(inode))
+ if (f2fs_meta_inode_gc_required(inode))
err = move_data_block(inode, start_bidx,
gc_type, segno, off);
else
@@ -1639,7 +1639,7 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
segno, off);
if (!err && (gc_type == FG_GC ||
- f2fs_post_read_required(inode)))
+ f2fs_meta_inode_gc_required(inode)))
submitted++;
if (locked) {
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 7e47b8054413..b55fc4bd416a 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3823,7 +3823,7 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct page *cpage;
- if (!f2fs_post_read_required(inode))
+ if (!f2fs_meta_inode_gc_required(inode))
return;
if (!__is_valid_data_blkaddr(blkaddr))
@@ -3842,7 +3842,7 @@ void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
block_t i;
- if (!f2fs_post_read_required(inode))
+ if (!f2fs_meta_inode_gc_required(inode))
return;
for (i = 0; i < len; i++)
--
2.25.1
The quilt patch titled
Subject: hugetlb: force allocating surplus hugepages on mempolicy allowed nodes
has been removed from the -mm tree. Its filename was
hugetlb-force-allocating-surplus-hugepages-on-mempolicy-allowed-nodes.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Aristeu Rozanski <aris(a)redhat.com>
Subject: hugetlb: force allocating surplus hugepages on mempolicy allowed nodes
Date: Fri, 21 Jun 2024 15:00:50 -0400
When trying to allocate a hugepage with no reserved ones free, it may be
allowed in case a number of overcommit hugepages was configured (using
/proc/sys/vm/nr_overcommit_hugepages) and that number wasn't reached.
This allows for a behavior of having extra hugepages allocated
dynamically, if there're resources for it. Some sysadmins even prefer not
reserving any hugepages and setting a big number of overcommit hugepages.
But while attempting to allocate overcommit hugepages in a multi node
system (either NUMA or mempolicy/cpuset) said allocations might randomly
fail even when there're resources available for the allocation.
This happens due to allowed_mems_nr() only accounting for the number of
free hugepages in the nodes the current process belongs to and the surplus
hugepage allocation is done so it can be allocated in any node. In case
one or more of the requested surplus hugepages are allocated in a
different node, the whole allocation will fail due allowed_mems_nr()
returning a lower value.
So allocate surplus hugepages in one of the nodes the current process
belongs to.
Easy way to reproduce this issue is to use a 2+ NUMA nodes system:
# echo 0 >/proc/sys/vm/nr_hugepages
# echo 1 >/proc/sys/vm/nr_overcommit_hugepages
# numactl -m0 ./tools/testing/selftests/mm/map_hugetlb 2
Repeating the execution of map_hugetlb test application will eventually
fail when the hugepage ends up allocated in a different node.
[aris(a)ruivo.org: v2]
Link: https://lkml.kernel.org/r/20240701212343.GG844599@cathedrallabs.org
Link: https://lkml.kernel.org/r/20240621190050.mhxwb65zn37doegp@redhat.com
Signed-off-by: Aristeu Rozanski <aris(a)redhat.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Aristeu Rozanski <aris(a)ruivo.org>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Vishal Moola <vishal.moola(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 47 ++++++++++++++++++++++++++++-------------------
1 file changed, 28 insertions(+), 19 deletions(-)
--- a/mm/hugetlb.c~hugetlb-force-allocating-surplus-hugepages-on-mempolicy-allowed-nodes
+++ a/mm/hugetlb.c
@@ -2620,6 +2620,23 @@ struct folio *alloc_hugetlb_folio_nodema
return alloc_migrate_hugetlb_folio(h, gfp_mask, preferred_nid, nmask);
}
+static nodemask_t *policy_mbind_nodemask(gfp_t gfp)
+{
+#ifdef CONFIG_NUMA
+ struct mempolicy *mpol = get_task_policy(current);
+
+ /*
+ * Only enforce MPOL_BIND policy which overlaps with cpuset policy
+ * (from policy_nodemask) specifically for hugetlb case
+ */
+ if (mpol->mode == MPOL_BIND &&
+ (apply_policy_zone(mpol, gfp_zone(gfp)) &&
+ cpuset_nodemask_valid_mems_allowed(&mpol->nodes)))
+ return &mpol->nodes;
+#endif
+ return NULL;
+}
+
/*
* Increase the hugetlb pool such that it can accommodate a reservation
* of size 'delta'.
@@ -2633,6 +2650,8 @@ static int gather_surplus_pages(struct h
long i;
long needed, allocated;
bool alloc_ok = true;
+ int node;
+ nodemask_t *mbind_nodemask = policy_mbind_nodemask(htlb_alloc_mask(h));
lockdep_assert_held(&hugetlb_lock);
needed = (h->resv_huge_pages + delta) - h->free_huge_pages;
@@ -2647,8 +2666,15 @@ static int gather_surplus_pages(struct h
retry:
spin_unlock_irq(&hugetlb_lock);
for (i = 0; i < needed; i++) {
- folio = alloc_surplus_hugetlb_folio(h, htlb_alloc_mask(h),
- NUMA_NO_NODE, NULL);
+ folio = NULL;
+ for_each_node_mask(node, cpuset_current_mems_allowed) {
+ if (!mbind_nodemask || node_isset(node, *mbind_nodemask)) {
+ folio = alloc_surplus_hugetlb_folio(h, htlb_alloc_mask(h),
+ node, NULL);
+ if (folio)
+ break;
+ }
+ }
if (!folio) {
alloc_ok = false;
break;
@@ -4878,23 +4904,6 @@ static int __init default_hugepagesz_set
}
__setup("default_hugepagesz=", default_hugepagesz_setup);
-static nodemask_t *policy_mbind_nodemask(gfp_t gfp)
-{
-#ifdef CONFIG_NUMA
- struct mempolicy *mpol = get_task_policy(current);
-
- /*
- * Only enforce MPOL_BIND policy which overlaps with cpuset policy
- * (from policy_nodemask) specifically for hugetlb case
- */
- if (mpol->mode == MPOL_BIND &&
- (apply_policy_zone(mpol, gfp_zone(gfp)) &&
- cpuset_nodemask_valid_mems_allowed(&mpol->nodes)))
- return &mpol->nodes;
-#endif
- return NULL;
-}
-
static unsigned int allowed_mems_nr(struct hstate *h)
{
int node;
_
Patches currently in -mm which might be from aris(a)redhat.com are
The quilt patch titled
Subject: hugetlb: force allocating surplus hugepages on mempolicy allowed nodes
has been removed from the -mm tree. Its filename was
hugetlb-force-allocating-surplus-hugepages-on-mempolicy-allowed-nodes-v2.patch
This patch was dropped because it was folded into hugetlb-force-allocating-surplus-hugepages-on-mempolicy-allowed-nodes.patch
------------------------------------------------------
From: Aristeu Rozanski <aris(a)ruivo.org>
Subject: hugetlb: force allocating surplus hugepages on mempolicy allowed nodes
Date: Mon, 1 Jul 2024 17:23:43 -0400
v2: - attempt to make the description more clear
- prevent uninitialized usage of folio in case current process isn't
part of any nodes with memory
Link: https://lkml.kernel.org/r/20240701212343.GG844599@cathedrallabs.org
Signed-off-by: Aristeu Rozanski <aris(a)ruivo.org>
Cc: Vishal Moola <vishal.moola(a)gmail.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Aristeu Rozanski <aris(a)redhat.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 1 +
1 file changed, 1 insertion(+)
--- a/mm/hugetlb.c~hugetlb-force-allocating-surplus-hugepages-on-mempolicy-allowed-nodes-v2
+++ a/mm/hugetlb.c
@@ -2631,6 +2631,7 @@ static int gather_surplus_pages(struct h
retry:
spin_unlock_irq(&hugetlb_lock);
for (i = 0; i < needed; i++) {
+ folio = NULL;
for_each_node_mask(node, cpuset_current_mems_allowed) {
if (!mbind_nodemask || node_isset(node, *mbind_nodemask)) {
folio = alloc_surplus_hugetlb_folio(h, htlb_alloc_mask(h),
_
Patches currently in -mm which might be from aris(a)ruivo.org are
hugetlb-force-allocating-surplus-hugepages-on-mempolicy-allowed-nodes.patch
Hi Linus,
This PR fixes a few kselftests [1]. This has been in linux-next for a week and
rebased to add Mark Brown's Tested-by. The race condition found while writing
this fix is not new and seems specific to UML's hostfs (I also tested against
ext4 and btrfs without being able to trigger this issue).
Feel free to take this PR if you see fit.
Regards,
Mickaël
[1] https://lore.kernel.org/r/9341d4db-5e21-418c-bf9e-9ae2da7877e1@sirena.org.uk
--
The following changes since commit f2661062f16b2de5d7b6a5c42a9a5c96326b8454:
Linux 6.10-rc5 (2024-06-23 17:08:54 -0400)
are available in the Git repository at:
https://git.kernel.org/pub/scm/linux/kernel/git/mic/linux.git tags/kselftest-fix-2024-07-04
for you to fetch changes up to 130e42806773013e9cf32d211922c935ae2df86c:
selftests/harness: Fix tests timeout and race condition (2024-06-28 16:06:03 +0200)
----------------------------------------------------------------
Fix Kselftests timeout and race condition
----------------------------------------------------------------
Mickaël Salaün (1):
selftests/harness: Fix tests timeout and race condition
tools/testing/selftests/kselftest_harness.h | 43 ++++++++++++++++-------------
1 file changed, 24 insertions(+), 19 deletions(-)
Unless tpm_chip_bootstrap() was called by the driver, !chip->auth can
cause a null derefence in tpm_buf_hmac_session*(). Thus, address
!chip->auth in tpm_buf_hmac_session*() and remove the fallback
implementation for !TCG_TPM2_HMAC.
Cc: stable(a)vger.kernel.org # v6.9+
Reported-by: Stefan Berger <stefanb(a)linux.ibm.com>
Closes: https://lore.kernel.org/linux-integrity/20240617193408.1234365-1-stefanb@li…
Fixes: 1085b8276bb4 ("tpm: Add the rest of the session HMAC API")
Tested-by: Michael Ellerman <mpe(a)ellerman.id.au> # ppc
Signed-off-by: Jarkko Sakkinen <jarkko(a)kernel.org>
---
v4:
* Address:
https://lore.kernel.org/linux-integrity/CAHk-=wiM=Cyw-07EkbAH66pE50VzJiT3bV…
* Added tested-by from Michael Ellerman.
v3:
* Address:
https://lore.kernel.org/linux-integrity/922603265d61011dbb23f18a04525ae973b…
v2:
* Use auth in place of chip->auth.
---
drivers/char/tpm/tpm2-sessions.c | 186 ++++++++++++++++++-------------
include/linux/tpm.h | 68 ++++-------
2 files changed, 130 insertions(+), 124 deletions(-)
diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c
index b3ed35e7ec00..2281d55df545 100644
--- a/drivers/char/tpm/tpm2-sessions.c
+++ b/drivers/char/tpm/tpm2-sessions.c
@@ -272,6 +272,110 @@ void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
}
EXPORT_SYMBOL_GPL(tpm_buf_append_name);
+/**
+ * tpm_buf_append_hmac_session() - Append a TPM session element
+ * @chip: the TPM chip structure
+ * @buf: The buffer to be appended
+ * @attributes: The session attributes
+ * @passphrase: The session authority (NULL if none)
+ * @passphrase_len: The length of the session authority (0 if none)
+ *
+ * This fills in a session structure in the TPM command buffer, except
+ * for the HMAC which cannot be computed until the command buffer is
+ * complete. The type of session is controlled by the @attributes,
+ * the main ones of which are TPM2_SA_CONTINUE_SESSION which means the
+ * session won't terminate after tpm_buf_check_hmac_response(),
+ * TPM2_SA_DECRYPT which means this buffers first parameter should be
+ * encrypted with a session key and TPM2_SA_ENCRYPT, which means the
+ * response buffer's first parameter needs to be decrypted (confusing,
+ * but the defines are written from the point of view of the TPM).
+ *
+ * Any session appended by this command must be finalized by calling
+ * tpm_buf_fill_hmac_session() otherwise the HMAC will be incorrect
+ * and the TPM will reject the command.
+ *
+ * As with most tpm_buf operations, success is assumed because failure
+ * will be caused by an incorrect programming model and indicated by a
+ * kernel message.
+ */
+void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
+ u8 attributes, u8 *passphrase,
+ int passphrase_len)
+{
+#ifdef CONFIG_TCG_TPM2_HMAC
+ u8 nonce[SHA256_DIGEST_SIZE];
+ struct tpm2_auth *auth;
+ u32 len;
+#endif
+
+ if (!tpm2_chip_auth(chip)) {
+ /* offset tells us where the sessions area begins */
+ int offset = buf->handles * 4 + TPM_HEADER_SIZE;
+ u32 len = 9 + passphrase_len;
+
+ if (tpm_buf_length(buf) != offset) {
+ /* not the first session so update the existing length */
+ len += get_unaligned_be32(&buf->data[offset]);
+ put_unaligned_be32(len, &buf->data[offset]);
+ } else {
+ tpm_buf_append_u32(buf, len);
+ }
+ /* auth handle */
+ tpm_buf_append_u32(buf, TPM2_RS_PW);
+ /* nonce */
+ tpm_buf_append_u16(buf, 0);
+ /* attributes */
+ tpm_buf_append_u8(buf, 0);
+ /* passphrase */
+ tpm_buf_append_u16(buf, passphrase_len);
+ tpm_buf_append(buf, passphrase, passphrase_len);
+ return;
+ }
+
+#ifdef CONFIG_TCG_TPM2_HMAC
+ /*
+ * The Architecture Guide requires us to strip trailing zeros
+ * before computing the HMAC
+ */
+ while (passphrase && passphrase_len > 0 && passphrase[passphrase_len - 1] == '\0')
+ passphrase_len--;
+
+ auth = chip->auth;
+ auth->attrs = attributes;
+ auth->passphrase_len = passphrase_len;
+ if (passphrase_len)
+ memcpy(auth->passphrase, passphrase, passphrase_len);
+
+ if (auth->session != tpm_buf_length(buf)) {
+ /* we're not the first session */
+ len = get_unaligned_be32(&buf->data[auth->session]);
+ if (4 + len + auth->session != tpm_buf_length(buf)) {
+ WARN(1, "session length mismatch, cannot append");
+ return;
+ }
+
+ /* add our new session */
+ len += 9 + 2 * SHA256_DIGEST_SIZE;
+ put_unaligned_be32(len, &buf->data[auth->session]);
+ } else {
+ tpm_buf_append_u32(buf, 9 + 2 * SHA256_DIGEST_SIZE);
+ }
+
+ /* random number for our nonce */
+ get_random_bytes(nonce, sizeof(nonce));
+ memcpy(auth->our_nonce, nonce, sizeof(nonce));
+ tpm_buf_append_u32(buf, auth->handle);
+ /* our new nonce */
+ tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE);
+ tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE);
+ tpm_buf_append_u8(buf, auth->attrs);
+ /* and put a placeholder for the hmac */
+ tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE);
+ tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE);
+#endif
+}
+EXPORT_SYMBOL_GPL(tpm_buf_append_hmac_session);
+
#ifdef CONFIG_TCG_TPM2_HMAC
static int tpm2_create_primary(struct tpm_chip *chip, u32 hierarchy,
@@ -457,82 +561,6 @@ static void tpm_buf_append_salt(struct tpm_buf *buf, struct tpm_chip *chip)
crypto_free_kpp(kpp);
}
-/**
- * tpm_buf_append_hmac_session() - Append a TPM session element
- * @chip: the TPM chip structure
- * @buf: The buffer to be appended
- * @attributes: The session attributes
- * @passphrase: The session authority (NULL if none)
- * @passphrase_len: The length of the session authority (0 if none)
- *
- * This fills in a session structure in the TPM command buffer, except
- * for the HMAC which cannot be computed until the command buffer is
- * complete. The type of session is controlled by the @attributes,
- * the main ones of which are TPM2_SA_CONTINUE_SESSION which means the
- * session won't terminate after tpm_buf_check_hmac_response(),
- * TPM2_SA_DECRYPT which means this buffers first parameter should be
- * encrypted with a session key and TPM2_SA_ENCRYPT, which means the
- * response buffer's first parameter needs to be decrypted (confusing,
- * but the defines are written from the point of view of the TPM).
- *
- * Any session appended by this command must be finalized by calling
- * tpm_buf_fill_hmac_session() otherwise the HMAC will be incorrect
- * and the TPM will reject the command.
- *
- * As with most tpm_buf operations, success is assumed because failure
- * will be caused by an incorrect programming model and indicated by a
- * kernel message.
- */
-void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
- u8 attributes, u8 *passphrase,
- int passphrase_len)
-{
- u8 nonce[SHA256_DIGEST_SIZE];
- u32 len;
- struct tpm2_auth *auth = chip->auth;
-
- /*
- * The Architecture Guide requires us to strip trailing zeros
- * before computing the HMAC
- */
- while (passphrase && passphrase_len > 0
- && passphrase[passphrase_len - 1] == '\0')
- passphrase_len--;
-
- auth->attrs = attributes;
- auth->passphrase_len = passphrase_len;
- if (passphrase_len)
- memcpy(auth->passphrase, passphrase, passphrase_len);
-
- if (auth->session != tpm_buf_length(buf)) {
- /* we're not the first session */
- len = get_unaligned_be32(&buf->data[auth->session]);
- if (4 + len + auth->session != tpm_buf_length(buf)) {
- WARN(1, "session length mismatch, cannot append");
- return;
- }
-
- /* add our new session */
- len += 9 + 2 * SHA256_DIGEST_SIZE;
- put_unaligned_be32(len, &buf->data[auth->session]);
- } else {
- tpm_buf_append_u32(buf, 9 + 2 * SHA256_DIGEST_SIZE);
- }
-
- /* random number for our nonce */
- get_random_bytes(nonce, sizeof(nonce));
- memcpy(auth->our_nonce, nonce, sizeof(nonce));
- tpm_buf_append_u32(buf, auth->handle);
- /* our new nonce */
- tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE);
- tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE);
- tpm_buf_append_u8(buf, auth->attrs);
- /* and put a placeholder for the hmac */
- tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE);
- tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE);
-}
-EXPORT_SYMBOL(tpm_buf_append_hmac_session);
-
/**
* tpm_buf_fill_hmac_session() - finalize the session HMAC
* @chip: the TPM chip structure
@@ -563,6 +591,9 @@ void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf)
u8 cphash[SHA256_DIGEST_SIZE];
struct sha256_state sctx;
+ if (!auth)
+ return;
+
/* save the command code in BE format */
auth->ordinal = head->ordinal;
@@ -721,6 +752,9 @@ int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf,
u32 cc = be32_to_cpu(auth->ordinal);
int parm_len, len, i, handles;
+ if (!auth)
+ return rc;
+
if (auth->session >= TPM_HEADER_SIZE) {
WARN(1, "tpm session not filled correctly\n");
goto out;
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 4d3071e885a0..e93ee8d936a9 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -502,10 +502,6 @@ static inline struct tpm2_auth *tpm2_chip_auth(struct tpm_chip *chip)
void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
u32 handle, u8 *name);
-
-#ifdef CONFIG_TCG_TPM2_HMAC
-
-int tpm2_start_auth_session(struct tpm_chip *chip);
void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
u8 attributes, u8 *passphrase,
int passphraselen);
@@ -515,9 +511,27 @@ static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip,
u8 *passphrase,
int passphraselen)
{
- tpm_buf_append_hmac_session(chip, buf, attributes, passphrase,
- passphraselen);
+ struct tpm_header *head;
+ int offset;
+
+ if (tpm2_chip_auth(chip)) {
+ tpm_buf_append_hmac_session(chip, buf, attributes, passphrase, passphraselen);
+ } else {
+ offset = buf->handles * 4 + TPM_HEADER_SIZE;
+ head = (struct tpm_header *)buf->data;
+
+ /*
+ * If the only sessions are optional, the command tag must change to
+ * TPM2_ST_NO_SESSIONS.
+ */
+ if (tpm_buf_length(buf) == offset)
+ head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS);
+ }
}
+
+#ifdef CONFIG_TCG_TPM2_HMAC
+
+int tpm2_start_auth_session(struct tpm_chip *chip);
void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf);
int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf,
int rc);
@@ -532,48 +546,6 @@ static inline int tpm2_start_auth_session(struct tpm_chip *chip)
static inline void tpm2_end_auth_session(struct tpm_chip *chip)
{
}
-static inline void tpm_buf_append_hmac_session(struct tpm_chip *chip,
- struct tpm_buf *buf,
- u8 attributes, u8 *passphrase,
- int passphraselen)
-{
- /* offset tells us where the sessions area begins */
- int offset = buf->handles * 4 + TPM_HEADER_SIZE;
- u32 len = 9 + passphraselen;
-
- if (tpm_buf_length(buf) != offset) {
- /* not the first session so update the existing length */
- len += get_unaligned_be32(&buf->data[offset]);
- put_unaligned_be32(len, &buf->data[offset]);
- } else {
- tpm_buf_append_u32(buf, len);
- }
- /* auth handle */
- tpm_buf_append_u32(buf, TPM2_RS_PW);
- /* nonce */
- tpm_buf_append_u16(buf, 0);
- /* attributes */
- tpm_buf_append_u8(buf, 0);
- /* passphrase */
- tpm_buf_append_u16(buf, passphraselen);
- tpm_buf_append(buf, passphrase, passphraselen);
-}
-static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip,
- struct tpm_buf *buf,
- u8 attributes,
- u8 *passphrase,
- int passphraselen)
-{
- int offset = buf->handles * 4 + TPM_HEADER_SIZE;
- struct tpm_header *head = (struct tpm_header *) buf->data;
-
- /*
- * if the only sessions are optional, the command tag
- * must change to TPM2_ST_NO_SESSIONS
- */
- if (tpm_buf_length(buf) == offset)
- head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS);
-}
static inline void tpm_buf_fill_hmac_session(struct tpm_chip *chip,
struct tpm_buf *buf)
{
--
2.45.2
Unless tpm_chip_bootstrap() was called by the driver, !chip->auth can
cause a null derefence in tpm_buf_append_name(). Thus, address
!chip->auth in tpm_buf_append_name() and remove the fallback
implementation for !TCG_TPM2_HMAC.
Cc: stable(a)vger.kernel.org # v6.10+
Reported-by: Stefan Berger <stefanb(a)linux.ibm.com>
Closes: https://lore.kernel.org/linux-integrity/20240617193408.1234365-1-stefanb@li…
Fixes: d0a25bb961e6 ("tpm: Add HMAC session name/handle append")
Tested-by: Michael Ellerman <mpe(a)ellerman.id.au> # ppc
Signed-off-by: Jarkko Sakkinen <jarkko(a)kernel.org>
---
v4:
* Address:
https://lore.kernel.org/linux-integrity/CAHk-=wiM=Cyw-07EkbAH66pE50VzJiT3bV…
* Added tested-by from Michael Ellerman.
v3:
* Address:
https://lore.kernel.org/linux-integrity/922603265d61011dbb23f18a04525ae973b…
v2:
* N/A
---
drivers/char/tpm/Makefile | 2 +-
drivers/char/tpm/tpm2-sessions.c | 219 +++++++++++++++++--------------
include/linux/tpm.h | 21 +--
3 files changed, 131 insertions(+), 111 deletions(-)
diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile
index 4c695b0388f3..9bb142c75243 100644
--- a/drivers/char/tpm/Makefile
+++ b/drivers/char/tpm/Makefile
@@ -16,8 +16,8 @@ tpm-y += eventlog/common.o
tpm-y += eventlog/tpm1.o
tpm-y += eventlog/tpm2.o
tpm-y += tpm-buf.o
+tpm-y += tpm2-sessions.o
-tpm-$(CONFIG_TCG_TPM2_HMAC) += tpm2-sessions.o
tpm-$(CONFIG_ACPI) += tpm_ppi.o eventlog/acpi.o
tpm-$(CONFIG_EFI) += eventlog/efi.o
tpm-$(CONFIG_OF) += eventlog/of.o
diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c
index 2f1d96a5a5a7..b3ed35e7ec00 100644
--- a/drivers/char/tpm/tpm2-sessions.c
+++ b/drivers/char/tpm/tpm2-sessions.c
@@ -83,9 +83,6 @@
#define AES_KEY_BYTES AES_KEYSIZE_128
#define AES_KEY_BITS (AES_KEY_BYTES*8)
-static int tpm2_create_primary(struct tpm_chip *chip, u32 hierarchy,
- u32 *handle, u8 *name);
-
/*
* This is the structure that carries all the auth information (like
* session handle, nonces, session key and auth) from use to use it is
@@ -148,6 +145,7 @@ struct tpm2_auth {
u8 name[AUTH_MAX_NAMES][2 + SHA512_DIGEST_SIZE];
};
+#ifdef CONFIG_TCG_TPM2_HMAC
/*
* Name Size based on TPM algorithm (assumes no hash bigger than 255)
*/
@@ -163,6 +161,122 @@ static u8 name_size(const u8 *name)
return size_map[alg] + 2;
}
+static int tpm2_parse_read_public(char *name, struct tpm_buf *buf)
+{
+ struct tpm_header *head = (struct tpm_header *)buf->data;
+ off_t offset = TPM_HEADER_SIZE;
+ u32 tot_len = be32_to_cpu(head->length);
+ u32 val;
+
+ /* we're starting after the header so adjust the length */
+ tot_len -= TPM_HEADER_SIZE;
+
+ /* skip public */
+ val = tpm_buf_read_u16(buf, &offset);
+ if (val > tot_len)
+ return -EINVAL;
+ offset += val;
+ /* name */
+ val = tpm_buf_read_u16(buf, &offset);
+ if (val != name_size(&buf->data[offset]))
+ return -EINVAL;
+ memcpy(name, &buf->data[offset], val);
+ /* forget the rest */
+ return 0;
+}
+
+static int tpm2_read_public(struct tpm_chip *chip, u32 handle, char *name)
+{
+ struct tpm_buf buf;
+ int rc;
+
+ rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_READ_PUBLIC);
+ if (rc)
+ return rc;
+
+ tpm_buf_append_u32(&buf, handle);
+ rc = tpm_transmit_cmd(chip, &buf, 0, "read public");
+ if (rc == TPM2_RC_SUCCESS)
+ rc = tpm2_parse_read_public(name, &buf);
+
+ tpm_buf_destroy(&buf);
+
+ return rc;
+}
+#endif /* CONFIG_TCG_TPM2_HMAC */
+
+/**
+ * tpm_buf_append_name() - add a handle area to the buffer
+ * @chip: the TPM chip structure
+ * @buf: The buffer to be appended
+ * @handle: The handle to be appended
+ * @name: The name of the handle (may be NULL)
+ *
+ * In order to compute session HMACs, we need to know the names of the
+ * objects pointed to by the handles. For most objects, this is simply
+ * the actual 4 byte handle or an empty buf (in these cases @name
+ * should be NULL) but for volatile objects, permanent objects and NV
+ * areas, the name is defined as the hash (according to the name
+ * algorithm which should be set to sha256) of the public area to
+ * which the two byte algorithm id has been appended. For these
+ * objects, the @name pointer should point to this. If a name is
+ * required but @name is NULL, then TPM2_ReadPublic() will be called
+ * on the handle to obtain the name.
+ *
+ * As with most tpm_buf operations, success is assumed because failure
+ * will be caused by an incorrect programming model and indicated by a
+ * kernel message.
+ */
+void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
+ u32 handle, u8 *name)
+{
+#ifdef CONFIG_TCG_TPM2_HMAC
+ enum tpm2_mso_type mso = tpm2_handle_mso(handle);
+ struct tpm2_auth *auth;
+ int slot;
+#endif
+
+ if (!tpm2_chip_auth(chip)) {
+ tpm_buf_append_u32(buf, handle);
+ /* count the number of handles in the upper bits of flags */
+ buf->handles++;
+ return;
+ }
+
+#ifdef CONFIG_TCG_TPM2_HMAC
+ slot = (tpm_buf_length(buf) - TPM_HEADER_SIZE) / 4;
+ if (slot >= AUTH_MAX_NAMES) {
+ dev_err(&chip->dev, "TPM: too many handles\n");
+ return;
+ }
+ auth = chip->auth;
+ WARN(auth->session != tpm_buf_length(buf),
+ "name added in wrong place\n");
+ tpm_buf_append_u32(buf, handle);
+ auth->session += 4;
+
+ if (mso == TPM2_MSO_PERSISTENT ||
+ mso == TPM2_MSO_VOLATILE ||
+ mso == TPM2_MSO_NVRAM) {
+ if (!name)
+ tpm2_read_public(chip, handle, auth->name[slot]);
+ } else {
+ if (name)
+ dev_err(&chip->dev, "TPM: Handle does not require name but one is specified\n");
+ }
+
+ auth->name_h[slot] = handle;
+ if (name)
+ memcpy(auth->name[slot], name, name_size(name));
+#endif
+}
+EXPORT_SYMBOL_GPL(tpm_buf_append_name);
+
+#ifdef CONFIG_TCG_TPM2_HMAC
+
+static int tpm2_create_primary(struct tpm_chip *chip, u32 hierarchy,
+ u32 *handle, u8 *name);
+
/*
* It turns out the crypto hmac(sha256) is hard for us to consume
* because it assumes a fixed key and the TPM seems to change the key
@@ -567,104 +681,6 @@ void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf)
}
EXPORT_SYMBOL(tpm_buf_fill_hmac_session);
-static int tpm2_parse_read_public(char *name, struct tpm_buf *buf)
-{
- struct tpm_header *head = (struct tpm_header *)buf->data;
- off_t offset = TPM_HEADER_SIZE;
- u32 tot_len = be32_to_cpu(head->length);
- u32 val;
-
- /* we're starting after the header so adjust the length */
- tot_len -= TPM_HEADER_SIZE;
-
- /* skip public */
- val = tpm_buf_read_u16(buf, &offset);
- if (val > tot_len)
- return -EINVAL;
- offset += val;
- /* name */
- val = tpm_buf_read_u16(buf, &offset);
- if (val != name_size(&buf->data[offset]))
- return -EINVAL;
- memcpy(name, &buf->data[offset], val);
- /* forget the rest */
- return 0;
-}
-
-static int tpm2_read_public(struct tpm_chip *chip, u32 handle, char *name)
-{
- struct tpm_buf buf;
- int rc;
-
- rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_READ_PUBLIC);
- if (rc)
- return rc;
-
- tpm_buf_append_u32(&buf, handle);
- rc = tpm_transmit_cmd(chip, &buf, 0, "read public");
- if (rc == TPM2_RC_SUCCESS)
- rc = tpm2_parse_read_public(name, &buf);
-
- tpm_buf_destroy(&buf);
-
- return rc;
-}
-
-/**
- * tpm_buf_append_name() - add a handle area to the buffer
- * @chip: the TPM chip structure
- * @buf: The buffer to be appended
- * @handle: The handle to be appended
- * @name: The name of the handle (may be NULL)
- *
- * In order to compute session HMACs, we need to know the names of the
- * objects pointed to by the handles. For most objects, this is simply
- * the actual 4 byte handle or an empty buf (in these cases @name
- * should be NULL) but for volatile objects, permanent objects and NV
- * areas, the name is defined as the hash (according to the name
- * algorithm which should be set to sha256) of the public area to
- * which the two byte algorithm id has been appended. For these
- * objects, the @name pointer should point to this. If a name is
- * required but @name is NULL, then TPM2_ReadPublic() will be called
- * on the handle to obtain the name.
- *
- * As with most tpm_buf operations, success is assumed because failure
- * will be caused by an incorrect programming model and indicated by a
- * kernel message.
- */
-void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
- u32 handle, u8 *name)
-{
- enum tpm2_mso_type mso = tpm2_handle_mso(handle);
- struct tpm2_auth *auth = chip->auth;
- int slot;
-
- slot = (tpm_buf_length(buf) - TPM_HEADER_SIZE)/4;
- if (slot >= AUTH_MAX_NAMES) {
- dev_err(&chip->dev, "TPM: too many handles\n");
- return;
- }
- WARN(auth->session != tpm_buf_length(buf),
- "name added in wrong place\n");
- tpm_buf_append_u32(buf, handle);
- auth->session += 4;
-
- if (mso == TPM2_MSO_PERSISTENT ||
- mso == TPM2_MSO_VOLATILE ||
- mso == TPM2_MSO_NVRAM) {
- if (!name)
- tpm2_read_public(chip, handle, auth->name[slot]);
- } else {
- if (name)
- dev_err(&chip->dev, "TPM: Handle does not require name but one is specified\n");
- }
-
- auth->name_h[slot] = handle;
- if (name)
- memcpy(auth->name[slot], name, name_size(name));
-}
-EXPORT_SYMBOL(tpm_buf_append_name);
-
/**
* tpm_buf_check_hmac_response() - check the TPM return HMAC for correctness
* @chip: the TPM chip structure
@@ -1311,3 +1327,4 @@ int tpm2_sessions_init(struct tpm_chip *chip)
return rc;
}
+#endif /* CONFIG_TCG_TPM2_HMAC */
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 21a67dc9efe8..4d3071e885a0 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -490,11 +490,22 @@ static inline void tpm_buf_append_empty_auth(struct tpm_buf *buf, u32 handle)
{
}
#endif
+
+static inline struct tpm2_auth *tpm2_chip_auth(struct tpm_chip *chip)
+{
#ifdef CONFIG_TCG_TPM2_HMAC
+ return chip->auth;
+#else
+ return NULL;
+#endif
+}
-int tpm2_start_auth_session(struct tpm_chip *chip);
void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
u32 handle, u8 *name);
+
+#ifdef CONFIG_TCG_TPM2_HMAC
+
+int tpm2_start_auth_session(struct tpm_chip *chip);
void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
u8 attributes, u8 *passphrase,
int passphraselen);
@@ -521,14 +532,6 @@ static inline int tpm2_start_auth_session(struct tpm_chip *chip)
static inline void tpm2_end_auth_session(struct tpm_chip *chip)
{
}
-static inline void tpm_buf_append_name(struct tpm_chip *chip,
- struct tpm_buf *buf,
- u32 handle, u8 *name)
-{
- tpm_buf_append_u32(buf, handle);
- /* count the number of handles in the upper bits of flags */
- buf->handles++;
-}
static inline void tpm_buf_append_hmac_session(struct tpm_chip *chip,
struct tpm_buf *buf,
u8 attributes, u8 *passphrase,
--
2.45.2