The patch titled
Subject: mm/gup: clear the LRU flag of a page before adding to LRU batch
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-gup-clear-the-lru-flag-of-a-page-before-adding-to-lru-batch.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: yangge <yangge1116(a)126.com>
Subject: mm/gup: clear the LRU flag of a page before adding to LRU batch
Date: Wed, 3 Jul 2024 20:02:33 +0800
If a large number of CMA memory are configured in system (for example,
the CMA memory accounts for 50% of the system memory), starting a
virtual virtual machine with device passthrough, it will call
pin_user_pages_remote(..., FOLL_LONGTERM, ...) to pin memory. Normally
if a page is present and in CMA area, pin_user_pages_remote() will
migrate the page from CMA area to non-CMA area because of FOLL_LONGTERM
flag. But the current code will cause the migration failure due to
unexpected page refcounts, and eventually cause the virtual machine
fail to start.
If a page is added in LRU batch, its refcount increases one, remove the
page from LRU batch decreases one. Page migration requires the page is
not referenced by others except page mapping. Before migrating a page,
we should try to drain the page from LRU batch in case the page is in
it, however, folio_test_lru() is not sufficient to tell whether the
page is in LRU batch or not, if the page is in LRU batch, the migration
will fail.
To solve the problem above, we modify the logic of adding to LRU batch.
Before adding a page to LRU batch, we clear the LRU flag of the page
so that we can check whether the page is in LRU batch by
folio_test_lru(page). It's quite valuable, because likely we don't
want to blindly drain the LRU batch simply because there is some
unexpected reference on a page, as described above.
This change makes the LRU flag of a page invisible for longer, which
may impact some programs. For example, as long as a page is on a LRU
batch, we cannot isolate it, and we cannot check if it's an LRU page.
Further, a page can now only be on exactly one LRU batch. This doesn't
seem to matter much, because a new page is allocated from buddy and
added to the lru batch, or be isolated, it's LRU flag may also be
invisible for a long time.
Link: https://lkml.kernel.org/r/1720075944-27201-1-git-send-email-yangge1116@126.…
Link: https://lkml.kernel.org/r/1720008153-16035-1-git-send-email-yangge1116@126.…
Fixes: 9a4e9f3b2d73 ("mm: update get_user_pages_longterm to migrate pages allocated from CMA region")
Signed-off-by: yangge <yangge1116(a)126.com>
Cc: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Barry Song <21cnbao(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/swap.c | 43 +++++++++++++++++++++++++++++++------------
1 file changed, 31 insertions(+), 12 deletions(-)
--- a/mm/swap.c~mm-gup-clear-the-lru-flag-of-a-page-before-adding-to-lru-batch
+++ a/mm/swap.c
@@ -212,10 +212,6 @@ static void folio_batch_move_lru(struct
for (i = 0; i < folio_batch_count(fbatch); i++) {
struct folio *folio = fbatch->folios[i];
- /* block memcg migration while the folio moves between lru */
- if (move_fn != lru_add_fn && !folio_test_clear_lru(folio))
- continue;
-
folio_lruvec_relock_irqsave(folio, &lruvec, &flags);
move_fn(lruvec, folio);
@@ -256,11 +252,16 @@ static void lru_move_tail_fn(struct lruv
void folio_rotate_reclaimable(struct folio *folio)
{
if (!folio_test_locked(folio) && !folio_test_dirty(folio) &&
- !folio_test_unevictable(folio) && folio_test_lru(folio)) {
+ !folio_test_unevictable(folio)) {
struct folio_batch *fbatch;
unsigned long flags;
folio_get(folio);
+ if (!folio_test_clear_lru(folio)) {
+ folio_put(folio);
+ return;
+ }
+
local_lock_irqsave(&lru_rotate.lock, flags);
fbatch = this_cpu_ptr(&lru_rotate.fbatch);
folio_batch_add_and_move(fbatch, folio, lru_move_tail_fn);
@@ -353,11 +354,15 @@ static void folio_activate_drain(int cpu
void folio_activate(struct folio *folio)
{
- if (folio_test_lru(folio) && !folio_test_active(folio) &&
- !folio_test_unevictable(folio)) {
+ if (!folio_test_active(folio) && !folio_test_unevictable(folio)) {
struct folio_batch *fbatch;
folio_get(folio);
+ if (!folio_test_clear_lru(folio)) {
+ folio_put(folio);
+ return;
+ }
+
local_lock(&cpu_fbatches.lock);
fbatch = this_cpu_ptr(&cpu_fbatches.activate);
folio_batch_add_and_move(fbatch, folio, folio_activate_fn);
@@ -701,6 +706,11 @@ void deactivate_file_folio(struct folio
return;
folio_get(folio);
+ if (!folio_test_clear_lru(folio)) {
+ folio_put(folio);
+ return;
+ }
+
local_lock(&cpu_fbatches.lock);
fbatch = this_cpu_ptr(&cpu_fbatches.lru_deactivate_file);
folio_batch_add_and_move(fbatch, folio, lru_deactivate_file_fn);
@@ -717,11 +727,16 @@ void deactivate_file_folio(struct folio
*/
void folio_deactivate(struct folio *folio)
{
- if (folio_test_lru(folio) && !folio_test_unevictable(folio) &&
- (folio_test_active(folio) || lru_gen_enabled())) {
+ if (!folio_test_unevictable(folio) && (folio_test_active(folio) ||
+ lru_gen_enabled())) {
struct folio_batch *fbatch;
folio_get(folio);
+ if (!folio_test_clear_lru(folio)) {
+ folio_put(folio);
+ return;
+ }
+
local_lock(&cpu_fbatches.lock);
fbatch = this_cpu_ptr(&cpu_fbatches.lru_deactivate);
folio_batch_add_and_move(fbatch, folio, lru_deactivate_fn);
@@ -738,12 +753,16 @@ void folio_deactivate(struct folio *foli
*/
void folio_mark_lazyfree(struct folio *folio)
{
- if (folio_test_lru(folio) && folio_test_anon(folio) &&
- folio_test_swapbacked(folio) && !folio_test_swapcache(folio) &&
- !folio_test_unevictable(folio)) {
+ if (folio_test_anon(folio) && folio_test_swapbacked(folio) &&
+ !folio_test_swapcache(folio) && !folio_test_unevictable(folio)) {
struct folio_batch *fbatch;
folio_get(folio);
+ if (!folio_test_clear_lru(folio)) {
+ folio_put(folio);
+ return;
+ }
+
local_lock(&cpu_fbatches.lock);
fbatch = this_cpu_ptr(&cpu_fbatches.lru_lazyfree);
folio_batch_add_and_move(fbatch, folio, lru_lazyfree_fn);
_
Patches currently in -mm which might be from yangge1116(a)126.com are
mm-gup-clear-the-lru-flag-of-a-page-before-adding-to-lru-batch.patch
The patch titled
Subject: mm: fix khugepaged activation policy
has been added to the -mm mm-unstable branch. Its filename is
mm-fix-khugepaged-activation-policy.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ryan Roberts <ryan.roberts(a)arm.com>
Subject: mm: fix khugepaged activation policy
Date: Thu, 4 Jul 2024 10:10:50 +0100
Since the introduction of mTHP, the docuementation has stated that
khugepaged would be enabled when any mTHP size is enabled, and disabled
when all mTHP sizes are disabled. There are 2 problems with this; 1.
this is not what was implemented by the code and 2. this is not the
desirable behavior.
Desirable behavior is for khugepaged to be enabled when any PMD-sized THP
is enabled, anon or file. (Note that file THP is still controlled by the
top-level control so we must always consider that, as well as the PMD-size
mTHP control for anon). khugepaged only supports collapsing to PMD-sized
THP so there is no value in enabling it when PMD-sized THP is disabled.
So let's change the code and documentation to reflect this policy.
Further, per-size enabled control modification events were not previously
forwarded to khugepaged to give it an opportunity to start or stop.
Consequently the following was resulting in khugepaged eroneously not
being activated:
echo never > /sys/kernel/mm/transparent_hugepage/enabled
echo always > /sys/kernel/mm/transparent_hugepage/hugepages-2048kB/enabled
Link: https://lkml.kernel.org/r/20240704091051.2411934-1-ryan.roberts@arm.com
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Fixes: 3485b88390b0 ("mm: thp: introduce multi-size THP sysfs interface")
Closes: https://lore.kernel.org/linux-mm/7a0bbe69-1e3d-4263-b206-da007791a5c4@redha…
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Barry Song <baohua(a)kernel.org>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Lance Yang <ioworker0(a)gmail.com>
Cc: Yang Shi <shy828301(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
Documentation/admin-guide/mm/transhuge.rst | 11 +++++------
include/linux/huge_mm.h | 15 ++++++++-------
mm/huge_memory.c | 7 +++++++
mm/khugepaged.c | 13 ++++++-------
4 files changed, 26 insertions(+), 20 deletions(-)
--- a/Documentation/admin-guide/mm/transhuge.rst~mm-fix-khugepaged-activation-policy
+++ a/Documentation/admin-guide/mm/transhuge.rst
@@ -202,12 +202,11 @@ PMD-mappable transparent hugepage::
cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
-khugepaged will be automatically started when one or more hugepage
-sizes are enabled (either by directly setting "always" or "madvise",
-or by setting "inherit" while the top-level enabled is set to "always"
-or "madvise"), and it'll be automatically shutdown when the last
-hugepage size is disabled (either by directly setting "never", or by
-setting "inherit" while the top-level enabled is set to "never").
+khugepaged will be automatically started when PMD-sized THP is enabled
+(either of the per-size anon control or the top-level control are set
+to "always" or "madvise"), and it'll be automatically shutdown when
+PMD-sized THP is disabled (when both the per-size anon control and the
+top-level control are "never")
Khugepaged controls
-------------------
--- a/include/linux/huge_mm.h~mm-fix-khugepaged-activation-policy
+++ a/include/linux/huge_mm.h
@@ -128,16 +128,17 @@ static inline bool hugepage_global_alway
(1<<TRANSPARENT_HUGEPAGE_FLAG);
}
-static inline bool hugepage_flags_enabled(void)
+static inline bool hugepage_pmd_enabled(void)
{
/*
- * We cover both the anon and the file-backed case here; we must return
- * true if globally enabled, even when all anon sizes are set to never.
- * So we don't need to look at huge_anon_orders_inherit.
+ * We cover both the anon and the file-backed case here; file-backed
+ * hugepages, when configured in, are determined by the global control.
+ * Anon pmd-sized hugepages are determined by the pmd-size control.
*/
- return hugepage_global_enabled() ||
- READ_ONCE(huge_anon_orders_always) ||
- READ_ONCE(huge_anon_orders_madvise);
+ return (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && hugepage_global_enabled()) ||
+ test_bit(PMD_ORDER, &huge_anon_orders_always) ||
+ test_bit(PMD_ORDER, &huge_anon_orders_madvise) ||
+ (test_bit(PMD_ORDER, &huge_anon_orders_inherit) && hugepage_global_enabled());
}
static inline int highest_order(unsigned long orders)
--- a/mm/huge_memory.c~mm-fix-khugepaged-activation-policy
+++ a/mm/huge_memory.c
@@ -502,6 +502,13 @@ static ssize_t thpsize_enabled_store(str
} else
ret = -EINVAL;
+ if (ret > 0) {
+ int err;
+
+ err = start_stop_khugepaged();
+ if (err)
+ ret = err;
+ }
return ret;
}
--- a/mm/khugepaged.c~mm-fix-khugepaged-activation-policy
+++ a/mm/khugepaged.c
@@ -449,7 +449,7 @@ void khugepaged_enter_vma(struct vm_area
unsigned long vm_flags)
{
if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags) &&
- hugepage_flags_enabled()) {
+ hugepage_pmd_enabled()) {
if (thp_vma_allowable_order(vma, vm_flags, TVA_ENFORCE_SYSFS,
PMD_ORDER))
__khugepaged_enter(vma->vm_mm);
@@ -2462,8 +2462,7 @@ breakouterloop_mmap_lock:
static int khugepaged_has_work(void)
{
- return !list_empty(&khugepaged_scan.mm_head) &&
- hugepage_flags_enabled();
+ return !list_empty(&khugepaged_scan.mm_head) && hugepage_pmd_enabled();
}
static int khugepaged_wait_event(void)
@@ -2536,7 +2535,7 @@ static void khugepaged_wait_work(void)
return;
}
- if (hugepage_flags_enabled())
+ if (hugepage_pmd_enabled())
wait_event_freezable(khugepaged_wait, khugepaged_wait_event());
}
@@ -2567,7 +2566,7 @@ static void set_recommended_min_free_kby
int nr_zones = 0;
unsigned long recommended_min;
- if (!hugepage_flags_enabled()) {
+ if (!hugepage_pmd_enabled()) {
calculate_min_free_kbytes();
goto update_wmarks;
}
@@ -2617,7 +2616,7 @@ int start_stop_khugepaged(void)
int err = 0;
mutex_lock(&khugepaged_mutex);
- if (hugepage_flags_enabled()) {
+ if (hugepage_pmd_enabled()) {
if (!khugepaged_thread)
khugepaged_thread = kthread_run(khugepaged, NULL,
"khugepaged");
@@ -2643,7 +2642,7 @@ fail:
void khugepaged_min_free_kbytes_update(void)
{
mutex_lock(&khugepaged_mutex);
- if (hugepage_flags_enabled() && khugepaged_thread)
+ if (hugepage_pmd_enabled() && khugepaged_thread)
set_recommended_min_free_kbytes();
mutex_unlock(&khugepaged_mutex);
}
_
Patches currently in -mm which might be from ryan.roberts(a)arm.com are
mm-fix-khugepaged-activation-policy.patch
The following commit has been merged into the timers/core branch of tip:
Commit-ID: cabe7ebd57a0bfd0ef8e74f0c70423ae8d4d9171
Gitweb: https://git.kernel.org/tip/cabe7ebd57a0bfd0ef8e74f0c70423ae8d4d9171
Author: Anna-Maria Behnsen <anna-maria(a)linutronix.de>
AuthorDate: Mon, 01 Jul 2024 12:18:37 +02:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Thu, 04 Jul 2024 20:22:21 +02:00
timers/migration: Do not rely always on group->parent
When reading group->parent without holding the group lock it is racy
against CPUs coming online the first time and thereby creating another
level of the hierarchy. This is not a problem when this value is read once
to decide whether to abort a propagation or not. The worst outcome is an
unnecessary/early CPU wake up. But it is racy when reading it several times
during a single 'action' (like activation, deactivation, checking for
remote timer expiry,...) and relying on the consitency of this value
without holding the lock. This happens at the moment e.g. in
tmigr_inactive_up() which is also calling tmigr_udpate_events(). Code relys
on group->parent not to change during this 'action'.
Update parent struct member description to explain the above only
once. Remove parent pointer checks when they are not mandatory (like update
of data->childmask). Remove a warning, which would be nice but the trigger
of this warning is not reliable and add expand the data structure member
description instead. Expand a comment, why it is safe to rely on parent
pointer here (inside hierarchy update).
Fixes: 7ee988770326 ("timers: Implement the hierarchical pull model")
Reported-by: Borislav Petkov <bp(a)alien8.de>
Signed-off-by: Anna-Maria Behnsen <anna-maria(a)linutronix.de>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic(a)kernel.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240701-tmigr-fixes-v3-1-25cd5de318fb@linutronix…
---
kernel/time/timer_migration.c | 33 +++++++++++++++------------------
kernel/time/timer_migration.h | 12 +++++++++++-
2 files changed, 26 insertions(+), 19 deletions(-)
diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c
index 8441311..d91efe1 100644
--- a/kernel/time/timer_migration.c
+++ b/kernel/time/timer_migration.c
@@ -507,7 +507,14 @@ static void walk_groups(up_f up, void *data, struct tmigr_cpu *tmc)
* (get_next_timer_interrupt())
* @firstexp: Contains the first event expiry information when last
* active CPU of hierarchy is on the way to idle to make
- * sure CPU will be back in time.
+ * sure CPU will be back in time. It is updated in top
+ * level group only. Be aware, there could occur a new top
+ * level of the hierarchy between the 'top level call' in
+ * tmigr_update_events() and the check for the parent group
+ * in walk_groups(). Then @firstexp might contain a value
+ * != KTIME_MAX even if it was not the final top
+ * level. This is not a problem, as the worst outcome is a
+ * CPU which might wake up a little early.
* @evt: Pointer to tmigr_event which needs to be queued (of idle
* child group)
* @childmask: childmask of child group
@@ -649,7 +656,7 @@ static bool tmigr_active_up(struct tmigr_group *group,
} while (!atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state));
- if ((walk_done == false) && group->parent)
+ if (walk_done == false)
data->childmask = group->childmask;
/*
@@ -1317,20 +1324,9 @@ static bool tmigr_inactive_up(struct tmigr_group *group,
/* Event Handling */
tmigr_update_events(group, child, data);
- if (group->parent && (walk_done == false))
+ if (walk_done == false)
data->childmask = group->childmask;
- /*
- * data->firstexp was set by tmigr_update_events() and contains the
- * expiry of the first global event which needs to be handled. It
- * differs from KTIME_MAX if:
- * - group is the top level group and
- * - group is idle (which means CPU was the last active CPU in the
- * hierarchy) and
- * - there is a pending event in the hierarchy
- */
- WARN_ON_ONCE(data->firstexp != KTIME_MAX && group->parent);
-
trace_tmigr_group_set_cpu_inactive(group, newstate, childmask);
return walk_done;
@@ -1552,10 +1548,11 @@ static void tmigr_connect_child_parent(struct tmigr_group *child,
data.childmask = child->childmask;
/*
- * There is only one new level per time. When connecting the
- * child and the parent and set the child active when the parent
- * is inactive, the parent needs to be the uppermost
- * level. Otherwise there went something wrong!
+ * There is only one new level per time (which is protected by
+ * tmigr_mutex). When connecting the child and the parent and
+ * set the child active when the parent is inactive, the parent
+ * needs to be the uppermost level. Otherwise there went
+ * something wrong!
*/
WARN_ON(!tmigr_active_up(parent, child, &data) && parent->parent);
}
diff --git a/kernel/time/timer_migration.h b/kernel/time/timer_migration.h
index 6c37d94..494f68c 100644
--- a/kernel/time/timer_migration.h
+++ b/kernel/time/timer_migration.h
@@ -22,7 +22,17 @@ struct tmigr_event {
* struct tmigr_group - timer migration hierarchy group
* @lock: Lock protecting the event information and group hierarchy
* information during setup
- * @parent: Pointer to the parent group
+ * @parent: Pointer to the parent group. Pointer is updated when a
+ * new hierarchy level is added because of a CPU coming
+ * online the first time. Once it is set, the pointer will
+ * not be removed or updated. When accessing parent pointer
+ * lock less to decide whether to abort a propagation or
+ * not, it is not a problem. The worst outcome is an
+ * unnecessary/early CPU wake up. But do not access parent
+ * pointer several times in the same 'action' (like
+ * activation, deactivation, check for remote expiry,...)
+ * without holding the lock as it is not ensured that value
+ * will not change.
* @groupevt: Next event of the group which is only used when the
* group is !active. The group event is then queued into
* the parent timer queue.
The following commit has been merged into the timers/core branch of tip:
Commit-ID: 8cdb61838ee5c63556773ea2eed24deab6b15257
Gitweb: https://git.kernel.org/tip/8cdb61838ee5c63556773ea2eed24deab6b15257
Author: Anna-Maria Behnsen <anna-maria(a)linutronix.de>
AuthorDate: Wed, 03 Jul 2024 22:28:39 +02:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Thu, 04 Jul 2024 20:22:22 +02:00
timers/migration: Move hierarchy setup into cpuhotplug prepare callback
When a CPU comes online the first time, it is possible that a new top level
group will be created. In general all propagation is done from the bottom
to top. This minimizes complexity and prevents possible races. But when a
new top level group is created, the formely top level group needs to be
connected to the new level. This is the only time, when the direction to
propagate changes is changed: the changes are propagated from top (new top
level group) to bottom (formerly top level group).
This introduces two races (see (A) and (B)) as reported by Frederic:
(A) This race happens, when marking the formely top level group as active,
but the last active CPU of the formerly top level group goes idle. Then
it's likely that formerly group is no longer active, but marked
nevertheless as active in new top level group:
[GRP0:0]
migrator = 0
active = 0
nextevt = KTIME_MAX
/ \
0 1 .. 7
active idle
0) Hierarchy has for now only 8 CPUs and CPU 0 is the only active CPU.
[GRP1:0]
migrator = TMIGR_NONE
active = NONE
nextevt = KTIME_MAX
\
[GRP0:0] [GRP0:1]
migrator = 0 migrator = TMIGR_NONE
active = 0 active = NONE
nextevt = KTIME_MAX nextevt = KTIME_MAX
/ \
0 1 .. 7 8
active idle !online
1) CPU 8 is booting and creates a new group in first level GRP0:1 and
therefore also a new top group GRP1:0. For now the setup code proceeded
only until the connected between GRP0:1 to the new top group. The
connection between CPU8 and GRP0:1 is not yet established and CPU 8 is
still !online.
[GRP1:0]
migrator = TMIGR_NONE
active = NONE
nextevt = KTIME_MAX
/ \
[GRP0:0] [GRP0:1]
migrator = 0 migrator = TMIGR_NONE
active = 0 active = NONE
nextevt = KTIME_MAX nextevt = KTIME_MAX
/ \
0 1 .. 7 8
active idle !online
2) Setup code now connects GRP0:0 to GRP1:0 and observes while in
tmigr_connect_child_parent() that GRP0:0 is not TMIGR_NONE. So it
prepares to call tmigr_active_up() on it. It hasn't done it yet.
[GRP1:0]
migrator = TMIGR_NONE
active = NONE
nextevt = KTIME_MAX
/ \
[GRP0:0] [GRP0:1]
migrator = TMIGR_NONE migrator = TMIGR_NONE
active = NONE active = NONE
nextevt = KTIME_MAX nextevt = KTIME_MAX
/ \
0 1 .. 7 8
idle idle !online
3) CPU 0 goes idle. Since GRP0:0->parent has been updated by CPU 8 with
GRP0:0->lock held, CPU 0 observes GRP1:0 after calling
tmigr_update_events() and it propagates the change to the top (no change
there and no wakeup programmed since there is no timer).
[GRP1:0]
migrator = GRP0:0
active = GRP0:0
nextevt = KTIME_MAX
/ \
[GRP0:0] [GRP0:1]
migrator = TMIGR_NONE migrator = TMIGR_NONE
active = NONE active = NONE
nextevt = KTIME_MAX nextevt = KTIME_MAX
/ \
0 1 .. 7 8
idle idle !online
4) Now the setup code finally calls tmigr_active_up() to and sets GRP0:0
active in GRP1:0
[GRP1:0]
migrator = GRP0:0
active = GRP0:0, GRP0:1
nextevt = KTIME_MAX
/ \
[GRP0:0] [GRP0:1]
migrator = TMIGR_NONE migrator = 8
active = NONE active = 8
nextevt = KTIME_MAX nextevt = KTIME_MAX
/ \ |
0 1 .. 7 8
idle idle active
5) Now CPU 8 is connected with GRP0:1 and CPU 8 calls tmigr_active_up() out
of tmigr_cpu_online().
[GRP1:0]
migrator = GRP0:0
active = GRP0:0
nextevt = T8
/ \
[GRP0:0] [GRP0:1]
migrator = TMIGR_NONE migrator = TMIGR_NONE
active = NONE active = NONE
nextevt = KTIME_MAX nextevt = T8
/ \ |
0 1 .. 7 8
idle idle idle
5) CPU 8 goes idle with a timer T8 and relies on GRP0:0 as the migrator.
But it's not really active, so T8 gets ignored.
--> The update which is done in third step is not noticed by setup code. So
a wrong migrator is set to top level group and a timer could get
ignored.
(B) Reading group->parent and group->childmask when an hierarchy update is
ongoing and reaches the formerly top level group is racy as those values
could be inconsistent. (The notation of migrator and active now slightly
changes in contrast to the above example, as now the childmasks are used.)
[GRP1:0]
migrator = TMIGR_NONE
active = 0x00
nextevt = KTIME_MAX
\
[GRP0:0] [GRP0:1]
migrator = TMIGR_NONE migrator = TMIGR_NONE
active = 0x00 active = 0x00
nextevt = KTIME_MAX nextevt = KTIME_MAX
childmask= 0 childmask= 1
parent = NULL parent = GRP1:0
/ \
0 1 .. 7 8
idle idle !online
childmask=1
1) Hierarchy has 8 CPUs. CPU 8 is at the moment in the process of onlining
but did not yet connect GRP0:0 to GRP1:0.
[GRP1:0]
migrator = TMIGR_NONE
active = 0x00
nextevt = KTIME_MAX
/ \
[GRP0:0] [GRP0:1]
migrator = TMIGR_NONE migrator = TMIGR_NONE
active = 0x00 active = 0x00
nextevt = KTIME_MAX nextevt = KTIME_MAX
childmask= 0 childmask= 1
parent = GRP1:0 parent = GRP1:0
/ \
0 1 .. 7 8
idle idle !online
childmask=1
2) Setup code (running on CPU 8) now connects GRP0:0 to GRP1:0, updates
parent pointer of GRP0:0 and ...
[GRP1:0]
migrator = TMIGR_NONE
active = 0x00
nextevt = KTIME_MAX
/ \
[GRP0:0] [GRP0:1]
migrator = 0x01 migrator = TMIGR_NONE
active = 0x01 active = 0x00
nextevt = KTIME_MAX nextevt = KTIME_MAX
childmask= 0 childmask= 1
parent = GRP1:0 parent = GRP1:0
/ \
0 1 .. 7 8
active idle !online
childmask=1
tmigr_walk.childmask = 0
3) ... CPU 0 comes active in the same time. As migrator in GRP0:0 was
TMIGR_NONE, childmask of GRP0:0 is stored in update propagation data
structure tmigr_walk (as update of childmask is not yet
visible/updated). And now ...
[GRP1:0]
migrator = TMIGR_NONE
active = 0x00
nextevt = KTIME_MAX
/ \
[GRP0:0] [GRP0:1]
migrator = 0x01 migrator = TMIGR_NONE
active = 0x01 active = 0x00
nextevt = KTIME_MAX nextevt = KTIME_MAX
childmask= 2 childmask= 1
parent = GRP1:0 parent = GRP1:0
/ \
0 1 .. 7 8
active idle !online
childmask=1
tmigr_walk.childmask = 0
4) ... childmask of GRP0:0 is updated by CPU 8 (still part of setup
code).
[GRP1:0]
migrator = 0x00
active = 0x00
nextevt = KTIME_MAX
/ \
[GRP0:0] [GRP0:1]
migrator = 0x01 migrator = TMIGR_NONE
active = 0x01 active = 0x00
nextevt = KTIME_MAX nextevt = KTIME_MAX
childmask= 2 childmask= 1
parent = GRP1:0 parent = GRP1:0
/ \
0 1 .. 7 8
active idle !online
childmask=1
tmigr_walk.childmask = 0
5) CPU 0 sees the connection to GRP1:0 and now propagates active state to
GRP1:0 but with childmask = 0 as stored in propagation data structure.
--> Now GRP1:0 always has a migrator as 0x00 != TMIGR_NONE and for all CPUs
it looks like GRP1:0 is always active.
To prevent those races, the setup of the hierarchy is moved into the
cpuhotplug prepare callback. The prepare callback is not executed by the
CPU which will come online, it is executed by the CPU which prepares
onlining of the other CPU. This CPU is active while it is connecting the
formerly top level to the new one. This prevents from (A) to happen and it
also prevents from any further walk above the formerly top level until that
active CPU becomes inactive, releasing the new ->parent and ->childmask
updates to be visible by any subsequent walk up above the formerly top
level hierarchy. This prevents from (B) to happen. The direction for the
updates is now forced to look like "from bottom to top".
However if the active CPU prevents from tmigr_cpu_(in)active() to walk up
with the update not-or-half visible, nothing prevents walking up to the new
top with a 0 childmask in tmigr_handle_remote_up() or
tmigr_requires_handle_remote_up() if the active CPU doing the prepare is
not the migrator. But then it looks fine because:
* tmigr_check_migrator() should just return false
* The migrator is active and should eventually observe the new childmask
at some point in a future tick.
Split setup functionality of online callback into the cpuhotplug prepare
callback and setup hotplug state. Reorder the code, that all prepare
related functions are close to each other and online and offline callbacks
are also close together.
Fixes: 7ee988770326 ("timers: Implement the hierarchical pull model")
Signed-off-by: Anna-Maria Behnsen <anna-maria(a)linutronix.de>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic(a)kernel.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240703202839.8921-1-anna-maria@linutronix.de
---
include/linux/cpuhotplug.h | 1 +-
kernel/time/timer_migration.c | 181 ++++++++++++++++++---------------
2 files changed, 101 insertions(+), 81 deletions(-)
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 7a5785f..df59666 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -122,6 +122,7 @@ enum cpuhp_state {
CPUHP_KVM_PPC_BOOK3S_PREPARE,
CPUHP_ZCOMP_PREPARE,
CPUHP_TIMERS_PREPARE,
+ CPUHP_TMIGR_PREPARE,
CPUHP_MIPS_SOC_PREPARE,
CPUHP_BP_PREPARE_DYN,
CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20,
diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c
index d91efe1..9b86efd 100644
--- a/kernel/time/timer_migration.c
+++ b/kernel/time/timer_migration.c
@@ -1438,6 +1438,66 @@ u64 tmigr_quick_check(u64 nextevt)
return KTIME_MAX;
}
+/*
+ * tmigr_trigger_active() - trigger a CPU to become active again
+ *
+ * This function is executed on a CPU which is part of cpu_online_mask, when the
+ * last active CPU in the hierarchy is offlining. With this, it is ensured that
+ * the other CPU is active and takes over the migrator duty.
+ */
+static long tmigr_trigger_active(void *unused)
+{
+ struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
+
+ WARN_ON_ONCE(!tmc->online || tmc->idle);
+
+ return 0;
+}
+
+static int tmigr_cpu_offline(unsigned int cpu)
+{
+ struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
+ int migrator;
+ u64 firstexp;
+
+ raw_spin_lock_irq(&tmc->lock);
+ tmc->online = false;
+ WRITE_ONCE(tmc->wakeup, KTIME_MAX);
+
+ /*
+ * CPU has to handle the local events on his own, when on the way to
+ * offline; Therefore nextevt value is set to KTIME_MAX
+ */
+ firstexp = __tmigr_cpu_deactivate(tmc, KTIME_MAX);
+ trace_tmigr_cpu_offline(tmc);
+ raw_spin_unlock_irq(&tmc->lock);
+
+ if (firstexp != KTIME_MAX) {
+ migrator = cpumask_any_but(cpu_online_mask, cpu);
+ work_on_cpu(migrator, tmigr_trigger_active, NULL);
+ }
+
+ return 0;
+}
+
+static int tmigr_cpu_online(unsigned int cpu)
+{
+ struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
+
+ /* Check whether CPU data was successfully initialized */
+ if (WARN_ON_ONCE(!tmc->tmgroup))
+ return -EINVAL;
+
+ raw_spin_lock_irq(&tmc->lock);
+ trace_tmigr_cpu_online(tmc);
+ tmc->idle = timer_base_is_idle();
+ if (!tmc->idle)
+ __tmigr_cpu_activate(tmc);
+ tmc->online = true;
+ raw_spin_unlock_irq(&tmc->lock);
+ return 0;
+}
+
static void tmigr_init_group(struct tmigr_group *group, unsigned int lvl,
int node)
{
@@ -1512,7 +1572,7 @@ static struct tmigr_group *tmigr_get_group(unsigned int cpu, int node,
static void tmigr_connect_child_parent(struct tmigr_group *child,
struct tmigr_group *parent)
{
- union tmigr_state childstate;
+ struct tmigr_walk data;
raw_spin_lock_irq(&child->lock);
raw_spin_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING);
@@ -1540,22 +1600,24 @@ static void tmigr_connect_child_parent(struct tmigr_group *child,
* child to the new parent. So tmigr_connect_child_parent() is
* executed with the formerly top level group (child) and the newly
* created group (parent).
+ *
+ * * It is ensured that the child is active, as this setup path is
+ * executed in hotplug prepare callback. This is exectued by an
+ * already connected and !idle CPU. Even if all other CPUs go idle,
+ * the CPU executing the setup will be responsible up to current top
+ * level group. And the next time it goes inactive, it will release
+ * the new childmask and parent to subsequent walkers through this
+ * @child. Therefore propagate active state unconditionally.
*/
- childstate.state = atomic_read(&child->migr_state);
- if (childstate.migrator != TMIGR_NONE) {
- struct tmigr_walk data;
-
- data.childmask = child->childmask;
+ data.childmask = child->childmask;
- /*
- * There is only one new level per time (which is protected by
- * tmigr_mutex). When connecting the child and the parent and
- * set the child active when the parent is inactive, the parent
- * needs to be the uppermost level. Otherwise there went
- * something wrong!
- */
- WARN_ON(!tmigr_active_up(parent, child, &data) && parent->parent);
- }
+ /*
+ * There is only one new level per time (which is protected by
+ * tmigr_mutex). When connecting the child and the parent and set the
+ * child active when the parent is inactive, the parent needs to be the
+ * uppermost level. Otherwise there went something wrong!
+ */
+ WARN_ON(!tmigr_active_up(parent, child, &data) && parent->parent);
}
static int tmigr_setup_groups(unsigned int cpu, unsigned int node)
@@ -1661,80 +1723,32 @@ static int tmigr_add_cpu(unsigned int cpu)
return ret;
}
-static int tmigr_cpu_online(unsigned int cpu)
+static int tmigr_cpu_prepare(unsigned int cpu)
{
- struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
- int ret;
-
- /* First online attempt? Initialize CPU data */
- if (!tmc->tmgroup) {
- raw_spin_lock_init(&tmc->lock);
-
- ret = tmigr_add_cpu(cpu);
- if (ret < 0)
- return ret;
-
- if (tmc->childmask == 0)
- return -EINVAL;
+ struct tmigr_cpu *tmc = per_cpu_ptr(&tmigr_cpu, cpu);
+ int ret = 0;
- timerqueue_init(&tmc->cpuevt.nextevt);
- tmc->cpuevt.nextevt.expires = KTIME_MAX;
- tmc->cpuevt.ignore = true;
- tmc->cpuevt.cpu = cpu;
-
- tmc->remote = false;
- WRITE_ONCE(tmc->wakeup, KTIME_MAX);
- }
- raw_spin_lock_irq(&tmc->lock);
- trace_tmigr_cpu_online(tmc);
- tmc->idle = timer_base_is_idle();
- if (!tmc->idle)
- __tmigr_cpu_activate(tmc);
- tmc->online = true;
- raw_spin_unlock_irq(&tmc->lock);
- return 0;
-}
-
-/*
- * tmigr_trigger_active() - trigger a CPU to become active again
- *
- * This function is executed on a CPU which is part of cpu_online_mask, when the
- * last active CPU in the hierarchy is offlining. With this, it is ensured that
- * the other CPU is active and takes over the migrator duty.
- */
-static long tmigr_trigger_active(void *unused)
-{
- struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
+ /* Not first online attempt? */
+ if (tmc->tmgroup)
+ return ret;
- WARN_ON_ONCE(!tmc->online || tmc->idle);
+ raw_spin_lock_init(&tmc->lock);
- return 0;
-}
+ ret = tmigr_add_cpu(cpu);
+ if (ret < 0)
+ return ret;
-static int tmigr_cpu_offline(unsigned int cpu)
-{
- struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu);
- int migrator;
- u64 firstexp;
+ if (tmc->childmask == 0)
+ return -EINVAL;
- raw_spin_lock_irq(&tmc->lock);
- tmc->online = false;
+ timerqueue_init(&tmc->cpuevt.nextevt);
+ tmc->cpuevt.nextevt.expires = KTIME_MAX;
+ tmc->cpuevt.ignore = true;
+ tmc->cpuevt.cpu = cpu;
+ tmc->remote = false;
WRITE_ONCE(tmc->wakeup, KTIME_MAX);
- /*
- * CPU has to handle the local events on his own, when on the way to
- * offline; Therefore nextevt value is set to KTIME_MAX
- */
- firstexp = __tmigr_cpu_deactivate(tmc, KTIME_MAX);
- trace_tmigr_cpu_offline(tmc);
- raw_spin_unlock_irq(&tmc->lock);
-
- if (firstexp != KTIME_MAX) {
- migrator = cpumask_any_but(cpu_online_mask, cpu);
- work_on_cpu(migrator, tmigr_trigger_active, NULL);
- }
-
- return 0;
+ return ret;
}
static int __init tmigr_init(void)
@@ -1793,6 +1807,11 @@ static int __init tmigr_init(void)
tmigr_hierarchy_levels, TMIGR_CHILDREN_PER_GROUP,
tmigr_crossnode_level);
+ ret = cpuhp_setup_state(CPUHP_AP_TMIGR_ONLINE, "tmigr:prepare",
+ tmigr_cpu_prepare, NULL);
+ if (ret)
+ goto err;
+
ret = cpuhp_setup_state(CPUHP_AP_TMIGR_ONLINE, "tmigr:online",
tmigr_cpu_online, tmigr_cpu_offline);
if (ret)
Unless tpm_chip_bootstrap() was called by the driver, !chip->auth can
cause a null derefence in tpm_buf_append_name(). Thus, address
!chip->auth in tpm_buf_append_name() and remove the fallback
implementation for !TCG_TPM2_HMAC.
Cc: stable(a)vger.kernel.org # v6.9+
Reported-by: Stefan Berger <stefanb(a)linux.ibm.com>
Closes: https://lore.kernel.org/linux-integrity/20240617193408.1234365-1-stefanb@li…
Fixes: d0a25bb961e6 ("tpm: Add HMAC session name/handle append")
Signed-off-by: Jarkko Sakkinen <jarkko(a)kernel.org>
---
drivers/char/tpm/Makefile | 2 +-
drivers/char/tpm/tpm2-sessions.c | 205 ++++++++++++++++---------------
include/linux/tpm.h | 16 +--
3 files changed, 113 insertions(+), 110 deletions(-)
diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile
index 4c695b0388f3..9bb142c75243 100644
--- a/drivers/char/tpm/Makefile
+++ b/drivers/char/tpm/Makefile
@@ -16,8 +16,8 @@ tpm-y += eventlog/common.o
tpm-y += eventlog/tpm1.o
tpm-y += eventlog/tpm2.o
tpm-y += tpm-buf.o
+tpm-y += tpm2-sessions.o
-tpm-$(CONFIG_TCG_TPM2_HMAC) += tpm2-sessions.o
tpm-$(CONFIG_ACPI) += tpm_ppi.o eventlog/acpi.o
tpm-$(CONFIG_EFI) += eventlog/efi.o
tpm-$(CONFIG_OF) += eventlog/of.o
diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c
index 2f1d96a5a5a7..06d0f10a2301 100644
--- a/drivers/char/tpm/tpm2-sessions.c
+++ b/drivers/char/tpm/tpm2-sessions.c
@@ -163,6 +163,112 @@ static u8 name_size(const u8 *name)
return size_map[alg] + 2;
}
+static int tpm2_parse_read_public(char *name, struct tpm_buf *buf)
+{
+ struct tpm_header *head = (struct tpm_header *)buf->data;
+ off_t offset = TPM_HEADER_SIZE;
+ u32 tot_len = be32_to_cpu(head->length);
+ u32 val;
+
+ /* we're starting after the header so adjust the length */
+ tot_len -= TPM_HEADER_SIZE;
+
+ /* skip public */
+ val = tpm_buf_read_u16(buf, &offset);
+ if (val > tot_len)
+ return -EINVAL;
+ offset += val;
+ /* name */
+ val = tpm_buf_read_u16(buf, &offset);
+ if (val != name_size(&buf->data[offset]))
+ return -EINVAL;
+ memcpy(name, &buf->data[offset], val);
+ /* forget the rest */
+ return 0;
+}
+
+static int tpm2_read_public(struct tpm_chip *chip, u32 handle, char *name)
+{
+ struct tpm_buf buf;
+ int rc;
+
+ rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_READ_PUBLIC);
+ if (rc)
+ return rc;
+
+ tpm_buf_append_u32(&buf, handle);
+ rc = tpm_transmit_cmd(chip, &buf, 0, "read public");
+ if (rc == TPM2_RC_SUCCESS)
+ rc = tpm2_parse_read_public(name, &buf);
+
+ tpm_buf_destroy(&buf);
+
+ return rc;
+}
+
+/**
+ * tpm_buf_append_name() - add a handle area to the buffer
+ * @chip: the TPM chip structure
+ * @buf: The buffer to be appended
+ * @handle: The handle to be appended
+ * @name: The name of the handle (may be NULL)
+ *
+ * In order to compute session HMACs, we need to know the names of the
+ * objects pointed to by the handles. For most objects, this is simply
+ * the actual 4 byte handle or an empty buf (in these cases @name
+ * should be NULL) but for volatile objects, permanent objects and NV
+ * areas, the name is defined as the hash (according to the name
+ * algorithm which should be set to sha256) of the public area to
+ * which the two byte algorithm id has been appended. For these
+ * objects, the @name pointer should point to this. If a name is
+ * required but @name is NULL, then TPM2_ReadPublic() will be called
+ * on the handle to obtain the name.
+ *
+ * As with most tpm_buf operations, success is assumed because failure
+ * will be caused by an incorrect programming model and indicated by a
+ * kernel message.
+ */
+void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
+ u32 handle, u8 *name)
+{
+ enum tpm2_mso_type mso = tpm2_handle_mso(handle);
+ struct tpm2_auth *auth = chip->auth;
+ int slot;
+
+ if (!chip->auth) {
+ tpm_buf_append_u32(buf, handle);
+ /* count the number of handles in the upper bits of flags */
+ buf->handles++;
+ return;
+ }
+
+ slot = (tpm_buf_length(buf) - TPM_HEADER_SIZE) / 4;
+ if (slot >= AUTH_MAX_NAMES) {
+ dev_err(&chip->dev, "TPM: too many handles\n");
+ return;
+ }
+ WARN(auth->session != tpm_buf_length(buf),
+ "name added in wrong place\n");
+ tpm_buf_append_u32(buf, handle);
+ auth->session += 4;
+
+ if (mso == TPM2_MSO_PERSISTENT ||
+ mso == TPM2_MSO_VOLATILE ||
+ mso == TPM2_MSO_NVRAM) {
+ if (!name)
+ tpm2_read_public(chip, handle, auth->name[slot]);
+ } else {
+ if (name)
+ dev_err(&chip->dev, "TPM: Handle does not require name but one is specified\n");
+ }
+
+ auth->name_h[slot] = handle;
+ if (name)
+ memcpy(auth->name[slot], name, name_size(name));
+}
+EXPORT_SYMBOL_GPL(tpm_buf_append_name);
+
+#ifdef CONFIG_TCG_TPM2_HMAC
/*
* It turns out the crypto hmac(sha256) is hard for us to consume
* because it assumes a fixed key and the TPM seems to change the key
@@ -567,104 +673,6 @@ void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf)
}
EXPORT_SYMBOL(tpm_buf_fill_hmac_session);
-static int tpm2_parse_read_public(char *name, struct tpm_buf *buf)
-{
- struct tpm_header *head = (struct tpm_header *)buf->data;
- off_t offset = TPM_HEADER_SIZE;
- u32 tot_len = be32_to_cpu(head->length);
- u32 val;
-
- /* we're starting after the header so adjust the length */
- tot_len -= TPM_HEADER_SIZE;
-
- /* skip public */
- val = tpm_buf_read_u16(buf, &offset);
- if (val > tot_len)
- return -EINVAL;
- offset += val;
- /* name */
- val = tpm_buf_read_u16(buf, &offset);
- if (val != name_size(&buf->data[offset]))
- return -EINVAL;
- memcpy(name, &buf->data[offset], val);
- /* forget the rest */
- return 0;
-}
-
-static int tpm2_read_public(struct tpm_chip *chip, u32 handle, char *name)
-{
- struct tpm_buf buf;
- int rc;
-
- rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_READ_PUBLIC);
- if (rc)
- return rc;
-
- tpm_buf_append_u32(&buf, handle);
- rc = tpm_transmit_cmd(chip, &buf, 0, "read public");
- if (rc == TPM2_RC_SUCCESS)
- rc = tpm2_parse_read_public(name, &buf);
-
- tpm_buf_destroy(&buf);
-
- return rc;
-}
-
-/**
- * tpm_buf_append_name() - add a handle area to the buffer
- * @chip: the TPM chip structure
- * @buf: The buffer to be appended
- * @handle: The handle to be appended
- * @name: The name of the handle (may be NULL)
- *
- * In order to compute session HMACs, we need to know the names of the
- * objects pointed to by the handles. For most objects, this is simply
- * the actual 4 byte handle or an empty buf (in these cases @name
- * should be NULL) but for volatile objects, permanent objects and NV
- * areas, the name is defined as the hash (according to the name
- * algorithm which should be set to sha256) of the public area to
- * which the two byte algorithm id has been appended. For these
- * objects, the @name pointer should point to this. If a name is
- * required but @name is NULL, then TPM2_ReadPublic() will be called
- * on the handle to obtain the name.
- *
- * As with most tpm_buf operations, success is assumed because failure
- * will be caused by an incorrect programming model and indicated by a
- * kernel message.
- */
-void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
- u32 handle, u8 *name)
-{
- enum tpm2_mso_type mso = tpm2_handle_mso(handle);
- struct tpm2_auth *auth = chip->auth;
- int slot;
-
- slot = (tpm_buf_length(buf) - TPM_HEADER_SIZE)/4;
- if (slot >= AUTH_MAX_NAMES) {
- dev_err(&chip->dev, "TPM: too many handles\n");
- return;
- }
- WARN(auth->session != tpm_buf_length(buf),
- "name added in wrong place\n");
- tpm_buf_append_u32(buf, handle);
- auth->session += 4;
-
- if (mso == TPM2_MSO_PERSISTENT ||
- mso == TPM2_MSO_VOLATILE ||
- mso == TPM2_MSO_NVRAM) {
- if (!name)
- tpm2_read_public(chip, handle, auth->name[slot]);
- } else {
- if (name)
- dev_err(&chip->dev, "TPM: Handle does not require name but one is specified\n");
- }
-
- auth->name_h[slot] = handle;
- if (name)
- memcpy(auth->name[slot], name, name_size(name));
-}
-EXPORT_SYMBOL(tpm_buf_append_name);
-
/**
* tpm_buf_check_hmac_response() - check the TPM return HMAC for correctness
* @chip: the TPM chip structure
@@ -1311,3 +1319,4 @@ int tpm2_sessions_init(struct tpm_chip *chip)
return rc;
}
+#endif /* CONFIG_TCG_TPM2_HMAC */
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 21a67dc9efe8..2844fea4a12a 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -211,8 +211,8 @@ struct tpm_chip {
u8 null_key_name[TPM2_NAME_SIZE];
u8 null_ec_key_x[EC_PT_SZ];
u8 null_ec_key_y[EC_PT_SZ];
- struct tpm2_auth *auth;
#endif
+ struct tpm2_auth *auth;
};
#define TPM_HEADER_SIZE 10
@@ -490,11 +490,13 @@ static inline void tpm_buf_append_empty_auth(struct tpm_buf *buf, u32 handle)
{
}
#endif
-#ifdef CONFIG_TCG_TPM2_HMAC
-int tpm2_start_auth_session(struct tpm_chip *chip);
void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
u32 handle, u8 *name);
+
+#ifdef CONFIG_TCG_TPM2_HMAC
+
+int tpm2_start_auth_session(struct tpm_chip *chip);
void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
u8 attributes, u8 *passphrase,
int passphraselen);
@@ -521,14 +523,6 @@ static inline int tpm2_start_auth_session(struct tpm_chip *chip)
static inline void tpm2_end_auth_session(struct tpm_chip *chip)
{
}
-static inline void tpm_buf_append_name(struct tpm_chip *chip,
- struct tpm_buf *buf,
- u32 handle, u8 *name)
-{
- tpm_buf_append_u32(buf, handle);
- /* count the number of handles in the upper bits of flags */
- buf->handles++;
-}
static inline void tpm_buf_append_hmac_session(struct tpm_chip *chip,
struct tpm_buf *buf,
u8 attributes, u8 *passphrase,
--
2.45.2