[ Replying to show this patch that got dropped from the mailing list ]
On Sat, 30 Sep 2023 16:32:16 -0400
Steven Rostedt <rostedt(a)goodmis.org> wrote:
> From: Beau Belgrave <beaub(a)linux.microsoft.com>
>
> All architectures require that the address passed to set_bit() be long
> aligned. On a 64-bit kernel, user processes can register either a
> 32-bit or a 64-bit value to be updated when tracing is enabled. Both
> cases are guaranteed to be naturally aligned, but that is not enough:
> the address must be long aligned, without affecting the checks the user
> process performs on the value, which requires different bit adjustments
> for little and big endian CPUs.
>
> Add a compat flag to user_event_enabler that indicates when a 32-bit
> value is being used on a 64-bit kernel. Long align addresses and correct
> the bit to be used by set_bit() to account for this alignment. Ensure
> compat flags are copied during forks and used during deletion clears.
>
> Link: https://lore.kernel.org/linux-trace-kernel/20230925230829.341-2-beaub@linux…
> Link: https://lore.kernel.org/linux-trace-kernel/20230914131102.179100-1-cleger@r…
>
> Cc: stable(a)vger.kernel.org
> Fixes: 7235759084a4 ("tracing/user_events: Use remote writes for event enablement")
> Reported-by: Clément Léger <cleger(a)rivosinc.com>
> Suggested-by: Clément Léger <cleger(a)rivosinc.com>
> Signed-off-by: Beau Belgrave <beaub(a)linux.microsoft.com>
> Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
> ---
> kernel/trace/trace_events_user.c | 58 ++++++++++++++++++++++++++++----
> 1 file changed, 51 insertions(+), 7 deletions(-)
>
> diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
> index 6f046650e527..b87f41187c6a 100644
> --- a/kernel/trace/trace_events_user.c
> +++ b/kernel/trace/trace_events_user.c
> @@ -127,8 +127,13 @@ struct user_event_enabler {
> /* Bit 7 is for freeing status of enablement */
> #define ENABLE_VAL_FREEING_BIT 7
>
> -/* Only duplicate the bit value */
> -#define ENABLE_VAL_DUP_MASK ENABLE_VAL_BIT_MASK
> +/* Bit 8 is for marking 32-bit on 64-bit */
> +#define ENABLE_VAL_32_ON_64_BIT 8
> +
> +#define ENABLE_VAL_COMPAT_MASK (1 << ENABLE_VAL_32_ON_64_BIT)
> +
> +/* Only duplicate the bit and compat values */
> +#define ENABLE_VAL_DUP_MASK (ENABLE_VAL_BIT_MASK | ENABLE_VAL_COMPAT_MASK)
>
> #define ENABLE_BITOPS(e) (&(e)->values)
>
> @@ -174,6 +179,30 @@ struct user_event_validator {
> int flags;
> };
>
> +static inline void align_addr_bit(unsigned long *addr, int *bit,
> + unsigned long *flags)
> +{
> + if (IS_ALIGNED(*addr, sizeof(long))) {
> +#ifdef __BIG_ENDIAN
> + /* 32 bit on BE 64 bit requires a 32 bit offset when aligned. */
> + if (test_bit(ENABLE_VAL_32_ON_64_BIT, flags))
> + *bit += 32;
> +#endif
> + return;
> + }
> +
> + *addr = ALIGN_DOWN(*addr, sizeof(long));
> +
> + /*
> + * We only support 32 and 64 bit values. The only time we need
> + * to align is a 32 bit value on a 64 bit kernel, which on LE
> + * is always 32 bits, and on BE requires no change when unaligned.
> + */
> +#ifdef __LITTLE_ENDIAN
> + *bit += 32;
> +#endif
> +}
> +
> typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i,
> void *tpdata, bool *faulted);
>
> @@ -482,6 +511,7 @@ static int user_event_enabler_write(struct user_event_mm *mm,
> unsigned long *ptr;
> struct page *page;
> void *kaddr;
> + int bit = ENABLE_BIT(enabler);
> int ret;
>
> lockdep_assert_held(&event_mutex);
> @@ -497,6 +527,8 @@ static int user_event_enabler_write(struct user_event_mm *mm,
> test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler))))
> return -EBUSY;
>
> + align_addr_bit(&uaddr, &bit, ENABLE_BITOPS(enabler));
> +
> ret = pin_user_pages_remote(mm->mm, uaddr, 1, FOLL_WRITE | FOLL_NOFAULT,
> &page, NULL);
>
> @@ -515,9 +547,9 @@ static int user_event_enabler_write(struct user_event_mm *mm,
>
> /* Update bit atomically, user tracers must be atomic as well */
> if (enabler->event && enabler->event->status)
> - set_bit(ENABLE_BIT(enabler), ptr);
> + set_bit(bit, ptr);
> else
> - clear_bit(ENABLE_BIT(enabler), ptr);
> + clear_bit(bit, ptr);
>
> kunmap_local(kaddr);
> unpin_user_pages_dirty_lock(&page, 1, true);
> @@ -849,6 +881,12 @@ static struct user_event_enabler
> enabler->event = user;
> enabler->addr = uaddr;
> enabler->values = reg->enable_bit;
> +
> +#if BITS_PER_LONG >= 64
> + if (reg->enable_size == 4)
> + set_bit(ENABLE_VAL_32_ON_64_BIT, ENABLE_BITOPS(enabler));
> +#endif
> +
> retry:
> /* Prevents state changes from racing with new enablers */
> mutex_lock(&event_mutex);
> @@ -2377,7 +2415,8 @@ static long user_unreg_get(struct user_unreg __user *ureg,
> }
>
> static int user_event_mm_clear_bit(struct user_event_mm *user_mm,
> - unsigned long uaddr, unsigned char bit)
> + unsigned long uaddr, unsigned char bit,
> + unsigned long flags)
> {
> struct user_event_enabler enabler;
> int result;
> @@ -2385,7 +2424,7 @@ static int user_event_mm_clear_bit(struct user_event_mm *user_mm,
>
> memset(&enabler, 0, sizeof(enabler));
> enabler.addr = uaddr;
> - enabler.values = bit;
> + enabler.values = bit | flags;
> retry:
> /* Prevents state changes from racing with new enablers */
> mutex_lock(&event_mutex);
> @@ -2415,6 +2454,7 @@ static long user_events_ioctl_unreg(unsigned long uarg)
> struct user_event_mm *mm = current->user_event_mm;
> struct user_event_enabler *enabler, *next;
> struct user_unreg reg;
> + unsigned long flags;
> long ret;
>
> ret = user_unreg_get(ureg, &reg);
> @@ -2425,6 +2465,7 @@ static long user_events_ioctl_unreg(unsigned long uarg)
> if (!mm)
> return -ENOENT;
>
> + flags = 0;
> ret = -ENOENT;
>
> /*
> @@ -2441,6 +2482,9 @@ static long user_events_ioctl_unreg(unsigned long uarg)
> ENABLE_BIT(enabler) == reg.disable_bit) {
> set_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler));
>
> + /* We must keep compat flags for the clear */
> + flags |= enabler->values & ENABLE_VAL_COMPAT_MASK;
> +
> if (!test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)))
> user_event_enabler_destroy(enabler, true);
>
> @@ -2454,7 +2498,7 @@ static long user_events_ioctl_unreg(unsigned long uarg)
> /* Ensure bit is now cleared for user, regardless of event status */
> if (!ret)
> ret = user_event_mm_clear_bit(mm, reg.disable_addr,
> - reg.disable_bit);
> + reg.disable_bit, flags);
>
> return ret;
> }
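To make the align_addr_bit() adjustment in the patch above concrete, here is a
minimal user-space sketch (an illustration only, not part of the patch; the
printed results assume a little-endian 64-bit host) of which bit of the
containing long a 32-bit enable value occupies, depending on its offset:

#include <stdint.h>
#include <stdio.h>

/* Two naturally aligned 32-bit slots inside one long-aligned 64-bit word,
 * mirroring a u32 enable value at (addr % 8) == 0 and (addr % 8) == 4. */
union split {
	uint64_t word;		/* what a long-aligned set_bit() operates on */
	uint32_t half[2];
};

int main(void)
{
	union split s = { .word = 0 };

	s.half[1] = 1;	/* bit 0 of a u32 at offset 4 (unaligned case) */
	/* On LE this lands in bit 32 of the long, hence "*bit += 32" after
	 * the ALIGN_DOWN() in align_addr_bit(). */
	printf("u32 at offset 4 -> long bit %d\n", __builtin_ctzll(s.word));

	s.word = 0;
	s.half[0] = 1;	/* bit 0 of a u32 at offset 0 (aligned case) */
	/* On LE this stays bit 0; on BE it would be bit 32, matching the
	 * ENABLE_VAL_32_ON_64_BIT adjustment in the aligned branch. */
	printf("u32 at offset 0 -> long bit %d\n", __builtin_ctzll(s.word));
	return 0;
}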
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
It was discovered that the ring buffer polling was incorrectly reporting
that a read would not block, because the polling code did not take into
account that reads block when "buffer_percent" is set. Instead, polling
reported that reads would not block whenever there was any data in the
ring buffer, which was incorrect behavior from a user space point of
view. This was fixed by commit 42fb0a1e84ff by having the polling code
check whether the ring buffer holds more data than the user-specified
"buffer_percent" requires.
The problem now is that the polling code did not register itself to the
writer that it wanted to wait for a specific "full" value of the ring
buffer. The result was that the writer would wake the polling waiter
whenever there was a new event. The polling waiter would then wake up, see
that there's not enough data in the ring buffer to notify user space and
then go back to sleep. The next event would wake it up again.
Before the polling fix was added, the code would wake up around 100 times
for a hackbench 30 benchmark. After the "fix", because the writer kept
waking the polling waiter on every event, it would wake up over 11,000
times! It never left the kernel, so the user space behavior was still
"correct", but this is definitely not the desired effect.
To fix this, have the polling code add what it's waiting for to the
"shortest_full" variable, to tell the writer not to wake it up if the
buffer is not as full as it expects to be.
Note, after this fix, it appears that the waiter is now woken up around
twice as often as it was before (~200 times). This is a tremendous
improvement over the 11,000 times, but I will need to spend some time to
see why polling is more aggressive in its wakeups than the read blocking
code.
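As a rough user-space sketch of the behavior described above (the tracefs
path and the 50% value are assumptions, and error handling is minimal):
raise the wakeup watermark via the buffer_percent file, then poll() a
per-CPU raw buffer; with this fix the poll() sleeps until the watermark is
met instead of being woken on every event.

#include <fcntl.h>
#include <poll.h>
#include <unistd.h>

int main(void)
{
	/* Wake readers/pollers only once the buffer is at least 50% full. */
	int bp = open("/sys/kernel/tracing/buffer_percent", O_WRONLY);
	if (bp >= 0) {
		write(bp, "50", 2);
		close(bp);
	}

	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
		      O_RDONLY);
	if (fd < 0)
		return 1;

	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	poll(&pfd, 1, -1);	/* blocks until the watermark is reached */
	close(fd);
	return 0;
}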
Link: https://lore.kernel.org/linux-trace-kernel/20230929180113.01c2cae3@rorschac…
Cc: stable(a)vger.kernel.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Fixes: 42fb0a1e84ff ("tracing/ring-buffer: Have polling block on watermark")
Reported-by: Julia Lawall <julia.lawall(a)inria.fr>
Tested-by: Julia Lawall <julia.lawall(a)inria.fr>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/ring_buffer.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 28daf0ce95c5..515cafdb18d9 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1137,6 +1137,9 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
if (full) {
poll_wait(filp, &work->full_waiters, poll_table);
work->full_waiters_pending = true;
+ if (!cpu_buffer->shortest_full ||
+ cpu_buffer->shortest_full > full)
+ cpu_buffer->shortest_full = full;
} else {
poll_wait(filp, &work->waiters, poll_table);
work->waiters_pending = true;
--
2.40.1
For an unknown TX CQE error type (probably from newer hardware), still
free the SKB, update the queue tail, etc.; otherwise the accounting will
be wrong.
Also, TX errors can be triggered by injecting corrupted packets, so
replace the WARN_ONCE with ratelimited error logging, because we don't
need a stack trace here.
Cc: stable(a)vger.kernel.org
Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)")
Signed-off-by: Haiyang Zhang <haiyangz(a)microsoft.com>
---
drivers/net/ethernet/microsoft/mana/mana_en.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 4a16ebff3d1d..5cdcf7561b38 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1317,19 +1317,23 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
case CQE_TX_VPORT_IDX_OUT_OF_RANGE:
case CQE_TX_VPORT_DISABLED:
case CQE_TX_VLAN_TAGGING_VIOLATION:
- WARN_ONCE(1, "TX: CQE error %d: ignored.\n",
- cqe_oob->cqe_hdr.cqe_type);
+ if (net_ratelimit())
+ netdev_err(ndev, "TX: CQE error %d\n",
+ cqe_oob->cqe_hdr.cqe_type);
+
apc->eth_stats.tx_cqe_err++;
break;
default:
- /* If the CQE type is unexpected, log an error, assert,
- * and go through the error path.
+ /* If the CQE type is unknown, log an error,
+ * and still free the SKB, update tail, etc.
*/
- WARN_ONCE(1, "TX: Unexpected CQE type %d: HW BUG?\n",
- cqe_oob->cqe_hdr.cqe_type);
+ if (net_ratelimit())
+ netdev_err(ndev, "TX: unknown CQE type %d\n",
+ cqe_oob->cqe_hdr.cqe_type);
+
apc->eth_stats.tx_cqe_unknown_type++;
- return;
+ break;
}
if (WARN_ON_ONCE(txq->gdma_txq_id != completions[i].wq_num))
--
2.25.1
It is possible for multiple vCPUs to fault on the same IPA and attempt
to resolve the fault. One of the page table walks will actually update
the PTE and the rest will return -EAGAIN per our race detection scheme.
KVM elides the TLB invalidation on the racing threads as the return
value is nonzero.
Before commit a12ab1378a88 ("KVM: arm64: Use local TLBI on permission
relaxation") KVM always used broadcast TLB invalidations when handling
permission faults, which had the convenient property of making the
stage-2 updates visible to all CPUs in the system. However now we do a
local invalidation, and TLBI elision leads to vCPUs getting stuck in a
permission fault loop. Remember that the architecture permits the TLB to
cache translations that precipitate a permission fault.
Invalidate the TLB entry responsible for the permission fault if the
stage-2 descriptor has been relaxed, regardless of which thread actually
did the job.
Cc: stable(a)vger.kernel.org
Fixes: a12ab1378a88 ("KVM: arm64: Use local TLBI on permission relaxation")
Signed-off-by: Oliver Upton <oliver.upton(a)linux.dev>
---
arch/arm64/kvm/hyp/pgtable.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index f155b8c9e98c..286888751793 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -1314,7 +1314,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level,
KVM_PGTABLE_WALK_HANDLE_FAULT |
KVM_PGTABLE_WALK_SHARED);
- if (!ret)
+ if (!ret || ret == -EAGAIN)
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa_nsh, pgt->mmu, addr, level);
return ret;
}
base-commit: ce9ecca0238b140b88f43859b211c9fdfd8e5b70
--
2.42.0.515.g380fc7ccd1-goog
The quilt patch titled
Subject: Crash: add lock to serialize crash hotplug handling
has been removed from the -mm tree. Its filename was
crash-add-lock-to-serialize-crash-hotplug-handling.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Baoquan He <bhe(a)redhat.com>
Subject: Crash: add lock to serialize crash hotplug handling
Date: Tue, 26 Sep 2023 20:09:05 +0800
Eric reported that handling the corresponding crash hotplug events can
easily fail when many memory hotplug events are notified in a short
period. They fail because kexec_trylock() fails to take __kexec_lock.
=======
[ 78.714569] Fallback order for Node 0: 0
[ 78.714575] Built 1 zonelists, mobility grouping on. Total pages: 1817886
[ 78.717133] Policy zone: Normal
[ 78.724423] crash hp: kexec_trylock() failed, elfcorehdr may be inaccurate
[ 78.727207] crash hp: kexec_trylock() failed, elfcorehdr may be inaccurate
[ 80.056643] PEFILE: Unsigned PE binary
=======
The memory hotplug events are notified very quickly and in large
numbers, while the crash hotplug handling is comparatively slow, so the
atomic variable __kexec_lock and kexec_trylock() cannot guarantee
serialization of the crash hotplug handling.
Here, add a new mutex lock __crash_hotplug_lock to serialize crash hotplug
handling specifically. This doesn't impact the usage of __kexec_lock.
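For illustration only (plain pthreads, not the kernel code being patched):
under a burst of events a try-lock handler drops most of them, while a
blocking mutex serializes them all, which is the property the new
__crash_hotplug_lock provides on top of kexec_trylock(). A hedged sketch,
compile with -pthread:

#include <pthread.h>
#include <stdio.h>

#define THREADS	4
#define EVENTS	1000

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int handled;
static int dropped;

static void *notifier(void *arg)
{
	(void)arg;
	for (int i = 0; i < EVENTS; i++) {
		if (pthread_mutex_trylock(&lock)) {
			/* Like kexec_trylock() failing: the event is skipped. */
			__atomic_fetch_add(&dropped, 1, __ATOMIC_RELAXED);
			continue;
		}
		handled++;	/* "update the elfcorehdr" */
		pthread_mutex_unlock(&lock);
	}
	return NULL;
}

int main(void)
{
	pthread_t t[THREADS];

	for (int i = 0; i < THREADS; i++)
		pthread_create(&t[i], NULL, notifier, NULL);
	for (int i = 0; i < THREADS; i++)
		pthread_join(t[i], NULL);

	/* With pthread_mutex_lock() in place of the trylock, dropped stays 0. */
	printf("handled=%d dropped=%d\n", handled, dropped);
	return 0;
}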
Link: https://lkml.kernel.org/r/20230926120905.392903-1-bhe@redhat.com
Fixes: 247262756121 ("crash: add generic infrastructure for crash hotplug support")
Signed-off-by: Baoquan He <bhe(a)redhat.com>
Tested-by: Eric DeVolder <eric.devolder(a)oracle.com>
Reviewed-by: Eric DeVolder <eric.devolder(a)oracle.com>
Reviewed-by: Valentin Schneider <vschneid(a)redhat.com>
Cc: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/crash_core.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
--- a/kernel/crash_core.c~crash-add-lock-to-serialize-crash-hotplug-handling
+++ a/kernel/crash_core.c
@@ -740,6 +740,17 @@ subsys_initcall(crash_notes_memory_init)
#define pr_fmt(fmt) "crash hp: " fmt
/*
+ * Different than kexec/kdump loading/unloading/jumping/shrinking which
+ * usually rarely happen, there will be many crash hotplug events notified
+ * during one short period, e.g one memory board is hot added and memory
+ * regions are online. So mutex lock __crash_hotplug_lock is used to
+ * serialize the crash hotplug handling specifically.
+ */
+DEFINE_MUTEX(__crash_hotplug_lock);
+#define crash_hotplug_lock() mutex_lock(&__crash_hotplug_lock)
+#define crash_hotplug_unlock() mutex_unlock(&__crash_hotplug_lock)
+
+/*
* This routine utilized when the crash_hotplug sysfs node is read.
* It reflects the kernel's ability/permission to update the crash
* elfcorehdr directly.
@@ -748,9 +759,11 @@ int crash_check_update_elfcorehdr(void)
{
int rc = 0;
+ crash_hotplug_lock();
/* Obtain lock while reading crash information */
if (!kexec_trylock()) {
pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
+ crash_hotplug_unlock();
return 0;
}
if (kexec_crash_image) {
@@ -761,6 +774,7 @@ int crash_check_update_elfcorehdr(void)
}
/* Release lock now that update complete */
kexec_unlock();
+ crash_hotplug_unlock();
return rc;
}
@@ -783,9 +797,11 @@ static void crash_handle_hotplug_event(u
{
struct kimage *image;
+ crash_hotplug_lock();
/* Obtain lock while changing crash information */
if (!kexec_trylock()) {
pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
+ crash_hotplug_unlock();
return;
}
@@ -852,6 +868,7 @@ static void crash_handle_hotplug_event(u
out:
/* Release lock now that update complete */
kexec_unlock();
+ crash_hotplug_unlock();
}
static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
_
Patches currently in -mm which might be from bhe(a)redhat.com are
crash_corec-remove-unnecessary-parameter-of-function.patch
crash_core-change-the-prototype-of-function-parse_crashkernel.patch
crash_core-change-parse_crashkernel-to-support-crashkernel=highlow-parsing.patch
crash_core-add-generic-function-to-do-reservation.patch
crash_core-move-crashk_res-definition-into-crash_corec.patch
x86-kdump-use-generic-interface-to-simplify-crashkernel-reservation-code.patch
x86-kdump-use-generic-interface-to-simplify-crashkernel-reservation-code-fix.patch
arm64-kdump-use-generic-interface-to-simplify-crashkernel-reservation.patch
riscv-kdump-use-generic-interface-to-simplify-crashkernel-reservation.patch
crash_corec-remove-unneeded-functions.patch
The quilt patch titled
Subject: selftests/mm: fix awk usage in charge_reserved_hugetlb.sh and hugetlb_reparenting_test.sh that may cause error
has been removed from the -mm tree. Its filename was
selftests-mm-fix-awk-usage-in-charge_reserved_hugetlbsh-and-hugetlb_reparenting_testsh-that-may-cause-error.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Juntong Deng <juntong.deng(a)outlook.com>
Subject: selftests/mm: fix awk usage in charge_reserved_hugetlb.sh and hugetlb_reparenting_test.sh that may cause error
Date: Wed, 27 Sep 2023 02:19:44 +0800
According to the awk manual, the -e option does not need to be specified
in front of 'program' (unless you need to mix program-file).
The redundant -e option can cause an error when users use awk tools other
than gawk (for example, mawk does not support the -e option).
Error Example:
awk: not an option: -e
Link: https://lkml.kernel.org/r/VI1P193MB075228810591AF2FDD7D42C599C3A@VI1P193MB0…
Signed-off-by: Juntong Deng <juntong.deng(a)outlook.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
tools/testing/selftests/mm/charge_reserved_hugetlb.sh | 4 ++--
tools/testing/selftests/mm/hugetlb_reparenting_test.sh | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
--- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh~selftests-mm-fix-awk-usage-in-charge_reserved_hugetlbsh-and-hugetlb_reparenting_testsh-that-may-cause-error
+++ a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
@@ -25,7 +25,7 @@ if [[ "$1" == "-cgroup-v2" ]]; then
fi
if [[ $cgroup2 ]]; then
- cgroup_path=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
+ cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}')
if [[ -z "$cgroup_path" ]]; then
cgroup_path=/dev/cgroup/memory
mount -t cgroup2 none $cgroup_path
@@ -33,7 +33,7 @@ if [[ $cgroup2 ]]; then
fi
echo "+hugetlb" >$cgroup_path/cgroup.subtree_control
else
- cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}')
+ cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}')
if [[ -z "$cgroup_path" ]]; then
cgroup_path=/dev/cgroup/memory
mount -t cgroup memory,hugetlb $cgroup_path
--- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh~selftests-mm-fix-awk-usage-in-charge_reserved_hugetlbsh-and-hugetlb_reparenting_testsh-that-may-cause-error
+++ a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
@@ -20,7 +20,7 @@ fi
if [[ $cgroup2 ]]; then
- CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
+ CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}')
if [[ -z "$CGROUP_ROOT" ]]; then
CGROUP_ROOT=/dev/cgroup/memory
mount -t cgroup2 none $CGROUP_ROOT
@@ -28,7 +28,7 @@ if [[ $cgroup2 ]]; then
fi
echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control
else
- CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}')
+ CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}')
if [[ -z "$CGROUP_ROOT" ]]; then
CGROUP_ROOT=/dev/cgroup/memory
mount -t cgroup memory,hugetlb $CGROUP_ROOT
_
Patches currently in -mm which might be from juntong.deng(a)outlook.com are
The quilt patch titled
Subject: mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and MPOL_MF_MOVE are specified
has been removed from the -mm tree. Its filename was
mm-mempolicy-keep-vma-walk-if-both-mpol_mf_strict-and-mpol_mf_move-are-specified.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Yang Shi <yang(a)os.amperecomputing.com>
Subject: mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and MPOL_MF_MOVE are specified
Date: Wed, 20 Sep 2023 15:32:42 -0700
When calling mbind() with MPOL_MF_{MOVE|MOVEALL} | MPOL_MF_STRICT, the
kernel should attempt to migrate all existing pages and return -EIO if
there is a misplaced or unmovable page. Then commit 6f4576e3687b
("mempolicy: apply page table walker on queue_pages_range()") messed up
the return value and no longer broke out of the VMA scan early when
MPOL_MF_STRICT was specified alone. The return value problem was fixed by
commit a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when
MPOL_MF_STRICT is specified"), but that broke out of the VMA walk early
when an unmovable page is met, which may cause some pages not to be
migrated as expected.
The code should conceptually do:
    if (MPOL_MF_MOVE|MOVEALL)
        scan all vmas
        try to migrate the existing pages
        return success
    else if (MPOL_MF_MOVE* | MPOL_MF_STRICT)
        scan all vmas
        try to migrate the existing pages
        return -EIO if unmovable or migration failed
    else /* MPOL_MF_STRICT alone */
        break early if meets unmovable and don't call mbind_range() at all
    else /* none of those flags */
        check the ranges in test_walk, EFAULT without mbind_range() if discontig.
Fixed the behavior.
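A hedged user-space sketch of the semantics described above (the node mask
and mapping size are arbitrary choices; the mbind() wrapper comes from
libnuma, so link with -lnuma): with MPOL_MF_MOVE | MPOL_MF_STRICT the call
tries to migrate every existing page of the range to node 0 and reports
EIO if a misplaced or unmovable page remains.

#include <numaif.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 4UL << 20;		/* 4 MiB */
	void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED)
		return 1;
	memset(buf, 0, len);		/* fault the pages in first */

	unsigned long nodemask = 1UL;	/* allow node 0 only */
	if (mbind(buf, len, MPOL_BIND, &nodemask, sizeof(nodemask) * 8,
		  MPOL_MF_MOVE | MPOL_MF_STRICT))
		perror("mbind");	/* EIO: some page could not be migrated */
	return 0;
}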
Link: https://lkml.kernel.org/r/20230920223242.3425775-1-yang@os.amperecomputing.…
Fixes: a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified")
Signed-off-by: Yang Shi <yang(a)os.amperecomputing.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Rafael Aquini <aquini(a)redhat.com>
Cc: Kirill A. Shutemov <kirill(a)shutemov.name>
Cc: David Rientjes <rientjes(a)google.com>
Cc: <stable(a)vger.kernel.org> [4.9+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/mempolicy.c | 39 +++++++++++++++++++--------------------
1 file changed, 19 insertions(+), 20 deletions(-)
--- a/mm/mempolicy.c~mm-mempolicy-keep-vma-walk-if-both-mpol_mf_strict-and-mpol_mf_move-are-specified
+++ a/mm/mempolicy.c
@@ -426,6 +426,7 @@ struct queue_pages {
unsigned long start;
unsigned long end;
struct vm_area_struct *first;
+ bool has_unmovable;
};
/*
@@ -446,9 +447,8 @@ static inline bool queue_folio_required(
/*
* queue_folios_pmd() has three possible return values:
* 0 - folios are placed on the right node or queued successfully, or
- * special page is met, i.e. huge zero page.
- * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
- * specified.
+ * special page is met, i.e. zero page, or unmovable page is found
+ * but continue walking (indicated by queue_pages.has_unmovable).
* -EIO - is migration entry or only MPOL_MF_STRICT was specified and an
* existing folio was already on a node that does not follow the
* policy.
@@ -479,7 +479,7 @@ static int queue_folios_pmd(pmd_t *pmd,
if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
if (!vma_migratable(walk->vma) ||
migrate_folio_add(folio, qp->pagelist, flags)) {
- ret = 1;
+ qp->has_unmovable = true;
goto unlock;
}
} else
@@ -495,9 +495,8 @@ unlock:
*
* queue_folios_pte_range() has three possible return values:
* 0 - folios are placed on the right node or queued successfully, or
- * special page is met, i.e. zero page.
- * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
- * specified.
+ * special page is met, i.e. zero page, or unmovable page is found
+ * but continue walking (indicated by queue_pages.has_unmovable).
* -EIO - only MPOL_MF_STRICT was specified and an existing folio was already
* on a node that does not follow the policy.
*/
@@ -508,7 +507,6 @@ static int queue_folios_pte_range(pmd_t
struct folio *folio;
struct queue_pages *qp = walk->private;
unsigned long flags = qp->flags;
- bool has_unmovable = false;
pte_t *pte, *mapped_pte;
pte_t ptent;
spinlock_t *ptl;
@@ -538,11 +536,12 @@ static int queue_folios_pte_range(pmd_t
if (!queue_folio_required(folio, qp))
continue;
if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
- /* MPOL_MF_STRICT must be specified if we get here */
- if (!vma_migratable(vma)) {
- has_unmovable = true;
- break;
- }
+ /*
+ * MPOL_MF_STRICT must be specified if we get here.
+ * Continue walking vmas due to MPOL_MF_MOVE* flags.
+ */
+ if (!vma_migratable(vma))
+ qp->has_unmovable = true;
/*
* Do not abort immediately since there may be
@@ -550,16 +549,13 @@ static int queue_folios_pte_range(pmd_t
* need migrate other LRU pages.
*/
if (migrate_folio_add(folio, qp->pagelist, flags))
- has_unmovable = true;
+ qp->has_unmovable = true;
} else
break;
}
pte_unmap_unlock(mapped_pte, ptl);
cond_resched();
- if (has_unmovable)
- return 1;
-
return addr != end ? -EIO : 0;
}
@@ -599,7 +595,7 @@ static int queue_folios_hugetlb(pte_t *p
* Detecting misplaced folio but allow migrating folios which
* have been queued.
*/
- ret = 1;
+ qp->has_unmovable = true;
goto unlock;
}
@@ -620,7 +616,7 @@ static int queue_folios_hugetlb(pte_t *p
* Failed to isolate folio but allow migrating pages
* which have been queued.
*/
- ret = 1;
+ qp->has_unmovable = true;
}
unlock:
spin_unlock(ptl);
@@ -756,12 +752,15 @@ queue_pages_range(struct mm_struct *mm,
.start = start,
.end = end,
.first = NULL,
+ .has_unmovable = false,
};
const struct mm_walk_ops *ops = lock_vma ?
&queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops;
err = walk_page_range(mm, start, end, ops, &qp);
+ if (qp.has_unmovable)
+ err = 1;
if (!qp.first)
/* whole range in hole */
err = -EFAULT;
@@ -1358,7 +1357,7 @@ static long do_mbind(unsigned long start
putback_movable_pages(&pagelist);
}
- if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT)))
+ if (((ret > 0) || nr_failed) && (flags & MPOL_MF_STRICT))
err = -EIO;
} else {
up_out:
_
Patches currently in -mm which might be from yang(a)os.amperecomputing.com are
The quilt patch titled
Subject: mm, memcg: reconsider kmem.limit_in_bytes deprecation
has been removed from the -mm tree. Its filename was
mm-memcg-reconsider-kmemlimit_in_bytes-deprecation.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Michal Hocko <mhocko(a)suse.com>
Subject: mm, memcg: reconsider kmem.limit_in_bytes deprecation
Date: Thu, 21 Sep 2023 09:38:29 +0200
This reverts commits 86327e8eb94c ("memcg: drop kmem.limit_in_bytes") and
partially reverts 58056f77502f ("memcg, kmem: further deprecate
kmem.limit_in_bytes") which have incrementally removed support for the
kernel memory accounting hard limit. Unfortunately it has turned out that
there is still userspace depending on the existence of
memory.kmem.limit_in_bytes [1]. The underlying functionality is not
really required but the non-existent file just confuses the userspace
which fails in the result. The patch to fix this on the userspace side
has been submitted but it is hard to predict how it will propagate through
the maze of 3rd party consumers of the software.
Now, reverting 86327e8eb94c alone is not an option because there is
another set of userspace which cannot cope with ENOTSUPP returned when
writing to the file. Therefore we have to go and revisit 58056f77502f as
well. There are two ways to go ahead. Either we give up on the
deprecation and fully revert 58056f77502f as well or we can keep
kmem.limit_in_bytes but make the write a noop and warn about the fact.
This should work for both known breaking workloads which depend on the
existence but do not depend on the hard limit enforcement.
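A short sketch of what the surviving userspace does (the cgroup-v1 mount
point and the "test" group name are assumptions): after this change the
write below succeeds again, is a no-op, and triggers the pr_warn_once()
added in the patch.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path =
		"/sys/fs/cgroup/memory/test/memory.kmem.limit_in_bytes";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");	/* kernels with 86327e8eb94c lack the file */
		return 1;
	}
	/* Accepted but not enforced; kernel memory is still only accounted. */
	if (write(fd, "104857600\n", 10) < 0)
		perror("write");
	close(fd);
	return 0;
}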
Note to backporters to stable trees. a8c49af3be5f ("memcg: add per-memcg
total kernel memory stat") introduced in 4.18 has added memcg_account_kmem
so the accounting is not done by obj_cgroup_charge_pages directly for v1
anymore. Prior kernels need to add it explicitly (thanks to Johannes for
pointing this out).
[akpm(a)linux-foundation.org: fix build - remove unused local]
Link: http://lkml.kernel.org/r/20230920081101.GA12096@linuxonhyperv3.guj3yctzbm1e… [1]
Link: https://lkml.kernel.org/r/ZRE5VJozPZt9bRPy@dhcp22.suse.cz
Fixes: 86327e8eb94c ("memcg: drop kmem.limit_in_bytes")
Fixes: 58056f77502f ("memcg, kmem: further deprecate kmem.limit_in_bytes")
Signed-off-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: Shakeel Butt <shakeelb(a)google.com>
Acked-by: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Jeremi Piotrowski <jpiotrowski(a)linux.microsoft.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Roman Gushchin <roman.gushchin(a)linux.dev>
Cc: Tejun heo <tj(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
Documentation/admin-guide/cgroup-v1/memory.rst | 7 +++++++
mm/memcontrol.c | 13 +++++++++++++
2 files changed, 20 insertions(+)
--- a/Documentation/admin-guide/cgroup-v1/memory.rst~mm-memcg-reconsider-kmemlimit_in_bytes-deprecation
+++ a/Documentation/admin-guide/cgroup-v1/memory.rst
@@ -92,6 +92,13 @@ Brief summary of control files.
memory.oom_control set/show oom controls.
memory.numa_stat show the number of memory usage per numa
node
+ memory.kmem.limit_in_bytes Deprecated knob to set and read the kernel
+ memory hard limit. Kernel hard limit is not
+ supported since 5.16. Writing any value to
+ the file will not have any effect, same as if the
+ nokmem kernel parameter was specified.
+ Kernel memory is still charged and reported
+ by memory.kmem.usage_in_bytes.
memory.kmem.usage_in_bytes show current kernel memory allocation
memory.kmem.failcnt show the number of kernel memory usage
hits limits
--- a/mm/memcontrol.c~mm-memcg-reconsider-kmemlimit_in_bytes-deprecation
+++ a/mm/memcontrol.c
@@ -3867,6 +3867,13 @@ static ssize_t mem_cgroup_write(struct k
case _MEMSWAP:
ret = mem_cgroup_resize_max(memcg, nr_pages, true);
break;
+ case _KMEM:
+ pr_warn_once("kmem.limit_in_bytes is deprecated and will be removed. "
+ "Writing any value to this file has no effect. "
+ "Please report your usecase to linux-mm(a)kvack.org if you "
+ "depend on this functionality.\n");
+ ret = 0;
+ break;
case _TCP:
ret = memcg_update_tcp_max(memcg, nr_pages);
break;
@@ -5078,6 +5085,12 @@ static struct cftype mem_cgroup_legacy_f
},
#endif
{
+ .name = "kmem.limit_in_bytes",
+ .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT),
+ .write = mem_cgroup_write,
+ .read_u64 = mem_cgroup_read_u64,
+ },
+ {
.name = "kmem.usage_in_bytes",
.private = MEMFILE_PRIVATE(_KMEM, RES_USAGE),
.read_u64 = mem_cgroup_read_u64,
_
Patches currently in -mm which might be from mhocko(a)suse.com are