From: Roberto Sassu <roberto.sassu(a)huawei.com>
Commit 98de59bfe4b2f ("take calculation of final prot in
security_mmap_file() into a helper") moved the code to update prot, to be
the actual protections applied to the kernel, to a new helper called
mmap_prot().
However, while without the helper ima_file_mmap() was getting the updated
prot, with the helper ima_file_mmap() gets the original prot, which
contains the protections requested by the application.
A possible consequence of this change is that, if an application calls
mmap() with only PROT_READ, and the kernel applies PROT_EXEC in addition,
that application would have access to executable memory without having this
event recorded in the IMA measurement list. This situation would occur for
example if the application, before mmap(), calls the personality() system
call with READ_IMPLIES_EXEC as the first argument.
Align ima_file_mmap() parameters with those of the mmap_file LSM hook, so
that IMA can receive both the requested prot and the final prot. Since the
requested protections are stored in a new variable, and the final
protections are stored in the existing variable, this effectively restores
the original behavior of the MMAP_CHECK hook.
Cc: stable(a)vger.kernel.org
Fixes: 98de59bfe4b2 ("take calculation of final prot in security_mmap_file() into a helper")
Signed-off-by: Roberto Sassu <roberto.sassu(a)huawei.com>
---
include/linux/ima.h | 6 ++++--
security/integrity/ima/ima_main.c | 7 +++++--
security/security.c | 7 ++++---
3 files changed, 13 insertions(+), 7 deletions(-)
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 5a0b2a285a18..d79fee67235e 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -21,7 +21,8 @@ extern int ima_file_check(struct file *file, int mask);
extern void ima_post_create_tmpfile(struct user_namespace *mnt_userns,
struct inode *inode);
extern void ima_file_free(struct file *file);
-extern int ima_file_mmap(struct file *file, unsigned long prot);
+extern int ima_file_mmap(struct file *file, unsigned long reqprot,
+ unsigned long prot, unsigned long flags);
extern int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot);
extern int ima_load_data(enum kernel_load_data_id id, bool contents);
extern int ima_post_load_data(char *buf, loff_t size,
@@ -76,7 +77,8 @@ static inline void ima_file_free(struct file *file)
return;
}
-static inline int ima_file_mmap(struct file *file, unsigned long prot)
+static inline int ima_file_mmap(struct file *file, unsigned long reqprot,
+ unsigned long prot, unsigned long flags)
{
return 0;
}
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 377300973e6c..f48f4e694921 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -397,7 +397,9 @@ static int process_measurement(struct file *file, const struct cred *cred,
/**
* ima_file_mmap - based on policy, collect/store measurement.
* @file: pointer to the file to be measured (May be NULL)
- * @prot: contains the protection that will be applied by the kernel.
+ * @reqprot: protection requested by the application
+ * @prot: protection that will be applied by the kernel
+ * @flags: operational flags
*
* Measure files being mmapped executable based on the ima_must_measure()
* policy decision.
@@ -405,7 +407,8 @@ static int process_measurement(struct file *file, const struct cred *cred,
* On success return 0. On integrity appraisal error, assuming the file
* is in policy and IMA-appraisal is in enforcing mode, return -EACCES.
*/
-int ima_file_mmap(struct file *file, unsigned long prot)
+int ima_file_mmap(struct file *file, unsigned long reqprot,
+ unsigned long prot, unsigned long flags)
{
u32 secid;
diff --git a/security/security.c b/security/security.c
index d1571900a8c7..174afa4fad81 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1661,12 +1661,13 @@ static inline unsigned long mmap_prot(struct file *file, unsigned long prot)
int security_mmap_file(struct file *file, unsigned long prot,
unsigned long flags)
{
+ unsigned long prot_adj = mmap_prot(file, prot);
int ret;
- ret = call_int_hook(mmap_file, 0, file, prot,
- mmap_prot(file, prot), flags);
+
+ ret = call_int_hook(mmap_file, 0, file, prot, prot_adj, flags);
if (ret)
return ret;
- return ima_file_mmap(file, prot);
+ return ima_file_mmap(file, prot, prot_adj, flags);
}
int security_mmap_addr(unsigned long addr)
--
2.25.1
This is a backport of 'commit 4444bc2116ae ("wifi: mac80211: Proper mark
iTXQs for resumption")' from linux 6.2.
If a hw queue is stopped ieee80211_tx_dequeue() should abort any
potential running iTXQ run and mark the queue for resumption later.
This also drops the redundant @txqs_stopped and
@IEEE80211_TXQ_STOP_NETIF_TX is renamed to @IEEE80211_TXQ_DIRTY to
better describe the flag.
Additionally this fixes an use-after-free caused by
ieee80211_tx_dequeue() potentially returning a pointer to a deleted skb.
The original 'commit 4444bc2116ae ("wifi: mac80211: Proper mark
iTXQs for resumption")' in 6.2 only fixed the issue only in combination
with 'commit 592234e941f1 ("wifi: mac80211: Fix iTXQ AMPDU fragmentation
handling")'
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/065cf0e5-2c64-56c6-ee66-a6b61be2dddf@roeck-us.net
Link: https://lore.kernel.org/r/20221230121850.218810-1-alexander@wetzel-home.de
Signed-off-by: Alexander Wetzel <alexander(a)wetzel-home.de>
---
The automatic backport for this and the next patch failed as expected:
https://lore.kernel.org/r/16742967949726@kroah.comhttps://lore.kernel.org/r/167429677624186@kroah.com
Since these patches stack only I've put them into a mini series.
They fix different things but the logic overlaps.
In kernels < 6.2 we still support the old push path and since backporting
'commit 107395f9cf44 ("wifi: mac80211: Drop support for TX push path")'
to stable kernels is a clear no go some changes had to be done to these
patches.
Therefore here are quick manual ports, taking the old push path into
account.
I developed and verified basic functionality with both patches applied
to the v6.1 tree from
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
This versions should also work for kernels < 6.1 with no or minimal
changes.
A quick hostap hwsim test shows no regressions. (Single run, compared to
reference runs I use with wireless-testing kernel)
But it also happened to trigger the KASAN I repored here again:
https://lore.kernel.org/r/20230112173808.6205-1-alexander@wetzel-home.de
So that's indeed an issue in stable...
I'll try to give that another shot with your feedback, soon.
Alexander
---
include/net/mac80211.h | 4 ----
net/mac80211/debugfs_sta.c | 5 +++--
net/mac80211/driver-ops.h | 2 +-
net/mac80211/ieee80211_i.h | 2 +-
net/mac80211/tx.c | 23 +++++++++++++++--------
net/mac80211/util.c | 20 ++++++--------------
6 files changed, 26 insertions(+), 30 deletions(-)
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index ac2bad57933f..72b739dc6d53 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1827,8 +1827,6 @@ struct ieee80211_vif_cfg {
* @drv_priv: data area for driver use, will always be aligned to
* sizeof(void \*).
* @txq: the multicast data TX queue (if driver uses the TXQ abstraction)
- * @txqs_stopped: per AC flag to indicate that intermediate TXQs are stopped,
- * protected by fq->lock.
* @offload_flags: 802.3 -> 802.11 enapsulation offload flags, see
* &enum ieee80211_offload_flags.
* @mbssid_tx_vif: Pointer to the transmitting interface if MBSSID is enabled.
@@ -1857,8 +1855,6 @@ struct ieee80211_vif {
bool probe_req_reg;
bool rx_mcast_action_reg;
- bool txqs_stopped[IEEE80211_NUM_ACS];
-
struct ieee80211_vif *mbssid_tx_vif;
/* must be last */
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index d3397c1248d3..b057253db28d 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -167,7 +167,7 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
continue;
txqi = to_txq_info(sta->sta.txq[i]);
p += scnprintf(p, bufsz + buf - p,
- "%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s)\n",
+ "%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s%s)\n",
txqi->txq.tid,
txqi->txq.ac,
txqi->tin.backlog_bytes,
@@ -182,7 +182,8 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
txqi->flags,
test_bit(IEEE80211_TXQ_STOP, &txqi->flags) ? "STOP" : "RUN",
test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags) ? " AMPDU" : "",
- test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags) ? " NO-AMSDU" : "");
+ test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags) ? " NO-AMSDU" : "",
+ test_bit(IEEE80211_TXQ_DIRTY, &txqi->flags) ? " DIRTY" : "");
}
rcu_read_unlock();
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 81e40b0a3b16..e685c12757f4 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1183,7 +1183,7 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local,
/* In reconfig don't transmit now, but mark for waking later */
if (local->in_reconfig) {
- set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txq->flags);
+ set_bit(IEEE80211_TXQ_DIRTY, &txq->flags);
return;
}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a842f2e1c230..9027c6354251 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -835,7 +835,7 @@ enum txq_info_flags {
IEEE80211_TXQ_STOP,
IEEE80211_TXQ_AMPDU,
IEEE80211_TXQ_NO_AMSDU,
- IEEE80211_TXQ_STOP_NETIF_TX,
+ IEEE80211_TXQ_DIRTY,
};
/**
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 874f2a4d831d..3363e322cfd9 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3709,13 +3709,15 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
struct ieee80211_local *local = hw_to_local(hw);
struct txq_info *txqi = container_of(txq, struct txq_info, txq);
struct ieee80211_hdr *hdr;
- struct sk_buff *skb = NULL;
struct fq *fq = &local->fq;
struct fq_tin *tin = &txqi->tin;
struct ieee80211_tx_info *info;
struct ieee80211_tx_data tx;
+ struct sk_buff *skb;
ieee80211_tx_result r;
struct ieee80211_vif *vif = txq->vif;
+ int q = vif->hw_queue[txq->ac];
+ bool q_stopped;
WARN_ON_ONCE(softirq_count() == 0);
@@ -3723,16 +3725,21 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
return NULL;
begin:
- spin_lock_bh(&fq->lock);
+ skb = NULL;
+ spin_lock(&local->queue_stop_reason_lock);
+ q_stopped = local->queue_stop_reasons[q];
+ spin_unlock(&local->queue_stop_reason_lock);
+
+ if (unlikely(q_stopped)) {
+ /* mark for waking later */
+ set_bit(IEEE80211_TXQ_DIRTY, &txqi->flags);
+ return NULL;
+ }
- if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags) ||
- test_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags))
- goto out;
+ spin_lock_bh(&fq->lock);
- if (vif->txqs_stopped[txq->ac]) {
- set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags);
+ if (unlikely(test_bit(IEEE80211_TXQ_STOP, &txqi->flags)))
goto out;
- }
/* Make sure fragments stay together. */
skb = __skb_dequeue(&txqi->frags);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index b512cb37aafb..ed53c51bbc32 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -301,8 +301,6 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac)
local_bh_disable();
spin_lock(&fq->lock);
- sdata->vif.txqs_stopped[ac] = false;
-
if (!test_bit(SDATA_STATE_RUNNING, &sdata->state))
goto out;
@@ -324,7 +322,7 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac)
if (ac != txq->ac)
continue;
- if (!test_and_clear_bit(IEEE80211_TXQ_STOP_NETIF_TX,
+ if (!test_and_clear_bit(IEEE80211_TXQ_DIRTY,
&txqi->flags))
continue;
@@ -339,7 +337,7 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac)
txqi = to_txq_info(vif->txq);
- if (!test_and_clear_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags) ||
+ if (!test_and_clear_bit(IEEE80211_TXQ_DIRTY, &txqi->flags) ||
(ps && atomic_read(&ps->num_sta_ps)) || ac != vif->txq->ac)
goto out;
@@ -537,16 +535,10 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
continue;
for (ac = 0; ac < n_acs; ac++) {
- if (sdata->vif.hw_queue[ac] == queue ||
- sdata->vif.cab_queue == queue) {
- if (!local->ops->wake_tx_queue) {
- netif_stop_subqueue(sdata->dev, ac);
- continue;
- }
- spin_lock(&local->fq.lock);
- sdata->vif.txqs_stopped[ac] = true;
- spin_unlock(&local->fq.lock);
- }
+ if (!local->ops->wake_tx_queue &&
+ (sdata->vif.hw_queue[ac] == queue ||
+ sdata->vif.cab_queue == queue))
+ netif_stop_subqueue(sdata->dev, ac);
}
}
rcu_read_unlock();
--
2.39.0
Svace has identified unchecked return value of mt7615_init_tx_queue
function in 5.10 branch, even though it makes sense to track it
instead. This issue is fixed in upstream version by Lorenzo's patch.
The same patch can be cleanly applied to the 5.10 branch.
Found by Linux Verification Center (linuxtesting.org) with SVACE.
[Public]
Hi,
This commit that went into 6.2 has been shown to help timing corner cases with suspend problems.
4b31b92b143f ("drm/amdgpu: complete gfxoff allow signal during suspend without delay")
Can you please take it to 6.1.y and 5.15.y?
Thanks,
From: Adrian Zaharia <Adrian.Zaharia(a)windriver.com>
This patch enables the flag to defer registering primary roothub if xhci has two roothubs.
Link to upstream patch:
https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git/commit/?id=b…
The support for deferring roothub registration was added in 5.10.121.
(See: a2532c441705 "usb: core: hcd: Add support for deferring roothub registration")
Kishon Vijay Abraham I (1):
xhci: Set HCD flag to defer primary roothub registration
drivers/usb/host/xhci.c | 2 ++
1 file changed, 2 insertions(+)
base-commit: 179624a57b78c02de833370b7bdf0b0f4a27ca31
--
2.39.1
Passing the host topology to the guest is almost certainly wrong
and will confuse the scheduler. In addition, several fields of
these CPUID leaves vary on each processor; it is simply impossible to
return the right values from KVM_GET_SUPPORTED_CPUID in such a way that
they can be passed to KVM_SET_CPUID2.
The values that will most likely prevent confusion are all zeroes.
Userspace will have to override it anyway if it wishes to present a
specific topology to the guest.
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
---
Documentation/virt/kvm/api.rst | 14 ++++++++++++++
arch/x86/kvm/cpuid.c | 32 ++++++++++++++++----------------
2 files changed, 30 insertions(+), 16 deletions(-)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index eee9f857a986..20f4f6b302ff 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -8249,6 +8249,20 @@ CPU[EAX=1]:ECX[24] (TSC_DEADLINE) is not reported by ``KVM_GET_SUPPORTED_CPUID``
It can be enabled if ``KVM_CAP_TSC_DEADLINE_TIMER`` is present and the kernel
has enabled in-kernel emulation of the local APIC.
+CPU topology
+~~~~~~~~~~~~
+
+Several CPUID values include topology information for the host CPU:
+0x0b and 0x1f for Intel systems, 0x8000001e for AMD systems. Different
+versions of KVM return different values for this information and userspace
+should not rely on it. Currently they return all zeroes.
+
+If userspace wishes to set up a guest topology, it should be careful that
+the values of these three leaves differ for each CPU. In particular,
+the APIC ID is found in EDX for all subleaves of 0x0b and 0x1f, and in EAX
+for 0x8000001e; the latter also encodes the core id and node id in bits
+7:0 of EBX and ECX respectively.
+
Obsolete ioctls and capabilities
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0810e93cbedc..164bfb7e7a16 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -759,16 +759,22 @@ struct kvm_cpuid_array {
int nent;
};
+static struct kvm_cpuid_entry2 *get_next_cpuid(struct kvm_cpuid_array *array)
+{
+ if (array->nent >= array->maxnent)
+ return NULL;
+
+ return &array->entries[array->nent++];
+}
+
static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array,
u32 function, u32 index)
{
- struct kvm_cpuid_entry2 *entry;
+ struct kvm_cpuid_entry2 *entry = get_next_cpuid(array);
- if (array->nent >= array->maxnent)
+ if (!entry)
return NULL;
- entry = &array->entries[array->nent++];
-
memset(entry, 0, sizeof(*entry));
entry->function = function;
entry->index = index;
@@ -945,22 +951,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->edx = edx.full;
break;
}
- /*
- * Per Intel's SDM, the 0x1f is a superset of 0xb,
- * thus they can be handled by common code.
- */
case 0x1f:
case 0xb:
/*
- * Populate entries until the level type (ECX[15:8]) of the
- * previous entry is zero. Note, CPUID EAX.{0x1f,0xb}.0 is
- * the starting entry, filled by the primary do_host_cpuid().
+ * No topology; a valid topology is indicated by the presence
+ * of subleaf 1.
*/
- for (i = 1; entry->ecx & 0xff00; ++i) {
- entry = do_host_cpuid(array, function, i);
- if (!entry)
- goto out;
- }
+ entry->eax = entry->ebx = entry->ecx = 0;
break;
case 0xd: {
u64 permitted_xcr0 = kvm_caps.supported_xcr0 & xstate_get_guest_group_perm();
@@ -1193,6 +1190,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->ebx = entry->ecx = entry->edx = 0;
break;
case 0x8000001e:
+ /* Do not return host topology information. */
+ entry->eax = entry->ebx = entry->ecx = 0;
+ entry->edx = 0; /* reserved */
break;
case 0x8000001F:
if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) {
--
2.31.1
state_lock, the spinlock type is meant to protect race against concurrent
MHI state transitions. In mhi_ep_set_m0_state(), while the state_lock is
being held, the channels are resumed in mhi_ep_resume_channels() if the
previous state was M3. This causes sleeping in atomic bug, since
mhi_ep_resume_channels() use mutex internally.
Since the state_lock is supposed to be held throughout the state change,
it is not ideal to drop the lock before calling mhi_ep_resume_channels().
So to fix this issue, let's change the type of state_lock to mutex. This
would also allow holding the lock throughout all state transitions thereby
avoiding any potential race.
Cc: <stable(a)vger.kernel.org> # 5.19
Fixes: e4b7b5f0f30a ("bus: mhi: ep: Add support for suspending and resuming channels")
Reported-by: Dan Carpenter <error27(a)gmail.com>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
---
drivers/bus/mhi/ep/main.c | 8 +++++---
drivers/bus/mhi/ep/sm.c | 42 ++++++++++++++++++++++-----------------
include/linux/mhi_ep.h | 4 ++--
3 files changed, 31 insertions(+), 23 deletions(-)
diff --git a/drivers/bus/mhi/ep/main.c b/drivers/bus/mhi/ep/main.c
index bcaaba97ef63..528c00b232bf 100644
--- a/drivers/bus/mhi/ep/main.c
+++ b/drivers/bus/mhi/ep/main.c
@@ -1001,11 +1001,11 @@ static void mhi_ep_reset_worker(struct work_struct *work)
mhi_ep_power_down(mhi_cntrl);
- spin_lock_bh(&mhi_cntrl->state_lock);
+ mutex_lock(&mhi_cntrl->state_lock);
+
/* Reset MMIO to signal host that the MHI_RESET is completed in endpoint */
mhi_ep_mmio_reset(mhi_cntrl);
cur_state = mhi_cntrl->mhi_state;
- spin_unlock_bh(&mhi_cntrl->state_lock);
/*
* Only proceed further if the reset is due to SYS_ERR. The host will
@@ -1014,6 +1014,8 @@ static void mhi_ep_reset_worker(struct work_struct *work)
*/
if (cur_state == MHI_STATE_SYS_ERR)
mhi_ep_power_up(mhi_cntrl);
+
+ mutex_unlock(&mhi_cntrl->state_lock);
}
/*
@@ -1386,8 +1388,8 @@ int mhi_ep_register_controller(struct mhi_ep_cntrl *mhi_cntrl,
INIT_LIST_HEAD(&mhi_cntrl->st_transition_list);
INIT_LIST_HEAD(&mhi_cntrl->ch_db_list);
- spin_lock_init(&mhi_cntrl->state_lock);
spin_lock_init(&mhi_cntrl->list_lock);
+ mutex_init(&mhi_cntrl->state_lock);
mutex_init(&mhi_cntrl->event_lock);
/* Set MHI version and AMSS EE before enumeration */
diff --git a/drivers/bus/mhi/ep/sm.c b/drivers/bus/mhi/ep/sm.c
index 3655c19e23c7..fd200b2ac0bb 100644
--- a/drivers/bus/mhi/ep/sm.c
+++ b/drivers/bus/mhi/ep/sm.c
@@ -63,24 +63,23 @@ int mhi_ep_set_m0_state(struct mhi_ep_cntrl *mhi_cntrl)
int ret;
/* If MHI is in M3, resume suspended channels */
- spin_lock_bh(&mhi_cntrl->state_lock);
+ mutex_lock(&mhi_cntrl->state_lock);
+
old_state = mhi_cntrl->mhi_state;
if (old_state == MHI_STATE_M3)
mhi_ep_resume_channels(mhi_cntrl);
ret = mhi_ep_set_mhi_state(mhi_cntrl, MHI_STATE_M0);
- spin_unlock_bh(&mhi_cntrl->state_lock);
-
if (ret) {
mhi_ep_handle_syserr(mhi_cntrl);
- return ret;
+ goto err_unlock;
}
/* Signal host that the device moved to M0 */
ret = mhi_ep_send_state_change_event(mhi_cntrl, MHI_STATE_M0);
if (ret) {
dev_err(dev, "Failed sending M0 state change event\n");
- return ret;
+ goto err_unlock;
}
if (old_state == MHI_STATE_READY) {
@@ -88,11 +87,14 @@ int mhi_ep_set_m0_state(struct mhi_ep_cntrl *mhi_cntrl)
ret = mhi_ep_send_ee_event(mhi_cntrl, MHI_EE_AMSS);
if (ret) {
dev_err(dev, "Failed sending AMSS EE event\n");
- return ret;
+ goto err_unlock;
}
}
- return 0;
+err_unlock:
+ mutex_unlock(&mhi_cntrl->state_lock);
+
+ return ret;
}
int mhi_ep_set_m3_state(struct mhi_ep_cntrl *mhi_cntrl)
@@ -100,13 +102,12 @@ int mhi_ep_set_m3_state(struct mhi_ep_cntrl *mhi_cntrl)
struct device *dev = &mhi_cntrl->mhi_dev->dev;
int ret;
- spin_lock_bh(&mhi_cntrl->state_lock);
- ret = mhi_ep_set_mhi_state(mhi_cntrl, MHI_STATE_M3);
- spin_unlock_bh(&mhi_cntrl->state_lock);
+ mutex_lock(&mhi_cntrl->state_lock);
+ ret = mhi_ep_set_mhi_state(mhi_cntrl, MHI_STATE_M3);
if (ret) {
mhi_ep_handle_syserr(mhi_cntrl);
- return ret;
+ goto err_unlock;
}
mhi_ep_suspend_channels(mhi_cntrl);
@@ -115,10 +116,13 @@ int mhi_ep_set_m3_state(struct mhi_ep_cntrl *mhi_cntrl)
ret = mhi_ep_send_state_change_event(mhi_cntrl, MHI_STATE_M3);
if (ret) {
dev_err(dev, "Failed sending M3 state change event\n");
- return ret;
+ goto err_unlock;
}
- return 0;
+err_unlock:
+ mutex_unlock(&mhi_cntrl->state_lock);
+
+ return ret;
}
int mhi_ep_set_ready_state(struct mhi_ep_cntrl *mhi_cntrl)
@@ -127,22 +131,24 @@ int mhi_ep_set_ready_state(struct mhi_ep_cntrl *mhi_cntrl)
enum mhi_state mhi_state;
int ret, is_ready;
- spin_lock_bh(&mhi_cntrl->state_lock);
+ mutex_lock(&mhi_cntrl->state_lock);
+
/* Ensure that the MHISTATUS is set to RESET by host */
mhi_state = mhi_ep_mmio_masked_read(mhi_cntrl, EP_MHISTATUS, MHISTATUS_MHISTATE_MASK);
is_ready = mhi_ep_mmio_masked_read(mhi_cntrl, EP_MHISTATUS, MHISTATUS_READY_MASK);
if (mhi_state != MHI_STATE_RESET || is_ready) {
dev_err(dev, "READY state transition failed. MHI host not in RESET state\n");
- spin_unlock_bh(&mhi_cntrl->state_lock);
- return -EIO;
+ ret = -EIO;
+ goto err_unlock;
}
ret = mhi_ep_set_mhi_state(mhi_cntrl, MHI_STATE_READY);
- spin_unlock_bh(&mhi_cntrl->state_lock);
-
if (ret)
mhi_ep_handle_syserr(mhi_cntrl);
+err_unlock:
+ mutex_unlock(&mhi_cntrl->state_lock);
+
return ret;
}
diff --git a/include/linux/mhi_ep.h b/include/linux/mhi_ep.h
index 478aece17046..f198a8ac7ee7 100644
--- a/include/linux/mhi_ep.h
+++ b/include/linux/mhi_ep.h
@@ -70,8 +70,8 @@ struct mhi_ep_db_info {
* @cmd_ctx_cache_phys: Physical address of the host command context cache
* @chdb: Array of channel doorbell interrupt info
* @event_lock: Lock for protecting event rings
- * @list_lock: Lock for protecting state transition and channel doorbell lists
* @state_lock: Lock for protecting state transitions
+ * @list_lock: Lock for protecting state transition and channel doorbell lists
* @st_transition_list: List of state transitions
* @ch_db_list: List of queued channel doorbells
* @wq: Dedicated workqueue for handling rings and state changes
@@ -117,8 +117,8 @@ struct mhi_ep_cntrl {
struct mhi_ep_db_info chdb[4];
struct mutex event_lock;
+ struct mutex state_lock;
spinlock_t list_lock;
- spinlock_t state_lock;
struct list_head st_transition_list;
struct list_head ch_db_list;
--
2.25.1
A Sysbot [1] corrupted filesystem exposes two flaws in the
handling and sanity checking of the xattr_ids count in the
filesystem. Both of these flaws cause computation overflow
due to incorrect typing.
In the corrupted filesystem the xattr_ids value is 4294967071, which
stored in a signed variable becomes the negative number -225.
Flaw 1 (64-bit systems only):
The signed integer xattr_ids variable causes sign extension.
This causes variable overflow in the SQUASHFS_XATTR_*(A) macros.
The variable is first multiplied by sizeof(struct squashfs_xattr_id)
where the type of the sizeof operator is "unsigned long".
On a 64-bit system this is 64-bits in size, and causes the negative
number to be sign extended and widened to 64-bits and then become unsigned.
This produces the very large number 18446744073709548016 or 2^64 - 3600.
This number when rounded up by SQUASHFS_METADATA_SIZE - 1 (8191 bytes) and
divided by SQUASHFS_METADATA_SIZE overflows and produces a length of 0
(stored in len).
Flaw 2 (32-bit systems only):
On a 32-bit system the integer variable is not widened by the unsigned
long type of the sizeof operator (32-bits), and the signedness of the
variable has no effect due it always being treated as unsigned.
The above corrupted xattr_ids value of 4294967071, when multiplied
overflows and produces the number 4294963696 or 2^32 - 3400. This
number when rounded up by SQUASHFS_METADATA_SIZE - 1 (8191 bytes) and
divided by SQUASHFS_METADATA_SIZE overflows again and produces a length
of 0.
The effect of the 0 length computation:
In conjunction with the corrupted xattr_ids field, the filesystem
also has a corrupted xattr_table_start value, where it matches
the end of filesystem value of 850.
This causes the following sanity check code to fail because the incorrectly
computed len of 0 matches the incorrect size of the table reported by the
superblock (0 bytes).
len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids);
indexes = SQUASHFS_XATTR_BLOCKS(*xattr_ids);
/*
* The computed size of the index table (len bytes) should exactly
* match the table start and end points
*/
start = table_start + sizeof(*id_table);
end = msblk->bytes_used;
if (len != (end - start))
return ERR_PTR(-EINVAL);
Changing the xattr_ids variable to be "usigned int" fixes the flaw
on a 64-bit system. This relies on the fact the computation is widened
by the unsigned long type of the sizeof operator.
Casting the variable to u64 in the above macro fixes this flaw on a 32-bit
system.
It also means 64-bit systems do not implicitly rely on the type of the
sizeof operator to widen the computation.
[1] https://lore.kernel.org/lkml/000000000000cd44f005f1a0f17f@google.com/
Fixes: 506220d2ba21 ("squashfs: add more sanity checks in xattr id lookup")
Reported-by: syzbot+082fa4af80a5bb1a9843(a)syzkaller.appspotmail.com
Signed-off-by: Phillip Lougher <phillip(a)squashfs.org.uk>
Cc: <stable(a)vger.kernel.org>
---
fs/squashfs/squashfs_fs.h | 2 +-
fs/squashfs/squashfs_fs_sb.h | 2 +-
fs/squashfs/xattr.h | 4 ++--
fs/squashfs/xattr_id.c | 2 +-
4 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index b3fdc8212c5f..95f8e8901768 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -183,7 +183,7 @@ static inline int squashfs_block_size(__le32 raw)
#define SQUASHFS_ID_BLOCK_BYTES(A) (SQUASHFS_ID_BLOCKS(A) *\
sizeof(u64))
/* xattr id lookup table defines */
-#define SQUASHFS_XATTR_BYTES(A) ((A) * sizeof(struct squashfs_xattr_id))
+#define SQUASHFS_XATTR_BYTES(A) (((u64) (A)) * sizeof(struct squashfs_xattr_id))
#define SQUASHFS_XATTR_BLOCK(A) (SQUASHFS_XATTR_BYTES(A) / \
SQUASHFS_METADATA_SIZE)
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 659082e9e51d..72f6f4b37863 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -63,7 +63,7 @@ struct squashfs_sb_info {
long long bytes_used;
unsigned int inodes;
unsigned int fragments;
- int xattr_ids;
+ unsigned int xattr_ids;
unsigned int ids;
bool panic_on_errors;
const struct squashfs_decompressor_thread_ops *thread_ops;
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h
index d8a270d3ac4c..f1a463d8bfa0 100644
--- a/fs/squashfs/xattr.h
+++ b/fs/squashfs/xattr.h
@@ -10,12 +10,12 @@
#ifdef CONFIG_SQUASHFS_XATTR
extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64,
- u64 *, int *);
+ u64 *, unsigned int *);
extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *,
unsigned int *, unsigned long long *);
#else
static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
- u64 start, u64 *xattr_table_start, int *xattr_ids)
+ u64 start, u64 *xattr_table_start, unsigned int *xattr_ids)
{
struct squashfs_xattr_id_table *id_table;
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index 087cab8c78f4..c8469c656e0d 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -56,7 +56,7 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index,
* Read uncompressed xattr id lookup table indexes from disk into memory
*/
__le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start,
- u64 *xattr_table_start, int *xattr_ids)
+ u64 *xattr_table_start, unsigned int *xattr_ids)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
unsigned int len, indexes;
--
2.35.1
Stable team, please backport these two commits to v6.1:
2bd0db4b3f0b ("drm/i915: Allow panel fixed modes to have differing sync polarities")
55cfeecc2197 ("drm/i915: Allow alternate fixed modes always for eDP")
Reference for posterity: https://gitlab.freedesktop.org/drm/intel/-/issues/7841
Thanks,
Jani.
--
Jani Nikula, Intel Open Source Graphics Center
Syzkaller reports use-after-free in hci_cmd_timeout(). The bug was fixed
in the following patch and can be cleanly applied to 6.1 stable tree.
Due to some technical rearrangement, the fix for older stable branches
requires a different patch which I'll send you in another thread.
The patch titled
Subject: highmem: round down the address passed to kunmap_flush_on_unmap()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
highmem-round-down-the-address-passed-to-kunmap_flush_on_unmap.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: "Matthew Wilcox (Oracle)" <willy(a)infradead.org>
Subject: highmem: round down the address passed to kunmap_flush_on_unmap()
Date: Thu, 26 Jan 2023 20:07:27 +0000
We already round down the address in kunmap_local_indexed() which is the
other implementation of __kunmap_local(). The only implementation of
kunmap_flush_on_unmap() is PA-RISC which is expecting a page-aligned
address. This may be causing PA-RISC to be flushing the wrong addresses
currently.
Link: https://lkml.kernel.org/r/20230126200727.1680362-1-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Fixes: 298fa1ad5571 ("highmem: Provide generic variant of kmap_atomic*")
Cc: "Fabio M. De Francesco" <fmdefrancesco(a)gmail.com>
Cc: Ira Weiny <ira.weiny(a)intel.com>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Ira Weiny <ira.weiny(a)intel.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Helge Deller <deller(a)gmx.de>
Cc: Alexander Potapenko <glider(a)google.com>
Cc: Andrey Konovalov <andreyknvl(a)gmail.com>
Cc: Bagas Sanjaya <bagasdotme(a)gmail.com>
Cc: David Sterba <dsterba(a)suse.com>
Cc: "Fabio M. De Francesco" <fmdefrancesco(a)gmail.com>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Cc: Tony Luck <tony.luck(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/highmem-internal.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/include/linux/highmem-internal.h~highmem-round-down-the-address-passed-to-kunmap_flush_on_unmap
+++ a/include/linux/highmem-internal.h
@@ -200,7 +200,7 @@ static inline void *kmap_local_pfn(unsig
static inline void __kunmap_local(const void *addr)
{
#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
- kunmap_flush_on_unmap(addr);
+ kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE));
#endif
}
@@ -227,7 +227,7 @@ static inline void *kmap_atomic_pfn(unsi
static inline void __kunmap_atomic(const void *addr)
{
#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
- kunmap_flush_on_unmap(addr);
+ kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE));
#endif
pagefault_enable();
if (IS_ENABLED(CONFIG_PREEMPT_RT))
_
Patches currently in -mm which might be from willy(a)infradead.org are
highmem-round-down-the-address-passed-to-kunmap_flush_on_unmap.patch
mm-remove-folio_pincount_ptr-and-head_compound_pincount.patch
mm-convert-head_subpages_mapcount-into-folio_nr_pages_mapped.patch
doc-clarify-refcount-section-by-referring-to-folios-pages.patch
mm-convert-total_compound_mapcount-to-folio_total_mapcount.patch
mm-convert-page_remove_rmap-to-use-a-folio-internally.patch
mm-convert-page_add_anon_rmap-to-use-a-folio-internally.patch
mm-convert-page_add_file_rmap-to-use-a-folio-internally.patch
mm-add-folio_add_new_anon_rmap.patch
mm-add-folio_add_new_anon_rmap-fix-2.patch
page_alloc-use-folio-fields-directly.patch
mm-use-a-folio-in-hugepage_add_anon_rmap-and-hugepage_add_new_anon_rmap.patch
mm-use-entire_mapcount-in-__page_dup_rmap.patch
mm-debug-remove-call-to-head_compound_mapcount.patch
hugetlb-remove-uses-of-folio_mapcount_ptr.patch
mm-convert-page_mapcount-to-use-folio_entire_mapcount.patch
mm-remove-head_compound_mapcount-and-_ptr-functions.patch
mm-reimplement-compound_order.patch
mm-reimplement-compound_nr.patch
mm-reimplement-compound_nr-fix.patch
mm-convert-set_compound_page_dtor-and-set_compound_order-to-folios.patch
mm-convert-is_transparent_hugepage-to-use-a-folio.patch
mm-convert-destroy_large_folio-to-use-folio_dtor.patch
hugetlb-remove-uses-of-compound_dtor-and-compound_nr.patch
mm-remove-first-tail-page-members-from-struct-page.patch
doc-correct-struct-folio-kernel-doc.patch
mm-move-page-deferred_list-to-folio-_deferred_list.patch
mm-huge_memory-remove-page_deferred_list.patch
mm-huge_memory-convert-get_deferred_split_queue-to-take-a-folio.patch
mm-convert-deferred_split_huge_page-to-deferred_split_folio.patch
shmem-convert-shmem_write_end-to-use-a-folio.patch
mm-add-vma_alloc_zeroed_movable_folio.patch
mm-convert-do_anonymous_page-to-use-a-folio.patch
mm-convert-wp_page_copy-to-use-folios.patch
mm-use-a-folio-in-copy_pte_range.patch
mm-use-a-folio-in-copy_present_pte.patch
mm-fs-convert-inode_attach_wb-to-take-a-folio.patch
mm-convert-mem_cgroup_css_from_page-to-mem_cgroup_css_from_folio.patch
mm-remove-page_evictable.patch
mm-remove-mlock_vma_page.patch
mm-remove-munlock_vma_page.patch
mm-clean-up-mlock_page-munlock_page-references-in-comments.patch
rmap-add-folio-parameter-to-__page_set_anon_rmap.patch
filemap-convert-filemap_map_pmd-to-take-a-folio.patch
filemap-convert-filemap_range_has_page-to-use-a-folio.patch
readahead-convert-readahead_expand-to-use-a-folio.patch
mm-add-memcpy_from_file_folio.patch
fs-convert-writepage_t-callback-to-pass-a-folio.patch
mpage-convert-__mpage_writepage-to-use-a-folio-more-fully.patch
The patch titled
Subject: migrate: hugetlb: check for hugetlb shared PMD in node migration
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
migrate-hugetlb-check-for-hugetlb-shared-pmd-in-node-migration.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Subject: migrate: hugetlb: check for hugetlb shared PMD in node migration
Date: Thu, 26 Jan 2023 14:27:21 -0800
migrate_pages/mempolicy semantics state that CAP_SYS_NICE is required to
move pages shared with another process to a different node. page_mapcount
> 1 is being used to determine if a hugetlb page is shared. However, a
hugetlb page will have a mapcount of 1 if mapped by multiple processes via
a shared PMD. As a result, hugetlb pages shared by multiple processes and
mapped with a shared PMD can be moved by a process without CAP_SYS_NICE.
To fix, check for a shared PMD if mapcount is 1. If a shared PMD is found
consider the page shared.
Link: https://lkml.kernel.org/r/20230126222721.222195-3-mike.kravetz@oracle.com
Fixes: e2d8cf405525 ("migrate: add hugepage migration code to migrate_pages()")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: James Houghton <jthoughton(a)google.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Muchun Song <songmuchun(a)bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi(a)linux.dev>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Vishal Moola (Oracle) <vishal.moola(a)gmail.com>
Cc: Yang Shi <shy828301(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/mempolicy.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
--- a/mm/mempolicy.c~migrate-hugetlb-check-for-hugetlb-shared-pmd-in-node-migration
+++ a/mm/mempolicy.c
@@ -600,7 +600,8 @@ static int queue_pages_hugetlb(pte_t *pt
/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
if (flags & (MPOL_MF_MOVE_ALL) ||
- (flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) {
+ (flags & MPOL_MF_MOVE && page_mapcount(page) == 1 &&
+ !hugetlb_pmd_shared(pte))) {
if (isolate_hugetlb(page, qp->pagelist) &&
(flags & MPOL_MF_STRICT))
/*
_
Patches currently in -mm which might be from mike.kravetz(a)oracle.com are
mm-hugetlb-proc-check-for-hugetlb-shared-pmd-in-proc-pid-smaps.patch
migrate-hugetlb-check-for-hugetlb-shared-pmd-in-node-migration.patch
The patch titled
Subject: mm: hugetlb: proc: check for hugetlb shared PMD in /proc/PID/smaps
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-hugetlb-proc-check-for-hugetlb-shared-pmd-in-proc-pid-smaps.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Subject: mm: hugetlb: proc: check for hugetlb shared PMD in /proc/PID/smaps
Date: Thu, 26 Jan 2023 14:27:20 -0800
Patch series "Fixes for hugetlb mapcount at most 1 for shared PMDs".
This issue of mapcount in hugetlb pages referenced by shared PMDs was
discussed in [1]. The following two patches address user visible behavior
caused by this issue.
[1] https://lore.kernel.org/linux-mm/Y9BF+OCdWnCSilEu@monkey/
This patch (of 2):
A hugetlb page will have a mapcount of 1 if mapped by multiple processes
via a shared PMD. This is because only the first process increases the
map count, and subsequent processes just add the shared PMD page to their
page table.
page_mapcount is being used to decide if a hugetlb page is shared or
private in /proc/PID/smaps. Pages referenced via a shared PMD were
incorrectly being counted as private.
To fix, check for a shared PMD if mapcount is 1. If a shared PMD is found
count the hugetlb page as shared. A new helper to check for a shared PMD
is added.
Link: https://lkml.kernel.org/r/20230126222721.222195-2-mike.kravetz@oracle.com
Fixes: 25ee01a2fca0 ("mm: hugetlb: proc: add hugetlb-related fields to /proc/PID/smaps")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: James Houghton <jthoughton(a)google.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Muchun Song <songmuchun(a)bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi(a)linux.dev>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Vishal Moola (Oracle) <vishal.moola(a)gmail.com>
Cc: Yang Shi <shy828301(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/proc/task_mmu.c | 10 ++++++++--
include/linux/hugetlb.h | 12 ++++++++++++
2 files changed, 20 insertions(+), 2 deletions(-)
--- a/fs/proc/task_mmu.c~mm-hugetlb-proc-check-for-hugetlb-shared-pmd-in-proc-pid-smaps
+++ a/fs/proc/task_mmu.c
@@ -749,8 +749,14 @@ static int smaps_hugetlb_range(pte_t *pt
if (mapcount >= 2)
mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
- else
- mss->private_hugetlb += huge_page_size(hstate_vma(vma));
+ else {
+ if (hugetlb_pmd_shared(pte))
+ mss->shared_hugetlb +=
+ huge_page_size(hstate_vma(vma));
+ else
+ mss->private_hugetlb +=
+ huge_page_size(hstate_vma(vma));
+ }
}
return 0;
}
--- a/include/linux/hugetlb.h~mm-hugetlb-proc-check-for-hugetlb-shared-pmd-in-proc-pid-smaps
+++ a/include/linux/hugetlb.h
@@ -1187,6 +1187,18 @@ static inline __init void hugetlb_cma_re
}
#endif
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+static inline bool hugetlb_pmd_shared(pte_t *pte)
+{
+ return page_count(virt_to_page(pte)) > 1;
+}
+#else
+static inline bool hugetlb_pmd_shared(pte_t *pte)
+{
+ return false;
+}
+#endif
+
bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr);
#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
_
Patches currently in -mm which might be from mike.kravetz(a)oracle.com are
mm-hugetlb-proc-check-for-hugetlb-shared-pmd-in-proc-pid-smaps.patch
migrate-hugetlb-check-for-hugetlb-shared-pmd-in-node-migration.patch
Use the correct old/new topology and payload states in
intel_mst_disable_dp(). So far drm_atomic_get_mst_topology_state() it
used returned either the old state, in case the state was added already
earlier during the atomic check phase or otherwise the new state (but
the latter could fail, which can't be handled in the enable/disable
hooks). After the first patch in the patchset, the state should always
get added already during the check phase, so here we can get the
old/new states without a failure.
drm_dp_remove_payload() should use time_slots from the old payload state
and vc_start_slot in the new one. It should update the new payload
states to reflect the sink's current payload table after the payload is
removed. Pass the new topology state and the old and new payload states
accordingly.
This also fixes a problem where the payload allocations for multiple MST
streams on the same link got inconsistent after a few commits, as
during payload removal the old instead of the new payload state got
updated, so the subsequent enabling sequence and commits used a stale
payload state.
Cc: Lyude Paul <lyude(a)redhat.com>
Cc: stable(a)vger.kernel.org # 6.1
Signed-off-by: Imre Deak <imre.deak(a)intel.com>
---
drivers/gpu/drm/i915/display/intel_dp_mst.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 5f7bcb5c14847..800fa12a61d93 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -524,10 +524,14 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state,
struct intel_dp *intel_dp = &dig_port->dp;
struct intel_connector *connector =
to_intel_connector(old_conn_state->connector);
- struct drm_dp_mst_topology_state *mst_state =
- drm_atomic_get_mst_topology_state(&state->base, &intel_dp->mst_mgr);
- struct drm_dp_mst_atomic_payload *payload =
- drm_atomic_get_mst_payload_state(mst_state, connector->port);
+ struct drm_dp_mst_topology_state *old_mst_state =
+ drm_atomic_get_old_mst_topology_state(&state->base, &intel_dp->mst_mgr);
+ struct drm_dp_mst_topology_state *new_mst_state =
+ drm_atomic_get_new_mst_topology_state(&state->base, &intel_dp->mst_mgr);
+ struct drm_dp_mst_atomic_payload *old_payload =
+ drm_atomic_get_mst_payload_state(old_mst_state, connector->port);
+ struct drm_dp_mst_atomic_payload *new_payload =
+ drm_atomic_get_mst_payload_state(new_mst_state, connector->port);
struct drm_i915_private *i915 = to_i915(connector->base.dev);
drm_dbg_kms(&i915->drm, "active links %d\n",
@@ -535,8 +539,8 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state,
intel_hdcp_disable(intel_mst->connector);
- drm_dp_remove_payload(&intel_dp->mst_mgr, mst_state,
- payload, payload);
+ drm_dp_remove_payload(&intel_dp->mst_mgr, new_mst_state,
+ old_payload, new_payload);
intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state);
}
--
2.37.1
Greetings,
I trust you are well. I sent you an email yesterday, I just want to confirm if you received it.
Please let me know as soon as possible,
Regard
Mrs Alice Walton
Add a function to get the old MST topology state, required by a
follow-up i915 patch.
While at it clarify the code comment of
drm_atomic_get_new_mst_topology_state().
Cc: Lyude Paul <lyude(a)redhat.com>
Cc: stable(a)vger.kernel.org # 6.1
Cc: dri-devel(a)lists.freedesktop.org
Signed-off-by: Imre Deak <imre.deak(a)intel.com>
---
drivers/gpu/drm/display/drm_dp_mst_topology.c | 29 +++++++++++++++++--
include/drm/display/drm_dp_mst_helper.h | 3 ++
2 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index ebf6e31e156e0..81cc0c3b1e000 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -5362,18 +5362,43 @@ struct drm_dp_mst_topology_state *drm_atomic_get_mst_topology_state(struct drm_a
}
EXPORT_SYMBOL(drm_atomic_get_mst_topology_state);
+/**
+ * drm_atomic_get_old_mst_topology_state: get old MST topology state in atomic state, if any
+ * @state: global atomic state
+ * @mgr: MST topology manager, also the private object in this case
+ *
+ * This function wraps drm_atomic_get_old_private_obj_state() passing in the MST atomic
+ * state vtable so that the private object state returned is that of a MST
+ * topology object.
+ *
+ * Returns:
+ *
+ * The old MST topology state, or NULL if there's no topology state for this MST mgr
+ * in the global atomic state
+ */
+struct drm_dp_mst_topology_state *
+drm_atomic_get_old_mst_topology_state(struct drm_atomic_state *state,
+ struct drm_dp_mst_topology_mgr *mgr)
+{
+ struct drm_private_state *priv_state =
+ drm_atomic_get_old_private_obj_state(state, &mgr->base);
+
+ return priv_state ? to_dp_mst_topology_state(priv_state) : NULL;
+}
+EXPORT_SYMBOL(drm_atomic_get_old_mst_topology_state);
+
/**
* drm_atomic_get_new_mst_topology_state: get new MST topology state in atomic state, if any
* @state: global atomic state
* @mgr: MST topology manager, also the private object in this case
*
- * This function wraps drm_atomic_get_priv_obj_state() passing in the MST atomic
+ * This function wraps drm_atomic_get_new_private_obj_state() passing in the MST atomic
* state vtable so that the private object state returned is that of a MST
* topology object.
*
* Returns:
*
- * The MST topology state, or NULL if there's no topology state for this MST mgr
+ * The new MST topology state, or NULL if there's no topology state for this MST mgr
* in the global atomic state
*/
struct drm_dp_mst_topology_state *
diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h
index f5eb9aa152b14..32c764fb9cb56 100644
--- a/include/drm/display/drm_dp_mst_helper.h
+++ b/include/drm/display/drm_dp_mst_helper.h
@@ -868,6 +868,9 @@ struct drm_dp_mst_topology_state *
drm_atomic_get_mst_topology_state(struct drm_atomic_state *state,
struct drm_dp_mst_topology_mgr *mgr);
struct drm_dp_mst_topology_state *
+drm_atomic_get_old_mst_topology_state(struct drm_atomic_state *state,
+ struct drm_dp_mst_topology_mgr *mgr);
+struct drm_dp_mst_topology_state *
drm_atomic_get_new_mst_topology_state(struct drm_atomic_state *state,
struct drm_dp_mst_topology_mgr *mgr);
struct drm_dp_mst_atomic_payload *
--
2.37.1
Atm, drm_dp_remove_payload() uses the same payload state to both get the
vc_start_slot required for the payload removal DPCD message and to
deduct time_slots from vc_start_slot of all payloads after the one being
removed.
The above isn't always correct, as vc_start_slot must be the up-to-date
version contained in the new payload state, but time_slots must be the
one used when the payload was previously added, contained in the old
payload state. The new payload's time_slots can change vs. the old one
if the current atomic commit changes the corresponding mode.
This patch let's drivers pass the old and new payload states to
drm_dp_remove_payload(), but keeps these the same for now in all drivers
not to change the behavior. A follow-up i915 patch will pass in that
driver the correct old and new states to the function.
Cc: Lyude Paul <lyude(a)redhat.com>
Cc: Ben Skeggs <bskeggs(a)redhat.com>
Cc: Karol Herbst <kherbst(a)redhat.com>
Cc: Harry Wentland <harry.wentland(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: Wayne Lin <Wayne.Lin(a)amd.com>
Cc: stable(a)vger.kernel.org # 6.1
Cc: dri-devel(a)lists.freedesktop.org
Signed-off-by: Imre Deak <imre.deak(a)intel.com>
---
.../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 2 +-
drivers/gpu/drm/display/drm_dp_mst_topology.c | 22 ++++++++++---------
drivers/gpu/drm/i915/display/intel_dp_mst.c | 4 +++-
drivers/gpu/drm/nouveau/dispnv50/disp.c | 2 +-
include/drm/display/drm_dp_mst_helper.h | 3 ++-
5 files changed, 19 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index 6994c9a1ed858..fed4ce6821161 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -179,7 +179,7 @@ bool dm_helpers_dp_mst_write_payload_allocation_table(
if (enable)
drm_dp_add_payload_part1(mst_mgr, mst_state, payload);
else
- drm_dp_remove_payload(mst_mgr, mst_state, payload);
+ drm_dp_remove_payload(mst_mgr, mst_state, payload, payload);
/* mst_mgr->->payloads are VC payload notify MST branch using DPCD or
* AUX message. The sequence is slot 1-63 allocated sequence for each
diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index 5861b0a6247bc..ebf6e31e156e0 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -3342,7 +3342,8 @@ EXPORT_SYMBOL(drm_dp_add_payload_part1);
* drm_dp_remove_payload() - Remove an MST payload
* @mgr: Manager to use.
* @mst_state: The MST atomic state
- * @payload: The payload to write
+ * @old_payload: The payload with its old state
+ * @new_payload: The payload to write
*
* Removes a payload from an MST topology if it was successfully assigned a start slot. Also updates
* the starting time slots of all other payloads which would have been shifted towards the start of
@@ -3350,33 +3351,34 @@ EXPORT_SYMBOL(drm_dp_add_payload_part1);
*/
void drm_dp_remove_payload(struct drm_dp_mst_topology_mgr *mgr,
struct drm_dp_mst_topology_state *mst_state,
- struct drm_dp_mst_atomic_payload *payload)
+ const struct drm_dp_mst_atomic_payload *old_payload,
+ struct drm_dp_mst_atomic_payload *new_payload)
{
struct drm_dp_mst_atomic_payload *pos;
bool send_remove = false;
/* We failed to make the payload, so nothing to do */
- if (payload->vc_start_slot == -1)
+ if (new_payload->vc_start_slot == -1)
return;
mutex_lock(&mgr->lock);
- send_remove = drm_dp_mst_port_downstream_of_branch(payload->port, mgr->mst_primary);
+ send_remove = drm_dp_mst_port_downstream_of_branch(new_payload->port, mgr->mst_primary);
mutex_unlock(&mgr->lock);
if (send_remove)
- drm_dp_destroy_payload_step1(mgr, mst_state, payload);
+ drm_dp_destroy_payload_step1(mgr, mst_state, new_payload);
else
drm_dbg_kms(mgr->dev, "Payload for VCPI %d not in topology, not sending remove\n",
- payload->vcpi);
+ new_payload->vcpi);
list_for_each_entry(pos, &mst_state->payloads, next) {
- if (pos != payload && pos->vc_start_slot > payload->vc_start_slot)
- pos->vc_start_slot -= payload->time_slots;
+ if (pos != new_payload && pos->vc_start_slot > new_payload->vc_start_slot)
+ pos->vc_start_slot -= old_payload->time_slots;
}
- payload->vc_start_slot = -1;
+ new_payload->vc_start_slot = -1;
mgr->payload_count--;
- mgr->next_start_slot -= payload->time_slots;
+ mgr->next_start_slot -= old_payload->time_slots;
}
EXPORT_SYMBOL(drm_dp_remove_payload);
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index ba29c294b7c1b..5f7bcb5c14847 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -526,6 +526,8 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state,
to_intel_connector(old_conn_state->connector);
struct drm_dp_mst_topology_state *mst_state =
drm_atomic_get_mst_topology_state(&state->base, &intel_dp->mst_mgr);
+ struct drm_dp_mst_atomic_payload *payload =
+ drm_atomic_get_mst_payload_state(mst_state, connector->port);
struct drm_i915_private *i915 = to_i915(connector->base.dev);
drm_dbg_kms(&i915->drm, "active links %d\n",
@@ -534,7 +536,7 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state,
intel_hdcp_disable(intel_mst->connector);
drm_dp_remove_payload(&intel_dp->mst_mgr, mst_state,
- drm_atomic_get_mst_payload_state(mst_state, connector->port));
+ payload, payload);
intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state);
}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index edcb2529b4025..ed9d374147b8d 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -885,7 +885,7 @@ nv50_msto_prepare(struct drm_atomic_state *state,
// TODO: Figure out if we want to do a better job of handling VCPI allocation failures here?
if (msto->disabled) {
- drm_dp_remove_payload(mgr, mst_state, payload);
+ drm_dp_remove_payload(mgr, mst_state, payload, payload);
nvif_outp_dp_mst_vcpi(&mstm->outp->outp, msto->head->base.index, 0, 0, 0, 0);
} else {
diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h
index 41fd8352ab656..f5eb9aa152b14 100644
--- a/include/drm/display/drm_dp_mst_helper.h
+++ b/include/drm/display/drm_dp_mst_helper.h
@@ -841,7 +841,8 @@ int drm_dp_add_payload_part2(struct drm_dp_mst_topology_mgr *mgr,
struct drm_dp_mst_atomic_payload *payload);
void drm_dp_remove_payload(struct drm_dp_mst_topology_mgr *mgr,
struct drm_dp_mst_topology_state *mst_state,
- struct drm_dp_mst_atomic_payload *payload);
+ const struct drm_dp_mst_atomic_payload *old_payload,
+ struct drm_dp_mst_atomic_payload *new_payload);
int drm_dp_check_act_status(struct drm_dp_mst_topology_mgr *mgr);
--
2.37.1
Greetings,
I trust you are well. I sent you an email yesterday, I just want to confirm if you received it.
Please let me know as soon as possible,
Regard
Mrs Alice Walton
From: Archie Pusaka <apusaka(a)chromium.org>
commit 97dfaf073f5881c624856ef293be307b6166115c upstream.
If a command is already sent, we take care of freeing it, but we
also need to cancel the timeout as well.
Signed-off-by: Archie Pusaka <apusaka(a)chromium.org>
Reviewed-by: Abhishek Pandit-Subedi <abhishekpandit(a)google.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
---
net/bluetooth/hci_core.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index a41b4dcf1a7a..cabe8eb4c14f 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1632,6 +1632,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
hdev->flush(hdev);
if (hdev->sent_cmd) {
+ cancel_delayed_work_sync(&hdev->cmd_timer);
kfree_skb(hdev->sent_cmd);
hdev->sent_cmd = NULL;
}
--
2.34.1
A non-first waiter can potentially spin in the for loop of
rwsem_down_write_slowpath() without sleeping but fail to acquire the
lock even if the rwsem is free if the following sequence happens:
Non-first RT waiter First waiter Lock holder
------------------- ------------ -----------
Acquire wait_lock
rwsem_try_write_lock():
Set handoff bit if RT or
wait too long
Set waiter->handoff_set
Release wait_lock
Acquire wait_lock
Inherit waiter->handoff_set
Release wait_lock
Clear owner
Release lock
if (waiter.handoff_set) {
rwsem_spin_on_owner(();
if (OWNER_NULL)
goto trylock_again;
}
trylock_again:
Acquire wait_lock
rwsem_try_write_lock():
if (first->handoff_set && (waiter != first))
return false;
Release wait_lock
A non-first waiter cannot really acquire the rwsem even if it mistakenly
believes that it can spin on OWNER_NULL value. If that waiter happens
to be an RT task running on the same CPU as the first waiter, it can
block the first waiter from acquiring the rwsem leading to live lock.
Fix this problem by making sure that a non-first waiter cannot spin in
the slowpath loop without sleeping.
Fixes: d257cc8cb8d5 ("locking/rwsem: Make handoff bit handling more consistent")
Reviewed-and-tested-by: Mukesh Ojha <quic_mojha(a)quicinc.com>
Signed-off-by: Waiman Long <longman(a)redhat.com>
Cc: stable(a)vger.kernel.org
---
kernel/locking/rwsem.c | 19 +++++++++----------
1 file changed, 9 insertions(+), 10 deletions(-)
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 44873594de03..be2df9ea7c30 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -624,18 +624,16 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
*/
if (first->handoff_set && (waiter != first))
return false;
-
- /*
- * First waiter can inherit a previously set handoff
- * bit and spin on rwsem if lock acquisition fails.
- */
- if (waiter == first)
- waiter->handoff_set = true;
}
new = count;
if (count & RWSEM_LOCK_MASK) {
+ /*
+ * A waiter (first or not) can set the handoff bit
+ * if it is an RT task or wait in the wait queue
+ * for too long.
+ */
if (has_handoff || (!rt_task(waiter->task) &&
!time_after(jiffies, waiter->timeout)))
return false;
@@ -651,11 +649,12 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
} while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));
/*
- * We have either acquired the lock with handoff bit cleared or
- * set the handoff bit.
+ * We have either acquired the lock with handoff bit cleared or set
+ * the handoff bit. Only the first waiter can have its handoff_set
+ * set here to enable optimistic spinning in slowpath loop.
*/
if (new & RWSEM_FLAG_HANDOFF) {
- waiter->handoff_set = true;
+ first->handoff_set = true;
lockevent_inc(rwsem_wlock_handoff);
return false;
}
--
2.31.1
Good morning,
Happy New Year my friend, how are you doing? My names are Hongu Almond
a humble citizen of United States of America I have a mandate in this
2023 to carry out a transaction and I choose you profitable please
write me back for more details.
Your friend
Mr.Hongu Almond
As described in the following commit info, new_opts value is not used
anymore and is not cleaned on non-error paths -- it is a leak. The problem
has been fixed by the following patch which can be cleanly applied
to 4.14, 4.19, 5.4, 5.10, 5.15 branches.
From: Deepak Sharma <deepak.sharma(a)amd.com>
commit a8fb40966f19ff81520d9ccf8f7e2b95201368b8 upstream.
All Zen or newer CPU which support C3 shares cache. Its not necessary to
flush the caches in software before entering C3. This will cause drop in
performance for the cores which share some caches. ARB_DIS is not used
with current AMD C state implementation. So set related flags correctly.
Signed-off-by: Deepak Sharma <deepak.sharma(a)amd.com>
Acked-by: Thomas Gleixner <tglx(a)linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Signed-off-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com>
---
Hi folks, this is a very simple optimization that might be read
as a fix IMO, since it's setting the flags correctly for the Zen CPUs.
It was built/boot tested in the Steam Deck.
The backport for stable was a suggestion from Greg [0], so lemme
know if any of you see an issue with that, or if we should target
other stable versions (or less versions, maybe only 5.15).
Cheers,
Guilherme
[0] https://github.com/ValveSoftware/SteamOS/issues/954#issuecomment-1368182597
arch/x86/kernel/acpi/cstate.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 49ae4e1ac9cd..d28d43d774a2 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -79,6 +79,21 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
*/
flags->bm_control = 0;
}
+ if (c->x86_vendor == X86_VENDOR_AMD && c->x86 >= 0x17) {
+ /*
+ * For all AMD Zen or newer CPUs that support C3, caches
+ * should not be flushed by software while entering C3
+ * type state. Set bm->check to 1 so that kernel doesn't
+ * need to execute cache flush operation.
+ */
+ flags->bm_check = 1;
+ /*
+ * In current AMD C state implementation ARB_DIS is no longer
+ * used. So set bm_control to zero to indicate ARB_DIS is not
+ * required while entering C3 type state.
+ */
+ flags->bm_control = 0;
+ }
}
EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
--
2.39.0
Currently, when resetting the USB modem via AT commands, the modem is
no longer re-connected.
This problem is caused by the incorrect description of the USB_OTG2_OC
pad. It should have pull-up enabled, hysteresis enabled and the
property 'over-current-active-low' should be passed.
With this change, the USB modem can be successfully re-connected
after a reset.
Cc: stable(a)vger.kernel.org
Fixes: 9ac0ae97e349 ("ARM: dts: imx7d-smegw01: Add support for i.MX7D SMEGW01 board")
Signed-off-by: Fabio Estevam <festevam(a)denx.de>
---
arch/arm/boot/dts/imx7d-smegw01.dts | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/arm/boot/dts/imx7d-smegw01.dts b/arch/arm/boot/dts/imx7d-smegw01.dts
index 546268b8d0b1..c0f00f5db11e 100644
--- a/arch/arm/boot/dts/imx7d-smegw01.dts
+++ b/arch/arm/boot/dts/imx7d-smegw01.dts
@@ -198,6 +198,7 @@ &usbotg1 {
&usbotg2 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_usbotg2>;
+ over-current-active-low;
dr_mode = "host";
status = "okay";
};
@@ -374,7 +375,7 @@ MX7D_PAD_LPSR_GPIO1_IO05__GPIO1_IO5 0x04
pinctrl_usbotg2: usbotg2grp {
fsl,pins = <
- MX7D_PAD_UART3_RTS_B__USB_OTG2_OC 0x04
+ MX7D_PAD_UART3_RTS_B__USB_OTG2_OC 0x5c
>;
};
--
2.25.1
The previous algorithm was pretty broken.
- The inner loop had a '(m > m_max)' condition, and the value of 'm'
would increase in each iteration;
- Each iteration would actually multiply 'm' by two, so it is not needed
to re-compute the whole equation at each iteration;
- It would loop until (m & 1) == 0, which means it would loop at most
once.
- The outer loop would divide the 'n' value by two at the end of each
iteration. This meant that for a 12 MHz parent clock and a 1.2 GHz
requested clock, it would first try n=12, then n=6, then n=3, then
n=1, none of which would work; the only valid value is n=2 in this
case.
Simplify this algorithm with a single for loop, which decrements 'n'
after each iteration, addressing all of the above problems.
Fixes: bdbfc029374f ("clk: ingenic: Add support for the JZ4760")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Paul Cercueil <paul(a)crapouillou.net>
---
drivers/clk/ingenic/jz4760-cgu.c | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)
diff --git a/drivers/clk/ingenic/jz4760-cgu.c b/drivers/clk/ingenic/jz4760-cgu.c
index ecd395ac8a28..e407f00bd594 100644
--- a/drivers/clk/ingenic/jz4760-cgu.c
+++ b/drivers/clk/ingenic/jz4760-cgu.c
@@ -58,7 +58,7 @@ jz4760_cgu_calc_m_n_od(const struct ingenic_cgu_pll_info *pll_info,
unsigned long rate, unsigned long parent_rate,
unsigned int *pm, unsigned int *pn, unsigned int *pod)
{
- unsigned int m, n, od, m_max = (1 << pll_info->m_bits) - 2;
+ unsigned int m, n, od, m_max = (1 << pll_info->m_bits) - 1;
/* The frequency after the N divider must be between 1 and 50 MHz. */
n = parent_rate / (1 * MHZ);
@@ -66,19 +66,17 @@ jz4760_cgu_calc_m_n_od(const struct ingenic_cgu_pll_info *pll_info,
/* The N divider must be >= 2. */
n = clamp_val(n, 2, 1 << pll_info->n_bits);
- for (;; n >>= 1) {
- od = (unsigned int)-1;
+ rate /= MHZ;
+ parent_rate /= MHZ;
- do {
- m = (rate / MHZ) * (1 << ++od) * n / (parent_rate / MHZ);
- } while ((m > m_max || m & 1) && (od < 4));
-
- if (od < 4 && m >= 4 && m <= m_max)
- break;
+ for (m = m_max; m >= m_max && n >= 2; n--) {
+ m = rate * n / parent_rate;
+ od = m & 1;
+ m <<= od;
}
*pm = m;
- *pn = n;
+ *pn = n + 1;
*pod = 1 << od;
}
--
2.35.1
In commit 34488399fa08 ("mm/madvise: add file and shmem support to
MADV_COLLAPSE") we make the following change to find_pmd_or_thp_or_none():
- if (!pmd_present(pmde))
- return SCAN_PMD_NULL;
+ if (pmd_none(pmde))
+ return SCAN_PMD_NONE;
This was for-use by MADV_COLLAPSE file/shmem codepaths, where MADV_COLLAPSE
might identify a pte-mapped hugepage, only to have khugepaged race-in, free
the pte table, and clear the pmd. Such codepaths include:
A) If we find a suitably-aligned compound page of order HPAGE_PMD_ORDER
already in the pagecache.
B) In retract_page_tables(), if we fail to grab mmap_lock for the target
mm/address.
In these cases, collapse_pte_mapped_thp() really does expect a none (not
just !present) pmd, and we want to suitably identify that case separate
from the case where no pmd is found, or it's a bad-pmd (of course, many
things could happen once we drop mmap_lock, and the pmd could plausibly
undergo multiple transitions due to intervening fault, split, etc).
Regardless, the code is prepared install a huge-pmd only when the existing
pmd entry is either a genuine pte-table-mapping-pmd, or the none-pmd.
However, the commit introduces a logical hole; namely, that we've allowed
!none- && !huge- && !bad-pmds to be classified as genuine
pte-table-mapping-pmds. One such example that could leak through are swap
entries. The pmd values aren't checked again before use in
pte_offset_map_lock(), which is expecting nothing less than a genuine
pte-table-mapping-pmd.
We want to put back the !pmd_present() check (below the pmd_none() check),
but need to be careful to deal with subtleties in pmd transitions and
treatments by various arch.
The issue is that __split_huge_pmd_locked() temporarily clears the present
bit (or otherwise marks the entry as invalid), but pmd_present()
and pmd_trans_huge() still need to return true while the pmd is in this
transitory state. For example, x86's pmd_present() also checks the
_PAGE_PSE , riscv's version also checks the _PAGE_LEAF bit, and arm64 also
checks a PMD_PRESENT_INVALID bit.
Covering all 4 cases for x86 (all checks done on the same pmd value):
1) pmd_present() && pmd_trans_huge()
All we actually know here is that the PSE bit is set. Either:
a) We aren't racing with __split_huge_page(), and PRESENT or PROTNONE
is set.
=> huge-pmd
b) We are currently racing with __split_huge_page(). The danger here
is that we proceed as-if we have a huge-pmd, but really we are
looking at a pte-mapping-pmd. So, what is the risk of this
danger?
The only relevant path is:
madvise_collapse() -> collapse_pte_mapped_thp()
Where we might just incorrectly report back "success", when really
the memory isn't pmd-backed. This is fine, since split could
happen immediately after (actually) successful madvise_collapse().
So, it should be safe to just assume huge-pmd here.
2) pmd_present() && !pmd_trans_huge()
Either:
a) PSE not set and either PRESENT or PROTNONE is.
=> pte-table-mapping pmd (or PROT_NONE)
b) devmap. This routine can be called immediately after
unlocking/locking mmap_lock -- or called with no locks held (see
khugepaged_scan_mm_slot()), so previous VMA checks have since been
invalidated.
3) !pmd_present() && pmd_trans_huge()
Not possible.
4) !pmd_present() && !pmd_trans_huge()
Neither PRESENT nor PROTNONE set
=> not present
I've checked all archs that implement pmd_trans_huge() (arm64, riscv,
powerpc, longarch, x86, mips, s390) and this logic roughly translates
(though devmap treatment is unique to x86 and powerpc, and (3) doesn't
necessarily hold in general -- but that doesn't matter since !pmd_present()
always takes failure path).
Also, add a comment above find_pmd_or_thp_or_none() to help future
travelers reason about the validity of the code; namely, the possible
mutations that might happen out from under us, depending on how
mmap_lock is held (if at all).
Fixes: 34488399fa08 ("mm/madvise: add file and shmem support to MADV_COLLAPSE")
Reported-by: Hugh Dickins <hughd(a)google.com>
Signed-off-by: Zach O'Keefe <zokeefe(a)google.com>
Cc: stable(a)vger.kernel.org
---
Request that this be pulled into stable since it's theoretically
possible (though I have no reproducer) that while mmap_lock is dropped,
racing thp migration installs a pmd migration entry which then has a path to
be consumed, unchecked, by pte_offset_map().
v1 -> v2: Fix typo
---
mm/khugepaged.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 9548644bdb56..1face2ae5877 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -943,6 +943,10 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
return SCAN_SUCCEED;
}
+/*
+ * See pmd_trans_unstable() for how the result may change out from
+ * underneath us, even if we hold mmap_lock in read.
+ */
static int find_pmd_or_thp_or_none(struct mm_struct *mm,
unsigned long address,
pmd_t **pmd)
@@ -961,8 +965,12 @@ static int find_pmd_or_thp_or_none(struct mm_struct *mm,
#endif
if (pmd_none(pmde))
return SCAN_PMD_NONE;
+ if (!pmd_present(pmde))
+ return SCAN_PMD_NULL;
if (pmd_trans_huge(pmde))
return SCAN_PMD_MAPPED;
+ if (pmd_devmap(pmde))
+ return SCAN_PMD_NULL;
if (pmd_bad(pmde))
return SCAN_PMD_NULL;
return SCAN_SUCCEED;
--
2.39.1.456.gfc5497dd1b-goog
The patch titled
Subject: mm/MADV_COLLAPSE: catch !none !huge !bad pmd lookups
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-madv_collapse-catch-none-huge-bad-pmd-lookups.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: "Zach O'Keefe" <zokeefe(a)google.com>
Subject: mm/MADV_COLLAPSE: catch !none !huge !bad pmd lookups
Date: Wed, 25 Jan 2023 14:53:58 -0800
In commit 34488399fa08 ("mm/madvise: add file and shmem support to
MADV_COLLAPSE") we make the following change to find_pmd_or_thp_or_none():
- if (!pmd_present(pmde))
- return SCAN_PMD_NULL;
+ if (pmd_none(pmde))
+ return SCAN_PMD_NONE;
This was for-use by MADV_COLLAPSE file/shmem codepaths, where
MADV_COLLAPSE might identify a pte-mapped hugepage, only to have
khugepaged race-in, free the pte table, and clear the pmd. Such codepaths
include:
A) If we find a suitably-aligned compound page of order HPAGE_PMD_ORDER
already in the pagecache.
B) In retract_page_tables(), if we fail to grab mmap_lock for the target
mm/address.
In these cases, collapse_pte_mapped_thp() really does expect a none (not
just !present) pmd, and we want to suitably identify that case separate
from the case where no pmd is found, or it's a bad-pmd (of course, many
things could happen once we drop mmap_lock, and the pmd could plausibly
undergo multiple transitions due to intervening fault, split, etc).
Regardless, the code is prepared install a huge-pmd only when the existing
pmd entry is either a genuine pte-table-mapping-pmd, or the none-pmd.
However, the commit introduces a logical hole; namely, that we've allowed
!none- && !huge- && !bad-pmds to be classified as genuine
pte-table-mapping-pmds. One such example that could leak through are swap
entries. The pmd values aren't checked again before use in
pte_offset_map_lock(), which is expecting nothing less than a genuine
pte-table-mapping-pmd.
We want to put back the !pmd_present() check (below the pmd_none() check),
but need to be careful to deal with subtleties in pmd transitions and
treatments by various arch.
The issue is that __split_huge_pmd_locked() temporarily clears the present
bit (or otherwise marks the entry as invalid), but pmd_present() and
pmd_trans_huge() still need to return true while the pmd is in this
transitory state. For example, x86's pmd_present() also checks the
_PAGE_PSE , riscv's version also checks the _PAGE_LEAF bit, and arm64 also
checks a PMD_PRESENT_INVALID bit.
Covering all 4 cases for x86 (all checks done on the same pmd value):
1) pmd_present() && pmd_trans_huge()
All we actually know here is that the PSE bit is set. Either:
a) We aren't racing with __split_huge_page(), and PRESENT or PROTNONE
is set.
=> huge-pmd
b) We are currently racing with __split_huge_page(). The danger here
is that we proceed as-if we have a huge-pmd, but really we are
looking at a pte-mapping-pmd. So, what is the risk of this
danger?
The only relevant path is:
madvise_collapse() -> collapse_pte_mapped_thp()
Where we might just incorrectly report back "success", when really
the memory isn't pmd-backed. This is fine, since split could
happen immediately after (actually) successful madvise_collapse().
So, it should be safe to just assume huge-pmd here.
2) pmd_present() && !pmd_trans_huge()
Either:
a) PSE not set and either PRESENT or PROTNONE is.
=> pte-table-mapping pmd (or PROT_NONE)
b) devmap. This routine can be called immediately after
unlocking/locking mmap_lock -- or called with no locks held (see
khugepaged_scan_mm_slot()), so previous VMA checks have since been
invalidated.
3) !pmd_present() && pmd_trans_huge()
Not possible.
4) !pmd_present() && !pmd_trans_huge()
Neither PRESENT nor PROTNONE set
=> not present
I've checked all archs that implement pmd_trans_huge() (arm64, riscv,
powerpc, longarch, x86, mips, s390) and this logic roughly translates
(though devmap treatment is unique to x86 and powerpc, and (3) doesn't
necessarily hold in general -- but that doesn't matter since
!pmd_present() always takes failure path).
Also, add a comment above find_pmd_or_thp_or_none() to help future
travelers reason about the validity of the code; namely, the possible
mutations that might happen out from under us, depending on how mmap_lock
is held (if at all).
Link: https://lkml.kernel.org/r/20230125225358.2576151-1-zokeefe@google.com
Fixes: 34488399fa08 ("mm/madvise: add file and shmem support to MADV_COLLAPSE")
Signed-off-by: Zach O'Keefe <zokeefe(a)google.com>
Reported-by: Hugh Dickins <hughd(a)google.com>
Cc: Yang Shi <shy828301(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/mm/khugepaged.c~mm-madv_collapse-catch-none-huge-bad-pmd-lookups
+++ a/mm/khugepaged.c
@@ -847,6 +847,10 @@ static int hugepage_vma_revalidate(struc
return SCAN_SUCCEED;
}
+/*
+ * See pmd_trans_unstable() for how the result may change out from
+ * underneath us, even if we hold mmap_lock in read.
+ */
static int find_pmd_or_thp_or_none(struct mm_struct *mm,
unsigned long address,
pmd_t **pmd)
@@ -865,8 +869,12 @@ static int find_pmd_or_thp_or_none(struc
#endif
if (pmd_none(pmde))
return SCAN_PMD_NONE;
+ if (!pmd_present(pmde))
+ return SCAN_PMD_NULL;
if (pmd_trans_huge(pmde))
return SCAN_PMD_MAPPED;
+ if (pmd_devmap(pmde))
+ return SCAN_PMD_NULL;
if (pmd_bad(pmde))
return SCAN_PMD_NULL;
return SCAN_SUCCEED;
_
Patches currently in -mm which might be from zokeefe(a)google.com are
mm-madv_collapse-catch-none-huge-bad-pmd-lookups.patch
The quilt patch titled
Subject: mm/cma.c: delete kmemleak objects when freeing CMA areas to buddy at boot
has been removed from the -mm tree. Its filename was
mm-cmac-delete-kmemleak-objects-when-freeing-cma-areas-to-buddy-at-boot.patch
This patch was dropped because it is obsolete
------------------------------------------------------
From: "Isaac J. Manjarres" <isaacmanjarres(a)google.com>
Subject: mm/cma.c: delete kmemleak objects when freeing CMA areas to buddy at boot
Date: Mon, 9 Jan 2023 14:16:23 -0800
Since every CMA region is now tracked by kmemleak at the time
cma_activate_area() is invoked, and cma_activate_area() is called for each
CMA region, invoke kmemleak_free_part_phys() during cma_activate_area() to
inform kmemleak that the CMA region will be freed. Doing so also removes
the need to invoke kmemleak_ignore_phys() when the global CMA region is
being created, as the kmemleak object for it will be deleted.
This helps resolve a crash when kmemleak and CONFIG_DEBUG_PAGEALLOC are
both enabled, since CONFIG_DEBUG_PAGEALLOC causes the CMA region to be
unmapped from the kernel's address space when the pages are freed to
buddy. Without this patch, kmemleak will attempt to scan the CMA regions,
even though they are unmapped, which leads to a page-fault.
Link: https://lkml.kernel.org/r/20230109221624.592315-3-isaacmanjarres@google.com
Signed-off-by: Isaac J. Manjarres <isaacmanjarres(a)google.com>
Cc: Isaac J. Manjarres <isaacmanjarres(a)google.com>
Cc: Saravana Kannan <saravanak(a)google.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/mm/cma.c~mm-cmac-delete-kmemleak-objects-when-freeing-cma-areas-to-buddy-at-boot
+++ a/mm/cma.c
@@ -103,6 +103,13 @@ static void __init cma_activate_area(str
goto out_error;
/*
+ * The CMA region was marked as allocated by kmemleak when it was either
+ * dynamically allocated or statically reserved. In any case,
+ * inform kmemleak that the region is about to be freed to the page allocator.
+ */
+ kmemleak_free_part_phys(cma_get_base(cma), cma_get_size(cma));
+
+ /*
* alloc_contig_range() requires the pfn range specified to be in the
* same zone. Simplify by forcing the entire CMA resv range to be in the
* same zone.
@@ -361,11 +368,6 @@ int __init cma_declare_contiguous_nid(ph
}
}
- /*
- * kmemleak scans/reads tracked objects for pointers to other
- * objects but this address isn't mapped and accessible
- */
- kmemleak_ignore_phys(addr);
base = addr;
}
_
Patches currently in -mm which might be from isaacmanjarres(a)google.com are
revert-mm-kmemleak-alloc-gray-object-for-reserved-region-with-direct-map.patch
The quilt patch titled
Subject: mm/cma.c: make kmemleak aware of all CMA regions
has been removed from the -mm tree. Its filename was
mm-cmac-make-kmemleak-aware-of-all-cma-regions.patch
This patch was dropped because it is obsolete
------------------------------------------------------
From: "Isaac J. Manjarres" <isaacmanjarres(a)google.com>
Subject: mm/cma.c: make kmemleak aware of all CMA regions
Date: Mon, 9 Jan 2023 14:16:22 -0800
Patch series "Fixes for kmemleak tracking with CMA regions".
When trying to boot a device with an ARM64 kernel with the following
config options enabled:
CONFIG_DEBUG_PAGEALLOC=y
CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT=y
CONFIG_DEBUG_KMEMLEAK=y
a page-fault is encountered when kmemleak starts to scan the list of gray
or allocated objects that it maintains. Upon closer inspection, it was
observed that these page-faults always occurred when kmemleak attempted to
scan a CMA region.
At the moment, kmemleak is made aware of CMA regions that are specified
through the devicetree to be created at specific memory addresses or
dynamically allocated within a range of addresses. However, if the CMA
region is constrained to a certain range of addresses through the command
line, the region is reserved through the memblock_reserve() function, but
kmemleak_alloc_phys() is not invoked. Furthermore, kmemleak is never
informed about CMA regions being freed to buddy at boot, which is
problematic when CONFIG_DEBUG_PAGEALLOC is enabled, as all CMA regions are
unmapped from the kernel's address space, and subsequently causes a
page-fault when kmemleak attempts to scan any of them.
This series makes it so that kmemleak is aware of every CMA region before
they are freed to the buddy allocator, so that at that time, kmemleak can
be informed that each region is about to be freed, and thus it should not
attempt to scan those regions.
This patch (of 2):
Currently, kmemleak tracks CMA regions that are specified through the
devicetree. However, if the global CMA region is specified through the
commandline, kmemleak will be unaware of the CMA region because
kmemleak_alloc_phys() is not invoked after memblock_reserve(). Add the
missing call to kmemleak_alloc_phys() so that all CMA regions are tracked
by kmemleak before they are freed to the page allocator in
cma_activate_area().
Link: https://lkml.kernel.org/r/20230109221624.592315-1-isaacmanjarres@google.com
Link: https://lkml.kernel.org/r/20230109221624.592315-2-isaacmanjarres@google.com
Signed-off-by: Isaac J. Manjarres <isaacmanjarres(a)google.com>
Cc: <stable(a)vger.kernel.org>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Saravana Kannan <saravanak(a)google.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/mm/cma.c~mm-cmac-make-kmemleak-aware-of-all-cma-regions
+++ a/mm/cma.c
@@ -318,6 +318,8 @@ int __init cma_declare_contiguous_nid(ph
ret = -EBUSY;
goto err;
}
+
+ kmemleak_alloc_phys(base, size, 0);
} else {
phys_addr_t addr = 0;
_
Patches currently in -mm which might be from isaacmanjarres(a)google.com are
revert-mm-kmemleak-alloc-gray-object-for-reserved-region-with-direct-map.patch
mm-cmac-delete-kmemleak-objects-when-freeing-cma-areas-to-buddy-at-boot.patch
From: Kan Liang <kan.liang(a)linux.intel.com>
The below error can be observed in a Linux guest, when the hypervisor
is on a hybrid machine.
[ 0.118214] unchecked MSR access error: WRMSR to 0x38f (tried to
write 0x00011000f0000003f) at rIP: 0xffffffff83082124
(native_write_msr+0x4/0x30)
[ 0.118949] Call Trace:
[ 0.119092] <TASK>
[ 0.119215] ? __intel_pmu_enable_all.constprop.0+0x88/0xe0
[ 0.119533] intel_pmu_enable_all+0x15/0x20
[ 0.119778] x86_pmu_enable+0x17c/0x320
The current perf wrongly assumes that the perf metrics feature is always
enabled on p-core. It unconditionally enables the feature to workaround
the unreliable enumeration of the PERF_CAPABILITIES MSR. The assumption
is safe to bare metal. However, KVM doesn't support the perf metrics
feature yet. Setting the corresponding bit triggers MSR access error
in a guest.
Only unconditionally enable the core specific PMU feature for bare metal
on ADL and RPL, which includes the perf metrics on p-core and
PEBS-via-PT on e-core.
For the future platforms, perf doesn't need to hardcode the PMU feature.
The per-core PMU features can be enumerated by the enhanced
PERF_CAPABILITIES MSR and CPUID leaf 0x23. There is no such issue.
Fixes: f83d2f91d259 ("perf/x86/intel: Add Alder Lake Hybrid support")
Link: https://lore.kernel.org/lkml/e161b7c0-f0be-23c8-9a25-002260c2a085@linux.int…
Reported-by: Pengfei Xu <pengfei.xu(a)intel.com>
Signed-off-by: Kan Liang <kan.liang(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
---
Based on my limit knowledge regarding KVM and guest, I use the
HYPERVISOR bit to tell whether it's a guest. But I'm not sure whether
it's reliable. Please let me know if there is a better way. Thanks.
arch/x86/events/intel/core.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index bbb7846d3c1e..8d08929a7250 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6459,8 +6459,17 @@ __init int intel_pmu_init(void)
__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
0, pmu->num_counters, 0, 0);
pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities;
- pmu->intel_cap.perf_metrics = 1;
- pmu->intel_cap.pebs_output_pt_available = 0;
+ /*
+ * The capability bits are not reliable on ADL and RPL.
+ * For bare metal, it's safe to assume that some features
+ * are always enabled, e.g., the perf metrics on p-core,
+ * but we cannot do the same assumption for a hypervisor.
+ * Only update the core specific PMU feature for bare metal.
+ */
+ if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ pmu->intel_cap.perf_metrics = 1;
+ pmu->intel_cap.pebs_output_pt_available = 0;
+ }
memcpy(pmu->hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids));
memcpy(pmu->hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs));
@@ -6480,8 +6489,10 @@ __init int intel_pmu_init(void)
__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
0, pmu->num_counters, 0, 0);
pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities;
- pmu->intel_cap.perf_metrics = 0;
- pmu->intel_cap.pebs_output_pt_available = 1;
+ if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ pmu->intel_cap.perf_metrics = 0;
+ pmu->intel_cap.pebs_output_pt_available = 1;
+ }
memcpy(pmu->hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids));
memcpy(pmu->hw_cache_extra_regs, tnt_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs));
--
2.35.1
In commit 34488399fa08 ("mm/madvise: add file and shmem support to
MADV_COLLAPSE") we make the following change to find_pmd_or_thp_or_none():
- if (!pmd_present(pmde))
- return SCAN_PMD_NULL;
+ if (pmd_none(pmde))
+ return SCAN_PMD_NONE;
This was for-use by MADV_COLLAPSE file/shmem codepaths, where MADV_COLLAPSE
might identify a pte-mapped hugepage, only to have khugepaged race-in, free
the pte table, and clear the pmd. Such codepaths include:
A) If we find a suitably-aligned compound page of order HPAGE_PMD_ORDER
already in the pagecache.
B) In retract_page_tables(), if we fail to grab mmap_lock for the target
mm/address.
In these cases, collapse_pte_mapped_thp() really does expect a none (not
just !present) pmd, and we want to suitably identify that case separate
from the case where no pmd is found, or it's a bad-pmd (of course, many
things could happen once we drop mmap_lock, and the pmd could plausibly
undergo multiple transitions due to intervening fault, split, etc).
Regardless, the code is prepared install a huge-pmd only when the existing
pmd entry is either a genuine pte-table-mapping-pmd, or the none-pmd.
However, the commit introduces a logical hole; namely, that we've allowed
!none- && !huge- && !bad-pmds to be classified as genuine
pte-table-mapping-pmds. One such example that could leak through are swap
entries. The pmd values aren't checked again before use in
pte_offset_map_lock(), which is expecting nothing less than a genuine
pte-table-mapping-pmd.
We want to put back the !pmd_present() check (below the pmd_none() check),
but need to be careful to deal with subtleties in pmd transitions and
treatments by various arch.
The issue is that __split_huge_pmd_locked() temporarily clears the present
bit (or otherwise marks the entry as invalid), but pmd_present()
and pmd_trans_huge() still need to return true while the pmd is in this
transitory state. For example, x86's pmd_present() also checks the
_PAGE_PSE , riscv's version also checks the _PAGE_LEAF bit, and arm64 also
checks a PMD_PRESENT_INVALID bit.
Covering all 4 cases for x86 (all checks done on the same pmd value):
1) pmd_present() && pmd_trans_huge()
All we actually know here is that the PSE bit is set. Either:
a) We aren't racing with __split_huge_page(), and PRESENT or PROTNONE
is set.
=> huge-pmd
b) We are currently racing with __split_huge_page(). The danger here
is that we proceed as-if we have a huge-pmd, but really we are
looking at a pte-mapping-pmd. So, what is the risk of this
danger?
The only relevant path is:
madvise_collapse() -> collapse_pte_mapped_thp()
Where we might just incorrectly report back "success", when really
the memory isn't pmd-backed. This is fine, since split could
happen immediately after (actually) successful madvise_collapse().
So, it should be safe to just assume huge-pmd here.
2) pmd_present() && !pmd_trans_huge()
Either:
a) PSE not set and either PRESENT or PROTNONE is.
=> pte-table-mapping pmd (or PROT_NONE)
b) devmap. This routine can be called immediately after
unlocking/locking mmap_lock -- or called with no locks held (see
khugepaged_scan_mm_slot()), so previous VMA checks have since been
invalidated.
3) !pmd_present() && pmd_trans_huge()
Not possible.
4) !pmd_present() && !pmd_trans_huge()
Neither PRESENT nor PROTNONE set
=> not present
I've checked all archs that implement pmd_trans_huge() (arm64, riscv,
powerpc, longarch, x86, mips, s390) and this logic roughly translates
(though devmap treatment is unique to x86 and powerpc, and (3) doesn't
necessarily hold in general -- but that doesn't matter since !pmd_present()
always takes failure path).
Also, add a comment above find_pmd_or_thp_or_none() to help future
travelers reason about the validity of the code; namely, the possible
mutations that might happen out from under us, depending on how
mmap_lock is held (if at all).
Fixes: 34488399fa08 ("mm/madvise: add file and shmem support to MADV_COLLAPSE")
Reported-by: Hugh Dickins <hughd(a)google.com>
Signed-off-by: Zach O'Keefe <zokeefe(a)google.com>
Cc: stable(a)vger.kernel.org
---
Request that this be pulled into stable since it's theoretically
possible (though I have no reproducer) that while mmap_lock is dropped,
racing thp migration installs a pmd migration entry which then has a path to
be consumed, unchecked, by pte_offset_map().
---
mm/khugepaged.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index fa38cae240b9..7ea668bbea70 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -941,6 +941,10 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
return SCAN_SUCCEED;
}
+/*
+ * See pmd_trans_unstable() for how the result may change out from
+ * underneath us, even if we hold mmap_lock in read.
+ */
static int find_pmd_or_thp_or_none(struct mm_struct *mm,
unsigned long address,
pmd_t **pmd)
@@ -959,8 +963,12 @@ static int find_pmd_or_thp_or_none(struct mm_struct *mm,
#endif
if (pmd_none(pmde))
return SCAN_PMD_NONE;
+ if (!pmd_present(pmde))
+ return SCAN_PMD_NULL;
if (pmd_trans_huge(pmde))
return SCAN_PMD_MAPPED;
+ if (pmd_devmap(pmd))
+ return SCAN_PMD_NULL;
if (pmd_bad(pmde))
return SCAN_PMD_NULL;
return SCAN_SUCCEED;
--
2.39.1.405.gd4c25cc71f-goog
commit 2efb6edd52dc50273f5e68ad863dd1b1fb2f2d1c upstream.
(Actually, this is fixing the "Read the Current Status" command sent to
the device's outgoing mailbox, but it is only currently used for the PWM
instructions.)
The PCI-1760 is operated mostly by sending commands to a set of Outgoing
Mailbox registers, waiting for the command to complete, and reading the
result from the Incoming Mailbox registers. One of these commands is
the "Read the Current Status" command. The number of this command is
0x07 (see the User's Manual for the PCI-1760 at
<https://advdownload.advantech.com/productfile/Downloadfile2/1-11P6653/PCI-1…>.
The `PCI1760_CMD_GET_STATUS` macro defined in the driver should expand
to this command number 0x07, but unfortunately it currently expands to
0x03. (Command number 0x03 is not defined in the User's Manual.)
Correct the definition of the `PCI1760_CMD_GET_STATUS` macro to fix it.
This is used by all the PWM subdevice related instructions handled by
`pci1760_pwm_insn_config()` which are probably all broken. The effect
of sending the undefined command number 0x03 is not known.
Fixes: 14b93bb6bbf0 ("staging: comedi: adv_pci_dio: separate out PCI-1760 support")
Cc: <stable(a)vger.kernel.org> # v4.5+
Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk>
Link: https://lore.kernel.org/r/20230103143754.17564-1-abbotti@mev.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
Should apply OK to v4.5 to v4.18 inclusive. [IA]
---
drivers/staging/comedi/drivers/adv_pci1760.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/staging/comedi/drivers/adv_pci1760.c b/drivers/staging/comedi/drivers/adv_pci1760.c
index 9f525ff7290c..67b0cf0b9066 100644
--- a/drivers/staging/comedi/drivers/adv_pci1760.c
+++ b/drivers/staging/comedi/drivers/adv_pci1760.c
@@ -68,7 +68,7 @@
#define PCI1760_CMD_CLR_IMB2 0x00 /* Clears IMB2 */
#define PCI1760_CMD_SET_DO 0x01 /* Set output state */
#define PCI1760_CMD_GET_DO 0x02 /* Read output status */
-#define PCI1760_CMD_GET_STATUS 0x03 /* Read current status */
+#define PCI1760_CMD_GET_STATUS 0x07 /* Read current status */
#define PCI1760_CMD_GET_FW_VER 0x0e /* Read firware version */
#define PCI1760_CMD_GET_HW_VER 0x0f /* Read hardware version */
#define PCI1760_CMD_SET_PWM_HI(x) (0x10 + (x) * 2) /* Set "hi" period */
--
2.39.0
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
The instructions for the ftrace-bisect.sh script, which is used to find
what function is being traced that is causing a kernel crash, and possibly
a triple fault reboot, uses the old method. In 5.1, a new feature was
added that let the user write in the index into available_filter_functions
that maps to the function a user wants to set in set_ftrace_filter (or
set_ftrace_notrace). This takes O(1) to set, as suppose to writing a
function name, which takes O(n) (where n is the number of functions in
available_filter_functions).
The ftrace-bisect.sh requires setting half of the functions in
available_filter_functions, which is O(n^2) using the name method to enable
and can take several minutes to complete. The number method is O(n) which
takes less than a second to complete. Using the number method for any
kernel 5.1 and after is the proper way to do the bisect.
Update the usage to reflect the new change, as well as using the
/sys/kernel/tracing path instead of the obsolete debugfs path.
Link: https://lkml.kernel.org/r/20230123112252.022003dd@gandalf.local.home
Cc: stable(a)vger.kernel.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Acked-by: Mark Rutland <mark.rutland(a)arm.com>
Fixes: f79b3f338564e ("ftrace: Allow enabling of filters via index of available_filter_functions")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
scripts/tracing/ftrace-bisect.sh | 34 ++++++++++++++++++++++++--------
1 file changed, 26 insertions(+), 8 deletions(-)
diff --git a/scripts/tracing/ftrace-bisect.sh b/scripts/tracing/ftrace-bisect.sh
index 926701162bc8..bb4f59262bbe 100755
--- a/scripts/tracing/ftrace-bisect.sh
+++ b/scripts/tracing/ftrace-bisect.sh
@@ -12,7 +12,7 @@
# (note, if this is a problem with function_graph tracing, then simply
# replace "function" with "function_graph" in the following steps).
#
-# # cd /sys/kernel/debug/tracing
+# # cd /sys/kernel/tracing
# # echo schedule > set_ftrace_filter
# # echo function > current_tracer
#
@@ -20,22 +20,40 @@
#
# # echo nop > current_tracer
#
-# # cat available_filter_functions > ~/full-file
+# Starting with v5.1 this can be done with numbers, making it much faster:
+#
+# The old (slow) way, for kernels before v5.1.
+#
+# [old-way] # cat available_filter_functions > ~/full-file
+#
+# [old-way] *** Note *** this process will take several minutes to update the
+# [old-way] filters. Setting multiple functions is an O(n^2) operation, and we
+# [old-way] are dealing with thousands of functions. So go have coffee, talk
+# [old-way] with your coworkers, read facebook. And eventually, this operation
+# [old-way] will end.
+#
+# The new way (using numbers) is an O(n) operation, and usually takes less than a second.
+#
+# seq `wc -l available_filter_functions | cut -d' ' -f1` > ~/full-file
+#
+# This will create a sequence of numbers that match the functions in
+# available_filter_functions, and when echoing in a number into the
+# set_ftrace_filter file, it will enable the corresponding function in
+# O(1) time. Making enabling all functions O(n) where n is the number of
+# functions to enable.
+#
+# For either the new or old way, the rest of the operations remain the same.
+#
# # ftrace-bisect ~/full-file ~/test-file ~/non-test-file
# # cat ~/test-file > set_ftrace_filter
#
-# *** Note *** this will take several minutes. Setting multiple functions is
-# an O(n^2) operation, and we are dealing with thousands of functions. So go
-# have coffee, talk with your coworkers, read facebook. And eventually, this
-# operation will end.
-#
# # echo function > current_tracer
#
# If it crashes, we know that ~/test-file has a bad function.
#
# Reboot back to test kernel.
#
-# # cd /sys/kernel/debug/tracing
+# # cd /sys/kernel/tracing
# # mv ~/test-file ~/full-file
#
# If it didn't crash.
--
2.39.0
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
Currently trace_printk() can be used as soon as early_trace_init() is
called from start_kernel(). But if a crash happens, and
"ftrace_dump_on_oops" is set on the kernel command line, all you get will
be:
[ 0.456075] <idle>-0 0dN.2. 347519us : Unknown type 6
[ 0.456075] <idle>-0 0dN.2. 353141us : Unknown type 6
[ 0.456075] <idle>-0 0dN.2. 358684us : Unknown type 6
This is because the trace_printk() event (type 6) hasn't been registered
yet. That gets done via an early_initcall(), which may be early, but not
early enough.
Instead of registering the trace_printk() event (and other ftrace events,
which are not trace events) via an early_initcall(), have them registered at
the same time that trace_printk() can be used. This way, if there is a
crash before early_initcall(), then the trace_printk()s will actually be
useful.
Link: https://lkml.kernel.org/r/20230104161412.019f6c55@gandalf.local.home
Cc: stable(a)vger.kernel.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Fixes: e725c731e3bb1 ("tracing: Split tracing initialization into two for early initialization")
Reported-by: "Joel Fernandes (Google)" <joel(a)joelfernandes.org>
Tested-by: Joel Fernandes (Google) <joel(a)joelfernandes.org>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/trace.c | 2 ++
kernel/trace/trace.h | 1 +
kernel/trace/trace_output.c | 3 +--
3 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a555a861b978..78ed5f1baa8c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -10295,6 +10295,8 @@ void __init early_trace_init(void)
static_key_enable(&tracepoint_printk_key.key);
}
tracer_alloc_buffers();
+
+ init_events();
}
void __init trace_init(void)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e46a49269be2..4eb6d6b97a9f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1490,6 +1490,7 @@ extern void trace_event_enable_cmd_record(bool enable);
extern void trace_event_enable_tgid_record(bool enable);
extern int event_trace_init(void);
+extern int init_events(void);
extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
extern int event_trace_del_tracer(struct trace_array *tr);
extern void __trace_early_add_events(struct trace_array *tr);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 57a13b61f186..bd475a00f96d 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1535,7 +1535,7 @@ static struct trace_event *events[] __initdata = {
NULL
};
-__init static int init_events(void)
+__init int init_events(void)
{
struct trace_event *event;
int i, ret;
@@ -1548,4 +1548,3 @@ __init static int init_events(void)
return 0;
}
-early_initcall(init_events);
--
2.39.0
From: Mark Rutland <mark.rutland(a)arm.com>
Setting filters on an ftrace ops results in some memory being allocated
for the filter hashes, which must be freed before the ops can be freed.
This can be done by removing every individual element of the hash by
calling ftrace_set_filter_ip() or ftrace_set_filter_ips() with `remove`
set, but this is somewhat error prone as it's easy to forget to remove
an element.
Make it easier to clean this up by exporting ftrace_free_filter(), which
can be used to clean up all of the filter hashes after an ftrace_ops has
been unregistered.
Using this, fix the ftrace-direct* samples to free hashes prior to being
unloaded. All other code either removes individual filters explicitly or
is built-in and already calls ftrace_free_filter().
Link: https://lkml.kernel.org/r/20230103124912.2948963-3-mark.rutland@arm.com
Cc: stable(a)vger.kernel.org
Cc: Florent Revest <revest(a)chromium.org>
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Fixes: e1067a07cfbc ("ftrace/samples: Add module to test multi direct modify interface")
Fixes: 5fae941b9a6f ("ftrace/samples: Add multi direct interface test module")
Reviewed-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Reviewed-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
Signed-off-by: Mark Rutland <mark.rutland(a)arm.com>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/ftrace.c | 23 ++++++++++++++++++++-
samples/ftrace/ftrace-direct-multi-modify.c | 1 +
samples/ftrace/ftrace-direct-multi.c | 1 +
3 files changed, 24 insertions(+), 1 deletion(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 442438b93fe9..750aa3f08b25 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1248,12 +1248,17 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash)
call_rcu(&hash->rcu, __free_ftrace_hash_rcu);
}
+/**
+ * ftrace_free_filter - remove all filters for an ftrace_ops
+ * @ops - the ops to remove the filters from
+ */
void ftrace_free_filter(struct ftrace_ops *ops)
{
ftrace_ops_init(ops);
free_ftrace_hash(ops->func_hash->filter_hash);
free_ftrace_hash(ops->func_hash->notrace_hash);
}
+EXPORT_SYMBOL_GPL(ftrace_free_filter);
static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
{
@@ -5839,6 +5844,10 @@ EXPORT_SYMBOL_GPL(modify_ftrace_direct_multi);
*
* Filters denote which functions should be enabled when tracing is enabled
* If @ip is NULL, it fails to update filter.
+ *
+ * This can allocate memory which must be freed before @ops can be freed,
+ * either by removing each filtered addr or by using
+ * ftrace_free_filter(@ops).
*/
int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip,
int remove, int reset)
@@ -5858,7 +5867,11 @@ EXPORT_SYMBOL_GPL(ftrace_set_filter_ip);
*
* Filters denote which functions should be enabled when tracing is enabled
* If @ips array or any ip specified within is NULL , it fails to update filter.
- */
+ *
+ * This can allocate memory which must be freed before @ops can be freed,
+ * either by removing each filtered addr or by using
+ * ftrace_free_filter(@ops).
+*/
int ftrace_set_filter_ips(struct ftrace_ops *ops, unsigned long *ips,
unsigned int cnt, int remove, int reset)
{
@@ -5900,6 +5913,10 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
*
* Filters denote which functions should be enabled when tracing is enabled.
* If @buf is NULL and reset is set, all functions will be enabled for tracing.
+ *
+ * This can allocate memory which must be freed before @ops can be freed,
+ * either by removing each filtered addr or by using
+ * ftrace_free_filter(@ops).
*/
int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
int len, int reset)
@@ -5919,6 +5936,10 @@ EXPORT_SYMBOL_GPL(ftrace_set_filter);
* Notrace Filters denote which functions should not be enabled when tracing
* is enabled. If @buf is NULL and reset is set, all functions will be enabled
* for tracing.
+ *
+ * This can allocate memory which must be freed before @ops can be freed,
+ * either by removing each filtered addr or by using
+ * ftrace_free_filter(@ops).
*/
int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
int len, int reset)
diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c
index d52370cad0b6..a825dbd2c9cf 100644
--- a/samples/ftrace/ftrace-direct-multi-modify.c
+++ b/samples/ftrace/ftrace-direct-multi-modify.c
@@ -152,6 +152,7 @@ static void __exit ftrace_direct_multi_exit(void)
{
kthread_stop(simple_tsk);
unregister_ftrace_direct_multi(&direct, my_tramp);
+ ftrace_free_filter(&direct);
}
module_init(ftrace_direct_multi_init);
diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c
index ec1088922517..d955a2650605 100644
--- a/samples/ftrace/ftrace-direct-multi.c
+++ b/samples/ftrace/ftrace-direct-multi.c
@@ -79,6 +79,7 @@ static int __init ftrace_direct_multi_init(void)
static void __exit ftrace_direct_multi_exit(void)
{
unregister_ftrace_direct_multi(&direct, (unsigned long) my_tramp);
+ ftrace_free_filter(&direct);
}
module_init(ftrace_direct_multi_init);
--
2.39.0
--
Anti-Terrorist and Monetary Crimes Division.
Federal Bureau of Investigation
J. Edgar Hoover Building
935 Pennsylvania Avenue,
NW Washington, DC 20535-0001, USA
FEDERAL BUREAU OF INVESTIGATION MONITORING DEPARTMENT
ATTENTION: Beneficiary,
This is the information reaching my desk from the Office of the
Accountant General that you are Dead and therefore your fund will be
used for another beneficiary that is still alive with a new account
information.
Based on this information, we want to confirm from you if its true or
not and that is why we need an urgent responds from you including your
full contact details so that we can direct you on what to do next.
Faithfully Yours
CHRISTOPHER A WRAY
Federal Office of Investigation
J. Edgar Hoover Building
935 Pennsylvania Avenue,
NW Washington, DC 20535-0001, USA
My last commit to fix profile mode displays on AMD platforms caused
an issue on Intel platforms - sorry!
In it I was reading the current functional mode (MMC, PSC, AMT) from
the BIOS but didn't account for the fact that on some of our Intel
platforms I use a different API which returns just the profile and not
the functional mode.
This commit fixes it so that on Intel platforms it knows the functional
mode is always MMC.
I also fixed a potential problem that a platform may try to set the mode
for both MMC and PSC - which was incorrect.
Tested on X1 Carbon 9 (Intel) and Z13 (AMD).
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216963
Fixes: fde5f74ccfc7 ("platform/x86: thinkpad_acpi: Fix profile mode display in AMT mode")
Signed-off-by: Mark Pearson <mpearson-lenovo(a)squebb.ca>
---
drivers/platform/x86/thinkpad_acpi.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index a95946800ae9..6668d472df39 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -10496,8 +10496,7 @@ static int dytc_profile_set(struct platform_profile_handler *pprof,
if (err)
goto unlock;
}
- }
- if (dytc_capabilities & BIT(DYTC_FC_PSC)) {
+ } else if (dytc_capabilities & BIT(DYTC_FC_PSC)) {
err = dytc_command(DYTC_SET_COMMAND(DYTC_FUNCTION_PSC, perfmode, 1), &output);
if (err)
goto unlock;
@@ -10525,14 +10524,16 @@ static void dytc_profile_refresh(void)
err = dytc_command(DYTC_CMD_MMC_GET, &output);
else
err = dytc_cql_command(DYTC_CMD_GET, &output);
- } else if (dytc_capabilities & BIT(DYTC_FC_PSC))
+ funcmode = DYTC_FUNCTION_MMC;
+ } else if (dytc_capabilities & BIT(DYTC_FC_PSC)) {
err = dytc_command(DYTC_CMD_GET, &output);
-
+ /*Check if we are PSC mode, or have AMT enabled */
+ funcmode = (output >> DYTC_GET_FUNCTION_BIT) & 0xF;
+ }
mutex_unlock(&dytc_mutex);
if (err)
return;
- funcmode = (output >> DYTC_GET_FUNCTION_BIT) & 0xF;
perfmode = (output >> DYTC_GET_MODE_BIT) & 0xF;
convert_dytc_to_profile(funcmode, perfmode, &profile);
if (profile != dytc_current_profile) {
--
2.38.1
This is the start of the stable review cycle for the 4.14.304 release.
There are 25 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Tue, 24 Jan 2023 15:02:08 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.14.304-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.14.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.14.304-rc1
YingChi Long <me(a)inclyc.cn>
x86/fpu: Use _Alignof to avoid undefined behavior in TYPE_ALIGN
Khazhismel Kumykov <khazhy(a)chromium.org>
gsmi: fix null-deref in gsmi_get_variable
Tobias Schramm <t.schramm(a)manjaro.org>
serial: atmel: fix incorrect baudrate setup
Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
serial: pch_uart: Pass correct sg to dma_unmap_sg()
Juhyung Park <qkrwngud825(a)gmail.com>
usb-storage: apply IGNORE_UAS only for HIKSEMI MD202 on RTL9210
Maciej Żenczykowski <maze(a)google.com>
usb: gadget: f_ncm: fix potential NULL ptr deref in ncm_bitrate()
Daniel Scally <dan.scally(a)ideasonboard.com>
usb: gadget: g_webcam: Send color matching descriptor per frame
Alexander Stein <alexander.stein(a)ew.tq-group.com>
usb: host: ehci-fsl: Fix module alias
Michael Adler <michael.adler(a)siemens.com>
USB: serial: cp210x: add SCALANCE LPE-9000 device id
Flavio Suligoi <f.suligoi(a)asem.it>
usb: core: hub: disable autosuspend for TI TUSB8041
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
USB: misc: iowarrior: fix up header size for USB_DEVICE_ID_CODEMERCS_IOW100
Duke Xin(辛安文) <duke_xinanwen(a)163.com>
USB: serial: option: add Quectel EM05CN modem
Duke Xin(辛安文) <duke_xinanwen(a)163.com>
USB: serial: option: add Quectel EM05CN (SG) modem
Ali Mirghasemi <ali.mirghasemi1376(a)gmail.com>
USB: serial: option: add Quectel EC200U modem
Duke Xin(辛安文) <duke_xinanwen(a)163.com>
USB: serial: option: add Quectel EM05-G (RS) modem
Duke Xin(辛安文) <duke_xinanwen(a)163.com>
USB: serial: option: add Quectel EM05-G (CS) modem
Duke Xin(辛安文) <duke_xinanwen(a)163.com>
USB: serial: option: add Quectel EM05-G (GR) modem
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
prlimit: do_prlimit needs to have a speculation check
Jimmy Hu <hhhuuu(a)google.com>
usb: xhci: Check endpoint is valid before dereferencing it
Ricardo Ribalda <ribalda(a)chromium.org>
xhci-pci: set the dma max_seg_size
Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
nilfs2: fix general protection fault in nilfs_btree_insert()
Jaegeuk Kim <jaegeuk(a)kernel.org>
f2fs: let's avoid panic if extent_tree is not created
Jiri Slaby (SUSE) <jirislaby(a)kernel.org>
RDMA/srp: Move large values to a new enum for gcc13
Daniil Tatianin <d-tatianin(a)yandex-team.ru>
net/ethtool/ioctl: return -EOPNOTSUPP if we have no phy stats
Olga Kornievskaia <olga.kornievskaia(a)gmail.com>
pNFS/filelayout: Fix coalescing test for single DS
-------------
Diffstat:
Makefile | 4 ++--
arch/x86/kernel/fpu/init.c | 7 ++-----
drivers/firmware/google/gsmi.c | 7 ++++---
drivers/infiniband/ulp/srp/ib_srp.h | 8 +++++---
drivers/tty/serial/atmel_serial.c | 8 +-------
drivers/tty/serial/pch_uart.c | 2 +-
drivers/usb/core/hub.c | 13 +++++++++++++
drivers/usb/gadget/function/f_ncm.c | 4 +++-
drivers/usb/gadget/legacy/webcam.c | 3 +++
drivers/usb/host/ehci-fsl.c | 2 +-
drivers/usb/host/xhci-pci.c | 2 ++
drivers/usb/host/xhci-ring.c | 5 ++++-
drivers/usb/misc/iowarrior.c | 2 +-
drivers/usb/serial/cp210x.c | 1 +
drivers/usb/serial/option.c | 17 +++++++++++++++++
drivers/usb/storage/uas-detect.h | 13 +++++++++++++
drivers/usb/storage/unusual_uas.h | 7 -------
fs/f2fs/extent_cache.c | 3 ++-
fs/nfs/filelayout/filelayout.c | 8 ++++++++
fs/nilfs2/btree.c | 15 ++++++++++++---
kernel/sys.c | 2 ++
net/core/ethtool.c | 3 ++-
22 files changed, 99 insertions(+), 37 deletions(-)
Current usage of kvm_io_device requires users to destruct it with an extra
call of kvm_iodevice_destructor after the device gets unregistered from
the kvm_io_bus. This is not necessary and can cause errors if a user
forgot to make the extra call.
Simplify the usage by combining kvm_iodevice_destructor into
kvm_io_bus_unregister_dev. This reduces LOCs a bit for users and can
avoid the leakage of destructing the device explicitly.
The fix was originally provided by Sean Christopherson.
Link: https://lore.kernel.org/lkml/DS0PR11MB6373F27D0EE6CD28C784478BDCEC9@DS0PR11…
Fixes: 5d3c4c79384a ("KVM: Stop looking for coalesced MMIO zones if the bus is destroyed")
Cc: stable(a)vger.kernel.org
Reported-by: ��������� <liujingfeng(a)qianxin.com>
Signed-off-by: Wei Wang <wei.w.wang(a)intel.com>
---
include/kvm/iodev.h | 6 ------
virt/kvm/coalesced_mmio.c | 1 -
virt/kvm/eventfd.c | 1 -
virt/kvm/kvm_main.c | 24 +++++++++++++++---------
4 files changed, 15 insertions(+), 17 deletions(-)
diff --git a/include/kvm/iodev.h b/include/kvm/iodev.h
index d75fc4365746..56619e33251e 100644
--- a/include/kvm/iodev.h
+++ b/include/kvm/iodev.h
@@ -55,10 +55,4 @@ static inline int kvm_iodevice_write(struct kvm_vcpu *vcpu,
: -EOPNOTSUPP;
}
-static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
-{
- if (dev->ops->destructor)
- dev->ops->destructor(dev);
-}
-
#endif /* __KVM_IODEV_H__ */
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 0be80c213f7f..d7135a5e76f8 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -195,7 +195,6 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
*/
if (r)
break;
- kvm_iodevice_destructor(&dev->dev);
}
}
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 2a3ed401ce46..1b277afb545b 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -898,7 +898,6 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
bus = kvm_get_bus(kvm, bus_idx);
if (bus)
bus->ioeventfd_count--;
- ioeventfd_release(p);
ret = 0;
break;
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 13e88297f999..582757ebdce6 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -5200,6 +5200,12 @@ static struct notifier_block kvm_reboot_notifier = {
.priority = 0,
};
+static void kvm_iodevice_destructor(struct kvm_io_device *dev)
+{
+ if (dev->ops->destructor)
+ dev->ops->destructor(dev);
+}
+
static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
{
int i;
@@ -5423,7 +5429,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
struct kvm_io_device *dev)
{
- int i, j;
+ int i;
struct kvm_io_bus *new_bus, *bus;
lockdep_assert_held(&kvm->slots_lock);
@@ -5453,18 +5459,18 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
synchronize_srcu_expedited(&kvm->srcu);
- /* Destroy the old bus _after_ installing the (null) bus. */
+ /*
+ * If (null) bus is installed, destroy the old bus, including all the
+ * attached devices. Otherwise, destroy the caller's device only.
+ */
if (!new_bus) {
pr_err("kvm: failed to shrink bus, removing it completely\n");
- for (j = 0; j < bus->dev_count; j++) {
- if (j == i)
- continue;
- kvm_iodevice_destructor(bus->range[j].dev);
- }
+ kvm_io_bus_destroy(bus);
+ return -ENOMEM;
}
- kfree(bus);
- return new_bus ? 0 : -ENOMEM;
+ kvm_iodevice_destructor(dev);
+ return 0;
}
struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
--
2.27.0
The patch titled
Subject: Revert "mm: kmemleak: alloc gray object for reserved region with direct map"
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
revert-mm-kmemleak-alloc-gray-object-for-reserved-region-with-direct-map.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: "Isaac J. Manjarres" <isaacmanjarres(a)google.com>
Subject: Revert "mm: kmemleak: alloc gray object for reserved region with direct map"
Date: Tue, 24 Jan 2023 15:02:54 -0800
This reverts commit 972fa3a7c17c9d60212e32ecc0205dc585b1e769.
Kmemleak operates by periodically scanning memory regions for pointers to
allocated memory blocks to determine if they are leaked or not. However,
reserved memory regions can be used for DMA transactions between a device
and a CPU, and thus, wouldn't contain pointers to allocated memory blocks,
making them inappropriate for kmemleak to scan. Thus, revert this commit.
Link: https://lkml.kernel.org/r/20230124230254.295589-1-isaacmanjarres@google.com
Fixes: 972fa3a7c17c9 ("mm: kmemleak: alloc gray object for reserved region with direct map")
Signed-off-by: Isaac J. Manjarres <isaacmanjarres(a)google.com>
Cc: Calvin Zhang <calvinzhang.cool(a)gmail.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Frank Rowand <frowand.list(a)gmail.com>
Cc: Rob Herring <robh+dt(a)kernel.org>
Cc: Saravana Kannan <saravanak(a)google.com>
Cc: <stable(a)vger.kernel.org> [5.17+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
drivers/of/fdt.c | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
--- a/drivers/of/fdt.c~revert-mm-kmemleak-alloc-gray-object-for-reserved-region-with-direct-map
+++ a/drivers/of/fdt.c
@@ -26,7 +26,6 @@
#include <linux/serial_core.h>
#include <linux/sysfs.h>
#include <linux/random.h>
-#include <linux/kmemleak.h>
#include <asm/setup.h> /* for COMMAND_LINE_SIZE */
#include <asm/page.h>
@@ -525,12 +524,9 @@ static int __init __reserved_mem_reserve
size = dt_mem_next_cell(dt_root_size_cells, &prop);
if (size &&
- early_init_dt_reserve_memory(base, size, nomap) == 0) {
+ early_init_dt_reserve_memory(base, size, nomap) == 0)
pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n",
uname, &base, (unsigned long)(size / SZ_1M));
- if (!nomap)
- kmemleak_alloc_phys(base, size, 0);
- }
else
pr_err("Reserved memory: failed to reserve memory for node '%s': base %pa, size %lu MiB\n",
uname, &base, (unsigned long)(size / SZ_1M));
_
Patches currently in -mm which might be from isaacmanjarres(a)google.com are
revert-mm-kmemleak-alloc-gray-object-for-reserved-region-with-direct-map.patch
mm-cmac-make-kmemleak-aware-of-all-cma-regions.patch
mm-cmac-delete-kmemleak-objects-when-freeing-cma-areas-to-buddy-at-boot.patch
PATCH_SUBJECT: netfilter: conntrack: do not renew entry stuck in tcp
SYN_SENT state
PATCH_COMMIT: e15d4cdf27cb0c1e977270270b2cea12e0955edd
Reason for backport request:
We've had a few customers experience issues with dnat such that their
kubernetes processes are now unreachable. Because dnat rules fail to
update, kubernetes pod IPs won't resolve and traffic gets blackholed
causing any healthcheck service to kill and restart the pod. This
commit has been verified to fix the issue and the ask here is to
backport it to kernel versions v5.4 and v5.10.
Thanks
Nobel
please backport these EFI patches to v6.1:
8a9a1a18731eb123 arm64: efi: Avoid workqueue to check whether EFI
runtime is live
7ea55715c421d22c arm64: efi: Account for the EFI runtime stack in stack unwinder
Trip temperatures are read using ACPI methods and stored in the memory
during zone initializtion and when the firmware sends a notification for
change. This trip temperature is returned when the thermal core calls via
callback get_trip_temp().
But it is possible that while updating the memory copy of the trips when
the firmware sends a notification for change, thermal core is reading the
trip temperature via the callback get_trip_temp(). This may return invalid
trip temperature.
To address this add a mutex to protect the invalid temperature reads in
the callback get_trip_temp() and int340x_thermal_read_trips().
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Cc: stable(a)vger.kernel.org # 5.0+
---
v2:
- rebased on linux-next
- Add ret variable and remove return as suugested by Rafael
.../int340x_thermal/int340x_thermal_zone.c | 18 +++++++++++++++---
.../int340x_thermal/int340x_thermal_zone.h | 1 +
2 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
index 228f44260b27..5fda1e67b793 100644
--- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
+++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
@@ -41,7 +41,9 @@ static int int340x_thermal_get_trip_temp(struct thermal_zone_device *zone,
int trip, int *temp)
{
struct int34x_thermal_zone *d = zone->devdata;
- int i;
+ int i, ret = 0;
+
+ mutex_lock(&d->trip_mutex);
if (trip < d->aux_trip_nr)
*temp = d->aux_trips[trip];
@@ -60,10 +62,12 @@ static int int340x_thermal_get_trip_temp(struct thermal_zone_device *zone,
}
}
if (i == INT340X_THERMAL_MAX_ACT_TRIP_COUNT)
- return -EINVAL;
+ ret = -EINVAL;
}
- return 0;
+ mutex_unlock(&d->trip_mutex);
+
+ return ret;
}
static int int340x_thermal_get_trip_type(struct thermal_zone_device *zone,
@@ -165,6 +169,8 @@ int int340x_thermal_read_trips(struct int34x_thermal_zone *int34x_zone)
int trip_cnt = int34x_zone->aux_trip_nr;
int i;
+ mutex_lock(&int34x_zone->trip_mutex);
+
int34x_zone->crt_trip_id = -1;
if (!int340x_thermal_get_trip_config(int34x_zone->adev->handle, "_CRT",
&int34x_zone->crt_temp))
@@ -192,6 +198,8 @@ int int340x_thermal_read_trips(struct int34x_thermal_zone *int34x_zone)
int34x_zone->act_trips[i].valid = true;
}
+ mutex_unlock(&int34x_zone->trip_mutex);
+
return trip_cnt;
}
EXPORT_SYMBOL_GPL(int340x_thermal_read_trips);
@@ -215,6 +223,8 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev,
if (!int34x_thermal_zone)
return ERR_PTR(-ENOMEM);
+ mutex_init(&int34x_thermal_zone->trip_mutex);
+
int34x_thermal_zone->adev = adev;
int34x_thermal_zone->ops = kmemdup(&int340x_thermal_zone_ops,
@@ -277,6 +287,7 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev,
err_trip_alloc:
kfree(int34x_thermal_zone->ops);
err_ops_alloc:
+ mutex_destroy(&int34x_thermal_zone->trip_mutex);
kfree(int34x_thermal_zone);
return ERR_PTR(ret);
}
@@ -289,6 +300,7 @@ void int340x_thermal_zone_remove(struct int34x_thermal_zone
acpi_lpat_free_conversion_table(int34x_thermal_zone->lpat_table);
kfree(int34x_thermal_zone->aux_trips);
kfree(int34x_thermal_zone->ops);
+ mutex_destroy(&int34x_thermal_zone->trip_mutex);
kfree(int34x_thermal_zone);
}
EXPORT_SYMBOL_GPL(int340x_thermal_zone_remove);
diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h
index e28ab1ba5e06..6610a9cc441b 100644
--- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h
+++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h
@@ -32,6 +32,7 @@ struct int34x_thermal_zone {
struct thermal_zone_device_ops *ops;
void *priv_data;
struct acpi_lpat_conversion_table *lpat_table;
+ struct mutex trip_mutex;
};
struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *,
--
2.31.1
From: Rasmus Villemoes <linux(a)rasmusvillemoes.dk>
[ Upstream commit 0f9c608d4a1eb852d6769d2fc5906c71c02565ae ]
It was never guaranteed to be exactly eight, but since commit
548b8b5168c9 ("scripts/setlocalversion: make git describe output more
reliable"), it has been exactly 12.
Signed-off-by: Rasmus Villemoes <linux(a)rasmusvillemoes.dk>
Signed-off-by: Masahiro Yamada <masahiroy(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
init/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/init/Kconfig b/init/Kconfig
index c87858c434cc..bc3c44f6b503 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -101,7 +101,7 @@ config LOCALVERSION_AUTO
appended after any matching localversion* files, and after the value
set in CONFIG_LOCALVERSION.
- (The actual string used here is the first eight characters produced
+ (The actual string used here is the first 12 characters produced
by running the command:
$ git rev-parse --verify HEAD
--
2.39.0
Key information in wext.connect is not reset on (re)connect and can hold
data from a previous connection.
Reset key data to avoid that drivers or mac80211 incorrectly detect a
WEP connection request and access the freed or already reused memory.
Additionally optimize cfg80211_sme_connect() and avoid an useless
schedule of conn_work.
Fixes: fffd0934b939 ("cfg80211: rework key operation")
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/c80f04d2-8159-a02a-9287-26e5ec838826@wetzel-home.…
Signed-off-by: Alexander Wetzel <alexander(a)wetzel-home.de>
---
I was first wondering if the dangling scheduled work was part of the
problem. It's kind of pointless to schedule a work and then just do the job
yourself. While it turned out to be benign I still added it to the fix here.
Alexander
---
net/wireless/sme.c | 29 ++++++++++++++++++++++++-----
1 file changed, 24 insertions(+), 5 deletions(-)
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 123248b2c0be..8d8176e31e31 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -285,6 +285,15 @@ void cfg80211_conn_work(struct work_struct *work)
wiphy_unlock(&rdev->wiphy);
}
+static void cfg80211_step_auth_next(struct cfg80211_conn *conn,
+ struct cfg80211_bss *bss)
+{
+ memcpy(conn->bssid, bss->bssid, ETH_ALEN);
+ conn->params.bssid = conn->bssid;
+ conn->params.channel = bss->channel;
+ conn->state = CFG80211_CONN_AUTHENTICATE_NEXT;
+}
+
/* Returned bss is reference counted and must be cleaned up appropriately. */
static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev)
{
@@ -302,10 +311,7 @@ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev)
if (!bss)
return NULL;
- memcpy(wdev->conn->bssid, bss->bssid, ETH_ALEN);
- wdev->conn->params.bssid = wdev->conn->bssid;
- wdev->conn->params.channel = bss->channel;
- wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT;
+ cfg80211_step_auth_next(wdev->conn, bss);
schedule_work(&rdev->conn_work);
return bss;
@@ -597,7 +603,12 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
wdev->conn->params.ssid_len = wdev->u.client.ssid_len;
/* see if we have the bss already */
- bss = cfg80211_get_conn_bss(wdev);
+ bss = cfg80211_get_bss(wdev->wiphy, wdev->conn->params.channel,
+ wdev->conn->params.bssid,
+ wdev->conn->params.ssid,
+ wdev->conn->params.ssid_len,
+ wdev->conn_bss_type,
+ IEEE80211_PRIVACY(wdev->conn->params.privacy));
if (prev_bssid) {
memcpy(wdev->conn->prev_bssid, prev_bssid, ETH_ALEN);
@@ -608,6 +619,7 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
if (bss) {
enum nl80211_timeout_reason treason;
+ cfg80211_step_auth_next(wdev->conn, bss);
err = cfg80211_conn_do_work(wdev, &treason);
cfg80211_put_bss(wdev->wiphy, bss);
} else {
@@ -1464,6 +1476,13 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
} else {
if (WARN_ON(connkeys))
return -EINVAL;
+
+ /* connect can point to wdev->connect
+ * and may hold outdated key data
+ */
+ connect->key = NULL;
+ connect->key_len = 0;
+ connect->key_idx = 0;
}
wdev->connect_keys = connkeys;
--
2.39.0
The condition determining whether the preallocation can be used had
an off-by-one error so we didn't discard preallocation when new
allocation was just following it. This can then confuse code in
inode_getblk().
CC: stable(a)vger.kernel.org
Fixes: 16d055656814 ("udf: Discard preallocation before extending file with a hole")
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
fs/udf/inode.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 6826c2aa021f..15e0d9f23c06 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -361,7 +361,7 @@ static int udf_map_block(struct inode *inode, struct udf_map_rq *map)
* Block beyond EOF and prealloc extents? Just discard preallocation
* as it is not useful and complicates things.
*/
- if (((loff_t)map->lblk) << inode->i_blkbits > iinfo->i_lenExtents)
+ if (((loff_t)map->lblk) << inode->i_blkbits >= iinfo->i_lenExtents)
udf_discard_prealloc(inode);
udf_clear_extent_cache(inode);
err = inode_getblk(inode, map);
--
2.35.3
When we append new block just after the end of preallocated extent, the
code in inode_getblk() wrongly determined we're going to use the
preallocated extent which resulted in adding block into a wrong logical
offset in the file. Sequence like this manifests it:
xfs_io -f -c "pwrite 0x2cacf 0xd122" -c "truncate 0x2dd6f" \
-c "pwrite 0x27fd9 0x69a9" -c "pwrite 0x32981 0x7244" <file>
The code that determined the use of preallocated extent is actually
stale because udf_do_extend_file() does not create preallocation anymore
so after calling that function we are sure there's no usable
preallocation. Just remove the faulty condition.
CC: stable(a)vger.kernel.org
Fixes: 16d055656814 ("udf: Discard preallocation before extending file with a hole")
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
fs/udf/inode.c | 24 +++++++++++-------------
1 file changed, 11 insertions(+), 13 deletions(-)
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 8f55b37ddcad..6826c2aa021f 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -742,19 +742,17 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map)
c = 0;
offset = 0;
count += ret;
- /* We are not covered by a preallocated extent? */
- if ((laarr[0].extLength & UDF_EXTENT_FLAG_MASK) !=
- EXT_NOT_RECORDED_ALLOCATED) {
- /* Is there any real extent? - otherwise we overwrite
- * the fake one... */
- if (count)
- c = !c;
- laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
- inode->i_sb->s_blocksize;
- memset(&laarr[c].extLocation, 0x00,
- sizeof(struct kernel_lb_addr));
- count++;
- }
+ /*
+ * Is there any real extent? - otherwise we overwrite the fake
+ * one...
+ */
+ if (count)
+ c = !c;
+ laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
+ inode->i_sb->s_blocksize;
+ memset(&laarr[c].extLocation, 0x00,
+ sizeof(struct kernel_lb_addr));
+ count++;
endnum = c + 1;
lastblock = 1;
} else {
--
2.35.3
This is the start of the stable review cycle for the 5.4.230 release.
There are 51 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 25 Jan 2023 09:48:53 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.230-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.4.230-rc2
Hugh Dickins <hughd(a)google.com>
mm/khugepaged: fix collapse_pte_mapped_thp() to allow anon_vma
YingChi Long <me(a)inclyc.cn>
x86/fpu: Use _Alignof to avoid undefined behavior in TYPE_ALIGN
Joshua Ashton <joshua(a)froggi.es>
drm/amd/display: Fix COLOR_SPACE_YCBCR2020_TYPE matrix
hongao <hongao(a)uniontech.com>
drm/amd/display: Fix set scaling doesn's work
Sasa Dragic <sasa.dragic(a)gmail.com>
drm/i915: re-disable RC6p on Sandy Bridge
Khazhismel Kumykov <khazhy(a)chromium.org>
gsmi: fix null-deref in gsmi_get_variable
Tobias Schramm <t.schramm(a)manjaro.org>
serial: atmel: fix incorrect baudrate setup
Mohan Kumar <mkumard(a)nvidia.com>
dmaengine: tegra210-adma: fix global intr clear
Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
serial: pch_uart: Pass correct sg to dma_unmap_sg()
Heiner Kallweit <hkallweit1(a)gmail.com>
dt-bindings: phy: g12a-usb3-pcie-phy: fix compatible string documentation
Juhyung Park <qkrwngud825(a)gmail.com>
usb-storage: apply IGNORE_UAS only for HIKSEMI MD202 on RTL9210
Maciej Żenczykowski <maze(a)google.com>
usb: gadget: f_ncm: fix potential NULL ptr deref in ncm_bitrate()
Daniel Scally <dan.scally(a)ideasonboard.com>
usb: gadget: g_webcam: Send color matching descriptor per frame
Prashant Malani <pmalani(a)chromium.org>
usb: typec: altmodes/displayport: Fix pin assignment calculation
Prashant Malani <pmalani(a)chromium.org>
usb: typec: altmodes/displayport: Add pin assignment helper
Alexander Stein <alexander.stein(a)ew.tq-group.com>
usb: host: ehci-fsl: Fix module alias
Michael Adler <michael.adler(a)siemens.com>
USB: serial: cp210x: add SCALANCE LPE-9000 device id
Alan Stern <stern(a)rowland.harvard.edu>
USB: gadgetfs: Fix race between mounting and unmounting
Enzo Matsumiya <ematsumiya(a)suse.de>
cifs: do not include page data when checking signature
Filipe Manana <fdmanana(a)suse.com>
btrfs: fix race between quota rescan and disable leading to NULL pointer deref
Samuel Holland <samuel(a)sholland.org>
mmc: sunxi-mmc: Fix clock refcount imbalance during unbind
Ian Abbott <abbotti(a)mev.co.uk>
comedi: adv_pci1760: Fix PWM instruction handling
Flavio Suligoi <f.suligoi(a)asem.it>
usb: core: hub: disable autosuspend for TI TUSB8041
Ola Jeppsson <ola(a)snap.com>
misc: fastrpc: Fix use-after-free race condition for maps
Abel Vesa <abel.vesa(a)linaro.org>
misc: fastrpc: Don't remove map on creater_process and device_release
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
USB: misc: iowarrior: fix up header size for USB_DEVICE_ID_CODEMERCS_IOW100
Duke Xin(辛安文) <duke_xinanwen(a)163.com>
USB: serial: option: add Quectel EM05CN modem
Duke Xin(辛安文) <duke_xinanwen(a)163.com>
USB: serial: option: add Quectel EM05CN (SG) modem
Ali Mirghasemi <ali.mirghasemi1376(a)gmail.com>
USB: serial: option: add Quectel EC200U modem
Duke Xin(辛安文) <duke_xinanwen(a)163.com>
USB: serial: option: add Quectel EM05-G (RS) modem
Duke Xin(辛安文) <duke_xinanwen(a)163.com>
USB: serial: option: add Quectel EM05-G (CS) modem
Duke Xin(辛安文) <duke_xinanwen(a)163.com>
USB: serial: option: add Quectel EM05-G (GR) modem
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
prlimit: do_prlimit needs to have a speculation check
Mathias Nyman <mathias.nyman(a)linux.intel.com>
xhci: Detect lpm incapable xHC USB3 roothub ports from ACPI tables
Mathias Nyman <mathias.nyman(a)linux.intel.com>
usb: acpi: add helper to check port lpm capability using acpi _DSM
Mathias Nyman <mathias.nyman(a)linux.intel.com>
xhci: Add a flag to disable USB3 lpm on a xhci root port level.
Mathias Nyman <mathias.nyman(a)linux.intel.com>
xhci: Add update_hub_device override for PCI xHCI hosts
Mathias Nyman <mathias.nyman(a)linux.intel.com>
xhci: Fix null pointer dereference when host dies
Jimmy Hu <hhhuuu(a)google.com>
usb: xhci: Check endpoint is valid before dereferencing it
Ricardo Ribalda <ribalda(a)chromium.org>
xhci-pci: set the dma max_seg_size
Yuchi Yang <yangyuchi66(a)gmail.com>
ALSA: hda/realtek - Turn on power early
Chris Wilson <chris(a)chris-wilson.co.uk>
drm/i915/gt: Reset twice
Ding Hui <dinghui(a)sangfor.com.cn>
efi: fix userspace infinite retry read efivars after EFI runtime services page fault
Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
nilfs2: fix general protection fault in nilfs_btree_insert()
Shawn.Shao <shawn.shao(a)jaguarmicro.com>
Add exception protection processing for vd in axi_chan_handle_err function
Arend van Spriel <arend.vanspriel(a)broadcom.com>
wifi: brcmfmac: fix regression for Broadcom PCIe wifi devices
Jaegeuk Kim <jaegeuk(a)kernel.org>
f2fs: let's avoid panic if extent_tree is not created
Jiri Slaby (SUSE) <jirislaby(a)kernel.org>
RDMA/srp: Move large values to a new enum for gcc13
Daniil Tatianin <d-tatianin(a)yandex-team.ru>
net/ethtool/ioctl: return -EOPNOTSUPP if we have no phy stats
Hao Sun <sunhao.th(a)gmail.com>
selftests/bpf: check null propagation only neither reg is PTR_TO_BTF_ID
Olga Kornievskaia <olga.kornievskaia(a)gmail.com>
pNFS/filelayout: Fix coalescing test for single DS
-------------
Diffstat:
...ie-phy.yaml => amlogic,g12a-usb3-pcie-phy.yaml} | 6 +-
Makefile | 4 +-
arch/x86/kernel/fpu/init.c | 7 +--
drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c | 6 ++
drivers/dma/tegra210-adma.c | 2 +-
drivers/firmware/efi/runtime-wrappers.c | 1 +
drivers/firmware/google/gsmi.c | 7 ++-
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 +-
.../gpu/drm/amd/display/dc/core/dc_hw_sequencer.c | 4 +-
drivers/gpu/drm/i915/gt/intel_reset.c | 34 +++++++++--
drivers/gpu/drm/i915/i915_pci.c | 3 +-
drivers/infiniband/ulp/srp/ib_srp.h | 8 ++-
drivers/misc/fastrpc.c | 26 +++++----
drivers/mmc/host/sunxi-mmc.c | 8 ++-
.../wireless/broadcom/brcm80211/brcmfmac/pcie.c | 2 +-
drivers/staging/comedi/drivers/adv_pci1760.c | 2 +-
drivers/tty/serial/atmel_serial.c | 8 +--
drivers/tty/serial/pch_uart.c | 2 +-
drivers/usb/core/hub.c | 13 +++++
drivers/usb/core/usb-acpi.c | 65 ++++++++++++++++++++++
drivers/usb/gadget/function/f_ncm.c | 4 +-
drivers/usb/gadget/legacy/inode.c | 28 +++++++---
drivers/usb/gadget/legacy/webcam.c | 3 +
drivers/usb/host/ehci-fsl.c | 2 +-
drivers/usb/host/xhci-pci.c | 45 +++++++++++++++
drivers/usb/host/xhci-ring.c | 5 +-
drivers/usb/host/xhci.c | 18 +++++-
drivers/usb/host/xhci.h | 5 ++
drivers/usb/misc/iowarrior.c | 2 +-
drivers/usb/serial/cp210x.c | 1 +
drivers/usb/serial/option.c | 17 ++++++
drivers/usb/storage/uas-detect.h | 13 +++++
drivers/usb/storage/unusual_uas.h | 7 ---
drivers/usb/typec/altmodes/displayport.c | 22 +++++---
fs/btrfs/qgroup.c | 25 ++++++---
fs/cifs/smb2pdu.c | 15 +++--
fs/f2fs/extent_cache.c | 3 +-
fs/nfs/filelayout/filelayout.c | 8 +++
fs/nilfs2/btree.c | 15 ++++-
include/linux/usb.h | 3 +
kernel/sys.c | 2 +
mm/khugepaged.c | 14 ++---
net/core/ethtool.c | 3 +-
sound/pci/hda/patch_realtek.c | 30 +++++-----
.../selftests/bpf/prog_tests/jeq_infer_not_null.c | 9 +++
.../selftests/bpf/progs/jeq_infer_not_null_fail.c | 42 ++++++++++++++
46 files changed, 432 insertions(+), 121 deletions(-)
From: Roberto Sassu <roberto.sassu(a)huawei.com>
Commit 98de59bfe4b2f ("take calculation of final prot in
security_mmap_file() into a helper") moved the code to update prot with the
actual protection flags to be granted to the requestor by the kernel to a
helper called mmap_prot(). However, the patch didn't update the argument
passed to ima_file_mmap(), making it receive the requested prot instead of
the final computed prot.
A possible consequence is that files mmapped as executable might not be
measured/appraised if PROT_EXEC is not requested but subsequently added in
the final prot.
Replace prot with mmap_prot(file, prot) as the second argument of
ima_file_mmap() to restore the original behavior.
Cc: stable(a)vger.kernel.org
Fixes: 98de59bfe4b2 ("take calculation of final prot in security_mmap_file() into a helper")
Signed-off-by: Roberto Sassu <roberto.sassu(a)huawei.com>
---
security/security.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/security/security.c b/security/security.c
index d1571900a8c7..0d2359d588a1 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1666,7 +1666,7 @@ int security_mmap_file(struct file *file, unsigned long prot,
mmap_prot(file, prot), flags);
if (ret)
return ret;
- return ima_file_mmap(file, prot);
+ return ima_file_mmap(file, mmap_prot(file, prot));
}
int security_mmap_addr(unsigned long addr)
--
2.25.1
MST has been broken on amdgpu after a refactor in drm_dp_mst
code that was aligning drm_dp_mst more closely with the atomic
model.
The gitlab issue: https://gitlab.freedesktop.org/drm/amd/-/issues/2171
This series fixes it.
It can be found at
https://gitlab.freedesktop.org/hwentland/linux/-/tree/mst_regression
A stable-6.1.y rebase can be found at
https://gitlab.freedesktop.org/hwentland/linux/-/tree/mst_regression_6.1
Lyude Paul (1):
drm/amdgpu/display/mst: Fix mst_state->pbn_div and slot count
assignments
Wayne Lin (6):
drm/amdgpu/display/mst: limit payload to be updated one by one
drm/amdgpu/display/mst: update mst_mgr relevant variable when long HPD
drm/drm_print: correct format problem
drm/display/dp_mst: Correct the kref of port.
drm/amdgpu/display/mst: adjust the naming of mst_port and port of
aconnector
drm/amdgpu/display/mst: adjust the logic in 2nd phase of updating
payload
drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 4 +-
.../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 48 +++++++++---
.../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 4 +-
.../drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c | 2 +-
.../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 16 ++--
.../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 76 +++++++++++++------
.../display/amdgpu_dm/amdgpu_dm_mst_types.c | 53 ++++++-------
drivers/gpu/drm/amd/display/dc/core/dc_link.c | 14 +++-
drivers/gpu/drm/display/drm_dp_mst_topology.c | 4 +-
include/drm/drm_print.h | 2 +-
10 files changed, 143 insertions(+), 80 deletions(-)
--
2.39.0
The bug that upstream commit c6c7f2a84da45 ("nfsd: Ensure knfsd shuts
down when the "nfsd" pseudofs is unmounted") fixes in kernel v5.13
exists in kernel v5.10 too.
That is, knfsd threads are left behind once the nfsd pseudofs is
unmounted, e.g. when the container is killed.
I backported the patch to kernel v5.10, and tested it.
Trond Myklebust (1):
nfsd: Ensure knfsd shuts down when the "nfsd" pseudofs is unmounted
fs/nfsd/netns.h | 6 +++---
fs/nfsd/nfs4state.c | 8 +-------
fs/nfsd/nfsctl.c | 14 ++------------
fs/nfsd/nfsd.h | 3 +--
fs/nfsd/nfssvc.c | 35 ++++++++++++++++++++++++++++++++++-
5 files changed, 41 insertions(+), 25 deletions(-)
--
2.30.2
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
56d8e3180c06 ("io_uring/msg_ring: fix flagging remote execution")
423d5081d045 ("io_uring/msg_ring: move double lock/unlock helpers higher up")
761c61c15903 ("io_uring/msg_ring: flag target ring as having task_work, if needed")
6d043ee1164c ("io_uring: do msg_ring in target task via tw")
172113101641 ("io_uring: extract a io_msg_install_complete helper")
11373026f296 ("io_uring: get rid of double locking")
4c979eaefa43 ("io_uring: improve io_double_lock_ctx fail handling")
b529c96a896b ("io_uring: remove overflow param from io_post_aux_cqe")
a77ab745f28d ("io_uring: make io_fill_cqe_aux static")
9b8c54755a2b ("io_uring: add io_aux_cqe which allows deferred completion")
931147ddfa6e ("io_uring: allow defer completion for aux posted cqes")
1bec951c3809 ("io_uring: iopoll protect complete_post")
fa18fa2272c7 ("io_uring: inline __io_req_complete_put()")
f9d567c75ec2 ("io_uring: inline __io_req_complete_post()")
e2ad599d1ed3 ("io_uring: allow multishot recv CQEs to overflow")
515e26961295 ("io_uring: revert "io_uring fix multishot accept ordering"")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 56d8e3180c065c9b78ed77afcd0cf99677a4e22f Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Fri, 20 Jan 2023 16:38:05 +0000
Subject: [PATCH] io_uring/msg_ring: fix flagging remote execution
There is a couple of problems with queueing a tw in io_msg_ring_data()
for remote execution. First, once we queue it the target ring can
go away and so setting IORING_SQ_TASKRUN there is not safe. Secondly,
the userspace might not expect IORING_SQ_TASKRUN.
Extract a helper and uniformly use TWA_SIGNAL without TWA_SIGNAL_NO_IPI
tricks for now, just as it was done in the original patch.
Cc: stable(a)vger.kernel.org
Fixes: 6d043ee1164ca ("io_uring: do msg_ring in target task via tw")
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index a333781565d3..bb868447dcdf 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -58,6 +58,25 @@ void io_msg_ring_cleanup(struct io_kiocb *req)
msg->src_file = NULL;
}
+static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
+{
+ if (!target_ctx->task_complete)
+ return false;
+ return current != target_ctx->submitter_task;
+}
+
+static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func)
+{
+ struct io_ring_ctx *ctx = req->file->private_data;
+ struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
+
+ init_task_work(&msg->tw, func);
+ if (task_work_add(ctx->submitter_task, &msg->tw, TWA_SIGNAL))
+ return -EOWNERDEAD;
+
+ return IOU_ISSUE_SKIP_COMPLETE;
+}
+
static void io_msg_tw_complete(struct callback_head *head)
{
struct io_msg *msg = container_of(head, struct io_msg, tw);
@@ -96,15 +115,8 @@ static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
if (msg->src_fd || msg->dst_fd || msg->flags)
return -EINVAL;
- if (target_ctx->task_complete && current != target_ctx->submitter_task) {
- init_task_work(&msg->tw, io_msg_tw_complete);
- if (task_work_add(target_ctx->submitter_task, &msg->tw,
- TWA_SIGNAL_NO_IPI))
- return -EOWNERDEAD;
-
- atomic_or(IORING_SQ_TASKRUN, &target_ctx->rings->sq_flags);
- return IOU_ISSUE_SKIP_COMPLETE;
- }
+ if (io_msg_need_remote(target_ctx))
+ return io_msg_exec_remote(req, io_msg_tw_complete);
ret = -EOVERFLOW;
if (target_ctx->flags & IORING_SETUP_IOPOLL) {
@@ -202,14 +214,8 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
req->flags |= REQ_F_NEED_CLEANUP;
}
- if (target_ctx->task_complete && current != target_ctx->submitter_task) {
- init_task_work(&msg->tw, io_msg_tw_fd_complete);
- if (task_work_add(target_ctx->submitter_task, &msg->tw,
- TWA_SIGNAL))
- return -EOWNERDEAD;
-
- return IOU_ISSUE_SKIP_COMPLETE;
- }
+ if (io_msg_need_remote(target_ctx))
+ return io_msg_exec_remote(req, io_msg_tw_fd_complete);
return io_msg_install_complete(req, issue_flags);
}
Hi Greg,
As mentioned, two of them could be discarded. Here are the 4 that should
indeed get applied, ported to 5.10/15 stable and tested. The series
applies cleanly to both trees, so just sending one set rather than applying
to the individual "failed to apply" emails.
--
Jens Axboe