From: Eric Biggers <ebiggers(a)google.com>
The AES library code (which originally came from crypto/aes_ti.c) is
supposed to be constant-time, to the extent possible for a C
implementation. But the hardening measure of disabling interrupts while
the S-box is loaded into cache was not included in the library version;
it was left only in the crypto API wrapper in crypto/aes_ti.c.
Move this logic into the library version so that everyone gets it.
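To illustrate (a hedged sketch, not part of the diff below): with the logic
moved, any library user gets the IRQ-disabled window implicitly:

	#include <crypto/aes.h>

	/* Hypothetical caller; aes_encrypt() now masks IRQs internally
	 * around the S-box accesses, so nothing special is needed here. */
	static void example_encrypt_block(const struct crypto_aes_ctx *ctx,
					  u8 dst[AES_BLOCK_SIZE],
					  const u8 src[AES_BLOCK_SIZE])
	{
		aes_encrypt(ctx, dst, src);
	}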
Fixes: e59c1c987456 ("crypto: aes - create AES library based on the fixed time AES code")
Cc: <stable(a)vger.kernel.org> # v5.4+
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Signed-off-by: Eric Biggers <ebiggers(a)google.com>
---
crypto/aes_ti.c | 18 ------------------
lib/crypto/aes.c | 18 ++++++++++++++++++
2 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/crypto/aes_ti.c b/crypto/aes_ti.c
index 205c2c257d4926..121f36621d6dcf 100644
--- a/crypto/aes_ti.c
+++ b/crypto/aes_ti.c
@@ -20,33 +20,15 @@ static int aesti_set_key(struct crypto_tfm *tfm, const u8 *in_key,
static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
- unsigned long flags;
-
- /*
- * Temporarily disable interrupts to avoid races where cachelines are
- * evicted when the CPU is interrupted to do something else.
- */
- local_irq_save(flags);
aes_encrypt(ctx, out, in);
-
- local_irq_restore(flags);
}
static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
- unsigned long flags;
-
- /*
- * Temporarily disable interrupts to avoid races where cachelines are
- * evicted when the CPU is interrupted to do something else.
- */
- local_irq_save(flags);
aes_decrypt(ctx, out, in);
-
- local_irq_restore(flags);
}
static struct crypto_alg aes_alg = {
diff --git a/lib/crypto/aes.c b/lib/crypto/aes.c
index 827fe89922fff0..029d8d0eac1f6e 100644
--- a/lib/crypto/aes.c
+++ b/lib/crypto/aes.c
@@ -260,6 +260,7 @@ void aes_encrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in)
const u32 *rkp = ctx->key_enc + 4;
int rounds = 6 + ctx->key_length / 4;
u32 st0[4], st1[4];
+ unsigned long flags;
int round;
st0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in);
@@ -267,6 +268,12 @@ void aes_encrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in)
st0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8);
st0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12);
+ /*
+ * Temporarily disable interrupts to avoid races where cachelines are
+ * evicted when the CPU is interrupted to do something else.
+ */
+ local_irq_save(flags);
+
/*
* Force the compiler to emit data independent Sbox references,
* by xoring the input with Sbox values that are known to add up
@@ -297,6 +304,8 @@ void aes_encrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in)
put_unaligned_le32(subshift(st1, 1) ^ rkp[5], out + 4);
put_unaligned_le32(subshift(st1, 2) ^ rkp[6], out + 8);
put_unaligned_le32(subshift(st1, 3) ^ rkp[7], out + 12);
+
+ local_irq_restore(flags);
}
EXPORT_SYMBOL(aes_encrypt);
@@ -311,6 +320,7 @@ void aes_decrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in)
const u32 *rkp = ctx->key_dec + 4;
int rounds = 6 + ctx->key_length / 4;
u32 st0[4], st1[4];
+ unsigned long flags;
int round;
st0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in);
@@ -318,6 +328,12 @@ void aes_decrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in)
st0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8);
st0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12);
+ /*
+ * Temporarily disable interrupts to avoid races where cachelines are
+ * evicted when the CPU is interrupted to do something else.
+ */
+ local_irq_save(flags);
+
/*
* Force the compiler to emit data independent Sbox references,
* by xoring the input with Sbox values that are known to add up
@@ -348,6 +364,8 @@ void aes_decrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in)
put_unaligned_le32(inv_subshift(st1, 1) ^ rkp[5], out + 4);
put_unaligned_le32(inv_subshift(st1, 2) ^ rkp[6], out + 8);
put_unaligned_le32(inv_subshift(st1, 3) ^ rkp[7], out + 12);
+
+ local_irq_restore(flags);
}
EXPORT_SYMBOL(aes_decrypt);
--
2.26.2
The patch titled
Subject: epoll: ensure ep_poll() doesn't miss wakeup events
has been added to the -mm tree. Its filename is
epoll-ensure-ep_poll-doesnt-miss-wakeup-events.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/epoll-ensure-ep_poll-doesnt-miss-w…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/epoll-ensure-ep_poll-doesnt-miss-w…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Jason Baron <jbaron(a)akamai.com>
Subject: epoll: ensure ep_poll() doesn't miss wakeup events
Now that the ep_events_available() check is done in a lockless way, and we
no longer perform wakeups from ep_scan_ready_list(), we need to ensure
that either ep->rdllist has items or the overflow list is active. Prior
to commit 339ddb53d373 ("fs/epoll: remove unnecessary wakeups of nested
epoll"), we did wake_up(&ep->wq) after manipulating the ep->rdllist and
the overflow list. Thus, any waiters would observe the correct state.
However, with that wake_up() now removed we need to be more careful to
ensure that condition.
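In pseudocode, the invariant being preserved looks roughly like this (a
simplified sketch of the idea, not the exact hunks below):

	/* writer side (ep_scan_ready_list), sketch: */
	WRITE_ONCE(ep->ovflist, NULL);		 /* overflow list becomes active */
	smp_wmb();				 /* order ovflist write before... */
	list_splice_init(&ep->rdllist, &txlist); /* ...emptying ep->rdllist */

	/* lockless reader side (ep_events_available), sketch: */
	return !list_empty_careful(&ep->rdllist) ||
	       READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR;

so a lockless reader always observes at least one of the two conditions.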
Here's an example of what could go wrong:
We have epoll fds epfd1 and epfd2, where epfd1 is added to epfd2 and epfd2
is added to a socket: epfd1->epfd2->socket. Thread a is doing epoll_wait()
on epfd1, and thread b is doing epoll_wait() on epfd2. Then:
1) data comes in on socket
ep_poll_callback() wakes up threads a and b
2) thread a runs
ep_poll()
ep_scan_ready_list()
ep_send_events_proc()
ep_item_poll()
ep_scan_ready_list()
list_splice_init(&ep->rdllist, &txlist);
3) now thread b is running
ep_poll()
ep_events_available()
returns false
schedule_hrtimeout_range()
Thus, thread b has now scheduled and missed the wakeup.
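For reference, the topology above can be built from userspace roughly like
this (hypothetical sketch, error handling omitted; "sock" is assumed to be
a connected socket):

	#include <sys/epoll.h>

	int epfd2 = epoll_create1(0);
	int epfd1 = epoll_create1(0);
	struct epoll_event ev = { .events = EPOLLIN };

	ev.data.fd = sock;
	epoll_ctl(epfd2, EPOLL_CTL_ADD, sock, &ev);	/* epfd2 -> socket */
	ev.data.fd = epfd2;
	epoll_ctl(epfd1, EPOLL_CTL_ADD, epfd2, &ev);	/* epfd1 -> epfd2 */

	/* thread a: epoll_wait(epfd1, ...);  thread b: epoll_wait(epfd2, ...) */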
Link: http://lkml.kernel.org/r/1588360533-11828-1-git-send-email-jbaron@akamai.com
Fixes: 339ddb53d373 ("fs/epoll: remove unnecessary wakeups of nested epoll")
Signed-off-by: Jason Baron <jbaron(a)akamai.com>
Cc: Alexander Viro <viro(a)zeniv.linux.org.uk>
Cc: Heiher <r(a)hev.cc>
Cc: Roman Penyaev <rpenyaev(a)suse.de>
Cc: Khazhismel Kumykov <khazhy(a)google.com>
Cc: Davidlohr Bueso <dbueso(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/eventpoll.c | 23 +++++++++++++++++------
1 file changed, 17 insertions(+), 6 deletions(-)
--- a/fs/eventpoll.c~epoll-ensure-ep_poll-doesnt-miss-wakeup-events
+++ a/fs/eventpoll.c
@@ -704,8 +704,14 @@ static __poll_t ep_scan_ready_list(struc
* in a lockless way.
*/
write_lock_irq(&ep->lock);
- list_splice_init(&ep->rdllist, &txlist);
WRITE_ONCE(ep->ovflist, NULL);
+ /*
+ * In ep_poll() we use ep_events_available() in a lockless way to decide
+ * if events are available. So we need to preserve that either
+ * ep->ovflist != EP_UNACTIVE_PTR or there are events on the ep->rdllist.
+ */
+ smp_wmb();
+ list_splice_init(&ep->rdllist, &txlist);
write_unlock_irq(&ep->lock);
/*
@@ -737,16 +743,21 @@ static __poll_t ep_scan_ready_list(struc
}
}
/*
+ * Quickly re-inject items left on "txlist".
+ */
+ list_splice(&txlist, &ep->rdllist);
+ /*
+ * In ep_poll() we use ep_events_available() in a lockless way to decide
+ * if events are available. So we need to preserve that either
+ * ep->ovflist != EP_UNACTIVE_PTR or there are events on the ep->rdllist.
+ */
+ smp_wmb();
+ /*
* We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
* releasing the lock, events will be queued in the normal way inside
* ep->rdllist.
*/
WRITE_ONCE(ep->ovflist, EP_UNACTIVE_PTR);
-
- /*
- * Quickly re-inject items left on "txlist".
- */
- list_splice(&txlist, &ep->rdllist);
__pm_relax(ep->ws);
write_unlock_irq(&ep->lock);
_
Patches currently in -mm which might be from jbaron(a)akamai.com are
epoll-ensure-ep_poll-doesnt-miss-wakeup-events.patch
This patch does two things:
1. fixes lost wakeup introduced by:
339ddb53d373 ("fs/epoll: remove unnecessary wakeups of nested epoll")
2. improves performance for events delivery.
The description of the problem is the following: if N (>1) threads
are waiting on ep->wq for new events and M (>1) events come, it is
quite likely that more than one wakeup hits the same wait queue entry,
because there is quite a big window between the
__add_wait_queue_exclusive() and the following __remove_wait_queue()
calls in the ep_poll() function. This can lead to lost wakeups, because
the thread that was woken up may not handle all the events in
->rdllist. (The problem is described in more detail here:
https://lkml.org/lkml/2019/10/7/905)
The idea of the current patch is to use init_wait() instead of
init_waitqueue_entry(). Internally init_wait() sets
autoremove_wake_function as the callback, which removes the wait entry
atomically (under the wq lock) from the list, so the next incoming
wakeup hits the next wait entry in the wait queue, preventing lost
wakeups.
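For reference, the difference between the two initializers (a simplified
sketch of the waitqueue API, not the patch itself):

	wait_queue_entry_t wait;

	init_waitqueue_entry(&wait, current);	/* default_wake_function:
						 * entry stays queued after a
						 * wakeup, so the next wakeup
						 * can hit the same waiter */

	init_wait(&wait);			/* autoremove_wake_function:
						 * entry is unlinked under the
						 * wq lock during the wakeup,
						 * so the next wakeup reaches
						 * the next waiter */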
The problem is reliably reproduced by the epoll60 test case [1].
Removing the wait entry on wakeup also has performance benefits, because
there is no need to take ep->lock and remove the wait entry from the
queue after a successful wakeup. Here is the timing output of the
epoll60 test case:
With explicit wakeup from ep_scan_ready_list() (the state of the
code prior 339ddb53d373):
real 0m6.970s
user 0m49.786s
sys 0m0.113s
After this patch:
real 0m5.220s
user 0m36.879s
sys 0m0.019s
The other testcase is the stress-epoll [2], where one thread consumes
all the events and other threads produce many events:
With explicit wakeup from ep_scan_ready_list() (the state of the
code prior 339ddb53d373):
threads  events/ms  run-time ms
      8       5427         1474
     16       6163         2596
     32       6824         4689
     64       7060         9064
    128       6991        18309
After this patch:
threads  events/ms  run-time ms
      8       5598         1429
     16       7073         2262
     32       7502         4265
     64       7640         8376
    128       7634        16767
(number of "events/ms" represents event bandwidth, thus higher is
better; number of "run-time ms" represents overall time spent
doing the benchmark, thus lower is better)
[1] tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
[2] https://github.com/rouming/test-tools/blob/master/stress-epoll.c
Signed-off-by: Roman Penyaev <rpenyaev(a)suse.de>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Khazhismel Kumykov <khazhy(a)google.com>
Cc: Alexander Viro <viro(a)zeniv.linux.org.uk>
Cc: Heiher <r(a)hev.cc>
Cc: Jason Baron <jbaron(a)akamai.com>
Cc: stable(a)vger.kernel.org
Cc: linux-fsdevel(a)vger.kernel.org
Cc: linux-kernel(a)vger.kernel.org
---
fs/eventpoll.c | 43 ++++++++++++++++++++++++-------------------
1 file changed, 24 insertions(+), 19 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index d6ba0e52439b..aba03ee749f8 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1822,7 +1822,6 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
{
int res = 0, eavail, timed_out = 0;
u64 slack = 0;
- bool waiter = false;
wait_queue_entry_t wait;
ktime_t expires, *to = NULL;
@@ -1867,21 +1866,23 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
*/
ep_reset_busy_poll_napi_id(ep);
- /*
- * We don't have any available event to return to the caller. We need
- * to sleep here, and we will be woken by ep_poll_callback() when events
- * become available.
- */
- if (!waiter) {
- waiter = true;
- init_waitqueue_entry(&wait, current);
-
+ do {
+ /*
+ * Internally init_wait() uses autoremove_wake_function(),
+ * thus wait entry is removed from the wait queue on each
+ * wakeup. Why it is important? In case of several waiters
+ * each new wakeup will hit the next waiter, giving it the
+ * chance to harvest new event. Otherwise wakeup can be
+ * lost. This is also good performance-wise, because on
+ * normal wakeup path no need to call __remove_wait_queue()
+ * explicitly, thus ep->lock is not taken, which halts the
+ * event delivery.
+ */
+ init_wait(&wait);
write_lock_irq(&ep->lock);
__add_wait_queue_exclusive(&ep->wq, &wait);
write_unlock_irq(&ep->lock);
- }
- for (;;) {
/*
* We don't want to sleep if the ep_poll_callback() sends us
* a wakeup in between. That's why we set the task state
@@ -1911,10 +1912,20 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
timed_out = 1;
break;
}
- }
+
+ /* We were woken up, thus go and try to harvest some events */
+ eavail = 1;
+
+ } while (0);
__set_current_state(TASK_RUNNING);
+ if (!list_empty_careful(&wait.entry)) {
+ write_lock_irq(&ep->lock);
+ __remove_wait_queue(&ep->wq, &wait);
+ write_unlock_irq(&ep->lock);
+ }
+
send_events:
/*
* Try to transfer events to user space. In case we get 0 events and
@@ -1925,12 +1936,6 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
!(res = ep_send_events(ep, events, maxevents)) && !timed_out)
goto fetch_events;
- if (waiter) {
- write_lock_irq(&ep->lock);
- __remove_wait_queue(&ep->wq, &wait);
- write_unlock_irq(&ep->lock);
- }
-
return res;
}
--
2.24.1
Writing to the devfreq sysfs nodes while the GPU is powered down can
result in a system crash (on a5xx) or a nasty GMU error (on a6xx):
$ /sys/class/devfreq/5000000.gpu# echo 500000000 > min_freq
[ 104.841625] platform 506a000.gmu: [drm:a6xx_gmu_set_oob]
*ERROR* Timeout waiting for GMU OOB set GPU_DCVS: 0x0
Even though we carefully try to suspend the devfreq device when the
hardware is powered down, there are lots of holes in the governors that
don't check the suspend state and blindly call into the devfreq
callbacks, which end up triggering hardware reads in the GPU driver.
Call pm_runtime_get_if_in_use() in the gpu_busy() and gpu_set_freq()
callbacks to skip the hardware access if it isn't active.
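The resulting shape of the callbacks (a simplified sketch of the busy
callback; read_hw_counter() is a hypothetical stand-in for the per-GPU
register reads in the hunks below):

	static unsigned long example_gpu_busy(struct msm_gpu *gpu)
	{
		u64 cycles;

		/* Only touch the hardware if runtime PM says it is up;
		 * pm_runtime_get_if_in_use() also takes a reference so
		 * the GPU cannot be powered down underneath us. */
		if (pm_runtime_get_if_in_use(&gpu->pdev->dev) <= 0)
			return 0;

		cycles = read_hw_counter(gpu);	/* hypothetical helper */

		pm_runtime_put(&gpu->pdev->dev);
		return (unsigned long)cycles;
	}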
v2: Use pm_runtime_get_if_in_use() per Eric Anholt
Cc: stable(a)vger.kernel.org
Signed-off-by: Jordan Crouse <jcrouse(a)codeaurora.org>
---
drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 6 ++++++
drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 8 ++++++++
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 7 +++++++
3 files changed, 21 insertions(+)
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 724024a2243a..4d7f269edfcc 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -1404,6 +1404,10 @@ static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
{
u64 busy_cycles, busy_time;
+ /* Only read the gpu busy if the hardware is already active */
+ if (pm_runtime_get_if_in_use(&gpu->pdev->dev) <= 0)
+ return 0;
+
busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
@@ -1412,6 +1416,8 @@ static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
gpu->devfreq.busy_cycles = busy_cycles;
+ pm_runtime_put(&gpu->pdev->dev);
+
if (WARN_ON(busy_time > ~0LU))
return ~0LU;
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index c4e71abbdd53..8ace989b11db 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -108,6 +108,13 @@ static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
struct msm_gpu *gpu = &adreno_gpu->base;
int ret;
+ /*
+ * This can get called from devfreq while the hardware is idle. Don't
+ * bring up the power if it isn't already active
+ */
+ if (pm_runtime_get_if_in_use(gmu->dev) <= 0)
+ return;
+
gmu_write(gmu, REG_A6XX_GMU_DCVS_ACK_OPTION, 0);
gmu_write(gmu, REG_A6XX_GMU_DCVS_PERF_SETTING,
@@ -134,6 +141,7 @@ static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
* for now leave it at max so that the performance is nominal.
*/
icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216));
+ pm_runtime_put(gmu->dev);
}
void a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 68af24150de5..bf43eb2fb078 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -810,6 +810,11 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
u64 busy_cycles, busy_time;
+
+ /* Only read the gpu busy if the hardware is already active */
+ if (pm_runtime_get_if_in_use(a6xx_gpu->gmu.dev) <= 0)
+ return 0;
+
busy_cycles = gmu_read64(&a6xx_gpu->gmu,
REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
@@ -819,6 +824,8 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)
gpu->devfreq.busy_cycles = busy_cycles;
+ pm_runtime_put(a6xx_gpu->gmu.dev);
+
if (WARN_ON(busy_time > ~0LU))
return ~0LU;
--
2.17.1
Writing to the devfreq sysfs nodes while the GPU is powered down can
result in a system crash (on a5xx) or a nasty GMU error (on a6xx):
$ /sys/class/devfreq/5000000.gpu# echo 500000000 > min_freq
[ 104.841625] platform 506a000.gmu: [drm:a6xx_gmu_set_oob]
*ERROR* Timeout waiting for GMU OOB set GPU_DCVS: 0x0
Even though we carefully try to suspend the devfreq device when the
hardware is powered down, there are lots of holes in the governors that
don't check the suspend state and blindly call into the devfreq
callbacks, which end up triggering hardware reads in the GPU driver.
Check the power state in the gpu_busy() and gpu_set_freq() callbacks for
a5xx and a6xx to make sure that the hardware is active before trying to
access it.
Cc: stable(a)vger.kernel.org
Signed-off-by: Jordan Crouse <jcrouse(a)codeaurora.org>
---
drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 ++++
drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 4 ++++
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 4 ++++
3 files changed, 12 insertions(+)
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 724024a2243a..9d8c7fe6377e 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -1404,6 +1404,10 @@ static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
{
u64 busy_cycles, busy_time;
+ /* devfreq can get here while we are idle so do a quick check first */
+ if (!pm_runtime_active(&gpu->pdev->dev))
+ return ~0LU;
+
busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index c4e71abbdd53..84618f2ff073 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -108,6 +108,10 @@ static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
struct msm_gpu *gpu = &adreno_gpu->base;
int ret;
+ /* This can be called via devfreq even when the power is off */
+ if (!pm_runtime_active(gmu->dev))
+ return;
+
gmu_write(gmu, REG_A6XX_GMU_DCVS_ACK_OPTION, 0);
gmu_write(gmu, REG_A6XX_GMU_DCVS_PERF_SETTING,
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 68af24150de5..443efc952f13 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -810,6 +810,10 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
u64 busy_cycles, busy_time;
+ /* devfreq can get here while we are idle so do a quick check first */
+ if (!pm_runtime_active(a6xx_gpu->gmu.dev))
+ return ~0LU;
+
busy_cycles = gmu_read64(&a6xx_gpu->gmu,
REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
--
2.17.1
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 61e713bdca3678e84815f2427f7a063fc353a1fc Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm(a)xmission.com>
Date: Mon, 20 Apr 2020 11:41:50 -0500
Subject: [PATCH] signal: Avoid corrupting si_pid and si_uid in
do_notify_parent
Christof Meerwald <cmeerw(a)cmeerw.org> writes:
> Hi,
>
> this is probably related to commit
> 7a0cf094944e2540758b7f957eb6846d5126f535 (signal: Correct namespace
> fixups of si_pid and si_uid).
>
> With a 5.6.5 kernel I am seeing SIGCHLD signals that don't include a
> properly set si_pid field - this seems to happen for multi-threaded
> child processes.
>
> A simple test program (based on the sample from the signalfd man page):
>
> #include <sys/signalfd.h>
> #include <signal.h>
> #include <unistd.h>
> #include <spawn.h>
> #include <stdlib.h>
> #include <stdio.h>
>
> #define handle_error(msg) \
> do { perror(msg); exit(EXIT_FAILURE); } while (0)
>
> int main(int argc, char *argv[])
> {
> sigset_t mask;
> int sfd;
> struct signalfd_siginfo fdsi;
> ssize_t s;
>
> sigemptyset(&mask);
> sigaddset(&mask, SIGCHLD);
>
> if (sigprocmask(SIG_BLOCK, &mask, NULL) == -1)
> handle_error("sigprocmask");
>
> pid_t chldpid;
> char *chldargv[] = { "./sfdclient", NULL };
> posix_spawn(&chldpid, "./sfdclient", NULL, NULL, chldargv, NULL);
>
> sfd = signalfd(-1, &mask, 0);
> if (sfd == -1)
> handle_error("signalfd");
>
> for (;;) {
> s = read(sfd, &fdsi, sizeof(struct signalfd_siginfo));
> if (s != sizeof(struct signalfd_siginfo))
> handle_error("read");
>
> if (fdsi.ssi_signo == SIGCHLD) {
> printf("Got SIGCHLD %d %d %d %d\n",
> fdsi.ssi_status, fdsi.ssi_code,
> fdsi.ssi_uid, fdsi.ssi_pid);
> return 0;
> } else {
> printf("Read unexpected signal\n");
> }
> }
> }
>
>
> and a multi-threaded client to test with:
>
> #include <unistd.h>
> #include <pthread.h>
>
> void *f(void *arg)
> {
> sleep(100);
> }
>
> int main()
> {
> pthread_t t[8];
>
> for (int i = 0; i != 8; ++i)
> {
> pthread_create(&t[i], NULL, f, NULL);
> }
> }
>
> I tried to do a bit of debugging and what seems to be happening is
> that
>
> /* From an ancestor pid namespace? */
> if (!task_pid_nr_ns(current, task_active_pid_ns(t))) {
>
> fails inside task_pid_nr_ns because the check for "pid_alive" fails.
>
> This code seems to be called from do_notify_parent and there we
> actually have "tsk != current" (I am assuming both are threads of the
> current process?)
I instrumented the code with a warning and received the following backtrace:
> WARNING: CPU: 0 PID: 777 at kernel/pid.c:501 __task_pid_nr_ns.cold.6+0xc/0x15
> Modules linked in:
> CPU: 0 PID: 777 Comm: sfdclient Not tainted 5.7.0-rc1userns+ #2924
> Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
> RIP: 0010:__task_pid_nr_ns.cold.6+0xc/0x15
> Code: ff 66 90 48 83 ec 08 89 7c 24 04 48 8d 7e 08 48 8d 74 24 04 e8 9a b6 44 00 48 83 c4 08 c3 48 c7 c7 59 9f ac 82 e8 c2 c4 04 00 <0f> 0b e9 3fd
> RSP: 0018:ffffc9000042fbf8 EFLAGS: 00010046
> RAX: 000000000000000c RBX: 0000000000000000 RCX: ffffc9000042faf4
> RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff81193d29
> RBP: ffffc9000042fc18 R08: 0000000000000000 R09: 0000000000000001
> R10: 000000100f938416 R11: 0000000000000309 R12: ffff8880b941c140
> R13: 0000000000000000 R14: 0000000000000000 R15: ffff8880b941c140
> FS: 0000000000000000(0000) GS:ffff8880bca00000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00007f2e8c0a32e0 CR3: 0000000002e10000 CR4: 00000000000006f0
> Call Trace:
> send_signal+0x1c8/0x310
> do_notify_parent+0x50f/0x550
> release_task.part.21+0x4fd/0x620
> do_exit+0x6f6/0xaf0
> do_group_exit+0x42/0xb0
> get_signal+0x13b/0xbb0
> do_signal+0x2b/0x670
> ? __audit_syscall_exit+0x24d/0x2b0
> ? rcu_read_lock_sched_held+0x4d/0x60
> ? kfree+0x24c/0x2b0
> do_syscall_64+0x176/0x640
> ? trace_hardirqs_off_thunk+0x1a/0x1c
> entry_SYSCALL_64_after_hwframe+0x49/0xb3
The immediate problem is, as Christof noticed, that "pid_alive(current) == false".
This happens because do_notify_parent is called from the last thread to exit
in a process after that thread has been reaped.
The bigger issue is that do_notify_parent can be called from any
process that manages to wait on a thread of a multi-threaded process
from wait_task_zombie. So any logic based upon current for
do_notify_parent is just nonsense, as current can be pretty much
anything.
So change do_notify_parent to call __send_signal directly.
Inspecting the code, it appears this problem has existed since the pid
namespace support started handling this case in 2.6.30. This fix only
backports to 7a0cf094944e ("signal: Correct namespace fixups of si_pid and si_uid")
where the problem logic was moved out of __send_signal and into send_signal.
Cc: stable(a)vger.kernel.org
Fixes: 6588c1e3ff01 ("signals: SI_USER: Masquerade si_pid when crossing pid ns boundary")
Ref: 921cf9f63089 ("signals: protect cinit from unblocked SIG_DFL signals")
Link: https://lore.kernel.org/lkml/20200419201336.GI22017@edge.cmeerw.net/
Reported-by: Christof Meerwald <cmeerw(a)cmeerw.org>
Acked-by: Oleg Nesterov <oleg(a)redhat.com>
Acked-by: Christian Brauner <christian.brauner(a)ubuntu.com>
Signed-off-by: "Eric W. Biederman" <ebiederm(a)xmission.com>
diff --git a/kernel/signal.c b/kernel/signal.c
index e58a6c619824..7938c60e11dd 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1993,8 +1993,12 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN)
sig = 0;
}
+ /*
+ * Send with __send_signal as si_pid and si_uid are in the
+ * parent's namespaces.
+ */
if (valid_signal(sig) && sig)
- __group_send_sig_info(sig, &info, tsk->parent);
+ __send_signal(sig, &info, tsk->parent, PIDTYPE_TGID, false);
__wake_up_parent(tsk, tsk->parent);
spin_unlock_irqrestore(&psig->siglock, flags);
Hi,
I need help to backport the patch with the following details onto the
5.4.y stable branch:
Subject: [PATCH] tty: hvc: Fix data abort due to race in hvc_open
commit-id: e2bd1dcbe1aa34ff5570b3427c530e4332ecf0fe
Reason: The issue addressed in the patch was discovered on 5.4.y branch
Thank you.
Raghavendra
While we support using both tx slots for sideband transmissions, it
appears that DisplayPort devices in the field didn't end up doing a very
good job of supporting it. From section 5.2.1 of the DP 2.0
specification:
There are MST Sink/Branch devices in the field that do not handle
interleaved message transactions.
To facilitate message transaction handling by downstream devices, an
MST Source device shall generate message transactions in an atomic
manner (i.e., the MST Source device shall not concurrently interleave
multiple message transactions). Therefore, an MST Source device shall
clear the Message_Sequence_No value in the Sideband_MSG_Header to 0.
This might come as a bit of a surprise since the vast majority of hubs
will support using both tx slots even if they don't support interleaved
message transactions, and we've also been using both tx slots since MST
was introduced into the kernel.
However, there is one device we've had trouble getting working
consistently with MST for so long that we actually assumed it was just
broken: the infamous Dell P2415Qb. Previously this monitor would appear
to work sometimes, but in most situations would end up timing out
LINK_ADDRESS messages almost at random until you power cycled the whole
display. After reading section 5.2.1 in the DP 2.0 spec, some closer
investigation into this infamous display revealed it was only ever
timing out on sideband messages in the second TX slot.
Sure enough, avoiding the second TX slot has suddenly made this monitor
function perfectly for the first time in five years. And since they
explicitly mention this in the specification, I doubt this is the only
monitor out there with this issue. This might even explain the
seemingly harmless garbage sideband responses we would occasionally see
with MST hubs!
So - rewrite our sideband TX handlers to only support one TX slot. In
order to simplify our sideband handling now that we don't support
transmitting to multiple MSTBs at once, we also move all state tracking
for down replies from mstbs to the topology manager.
Signed-off-by: Lyude Paul <lyude(a)redhat.com>
Fixes: ad7f8a1f9ced ("drm/helper: add Displayport multi-stream helper (v0.6)")
Cc: Sean Paul <sean(a)poorly.run>
Cc: "Lin, Wayne" <Wayne.Lin(a)amd.com>
Cc: <stable(a)vger.kernel.org> # v3.17+
Reviewed-by: Sean Paul <sean(a)poorly.run>
Link: https://patchwork.freedesktop.org/patch/msgid/20200424181308.770749-1-lyude…
---
drivers/gpu/drm/drm_dp_mst_topology.c | 122 +++++++-------------------
include/drm/drm_dp_mst_helper.h | 18 +---
2 files changed, 33 insertions(+), 107 deletions(-)
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 9d89ebf3a749..ed6faaf4bbf3 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1203,16 +1203,8 @@ static int drm_dp_mst_wait_tx_reply(struct drm_dp_mst_branch *mstb,
/* remove from q */
if (txmsg->state == DRM_DP_SIDEBAND_TX_QUEUED ||
- txmsg->state == DRM_DP_SIDEBAND_TX_START_SEND) {
+ txmsg->state == DRM_DP_SIDEBAND_TX_START_SEND)
list_del(&txmsg->next);
- }
-
- if (txmsg->state == DRM_DP_SIDEBAND_TX_START_SEND ||
- txmsg->state == DRM_DP_SIDEBAND_TX_SENT) {
- mstb->tx_slots[txmsg->seqno] = NULL;
- }
- mgr->is_waiting_for_dwn_reply = false;
-
}
out:
if (unlikely(ret == -EIO) && drm_debug_enabled(DRM_UT_DP)) {
@@ -2691,22 +2683,6 @@ static int set_hdr_from_dst_qlock(struct drm_dp_sideband_msg_hdr *hdr,
struct drm_dp_mst_branch *mstb = txmsg->dst;
u8 req_type;
- /* both msg slots are full */
- if (txmsg->seqno == -1) {
- if (mstb->tx_slots[0] && mstb->tx_slots[1]) {
- DRM_DEBUG_KMS("%s: failed to find slot\n", __func__);
- return -EAGAIN;
- }
- if (mstb->tx_slots[0] == NULL && mstb->tx_slots[1] == NULL) {
- txmsg->seqno = mstb->last_seqno;
- mstb->last_seqno ^= 1;
- } else if (mstb->tx_slots[0] == NULL)
- txmsg->seqno = 0;
- else
- txmsg->seqno = 1;
- mstb->tx_slots[txmsg->seqno] = txmsg;
- }
-
req_type = txmsg->msg[0] & 0x7f;
if (req_type == DP_CONNECTION_STATUS_NOTIFY ||
req_type == DP_RESOURCE_STATUS_NOTIFY)
@@ -2718,7 +2694,7 @@ static int set_hdr_from_dst_qlock(struct drm_dp_sideband_msg_hdr *hdr,
hdr->lcr = mstb->lct - 1;
if (mstb->lct > 1)
memcpy(hdr->rad, mstb->rad, mstb->lct / 2);
- hdr->seqno = txmsg->seqno;
+
return 0;
}
/*
@@ -2733,15 +2709,15 @@ static int process_single_tx_qlock(struct drm_dp_mst_topology_mgr *mgr,
int len, space, idx, tosend;
int ret;
+ if (txmsg->state == DRM_DP_SIDEBAND_TX_SENT)
+ return 0;
+
memset(&hdr, 0, sizeof(struct drm_dp_sideband_msg_hdr));
- if (txmsg->state == DRM_DP_SIDEBAND_TX_QUEUED) {
- txmsg->seqno = -1;
+ if (txmsg->state == DRM_DP_SIDEBAND_TX_QUEUED)
txmsg->state = DRM_DP_SIDEBAND_TX_START_SEND;
- }
- /* make hdr from dst mst - for replies use seqno
- otherwise assign one */
+ /* make hdr from dst mst */
ret = set_hdr_from_dst_qlock(&hdr, txmsg);
if (ret < 0)
return ret;
@@ -2794,42 +2770,17 @@ static void process_single_down_tx_qlock(struct drm_dp_mst_topology_mgr *mgr)
if (list_empty(&mgr->tx_msg_downq))
return;
- txmsg = list_first_entry(&mgr->tx_msg_downq, struct drm_dp_sideband_msg_tx, next);
+ txmsg = list_first_entry(&mgr->tx_msg_downq,
+ struct drm_dp_sideband_msg_tx, next);
ret = process_single_tx_qlock(mgr, txmsg, false);
- if (ret == 1) {
- /* txmsg is sent it should be in the slots now */
- mgr->is_waiting_for_dwn_reply = true;
- list_del(&txmsg->next);
- } else if (ret) {
+ if (ret < 0) {
DRM_DEBUG_KMS("failed to send msg in q %d\n", ret);
- mgr->is_waiting_for_dwn_reply = false;
list_del(&txmsg->next);
- if (txmsg->seqno != -1)
- txmsg->dst->tx_slots[txmsg->seqno] = NULL;
txmsg->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
wake_up_all(&mgr->tx_waitq);
}
}
-/* called holding qlock */
-static void process_single_up_tx_qlock(struct drm_dp_mst_topology_mgr *mgr,
- struct drm_dp_sideband_msg_tx *txmsg)
-{
- int ret;
-
- /* construct a chunk from the first msg in the tx_msg queue */
- ret = process_single_tx_qlock(mgr, txmsg, true);
-
- if (ret != 1)
- DRM_DEBUG_KMS("failed to send msg in q %d\n", ret);
-
- if (txmsg->seqno != -1) {
- WARN_ON((unsigned int)txmsg->seqno >
- ARRAY_SIZE(txmsg->dst->tx_slots));
- txmsg->dst->tx_slots[txmsg->seqno] = NULL;
- }
-}
-
static void drm_dp_queue_down_tx(struct drm_dp_mst_topology_mgr *mgr,
struct drm_dp_sideband_msg_tx *txmsg)
{
@@ -2842,8 +2793,7 @@ static void drm_dp_queue_down_tx(struct drm_dp_mst_topology_mgr *mgr,
drm_dp_mst_dump_sideband_msg_tx(&p, txmsg);
}
- if (list_is_singular(&mgr->tx_msg_downq) &&
- !mgr->is_waiting_for_dwn_reply)
+ if (list_is_singular(&mgr->tx_msg_downq))
process_single_down_tx_qlock(mgr);
mutex_unlock(&mgr->qlock);
}
@@ -3467,7 +3417,7 @@ static int drm_dp_encode_up_ack_reply(struct drm_dp_sideband_msg_tx *msg, u8 req
static int drm_dp_send_up_ack_reply(struct drm_dp_mst_topology_mgr *mgr,
struct drm_dp_mst_branch *mstb,
- int req_type, int seqno, bool broadcast)
+ int req_type, bool broadcast)
{
struct drm_dp_sideband_msg_tx *txmsg;
@@ -3476,13 +3426,11 @@ static int drm_dp_send_up_ack_reply(struct drm_dp_mst_topology_mgr *mgr,
return -ENOMEM;
txmsg->dst = mstb;
- txmsg->seqno = seqno;
drm_dp_encode_up_ack_reply(txmsg, req_type);
mutex_lock(&mgr->qlock);
-
- process_single_up_tx_qlock(mgr, txmsg);
-
+ /* construct a chunk from the first msg in the tx_msg queue */
+ process_single_tx_qlock(mgr, txmsg, true);
mutex_unlock(&mgr->qlock);
kfree(txmsg);
@@ -3707,7 +3655,8 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr,
}
EXPORT_SYMBOL(drm_dp_mst_topology_mgr_resume);
-static bool drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up)
+static bool drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr,
+ bool up)
{
int len;
u8 replyblock[32];
@@ -3760,7 +3709,6 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr)
struct drm_dp_sideband_msg_tx *txmsg;
struct drm_dp_mst_branch *mstb;
struct drm_dp_sideband_msg_hdr *hdr = &mgr->down_rep_recv.initial_hdr;
- int slot = -1;
if (!drm_dp_get_one_sb_msg(mgr, false))
goto clear_down_rep_recv;
@@ -3776,10 +3724,9 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr)
}
/* find the message */
- slot = hdr->seqno;
mutex_lock(&mgr->qlock);
- txmsg = mstb->tx_slots[slot];
- /* remove from slots */
+ txmsg = list_first_entry_or_null(&mgr->tx_msg_downq,
+ struct drm_dp_sideband_msg_tx, next);
mutex_unlock(&mgr->qlock);
if (!txmsg) {
@@ -3804,8 +3751,7 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr)
mutex_lock(&mgr->qlock);
txmsg->state = DRM_DP_SIDEBAND_TX_RX;
- mstb->tx_slots[slot] = NULL;
- mgr->is_waiting_for_dwn_reply = false;
+ list_del(&txmsg->next);
mutex_unlock(&mgr->qlock);
wake_up_all(&mgr->tx_waitq);
@@ -3815,9 +3761,6 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr)
no_msg:
drm_dp_mst_topology_put_mstb(mstb);
clear_down_rep_recv:
- mutex_lock(&mgr->qlock);
- mgr->is_waiting_for_dwn_reply = false;
- mutex_unlock(&mgr->qlock);
memset(&mgr->down_rep_recv, 0, sizeof(struct drm_dp_sideband_msg_rx));
return 0;
@@ -3896,7 +3839,6 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
{
struct drm_dp_sideband_msg_hdr *hdr = &mgr->up_req_recv.initial_hdr;
struct drm_dp_pending_up_req *up_req;
- bool seqno;
if (!drm_dp_get_one_sb_msg(mgr, true))
goto out;
@@ -3911,7 +3853,6 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
}
INIT_LIST_HEAD(&up_req->next);
- seqno = hdr->seqno;
drm_dp_sideband_parse_req(&mgr->up_req_recv, &up_req->msg);
if (up_req->msg.req_type != DP_CONNECTION_STATUS_NOTIFY &&
@@ -3923,7 +3864,7 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
}
drm_dp_send_up_ack_reply(mgr, mgr->mst_primary, up_req->msg.req_type,
- seqno, false);
+ false);
if (up_req->msg.req_type == DP_CONNECTION_STATUS_NOTIFY) {
const struct drm_dp_connection_status_notify *conn_stat =
@@ -4674,7 +4615,7 @@ static void drm_dp_tx_work(struct work_struct *work)
struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, tx_work);
mutex_lock(&mgr->qlock);
- if (!list_empty(&mgr->tx_msg_downq) && !mgr->is_waiting_for_dwn_reply)
+ if (!list_empty(&mgr->tx_msg_downq))
process_single_down_tx_qlock(mgr);
mutex_unlock(&mgr->qlock);
}
@@ -4705,26 +4646,25 @@ static inline void
drm_dp_delayed_destroy_mstb(struct drm_dp_mst_branch *mstb)
{
struct drm_dp_mst_topology_mgr *mgr = mstb->mgr;
- struct drm_dp_mst_port *port, *tmp;
+ struct drm_dp_mst_port *port, *port_tmp;
+ struct drm_dp_sideband_msg_tx *txmsg, *txmsg_tmp;
bool wake_tx = false;
mutex_lock(&mgr->lock);
- list_for_each_entry_safe(port, tmp, &mstb->ports, next) {
+ list_for_each_entry_safe(port, port_tmp, &mstb->ports, next) {
list_del(&port->next);
drm_dp_mst_topology_put_port(port);
}
mutex_unlock(&mgr->lock);
- /* drop any tx slots msg */
+ /* drop any tx slot msg */
mutex_lock(&mstb->mgr->qlock);
- if (mstb->tx_slots[0]) {
- mstb->tx_slots[0]->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
- mstb->tx_slots[0] = NULL;
- wake_tx = true;
- }
- if (mstb->tx_slots[1]) {
- mstb->tx_slots[1]->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
- mstb->tx_slots[1] = NULL;
+ list_for_each_entry_safe(txmsg, txmsg_tmp, &mgr->tx_msg_downq, next) {
+ if (txmsg->dst != mstb)
+ continue;
+
+ txmsg->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
+ list_del(&txmsg->next);
wake_tx = true;
}
mutex_unlock(&mstb->mgr->qlock);
diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h
index 3cde42b333c3..644eca37cb94 100644
--- a/include/drm/drm_dp_mst_helper.h
+++ b/include/drm/drm_dp_mst_helper.h
@@ -165,11 +165,8 @@ struct drm_dp_mst_port {
* @rad: Relative Address to talk to this branch device.
* @lct: Link count total to talk to this branch device.
* @num_ports: number of ports on the branch.
- * @msg_slots: one bit per transmitted msg slot.
* @port_parent: pointer to the port parent, NULL if toplevel.
* @mgr: topology manager for this branch device.
- * @tx_slots: transmission slots for this device.
- * @last_seqno: last sequence number used to talk to this.
* @link_address_sent: if a link address message has been sent to this device yet.
* @guid: guid for DP 1.2 branch device. port under this branch can be
* identified by port #.
@@ -210,7 +207,6 @@ struct drm_dp_mst_branch {
u8 lct;
int num_ports;
- int msg_slots;
/**
* @ports: the list of ports on this branch device. This should be
* considered protected for reading by &drm_dp_mst_topology_mgr.lock.
@@ -223,13 +219,9 @@ struct drm_dp_mst_branch {
*/
struct list_head ports;
- /* list of tx ops queue for this port */
struct drm_dp_mst_port *port_parent;
struct drm_dp_mst_topology_mgr *mgr;
- /* slots are protected by mstb->mgr->qlock */
- struct drm_dp_sideband_msg_tx *tx_slots[2];
- int last_seqno;
bool link_address_sent;
/* global unique identifier to identify branch devices */
@@ -589,11 +581,6 @@ struct drm_dp_mst_topology_mgr {
*/
bool payload_id_table_cleared : 1;
- /**
- * @is_waiting_for_dwn_reply: whether we're waiting for a down reply.
- */
- bool is_waiting_for_dwn_reply : 1;
-
/**
* @mst_primary: Pointer to the primary/first branch device.
*/
@@ -618,13 +605,12 @@ struct drm_dp_mst_topology_mgr {
const struct drm_private_state_funcs *funcs;
/**
- * @qlock: protects @tx_msg_downq, the &drm_dp_mst_branch.txslost and
- * &drm_dp_sideband_msg_tx.state once they are queued
+ * @qlock: protects @tx_msg_downq and &drm_dp_sideband_msg_tx.state
*/
struct mutex qlock;
/**
- * @tx_msg_downq: List of pending down replies.
+ * @tx_msg_downq: List of pending down requests
*/
struct list_head tx_msg_downq;
--
2.25.3
From: Jeremy Cline <jcline(a)redhat.com>
[ Upstream commit 4734b0fefbbf98f8c119eb8344efa19dac82cd2c ]
Builds of Fedora's kernel-tools package started to fail with "may be
used uninitialized" warnings for nl_pid in bpf_set_link_xdp_fd() and
bpf_get_link_xdp_info() on the s390 architecture.
Although libbpf_netlink_open() always returns a negative number when it
does not set *nl_pid, the compiler does not determine this and thus
believes the variable might be used uninitialized. Assuage gcc's fears
by explicitly initializing nl_pid.
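For context, the shape gcc is objecting to looks roughly like this
(simplified sketch; use_pid() is a hypothetical consumer):

	__u32 nl_pid;				/* not written on failure */
	int sock = libbpf_netlink_open(&nl_pid);

	if (sock < 0)
		return sock;			/* gcc cannot prove this is
						 * the only path on which
						 * nl_pid stays unwritten */
	use_pid(nl_pid);			/* hence the warning */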
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1807781
Signed-off-by: Jeremy Cline <jcline(a)redhat.com>
Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net>
Acked-by: Andrii Nakryiko <andriin(a)fb.com>
Link: https://lore.kernel.org/bpf/20200404051430.698058-1-jcline@redhat.com
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
tools/lib/bpf/netlink.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 6d47345a310bd..b294e2aeb3283 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -141,7 +141,7 @@ int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
struct ifinfomsg ifinfo;
char attrbuf[64];
} req;
- __u32 nl_pid;
+ __u32 nl_pid = 0;
sock = libbpf_netlink_open(&nl_pid);
if (sock < 0)
@@ -256,7 +256,7 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
{
struct xdp_id_md xdp_id = {};
int sock, ret;
- __u32 nl_pid;
+ __u32 nl_pid = 0;
__u32 mask;
if (flags & ~XDP_FLAGS_MASK || !info_size)
--
2.20.1
From: "Darrick J. Wong" <darrick.wong(a)oracle.com>
[ Upstream commit c142932c29e533ee892f87b44d8abc5719edceec ]
In the reflink extent remap function, it turns out that uirec (the block
mapping corresponding only to the part of the passed-in mapping that got
unmapped) was not fully initialized. Specifically, br_state was not
being copied from the passed-in struct to the uirec. This could lead to
unpredictable results such as the reflinked mapping being marked
unwritten in the destination file.
Signed-off-by: Darrick J. Wong <darrick.wong(a)oracle.com>
Reviewed-by: Brian Foster <bfoster(a)redhat.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/xfs/xfs_reflink.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 17d3c964a2a23..6b753b969f7b8 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1162,6 +1162,7 @@ xfs_reflink_remap_extent(
uirec.br_startblock = irec->br_startblock + rlen;
uirec.br_startoff = irec->br_startoff + rlen;
uirec.br_blockcount = unmap_len - rlen;
+ uirec.br_state = irec->br_state;
unmap_len = rlen;
/* If this isn't a real mapping, we're done. */
--
2.20.1
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e5b72e3bc4763152e24bf4b8333bae21cc526c56 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg(a)intel.com>
Date: Fri, 17 Apr 2020 10:08:12 +0300
Subject: [PATCH] iwlwifi: mvm: limit maximum queue appropriately
Due to some hardware issues, queue 31 isn't usable on devices that have
32 queues (7000, 8000, 9000 families), which is correctly reflected in
the configuration and TX queue initialization.
However, the firmware API and queue allocation code assumes that there
are 32 queues, and if something actually attempts to use #31 this leads
to a NULL-pointer dereference since it's not allocated.
Fix this by reducing IWL_MVM_DQA_MAX_DATA_QUEUE to 30, so that queue 31
is never handed out, and also add some code to catch this earlier in the
future, should the configuration change.
Cc: stable(a)vger.kernel.org # v4.9+
Signed-off-by: Johannes Berg <johannes.berg(a)intel.com>
Signed-off-by: Luca Coelho <luciano.coelho(a)intel.com>
Signed-off-by: Kalle Valo <kvalo(a)codeaurora.org>
Link: https://lore.kernel.org/r/iwlwifi.20200417100405.98a79be2db6a.I3a4af6b03b87…
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h b/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h
index 73196cbc7fbe..75d958bab0e3 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h
@@ -8,7 +8,7 @@
* Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2019 Intel Corporation
+ * Copyright(c) 2019 - 2020 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -31,7 +31,7 @@
* Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2019 Intel Corporation
+ * Copyright(c) 2019 - 2020 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -99,7 +99,7 @@ enum iwl_mvm_dqa_txq {
IWL_MVM_DQA_MAX_MGMT_QUEUE = 8,
IWL_MVM_DQA_AP_PROBE_RESP_QUEUE = 9,
IWL_MVM_DQA_MIN_DATA_QUEUE = 10,
- IWL_MVM_DQA_MAX_DATA_QUEUE = 31,
+ IWL_MVM_DQA_MAX_DATA_QUEUE = 30,
};
enum iwl_mvm_tx_fifo {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index 64ef3f3ba23b..251d6fbb1da5 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -722,6 +722,11 @@ static int iwl_mvm_find_free_queue(struct iwl_mvm *mvm, u8 sta_id,
lockdep_assert_held(&mvm->mutex);
+ if (WARN(maxq >= mvm->trans->trans_cfg->base_params->num_of_queues,
+ "max queue %d >= num_of_queues (%d)", maxq,
+ mvm->trans->trans_cfg->base_params->num_of_queues))
+ maxq = mvm->trans->trans_cfg->base_params->num_of_queues - 1;
+
/* This should not be hit with new TX path */
if (WARN_ON(iwl_mvm_has_new_tx_api(mvm)))
return -ENOSPC;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From aee194b14dd2b2bde6252b3acf57d36dccfc743a Mon Sep 17 00:00:00 2001
From: Luke Nelson <lukenels(a)cs.washington.edu>
Date: Sat, 18 Apr 2020 16:26:53 -0700
Subject: [PATCH] bpf, x86: Fix encoding for lower 8-bit registers in BPF_STX
BPF_B
This patch fixes an encoding bug in emit_stx for BPF_B when the source
register is BPF_REG_FP.
The current implementation for BPF_STX BPF_B in emit_stx saves one REX
byte when the operands can be encoded using Mod-R/M alone. The lower 8
bits of registers %rax, %rbx, %rcx, and %rdx can be accessed without using
a REX prefix via %al, %bl, %cl, and %dl, respectively. Other registers,
(e.g., %rsi, %rdi, %rbp, %rsp) require a REX prefix to use their 8-bit
equivalents (%sil, %dil, %bpl, %spl).
The current code checks if the source for BPF_STX BPF_B is BPF_REG_1
or BPF_REG_2 (which map to %rdi and %rsi), in which case it emits the
required REX prefix. However, it misses the case when the source is
BPF_REG_FP (mapped to %rbp).
The result is that BPF_STX BPF_B with BPF_REG_FP as the source operand
will read from register %ch instead of the correct %bpl. This patch fixes
the problem by fixing and refactoring the check on which registers need
the extra REX byte. Since no BPF registers map to %rsp, there is no need
to handle %spl.
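As a worked example (hedged; byte values per the x86-64 manual), consider
BPF_STX BPF_B with dst_reg=BPF_REG_0 (%rax), src_reg=BPF_REG_FP (%rbp),
off=8:

	/* without REX: ModRM reg field 5 selects %ch, not %bpl */
	88 68 08		mov byte ptr [rax+8], ch	/* wrong */

	/* with an empty REX prefix, reg field 5 selects %bpl */
	40 88 68 08		mov byte ptr [rax+8], bpl	/* right */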
Fixes: 622582786c9e0 ("net: filter: x86: internal BPF JIT")
Signed-off-by: Xi Wang <xi.wang(a)gmail.com>
Signed-off-by: Luke Nelson <luke.r.nels(a)gmail.com>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Link: https://lore.kernel.org/bpf/20200418232655.23870-1-luke.r.nels@gmail.com
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5ea7c2cf7ab4..42b6709e6dc7 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -158,6 +158,19 @@ static bool is_ereg(u32 reg)
BIT(BPF_REG_AX));
}
+/*
+ * is_ereg_8l() == true if BPF register 'reg' is mapped to access x86-64
+ * lower 8-bit registers dil,sil,bpl,spl,r8b..r15b, which need extra byte
+ * of encoding. al,cl,dl,bl have simpler encoding.
+ */
+static bool is_ereg_8l(u32 reg)
+{
+ return is_ereg(reg) ||
+ (1 << reg) & (BIT(BPF_REG_1) |
+ BIT(BPF_REG_2) |
+ BIT(BPF_REG_FP));
+}
+
static bool is_axreg(u32 reg)
{
return reg == BPF_REG_0;
@@ -598,9 +611,8 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
switch (size) {
case BPF_B:
/* Emit 'mov byte ptr [rax + off], al' */
- if (is_ereg(dst_reg) || is_ereg(src_reg) ||
- /* We have to add extra byte for x86 SIL, DIL regs */
- src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
+ if (is_ereg(dst_reg) || is_ereg_8l(src_reg))
+ /* Add extra byte for eregs or SIL,DIL,BPL in src_reg */
EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
else
EMIT1(0x88);
Hi folks,
We're working on trying to figure out a severe performance regression in
the 5.4 and older LTS trees. The regression seems to happen only on
physical spinning rust disks, which is probably why it went unnoticed.
The regression seems to be introduced in v4.7 with:
1f60fbe72749 ("ext4: allow readdir()'s of large empty directories to be interrupted")
The fio test used to reproduce it is:
sync; i=0; while [ $i -lt 4 ]; do
    ( ( time fio --name=disk-burner --readwrite=write --bs=4096 \
          --invalidate=1 --end_fsync=0 --filesize=800M --runtime=120 \
          --ioengine=libaio --thread --numjobs=20 --iodepth=1 \
          --unlink=1 ) 2>&1 | grep '^real' )
    ((i++))
done
When run with the offending commit, it'll take 3-4x longer to complete.
The regression was fixed upstream somewhere in this merge:
e5da4c933c50 ("Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4")
but it seems to be a combination of commits that fix it rather than a
single one.
Now, here's the tricky part... reverting these two commits on top of
v4.19.118 "fixes" the issue:
06bd3c36a733 ("ext4: fix data exposure after a crash")
1f60fbe72749 ("ext4: allow readdir()'s of large empty directories to be interrupted")
but clearly this is not something we want to do in the stable trees, so
we're trying to figure out the proper way to fix this.
--
Thanks,
Sasha
The patch titled
Subject: epoll: atomically remove wait entry on wake up
has been added to the -mm tree. Its filename is
epoll-atomically-remove-wait-entry-on-wake-up.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/epoll-atomically-remove-wait-entry…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/epoll-atomically-remove-wait-entry…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Roman Penyaev <rpenyaev(a)suse.de>
Subject: epoll: atomically remove wait entry on wake up
This patch does two things:
1. fixes lost wakeup introduced by:
339ddb53d373 ("fs/epoll: remove unnecessary wakeups of nested epoll")
2. improves performance for events delivery.
The description of the problem is the following: if N (>1) threads are
waiting on ep->wq for new events and M (>1) events come, it is quite
likely that more than one wakeup hits the same wait queue entry, because
there is quite a big window between the __add_wait_queue_exclusive() and
the following __remove_wait_queue() calls in the ep_poll() function.
This can lead to lost wakeups, because the thread that was woken up may
not handle all the events in ->rdllist. (The problem is described in
more detail here: https://lkml.org/lkml/2019/10/7/905)
The idea of the current patch is to use init_wait() instead of
init_waitqueue_entry(). Internally init_wait() sets
autoremove_wake_function as the callback, which removes the wait entry
atomically (under the wq lock) from the list, so the next incoming
wakeup hits the next wait entry in the wait queue, preventing lost
wakeups.
The problem is reliably reproduced by the epoll60 test case [1].
Removing the wait entry on wakeup also has performance benefits, because
there is no need to take ep->lock and remove the wait entry from the
queue after a successful wakeup. Here is the timing output of the
epoll60 test case:
With explicit wakeup from ep_scan_ready_list() (the state of the
code prior 339ddb53d373):
real 0m6.970s
user 0m49.786s
sys 0m0.113s
After this patch:
real 0m5.220s
user 0m36.879s
sys 0m0.019s
The other testcase is the stress-epoll [2], where one thread consumes
all the events and other threads produce many events:
With explicit wakeup from ep_scan_ready_list() (the state of the
code prior 339ddb53d373):
threads  events/ms  run-time ms
      8       5427         1474
     16       6163         2596
     32       6824         4689
     64       7060         9064
    128       6991        18309
After this patch:
threads  events/ms  run-time ms
      8       5598         1429
     16       7073         2262
     32       7502         4265
     64       7640         8376
    128       7634        16767
(number of "events/ms" represents event bandwidth, thus higher is
better; number of "run-time ms" represents overall time spent
doing the benchmark, thus lower is better)
[1] tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
[2] https://github.com/rouming/test-tools/blob/master/stress-epoll.c
Link: http://lkml.kernel.org/r/20200430130326.1368509-2-rpenyaev@suse.de
Signed-off-by: Roman Penyaev <rpenyaev(a)suse.de>
Cc: Khazhismel Kumykov <khazhy(a)google.com>
Cc: Alexander Viro <viro(a)zeniv.linux.org.uk>
Cc: Heiher <r(a)hev.cc>
Cc: Jason Baron <jbaron(a)akamai.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/eventpoll.c | 43 ++++++++++++++++++++++++-------------------
1 file changed, 24 insertions(+), 19 deletions(-)
--- a/fs/eventpoll.c~epoll-atomically-remove-wait-entry-on-wake-up
+++ a/fs/eventpoll.c
@@ -1822,7 +1822,6 @@ static int ep_poll(struct eventpoll *ep,
{
int res = 0, eavail, timed_out = 0;
u64 slack = 0;
- bool waiter = false;
wait_queue_entry_t wait;
ktime_t expires, *to = NULL;
@@ -1867,21 +1866,23 @@ fetch_events:
*/
ep_reset_busy_poll_napi_id(ep);
- /*
- * We don't have any available event to return to the caller. We need
- * to sleep here, and we will be woken by ep_poll_callback() when events
- * become available.
- */
- if (!waiter) {
- waiter = true;
- init_waitqueue_entry(&wait, current);
-
+ do {
+ /*
+ * Internally init_wait() uses autoremove_wake_function(),
+ * thus wait entry is removed from the wait queue on each
+ * wakeup. Why it is important? In case of several waiters
+ * each new wakeup will hit the next waiter, giving it the
+ * chance to harvest new event. Otherwise wakeup can be
+ * lost. This is also good performance-wise, because on
+ * normal wakeup path no need to call __remove_wait_queue()
+ * explicitly, thus ep->lock is not taken, which halts the
+ * event delivery.
+ */
+ init_wait(&wait);
write_lock_irq(&ep->lock);
__add_wait_queue_exclusive(&ep->wq, &wait);
write_unlock_irq(&ep->lock);
- }
- for (;;) {
/*
* We don't want to sleep if the ep_poll_callback() sends us
* a wakeup in between. That's why we set the task state
@@ -1911,10 +1912,20 @@ fetch_events:
timed_out = 1;
break;
}
- }
+
+ /* We were woken up, thus go and try to harvest some events */
+ eavail = 1;
+
+ } while (0);
__set_current_state(TASK_RUNNING);
+ if (!list_empty_careful(&wait.entry)) {
+ write_lock_irq(&ep->lock);
+ __remove_wait_queue(&ep->wq, &wait);
+ write_unlock_irq(&ep->lock);
+ }
+
send_events:
/*
* Try to transfer events to user space. In case we get 0 events and
@@ -1925,12 +1936,6 @@ send_events:
!(res = ep_send_events(ep, events, maxevents)) && !timed_out)
goto fetch_events;
- if (waiter) {
- write_lock_irq(&ep->lock);
- __remove_wait_queue(&ep->wq, &wait);
- write_unlock_irq(&ep->lock);
- }
-
return res;
}
_
Patches currently in -mm which might be from rpenyaev(a)suse.de are
kselftests-introduce-new-epoll60-testcase-for-catching-lost-wakeups.patch
epoll-atomically-remove-wait-entry-on-wake-up.patch
Building with gcc-10 causes a harmless warning about a section mismatch:
WARNING: modpost: vmlinux.o(.text.unlikely+0x5e191): Section mismatch in reference from the function tpm2_calc_event_log_size() to the function .init.text:early_memunmap()
The function tpm2_calc_event_log_size() references
the function __init early_memunmap().
This is often because tpm2_calc_event_log_size lacks a __init
annotation or the annotation of early_memunmap is wrong.
Add the missing annotation.
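The general rule, as a minimal sketch (hypothetical function names):

	/* Anything that calls an __init function must itself be __init,
	 * otherwise it may run after .init.text has been freed and jump
	 * into released memory. */
	static int __init parse_early_table(void *map, unsigned long len)
	{
		/* ... walk the early mapping ... */
		early_memunmap(map, len);	/* early_memunmap() is __init */
		return 0;
	}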
Fixes: e658c82be556 ("efi/tpm: Only set 'efi_tpm_final_log_size' after successful event log parsing")
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
---
drivers/firmware/efi/tpm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c
index 31f9f0e369b9..55b031d2c989 100644
--- a/drivers/firmware/efi/tpm.c
+++ b/drivers/firmware/efi/tpm.c
@@ -16,7 +16,7 @@
int efi_tpm_final_log_size;
EXPORT_SYMBOL(efi_tpm_final_log_size);
-static int tpm2_calc_event_log_size(void *data, int count, void *size_info)
+static int __init tpm2_calc_event_log_size(void *data, int count, void *size_info)
{
struct tcg_pcr_event2_head *header;
int event_size, size = 0;
--
2.26.0
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: b54f6f42cf2e - ASoC: stm32: sai: fix sai probe
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://cki-artifacts.s3.us-east-2.amazonaws.com/index.html?prefix=dataware…
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 4 architectures:
aarch64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
s390x:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ Usex - version 1.9-29
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ LTP: openposix test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ audit: audit testsuite test
🚧 ✅ iotop: sanity
🚧 ✅ storage: dm/common
🚧 ✅ trace: ftrace/tracer
Host 2:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ lvm thinp sanity
✅ storage: software RAID testing
🚧 ✅ Storage blktests
ppc64le:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ lvm thinp sanity
✅ storage: software RAID testing
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ❌ Storage blktests
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - tunnel
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ Usex - version 1.9-29
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ❌ LTP: openposix test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ❌ audit: audit testsuite test
🚧 ⚡⚡⚡ iotop: sanity
🚧 ⚡⚡⚡ storage: dm/common
🚧 ⚡⚡⚡ trace: ftrace/tracer
s390x:
Host 1:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
⚡⚡⚡ Podman system integration test - as root
⚡⚡⚡ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking sctp-auth: sockopts test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ❌ LTP: openposix test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ❌ audit: audit testsuite test
🚧 ❌ iotop: sanity
🚧 ✅ storage: dm/common
🚧 ✅ trace: ftrace/tracer
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ selinux-policy: serge-testsuite
⚡⚡⚡ stress: stress-ng
🚧 ❌ Storage blktests
x86_64:
Host 1:
✅ Boot test
✅ xfstests - ext4
✅ xfstests - xfs
✅ selinux-policy: serge-testsuite
✅ lvm thinp sanity
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ IOMMU boot test
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ Storage SAN device stress - mpt3sas_gen1
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ Podman system integration test - as root
⚡⚡⚡ Podman system integration test - as user
⚡⚡⚡ LTP
⚡⚡⚡ Loopdev Sanity
⚡⚡⚡ Memory function: memfd_create
⚡⚡⚡ AMTU (Abstract Machine Test Utility)
⚡⚡⚡ Networking bridge: sanity
⚡⚡⚡ Ethernet drivers sanity
⚡⚡⚡ Networking MACsec: sanity
⚡⚡⚡ Networking socket: fuzz
⚡⚡⚡ Networking sctp-auth: sockopts test
⚡⚡⚡ Networking: igmp conformance test
⚡⚡⚡ Networking route: pmtu
⚡⚡⚡ Networking route_func - local
⚡⚡⚡ Networking route_func - forward
⚡⚡⚡ Networking TCP: keepalive test
⚡⚡⚡ Networking UDP: socket
⚡⚡⚡ Networking tunnel: geneve basic test
⚡⚡⚡ Networking tunnel: gre basic
⚡⚡⚡ L2TP basic test
⚡⚡⚡ Networking tunnel: vxlan basic
⚡⚡⚡ Networking ipsec: basic netns - transport
⚡⚡⚡ Networking ipsec: basic netns - tunnel
⚡⚡⚡ httpd: mod_ssl smoke sanity
⚡⚡⚡ tuned: tune-processes-through-perf
⚡⚡⚡ pciutils: sanity smoke test
⚡⚡⚡ ALSA PCM loopback test
⚡⚡⚡ ALSA Control (mixer) Userspace Element test
⚡⚡⚡ Usex - version 1.9-29
⚡⚡⚡ storage: SCSI VPD
🚧 ⚡⚡⚡ CIFS Connectathon
🚧 ⚡⚡⚡ POSIX pjd-fstest suites
🚧 ⚡⚡⚡ jvm - DaCapo Benchmark Suite
🚧 ⚡⚡⚡ jvm - jcstress tests
🚧 ⚡⚡⚡ Memory function: kaslr
🚧 ⚡⚡⚡ LTP: openposix test suite
🚧 ⚡⚡⚡ Networking vnic: ipvlan/basic
🚧 ⚡⚡⚡ audit: audit testsuite test
🚧 ⚡⚡⚡ iotop: sanity
🚧 ⚡⚡⚡ storage: dm/common
🚧 ⚡⚡⚡ trace: ftrace/tracer
Host 4:
✅ Boot test
✅ Storage SAN device stress - qedf driver
Host 5:
✅ Boot test
✅ Podman system integration test - as root
✅ Podman system integration test - as user
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func - local
✅ Networking route_func - forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns - transport
✅ Networking ipsec: basic netns - tunnel
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ pciutils: sanity smoke test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ Usex - version 1.9-29
✅ storage: SCSI VPD
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm - DaCapo Benchmark Suite
🚧 ✅ jvm - jcstress tests
🚧 ✅ Memory function: kaslr
🚧 ✅ LTP: openposix test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ❌ audit: audit testsuite test
🚧 ✅ iotop: sanity
🚧 ✅ storage: dm/common
🚧 ✅ trace: ftrace/tracer
Test sources: https://github.com/CKI-project/tests-beaker
💚 Pull requests are welcome for new tests or improvements to existing tests!
Aborted tests
-------------
Tests that didn't complete running successfully are marked with ⚡⚡⚡.
If this was caused by an infrastructure issue, we try to mark that
explicitly in the report.
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within a reasonable timeframe. Tests that haven't
finished running yet are marked with ⏱.
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 50fe7ebb6475711c15b3397467e6424e20026d94 Mon Sep 17 00:00:00 2001
From: Luke Nelson <lukenels(a)cs.washington.edu>
Date: Wed, 22 Apr 2020 10:36:30 -0700
Subject: [PATCH] bpf, x86_32: Fix clobbering of dst for BPF_JSET
The current JIT clobbers the destination register for BPF_JSET BPF_X
and BPF_K by using "and" and "or" instructions. This is fine when the
destination register is a temporary loaded from a register stored on
the stack but not otherwise.
This patch fixes the problem (for both BPF_K and BPF_X) by always loading
the destination register into temporaries since BPF_JSET should not
modify the destination register.
This bug may not be currently triggerable as BPF_REG_AX is the only
register not stored on the stack and the verifier uses it in a limited
way.
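(Editorial aside, not the JIT's emitted code: BPF_JSET is a non-destructive
test, comparable to x86 'test', but the old ia32 JIT computed the condition
with a destructive 'and' directly on the destination when it was live in a
register. A sketch of the two behaviors:)

	/* old: condition computed in place, dst corrupted afterwards */
	dst_lo &= src_lo;
	dst_hi &= src_hi;
	if (dst_lo | dst_hi)
		goto target;

	/* new: stage dst in temporaries (EAX:EDX), dst left intact */
	tmp_lo = dst_lo; tmp_hi = dst_hi;
	tmp_lo &= src_lo;
	tmp_hi &= src_hi;
	if (tmp_lo | tmp_hi)
		goto target;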
Fixes: 03f5781be2c7b ("bpf, x86_32: add eBPF JIT compiler for ia32")
Signed-off-by: Xi Wang <xi.wang(a)gmail.com>
Signed-off-by: Luke Nelson <luke.r.nels(a)gmail.com>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Acked-by: Wang YanQing <udknight(a)gmail.com>
Link: https://lore.kernel.org/bpf/20200422173630.8351-2-luke.r.nels@gmail.com
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index cc9ad3892ea6..ba7d9ccfc662 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -2015,8 +2015,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_JMP | BPF_JSET | BPF_X:
case BPF_JMP32 | BPF_JSET | BPF_X: {
bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
- u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
- u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+ u8 dreg_lo = IA32_EAX;
+ u8 dreg_hi = IA32_EDX;
u8 sreg_lo = sstk ? IA32_ECX : src_lo;
u8 sreg_hi = sstk ? IA32_EBX : src_hi;
@@ -2028,6 +2028,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
add_2reg(0x40, IA32_EBP,
IA32_EDX),
STACK_VAR(dst_hi));
+ } else {
+ /* mov dreg_lo,dst_lo */
+ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dst_lo));
+ if (is_jmp64)
+ /* mov dreg_hi,dst_hi */
+ EMIT2(0x89,
+ add_2reg(0xC0, dreg_hi, dst_hi));
}
if (sstk) {
@@ -2052,8 +2059,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_JMP | BPF_JSET | BPF_K:
case BPF_JMP32 | BPF_JSET | BPF_K: {
bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
- u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
- u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+ u8 dreg_lo = IA32_EAX;
+ u8 dreg_hi = IA32_EDX;
u8 sreg_lo = IA32_ECX;
u8 sreg_hi = IA32_EBX;
u32 hi;
@@ -2066,6 +2073,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
add_2reg(0x40, IA32_EBP,
IA32_EDX),
STACK_VAR(dst_hi));
+ } else {
+ /* mov dreg_lo,dst_lo */
+ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dst_lo));
+ if (is_jmp64)
+ /* mov dreg_hi,dst_hi */
+ EMIT2(0x89,
+ add_2reg(0xC0, dreg_hi, dst_hi));
}
/* mov ecx,imm32 */
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From aee194b14dd2b2bde6252b3acf57d36dccfc743a Mon Sep 17 00:00:00 2001
From: Luke Nelson <lukenels(a)cs.washington.edu>
Date: Sat, 18 Apr 2020 16:26:53 -0700
Subject: [PATCH] bpf, x86: Fix encoding for lower 8-bit registers in BPF_STX
BPF_B
This patch fixes an encoding bug in emit_stx for BPF_B when the source
register is BPF_REG_FP.
The current implementation for BPF_STX BPF_B in emit_stx saves one REX
byte when the operands can be encoded using Mod-R/M alone. The lower 8
bits of registers %rax, %rbx, %rcx, and %rdx can be accessed without using
a REX prefix via %al, %bl, %cl, and %dl, respectively. Other registers,
(e.g., %rsi, %rdi, %rbp, %rsp) require a REX prefix to use their 8-bit
equivalents (%sil, %dil, %bpl, %spl).
The current code checks if the source for BPF_STX BPF_B is BPF_REG_1
or BPF_REG_2 (which map to %rdi and %rsi), in which case it emits the
required REX prefix. However, it misses the case when the source is
BPF_REG_FP (mapped to %rbp).
The result is that BPF_STX BPF_B with BPF_REG_FP as the source operand
will read from register %ch instead of the correct %bpl. This patch fixes
the problem by fixing and refactoring the check on which registers need
the extra REX byte. Since no BPF registers map to %rsp, there is no need
to handle %spl.
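(Encoding recap, editorial and not from the patch: in 64-bit mode, ModRM
reg values 4-7 select AH/CH/DH/BH when no REX prefix is present, but
SPL/BPL/SIL/DIL once any REX prefix -- even the empty 0x40 -- is emitted.
BPF_REG_FP maps to %rbp, reg field 5, hence the %ch misread:)

	/*
	 *   88 6b 08      mov byte ptr [rbx+8], ch   ; no REX: reg 5 = CH
	 *   40 88 6b 08   mov byte ptr [rbx+8], bpl  ; REX 0x40: reg 5 = BPL
	 */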
Fixes: 622582786c9e0 ("net: filter: x86: internal BPF JIT")
Signed-off-by: Xi Wang <xi.wang(a)gmail.com>
Signed-off-by: Luke Nelson <luke.r.nels(a)gmail.com>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Link: https://lore.kernel.org/bpf/20200418232655.23870-1-luke.r.nels@gmail.com
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5ea7c2cf7ab4..42b6709e6dc7 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -158,6 +158,19 @@ static bool is_ereg(u32 reg)
BIT(BPF_REG_AX));
}
+/*
+ * is_ereg_8l() == true if BPF register 'reg' is mapped to access x86-64
+ * lower 8-bit registers dil,sil,bpl,spl,r8b..r15b, which need extra byte
+ * of encoding. al,cl,dl,bl have simpler encoding.
+ */
+static bool is_ereg_8l(u32 reg)
+{
+ return is_ereg(reg) ||
+ (1 << reg) & (BIT(BPF_REG_1) |
+ BIT(BPF_REG_2) |
+ BIT(BPF_REG_FP));
+}
+
static bool is_axreg(u32 reg)
{
return reg == BPF_REG_0;
@@ -598,9 +611,8 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
switch (size) {
case BPF_B:
/* Emit 'mov byte ptr [rax + off], al' */
- if (is_ereg(dst_reg) || is_ereg(src_reg) ||
- /* We have to add extra byte for x86 SIL, DIL regs */
- src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
+ if (is_ereg(dst_reg) || is_ereg_8l(src_reg))
+ /* Add extra byte for eregs or SIL,DIL,BPL in src_reg */
EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
else
EMIT1(0x88);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From aee194b14dd2b2bde6252b3acf57d36dccfc743a Mon Sep 17 00:00:00 2001
From: Luke Nelson <lukenels(a)cs.washington.edu>
Date: Sat, 18 Apr 2020 16:26:53 -0700
Subject: [PATCH] bpf, x86: Fix encoding for lower 8-bit registers in BPF_STX
BPF_B
This patch fixes an encoding bug in emit_stx for BPF_B when the source
register is BPF_REG_FP.
The current implementation for BPF_STX BPF_B in emit_stx saves one REX
byte when the operands can be encoded using Mod-R/M alone. The lower 8
bits of registers %rax, %rbx, %rcx, and %rdx can be accessed without using
a REX prefix via %al, %bl, %cl, and %dl, respectively. Other registers,
(e.g., %rsi, %rdi, %rbp, %rsp) require a REX prefix to use their 8-bit
equivalents (%sil, %dil, %bpl, %spl).
The current code checks if the source for BPF_STX BPF_B is BPF_REG_1
or BPF_REG_2 (which map to %rdi and %rsi), in which case it emits the
required REX prefix. However, it misses the case when the source is
BPF_REG_FP (mapped to %rbp).
The result is that BPF_STX BPF_B with BPF_REG_FP as the source operand
will read from register %ch instead of the correct %bpl. This patch fixes
the problem by fixing and refactoring the check on which registers need
the extra REX byte. Since no BPF registers map to %rsp, there is no need
to handle %spl.
Fixes: 622582786c9e0 ("net: filter: x86: internal BPF JIT")
Signed-off-by: Xi Wang <xi.wang(a)gmail.com>
Signed-off-by: Luke Nelson <luke.r.nels(a)gmail.com>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Link: https://lore.kernel.org/bpf/20200418232655.23870-1-luke.r.nels@gmail.com
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5ea7c2cf7ab4..42b6709e6dc7 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -158,6 +158,19 @@ static bool is_ereg(u32 reg)
BIT(BPF_REG_AX));
}
+/*
+ * is_ereg_8l() == true if BPF register 'reg' is mapped to access x86-64
+ * lower 8-bit registers dil,sil,bpl,spl,r8b..r15b, which need extra byte
+ * of encoding. al,cl,dl,bl have simpler encoding.
+ */
+static bool is_ereg_8l(u32 reg)
+{
+ return is_ereg(reg) ||
+ (1 << reg) & (BIT(BPF_REG_1) |
+ BIT(BPF_REG_2) |
+ BIT(BPF_REG_FP));
+}
+
static bool is_axreg(u32 reg)
{
return reg == BPF_REG_0;
@@ -598,9 +611,8 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
switch (size) {
case BPF_B:
/* Emit 'mov byte ptr [rax + off], al' */
- if (is_ereg(dst_reg) || is_ereg(src_reg) ||
- /* We have to add extra byte for x86 SIL, DIL regs */
- src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
+ if (is_ereg(dst_reg) || is_ereg_8l(src_reg))
+ /* Add extra byte for eregs or SIL,DIL,BPL in src_reg */
EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
else
EMIT1(0x88);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From aee194b14dd2b2bde6252b3acf57d36dccfc743a Mon Sep 17 00:00:00 2001
From: Luke Nelson <lukenels(a)cs.washington.edu>
Date: Sat, 18 Apr 2020 16:26:53 -0700
Subject: [PATCH] bpf, x86: Fix encoding for lower 8-bit registers in BPF_STX
BPF_B
This patch fixes an encoding bug in emit_stx for BPF_B when the source
register is BPF_REG_FP.
The current implementation for BPF_STX BPF_B in emit_stx saves one REX
byte when the operands can be encoded using Mod-R/M alone. The lower 8
bits of registers %rax, %rbx, %rcx, and %rdx can be accessed without using
a REX prefix via %al, %bl, %cl, and %dl, respectively. Other registers,
(e.g., %rsi, %rdi, %rbp, %rsp) require a REX prefix to use their 8-bit
equivalents (%sil, %dil, %bpl, %spl).
The current code checks if the source for BPF_STX BPF_B is BPF_REG_1
or BPF_REG_2 (which map to %rdi and %rsi), in which case it emits the
required REX prefix. However, it misses the case when the source is
BPF_REG_FP (mapped to %rbp).
The result is that BPF_STX BPF_B with BPF_REG_FP as the source operand
will read from register %ch instead of the correct %bpl. This patch fixes
the problem by fixing and refactoring the check on which registers need
the extra REX byte. Since no BPF registers map to %rsp, there is no need
to handle %spl.
Fixes: 622582786c9e0 ("net: filter: x86: internal BPF JIT")
Signed-off-by: Xi Wang <xi.wang(a)gmail.com>
Signed-off-by: Luke Nelson <luke.r.nels(a)gmail.com>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Link: https://lore.kernel.org/bpf/20200418232655.23870-1-luke.r.nels@gmail.com
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5ea7c2cf7ab4..42b6709e6dc7 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -158,6 +158,19 @@ static bool is_ereg(u32 reg)
BIT(BPF_REG_AX));
}
+/*
+ * is_ereg_8l() == true if BPF register 'reg' is mapped to access x86-64
+ * lower 8-bit registers dil,sil,bpl,spl,r8b..r15b, which need extra byte
+ * of encoding. al,cl,dl,bl have simpler encoding.
+ */
+static bool is_ereg_8l(u32 reg)
+{
+ return is_ereg(reg) ||
+ (1 << reg) & (BIT(BPF_REG_1) |
+ BIT(BPF_REG_2) |
+ BIT(BPF_REG_FP));
+}
+
static bool is_axreg(u32 reg)
{
return reg == BPF_REG_0;
@@ -598,9 +611,8 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
switch (size) {
case BPF_B:
/* Emit 'mov byte ptr [rax + off], al' */
- if (is_ereg(dst_reg) || is_ereg(src_reg) ||
- /* We have to add extra byte for x86 SIL, DIL regs */
- src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
+ if (is_ereg(dst_reg) || is_ereg_8l(src_reg))
+ /* Add extra byte for eregs or SIL,DIL,BPL in src_reg */
EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
else
EMIT1(0x88);
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From aee194b14dd2b2bde6252b3acf57d36dccfc743a Mon Sep 17 00:00:00 2001
From: Luke Nelson <lukenels(a)cs.washington.edu>
Date: Sat, 18 Apr 2020 16:26:53 -0700
Subject: [PATCH] bpf, x86: Fix encoding for lower 8-bit registers in BPF_STX
BPF_B
This patch fixes an encoding bug in emit_stx for BPF_B when the source
register is BPF_REG_FP.
The current implementation for BPF_STX BPF_B in emit_stx saves one REX
byte when the operands can be encoded using Mod-R/M alone. The lower 8
bits of registers %rax, %rbx, %rcx, and %rdx can be accessed without using
a REX prefix via %al, %bl, %cl, and %dl, respectively. Other registers,
(e.g., %rsi, %rdi, %rbp, %rsp) require a REX prefix to use their 8-bit
equivalents (%sil, %dil, %bpl, %spl).
The current code checks if the source for BPF_STX BPF_B is BPF_REG_1
or BPF_REG_2 (which map to %rdi and %rsi), in which case it emits the
required REX prefix. However, it misses the case when the source is
BPF_REG_FP (mapped to %rbp).
The result is that BPF_STX BPF_B with BPF_REG_FP as the source operand
will read from register %ch instead of the correct %bpl. This patch fixes
the problem by fixing and refactoring the check on which registers need
the extra REX byte. Since no BPF registers map to %rsp, there is no need
to handle %spl.
Fixes: 622582786c9e0 ("net: filter: x86: internal BPF JIT")
Signed-off-by: Xi Wang <xi.wang(a)gmail.com>
Signed-off-by: Luke Nelson <luke.r.nels(a)gmail.com>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Link: https://lore.kernel.org/bpf/20200418232655.23870-1-luke.r.nels@gmail.com
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5ea7c2cf7ab4..42b6709e6dc7 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -158,6 +158,19 @@ static bool is_ereg(u32 reg)
BIT(BPF_REG_AX));
}
+/*
+ * is_ereg_8l() == true if BPF register 'reg' is mapped to access x86-64
+ * lower 8-bit registers dil,sil,bpl,spl,r8b..r15b, which need extra byte
+ * of encoding. al,cl,dl,bl have simpler encoding.
+ */
+static bool is_ereg_8l(u32 reg)
+{
+ return is_ereg(reg) ||
+ (1 << reg) & (BIT(BPF_REG_1) |
+ BIT(BPF_REG_2) |
+ BIT(BPF_REG_FP));
+}
+
static bool is_axreg(u32 reg)
{
return reg == BPF_REG_0;
@@ -598,9 +611,8 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
switch (size) {
case BPF_B:
/* Emit 'mov byte ptr [rax + off], al' */
- if (is_ereg(dst_reg) || is_ereg(src_reg) ||
- /* We have to add extra byte for x86 SIL, DIL regs */
- src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
+ if (is_ereg(dst_reg) || is_ereg_8l(src_reg))
+ /* Add extra byte for eregs or SIL,DIL,BPL in src_reg */
EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
else
EMIT1(0x88);
From: YueHaibing <yuehaibing(a)huawei.com>
[ Upstream commit 28535877ac5b2b84f0d394fd67a5ec71c0c48b10 ]
ad7797_info should use ad7797_attribute_group, according to
commit ("iio:ad7793: Add support for the ad7796 and ad7797").
Scale is fixed for the ad7796 and not programmable, hence it
should not have the scale_available attribute.
Fixes: fd1a8b912841 ("iio:ad7793: Add support for the ad7796 and ad7797")
Signed-off-by: YueHaibing <yuehaibing(a)huawei.com>
Reviewed-by: Lars-Peter Clausen <lars(a)metafoo.de>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/iio/adc/ad7793.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iio/adc/ad7793.c b/drivers/iio/adc/ad7793.c
index d4bbe5b533189..23a6e7baa396b 100644
--- a/drivers/iio/adc/ad7793.c
+++ b/drivers/iio/adc/ad7793.c
@@ -542,7 +542,7 @@ static const struct iio_info ad7797_info = {
.read_raw = &ad7793_read_raw,
.write_raw = &ad7793_write_raw,
.write_raw_get_fmt = &ad7793_write_raw_get_fmt,
- .attrs = &ad7793_attribute_group,
+ .attrs = &ad7797_attribute_group,
.validate_trigger = ad_sd_validate_trigger,
};
--
2.20.1
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6e7e63cbb023976d828cdb22422606bf77baa8a9 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Fri, 17 Apr 2020 02:00:06 +0200
Subject: [PATCH] bpf: Forbid XADD on spilled pointers for unprivileged users
When check_xadd() verifies an XADD operation on a pointer to a stack slot
containing a spilled pointer, check_stack_read() verifies that the read,
which is part of XADD, is valid. However, since the placeholder value -1 is
passed as `value_regno`, check_stack_read() can only return a binary
decision and can't return the type of the value that was read. The intent
here is to verify whether the value read from the stack slot may be used as
a SCALAR_VALUE; but since check_stack_read() doesn't check the type, and
the type information is lost when check_stack_read() returns, this is not
enforced, and a malicious user can abuse XADD to leak spilled kernel
pointers.
Fix it by letting check_stack_read() verify that the value is usable as a
SCALAR_VALUE if no type information is passed to the caller.
To be able to use __is_pointer_value() in check_stack_read(), move it up.
Fix up the expected unprivileged error message for a BPF selftest that,
until now, assumed that unprivileged users can use XADD on stack-spilled
pointers. This also gives us a test for the behavior introduced in this
patch for free.
In theory, this could also be fixed by forbidding XADD on stack spills
entirely, since XADD is a locked operation (for operations on memory with
concurrency) and there can't be any concurrency on the BPF stack; but
Alexei has said that he wants to keep XADD on stack slots working to avoid
changes to the test suite [1].
The following BPF program demonstrates how to leak a BPF map pointer as an
unprivileged user using this bug:
// r7 = map_pointer
BPF_LD_MAP_FD(BPF_REG_7, small_map),
// r8 = launder(map_pointer)
BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_7, -8),
BPF_MOV64_IMM(BPF_REG_1, 0),
((struct bpf_insn) {
.code = BPF_STX | BPF_DW | BPF_XADD,
.dst_reg = BPF_REG_FP,
.src_reg = BPF_REG_1,
.off = -8
}),
BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_FP, -8),
// store r8 into map
BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_7),
BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -4),
BPF_ST_MEM(BPF_W, BPF_REG_ARG2, 0, 0),
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN()
[1] https://lore.kernel.org/bpf/20200416211116.qxqcza5vo2ddnkdq@ast-mbp.dhcp.th…
Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)")
Signed-off-by: Jann Horn <jannh(a)google.com>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Link: https://lore.kernel.org/bpf/20200417000007.10734-1-jannh@google.com
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 38cfcf701eeb..9e92d3d5ffd1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2118,6 +2118,15 @@ static bool register_is_const(struct bpf_reg_state *reg)
return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
}
+static bool __is_pointer_value(bool allow_ptr_leaks,
+ const struct bpf_reg_state *reg)
+{
+ if (allow_ptr_leaks)
+ return false;
+
+ return reg->type != SCALAR_VALUE;
+}
+
static void save_register_state(struct bpf_func_state *state,
int spi, struct bpf_reg_state *reg)
{
@@ -2308,6 +2317,16 @@ static int check_stack_read(struct bpf_verifier_env *env,
* which resets stack/reg liveness for state transitions
*/
state->regs[value_regno].live |= REG_LIVE_WRITTEN;
+ } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
+ /* If value_regno==-1, the caller is asking us whether
+ * it is acceptable to use this value as a SCALAR_VALUE
+ * (e.g. for XADD).
+ * We must not allow unprivileged callers to do that
+ * with spilled pointers.
+ */
+ verbose(env, "leaking pointer from stack off %d\n",
+ off);
+ return -EACCES;
}
mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
} else {
@@ -2673,15 +2692,6 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
return -EACCES;
}
-static bool __is_pointer_value(bool allow_ptr_leaks,
- const struct bpf_reg_state *reg)
-{
- if (allow_ptr_leaks)
- return false;
-
- return reg->type != SCALAR_VALUE;
-}
-
static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
{
return cur_regs(env) + regno;
diff --git a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
index 7f6c232cd842..ed1c2cea1dea 100644
--- a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
+++ b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
@@ -88,6 +88,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
+ .errstr_unpriv = "leaking pointer from stack off -8",
.errstr = "R0 invalid mem access 'inv'",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6e7e63cbb023976d828cdb22422606bf77baa8a9 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Fri, 17 Apr 2020 02:00:06 +0200
Subject: [PATCH] bpf: Forbid XADD on spilled pointers for unprivileged users
When check_xadd() verifies an XADD operation on a pointer to a stack slot
containing a spilled pointer, check_stack_read() verifies that the read,
which is part of XADD, is valid. However, since the placeholder value -1 is
passed as `value_regno`, check_stack_read() can only return a binary
decision and can't return the type of the value that was read. The intent
here is to verify whether the value read from the stack slot may be used as
a SCALAR_VALUE; but since check_stack_read() doesn't check the type, and
the type information is lost when check_stack_read() returns, this is not
enforced, and a malicious user can abuse XADD to leak spilled kernel
pointers.
Fix it by letting check_stack_read() verify that the value is usable as a
SCALAR_VALUE if no type information is passed to the caller.
To be able to use __is_pointer_value() in check_stack_read(), move it up.
Fix up the expected unprivileged error message for a BPF selftest that,
until now, assumed that unprivileged users can use XADD on stack-spilled
pointers. This also gives us a test for the behavior introduced in this
patch for free.
In theory, this could also be fixed by forbidding XADD on stack spills
entirely, since XADD is a locked operation (for operations on memory with
concurrency) and there can't be any concurrency on the BPF stack; but
Alexei has said that he wants to keep XADD on stack slots working to avoid
changes to the test suite [1].
The following BPF program demonstrates how to leak a BPF map pointer as an
unprivileged user using this bug:
// r7 = map_pointer
BPF_LD_MAP_FD(BPF_REG_7, small_map),
// r8 = launder(map_pointer)
BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_7, -8),
BPF_MOV64_IMM(BPF_REG_1, 0),
((struct bpf_insn) {
.code = BPF_STX | BPF_DW | BPF_XADD,
.dst_reg = BPF_REG_FP,
.src_reg = BPF_REG_1,
.off = -8
}),
BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_FP, -8),
// store r8 into map
BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_7),
BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -4),
BPF_ST_MEM(BPF_W, BPF_REG_ARG2, 0, 0),
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN()
[1] https://lore.kernel.org/bpf/20200416211116.qxqcza5vo2ddnkdq@ast-mbp.dhcp.th…
Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)")
Signed-off-by: Jann Horn <jannh(a)google.com>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Link: https://lore.kernel.org/bpf/20200417000007.10734-1-jannh@google.com
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 38cfcf701eeb..9e92d3d5ffd1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2118,6 +2118,15 @@ static bool register_is_const(struct bpf_reg_state *reg)
return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
}
+static bool __is_pointer_value(bool allow_ptr_leaks,
+ const struct bpf_reg_state *reg)
+{
+ if (allow_ptr_leaks)
+ return false;
+
+ return reg->type != SCALAR_VALUE;
+}
+
static void save_register_state(struct bpf_func_state *state,
int spi, struct bpf_reg_state *reg)
{
@@ -2308,6 +2317,16 @@ static int check_stack_read(struct bpf_verifier_env *env,
* which resets stack/reg liveness for state transitions
*/
state->regs[value_regno].live |= REG_LIVE_WRITTEN;
+ } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
+ /* If value_regno==-1, the caller is asking us whether
+ * it is acceptable to use this value as a SCALAR_VALUE
+ * (e.g. for XADD).
+ * We must not allow unprivileged callers to do that
+ * with spilled pointers.
+ */
+ verbose(env, "leaking pointer from stack off %d\n",
+ off);
+ return -EACCES;
}
mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
} else {
@@ -2673,15 +2692,6 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
return -EACCES;
}
-static bool __is_pointer_value(bool allow_ptr_leaks,
- const struct bpf_reg_state *reg)
-{
- if (allow_ptr_leaks)
- return false;
-
- return reg->type != SCALAR_VALUE;
-}
-
static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
{
return cur_regs(env) + regno;
diff --git a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
index 7f6c232cd842..ed1c2cea1dea 100644
--- a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
+++ b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
@@ -88,6 +88,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
+ .errstr_unpriv = "leaking pointer from stack off -8",
.errstr = "R0 invalid mem access 'inv'",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6e7e63cbb023976d828cdb22422606bf77baa8a9 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Fri, 17 Apr 2020 02:00:06 +0200
Subject: [PATCH] bpf: Forbid XADD on spilled pointers for unprivileged users
When check_xadd() verifies an XADD operation on a pointer to a stack slot
containing a spilled pointer, check_stack_read() verifies that the read,
which is part of XADD, is valid. However, since the placeholder value -1 is
passed as `value_regno`, check_stack_read() can only return a binary
decision and can't return the type of the value that was read. The intent
here is to verify whether the value read from the stack slot may be used as
a SCALAR_VALUE; but since check_stack_read() doesn't check the type, and
the type information is lost when check_stack_read() returns, this is not
enforced, and a malicious user can abuse XADD to leak spilled kernel
pointers.
Fix it by letting check_stack_read() verify that the value is usable as a
SCALAR_VALUE if no type information is passed to the caller.
To be able to use __is_pointer_value() in check_stack_read(), move it up.
Fix up the expected unprivileged error message for a BPF selftest that,
until now, assumed that unprivileged users can use XADD on stack-spilled
pointers. This also gives us a test for the behavior introduced in this
patch for free.
In theory, this could also be fixed by forbidding XADD on stack spills
entirely, since XADD is a locked operation (for operations on memory with
concurrency) and there can't be any concurrency on the BPF stack; but
Alexei has said that he wants to keep XADD on stack slots working to avoid
changes to the test suite [1].
The following BPF program demonstrates how to leak a BPF map pointer as an
unprivileged user using this bug:
// r7 = map_pointer
BPF_LD_MAP_FD(BPF_REG_7, small_map),
// r8 = launder(map_pointer)
BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_7, -8),
BPF_MOV64_IMM(BPF_REG_1, 0),
((struct bpf_insn) {
.code = BPF_STX | BPF_DW | BPF_XADD,
.dst_reg = BPF_REG_FP,
.src_reg = BPF_REG_1,
.off = -8
}),
BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_FP, -8),
// store r8 into map
BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_7),
BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -4),
BPF_ST_MEM(BPF_W, BPF_REG_ARG2, 0, 0),
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN()
[1] https://lore.kernel.org/bpf/20200416211116.qxqcza5vo2ddnkdq@ast-mbp.dhcp.th…
Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)")
Signed-off-by: Jann Horn <jannh(a)google.com>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Link: https://lore.kernel.org/bpf/20200417000007.10734-1-jannh@google.com
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 38cfcf701eeb..9e92d3d5ffd1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2118,6 +2118,15 @@ static bool register_is_const(struct bpf_reg_state *reg)
return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
}
+static bool __is_pointer_value(bool allow_ptr_leaks,
+ const struct bpf_reg_state *reg)
+{
+ if (allow_ptr_leaks)
+ return false;
+
+ return reg->type != SCALAR_VALUE;
+}
+
static void save_register_state(struct bpf_func_state *state,
int spi, struct bpf_reg_state *reg)
{
@@ -2308,6 +2317,16 @@ static int check_stack_read(struct bpf_verifier_env *env,
* which resets stack/reg liveness for state transitions
*/
state->regs[value_regno].live |= REG_LIVE_WRITTEN;
+ } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
+ /* If value_regno==-1, the caller is asking us whether
+ * it is acceptable to use this value as a SCALAR_VALUE
+ * (e.g. for XADD).
+ * We must not allow unprivileged callers to do that
+ * with spilled pointers.
+ */
+ verbose(env, "leaking pointer from stack off %d\n",
+ off);
+ return -EACCES;
}
mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
} else {
@@ -2673,15 +2692,6 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
return -EACCES;
}
-static bool __is_pointer_value(bool allow_ptr_leaks,
- const struct bpf_reg_state *reg)
-{
- if (allow_ptr_leaks)
- return false;
-
- return reg->type != SCALAR_VALUE;
-}
-
static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
{
return cur_regs(env) + regno;
diff --git a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
index 7f6c232cd842..ed1c2cea1dea 100644
--- a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
+++ b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
@@ -88,6 +88,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
+ .errstr_unpriv = "leaking pointer from stack off -8",
.errstr = "R0 invalid mem access 'inv'",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6e7e63cbb023976d828cdb22422606bf77baa8a9 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Fri, 17 Apr 2020 02:00:06 +0200
Subject: [PATCH] bpf: Forbid XADD on spilled pointers for unprivileged users
When check_xadd() verifies an XADD operation on a pointer to a stack slot
containing a spilled pointer, check_stack_read() verifies that the read,
which is part of XADD, is valid. However, since the placeholder value -1 is
passed as `value_regno`, check_stack_read() can only return a binary
decision and can't return the type of the value that was read. The intent
here is to verify whether the value read from the stack slot may be used as
a SCALAR_VALUE; but since check_stack_read() doesn't check the type, and
the type information is lost when check_stack_read() returns, this is not
enforced, and a malicious user can abuse XADD to leak spilled kernel
pointers.
Fix it by letting check_stack_read() verify that the value is usable as a
SCALAR_VALUE if no type information is passed to the caller.
To be able to use __is_pointer_value() in check_stack_read(), move it up.
Fix up the expected unprivileged error message for a BPF selftest that,
until now, assumed that unprivileged users can use XADD on stack-spilled
pointers. This also gives us a test for the behavior introduced in this
patch for free.
In theory, this could also be fixed by forbidding XADD on stack spills
entirely, since XADD is a locked operation (for operations on memory with
concurrency) and there can't be any concurrency on the BPF stack; but
Alexei has said that he wants to keep XADD on stack slots working to avoid
changes to the test suite [1].
The following BPF program demonstrates how to leak a BPF map pointer as an
unprivileged user using this bug:
// r7 = map_pointer
BPF_LD_MAP_FD(BPF_REG_7, small_map),
// r8 = launder(map_pointer)
BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_7, -8),
BPF_MOV64_IMM(BPF_REG_1, 0),
((struct bpf_insn) {
.code = BPF_STX | BPF_DW | BPF_XADD,
.dst_reg = BPF_REG_FP,
.src_reg = BPF_REG_1,
.off = -8
}),
BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_FP, -8),
// store r8 into map
BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_7),
BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -4),
BPF_ST_MEM(BPF_W, BPF_REG_ARG2, 0, 0),
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN()
[1] https://lore.kernel.org/bpf/20200416211116.qxqcza5vo2ddnkdq@ast-mbp.dhcp.th…
Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)")
Signed-off-by: Jann Horn <jannh(a)google.com>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Link: https://lore.kernel.org/bpf/20200417000007.10734-1-jannh@google.com
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 38cfcf701eeb..9e92d3d5ffd1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2118,6 +2118,15 @@ static bool register_is_const(struct bpf_reg_state *reg)
return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
}
+static bool __is_pointer_value(bool allow_ptr_leaks,
+ const struct bpf_reg_state *reg)
+{
+ if (allow_ptr_leaks)
+ return false;
+
+ return reg->type != SCALAR_VALUE;
+}
+
static void save_register_state(struct bpf_func_state *state,
int spi, struct bpf_reg_state *reg)
{
@@ -2308,6 +2317,16 @@ static int check_stack_read(struct bpf_verifier_env *env,
* which resets stack/reg liveness for state transitions
*/
state->regs[value_regno].live |= REG_LIVE_WRITTEN;
+ } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
+ /* If value_regno==-1, the caller is asking us whether
+ * it is acceptable to use this value as a SCALAR_VALUE
+ * (e.g. for XADD).
+ * We must not allow unprivileged callers to do that
+ * with spilled pointers.
+ */
+ verbose(env, "leaking pointer from stack off %d\n",
+ off);
+ return -EACCES;
}
mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
} else {
@@ -2673,15 +2692,6 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
return -EACCES;
}
-static bool __is_pointer_value(bool allow_ptr_leaks,
- const struct bpf_reg_state *reg)
-{
- if (allow_ptr_leaks)
- return false;
-
- return reg->type != SCALAR_VALUE;
-}
-
static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
{
return cur_regs(env) + regno;
diff --git a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
index 7f6c232cd842..ed1c2cea1dea 100644
--- a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
+++ b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
@@ -88,6 +88,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
+ .errstr_unpriv = "leaking pointer from stack off -8",
.errstr = "R0 invalid mem access 'inv'",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
Hi,
Please consider applying the following patches to the listed stable releases.
The following patches were found to be missing in stable releases by the
Chrome OS missing patch robot. The patches meet the following criteria.
- The patch includes a Fixes: tag
- The patch referenced in the Fixes: tag has been applied to the listed
stable release
- The patch has not been applied to that stable release
All patches have been applied to the listed stable releases and to at least one
Chrome OS branch. Resulting images have been build- and runtime-tested (where
applicable) on real hardware and with virtual hardware on kerneltests.org.
Thanks,
Guenter
---
Upstream commit a8dd397903a6 ("sctp: use right member as the param of list_for_each_entry")
upstream: v4.15-rc2
Fixes: d04adf1b3551 ("sctp: reset owner sk for data chunks on out queues when migrating a sock")
in linux-4.4.y: 4b5bb7723da1
in linux-4.9.y: b89fc6a5caff
upstream: v4.14-rc7
Affected branches:
linux-4.4.y
linux-4.9.y (already applied)
linux-4.14.y (already applied)
Upstream commit 2d84a2d19b61 ("fuse: fix possibly missed wake-up after abort")
upstream: v4.20-rc3
Fixes: b8f95e5d13f5 ("fuse: umount should wait for all requests")
in linux-4.4.y: 4d6ef17a060c
in linux-4.9.y: 6465d7688c2d
in linux-4.14.y: 973206923812
upstream: v4.19-rc1
Affected branches:
linux-4.4.y
linux-4.9.y
linux-4.14.y (already applied)
linux-4.19.y (already applied)
Upstream commit d9b8a67b3b95 ("mtd: cfi: fix deadloop in cfi_cmdset_0002.c do_write_buffer")
upstream: v5.1-rc4
Fixes: dfeae1073583 ("mtd: cfi_cmdset_0002: Change write buffer to check correct value")
in linux-4.4.y: 5069cd50117a
in linux-4.9.y: e9dc5dce0925
in linux-4.14.y: 746c1362c434
upstream: v4.18-rc1
Affected branches:
linux-4.4.y
linux-4.9.y
linux-4.14.y
linux-4.19.y
Upstream commit 467d12f5c784 ("include/uapi/linux/swab.h: fix userspace breakage, use __BITS_PER_LONG for swap")
upstream: v5.6-rc3
Fixes: d5767057c9a7 ("uapi: rename ext2_swab() to swab() and share globally in swab.h")
in linux-4.14.y: 4eb9d5bc7065
in linux-4.19.y: 9af535dc019e
in linux-5.4.y: 8b0f08036659
in linux-5.5.y: b83fb35b0300
upstream: v5.6-rc1
Affected branches:
linux-4.14.y
linux-4.19.y
linux-5.4.y (already applied)
Upstream commit 60d488571083 ("binder: take read mode of mmap_sem in binder_alloc_free_page()")
upstream: v5.2-rc1
Fixes: 5cec2d2e5839 ("binder: fix race between munmap() and direct reclaim")
in linux-4.14.y: c2a035d7822a
in linux-4.19.y: 9d57cfd4e9d8
upstream: v5.1-rc3
Affected branches:
linux-4.14.y
linux-4.19.y
Upstream commit e2bcb65782f9 ("ASoC: stm32: sai: fix sai probe")
upstream: v5.7-rc3
Fixes: 0d6defc7e0e4 ("ASoC: stm32: sai: manage rebind issue")
in linux-5.4.y: af7dd05d7c8f
in linux-5.5.y: 1ab75b0342d2
upstream: v5.6-rc5
Affected branches:
linux-5.4.y
linux-5.6.y
In some scenarios mmc_sdio_init_card() runs a retry path for the UHS-I
specific initialization, which leads to removal of the previously allocated
card. A new card is then re-allocated while retrying.
However, in one of the corresponding error paths we may end up removing an
already removed card, which likely leads to a NULL pointer dereference. So
let's fix this.
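(A simplified sketch of the faulty flow, with assumed helper names; the
real code is in mmc_sdio_init_card():)

	err = init_uhs(card);
	if (err) {
		if (retries && uhs_requested) {
			mmc_remove_card(card);	/* card torn down for retry */
			retries = 0;
			goto try_again;		/* allocates a fresh card */
		}
		goto remove;	/* old path: card may already be gone */
	}

Returning the error directly, as the patch below does, avoids jumping to
the remove label once the card has already been torn down.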
Fixes: 5fc3d80ef496 ("mmc: sdio: don't use rocr to check if the card could support UHS mode")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
---
drivers/mmc/core/sdio.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index ebb387aa5158..d35679e6e6aa 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -718,9 +718,8 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr,
/* Retry init sequence, but without R4_18V_PRESENT. */
retries = 0;
goto try_again;
- } else {
- goto remove;
}
+ return err;
}
/*
--
2.20.1
Hi,
commit ab6f762f0f53 ("printk: queue wake_up_klogd irq_work only if per-CPU
areas are ready") fixes a critical problem introduced with commit
1b710b1b10ef ("char/random: silence a lockdep splat with printk()").
Since commit 1b710b1b10ef has been applied to v5.4.y, commit ab6f762f0f53
needs to be applied as well.
An alternative might be to revert commit 1b710b1b10ef from v5.4.y, as
it was done for older kernel branches. However, we found that ab6f762f0f53
applies cleanly to v5.4.y and fixes the problem, so applying the fix
is probably a better solution.
Commit ab6f762f0f53 also needs to be applied to v5.6.y.
Thanks,
Guenter
From: Shubhrajyoti Datta <shubhrajyoti.datta(a)xilinx.com>
Update the console index: once the serial node is found, assign its id to
the console index.
Fixes: 18cc7ac8a28e ("Revert "serial: uartps: Register own uart console and driver structures"")
Signed-off-by: Shubhrajyoti Datta <shubhrajyoti.datta(a)xilinx.com>
Cc: stable <stable(a)vger.kernel.org>
Signed-off-by: Michal Simek <michal.simek(a)xilinx.com>
---
Greg: Would be good if you can take this patch to 5.7 and also to stable
trees.
---
drivers/tty/serial/xilinx_uartps.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c
index 672cfa075e28..b9d672af8b65 100644
--- a/drivers/tty/serial/xilinx_uartps.c
+++ b/drivers/tty/serial/xilinx_uartps.c
@@ -1465,6 +1465,7 @@ static int cdns_uart_probe(struct platform_device *pdev)
cdns_uart_uart_driver.nr = CDNS_UART_NR_PORTS;
#ifdef CONFIG_SERIAL_XILINX_PS_UART_CONSOLE
cdns_uart_uart_driver.cons = &cdns_uart_console;
+ cdns_uart_console.index = id;
#endif
rc = uart_register_driver(&cdns_uart_uart_driver);
--
2.26.2
Commit a408e4a86b36 ("ima: open a new file instance if no read
permissions") tries to create a new file descriptor to calculate a file
digest if the file has not been opened with O_RDONLY flag. However, if a
new file descriptor cannot be obtained, it sets the FMODE_READ flag in
file->f_flags instead of file->f_mode.
This patch fixes this issue by replacing f_flags with f_mode as it was
before that commit.
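(For illustration, not from the patch: O_* open flags live in
file->f_flags while FMODE_* bits live in file->f_mode, and the two value
spaces overlap numerically -- FMODE_READ is 0x1, which read as an f_flags
bit means O_WRONLY.)

	file->f_flags |= FMODE_READ;	/* wrong: actually sets O_WRONLY */
	file->f_mode  |= FMODE_READ;	/* right: marks the file readable */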
Changelog
v1:
- fix comment for f_mode change (suggested by Mimi)
- rename modified_flags variable to modified_mode (suggested by Mimi)
Cc: stable(a)vger.kernel.org # 4.20.x
Fixes: a408e4a86b36 ("ima: open a new file instance if no read permissions")
Signed-off-by: Roberto Sassu <roberto.sassu(a)huawei.com>
---
security/integrity/ima/ima_crypto.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index 5201f5ec2ce4..f3a7f4eb1fc1 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -537,7 +537,7 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash)
loff_t i_size;
int rc;
struct file *f = file;
- bool new_file_instance = false, modified_flags = false;
+ bool new_file_instance = false, modified_mode = false;
/*
* For consistency, fail file's opened with the O_DIRECT flag on
@@ -557,13 +557,13 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash)
f = dentry_open(&file->f_path, flags, file->f_cred);
if (IS_ERR(f)) {
/*
- * Cannot open the file again, lets modify f_flags
+ * Cannot open the file again, lets modify f_mode
* of original and continue
*/
pr_info_ratelimited("Unable to reopen file for reading.\n");
f = file;
- f->f_flags |= FMODE_READ;
- modified_flags = true;
+ f->f_mode |= FMODE_READ;
+ modified_mode = true;
} else {
new_file_instance = true;
}
@@ -581,8 +581,8 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash)
out:
if (new_file_instance)
fput(f);
- else if (modified_flags)
- f->f_flags &= ~FMODE_READ;
+ else if (modified_mode)
+ f->f_mode &= ~FMODE_READ;
return rc;
}
--
2.17.1
----- Original Message -----
> From: "John Ogness" <john.ogness(a)linutronix.de>
> To: "richard" <richard(a)nod.at>
> CC: "linux-mtd" <linux-mtd(a)lists.infradead.org>, "linux-kernel" <linux-kernel(a)vger.kernel.org>
> Sent: Wednesday, 29 April 2020 16:56:31
> Subject: Re: [PATCH] ubifs: Fix ubifs_tnc_lookup() usage in do_kill_orphans()
> Hi Richard,
>
> Could you CC this patch to stable? It fixes a serious problem that I am
> seeing on real devices (i.e. Linux not being able to mount its root
> filesystem after a power cut). Thanks.
Just checked again, better ask stable maintainers. :-)
Stable maintainers, can you please make sure this patch will make it
into stable?
The upstream commit is:
4ab25ac8b2b5 ("ubifs: Fix ubifs_tnc_lookup() usage in do_kill_orphans()")
I always thought having a Fixes: tag is enough to make sure it will
get picked up. Isn't this the case?
Thanks,
//richard
> John Ogness
>
> On 2020-01-19, Richard Weinberger <richard(a)nod.at> wrote:
>> Orphans are allowed to point to deleted inodes.
>> So -ENOENT is not a fatal error.
>>
>> Reported-by: Кочетков Максим <fido_max(a)inbox.ru>
>> Reported-and-tested-by: "Christian Berger" <Christian.Berger(a)de.bosch.com>
>> Fixes: ee1438ce5dc4 ("ubifs: Check link count of inodes when killing orphans.")
>> Signed-off-by: Richard Weinberger <richard(a)nod.at>
>> ---
>> fs/ubifs/orphan.c | 4 ++--
>> 1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
>> index 54d6db61106f..2645917360b9 100644
>> --- a/fs/ubifs/orphan.c
>> +++ b/fs/ubifs/orphan.c
>> @@ -688,14 +688,14 @@ static int do_kill_orphans(struct ubifs_info *c, struct
>> ubifs_scan_leb *sleb,
>>
>> ino_key_init(c, &key1, inum);
>> err = ubifs_tnc_lookup(c, &key1, ino);
>> - if (err)
>> + if (err && err != -ENOENT)
>> goto out_free;
>>
>> /*
>> * Check whether an inode can really get deleted.
>> * linkat() with O_TMPFILE allows rebirth of an inode.
>> */
>> - if (ino->nlink == 0) {
>> + if (err == 0 && ino->nlink == 0) {
>> dbg_rcvry("deleting orphaned inode %lu",
>> (unsigned long)inum);
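To make the fixed control flow concrete, here is a small self-contained
sketch of the pattern (the toy lookup() and stub types are invented for
illustration; this is not the ubifs code): a real failure still aborts,
-ENOENT is treated as "already gone", and nlink is only inspected after a
successful lookup.

#include <stdio.h>
#include <errno.h>

struct inode_stub { unsigned int nlink; };

/* Toy lookup: even keys hit (a fully unlinked inode), odd keys miss. */
static int lookup(int key, struct inode_stub *ino)
{
	if (key & 1)
		return -ENOENT;
	ino->nlink = 0;
	return 0;
}

int main(void)
{
	for (int key = 1; key <= 2; key++) {
		struct inode_stub ino;
		int err = lookup(key, &ino);

		if (err && err != -ENOENT)
			return 1;	/* real failures still abort */
		if (err == 0 && ino.nlink == 0)
			printf("deleting orphaned inode %d\n", key);
		else
			printf("orphan %d already gone, not fatal\n", key);
	}
	return 0;
}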
This is a note to let you know that I've just added the patch titled
USB: uas: add quirk for LaCie 2Big Quadra
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week).
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 9f04db234af691007bb785342a06abab5fb34474 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum(a)suse.com>
Date: Wed, 29 Apr 2020 17:52:18 +0200
Subject: USB: uas: add quirk for LaCie 2Big Quadra
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This device needs US_FL_NO_REPORT_OPCODES to avoid going
through prolonged error handling on enumeration.
Signed-off-by: Oliver Neukum <oneukum(a)suse.com>
Reported-by: Julian Groß <julian.g(a)posteo.de>
Cc: stable <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20200429155218.7308-1-oneukum@suse.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/storage/unusual_uas.h | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
index 1b23741036ee..37157ed9a881 100644
--- a/drivers/usb/storage/unusual_uas.h
+++ b/drivers/usb/storage/unusual_uas.h
@@ -28,6 +28,13 @@
* and don't forget to CC: the USB development list <linux-usb(a)vger.kernel.org>
*/
+/* Reported-by: Julian Groß <julian.g(a)posteo.de> */
+UNUSUAL_DEV(0x059f, 0x105f, 0x0000, 0x9999,
+ "LaCie",
+ "2Big Quadra USB3",
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_NO_REPORT_OPCODES),
+
/*
* Apricorn USB3 dongle sometimes returns "USBSUSBSUSBS" in response to SCSI
* commands in UAS mode. Observed with the 1.28 firmware; are there others?
--
2.26.2
Hi,
On Fri, 4 Oct 2019 Clement Leger <cleger(a)kalray.eu> wrote:
>
> Index of rvring is computed using pointer arithmetic. However, since
> rvring->rvdev->vring is the base of the vring array, computation
> of rvring idx should be reversed. It previously led to writing at negative
> indices in the resource table.
>
> Signed-off-by: Clement Leger <cleger(a)kalray.eu>
> ---
> drivers/remoteproc/remoteproc_core.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
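As a self-contained illustration of the sign error described above (stub
names invented; this is not the remoteproc code): subtracting the array
base from an element pointer yields the element's index, while reversing
the operands yields its negation.

#include <stdio.h>

struct vring_stub { int dummy; };

int main(void)
{
	struct vring_stub vrings[4];		/* stand-in for the vring array */
	struct vring_stub *rvring = &vrings[2];

	/* Correct: element minus base gives the index, 2. */
	printf("idx     = %td\n", rvring - vrings);

	/* Reversed operands give -2, i.e. a negative resource-table index. */
	printf("bad idx = %td\n", vrings - rvring);
	return 0;
}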
Randomly stumbled upon this in a list of patches. This patch landed
in mainline as:
00a0eec59ddb remoteproc: Fix wrong rvring index computation
Should it be queued up for stable? I'm guessing:
Fixes: c0d631570ad5 ("remoteproc: set vring addresses in resource table")
-Doug
Removing the pcrypt module triggers this:
general protection fault, probably for non-canonical address 0xdead000000000122
CPU: 5 PID: 264 Comm: modprobe Not tainted 5.6.0+ #2
Hardware name: QEMU Standard PC
RIP: 0010:__cpuhp_state_remove_instance+0xcc/0x120
Call Trace:
padata_sysfs_release+0x74/0xce
kobject_put+0x81/0xd0
padata_free+0x12/0x20
pcrypt_exit+0x43/0x8ee [pcrypt]
padata instances wrongly use the same hlist node for the online and dead
states, so __padata_free()'s second cpuhp remove call chokes on the node
that the first poisoned.
cpuhp multi-instance callbacks only walk forward in cpuhp_step->list, and
the same node is linked in both the online and dead lists, so the list
corruption that results from padata_alloc() adding the node to a second
list without removing it from the first causes no problems as long as no
instances are freed.
Avoid the issue by giving each state its own node.
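A userspace sketch of why one node cannot be linked into two lists at once
(modeled loosely on the kernel's hlist; names simplified and invented for
illustration): the second add overwrites the node's linkage, the two list
heads immediately disagree about the node, and in the kernel that stale
linkage is what the poisoned-pointer fault above trips over.

#include <stdio.h>

/* Minimal hlist-style node, modeled loosely on include/linux/list.h. */
struct node { struct node *next, **pprev; };
struct head { struct node *first; };

static void add_head(struct head *h, struct node *n)
{
	n->next = h->first;
	if (h->first)
		h->first->pprev = &n->next;
	h->first = n;
	n->pprev = &h->first;
}

int main(void)
{
	struct head online = { 0 }, dead = { 0 };
	struct node n;

	add_head(&online, &n);
	add_head(&dead, &n);	/* overwrites n's next/pprev linkage */

	/* The "online" head still points at n, but n now claims to be
	 * on the "dead" list: the two lists disagree about n. */
	printf("online.first == &n: %d\n", online.first == &n);
	printf("n.pprev == &dead.first: %d\n", n.pprev == &dead.first);
	return 0;
}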
Fixes: 894c9ef9780c ("padata: validate cpumask without removed CPU during offline")
Signed-off-by: Daniel Jordan <daniel.m.jordan(a)oracle.com>
Cc: Herbert Xu <herbert(a)gondor.apana.org.au>
Cc: Steffen Klassert <steffen.klassert(a)secunet.com>
Cc: linux-crypto(a)vger.kernel.org
Cc: linux-kernel(a)vger.kernel.org
Cc: stable(a)vger.kernel.org # v5.4+
---
include/linux/padata.h | 6 ++++--
kernel/padata.c | 14 ++++++++------
2 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/include/linux/padata.h b/include/linux/padata.h
index a0d8b41850b2..693cae9bfe66 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -139,7 +139,8 @@ struct padata_shell {
/**
* struct padata_instance - The overall control structure.
*
- * @node: Used by CPU hotplug.
+ * @cpu_online_node: Linkage for CPU online callback.
+ * @cpu_dead_node: Linkage for CPU offline callback.
* @parallel_wq: The workqueue used for parallel work.
* @serial_wq: The workqueue used for serial work.
* @pslist: List of padata_shell objects attached to this instance.
@@ -150,7 +151,8 @@ struct padata_shell {
* @flags: padata flags.
*/
struct padata_instance {
- struct hlist_node node;
+ struct hlist_node cpu_online_node;
+ struct hlist_node cpu_dead_node;
struct workqueue_struct *parallel_wq;
struct workqueue_struct *serial_wq;
struct list_head pslist;
diff --git a/kernel/padata.c b/kernel/padata.c
index a6afa12fb75e..aae789896616 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -703,7 +703,7 @@ static int padata_cpu_online(unsigned int cpu, struct hlist_node *node)
struct padata_instance *pinst;
int ret;
- pinst = hlist_entry_safe(node, struct padata_instance, node);
+ pinst = hlist_entry_safe(node, struct padata_instance, cpu_online_node);
if (!pinst_has_cpu(pinst, cpu))
return 0;
@@ -718,7 +718,7 @@ static int padata_cpu_dead(unsigned int cpu, struct hlist_node *node)
struct padata_instance *pinst;
int ret;
- pinst = hlist_entry_safe(node, struct padata_instance, node);
+ pinst = hlist_entry_safe(node, struct padata_instance, cpu_dead_node);
if (!pinst_has_cpu(pinst, cpu))
return 0;
@@ -734,8 +734,9 @@ static enum cpuhp_state hp_online;
static void __padata_free(struct padata_instance *pinst)
{
#ifdef CONFIG_HOTPLUG_CPU
- cpuhp_state_remove_instance_nocalls(CPUHP_PADATA_DEAD, &pinst->node);
- cpuhp_state_remove_instance_nocalls(hp_online, &pinst->node);
+ cpuhp_state_remove_instance_nocalls(CPUHP_PADATA_DEAD,
+ &pinst->cpu_dead_node);
+ cpuhp_state_remove_instance_nocalls(hp_online, &pinst->cpu_online_node);
#endif
WARN_ON(!list_empty(&pinst->pslist));
@@ -939,9 +940,10 @@ static struct padata_instance *padata_alloc(const char *name,
mutex_init(&pinst->lock);
#ifdef CONFIG_HOTPLUG_CPU
- cpuhp_state_add_instance_nocalls_cpuslocked(hp_online, &pinst->node);
+ cpuhp_state_add_instance_nocalls_cpuslocked(hp_online,
+ &pinst->cpu_online_node);
cpuhp_state_add_instance_nocalls_cpuslocked(CPUHP_PADATA_DEAD,
- &pinst->node);
+ &pinst->cpu_dead_node);
#endif
put_online_cpus();
base-commit: ae83d0b416db002fe95601e7f97f64b59514d936
--
2.26.0