The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From bee749b187ac57d1faf00b2ab356ff322230fce8 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Wed, 25 Nov 2020 02:19:23 +0000
Subject: [PATCH] io_uring: fix files cancellation
io_uring_cancel_files()'s task check condition mistakenly got flipped.
1. There can't be a request in the inflight list without
IO_WQ_WORK_FILES, kill this check to keep the whole condition simpler.
2. Also, don't call the function for files==NULL to not do such a check,
all that staff is already handled well by its counter part,
__io_uring_cancel_task_requests().
With that just flip the task check.
Also, it iowq-cancels all request of current task there, don't forget to
set right ->files into struct io_task_cancel.
Fixes: c1973b38bf639 ("io_uring: cancel only requests of current task")
Reported-by: syzbot+c0d52d0b3c0c3ffb9525(a)syzkaller.appspotmail.com
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e66888d45778..f47de27e5125 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8688,15 +8688,14 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
struct files_struct *files)
{
while (!list_empty_careful(&ctx->inflight_list)) {
- struct io_task_cancel cancel = { .task = task, .files = NULL, };
+ struct io_task_cancel cancel = { .task = task, .files = files };
struct io_kiocb *req;
DEFINE_WAIT(wait);
bool found = false;
spin_lock_irq(&ctx->inflight_lock);
list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
- if (req->task == task &&
- (req->work.flags & IO_WQ_WORK_FILES) &&
+ if (req->task != task ||
req->work.identity->files != files)
continue;
found = true;
@@ -8768,10 +8767,11 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
io_cancel_defer_files(ctx, task, files);
io_cqring_overflow_flush(ctx, true, task, files);
- io_uring_cancel_files(ctx, task, files);
if (!files)
__io_uring_cancel_task_requests(ctx, task);
+ else
+ io_uring_cancel_files(ctx, task, files);
if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
atomic_dec(&task->io_uring->in_idle);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 4aa84f2ffa81f71e15e5cffc2cc6090dbee78f8e Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Thu, 7 Jan 2021 03:15:42 +0000
Subject: [PATCH] io_uring: dont kill fasync under completion_lock
CPU0 CPU1
---- ----
lock(&new->fa_lock);
local_irq_disable();
lock(&ctx->completion_lock);
lock(&new->fa_lock);
<Interrupt>
lock(&ctx->completion_lock);
*** DEADLOCK ***
Move kill_fasync() out of io_commit_cqring() to io_cqring_ev_posted(),
so it doesn't hold completion_lock while doing it. That saves from the
reported deadlock, and it's just nice to shorten the locking time and
untangle nested locks (compl_lock -> wq_head::lock).
Reported-by: syzbot+91ca3f25bd7f795f019c(a)syzkaller.appspotmail.com
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 91e517ad1421..401316fe2ae2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1345,11 +1345,6 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx)
/* order cqe stores with ring update */
smp_store_release(&rings->cq.tail, ctx->cached_cq_tail);
-
- if (wq_has_sleeper(&ctx->cq_wait)) {
- wake_up_interruptible(&ctx->cq_wait);
- kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN);
- }
}
static void io_put_identity(struct io_uring_task *tctx, struct io_kiocb *req)
@@ -1711,6 +1706,10 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
wake_up(&ctx->sq_data->wait);
if (io_should_trigger_evfd(ctx))
eventfd_signal(ctx->cq_ev_fd, 1);
+ if (wq_has_sleeper(&ctx->cq_wait)) {
+ wake_up_interruptible(&ctx->cq_wait);
+ kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN);
+ }
}
static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
@@ -1721,6 +1720,10 @@ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
}
if (io_should_trigger_evfd(ctx))
eventfd_signal(ctx->cq_ev_fd, 1);
+ if (wq_has_sleeper(&ctx->cq_wait)) {
+ wake_up_interruptible(&ctx->cq_wait);
+ kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN);
+ }
}
/* Returns true if there are no backlogged entries after the flush */
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 690d8b917bbe64772cb0b652311bcd50908aea6b Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders(a)chromium.org>
Date: Thu, 17 Dec 2020 14:29:12 -0800
Subject: [PATCH] spi: spi-geni-qcom: Fail new xfers if xfer/cancel/abort
pending
If we got a timeout when trying to send an abort command then it means
that we just got 3 timeouts in a row:
1. The original timeout that caused handle_fifo_timeout() to be
called.
2. A one second timeout waiting for the cancel command to finish.
3. A one second timeout waiting for the abort command to finish.
SPI is clocked by the controller, so nothing (aside from a hardware
fault or a totally broken sequencer) should be causing the actual
commands to fail in hardware. However, even though the hardware
itself is not expected to fail (and it'd be hard to predict how we
should handle things if it did), it's easy to hit the timeout case by
simply blocking our interrupt handler from running for a long period
of time. Obviously the system is in pretty bad shape if a interrupt
handler is blocked for > 2 seconds, but there are certainly bugs (even
bugs in other unrelated drivers) that can make this happen.
Let's make things a bit more robust against this case. If we fail to
abort we'll set a flag and then we'll block all future transfers until
we have no more interrupts pending.
Fixes: 561de45f72bd ("spi: spi-geni-qcom: Add SPI driver support for GENI based QUP")
Signed-off-by: Douglas Anderson <dianders(a)chromium.org>
Reviewed-by: Stephen Boyd <swboyd(a)chromium.org>
Link: https://lore.kernel.org/r/20201217142842.v3.2.Ibade998ed587e070388b4bf58801…
Signed-off-by: Mark Brown <broonie(a)kernel.org>
diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
index 096edfbde451..32c053705dec 100644
--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -83,6 +83,7 @@ struct spi_geni_master {
spinlock_t lock;
int irq;
bool cs_flag;
+ bool abort_failed;
};
static int get_spi_clk_cfg(unsigned int speed_hz,
@@ -141,8 +142,49 @@ static void handle_fifo_timeout(struct spi_master *spi,
spin_unlock_irq(&mas->lock);
time_left = wait_for_completion_timeout(&mas->abort_done, HZ);
- if (!time_left)
+ if (!time_left) {
dev_err(mas->dev, "Failed to cancel/abort m_cmd\n");
+
+ /*
+ * No need for a lock since SPI core has a lock and we never
+ * access this from an interrupt.
+ */
+ mas->abort_failed = true;
+ }
+}
+
+static bool spi_geni_is_abort_still_pending(struct spi_geni_master *mas)
+{
+ struct geni_se *se = &mas->se;
+ u32 m_irq, m_irq_en;
+
+ if (!mas->abort_failed)
+ return false;
+
+ /*
+ * The only known case where a transfer times out and then a cancel
+ * times out then an abort times out is if something is blocking our
+ * interrupt handler from running. Avoid starting any new transfers
+ * until that sorts itself out.
+ */
+ spin_lock_irq(&mas->lock);
+ m_irq = readl(se->base + SE_GENI_M_IRQ_STATUS);
+ m_irq_en = readl(se->base + SE_GENI_M_IRQ_EN);
+ spin_unlock_irq(&mas->lock);
+
+ if (m_irq & m_irq_en) {
+ dev_err(mas->dev, "Interrupts pending after abort: %#010x\n",
+ m_irq & m_irq_en);
+ return true;
+ }
+
+ /*
+ * If we're here the problem resolved itself so no need to check more
+ * on future transfers.
+ */
+ mas->abort_failed = false;
+
+ return false;
}
static void spi_geni_set_cs(struct spi_device *slv, bool set_flag)
@@ -158,9 +200,15 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag)
if (set_flag == mas->cs_flag)
return;
+ pm_runtime_get_sync(mas->dev);
+
+ if (spi_geni_is_abort_still_pending(mas)) {
+ dev_err(mas->dev, "Can't set chip select\n");
+ goto exit;
+ }
+
mas->cs_flag = set_flag;
- pm_runtime_get_sync(mas->dev);
spin_lock_irq(&mas->lock);
reinit_completion(&mas->cs_done);
if (set_flag)
@@ -173,6 +221,7 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag)
if (!time_left)
handle_fifo_timeout(spi, NULL);
+exit:
pm_runtime_put(mas->dev);
}
@@ -280,6 +329,9 @@ static int spi_geni_prepare_message(struct spi_master *spi,
int ret;
struct spi_geni_master *mas = spi_master_get_devdata(spi);
+ if (spi_geni_is_abort_still_pending(mas))
+ return -EBUSY;
+
ret = setup_fifo_params(spi_msg->spi, spi);
if (ret)
dev_err(mas->dev, "Couldn't select mode %d\n", ret);
@@ -509,6 +561,9 @@ static int spi_geni_transfer_one(struct spi_master *spi,
{
struct spi_geni_master *mas = spi_master_get_devdata(spi);
+ if (spi_geni_is_abort_still_pending(mas))
+ return -EBUSY;
+
/* Terminate and return success for 0 byte length transfer */
if (!xfer->len)
return 0;