This is a note to let you know that I've just added the patch titled
scsi: hpsa: limit outstanding rescans
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
scsi-hpsa-limit-outstanding-rescans.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Mon Dec 18 14:12:34 CET 2017
From: Don Brace <don.brace(a)microsemi.com>
Date: Fri, 10 Mar 2017 14:35:17 -0600
Subject: scsi: hpsa: limit outstanding rescans
From: Don Brace <don.brace(a)microsemi.com>
[ Upstream commit 87b9e6aa87d9411f1059aa245c0c79976bc557ac ]
Avoid rescan storms. No need to queue another if one is pending.
Reviewed-by: Scott Benesh <scott.benesh(a)microsemi.com>
Reviewed-by: Scott Teel <scott.teel(a)microsemi.com>
Reviewed-by: Tomas Henzl <thenzl(a)redhat.com>
Signed-off-by: Don Brace <don.brace(a)microsemi.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/scsi/hpsa.c | 16 +++++++++++++++-
drivers/scsi/hpsa.h | 1 +
2 files changed, 16 insertions(+), 1 deletion(-)
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -5529,7 +5529,7 @@ static void hpsa_scan_complete(struct ct
spin_lock_irqsave(&h->scan_lock, flags);
h->scan_finished = 1;
- wake_up_all(&h->scan_wait_queue);
+ wake_up(&h->scan_wait_queue);
spin_unlock_irqrestore(&h->scan_lock, flags);
}
@@ -5547,11 +5547,23 @@ static void hpsa_scan_start(struct Scsi_
if (unlikely(lockup_detected(h)))
return hpsa_scan_complete(h);
+ /*
+ * If a scan is already waiting to run, no need to add another
+ */
+ spin_lock_irqsave(&h->scan_lock, flags);
+ if (h->scan_waiting) {
+ spin_unlock_irqrestore(&h->scan_lock, flags);
+ return;
+ }
+
+ spin_unlock_irqrestore(&h->scan_lock, flags);
+
/* wait until any scan already in progress is finished. */
while (1) {
spin_lock_irqsave(&h->scan_lock, flags);
if (h->scan_finished)
break;
+ h->scan_waiting = 1;
spin_unlock_irqrestore(&h->scan_lock, flags);
wait_event(h->scan_wait_queue, h->scan_finished);
/* Note: We don't need to worry about a race between this
@@ -5561,6 +5573,7 @@ static void hpsa_scan_start(struct Scsi_
*/
}
h->scan_finished = 0; /* mark scan as in progress */
+ h->scan_waiting = 0;
spin_unlock_irqrestore(&h->scan_lock, flags);
if (unlikely(lockup_detected(h)))
@@ -8799,6 +8812,7 @@ reinit_after_soft_reset:
init_waitqueue_head(&h->event_sync_wait_queue);
mutex_init(&h->reset_mutex);
h->scan_finished = 1; /* no scan currently in progress */
+ h->scan_waiting = 0;
pci_set_drvdata(pdev, h);
h->ndevices = 0;
--- a/drivers/scsi/hpsa.h
+++ b/drivers/scsi/hpsa.h
@@ -203,6 +203,7 @@ struct ctlr_info {
dma_addr_t errinfo_pool_dhandle;
unsigned long *cmd_pool_bits;
int scan_finished;
+ u8 scan_waiting : 1;
spinlock_t scan_lock;
wait_queue_head_t scan_wait_queue;
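The idea behind the new scan_waiting flag can be shown in isolation with a few lines of plain userspace C. This is only a sketch, not hpsa code: the pthread mutex stands in for h->scan_lock and request_rescan() loosely plays the role of hpsa_scan_start(); the point is simply that a rescan requested while another request is already waiting gets collapsed into the pending one, which is what prevents a rescan storm.
--------------- %< ---------------
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t scan_lock = PTHREAD_MUTEX_INITIALIZER;
static int scan_waiting;                /* mirrors h->scan_waiting */

/* Returns 1 if a rescan was queued, 0 if it was collapsed into a pending one. */
static int request_rescan(void)
{
        pthread_mutex_lock(&scan_lock);
        if (scan_waiting) {
                /* a rescan is already waiting to run, no need to add another */
                pthread_mutex_unlock(&scan_lock);
                return 0;
        }
        scan_waiting = 1;
        pthread_mutex_unlock(&scan_lock);

        /* the real driver would now wait for any running scan to finish,
         * clear scan_waiting and start its own scan */
        printf("rescan queued\n");
        return 1;
}

int main(void)
{
        printf("first request:  %d\n", request_rescan());
        printf("second request: %d\n", request_rescan());   /* collapsed */
        return 0;
}
--------------- >% ---------------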
Patches currently in stable-queue which might be from don.brace(a)microsemi.com are
queue-4.9/scsi-hpsa-limit-outstanding-rescans.patch
queue-4.9/scsi-hpsa-cleanup-sas_phy-structures-in-sysfs-when-unloading.patch
queue-4.9/scsi-hpsa-destroy-sas-transport-properties-before-scsi_host.patch
queue-4.9/scsi-hpsa-do-not-timeout-reset-operations.patch
queue-4.9/scsi-hpsa-update-check-for-logical-volume-status.patch
This is a note to let you know that I've just added the patch titled
scsi: hpsa: do not timeout reset operations
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
scsi-hpsa-do-not-timeout-reset-operations.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Mon Dec 18 14:12:34 CET 2017
From: Don Brace <don.brace(a)microsemi.com>
Date: Fri, 10 Mar 2017 14:35:23 -0600
Subject: scsi: hpsa: do not timeout reset operations
From: Don Brace <don.brace(a)microsemi.com>
[ Upstream commit 2ef2884980873081a4edae92f9d88dd580c85f6e ]
Resets can take longer than DEFAULT_TIMEOUT.
Reviewed-by: Scott Benesh <scott.benesh(a)microsemi.com>
Reviewed-by: Scott Teel <scott.teel(a)microsemi.com>
Reviewed-by: Tomas Henzl <thenzl(a)redhat.com>
Signed-off-by: Don Brace <don.brace(a)microsemi.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/scsi/hpsa.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -2951,7 +2951,7 @@ static int hpsa_send_reset(struct ctlr_i
/* fill_cmd can't fail here, no data buffer to map. */
(void) fill_cmd(c, reset_type, h, NULL, 0, 0,
scsi3addr, TYPE_MSG);
- rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, DEFAULT_TIMEOUT);
+ rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, NO_TIMEOUT);
if (rc) {
dev_warn(&h->pdev->dev, "Failed to send reset command\n");
goto out;
Patches currently in stable-queue which might be from don.brace(a)microsemi.com are
queue-4.9/scsi-hpsa-limit-outstanding-rescans.patch
queue-4.9/scsi-hpsa-cleanup-sas_phy-structures-in-sysfs-when-unloading.patch
queue-4.9/scsi-hpsa-destroy-sas-transport-properties-before-scsi_host.patch
queue-4.9/scsi-hpsa-do-not-timeout-reset-operations.patch
queue-4.9/scsi-hpsa-update-check-for-logical-volume-status.patch
This is a note to let you know that I've just added the patch titled
scsi: hpsa: destroy sas transport properties before scsi_host
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
scsi-hpsa-destroy-sas-transport-properties-before-scsi_host.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Mon Dec 18 14:12:35 CET 2017
From: Martin Wilck <mwilck(a)suse.de>
Date: Fri, 20 Oct 2017 16:51:08 -0500
Subject: scsi: hpsa: destroy sas transport properties before scsi_host
From: Martin Wilck <mwilck(a)suse.de>
[ Upstream commit dfb2e6f46b3074eb85203d8f0888b71ec1c2e37a ]
This patch cleans up a lot of warnings when unloading the driver.
A current example of the stack trace starts with:
[ 142.570715] sysfs group 'power' not found for kobject 'port-5:0'
There can be hundreds of these messages during a driver unload.
I am resubmitting this patch on behalf of Martin Wilck with his
permission.
His original patch can be found here:
https://www.spinics.net/lists/linux-scsi/msg102085.html
This patch did not help until Hannes's
commit 9441284fbc39 ("scsi-fixup-kernel-warning-during-rmmod")
was applied to the kernel.
---------------------------
Original patch description:
---------------------------
Unloading the hpsa driver causes warnings
[ 1063.793652] WARNING: CPU: 1 PID: 4850 at ../fs/sysfs/group.c:237 device_del+0x54/0x240()
[ 1063.793659] sysfs group ffffffff81cf21a0 not found for kobject 'port-2:0'
with two different stacks:
1)
[ 1063.793774] [<ffffffff81448af4>] device_del+0x54/0x240
[ 1063.793780] [<ffffffff8145178a>] transport_remove_classdev+0x4a/0x60
[ 1063.793784] [<ffffffff81451216>] attribute_container_device_trigger+0xa6/0xb0
[ 1063.793802] [<ffffffffa0105d46>] sas_port_delete+0x126/0x160 [scsi_transport_sas]
[ 1063.793819] [<ffffffffa036ebcc>] hpsa_free_sas_port+0x3c/0x70 [hpsa]
2)
[ 1063.797103] [<ffffffff81448af4>] device_del+0x54/0x240
[ 1063.797118] [<ffffffffa0105d4e>] sas_port_delete+0x12e/0x160 [scsi_transport_sas]
[ 1063.797134] [<ffffffffa036ebcc>] hpsa_free_sas_port+0x3c/0x70 [hpsa]
This is caused by the fact that host device hostX is deleted before the
SAS transport devices hostX/port-a:b.
This patch fixes this by reversing the order of the device deletions.
Tested-by: Don Brace <don.brace(a)microsemi.com>
Reviewed-by: Don Brace <don.brace(a)microsemi.com>
Signed-off-by: Martin Wilck <mwilck(a)suse.de>
Signed-off-by: Don Brace <don.brace(a)microsemi.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/scsi/hpsa.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -9105,6 +9105,8 @@ static void hpsa_remove_one(struct pci_d
destroy_workqueue(h->rescan_ctlr_wq);
destroy_workqueue(h->resubmit_wq);
+ hpsa_delete_sas_host(h);
+
/*
* Call before disabling interrupts.
* scsi_remove_host can trigger I/O operations especially
@@ -9139,8 +9141,6 @@ static void hpsa_remove_one(struct pci_d
h->lockup_detected = NULL; /* init_one 2 */
/* (void) pci_disable_pcie_error_reporting(pdev); */ /* init_one 1 */
- hpsa_delete_sas_host(h);
-
kfree(h); /* init_one 1 */
}
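The ordering problem is a generic parent/child one: objects created underneath the Scsi_Host have to be removed before the host itself goes away, i.e. teardown runs in the reverse order of setup. The toy program below only illustrates that rule (plain C, nothing hpsa- or sysfs-specific, all names invented); it is the same reasoning that moves hpsa_delete_sas_host() ahead of the host teardown in the hunk above.
--------------- %< ---------------
#include <stdio.h>
#include <stdlib.h>

struct child  { const char *name; };                        /* think "port-2:0" */
struct parent { const char *name; struct child *child; };   /* think "host2"    */

static struct parent *setup(void)
{
        struct parent *p = malloc(sizeof(*p));

        p->name = "host2";
        p->child = malloc(sizeof(*p->child));
        p->child->name = "port-2:0";
        return p;
}

static void teardown(struct parent *p)
{
        /* children first ... */
        printf("removing %s\n", p->child->name);
        free(p->child);
        /* ... then the parent they hang off */
        printf("removing %s\n", p->name);
        free(p);
}

int main(void)
{
        teardown(setup());
        return 0;
}
--------------- >% ---------------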
Patches currently in stable-queue which might be from mwilck(a)suse.de are
queue-4.9/scsi-hpsa-cleanup-sas_phy-structures-in-sysfs-when-unloading.patch
queue-4.9/scsi-hpsa-destroy-sas-transport-properties-before-scsi_host.patch
This is a note to let you know that I've just added the patch titled
scsi: hpsa: cleanup sas_phy structures in sysfs when unloading
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
scsi-hpsa-cleanup-sas_phy-structures-in-sysfs-when-unloading.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Mon Dec 18 14:12:35 CET 2017
From: Martin Wilck <mwilck(a)suse.de>
Date: Fri, 20 Oct 2017 16:51:14 -0500
Subject: scsi: hpsa: cleanup sas_phy structures in sysfs when unloading
From: Martin Wilck <mwilck(a)suse.de>
[ Upstream commit 55ca38b4255bb336c2d35990bdb2b368e19b435a ]
I am resubmitting this patch on behalf of Martin Wilck with his
permission.
The original patch can be found here:
https://www.spinics.net/lists/linux-scsi/msg102083.html
This patch did not help until Hannes's
commit 9441284fbc39 ("scsi-fixup-kernel-warning-during-rmmod")
was applied to the kernel.
--------------------------------------
Original patch description from Martin:
--------------------------------------
When the hpsa module is unloaded using rmmod, dangling
symlinks remain under /sys/class/sas_phy. Fix this by
calling sas_phy_delete() rather than sas_phy_free (which,
according to comments, should not be called for PHYs that
have been set up successfully, anyway).
Tested-by: Don Brace <don.brace(a)microsemi.com>
Reviewed-by: Don Brace <don.brace(a)microsemi.com>
Signed-off-by: Martin Wilck <mwilck(a)suse.de>
Signed-off-by: Don Brace <don.brace(a)microsemi.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/scsi/hpsa.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -9632,9 +9632,9 @@ static void hpsa_free_sas_phy(struct hps
struct sas_phy *phy = hpsa_sas_phy->phy;
sas_port_delete_phy(hpsa_sas_phy->parent_port->port, phy);
- sas_phy_free(phy);
if (hpsa_sas_phy->added_to_port)
list_del(&hpsa_sas_phy->phy_list_entry);
+ sas_phy_delete(phy);
kfree(hpsa_sas_phy);
}
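Stripped of the SAS transport details, the bug class looks like this: an object that has been registered somewhere must be removed through a delete path that unregisters it before freeing it; a bare free leaves a dangling reference behind (a registry slot in the sketch, a /sys/class/sas_phy symlink in the driver). The program below is a loose userspace model with made-up names, not scsi_transport_sas code.
--------------- %< ---------------
#include <stdio.h>
#include <stdlib.h>

struct phy { const char *name; };

static struct phy *registry[2];         /* stands in for /sys/class/sas_phy */

static void phy_register(struct phy *p, int slot)
{
        registry[slot] = p;
}

/* loosely models sas_phy_free(): releases the object, ignores the registry */
static void phy_free(struct phy *p)
{
        free(p);
}

/* loosely models sas_phy_delete(): unregister first, then release */
static void phy_delete(struct phy *p, int slot)
{
        registry[slot] = NULL;
        free(p);
}

int main(void)
{
        struct phy *a = malloc(sizeof(*a));
        struct phy *b = malloc(sizeof(*b));

        a->name = "phy-1:0";
        b->name = "phy-1:1";
        phy_register(a, 0);
        phy_register(b, 1);

        phy_free(a);            /* buggy teardown: slot 0 now dangles */
        phy_delete(b, 1);       /* fixed teardown: slot 1 is cleared  */

        printf("slot 0 dangling: %s\n", registry[0] ? "yes" : "no");
        printf("slot 1 dangling: %s\n", registry[1] ? "yes" : "no");
        return 0;
}
--------------- >% ---------------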
Patches currently in stable-queue which might be from mwilck(a)suse.de are
queue-4.9/scsi-hpsa-cleanup-sas_phy-structures-in-sysfs-when-unloading.patch
queue-4.9/scsi-hpsa-destroy-sas-transport-properties-before-scsi_host.patch
This is a note to let you know that I've just added the patch titled
scsi: bfa: integer overflow in debugfs
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
scsi-bfa-integer-overflow-in-debugfs.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Mon Dec 18 14:12:35 CET 2017
From: Dan Carpenter <dan.carpenter(a)oracle.com>
Date: Wed, 4 Oct 2017 10:50:37 +0300
Subject: scsi: bfa: integer overflow in debugfs
From: Dan Carpenter <dan.carpenter(a)oracle.com>
[ Upstream commit 3e351275655d3c84dc28abf170def9786db5176d ]
We could allocate less memory than intended because we do:
bfad->regdata = kzalloc(len << 2, GFP_KERNEL);
The shift can overflow leading to a crash. This is debugfs code so the
impact is very small. I fixed the network version of this in March with
commit 13e2d5187f6b ("bna: integer overflow bug in debugfs").
Fixes: ab2a9ba189e8 ("[SCSI] bfa: add debugfs support")
Signed-off-by: Dan Carpenter <dan.carpenter(a)oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/scsi/bfa/bfad_debugfs.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
--- a/drivers/scsi/bfa/bfad_debugfs.c
+++ b/drivers/scsi/bfa/bfad_debugfs.c
@@ -255,7 +255,8 @@ bfad_debugfs_write_regrd(struct file *fi
struct bfad_s *bfad = port->bfad;
struct bfa_s *bfa = &bfad->bfa;
struct bfa_ioc_s *ioc = &bfa->ioc;
- int addr, len, rc, i;
+ int addr, rc, i;
+ u32 len;
u32 *regbuf;
void __iomem *rb, *reg_addr;
unsigned long flags;
@@ -266,7 +267,7 @@ bfad_debugfs_write_regrd(struct file *fi
return PTR_ERR(kern_buf);
rc = sscanf(kern_buf, "%x:%x", &addr, &len);
- if (rc < 2) {
+ if (rc < 2 || len > (UINT_MAX >> 2)) {
printk(KERN_INFO
"bfad[%d]: %s failed to read user buf\n",
bfad->inst_no, __func__);
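The overflow itself is easy to demonstrate outside the driver. The standalone program below (not bfa code) shows why len << 2 wraps once a 32-bit len exceeds UINT_MAX >> 2, and how the guard added above rejects such input before the allocation size is ever computed.
--------------- %< ---------------
#include <limits.h>
#include <stdio.h>

int main(void)
{
        unsigned int len = 0x40000001u;         /* just above UINT_MAX >> 2 */

        if (len > (UINT_MAX >> 2)) {            /* the check the patch adds */
                printf("rejected: len %#x would overflow len << 2\n", len);
                return 1;
        }
        printf("would allocate %u bytes\n", len << 2);
        return 0;
}
--------------- >% ---------------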
Patches currently in stable-queue which might be from dan.carpenter(a)oracle.com are
queue-4.9/scsi-bfa-integer-overflow-in-debugfs.patch
queue-4.9/fbdev-controlfb-add-missing-modes-to-fix-out-of-bounds-access.patch
This is a note to let you know that I've just added the patch titled
sched/deadline: Use deadline instead of period when calculating overflow
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
sched-deadline-use-deadline-instead-of-period-when-calculating-overflow.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Mon Dec 18 14:12:34 CET 2017
From: "Steven Rostedt (VMware)" <rostedt(a)goodmis.org>
Date: Thu, 2 Mar 2017 15:10:59 +0100
Subject: sched/deadline: Use deadline instead of period when calculating overflow
From: "Steven Rostedt (VMware)" <rostedt(a)goodmis.org>
[ Upstream commit 2317d5f1c34913bac5971d93d69fb6c31bb74670 ]
I was testing Daniel's changes with his test case, and tweaked it a
little. Instead of having the runtime equal to the deadline, I
increased the deadline tenfold.
Daniel's test case had:
attr.sched_runtime = 2 * 1000 * 1000; /* 2 ms */
attr.sched_deadline = 2 * 1000 * 1000; /* 2 ms */
attr.sched_period = 2 * 1000 * 1000 * 1000; /* 2 s */
To make it more interesting, I changed it to:
attr.sched_runtime = 2 * 1000 * 1000; /* 2 ms */
attr.sched_deadline = 20 * 1000 * 1000; /* 20 ms */
attr.sched_period = 2 * 1000 * 1000 * 1000; /* 2 s */
The results were rather surprising. The behavior that Daniel's patch
was fixing came back. The task started using much more than .1% of the
CPU. More like 20%.
Looking into this I found that it was due to the dl_entity_overflow()
constantly returning true. That's because it uses the relative period
against relative runtime vs the absolute deadline against absolute
runtime.
runtime / (deadline - t) > dl_runtime / dl_period
There's even a comment mentioning this, saying that when the relative
deadline equals the relative period, the equation is the same as using
deadline instead of period. That comment is backwards! What we really
want is:
runtime / (deadline - t) > dl_runtime / dl_deadline
We care about whether the runtime can make its deadline, not its period. And
then we can say "when the deadline equals the period, the equation is
the same as using dl_period instead of dl_deadline".
After correcting this, now when the task gets enqueued, it can throttle
correctly, and Daniel's fix to the throttling of sleeping deadline
tasks works even when the runtime and deadline are not the same.
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Reviewed-by: Daniel Bristot de Oliveira <bristot(a)redhat.com>
Cc: Juri Lelli <juri.lelli(a)arm.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Luca Abeni <luca.abeni(a)santannapisa.it>
Cc: Mike Galbraith <efault(a)gmx.de>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Romulo Silva de Oliveira <romulo.deoliveira(a)ufsc.br>
Cc: Steven Rostedt <rostedt(a)goodmis.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Tommaso Cucinotta <tommaso.cucinotta(a)sssup.it>
Link: http://lkml.kernel.org/r/02135a27f1ae3fe5fd032568a5a2f370e190e8d7.148839293…
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
kernel/sched/deadline.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -445,13 +445,13 @@ static void replenish_dl_entity(struct s
*
* This function returns true if:
*
- * runtime / (deadline - t) > dl_runtime / dl_period ,
+ * runtime / (deadline - t) > dl_runtime / dl_deadline ,
*
* IOW we can't recycle current parameters.
*
- * Notice that the bandwidth check is done against the period. For
+ * Notice that the bandwidth check is done against the deadline. For
* task with deadline equal to period this is the same of using
- * dl_deadline instead of dl_period in the equation above.
+ * dl_period instead of dl_deadline in the equation above.
*/
static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
struct sched_dl_entity *pi_se, u64 t)
@@ -476,7 +476,7 @@ static bool dl_entity_overflow(struct sc
* of anything below microseconds resolution is actually fiction
* (but still we want to give the user that illusion >;).
*/
- left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
+ left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
right = ((dl_se->deadline - t) >> DL_SCALE) *
(pi_se->dl_runtime >> DL_SCALE);
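Plugging the changelog's numbers into both forms of the admission check makes the difference concrete. The program below is ordinary userspace C, not kernel code; it cross-multiplies the two inequalities (roughly what dl_entity_overflow() does, minus the DL_SCALE shifting), with runtime = 2 ms, relative deadline = 20 ms and period = 2 s, evaluated right after a wakeup when about 2 ms of runtime is left and the absolute deadline is about 20 ms away.
--------------- %< ---------------
#include <stdio.h>

int main(void)
{
        unsigned long long runtime     = 2ULL * 1000 * 1000;    /* ns left      */
        unsigned long long until_dl    = 20ULL * 1000 * 1000;   /* deadline - t */
        unsigned long long dl_runtime  = 2ULL * 1000 * 1000;
        unsigned long long dl_deadline = 20ULL * 1000 * 1000;
        unsigned long long dl_period   = 2000ULL * 1000 * 1000;

        /* old check: runtime / (deadline - t) > dl_runtime / dl_period   */
        int buggy = runtime * dl_period > dl_runtime * until_dl;
        /* new check: runtime / (deadline - t) > dl_runtime / dl_deadline */
        int fixed = runtime * dl_deadline > dl_runtime * until_dl;

        printf("old check reports overflow: %d (task gets a fresh deadline)\n",
               buggy);
        printf("new check reports overflow: %d (current parameters are kept)\n",
               fixed);
        return 0;
}
--------------- >% ---------------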
Patches currently in stable-queue which might be from rostedt(a)goodmis.org are
queue-4.9/sched-deadline-make-sure-the-replenishment-timer-fires-in-the-next-period.patch
queue-4.9/sched-rt-do-not-pull-from-current-cpu-if-only-one-cpu-to-pull.patch
queue-4.9/tracing-allocate-mask_str-buffer-dynamically.patch
queue-4.9/sched-deadline-throttle-a-constrained-deadline-task-activated-after-the-deadline.patch
queue-4.9/sched-deadline-use-deadline-instead-of-period-when-calculating-overflow.patch
This is a note to let you know that I've just added the patch titled
sched/deadline: Throttle a constrained deadline task activated after the deadline
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
sched-deadline-throttle-a-constrained-deadline-task-activated-after-the-deadline.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Mon Dec 18 14:12:34 CET 2017
From: Daniel Bristot de Oliveira <bristot(a)redhat.com>
Date: Thu, 2 Mar 2017 15:10:58 +0100
Subject: sched/deadline: Throttle a constrained deadline task activated after the deadline
From: Daniel Bristot de Oliveira <bristot(a)redhat.com>
[ Upstream commit df8eac8cafce7d086be3bd5cf5a838fa37594dfb ]
During the activation, CBS checks if it can reuse the current task's
runtime and period. If the deadline of the task is in the past, CBS
cannot use the runtime, and so it replenishes the task. This rule
works fine for implicit deadline tasks (deadline == period), and the
CBS was designed for implicit deadline tasks. However, a task with
constrained deadline (deadline < period) might be awakened after the
deadline, but before the next period. In this case, replenishing the
task would allow it to run for runtime / deadline. As in this case
deadline < period, CBS enables a task to run for more than the
runtime / period. In a very loaded system, this can cause a domino
effect, making other tasks miss their deadlines.
To avoid this problem, in the activation of a constrained deadline
task after the deadline but before the next period, throttle the
task and set the replenishing timer to the beginning of the next period,
unless it is boosted.
Reproducer:
--------------- %< ---------------
int main (int argc, char **argv)
{
        int ret;
        int flags = 0;
        unsigned long l = 0;
        struct timespec ts;
        struct sched_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);

        attr.sched_policy = SCHED_DEADLINE;
        attr.sched_runtime = 2 * 1000 * 1000;           /* 2 ms */
        attr.sched_deadline = 2 * 1000 * 1000;          /* 2 ms */
        attr.sched_period = 2 * 1000 * 1000 * 1000;     /* 2 s */

        ts.tv_sec = 0;
        ts.tv_nsec = 2000 * 1000;                       /* 2 ms */

        ret = sched_setattr(0, &attr, flags);
        if (ret < 0) {
                perror("sched_setattr");
                exit(-1);
        }

        for(;;) {
                /* XXX: you may need to adjust the loop */
                for (l = 0; l < 150000; l++);
                /*
                 * The idea is to go to sleep right before the deadline
                 * and then wake up before the next period to receive
                 * a new replenishment.
                 */
                nanosleep(&ts, NULL);
        }

        exit(0);
}
--------------- >% ---------------
On my box, this reproducer uses almost 50% of the CPU time, which is
obviously wrong for a task with 2/2000 reservation.
Signed-off-by: Daniel Bristot de Oliveira <bristot(a)redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: Juri Lelli <juri.lelli(a)arm.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Luca Abeni <luca.abeni(a)santannapisa.it>
Cc: Mike Galbraith <efault(a)gmx.de>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Romulo Silva de Oliveira <romulo.deoliveira(a)ufsc.br>
Cc: Steven Rostedt <rostedt(a)goodmis.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Tommaso Cucinotta <tommaso.cucinotta(a)sssup.it>
Link: http://lkml.kernel.org/r/edf58354e01db46bf42df8d2dd32418833f68c89.148839293…
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
kernel/sched/deadline.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 45 insertions(+)
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -695,6 +695,37 @@ void init_dl_task_timer(struct sched_dl_
timer->function = dl_task_timer;
}
+/*
+ * During the activation, CBS checks if it can reuse the current task's
+ * runtime and period. If the deadline of the task is in the past, CBS
+ * cannot use the runtime, and so it replenishes the task. This rule
+ * works fine for implicit deadline tasks (deadline == period), and the
+ * CBS was designed for implicit deadline tasks. However, a task with
+ * constrained deadline (deadine < period) might be awakened after the
+ * deadline, but before the next period. In this case, replenishing the
+ * task would allow it to run for runtime / deadline. As in this case
+ * deadline < period, CBS enables a task to run for more than the
+ * runtime / period. In a very loaded system, this can cause a domino
+ * effect, making other tasks miss their deadlines.
+ *
+ * To avoid this problem, in the activation of a constrained deadline
+ * task after the deadline but before the next period, throttle the
+ * task and set the replenishing timer to the begin of the next period,
+ * unless it is boosted.
+ */
+static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
+{
+ struct task_struct *p = dl_task_of(dl_se);
+ struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
+
+ if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
+ dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
+ if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
+ return;
+ dl_se->dl_throttled = 1;
+ }
+}
+
static
int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
{
@@ -928,6 +959,11 @@ static void dequeue_dl_entity(struct sch
__dequeue_dl_entity(dl_se);
}
+static inline bool dl_is_constrained(struct sched_dl_entity *dl_se)
+{
+ return dl_se->dl_deadline < dl_se->dl_period;
+}
+
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
struct task_struct *pi_task = rt_mutex_get_top_task(p);
@@ -954,6 +990,15 @@ static void enqueue_task_dl(struct rq *r
}
/*
+ * Check if a constrained deadline task was activated
+ * after the deadline but before the next period.
+ * If that is the case, the task will be throttled and
+ * the replenishment timer will be set to the next period.
+ */
+ if (!p->dl.dl_throttled && dl_is_constrained(&p->dl))
+ dl_check_constrained_dl(&p->dl);
+
+ /*
* If p is throttled, we do nothing. In fact, if it exhausted
* its budget it needs a replenishment and, since it now is on
* its rq, the bandwidth timer callback (which clearly has not
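A quick sanity check of the "almost 50%" figure, using nothing but the reproducer's own parameters and the simplified model described above (an illustration only, not scheduler code): before the fix, every wakeup that happened after the deadline re-replenished the task, so it effectively ran runtime out of every (runtime + sleep) window instead of runtime out of every period.
--------------- %< ---------------
#include <stdio.h>

int main(void)
{
        double runtime_ms = 2.0;        /* attr.sched_runtime           */
        double sleep_ms   = 2.0;        /* the reproducer's nanosleep() */
        double period_ms  = 2000.0;     /* attr.sched_period            */

        printf("intended utilization:       %.3f%%\n",
               100.0 * runtime_ms / period_ms);
        printf("utilization before the fix: ~%.0f%%\n",
               100.0 * runtime_ms / (runtime_ms + sleep_ms));
        return 0;
}
--------------- >% ---------------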
Patches currently in stable-queue which might be from bristot(a)redhat.com are
queue-4.9/sched-deadline-make-sure-the-replenishment-timer-fires-in-the-next-period.patch
queue-4.9/sched-deadline-throttle-a-constrained-deadline-task-activated-after-the-deadline.patch
queue-4.9/sched-deadline-use-deadline-instead-of-period-when-calculating-overflow.patch
This is a note to let you know that I've just added the patch titled
sched/deadline: Make sure the replenishment timer fires in the next period
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
sched-deadline-make-sure-the-replenishment-timer-fires-in-the-next-period.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Mon Dec 18 14:12:34 CET 2017
From: Daniel Bristot de Oliveira <bristot(a)redhat.com>
Date: Thu, 2 Mar 2017 15:10:57 +0100
Subject: sched/deadline: Make sure the replenishment timer fires in the next period
From: Daniel Bristot de Oliveira <bristot(a)redhat.com>
[ Upstream commit 5ac69d37784b237707a7b15d199cdb6c6fdb6780 ]
Currently, the replenishment timer is set to fire at the deadline
of a task. Although that works for implicit deadline tasks because the
deadline is equals to the begin of the next period, that is not correct
for constrained deadline tasks (deadline < period).
For instance:
f.c:
--------------- %< ---------------
int main (void)
{
        for(;;);
}
--------------- >% ---------------
# gcc -o f f.c
# trace-cmd record -e sched:sched_switch \
        -e syscalls:sys_exit_sched_setattr \
        chrt -d --sched-runtime 490000000 \
                --sched-deadline 500000000 \
                --sched-period 1000000000 0 ./f
# trace-cmd report | grep "{pid of ./f}"
After setting the parameters, the task is replenished and continues running
until it is throttled:
f-11295 [003] 13322.113776: sys_exit_sched_setattr: 0x0
The task is throttled after running for 492.318 ms, as expected:
f-11295 [003] 13322.606094: sched_switch: f:11295 [-1] R ==> watchdog/3:32 [0]
But then, the task is replenished 500.719 ms after the first
replenishment:
<idle>-0 [003] 13322.614495: sched_switch: swapper/3:0 [120] R ==> f:11295 [-1]
Running for 490.277 ms:
f-11295 [003] 13323.104772: sched_switch: f:11295 [-1] R ==> swapper/3:0 [120]
Hence, in the first period, the task runs 2 * runtime, and that is a bug.
During the first replenishment, the next deadline is set one period away.
So the runtime / period starts to be respected. However, as the second
replenishment took place at the wrong instant, every subsequent
replenishment is also held at the wrong instant: rather than occurring
n periods after the first activation, it takes place at
(n periods - relative deadline).
Signed-off-by: Daniel Bristot de Oliveira <bristot(a)redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Reviewed-by: Luca Abeni <luca.abeni(a)santannapisa.it>
Reviewed-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
Reviewed-by: Juri Lelli <juri.lelli(a)arm.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Mike Galbraith <efault(a)gmx.de>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Romulo Silva de Oliveira <romulo.deoliveira(a)ufsc.br>
Cc: Steven Rostedt <rostedt(a)goodmis.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Tommaso Cucinotta <tommaso.cucinotta(a)sssup.it>
Link: http://lkml.kernel.org/r/ac50d89887c25285b47465638354b63362f8adff.148839293…
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
kernel/sched/deadline.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -505,10 +505,15 @@ static void update_dl_entity(struct sche
}
}
+static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
+{
+ return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
+}
+
/*
* If the entity depleted all its runtime, and if we want it to sleep
* while waiting for some new execution time to become available, we
- * set the bandwidth enforcement timer to the replenishment instant
+ * set the bandwidth replenishment timer to the replenishment instant
* and try to activate it.
*
* Notice that it is important for the caller to know if the timer
@@ -530,7 +535,7 @@ static int start_dl_timer(struct task_st
* that it is actually coming from rq->clock and not from
* hrtimer's time base reading.
*/
- act = ns_to_ktime(dl_se->deadline);
+ act = ns_to_ktime(dl_next_period(dl_se));
now = hrtimer_cb_get_time(timer);
delta = ktime_to_ns(now) - rq_clock(rq);
act = ktime_add_ns(act, delta);
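A worked example of the new dl_next_period() helper with the parameters used above (runtime 490 ms, relative deadline 500 ms, period 1000 ms); this is plain userspace C, not scheduler code, and assumes the current absolute deadline falls 500 ms into a period that starts at t = 0. The old code armed the timer at the deadline itself; the fix arms it at the start of the next period.
--------------- %< ---------------
#include <stdio.h>

int main(void)
{
        unsigned long long dl_deadline = 500ULL * 1000 * 1000;   /* relative, ns */
        unsigned long long dl_period   = 1000ULL * 1000 * 1000;
        unsigned long long deadline    = 500ULL * 1000 * 1000;   /* absolute, ns */

        /* dl_next_period(): deadline - dl_deadline + dl_period */
        unsigned long long next_period = deadline - dl_deadline + dl_period;

        printf("old timer target (the deadline):      %llu ms\n",
               deadline / 1000000);
        printf("new timer target (next period start): %llu ms\n",
               next_period / 1000000);
        return 0;
}
--------------- >% ---------------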
Patches currently in stable-queue which might be from bristot(a)redhat.com are
queue-4.9/sched-deadline-make-sure-the-replenishment-timer-fires-in-the-next-period.patch
queue-4.9/sched-deadline-throttle-a-constrained-deadline-task-activated-after-the-deadline.patch
queue-4.9/sched-deadline-use-deadline-instead-of-period-when-calculating-overflow.patch
This is a note to let you know that I've just added the patch titled
sched/deadline: Add missing update_rq_clock() in dl_task_timer()
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
sched-deadline-add-missing-update_rq_clock-in-dl_task_timer.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Mon Dec 18 14:12:34 CET 2017
From: Wanpeng Li <wanpeng.li(a)hotmail.com>
Date: Mon, 6 Mar 2017 21:51:28 -0800
Subject: sched/deadline: Add missing update_rq_clock() in dl_task_timer()
From: Wanpeng Li <wanpeng.li(a)hotmail.com>
[ Upstream commit dcc3b5ffe1b32771c9a22e2c916fb94c4fcf5b79 ]
The following warning can be triggered by hot-unplugging the CPU
on which an active SCHED_DEADLINE task is running:
------------[ cut here ]------------
WARNING: CPU: 7 PID: 0 at kernel/sched/sched.h:833 replenish_dl_entity+0x71e/0xc40
rq->clock_update_flags < RQCF_ACT_SKIP
CPU: 7 PID: 0 Comm: swapper/7 Tainted: G B 4.11.0-rc1+ #24
Hardware name: LENOVO ThinkCentre M8500t-N000/SHARKBAY, BIOS FBKTC1AUS 02/16/2016
Call Trace:
<IRQ>
dump_stack+0x85/0xc4
__warn+0x172/0x1b0
warn_slowpath_fmt+0xb4/0xf0
? __warn+0x1b0/0x1b0
? debug_check_no_locks_freed+0x2c0/0x2c0
? cpudl_set+0x3d/0x2b0
replenish_dl_entity+0x71e/0xc40
enqueue_task_dl+0x2ea/0x12e0
? dl_task_timer+0x777/0x990
? __hrtimer_run_queues+0x270/0xa50
dl_task_timer+0x316/0x990
? enqueue_task_dl+0x12e0/0x12e0
? enqueue_task_dl+0x12e0/0x12e0
__hrtimer_run_queues+0x270/0xa50
? hrtimer_cancel+0x20/0x20
? hrtimer_interrupt+0x119/0x600
hrtimer_interrupt+0x19c/0x600
? trace_hardirqs_off+0xd/0x10
local_apic_timer_interrupt+0x74/0xe0
smp_apic_timer_interrupt+0x76/0xa0
apic_timer_interrupt+0x93/0xa0
The DL task will be migrated to a suitable later deadline rq once the DL
timer fires and the current rq is offline. The rq clock of the new rq should
be updated. This patch fixes it by updating the rq clock after holding
the new rq's rq lock.
Signed-off-by: Wanpeng Li <wanpeng.li(a)hotmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Reviewed-by: Matt Fleming <matt(a)codeblueprint.co.uk>
Cc: Juri Lelli <juri.lelli(a)arm.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Mike Galbraith <efault(a)gmx.de>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Link: http://lkml.kernel.org/r/1488865888-15894-1-git-send-email-wanpeng.li@hotma…
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
kernel/sched/deadline.c | 1 +
1 file changed, 1 insertion(+)
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -638,6 +638,7 @@ static enum hrtimer_restart dl_task_time
lockdep_unpin_lock(&rq->lock, rf.cookie);
rq = dl_task_offline_migration(rq, p);
rf.cookie = lockdep_pin_lock(&rq->lock);
+ update_rq_clock(rq);
/*
* Now that the task has been migrated to the new RQ and we
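The pattern behind the one-line fix, shown as a rough userspace analogue (an assumed, simplified model; none of the names below are scheduler code): each queue keeps a cached clock snapshot, and once we start operating on a different queue that snapshot has to be refreshed before any time deltas are derived from it, which is what the added update_rq_clock() call does for the newly chosen rq.
--------------- %< ---------------
#include <stdio.h>
#include <time.h>

struct runq {
        long long clock_ns;             /* cached snapshot, like rq->clock */
};

static long long now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

static void update_queue_clock(struct runq *rq)
{
        rq->clock_ns = now_ns();        /* plays the role of update_rq_clock() */
}

int main(void)
{
        struct runq old_rq = { .clock_ns = now_ns() };
        struct runq new_rq = { .clock_ns = 0 };   /* stale: never updated yet */

        /* after "migrating" to new_rq, refresh its clock before using it */
        update_queue_clock(&new_rq);

        printf("delta computed from a fresh snapshot: %lld ns\n",
               new_rq.clock_ns - old_rq.clock_ns);
        return 0;
}
--------------- >% ---------------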
Patches currently in stable-queue which might be from wanpeng.li(a)hotmail.com are
queue-4.9/sched-deadline-add-missing-update_rq_clock-in-dl_task_timer.patch