From: Waiman Long <longman(a)redhat.com>
[ Upstream commit 9506a7425b094d2f1d9c877ed5a78f416669269b ]
It was found that when debug_locks was turned off because of a problem
found by the lockdep code, the system performance could drop quite
significantly when the lock_stat code was also configured into the
kernel. For instance, parallel kernel build time on a 4-socket x86-64
server nearly doubled.
Further analysis into the cause of the slowdown traced back to the
frequent call to debug_locks_off() from the __lock_acquired() function
probably due to some inconsistent lockdep states with debug_locks
off. The debug_locks_off() function did an unconditional atomic xchg
to write a 0 value into debug_locks which had already been set to 0.
This led to severe cacheline contention in the cacheline that held
debug_locks. As debug_locks is being referenced in quite a few different
places in the kernel, this greatly slow down the system performance.
To prevent that trashing of debug_locks cacheline, lock_acquired()
and lock_contended() now checks the state of debug_locks before
proceeding. The debug_locks_off() function is also modified to check
debug_locks before calling __debug_locks_off().
Signed-off-by: Waiman Long <longman(a)redhat.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Paul E. McKenney <paulmck(a)linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Will Deacon <will.deacon(a)arm.com>
Link: http://lkml.kernel.org/r/1539913518-15598-1-git-send-email-longman@redhat.c…
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
kernel/locking/lockdep.c | 4 ++--
lib/debug_locks.c | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 6e171b547a80..774ab79d3ec7 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3826,7 +3826,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
{
unsigned long flags;
- if (unlikely(!lock_stat))
+ if (unlikely(!lock_stat || !debug_locks))
return;
if (unlikely(current->lockdep_recursion))
@@ -3846,7 +3846,7 @@ void lock_acquired(struct lockdep_map *lock, unsigned long ip)
{
unsigned long flags;
- if (unlikely(!lock_stat))
+ if (unlikely(!lock_stat || !debug_locks))
return;
if (unlikely(current->lockdep_recursion))
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
index 96c4c633d95e..124fdf238b3d 100644
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -37,7 +37,7 @@ EXPORT_SYMBOL_GPL(debug_locks_silent);
*/
int debug_locks_off(void)
{
- if (__debug_locks_off()) {
+ if (debug_locks && __debug_locks_off()) {
if (!debug_locks_silent) {
console_verbose();
return 1;
--
2.17.1
From: Oleksij Rempel <o.rempel(a)pengutronix.de>
Current flexcan driver will put TX-ECHO in regular unsorted way, in
this case TX-ECHO can come after the response to the same TXed message.
In some cases, for example for J1939 stack, things will break.
This patch is using new rx-offload API to put the messages just in the
right place.
Signed-off-by: Oleksij Rempel <o.rempel(a)pengutronix.de>
Cc: linux-stable <stable(a)vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
---
drivers/net/can/flexcan.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 68b46395c580..41a175f80c4b 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -787,8 +787,11 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
/* transmission complete interrupt */
if (reg_iflag2 & FLEXCAN_IFLAG_MB(FLEXCAN_TX_MB)) {
+ u32 reg_ctrl = priv->read(®s->mb[FLEXCAN_TX_MB].can_ctrl);
+
handled = IRQ_HANDLED;
- stats->tx_bytes += can_get_echo_skb(dev, 0);
+ stats->tx_bytes += can_rx_offload_get_echo_skb(&priv->offload,
+ 0, reg_ctrl << 16);
stats->tx_packets++;
can_led_event(dev, CAN_LED_EVENT_TX);
--
2.19.1
Prior to echoing a successfully transmitted CAN frame (by calling
can_get_echo_skb()), CAN drivers have to put the CAN frame (by calling
can_put_echo_skb() in the transmit function). These put and get function
take an index as parameter, which is used to identify the CAN frame.
A driver calling can_get_echo_skb() with a index not pointing to a skb
is a BUG, so add an appropriate error message.
Cc: linux-stable <stable(a)vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
---
drivers/net/can/dev.c | 27 ++++++++++++++-------------
1 file changed, 14 insertions(+), 13 deletions(-)
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index c05e4d50d43d..3b3f88ffab53 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -480,6 +480,8 @@ EXPORT_SYMBOL_GPL(can_put_echo_skb);
struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr)
{
struct can_priv *priv = netdev_priv(dev);
+ struct sk_buff *skb = priv->echo_skb[idx];
+ struct canfd_frame *cf;
if (idx >= priv->echo_skb_max) {
netdev_err(dev, "%s: BUG! Trying to access can_priv::echo_skb out of bounds (%u/max %u)\n",
@@ -487,21 +489,20 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8
return NULL;
}
- if (priv->echo_skb[idx]) {
- /* Using "struct canfd_frame::len" for the frame
- * length is supported on both CAN and CANFD frames.
- */
- struct sk_buff *skb = priv->echo_skb[idx];
- struct canfd_frame *cf = (struct canfd_frame *)skb->data;
- u8 len = cf->len;
-
- *len_ptr = len;
- priv->echo_skb[idx] = NULL;
-
- return skb;
+ if (!skb) {
+ netdev_err(dev, "%s: BUG! Trying to echo non existing skb: can_priv::echo_skb[%u]\n",
+ __func__, idx);
+ return NULL;
}
- return NULL;
+ /* Using "struct canfd_frame::len" for the frame
+ * length is supported on both CAN and CANFD frames.
+ */
+ cf = (struct canfd_frame *)skb->data;
+ *len_ptr = cf->len;
+ priv->echo_skb[idx] = NULL;
+
+ return skb;
}
/*
--
2.19.1
This patch replaces the use of "struct can_frame::can_dlc" by "struct
canfd_frame::len" to access the frame's length. As it is ensured that
both structures have a compatible memory layout for this member this is
no functional change. Futher, this compatibility is documented in a
comment.
Cc: linux-stable <stable(a)vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
---
drivers/net/can/dev.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 80530ab37b1e..46cc5fec4043 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -484,11 +484,14 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8
BUG_ON(idx >= priv->echo_skb_max);
if (priv->echo_skb[idx]) {
+ /* Using "struct canfd_frame::len" for the frame
+ * length is supported on both CAN and CANFD frames.
+ */
struct sk_buff *skb = priv->echo_skb[idx];
- struct can_frame *cf = (struct can_frame *)skb->data;
- u8 dlc = cf->can_dlc;
+ struct canfd_frame *cf = (struct canfd_frame *)skb->data;
+ u8 len = cf->len;
- *len_ptr = dlc;
+ *len_ptr = len;
priv->echo_skb[idx] = NULL;
return skb;
--
2.19.1
This patch factors out all non sending parts of can_get_echo_skb() into
a seperate function __can_get_echo_skb(), so that it can be re-used in
an upcoming patch.
Cc: linux-stable <stable(a)vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
---
drivers/net/can/dev.c | 36 +++++++++++++++++++++++++-----------
include/linux/can/dev.h | 1 +
2 files changed, 26 insertions(+), 11 deletions(-)
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 49163570a63a..80530ab37b1e 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -477,14 +477,7 @@ void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
}
EXPORT_SYMBOL_GPL(can_put_echo_skb);
-/*
- * Get the skb from the stack and loop it back locally
- *
- * The function is typically called when the TX done interrupt
- * is handled in the device driver. The driver must protect
- * access to priv->echo_skb, if necessary.
- */
-unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx)
+struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr)
{
struct can_priv *priv = netdev_priv(dev);
@@ -495,13 +488,34 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx)
struct can_frame *cf = (struct can_frame *)skb->data;
u8 dlc = cf->can_dlc;
- netif_rx(priv->echo_skb[idx]);
+ *len_ptr = dlc;
priv->echo_skb[idx] = NULL;
- return dlc;
+ return skb;
}
- return 0;
+ return NULL;
+}
+
+/*
+ * Get the skb from the stack and loop it back locally
+ *
+ * The function is typically called when the TX done interrupt
+ * is handled in the device driver. The driver must protect
+ * access to priv->echo_skb, if necessary.
+ */
+unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx)
+{
+ struct sk_buff *skb;
+ u8 len;
+
+ skb = __can_get_echo_skb(dev, idx, &len);
+ if (!skb)
+ return 0;
+
+ netif_rx(skb);
+
+ return len;
}
EXPORT_SYMBOL_GPL(can_get_echo_skb);
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index a83e1f632eb7..f01623aef2f7 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -169,6 +169,7 @@ void can_change_state(struct net_device *dev, struct can_frame *cf,
void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
unsigned int idx);
+struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr);
unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx);
void can_free_echo_skb(struct net_device *dev, unsigned int idx);
--
2.19.1