The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x f63955721a8020e979b99cc417dcb6da3106aa24
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102137-pawing-rift-8b65@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f63955721a8020e979b99cc417dcb6da3106aa24 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust(a)hammerspace.com>
Date: Sun, 8 Oct 2023 14:20:19 -0400
Subject: [PATCH] pNFS: Fix a hang in nfs4_evict_inode()
We are not allowed to call pnfs_mark_matching_lsegs_return() without
also holding a reference to the layout header, since doing so could lead
to the reference count going to zero when we call
pnfs_layout_remove_lseg(). This again can lead to a hang when we get to
nfs4_evict_inode() and are unable to clear the layout pointer.
pnfs_layout_return_unused_byserver() is guilty of this behaviour, and
has been seen to trigger the refcount warning prior to a hang.
Fixes: b6d49ecd1081 ("NFSv4: Fix a pNFS layout related use-after-free race when freeing the inode")
Cc: stable(a)vger.kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust(a)hammerspace.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker(a)Netapp.com>
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 306cba0b9e69..84343aefbbd6 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2634,31 +2634,44 @@ pnfs_should_return_unused_layout(struct pnfs_layout_hdr *lo,
return mode == 0;
}
-static int
-pnfs_layout_return_unused_byserver(struct nfs_server *server, void *data)
+static int pnfs_layout_return_unused_byserver(struct nfs_server *server,
+ void *data)
{
const struct pnfs_layout_range *range = data;
+ const struct cred *cred;
struct pnfs_layout_hdr *lo;
struct inode *inode;
+ nfs4_stateid stateid;
+ enum pnfs_iomode iomode;
+
restart:
rcu_read_lock();
list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
- if (!pnfs_layout_can_be_returned(lo) ||
+ inode = lo->plh_inode;
+ if (!inode || !pnfs_layout_can_be_returned(lo) ||
test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
continue;
- inode = lo->plh_inode;
spin_lock(&inode->i_lock);
- if (!pnfs_should_return_unused_layout(lo, range)) {
+ if (!lo->plh_inode ||
+ !pnfs_should_return_unused_layout(lo, range)) {
spin_unlock(&inode->i_lock);
continue;
}
+ pnfs_get_layout_hdr(lo);
+ pnfs_set_plh_return_info(lo, range->iomode, 0);
+ if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
+ range, 0) != 0 ||
+ !pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode)) {
+ spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
+ pnfs_put_layout_hdr(lo);
+ cond_resched();
+ goto restart;
+ }
spin_unlock(&inode->i_lock);
- inode = pnfs_grab_inode_layout_hdr(lo);
- if (!inode)
- continue;
rcu_read_unlock();
- pnfs_mark_layout_for_return(inode, range);
- iput(inode);
+ pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
+ pnfs_put_layout_hdr(lo);
cond_resched();
goto restart;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x f63955721a8020e979b99cc417dcb6da3106aa24
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102135-blurt-entree-54ec@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f63955721a8020e979b99cc417dcb6da3106aa24 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust(a)hammerspace.com>
Date: Sun, 8 Oct 2023 14:20:19 -0400
Subject: [PATCH] pNFS: Fix a hang in nfs4_evict_inode()
We are not allowed to call pnfs_mark_matching_lsegs_return() without
also holding a reference to the layout header, since doing so could lead
to the reference count going to zero when we call
pnfs_layout_remove_lseg(). This again can lead to a hang when we get to
nfs4_evict_inode() and are unable to clear the layout pointer.
pnfs_layout_return_unused_byserver() is guilty of this behaviour, and
has been seen to trigger the refcount warning prior to a hang.
Fixes: b6d49ecd1081 ("NFSv4: Fix a pNFS layout related use-after-free race when freeing the inode")
Cc: stable(a)vger.kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust(a)hammerspace.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker(a)Netapp.com>
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 306cba0b9e69..84343aefbbd6 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2634,31 +2634,44 @@ pnfs_should_return_unused_layout(struct pnfs_layout_hdr *lo,
return mode == 0;
}
-static int
-pnfs_layout_return_unused_byserver(struct nfs_server *server, void *data)
+static int pnfs_layout_return_unused_byserver(struct nfs_server *server,
+ void *data)
{
const struct pnfs_layout_range *range = data;
+ const struct cred *cred;
struct pnfs_layout_hdr *lo;
struct inode *inode;
+ nfs4_stateid stateid;
+ enum pnfs_iomode iomode;
+
restart:
rcu_read_lock();
list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
- if (!pnfs_layout_can_be_returned(lo) ||
+ inode = lo->plh_inode;
+ if (!inode || !pnfs_layout_can_be_returned(lo) ||
test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
continue;
- inode = lo->plh_inode;
spin_lock(&inode->i_lock);
- if (!pnfs_should_return_unused_layout(lo, range)) {
+ if (!lo->plh_inode ||
+ !pnfs_should_return_unused_layout(lo, range)) {
spin_unlock(&inode->i_lock);
continue;
}
+ pnfs_get_layout_hdr(lo);
+ pnfs_set_plh_return_info(lo, range->iomode, 0);
+ if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
+ range, 0) != 0 ||
+ !pnfs_prepare_layoutreturn(lo, &stateid, &cred, &iomode)) {
+ spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
+ pnfs_put_layout_hdr(lo);
+ cond_resched();
+ goto restart;
+ }
spin_unlock(&inode->i_lock);
- inode = pnfs_grab_inode_layout_hdr(lo);
- if (!inode)
- continue;
rcu_read_unlock();
- pnfs_mark_layout_for_return(inode, range);
- iput(inode);
+ pnfs_send_layoutreturn(lo, &stateid, &cred, iomode, false);
+ pnfs_put_layout_hdr(lo);
cond_resched();
goto restart;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x c8befdc411e5fd1bf95a13e8744c8ca79b412bee
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102142-tiny-thing-872a@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c8befdc411e5fd1bf95a13e8744c8ca79b412bee Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Date: Fri, 13 Oct 2023 16:57:05 +0200
Subject: [PATCH] pinctrl: qcom: lpass-lpi: fix concurrent register updates
The Qualcomm LPASS LPI pin controller driver uses one lock for guarding
Read-Modify-Write code for slew rate registers. However the pin
configuration and muxing registers have exactly the same RMW code but
are not protected.
Pin controller framework does not provide locking here, thus it is
possible to trigger simultaneous change of pin configuration registers
resulting in non-atomic changes.
Protect from concurrent access by re-using the same lock used to cover
the slew rate register. Using the same lock instead of adding second
one will make more sense, once we add support for newer Qualcomm SoC,
where slew rate is configured in the same register as pin
configuration/muxing.
Fixes: 6e261d1090d6 ("pinctrl: qcom: Add sm8250 lpass lpi pinctrl driver")
Cc: stable(a)vger.kernel.org
Reviewed-by: Linus Walleij <linus.walleij(a)linaro.org>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Link: https://lore.kernel.org/r/20231013145705.219954-1-krzysztof.kozlowski@linar…
Signed-off-by: Linus Walleij <linus.walleij(a)linaro.org>
diff --git a/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c b/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c
index e5a418026ba3..0b2839d27fd6 100644
--- a/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c
+++ b/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c
@@ -32,7 +32,8 @@ struct lpi_pinctrl {
char __iomem *tlmm_base;
char __iomem *slew_base;
struct clk_bulk_data clks[MAX_LPI_NUM_CLKS];
- struct mutex slew_access_lock;
+ /* Protects from concurrent register updates */
+ struct mutex lock;
DECLARE_BITMAP(ever_gpio, MAX_NR_GPIO);
const struct lpi_pinctrl_variant_data *data;
};
@@ -103,6 +104,7 @@ static int lpi_gpio_set_mux(struct pinctrl_dev *pctldev, unsigned int function,
if (WARN_ON(i == g->nfuncs))
return -EINVAL;
+ mutex_lock(&pctrl->lock);
val = lpi_gpio_read(pctrl, pin, LPI_GPIO_CFG_REG);
/*
@@ -128,6 +130,7 @@ static int lpi_gpio_set_mux(struct pinctrl_dev *pctldev, unsigned int function,
u32p_replace_bits(&val, i, LPI_GPIO_FUNCTION_MASK);
lpi_gpio_write(pctrl, pin, LPI_GPIO_CFG_REG, val);
+ mutex_unlock(&pctrl->lock);
return 0;
}
@@ -233,14 +236,14 @@ static int lpi_config_set(struct pinctrl_dev *pctldev, unsigned int group,
if (slew_offset == LPI_NO_SLEW)
break;
- mutex_lock(&pctrl->slew_access_lock);
+ mutex_lock(&pctrl->lock);
sval = ioread32(pctrl->slew_base + LPI_SLEW_RATE_CTL_REG);
sval &= ~(LPI_SLEW_RATE_MASK << slew_offset);
sval |= arg << slew_offset;
iowrite32(sval, pctrl->slew_base + LPI_SLEW_RATE_CTL_REG);
- mutex_unlock(&pctrl->slew_access_lock);
+ mutex_unlock(&pctrl->lock);
break;
default:
return -EINVAL;
@@ -256,6 +259,7 @@ static int lpi_config_set(struct pinctrl_dev *pctldev, unsigned int group,
lpi_gpio_write(pctrl, group, LPI_GPIO_VALUE_REG, val);
}
+ mutex_lock(&pctrl->lock);
val = lpi_gpio_read(pctrl, group, LPI_GPIO_CFG_REG);
u32p_replace_bits(&val, pullup, LPI_GPIO_PULL_MASK);
@@ -264,6 +268,7 @@ static int lpi_config_set(struct pinctrl_dev *pctldev, unsigned int group,
u32p_replace_bits(&val, output_enabled, LPI_GPIO_OE_MASK);
lpi_gpio_write(pctrl, group, LPI_GPIO_CFG_REG, val);
+ mutex_unlock(&pctrl->lock);
return 0;
}
@@ -461,7 +466,7 @@ int lpi_pinctrl_probe(struct platform_device *pdev)
pctrl->chip.label = dev_name(dev);
pctrl->chip.can_sleep = false;
- mutex_init(&pctrl->slew_access_lock);
+ mutex_init(&pctrl->lock);
pctrl->ctrl = devm_pinctrl_register(dev, &pctrl->desc, pctrl);
if (IS_ERR(pctrl->ctrl)) {
@@ -483,7 +488,7 @@ int lpi_pinctrl_probe(struct platform_device *pdev)
return 0;
err_pinctrl:
- mutex_destroy(&pctrl->slew_access_lock);
+ mutex_destroy(&pctrl->lock);
clk_bulk_disable_unprepare(MAX_LPI_NUM_CLKS, pctrl->clks);
return ret;
@@ -495,7 +500,7 @@ int lpi_pinctrl_remove(struct platform_device *pdev)
struct lpi_pinctrl *pctrl = platform_get_drvdata(pdev);
int i;
- mutex_destroy(&pctrl->slew_access_lock);
+ mutex_destroy(&pctrl->lock);
clk_bulk_disable_unprepare(MAX_LPI_NUM_CLKS, pctrl->clks);
for (i = 0; i < pctrl->data->npins; i++)
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x c8befdc411e5fd1bf95a13e8744c8ca79b412bee
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102141-caring-alienate-2ec8@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c8befdc411e5fd1bf95a13e8744c8ca79b412bee Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Date: Fri, 13 Oct 2023 16:57:05 +0200
Subject: [PATCH] pinctrl: qcom: lpass-lpi: fix concurrent register updates
The Qualcomm LPASS LPI pin controller driver uses one lock for guarding
Read-Modify-Write code for slew rate registers. However the pin
configuration and muxing registers have exactly the same RMW code but
are not protected.
Pin controller framework does not provide locking here, thus it is
possible to trigger simultaneous change of pin configuration registers
resulting in non-atomic changes.
Protect from concurrent access by re-using the same lock used to cover
the slew rate register. Using the same lock instead of adding second
one will make more sense, once we add support for newer Qualcomm SoC,
where slew rate is configured in the same register as pin
configuration/muxing.
Fixes: 6e261d1090d6 ("pinctrl: qcom: Add sm8250 lpass lpi pinctrl driver")
Cc: stable(a)vger.kernel.org
Reviewed-by: Linus Walleij <linus.walleij(a)linaro.org>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Link: https://lore.kernel.org/r/20231013145705.219954-1-krzysztof.kozlowski@linar…
Signed-off-by: Linus Walleij <linus.walleij(a)linaro.org>
diff --git a/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c b/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c
index e5a418026ba3..0b2839d27fd6 100644
--- a/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c
+++ b/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c
@@ -32,7 +32,8 @@ struct lpi_pinctrl {
char __iomem *tlmm_base;
char __iomem *slew_base;
struct clk_bulk_data clks[MAX_LPI_NUM_CLKS];
- struct mutex slew_access_lock;
+ /* Protects from concurrent register updates */
+ struct mutex lock;
DECLARE_BITMAP(ever_gpio, MAX_NR_GPIO);
const struct lpi_pinctrl_variant_data *data;
};
@@ -103,6 +104,7 @@ static int lpi_gpio_set_mux(struct pinctrl_dev *pctldev, unsigned int function,
if (WARN_ON(i == g->nfuncs))
return -EINVAL;
+ mutex_lock(&pctrl->lock);
val = lpi_gpio_read(pctrl, pin, LPI_GPIO_CFG_REG);
/*
@@ -128,6 +130,7 @@ static int lpi_gpio_set_mux(struct pinctrl_dev *pctldev, unsigned int function,
u32p_replace_bits(&val, i, LPI_GPIO_FUNCTION_MASK);
lpi_gpio_write(pctrl, pin, LPI_GPIO_CFG_REG, val);
+ mutex_unlock(&pctrl->lock);
return 0;
}
@@ -233,14 +236,14 @@ static int lpi_config_set(struct pinctrl_dev *pctldev, unsigned int group,
if (slew_offset == LPI_NO_SLEW)
break;
- mutex_lock(&pctrl->slew_access_lock);
+ mutex_lock(&pctrl->lock);
sval = ioread32(pctrl->slew_base + LPI_SLEW_RATE_CTL_REG);
sval &= ~(LPI_SLEW_RATE_MASK << slew_offset);
sval |= arg << slew_offset;
iowrite32(sval, pctrl->slew_base + LPI_SLEW_RATE_CTL_REG);
- mutex_unlock(&pctrl->slew_access_lock);
+ mutex_unlock(&pctrl->lock);
break;
default:
return -EINVAL;
@@ -256,6 +259,7 @@ static int lpi_config_set(struct pinctrl_dev *pctldev, unsigned int group,
lpi_gpio_write(pctrl, group, LPI_GPIO_VALUE_REG, val);
}
+ mutex_lock(&pctrl->lock);
val = lpi_gpio_read(pctrl, group, LPI_GPIO_CFG_REG);
u32p_replace_bits(&val, pullup, LPI_GPIO_PULL_MASK);
@@ -264,6 +268,7 @@ static int lpi_config_set(struct pinctrl_dev *pctldev, unsigned int group,
u32p_replace_bits(&val, output_enabled, LPI_GPIO_OE_MASK);
lpi_gpio_write(pctrl, group, LPI_GPIO_CFG_REG, val);
+ mutex_unlock(&pctrl->lock);
return 0;
}
@@ -461,7 +466,7 @@ int lpi_pinctrl_probe(struct platform_device *pdev)
pctrl->chip.label = dev_name(dev);
pctrl->chip.can_sleep = false;
- mutex_init(&pctrl->slew_access_lock);
+ mutex_init(&pctrl->lock);
pctrl->ctrl = devm_pinctrl_register(dev, &pctrl->desc, pctrl);
if (IS_ERR(pctrl->ctrl)) {
@@ -483,7 +488,7 @@ int lpi_pinctrl_probe(struct platform_device *pdev)
return 0;
err_pinctrl:
- mutex_destroy(&pctrl->slew_access_lock);
+ mutex_destroy(&pctrl->lock);
clk_bulk_disable_unprepare(MAX_LPI_NUM_CLKS, pctrl->clks);
return ret;
@@ -495,7 +500,7 @@ int lpi_pinctrl_remove(struct platform_device *pdev)
struct lpi_pinctrl *pctrl = platform_get_drvdata(pdev);
int i;
- mutex_destroy(&pctrl->slew_access_lock);
+ mutex_destroy(&pctrl->lock);
clk_bulk_disable_unprepare(MAX_LPI_NUM_CLKS, pctrl->clks);
for (i = 0; i < pctrl->data->npins; i++)
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 32a9cdb8869dc111a0c96cf8e1762be9684af15b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102105-washbasin-crimp-78d1@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 32a9cdb8869dc111a0c96cf8e1762be9684af15b Mon Sep 17 00:00:00 2001
From: Haibo Chen <haibo.chen(a)nxp.com>
Date: Wed, 30 Aug 2023 17:39:22 +0800
Subject: [PATCH] mmc: core: sdio: hold retuning if sdio in 1-bit mode
tuning only support in 4-bit mode or 8 bit mode, so in 1-bit mode,
need to hold retuning.
Find this issue when use manual tuning method on imx93. When system
resume back, SDIO WIFI try to switch back to 4 bit mode, first will
trigger retuning, and all tuning command failed.
Signed-off-by: Haibo Chen <haibo.chen(a)nxp.com>
Acked-by: Adrian Hunter <adrian.hunter(a)intel.com>
Fixes: dfa13ebbe334 ("mmc: host: Add facility to support re-tuning")
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20230830093922.3095850-1-haibo.chen@nxp.com
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index f64b9ac76a5c..5914516df2f7 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -1089,8 +1089,14 @@ static int mmc_sdio_resume(struct mmc_host *host)
}
err = mmc_sdio_reinit_card(host);
} else if (mmc_card_wake_sdio_irq(host)) {
- /* We may have switched to 1-bit mode during suspend */
+ /*
+ * We may have switched to 1-bit mode during suspend,
+ * need to hold retuning, because tuning only supprt
+ * 4-bit mode or 8 bit mode.
+ */
+ mmc_retune_hold_now(host);
err = sdio_enable_4bit_bus(host->card);
+ mmc_retune_release(host);
}
if (err)
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 32a9cdb8869dc111a0c96cf8e1762be9684af15b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102104-decal-avenue-0d88@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 32a9cdb8869dc111a0c96cf8e1762be9684af15b Mon Sep 17 00:00:00 2001
From: Haibo Chen <haibo.chen(a)nxp.com>
Date: Wed, 30 Aug 2023 17:39:22 +0800
Subject: [PATCH] mmc: core: sdio: hold retuning if sdio in 1-bit mode
tuning only support in 4-bit mode or 8 bit mode, so in 1-bit mode,
need to hold retuning.
Find this issue when use manual tuning method on imx93. When system
resume back, SDIO WIFI try to switch back to 4 bit mode, first will
trigger retuning, and all tuning command failed.
Signed-off-by: Haibo Chen <haibo.chen(a)nxp.com>
Acked-by: Adrian Hunter <adrian.hunter(a)intel.com>
Fixes: dfa13ebbe334 ("mmc: host: Add facility to support re-tuning")
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20230830093922.3095850-1-haibo.chen@nxp.com
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index f64b9ac76a5c..5914516df2f7 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -1089,8 +1089,14 @@ static int mmc_sdio_resume(struct mmc_host *host)
}
err = mmc_sdio_reinit_card(host);
} else if (mmc_card_wake_sdio_irq(host)) {
- /* We may have switched to 1-bit mode during suspend */
+ /*
+ * We may have switched to 1-bit mode during suspend,
+ * need to hold retuning, because tuning only supprt
+ * 4-bit mode or 8 bit mode.
+ */
+ mmc_retune_hold_now(host);
err = sdio_enable_4bit_bus(host->card);
+ mmc_retune_release(host);
}
if (err)
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 3e01d5254698ea3d18e09d96b974c762328352cd
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102111-unclaimed-marbled-f39f@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3e01d5254698ea3d18e09d96b974c762328352cd Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal(a)bootlin.com>
Date: Mon, 17 Jul 2023 21:42:19 +0200
Subject: [PATCH] mtd: rawnand: marvell: Ensure program page operations are
successful
The NAND core complies with the ONFI specification, which itself
mentions that after any program or erase operation, a status check
should be performed to see whether the operation was finished *and*
successful.
The NAND core offers helpers to finish a page write (sending the
"PAGE PROG" command, waiting for the NAND chip to be ready again, and
checking the operation status). But in some cases, advanced controller
drivers might want to optimize this and craft their own page write
helper to leverage additional hardware capabilities, thus not always
using the core facilities.
Some drivers, like this one, do not use the core helper to finish a page
write because the final cycles are automatically managed by the
hardware. In this case, the additional care must be taken to manually
perform the final status check.
Let's read the NAND chip status at the end of the page write helper and
return -EIO upon error.
Cc: stable(a)vger.kernel.org
Fixes: 02f26ecf8c77 ("mtd: nand: add reworked Marvell NAND controller driver")
Reported-by: Aviram Dali <aviramd(a)marvell.com>
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
Tested-by: Ravi Chandra Minnikanti <rminnikanti(a)marvell.com>
Link: https://lore.kernel.org/linux-mtd/20230717194221.229778-1-miquel.raynal@boo…
diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c
index 2c94da7a3b3a..b841a81cb128 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -1165,6 +1165,7 @@ static int marvell_nfc_hw_ecc_hmg_do_write_page(struct nand_chip *chip,
.ndcb[2] = NDCB2_ADDR5_PAGE(page),
};
unsigned int oob_bytes = lt->spare_bytes + (raw ? lt->ecc_bytes : 0);
+ u8 status;
int ret;
/* NFCv2 needs more information about the operation being executed */
@@ -1198,7 +1199,18 @@ static int marvell_nfc_hw_ecc_hmg_do_write_page(struct nand_chip *chip,
ret = marvell_nfc_wait_op(chip,
PSEC_TO_MSEC(sdr->tPROG_max));
- return ret;
+ if (ret)
+ return ret;
+
+ /* Check write status on the chip side */
+ ret = nand_status_op(chip, &status);
+ if (ret)
+ return ret;
+
+ if (status & NAND_STATUS_FAIL)
+ return -EIO;
+
+ return 0;
}
static int marvell_nfc_hw_ecc_hmg_write_page_raw(struct nand_chip *chip,
@@ -1627,6 +1639,7 @@ static int marvell_nfc_hw_ecc_bch_write_page(struct nand_chip *chip,
int data_len = lt->data_bytes;
int spare_len = lt->spare_bytes;
int chunk, ret;
+ u8 status;
marvell_nfc_select_target(chip, chip->cur_cs);
@@ -1663,6 +1676,14 @@ static int marvell_nfc_hw_ecc_bch_write_page(struct nand_chip *chip,
if (ret)
return ret;
+ /* Check write status on the chip side */
+ ret = nand_status_op(chip, &status);
+ if (ret)
+ return ret;
+
+ if (status & NAND_STATUS_FAIL)
+ return -EIO;
+
return 0;
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 3e01d5254698ea3d18e09d96b974c762328352cd
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102110-prominent-seventh-4ea6@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3e01d5254698ea3d18e09d96b974c762328352cd Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal(a)bootlin.com>
Date: Mon, 17 Jul 2023 21:42:19 +0200
Subject: [PATCH] mtd: rawnand: marvell: Ensure program page operations are
successful
The NAND core complies with the ONFI specification, which itself
mentions that after any program or erase operation, a status check
should be performed to see whether the operation was finished *and*
successful.
The NAND core offers helpers to finish a page write (sending the
"PAGE PROG" command, waiting for the NAND chip to be ready again, and
checking the operation status). But in some cases, advanced controller
drivers might want to optimize this and craft their own page write
helper to leverage additional hardware capabilities, thus not always
using the core facilities.
Some drivers, like this one, do not use the core helper to finish a page
write because the final cycles are automatically managed by the
hardware. In this case, the additional care must be taken to manually
perform the final status check.
Let's read the NAND chip status at the end of the page write helper and
return -EIO upon error.
Cc: stable(a)vger.kernel.org
Fixes: 02f26ecf8c77 ("mtd: nand: add reworked Marvell NAND controller driver")
Reported-by: Aviram Dali <aviramd(a)marvell.com>
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
Tested-by: Ravi Chandra Minnikanti <rminnikanti(a)marvell.com>
Link: https://lore.kernel.org/linux-mtd/20230717194221.229778-1-miquel.raynal@boo…
diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c
index 2c94da7a3b3a..b841a81cb128 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -1165,6 +1165,7 @@ static int marvell_nfc_hw_ecc_hmg_do_write_page(struct nand_chip *chip,
.ndcb[2] = NDCB2_ADDR5_PAGE(page),
};
unsigned int oob_bytes = lt->spare_bytes + (raw ? lt->ecc_bytes : 0);
+ u8 status;
int ret;
/* NFCv2 needs more information about the operation being executed */
@@ -1198,7 +1199,18 @@ static int marvell_nfc_hw_ecc_hmg_do_write_page(struct nand_chip *chip,
ret = marvell_nfc_wait_op(chip,
PSEC_TO_MSEC(sdr->tPROG_max));
- return ret;
+ if (ret)
+ return ret;
+
+ /* Check write status on the chip side */
+ ret = nand_status_op(chip, &status);
+ if (ret)
+ return ret;
+
+ if (status & NAND_STATUS_FAIL)
+ return -EIO;
+
+ return 0;
}
static int marvell_nfc_hw_ecc_hmg_write_page_raw(struct nand_chip *chip,
@@ -1627,6 +1639,7 @@ static int marvell_nfc_hw_ecc_bch_write_page(struct nand_chip *chip,
int data_len = lt->data_bytes;
int spare_len = lt->spare_bytes;
int chunk, ret;
+ u8 status;
marvell_nfc_select_target(chip, chip->cur_cs);
@@ -1663,6 +1676,14 @@ static int marvell_nfc_hw_ecc_bch_write_page(struct nand_chip *chip,
if (ret)
return ret;
+ /* Check write status on the chip side */
+ ret = nand_status_op(chip, &status);
+ if (ret)
+ return ret;
+
+ if (status & NAND_STATUS_FAIL)
+ return -EIO;
+
return 0;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 3e01d5254698ea3d18e09d96b974c762328352cd
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102108-winner-gorged-4d0d@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3e01d5254698ea3d18e09d96b974c762328352cd Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal(a)bootlin.com>
Date: Mon, 17 Jul 2023 21:42:19 +0200
Subject: [PATCH] mtd: rawnand: marvell: Ensure program page operations are
successful
The NAND core complies with the ONFI specification, which itself
mentions that after any program or erase operation, a status check
should be performed to see whether the operation was finished *and*
successful.
The NAND core offers helpers to finish a page write (sending the
"PAGE PROG" command, waiting for the NAND chip to be ready again, and
checking the operation status). But in some cases, advanced controller
drivers might want to optimize this and craft their own page write
helper to leverage additional hardware capabilities, thus not always
using the core facilities.
Some drivers, like this one, do not use the core helper to finish a page
write because the final cycles are automatically managed by the
hardware. In this case, the additional care must be taken to manually
perform the final status check.
Let's read the NAND chip status at the end of the page write helper and
return -EIO upon error.
Cc: stable(a)vger.kernel.org
Fixes: 02f26ecf8c77 ("mtd: nand: add reworked Marvell NAND controller driver")
Reported-by: Aviram Dali <aviramd(a)marvell.com>
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
Tested-by: Ravi Chandra Minnikanti <rminnikanti(a)marvell.com>
Link: https://lore.kernel.org/linux-mtd/20230717194221.229778-1-miquel.raynal@boo…
diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c
index 2c94da7a3b3a..b841a81cb128 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -1165,6 +1165,7 @@ static int marvell_nfc_hw_ecc_hmg_do_write_page(struct nand_chip *chip,
.ndcb[2] = NDCB2_ADDR5_PAGE(page),
};
unsigned int oob_bytes = lt->spare_bytes + (raw ? lt->ecc_bytes : 0);
+ u8 status;
int ret;
/* NFCv2 needs more information about the operation being executed */
@@ -1198,7 +1199,18 @@ static int marvell_nfc_hw_ecc_hmg_do_write_page(struct nand_chip *chip,
ret = marvell_nfc_wait_op(chip,
PSEC_TO_MSEC(sdr->tPROG_max));
- return ret;
+ if (ret)
+ return ret;
+
+ /* Check write status on the chip side */
+ ret = nand_status_op(chip, &status);
+ if (ret)
+ return ret;
+
+ if (status & NAND_STATUS_FAIL)
+ return -EIO;
+
+ return 0;
}
static int marvell_nfc_hw_ecc_hmg_write_page_raw(struct nand_chip *chip,
@@ -1627,6 +1639,7 @@ static int marvell_nfc_hw_ecc_bch_write_page(struct nand_chip *chip,
int data_len = lt->data_bytes;
int spare_len = lt->spare_bytes;
int chunk, ret;
+ u8 status;
marvell_nfc_select_target(chip, chip->cur_cs);
@@ -1663,6 +1676,14 @@ static int marvell_nfc_hw_ecc_bch_write_page(struct nand_chip *chip,
if (ret)
return ret;
+ /* Check write status on the chip side */
+ ret = nand_status_op(chip, &status);
+ if (ret)
+ return ret;
+
+ if (status & NAND_STATUS_FAIL)
+ return -EIO;
+
return 0;
}
The patch below does not apply to the 6.5-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.5.y
git checkout FETCH_HEAD
git cherry-pick -x 8e15aee621618a3ee3abecaf1fd8c1428098b7ef
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102009-aqueduct-crate-9618@gregkh' --subject-prefix 'PATCH 6.5.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8e15aee621618a3ee3abecaf1fd8c1428098b7ef Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba(a)kernel.org>
Date: Tue, 17 Oct 2023 18:38:16 -0700
Subject: [PATCH] net: move altnames together with the netdevice
The altname nodes are currently not moved to the new netns
when netdevice itself moves:
[ ~]# ip netns add test
[ ~]# ip -netns test link add name eth0 type dummy
[ ~]# ip -netns test link property add dev eth0 altname some-name
[ ~]# ip -netns test link show dev some-name
2: eth0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether 1e:67:ed:19:3d:24 brd ff:ff:ff:ff:ff:ff
altname some-name
[ ~]# ip -netns test link set dev eth0 netns 1
[ ~]# ip link
...
3: eth0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether 02:40:88:62:ec:b8 brd ff:ff:ff:ff:ff:ff
altname some-name
[ ~]# ip li show dev some-name
Device "some-name" does not exist.
Remove them from the hash table when device is unlisted
and add back when listed again.
Fixes: 36fbf1e52bd3 ("net: rtnetlink: add linkprop commands to add and delete alternative ifnames")
Reviewed-by: Jiri Pirko <jiri(a)nvidia.com>
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
diff --git a/net/core/dev.c b/net/core/dev.c
index 559705aeefe4..9f3f8930c691 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -381,6 +381,7 @@ static void netdev_name_node_alt_flush(struct net_device *dev)
/* Device list insertion */
static void list_netdevice(struct net_device *dev)
{
+ struct netdev_name_node *name_node;
struct net *net = dev_net(dev);
ASSERT_RTNL();
@@ -391,6 +392,10 @@ static void list_netdevice(struct net_device *dev)
hlist_add_head_rcu(&dev->index_hlist,
dev_index_hash(net, dev->ifindex));
write_unlock(&dev_base_lock);
+
+ netdev_for_each_altname(dev, name_node)
+ netdev_name_node_add(net, name_node);
+
/* We reserved the ifindex, this can't fail */
WARN_ON(xa_store(&net->dev_by_index, dev->ifindex, dev, GFP_KERNEL));
@@ -402,12 +407,16 @@ static void list_netdevice(struct net_device *dev)
*/
static void unlist_netdevice(struct net_device *dev, bool lock)
{
+ struct netdev_name_node *name_node;
struct net *net = dev_net(dev);
ASSERT_RTNL();
xa_erase(&net->dev_by_index, dev->ifindex);
+ netdev_for_each_altname(dev, name_node)
+ netdev_name_node_del(name_node);
+
/* Unlink dev from the device chain */
if (lock)
write_lock(&dev_base_lock);
@@ -10942,7 +10951,6 @@ void unregister_netdevice_many_notify(struct list_head *head,
synchronize_net();
list_for_each_entry(dev, head, unreg_list) {
- struct netdev_name_node *name_node;
struct sk_buff *skb = NULL;
/* Shutdown queueing discipline. */
@@ -10970,9 +10978,6 @@ void unregister_netdevice_many_notify(struct list_head *head,
dev_uc_flush(dev);
dev_mc_flush(dev);
- netdev_for_each_altname(dev, name_node)
- netdev_name_node_del(name_node);
- synchronize_rcu();
netdev_name_node_alt_flush(dev);
netdev_name_node_free(dev->name_node);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 1a83f4a7c156fa6bbd6b530e89fa3270bf3d9d1b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102023-ignore-graveness-861d@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1a83f4a7c156fa6bbd6b530e89fa3270bf3d9d1b Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba(a)kernel.org>
Date: Tue, 17 Oct 2023 18:38:15 -0700
Subject: [PATCH] net: avoid UAF on deleted altname
Altnames are accessed under RCU (dev_get_by_name_rcu())
but freed by kfree() with no synchronization point.
Each node has one or two allocations (node and a variable-size
name, sometimes the name is netdev->name). Adding rcu_heads
here is a bit tedious. Besides most code which unlists the names
already has rcu barriers - so take the simpler approach of adding
synchronize_rcu(). Note that the one on the unregistration path
(which matters more) is removed by the next fix.
Fixes: ff92741270bf ("net: introduce name_node struct to be used in hashlist")
Reviewed-by: Jiri Pirko <jiri(a)nvidia.com>
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
diff --git a/net/core/dev.c b/net/core/dev.c
index ae557193b77c..559705aeefe4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -345,7 +345,6 @@ int netdev_name_node_alt_create(struct net_device *dev, const char *name)
static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
{
list_del(&name_node->list);
- netdev_name_node_del(name_node);
kfree(name_node->name);
netdev_name_node_free(name_node);
}
@@ -364,6 +363,8 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
if (name_node == dev->name_node || name_node->dev != dev)
return -EINVAL;
+ netdev_name_node_del(name_node);
+ synchronize_rcu();
__netdev_name_node_alt_destroy(name_node);
return 0;
@@ -10941,6 +10942,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
synchronize_net();
list_for_each_entry(dev, head, unreg_list) {
+ struct netdev_name_node *name_node;
struct sk_buff *skb = NULL;
/* Shutdown queueing discipline. */
@@ -10968,6 +10970,9 @@ void unregister_netdevice_many_notify(struct list_head *head,
dev_uc_flush(dev);
dev_mc_flush(dev);
+ netdev_for_each_altname(dev, name_node)
+ netdev_name_node_del(name_node);
+ synchronize_rcu();
netdev_name_node_alt_flush(dev);
netdev_name_node_free(dev->name_node);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 1a83f4a7c156fa6bbd6b530e89fa3270bf3d9d1b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102011-rubble-require-8127@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1a83f4a7c156fa6bbd6b530e89fa3270bf3d9d1b Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba(a)kernel.org>
Date: Tue, 17 Oct 2023 18:38:15 -0700
Subject: [PATCH] net: avoid UAF on deleted altname
Altnames are accessed under RCU (dev_get_by_name_rcu())
but freed by kfree() with no synchronization point.
Each node has one or two allocations (node and a variable-size
name, sometimes the name is netdev->name). Adding rcu_heads
here is a bit tedious. Besides most code which unlists the names
already has rcu barriers - so take the simpler approach of adding
synchronize_rcu(). Note that the one on the unregistration path
(which matters more) is removed by the next fix.
Fixes: ff92741270bf ("net: introduce name_node struct to be used in hashlist")
Reviewed-by: Jiri Pirko <jiri(a)nvidia.com>
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
diff --git a/net/core/dev.c b/net/core/dev.c
index ae557193b77c..559705aeefe4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -345,7 +345,6 @@ int netdev_name_node_alt_create(struct net_device *dev, const char *name)
static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
{
list_del(&name_node->list);
- netdev_name_node_del(name_node);
kfree(name_node->name);
netdev_name_node_free(name_node);
}
@@ -364,6 +363,8 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
if (name_node == dev->name_node || name_node->dev != dev)
return -EINVAL;
+ netdev_name_node_del(name_node);
+ synchronize_rcu();
__netdev_name_node_alt_destroy(name_node);
return 0;
@@ -10941,6 +10942,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
synchronize_net();
list_for_each_entry(dev, head, unreg_list) {
+ struct netdev_name_node *name_node;
struct sk_buff *skb = NULL;
/* Shutdown queueing discipline. */
@@ -10968,6 +10970,9 @@ void unregister_netdevice_many_notify(struct list_head *head,
dev_uc_flush(dev);
dev_mc_flush(dev);
+ netdev_for_each_altname(dev, name_node)
+ netdev_name_node_del(name_node);
+ synchronize_rcu();
netdev_name_node_alt_flush(dev);
netdev_name_node_free(dev->name_node);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 8e15aee621618a3ee3abecaf1fd8c1428098b7ef
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102012-trustee-handball-1567@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8e15aee621618a3ee3abecaf1fd8c1428098b7ef Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba(a)kernel.org>
Date: Tue, 17 Oct 2023 18:38:16 -0700
Subject: [PATCH] net: move altnames together with the netdevice
The altname nodes are currently not moved to the new netns
when netdevice itself moves:
[ ~]# ip netns add test
[ ~]# ip -netns test link add name eth0 type dummy
[ ~]# ip -netns test link property add dev eth0 altname some-name
[ ~]# ip -netns test link show dev some-name
2: eth0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether 1e:67:ed:19:3d:24 brd ff:ff:ff:ff:ff:ff
altname some-name
[ ~]# ip -netns test link set dev eth0 netns 1
[ ~]# ip link
...
3: eth0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether 02:40:88:62:ec:b8 brd ff:ff:ff:ff:ff:ff
altname some-name
[ ~]# ip li show dev some-name
Device "some-name" does not exist.
Remove them from the hash table when device is unlisted
and add back when listed again.
Fixes: 36fbf1e52bd3 ("net: rtnetlink: add linkprop commands to add and delete alternative ifnames")
Reviewed-by: Jiri Pirko <jiri(a)nvidia.com>
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
diff --git a/net/core/dev.c b/net/core/dev.c
index 559705aeefe4..9f3f8930c691 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -381,6 +381,7 @@ static void netdev_name_node_alt_flush(struct net_device *dev)
/* Device list insertion */
static void list_netdevice(struct net_device *dev)
{
+ struct netdev_name_node *name_node;
struct net *net = dev_net(dev);
ASSERT_RTNL();
@@ -391,6 +392,10 @@ static void list_netdevice(struct net_device *dev)
hlist_add_head_rcu(&dev->index_hlist,
dev_index_hash(net, dev->ifindex));
write_unlock(&dev_base_lock);
+
+ netdev_for_each_altname(dev, name_node)
+ netdev_name_node_add(net, name_node);
+
/* We reserved the ifindex, this can't fail */
WARN_ON(xa_store(&net->dev_by_index, dev->ifindex, dev, GFP_KERNEL));
@@ -402,12 +407,16 @@ static void list_netdevice(struct net_device *dev)
*/
static void unlist_netdevice(struct net_device *dev, bool lock)
{
+ struct netdev_name_node *name_node;
struct net *net = dev_net(dev);
ASSERT_RTNL();
xa_erase(&net->dev_by_index, dev->ifindex);
+ netdev_for_each_altname(dev, name_node)
+ netdev_name_node_del(name_node);
+
/* Unlink dev from the device chain */
if (lock)
write_lock(&dev_base_lock);
@@ -10942,7 +10951,6 @@ void unregister_netdevice_many_notify(struct list_head *head,
synchronize_net();
list_for_each_entry(dev, head, unreg_list) {
- struct netdev_name_node *name_node;
struct sk_buff *skb = NULL;
/* Shutdown queueing discipline. */
@@ -10970,9 +10978,6 @@ void unregister_netdevice_many_notify(struct list_head *head,
dev_uc_flush(dev);
dev_mc_flush(dev);
- netdev_for_each_altname(dev, name_node)
- netdev_name_node_del(name_node);
- synchronize_rcu();
netdev_name_node_alt_flush(dev);
netdev_name_node_free(dev->name_node);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 8e15aee621618a3ee3abecaf1fd8c1428098b7ef
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102011-escapable-flattop-efeb@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8e15aee621618a3ee3abecaf1fd8c1428098b7ef Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba(a)kernel.org>
Date: Tue, 17 Oct 2023 18:38:16 -0700
Subject: [PATCH] net: move altnames together with the netdevice
The altname nodes are currently not moved to the new netns
when netdevice itself moves:
[ ~]# ip netns add test
[ ~]# ip -netns test link add name eth0 type dummy
[ ~]# ip -netns test link property add dev eth0 altname some-name
[ ~]# ip -netns test link show dev some-name
2: eth0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether 1e:67:ed:19:3d:24 brd ff:ff:ff:ff:ff:ff
altname some-name
[ ~]# ip -netns test link set dev eth0 netns 1
[ ~]# ip link
...
3: eth0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether 02:40:88:62:ec:b8 brd ff:ff:ff:ff:ff:ff
altname some-name
[ ~]# ip li show dev some-name
Device "some-name" does not exist.
Remove them from the hash table when device is unlisted
and add back when listed again.
Fixes: 36fbf1e52bd3 ("net: rtnetlink: add linkprop commands to add and delete alternative ifnames")
Reviewed-by: Jiri Pirko <jiri(a)nvidia.com>
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
diff --git a/net/core/dev.c b/net/core/dev.c
index 559705aeefe4..9f3f8930c691 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -381,6 +381,7 @@ static void netdev_name_node_alt_flush(struct net_device *dev)
/* Device list insertion */
static void list_netdevice(struct net_device *dev)
{
+ struct netdev_name_node *name_node;
struct net *net = dev_net(dev);
ASSERT_RTNL();
@@ -391,6 +392,10 @@ static void list_netdevice(struct net_device *dev)
hlist_add_head_rcu(&dev->index_hlist,
dev_index_hash(net, dev->ifindex));
write_unlock(&dev_base_lock);
+
+ netdev_for_each_altname(dev, name_node)
+ netdev_name_node_add(net, name_node);
+
/* We reserved the ifindex, this can't fail */
WARN_ON(xa_store(&net->dev_by_index, dev->ifindex, dev, GFP_KERNEL));
@@ -402,12 +407,16 @@ static void list_netdevice(struct net_device *dev)
*/
static void unlist_netdevice(struct net_device *dev, bool lock)
{
+ struct netdev_name_node *name_node;
struct net *net = dev_net(dev);
ASSERT_RTNL();
xa_erase(&net->dev_by_index, dev->ifindex);
+ netdev_for_each_altname(dev, name_node)
+ netdev_name_node_del(name_node);
+
/* Unlink dev from the device chain */
if (lock)
write_lock(&dev_base_lock);
@@ -10942,7 +10951,6 @@ void unregister_netdevice_many_notify(struct list_head *head,
synchronize_net();
list_for_each_entry(dev, head, unreg_list) {
- struct netdev_name_node *name_node;
struct sk_buff *skb = NULL;
/* Shutdown queueing discipline. */
@@ -10970,9 +10978,6 @@ void unregister_netdevice_many_notify(struct list_head *head,
dev_uc_flush(dev);
dev_mc_flush(dev);
- netdev_for_each_altname(dev, name_node)
- netdev_name_node_del(name_node);
- synchronize_rcu();
netdev_name_node_alt_flush(dev);
netdev_name_node_free(dev->name_node);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 7663d522099ecc464512164e660bc771b2ff7b64
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102049-endanger-chief-b8ab@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7663d522099ecc464512164e660bc771b2ff7b64 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba(a)kernel.org>
Date: Tue, 17 Oct 2023 18:38:14 -0700
Subject: [PATCH] net: check for altname conflicts when changing netdev's netns
It's currently possible to create an altname conflicting
with an altname or real name of another device by creating
it in another netns and moving it over:
[ ~]$ ip link add dev eth0 type dummy
[ ~]$ ip netns add test
[ ~]$ ip -netns test link add dev ethX netns test type dummy
[ ~]$ ip -netns test link property add dev ethX altname eth0
[ ~]$ ip -netns test link set dev ethX netns 1
[ ~]$ ip link
...
3: eth0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether 02:40:88:62:ec:b8 brd ff:ff:ff:ff:ff:ff
...
5: ethX: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether 26:b7:28:78:38:0f brd ff:ff:ff:ff:ff:ff
altname eth0
Create a macro for walking the altnames, this hopefully makes
it clearer that the list we walk contains only altnames.
Which is otherwise not entirely intuitive.
Fixes: 36fbf1e52bd3 ("net: rtnetlink: add linkprop commands to add and delete alternative ifnames")
Reviewed-by: Jiri Pirko <jiri(a)nvidia.com>
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
diff --git a/net/core/dev.c b/net/core/dev.c
index f109ad34d660..ae557193b77c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1086,7 +1086,8 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
for_each_netdev(net, d) {
struct netdev_name_node *name_node;
- list_for_each_entry(name_node, &d->name_node->list, list) {
+
+ netdev_for_each_altname(d, name_node) {
if (!sscanf(name_node->name, name, &i))
continue;
if (i < 0 || i >= max_netdevices)
@@ -11051,6 +11052,7 @@ EXPORT_SYMBOL(unregister_netdev);
int __dev_change_net_namespace(struct net_device *dev, struct net *net,
const char *pat, int new_ifindex)
{
+ struct netdev_name_node *name_node;
struct net *net_old = dev_net(dev);
char new_name[IFNAMSIZ] = {};
int err, new_nsid;
@@ -11083,6 +11085,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
if (err < 0)
goto out;
}
+ /* Check that none of the altnames conflicts. */
+ err = -EEXIST;
+ netdev_for_each_altname(dev, name_node)
+ if (netdev_name_in_use(net, name_node->name))
+ goto out;
/* Check that new_ifindex isn't used yet. */
if (new_ifindex) {
diff --git a/net/core/dev.h b/net/core/dev.h
index e075e198092c..fa2e9c5c4122 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -62,6 +62,9 @@ struct netdev_name_node {
int netdev_get_name(struct net *net, char *name, int ifindex);
int dev_change_name(struct net_device *dev, const char *newname);
+#define netdev_for_each_altname(dev, namenode) \
+ list_for_each_entry((namenode), &(dev)->name_node->list, list)
+
int netdev_name_node_alt_create(struct net_device *dev, const char *name);
int netdev_name_node_alt_destroy(struct net_device *dev, const char *name);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 7663d522099ecc464512164e660bc771b2ff7b64
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102047-purplish-sectional-1245@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7663d522099ecc464512164e660bc771b2ff7b64 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba(a)kernel.org>
Date: Tue, 17 Oct 2023 18:38:14 -0700
Subject: [PATCH] net: check for altname conflicts when changing netdev's netns
It's currently possible to create an altname conflicting
with an altname or real name of another device by creating
it in another netns and moving it over:
[ ~]$ ip link add dev eth0 type dummy
[ ~]$ ip netns add test
[ ~]$ ip -netns test link add dev ethX netns test type dummy
[ ~]$ ip -netns test link property add dev ethX altname eth0
[ ~]$ ip -netns test link set dev ethX netns 1
[ ~]$ ip link
...
3: eth0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether 02:40:88:62:ec:b8 brd ff:ff:ff:ff:ff:ff
...
5: ethX: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether 26:b7:28:78:38:0f brd ff:ff:ff:ff:ff:ff
altname eth0
Create a macro for walking the altnames, this hopefully makes
it clearer that the list we walk contains only altnames.
Which is otherwise not entirely intuitive.
Fixes: 36fbf1e52bd3 ("net: rtnetlink: add linkprop commands to add and delete alternative ifnames")
Reviewed-by: Jiri Pirko <jiri(a)nvidia.com>
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
diff --git a/net/core/dev.c b/net/core/dev.c
index f109ad34d660..ae557193b77c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1086,7 +1086,8 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
for_each_netdev(net, d) {
struct netdev_name_node *name_node;
- list_for_each_entry(name_node, &d->name_node->list, list) {
+
+ netdev_for_each_altname(d, name_node) {
if (!sscanf(name_node->name, name, &i))
continue;
if (i < 0 || i >= max_netdevices)
@@ -11051,6 +11052,7 @@ EXPORT_SYMBOL(unregister_netdev);
int __dev_change_net_namespace(struct net_device *dev, struct net *net,
const char *pat, int new_ifindex)
{
+ struct netdev_name_node *name_node;
struct net *net_old = dev_net(dev);
char new_name[IFNAMSIZ] = {};
int err, new_nsid;
@@ -11083,6 +11085,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
if (err < 0)
goto out;
}
+ /* Check that none of the altnames conflicts. */
+ err = -EEXIST;
+ netdev_for_each_altname(dev, name_node)
+ if (netdev_name_in_use(net, name_node->name))
+ goto out;
/* Check that new_ifindex isn't used yet. */
if (new_ifindex) {
diff --git a/net/core/dev.h b/net/core/dev.h
index e075e198092c..fa2e9c5c4122 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -62,6 +62,9 @@ struct netdev_name_node {
int netdev_get_name(struct net *net, char *name, int ifindex);
int dev_change_name(struct net_device *dev, const char *newname);
+#define netdev_for_each_altname(dev, namenode) \
+ list_for_each_entry((namenode), &(dev)->name_node->list, list)
+
int netdev_name_node_alt_create(struct net_device *dev, const char *name);
int netdev_name_node_alt_destroy(struct net_device *dev, const char *name);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 311cca40661f428b7aa114fb5af578cfdbe3e8b6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102028-tartness-disown-ace1@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 311cca40661f428b7aa114fb5af578cfdbe3e8b6 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba(a)kernel.org>
Date: Tue, 17 Oct 2023 18:38:13 -0700
Subject: [PATCH] net: fix ifname in netlink ntf during netns move
dev_get_valid_name() overwrites the netdev's name on success.
This makes it hard to use in prepare-commit-like fashion,
where we do validation first, and "commit" to the change
later.
Factor out a helper which lets us save the new name to a buffer.
Use it to fix the problem of notification on netns move having
incorrect name:
5: eth0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
link/ether be:4d:58:f9:d5:40 brd ff:ff:ff:ff:ff:ff
6: eth1: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
link/ether 1e:4a:34:36:e3:cd brd ff:ff:ff:ff:ff:ff
[ ~]# ip link set dev eth0 netns 1 name eth1
ip monitor inside netns:
Deleted inet eth0
Deleted inet6 eth0
Deleted 5: eth1: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
link/ether be:4d:58:f9:d5:40 brd ff:ff:ff:ff:ff:ff new-netnsid 0 new-ifindex 7
Name is reported as eth1 in old netns for ifindex 5, already renamed.
Fixes: d90310243fd7 ("net: device name allocation cleanups")
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Reviewed-by: Jiri Pirko <jiri(a)nvidia.com>
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
diff --git a/net/core/dev.c b/net/core/dev.c
index 5aaf5753d4e4..f109ad34d660 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1123,6 +1123,26 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
return -ENFILE;
}
+static int dev_prep_valid_name(struct net *net, struct net_device *dev,
+ const char *want_name, char *out_name)
+{
+ int ret;
+
+ if (!dev_valid_name(want_name))
+ return -EINVAL;
+
+ if (strchr(want_name, '%')) {
+ ret = __dev_alloc_name(net, want_name, out_name);
+ return ret < 0 ? ret : 0;
+ } else if (netdev_name_in_use(net, want_name)) {
+ return -EEXIST;
+ } else if (out_name != want_name) {
+ strscpy(out_name, want_name, IFNAMSIZ);
+ }
+
+ return 0;
+}
+
static int dev_alloc_name_ns(struct net *net,
struct net_device *dev,
const char *name)
@@ -1160,19 +1180,13 @@ EXPORT_SYMBOL(dev_alloc_name);
static int dev_get_valid_name(struct net *net, struct net_device *dev,
const char *name)
{
- BUG_ON(!net);
-
- if (!dev_valid_name(name))
- return -EINVAL;
-
- if (strchr(name, '%'))
- return dev_alloc_name_ns(net, dev, name);
- else if (netdev_name_in_use(net, name))
- return -EEXIST;
- else if (dev->name != name)
- strscpy(dev->name, name, IFNAMSIZ);
+ char buf[IFNAMSIZ];
+ int ret;
- return 0;
+ ret = dev_prep_valid_name(net, dev, name, buf);
+ if (ret >= 0)
+ strscpy(dev->name, buf, IFNAMSIZ);
+ return ret;
}
/**
@@ -11038,6 +11052,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
const char *pat, int new_ifindex)
{
struct net *net_old = dev_net(dev);
+ char new_name[IFNAMSIZ] = {};
int err, new_nsid;
ASSERT_RTNL();
@@ -11064,7 +11079,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
/* We get here if we can't use the current device name */
if (!pat)
goto out;
- err = dev_get_valid_name(net, dev, pat);
+ err = dev_prep_valid_name(net, dev, pat, new_name);
if (err < 0)
goto out;
}
@@ -11135,6 +11150,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
netdev_adjacent_add_links(dev);
+ if (new_name[0]) /* Rename the netdev to prepared name */
+ strscpy(dev->name, new_name, IFNAMSIZ);
+
/* Fixup kobjects */
err = device_rename(&dev->dev, dev->name);
WARN_ON(err);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 311cca40661f428b7aa114fb5af578cfdbe3e8b6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102027-writing-grab-f97c@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 311cca40661f428b7aa114fb5af578cfdbe3e8b6 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba(a)kernel.org>
Date: Tue, 17 Oct 2023 18:38:13 -0700
Subject: [PATCH] net: fix ifname in netlink ntf during netns move
dev_get_valid_name() overwrites the netdev's name on success.
This makes it hard to use in prepare-commit-like fashion,
where we do validation first, and "commit" to the change
later.
Factor out a helper which lets us save the new name to a buffer.
Use it to fix the problem of notification on netns move having
incorrect name:
5: eth0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
link/ether be:4d:58:f9:d5:40 brd ff:ff:ff:ff:ff:ff
6: eth1: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
link/ether 1e:4a:34:36:e3:cd brd ff:ff:ff:ff:ff:ff
[ ~]# ip link set dev eth0 netns 1 name eth1
ip monitor inside netns:
Deleted inet eth0
Deleted inet6 eth0
Deleted 5: eth1: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
link/ether be:4d:58:f9:d5:40 brd ff:ff:ff:ff:ff:ff new-netnsid 0 new-ifindex 7
Name is reported as eth1 in old netns for ifindex 5, already renamed.
Fixes: d90310243fd7 ("net: device name allocation cleanups")
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Reviewed-by: Jiri Pirko <jiri(a)nvidia.com>
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
diff --git a/net/core/dev.c b/net/core/dev.c
index 5aaf5753d4e4..f109ad34d660 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1123,6 +1123,26 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
return -ENFILE;
}
+static int dev_prep_valid_name(struct net *net, struct net_device *dev,
+ const char *want_name, char *out_name)
+{
+ int ret;
+
+ if (!dev_valid_name(want_name))
+ return -EINVAL;
+
+ if (strchr(want_name, '%')) {
+ ret = __dev_alloc_name(net, want_name, out_name);
+ return ret < 0 ? ret : 0;
+ } else if (netdev_name_in_use(net, want_name)) {
+ return -EEXIST;
+ } else if (out_name != want_name) {
+ strscpy(out_name, want_name, IFNAMSIZ);
+ }
+
+ return 0;
+}
+
static int dev_alloc_name_ns(struct net *net,
struct net_device *dev,
const char *name)
@@ -1160,19 +1180,13 @@ EXPORT_SYMBOL(dev_alloc_name);
static int dev_get_valid_name(struct net *net, struct net_device *dev,
const char *name)
{
- BUG_ON(!net);
-
- if (!dev_valid_name(name))
- return -EINVAL;
-
- if (strchr(name, '%'))
- return dev_alloc_name_ns(net, dev, name);
- else if (netdev_name_in_use(net, name))
- return -EEXIST;
- else if (dev->name != name)
- strscpy(dev->name, name, IFNAMSIZ);
+ char buf[IFNAMSIZ];
+ int ret;
- return 0;
+ ret = dev_prep_valid_name(net, dev, name, buf);
+ if (ret >= 0)
+ strscpy(dev->name, buf, IFNAMSIZ);
+ return ret;
}
/**
@@ -11038,6 +11052,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
const char *pat, int new_ifindex)
{
struct net *net_old = dev_net(dev);
+ char new_name[IFNAMSIZ] = {};
int err, new_nsid;
ASSERT_RTNL();
@@ -11064,7 +11079,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
/* We get here if we can't use the current device name */
if (!pat)
goto out;
- err = dev_get_valid_name(net, dev, pat);
+ err = dev_prep_valid_name(net, dev, pat, new_name);
if (err < 0)
goto out;
}
@@ -11135,6 +11150,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
netdev_adjacent_add_links(dev);
+ if (new_name[0]) /* Rename the netdev to prepared name */
+ strscpy(dev->name, new_name, IFNAMSIZ);
+
/* Fixup kobjects */
err = device_rename(&dev->dev, dev->name);
WARN_ON(err);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 311cca40661f428b7aa114fb5af578cfdbe3e8b6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102026-cruelly-untaken-8b7c@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 311cca40661f428b7aa114fb5af578cfdbe3e8b6 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba(a)kernel.org>
Date: Tue, 17 Oct 2023 18:38:13 -0700
Subject: [PATCH] net: fix ifname in netlink ntf during netns move
dev_get_valid_name() overwrites the netdev's name on success.
This makes it hard to use in prepare-commit-like fashion,
where we do validation first, and "commit" to the change
later.
Factor out a helper which lets us save the new name to a buffer.
Use it to fix the problem of notification on netns move having
incorrect name:
5: eth0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
link/ether be:4d:58:f9:d5:40 brd ff:ff:ff:ff:ff:ff
6: eth1: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
link/ether 1e:4a:34:36:e3:cd brd ff:ff:ff:ff:ff:ff
[ ~]# ip link set dev eth0 netns 1 name eth1
ip monitor inside netns:
Deleted inet eth0
Deleted inet6 eth0
Deleted 5: eth1: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
link/ether be:4d:58:f9:d5:40 brd ff:ff:ff:ff:ff:ff new-netnsid 0 new-ifindex 7
Name is reported as eth1 in old netns for ifindex 5, already renamed.
Fixes: d90310243fd7 ("net: device name allocation cleanups")
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Reviewed-by: Jiri Pirko <jiri(a)nvidia.com>
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
diff --git a/net/core/dev.c b/net/core/dev.c
index 5aaf5753d4e4..f109ad34d660 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1123,6 +1123,26 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
return -ENFILE;
}
+static int dev_prep_valid_name(struct net *net, struct net_device *dev,
+ const char *want_name, char *out_name)
+{
+ int ret;
+
+ if (!dev_valid_name(want_name))
+ return -EINVAL;
+
+ if (strchr(want_name, '%')) {
+ ret = __dev_alloc_name(net, want_name, out_name);
+ return ret < 0 ? ret : 0;
+ } else if (netdev_name_in_use(net, want_name)) {
+ return -EEXIST;
+ } else if (out_name != want_name) {
+ strscpy(out_name, want_name, IFNAMSIZ);
+ }
+
+ return 0;
+}
+
static int dev_alloc_name_ns(struct net *net,
struct net_device *dev,
const char *name)
@@ -1160,19 +1180,13 @@ EXPORT_SYMBOL(dev_alloc_name);
static int dev_get_valid_name(struct net *net, struct net_device *dev,
const char *name)
{
- BUG_ON(!net);
-
- if (!dev_valid_name(name))
- return -EINVAL;
-
- if (strchr(name, '%'))
- return dev_alloc_name_ns(net, dev, name);
- else if (netdev_name_in_use(net, name))
- return -EEXIST;
- else if (dev->name != name)
- strscpy(dev->name, name, IFNAMSIZ);
+ char buf[IFNAMSIZ];
+ int ret;
- return 0;
+ ret = dev_prep_valid_name(net, dev, name, buf);
+ if (ret >= 0)
+ strscpy(dev->name, buf, IFNAMSIZ);
+ return ret;
}
/**
@@ -11038,6 +11052,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
const char *pat, int new_ifindex)
{
struct net *net_old = dev_net(dev);
+ char new_name[IFNAMSIZ] = {};
int err, new_nsid;
ASSERT_RTNL();
@@ -11064,7 +11079,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
/* We get here if we can't use the current device name */
if (!pat)
goto out;
- err = dev_get_valid_name(net, dev, pat);
+ err = dev_prep_valid_name(net, dev, pat, new_name);
if (err < 0)
goto out;
}
@@ -11135,6 +11150,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
netdev_adjacent_add_links(dev);
+ if (new_name[0]) /* Rename the netdev to prepared name */
+ strscpy(dev->name, new_name, IFNAMSIZ);
+
/* Fixup kobjects */
err = device_rename(&dev->dev, dev->name);
WARN_ON(err);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 311cca40661f428b7aa114fb5af578cfdbe3e8b6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102025-divisibly-deck-9f5a@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 311cca40661f428b7aa114fb5af578cfdbe3e8b6 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba(a)kernel.org>
Date: Tue, 17 Oct 2023 18:38:13 -0700
Subject: [PATCH] net: fix ifname in netlink ntf during netns move
dev_get_valid_name() overwrites the netdev's name on success.
This makes it hard to use in prepare-commit-like fashion,
where we do validation first, and "commit" to the change
later.
Factor out a helper which lets us save the new name to a buffer.
Use it to fix the problem of notification on netns move having
incorrect name:
5: eth0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
link/ether be:4d:58:f9:d5:40 brd ff:ff:ff:ff:ff:ff
6: eth1: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
link/ether 1e:4a:34:36:e3:cd brd ff:ff:ff:ff:ff:ff
[ ~]# ip link set dev eth0 netns 1 name eth1
ip monitor inside netns:
Deleted inet eth0
Deleted inet6 eth0
Deleted 5: eth1: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
link/ether be:4d:58:f9:d5:40 brd ff:ff:ff:ff:ff:ff new-netnsid 0 new-ifindex 7
Name is reported as eth1 in old netns for ifindex 5, already renamed.
Fixes: d90310243fd7 ("net: device name allocation cleanups")
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Reviewed-by: Jiri Pirko <jiri(a)nvidia.com>
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
diff --git a/net/core/dev.c b/net/core/dev.c
index 5aaf5753d4e4..f109ad34d660 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1123,6 +1123,26 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
return -ENFILE;
}
+static int dev_prep_valid_name(struct net *net, struct net_device *dev,
+ const char *want_name, char *out_name)
+{
+ int ret;
+
+ if (!dev_valid_name(want_name))
+ return -EINVAL;
+
+ if (strchr(want_name, '%')) {
+ ret = __dev_alloc_name(net, want_name, out_name);
+ return ret < 0 ? ret : 0;
+ } else if (netdev_name_in_use(net, want_name)) {
+ return -EEXIST;
+ } else if (out_name != want_name) {
+ strscpy(out_name, want_name, IFNAMSIZ);
+ }
+
+ return 0;
+}
+
static int dev_alloc_name_ns(struct net *net,
struct net_device *dev,
const char *name)
@@ -1160,19 +1180,13 @@ EXPORT_SYMBOL(dev_alloc_name);
static int dev_get_valid_name(struct net *net, struct net_device *dev,
const char *name)
{
- BUG_ON(!net);
-
- if (!dev_valid_name(name))
- return -EINVAL;
-
- if (strchr(name, '%'))
- return dev_alloc_name_ns(net, dev, name);
- else if (netdev_name_in_use(net, name))
- return -EEXIST;
- else if (dev->name != name)
- strscpy(dev->name, name, IFNAMSIZ);
+ char buf[IFNAMSIZ];
+ int ret;
- return 0;
+ ret = dev_prep_valid_name(net, dev, name, buf);
+ if (ret >= 0)
+ strscpy(dev->name, buf, IFNAMSIZ);
+ return ret;
}
/**
@@ -11038,6 +11052,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
const char *pat, int new_ifindex)
{
struct net *net_old = dev_net(dev);
+ char new_name[IFNAMSIZ] = {};
int err, new_nsid;
ASSERT_RTNL();
@@ -11064,7 +11079,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
/* We get here if we can't use the current device name */
if (!pat)
goto out;
- err = dev_get_valid_name(net, dev, pat);
+ err = dev_prep_valid_name(net, dev, pat, new_name);
if (err < 0)
goto out;
}
@@ -11135,6 +11150,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
netdev_adjacent_add_links(dev);
+ if (new_name[0]) /* Rename the netdev to prepared name */
+ strscpy(dev->name, new_name, IFNAMSIZ);
+
/* Fixup kobjects */
err = device_rename(&dev->dev, dev->name);
WARN_ON(err);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 311cca40661f428b7aa114fb5af578cfdbe3e8b6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102024-blame-romp-1612@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 311cca40661f428b7aa114fb5af578cfdbe3e8b6 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba(a)kernel.org>
Date: Tue, 17 Oct 2023 18:38:13 -0700
Subject: [PATCH] net: fix ifname in netlink ntf during netns move
dev_get_valid_name() overwrites the netdev's name on success.
This makes it hard to use in prepare-commit-like fashion,
where we do validation first, and "commit" to the change
later.
Factor out a helper which lets us save the new name to a buffer.
Use it to fix the problem of notification on netns move having
incorrect name:
5: eth0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
link/ether be:4d:58:f9:d5:40 brd ff:ff:ff:ff:ff:ff
6: eth1: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
link/ether 1e:4a:34:36:e3:cd brd ff:ff:ff:ff:ff:ff
[ ~]# ip link set dev eth0 netns 1 name eth1
ip monitor inside netns:
Deleted inet eth0
Deleted inet6 eth0
Deleted 5: eth1: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
link/ether be:4d:58:f9:d5:40 brd ff:ff:ff:ff:ff:ff new-netnsid 0 new-ifindex 7
Name is reported as eth1 in old netns for ifindex 5, already renamed.
Fixes: d90310243fd7 ("net: device name allocation cleanups")
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Reviewed-by: Jiri Pirko <jiri(a)nvidia.com>
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
diff --git a/net/core/dev.c b/net/core/dev.c
index 5aaf5753d4e4..f109ad34d660 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1123,6 +1123,26 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
return -ENFILE;
}
+static int dev_prep_valid_name(struct net *net, struct net_device *dev,
+ const char *want_name, char *out_name)
+{
+ int ret;
+
+ if (!dev_valid_name(want_name))
+ return -EINVAL;
+
+ if (strchr(want_name, '%')) {
+ ret = __dev_alloc_name(net, want_name, out_name);
+ return ret < 0 ? ret : 0;
+ } else if (netdev_name_in_use(net, want_name)) {
+ return -EEXIST;
+ } else if (out_name != want_name) {
+ strscpy(out_name, want_name, IFNAMSIZ);
+ }
+
+ return 0;
+}
+
static int dev_alloc_name_ns(struct net *net,
struct net_device *dev,
const char *name)
@@ -1160,19 +1180,13 @@ EXPORT_SYMBOL(dev_alloc_name);
static int dev_get_valid_name(struct net *net, struct net_device *dev,
const char *name)
{
- BUG_ON(!net);
-
- if (!dev_valid_name(name))
- return -EINVAL;
-
- if (strchr(name, '%'))
- return dev_alloc_name_ns(net, dev, name);
- else if (netdev_name_in_use(net, name))
- return -EEXIST;
- else if (dev->name != name)
- strscpy(dev->name, name, IFNAMSIZ);
+ char buf[IFNAMSIZ];
+ int ret;
- return 0;
+ ret = dev_prep_valid_name(net, dev, name, buf);
+ if (ret >= 0)
+ strscpy(dev->name, buf, IFNAMSIZ);
+ return ret;
}
/**
@@ -11038,6 +11052,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
const char *pat, int new_ifindex)
{
struct net *net_old = dev_net(dev);
+ char new_name[IFNAMSIZ] = {};
int err, new_nsid;
ASSERT_RTNL();
@@ -11064,7 +11079,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
/* We get here if we can't use the current device name */
if (!pat)
goto out;
- err = dev_get_valid_name(net, dev, pat);
+ err = dev_prep_valid_name(net, dev, pat, new_name);
if (err < 0)
goto out;
}
@@ -11135,6 +11150,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
netdev_adjacent_add_links(dev);
+ if (new_name[0]) /* Rename the netdev to prepared name */
+ strscpy(dev->name, new_name, IFNAMSIZ);
+
/* Fixup kobjects */
err = device_rename(&dev->dev, dev->name);
WARN_ON(err);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x a13b67c9a015c4e21601ef9aa4ec9c5d972df1b4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102023-poster-opulently-4c96@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a13b67c9a015c4e21601ef9aa4ec9c5d972df1b4 Mon Sep 17 00:00:00 2001
From: Pedro Tammela <pctammela(a)mojatatu.com>
Date: Tue, 17 Oct 2023 11:36:02 -0300
Subject: [PATCH] net/sched: sch_hfsc: upgrade 'rt' to 'sc' when it becomes a
inner curve
Christian Theune says:
I upgraded from 6.1.38 to 6.1.55 this morning and it broke my traffic shaping script,
leaving me with a non-functional uplink on a remote router.
A 'rt' curve cannot be used as a inner curve (parent class), but we were
allowing such configurations since the qdisc was introduced. Such
configurations would trigger a UAF as Budimir explains:
The parent will have vttree_insert() called on it in init_vf(),
but will not have vttree_remove() called on it in update_vf()
because it does not have the HFSC_FSC flag set.
The qdisc always assumes that inner classes have the HFSC_FSC flag set.
This is by design as it doesn't make sense 'qdisc wise' for an 'rt'
curve to be an inner curve.
Budimir's original patch disallows users to add classes with a 'rt'
parent, but this is too strict as it breaks users that have been using
'rt' as a inner class. Another approach, taken by this patch, is to
upgrade the inner 'rt' into a 'sc', warning the user in the process.
It avoids the UAF reported by Budimir while also being more permissive
to bad scripts/users/code using 'rt' as a inner class.
Users checking the `tc class ls [...]` or `tc class get [...]` dumps would
observe the curve change and are potentially breaking with this change.
v1->v2: https://lore.kernel.org/all/20231013151057.2611860-1-pctammela@mojatatu.com/
- Correct 'Fixes' tag and merge with revert (Jakub)
Cc: Christian Theune <ct(a)flyingcircus.io>
Cc: Budimir Markovic <markovicbudimir(a)gmail.com>
Fixes: b3d26c5702c7 ("net/sched: sch_hfsc: Ensure inner classes have fsc curve")
Signed-off-by: Pedro Tammela <pctammela(a)mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs(a)mojatatu.com>
Link: https://lore.kernel.org/r/20231017143602.3191556-1-pctammela@mojatatu.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3554085bc2be..880c5f16b29c 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -902,6 +902,14 @@ hfsc_change_usc(struct hfsc_class *cl, struct tc_service_curve *usc,
cl->cl_flags |= HFSC_USC;
}
+static void
+hfsc_upgrade_rt(struct hfsc_class *cl)
+{
+ cl->cl_fsc = cl->cl_rsc;
+ rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total);
+ cl->cl_flags |= HFSC_FSC;
+}
+
static const struct nla_policy hfsc_policy[TCA_HFSC_MAX + 1] = {
[TCA_HFSC_RSC] = { .len = sizeof(struct tc_service_curve) },
[TCA_HFSC_FSC] = { .len = sizeof(struct tc_service_curve) },
@@ -1011,10 +1019,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (parent == NULL)
return -ENOENT;
}
- if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) {
- NL_SET_ERR_MSG(extack, "Invalid parent - parent class must have FSC");
- return -EINVAL;
- }
if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0)
return -EINVAL;
@@ -1065,6 +1069,12 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
cl->cf_tree = RB_ROOT;
sch_tree_lock(sch);
+ /* Check if the inner class is a misconfigured 'rt' */
+ if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) {
+ NL_SET_ERR_MSG(extack,
+ "Forced curve change on parent 'rt' to 'sc'");
+ hfsc_upgrade_rt(parent);
+ }
qdisc_class_hash_insert(&q->clhash, &cl->cl_common);
list_add_tail(&cl->siblings, &parent->children);
if (parent->level == 0)
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 61b40cefe51af005c72dbdcf975a3d166c6e6406
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102042-clinic-pruning-a3ca@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 61b40cefe51af005c72dbdcf975a3d166c6e6406 Mon Sep 17 00:00:00 2001
From: Jinjie Ruan <ruanjinjie(a)huawei.com>
Date: Wed, 11 Oct 2023 11:24:19 +0800
Subject: [PATCH] net: dsa: bcm_sf2: Fix possible memory leak in
bcm_sf2_mdio_register()
In bcm_sf2_mdio_register(), the class_find_device() will call get_device()
to increment reference count for priv->master_mii_bus->dev if
of_mdio_find_bus() succeeds. If mdiobus_alloc() or mdiobus_register()
fails, it will call get_device() twice without decrement reference count
for the device. And it is the same if bcm_sf2_mdio_register() succeeds but
fails in bcm_sf2_sw_probe(), or if bcm_sf2_sw_probe() succeeds. If the
reference count has not decremented to zero, the dev related resource will
not be freed.
So remove the get_device() in bcm_sf2_mdio_register(), and call
put_device() if mdiobus_alloc() or mdiobus_register() fails and in
bcm_sf2_mdio_unregister() to solve the issue.
And as Simon suggested, unwind from errors for bcm_sf2_mdio_register() and
just return 0 if it succeeds to make it cleaner.
Fixes: 461cd1b03e32 ("net: dsa: bcm_sf2: Register our slave MDIO bus")
Signed-off-by: Jinjie Ruan <ruanjinjie(a)huawei.com>
Suggested-by: Simon Horman <horms(a)kernel.org>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Reviewed-by: Florian Fainelli <florian.fainelli(a)broadcom.com>
Link: https://lore.kernel.org/r/20231011032419.2423290-1-ruanjinjie@huawei.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 72374b066f64..cd1f240c90f3 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -617,17 +617,16 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds)
dn = of_find_compatible_node(NULL, NULL, "brcm,unimac-mdio");
priv->master_mii_bus = of_mdio_find_bus(dn);
if (!priv->master_mii_bus) {
- of_node_put(dn);
- return -EPROBE_DEFER;
+ err = -EPROBE_DEFER;
+ goto err_of_node_put;
}
- get_device(&priv->master_mii_bus->dev);
priv->master_mii_dn = dn;
priv->slave_mii_bus = mdiobus_alloc();
if (!priv->slave_mii_bus) {
- of_node_put(dn);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto err_put_master_mii_bus_dev;
}
priv->slave_mii_bus->priv = priv;
@@ -684,11 +683,17 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds)
}
err = mdiobus_register(priv->slave_mii_bus);
- if (err && dn) {
- mdiobus_free(priv->slave_mii_bus);
- of_node_put(dn);
- }
+ if (err && dn)
+ goto err_free_slave_mii_bus;
+ return 0;
+
+err_free_slave_mii_bus:
+ mdiobus_free(priv->slave_mii_bus);
+err_put_master_mii_bus_dev:
+ put_device(&priv->master_mii_bus->dev);
+err_of_node_put:
+ of_node_put(dn);
return err;
}
@@ -696,6 +701,7 @@ static void bcm_sf2_mdio_unregister(struct bcm_sf2_priv *priv)
{
mdiobus_unregister(priv->slave_mii_bus);
mdiobus_free(priv->slave_mii_bus);
+ put_device(&priv->master_mii_bus->dev);
of_node_put(priv->master_mii_dn);
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 61b40cefe51af005c72dbdcf975a3d166c6e6406
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102041-speculate-spew-1d42@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 61b40cefe51af005c72dbdcf975a3d166c6e6406 Mon Sep 17 00:00:00 2001
From: Jinjie Ruan <ruanjinjie(a)huawei.com>
Date: Wed, 11 Oct 2023 11:24:19 +0800
Subject: [PATCH] net: dsa: bcm_sf2: Fix possible memory leak in
bcm_sf2_mdio_register()
In bcm_sf2_mdio_register(), the class_find_device() will call get_device()
to increment reference count for priv->master_mii_bus->dev if
of_mdio_find_bus() succeeds. If mdiobus_alloc() or mdiobus_register()
fails, it will call get_device() twice without decrement reference count
for the device. And it is the same if bcm_sf2_mdio_register() succeeds but
fails in bcm_sf2_sw_probe(), or if bcm_sf2_sw_probe() succeeds. If the
reference count has not decremented to zero, the dev related resource will
not be freed.
So remove the get_device() in bcm_sf2_mdio_register(), and call
put_device() if mdiobus_alloc() or mdiobus_register() fails and in
bcm_sf2_mdio_unregister() to solve the issue.
And as Simon suggested, unwind from errors for bcm_sf2_mdio_register() and
just return 0 if it succeeds to make it cleaner.
Fixes: 461cd1b03e32 ("net: dsa: bcm_sf2: Register our slave MDIO bus")
Signed-off-by: Jinjie Ruan <ruanjinjie(a)huawei.com>
Suggested-by: Simon Horman <horms(a)kernel.org>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Reviewed-by: Florian Fainelli <florian.fainelli(a)broadcom.com>
Link: https://lore.kernel.org/r/20231011032419.2423290-1-ruanjinjie@huawei.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 72374b066f64..cd1f240c90f3 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -617,17 +617,16 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds)
dn = of_find_compatible_node(NULL, NULL, "brcm,unimac-mdio");
priv->master_mii_bus = of_mdio_find_bus(dn);
if (!priv->master_mii_bus) {
- of_node_put(dn);
- return -EPROBE_DEFER;
+ err = -EPROBE_DEFER;
+ goto err_of_node_put;
}
- get_device(&priv->master_mii_bus->dev);
priv->master_mii_dn = dn;
priv->slave_mii_bus = mdiobus_alloc();
if (!priv->slave_mii_bus) {
- of_node_put(dn);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto err_put_master_mii_bus_dev;
}
priv->slave_mii_bus->priv = priv;
@@ -684,11 +683,17 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds)
}
err = mdiobus_register(priv->slave_mii_bus);
- if (err && dn) {
- mdiobus_free(priv->slave_mii_bus);
- of_node_put(dn);
- }
+ if (err && dn)
+ goto err_free_slave_mii_bus;
+ return 0;
+
+err_free_slave_mii_bus:
+ mdiobus_free(priv->slave_mii_bus);
+err_put_master_mii_bus_dev:
+ put_device(&priv->master_mii_bus->dev);
+err_of_node_put:
+ of_node_put(dn);
return err;
}
@@ -696,6 +701,7 @@ static void bcm_sf2_mdio_unregister(struct bcm_sf2_priv *priv)
{
mdiobus_unregister(priv->slave_mii_bus);
mdiobus_free(priv->slave_mii_bus);
+ put_device(&priv->master_mii_bus->dev);
of_node_put(priv->master_mii_dn);
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 61b40cefe51af005c72dbdcf975a3d166c6e6406
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102039-chance-unseen-916d@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 61b40cefe51af005c72dbdcf975a3d166c6e6406 Mon Sep 17 00:00:00 2001
From: Jinjie Ruan <ruanjinjie(a)huawei.com>
Date: Wed, 11 Oct 2023 11:24:19 +0800
Subject: [PATCH] net: dsa: bcm_sf2: Fix possible memory leak in
bcm_sf2_mdio_register()
In bcm_sf2_mdio_register(), the class_find_device() will call get_device()
to increment reference count for priv->master_mii_bus->dev if
of_mdio_find_bus() succeeds. If mdiobus_alloc() or mdiobus_register()
fails, it will call get_device() twice without decrement reference count
for the device. And it is the same if bcm_sf2_mdio_register() succeeds but
fails in bcm_sf2_sw_probe(), or if bcm_sf2_sw_probe() succeeds. If the
reference count has not decremented to zero, the dev related resource will
not be freed.
So remove the get_device() in bcm_sf2_mdio_register(), and call
put_device() if mdiobus_alloc() or mdiobus_register() fails and in
bcm_sf2_mdio_unregister() to solve the issue.
And as Simon suggested, unwind from errors for bcm_sf2_mdio_register() and
just return 0 if it succeeds to make it cleaner.
Fixes: 461cd1b03e32 ("net: dsa: bcm_sf2: Register our slave MDIO bus")
Signed-off-by: Jinjie Ruan <ruanjinjie(a)huawei.com>
Suggested-by: Simon Horman <horms(a)kernel.org>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Reviewed-by: Florian Fainelli <florian.fainelli(a)broadcom.com>
Link: https://lore.kernel.org/r/20231011032419.2423290-1-ruanjinjie@huawei.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 72374b066f64..cd1f240c90f3 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -617,17 +617,16 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds)
dn = of_find_compatible_node(NULL, NULL, "brcm,unimac-mdio");
priv->master_mii_bus = of_mdio_find_bus(dn);
if (!priv->master_mii_bus) {
- of_node_put(dn);
- return -EPROBE_DEFER;
+ err = -EPROBE_DEFER;
+ goto err_of_node_put;
}
- get_device(&priv->master_mii_bus->dev);
priv->master_mii_dn = dn;
priv->slave_mii_bus = mdiobus_alloc();
if (!priv->slave_mii_bus) {
- of_node_put(dn);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto err_put_master_mii_bus_dev;
}
priv->slave_mii_bus->priv = priv;
@@ -684,11 +683,17 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds)
}
err = mdiobus_register(priv->slave_mii_bus);
- if (err && dn) {
- mdiobus_free(priv->slave_mii_bus);
- of_node_put(dn);
- }
+ if (err && dn)
+ goto err_free_slave_mii_bus;
+ return 0;
+
+err_free_slave_mii_bus:
+ mdiobus_free(priv->slave_mii_bus);
+err_put_master_mii_bus_dev:
+ put_device(&priv->master_mii_bus->dev);
+err_of_node_put:
+ of_node_put(dn);
return err;
}
@@ -696,6 +701,7 @@ static void bcm_sf2_mdio_unregister(struct bcm_sf2_priv *priv)
{
mdiobus_unregister(priv->slave_mii_bus);
mdiobus_free(priv->slave_mii_bus);
+ put_device(&priv->master_mii_bus->dev);
of_node_put(priv->master_mii_dn);
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 1c2709cfff1dedbb9591e989e2f001484208d914
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102017-pedometer-skier-c67d@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1c2709cfff1dedbb9591e989e2f001484208d914 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell(a)google.com>
Date: Sun, 15 Oct 2023 13:47:00 -0400
Subject: [PATCH] tcp: fix excessive TLP and RACK timeouts from HZ rounding
We discovered from packet traces of slow loss recovery on kernels with
the default HZ=250 setting (and min_rtt < 1ms) that after reordering,
when receiving a SACKed sequence range, the RACK reordering timer was
firing after about 16ms rather than the desired value of roughly
min_rtt/4 + 2ms. The problem is largely due to the RACK reorder timer
calculation adding in TCP_TIMEOUT_MIN, which is 2 jiffies. On kernels
with HZ=250, this is 2*4ms = 8ms. The TLP timer calculation has the
exact same issue.
This commit fixes the TLP transmit timer and RACK reordering timer
floor calculation to more closely match the intended 2ms floor even on
kernels with HZ=250. It does this by adding in a new
TCP_TIMEOUT_MIN_US floor of 2000 us and then converting to jiffies,
instead of the current approach of converting to jiffies and then
adding th TCP_TIMEOUT_MIN value of 2 jiffies.
Our testing has verified that on kernels with HZ=1000, as expected,
this does not produce significant changes in behavior, but on kernels
with the default HZ=250 the latency improvement can be large. For
example, our tests show that for HZ=250 kernels at low RTTs this fix
roughly halves the latency for the RACK reorder timer: instead of
mostly firing at 16ms it mostly fires at 8ms.
Suggested-by: Eric Dumazet <edumazet(a)google.com>
Signed-off-by: Neal Cardwell <ncardwell(a)google.com>
Signed-off-by: Yuchung Cheng <ycheng(a)google.com>
Fixes: bb4d991a28cc ("tcp: adjust tail loss probe timeout")
Reviewed-by: Eric Dumazet <edumazet(a)google.com>
Link: https://lore.kernel.org/r/20231015174700.2206872-1-ncardwell.sw@gmail.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 7b1a720691ae..4b03ca7cb8a5 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -141,6 +141,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCP_RTO_MAX ((unsigned)(120*HZ))
#define TCP_RTO_MIN ((unsigned)(HZ/5))
#define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */
+
+#define TCP_TIMEOUT_MIN_US (2*USEC_PER_MSEC) /* Min TCP timeout in microsecs */
+
#define TCP_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */
#define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value, now
* used as a fallback RTO for the
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9c8c42c280b7..bbd85672fda7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2788,7 +2788,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- u32 timeout, rto_delta_us;
+ u32 timeout, timeout_us, rto_delta_us;
int early_retrans;
/* Don't do any loss probe on a Fast Open connection before 3WHS
@@ -2812,11 +2812,12 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
* sample is available then probe after TCP_TIMEOUT_INIT.
*/
if (tp->srtt_us) {
- timeout = usecs_to_jiffies(tp->srtt_us >> 2);
+ timeout_us = tp->srtt_us >> 2;
if (tp->packets_out == 1)
- timeout += TCP_RTO_MIN;
+ timeout_us += tcp_rto_min_us(sk);
else
- timeout += TCP_TIMEOUT_MIN;
+ timeout_us += TCP_TIMEOUT_MIN_US;
+ timeout = usecs_to_jiffies(timeout_us);
} else {
timeout = TCP_TIMEOUT_INIT;
}
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index acf4869c5d3b..bba10110fbbc 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -104,7 +104,7 @@ bool tcp_rack_mark_lost(struct sock *sk)
tp->rack.advanced = 0;
tcp_rack_detect_loss(sk, &timeout);
if (timeout) {
- timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN;
+ timeout = usecs_to_jiffies(timeout + TCP_TIMEOUT_MIN_US);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
timeout, inet_csk(sk)->icsk_rto);
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 700b2b439766e8aab8a7174991198497345bd411
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102023-rubble-eggshell-ae24@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 700b2b439766e8aab8a7174991198497345bd411 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat(a)kernel.org>
Date: Tue, 17 Oct 2023 08:49:45 +0900
Subject: [PATCH] fprobe: Fix to ensure the number of active retprobes is not
zero
The number of active retprobes can be zero but it is not acceptable,
so return EINVAL error if detected.
Link: https://lore.kernel.org/all/169750018550.186853.11198884812017796410.stgit@…
Reported-by: wuqiang.matt <wuqiang.matt(a)bytedance.com>
Closes: https://lore.kernel.org/all/20231016222103.cb9f426edc60220eabd8aa6a@kernel.…
Fixes: 5b0ab78998e3 ("fprobe: Add exit_handler support")
Signed-off-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 3b21f4063258..881f90f0cbcf 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -189,7 +189,7 @@ static int fprobe_init_rethook(struct fprobe *fp, int num)
{
int i, size;
- if (num < 0)
+ if (num <= 0)
return -EINVAL;
if (!fp->exit_handler) {
@@ -202,8 +202,8 @@ static int fprobe_init_rethook(struct fprobe *fp, int num)
size = fp->nr_maxactive;
else
size = num * num_possible_cpus() * 2;
- if (size < 0)
- return -E2BIG;
+ if (size <= 0)
+ return -EINVAL;
fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler);
if (!fp->rethook)
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 6d41d4fe28724db16ca1016df0713a07e0cc7448
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102006-detector-tweed-1a0c@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6d41d4fe28724db16ca1016df0713a07e0cc7448 Mon Sep 17 00:00:00 2001
From: Dong Chenchen <dongchenchen2(a)huawei.com>
Date: Tue, 15 Aug 2023 22:18:34 +0800
Subject: [PATCH] net: xfrm: skip policies marked as dead while reinserting
policies
BUG: KASAN: slab-use-after-free in xfrm_policy_inexact_list_reinsert+0xb6/0x430
Read of size 1 at addr ffff8881051f3bf8 by task ip/668
CPU: 2 PID: 668 Comm: ip Not tainted 6.5.0-rc5-00182-g25aa0bebba72-dirty #64
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13 04/01/2014
Call Trace:
<TASK>
dump_stack_lvl+0x72/0xa0
print_report+0xd0/0x620
kasan_report+0xb6/0xf0
xfrm_policy_inexact_list_reinsert+0xb6/0x430
xfrm_policy_inexact_insert_node.constprop.0+0x537/0x800
xfrm_policy_inexact_alloc_chain+0x23f/0x320
xfrm_policy_inexact_insert+0x6b/0x590
xfrm_policy_insert+0x3b1/0x480
xfrm_add_policy+0x23c/0x3c0
xfrm_user_rcv_msg+0x2d0/0x510
netlink_rcv_skb+0x10d/0x2d0
xfrm_netlink_rcv+0x49/0x60
netlink_unicast+0x3fe/0x540
netlink_sendmsg+0x528/0x970
sock_sendmsg+0x14a/0x160
____sys_sendmsg+0x4fc/0x580
___sys_sendmsg+0xef/0x160
__sys_sendmsg+0xf7/0x1b0
do_syscall_64+0x3f/0x90
entry_SYSCALL_64_after_hwframe+0x73/0xdd
The root cause is:
cpu 0 cpu1
xfrm_dump_policy
xfrm_policy_walk
list_move_tail
xfrm_add_policy
... ...
xfrm_policy_inexact_list_reinsert
list_for_each_entry_reverse
if (!policy->bydst_reinsert)
//read non-existent policy
xfrm_dump_policy_done
xfrm_policy_walk_done
list_del(&walk->walk.all);
If dump_one_policy() returns err (triggered by netlink socket),
xfrm_policy_walk() will move walk initialized by socket to list
net->xfrm.policy_all. so this socket becomes visible in the global
policy list. The head *walk can be traversed when users add policies
with different prefixlen and trigger xfrm_policy node merge.
The issue can also be triggered by policy list traversal while rehashing
and flushing policies.
It can be fixed by skip such "policies" with walk.dead set to 1.
Fixes: 9cf545ebd591 ("xfrm: policy: store inexact policies in a tree ordered by destination address")
Fixes: 12a169e7d8f4 ("ipsec: Put dumpers on the dump list")
Signed-off-by: Dong Chenchen <dongchenchen2(a)huawei.com>
Signed-off-by: Steffen Klassert <steffen.klassert(a)secunet.com>
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d6b405782b63..113fb7e9cdaf 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -851,7 +851,7 @@ static void xfrm_policy_inexact_list_reinsert(struct net *net,
struct hlist_node *newpos = NULL;
bool matches_s, matches_d;
- if (!policy->bydst_reinsert)
+ if (policy->walk.dead || !policy->bydst_reinsert)
continue;
WARN_ON_ONCE(policy->family != family);
@@ -1256,8 +1256,11 @@ static void xfrm_hash_rebuild(struct work_struct *work)
struct xfrm_pol_inexact_bin *bin;
u8 dbits, sbits;
+ if (policy->walk.dead)
+ continue;
+
dir = xfrm_policy_id2dir(policy->index);
- if (policy->walk.dead || dir >= XFRM_POLICY_MAX)
+ if (dir >= XFRM_POLICY_MAX)
continue;
if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
@@ -1823,9 +1826,11 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
again:
list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+ if (pol->walk.dead)
+ continue;
+
dir = xfrm_policy_id2dir(pol->index);
- if (pol->walk.dead ||
- dir >= XFRM_POLICY_MAX ||
+ if (dir >= XFRM_POLICY_MAX ||
pol->type != type)
continue;
@@ -1862,9 +1867,11 @@ int xfrm_dev_policy_flush(struct net *net, struct net_device *dev,
again:
list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+ if (pol->walk.dead)
+ continue;
+
dir = xfrm_policy_id2dir(pol->index);
- if (pol->walk.dead ||
- dir >= XFRM_POLICY_MAX ||
+ if (dir >= XFRM_POLICY_MAX ||
pol->xdo.dev != dev)
continue;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 6d41d4fe28724db16ca1016df0713a07e0cc7448
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102004-imbecile-uneasy-23a8@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6d41d4fe28724db16ca1016df0713a07e0cc7448 Mon Sep 17 00:00:00 2001
From: Dong Chenchen <dongchenchen2(a)huawei.com>
Date: Tue, 15 Aug 2023 22:18:34 +0800
Subject: [PATCH] net: xfrm: skip policies marked as dead while reinserting
policies
BUG: KASAN: slab-use-after-free in xfrm_policy_inexact_list_reinsert+0xb6/0x430
Read of size 1 at addr ffff8881051f3bf8 by task ip/668
CPU: 2 PID: 668 Comm: ip Not tainted 6.5.0-rc5-00182-g25aa0bebba72-dirty #64
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13 04/01/2014
Call Trace:
<TASK>
dump_stack_lvl+0x72/0xa0
print_report+0xd0/0x620
kasan_report+0xb6/0xf0
xfrm_policy_inexact_list_reinsert+0xb6/0x430
xfrm_policy_inexact_insert_node.constprop.0+0x537/0x800
xfrm_policy_inexact_alloc_chain+0x23f/0x320
xfrm_policy_inexact_insert+0x6b/0x590
xfrm_policy_insert+0x3b1/0x480
xfrm_add_policy+0x23c/0x3c0
xfrm_user_rcv_msg+0x2d0/0x510
netlink_rcv_skb+0x10d/0x2d0
xfrm_netlink_rcv+0x49/0x60
netlink_unicast+0x3fe/0x540
netlink_sendmsg+0x528/0x970
sock_sendmsg+0x14a/0x160
____sys_sendmsg+0x4fc/0x580
___sys_sendmsg+0xef/0x160
__sys_sendmsg+0xf7/0x1b0
do_syscall_64+0x3f/0x90
entry_SYSCALL_64_after_hwframe+0x73/0xdd
The root cause is:
cpu 0 cpu1
xfrm_dump_policy
xfrm_policy_walk
list_move_tail
xfrm_add_policy
... ...
xfrm_policy_inexact_list_reinsert
list_for_each_entry_reverse
if (!policy->bydst_reinsert)
//read non-existent policy
xfrm_dump_policy_done
xfrm_policy_walk_done
list_del(&walk->walk.all);
If dump_one_policy() returns err (triggered by netlink socket),
xfrm_policy_walk() will move walk initialized by socket to list
net->xfrm.policy_all. so this socket becomes visible in the global
policy list. The head *walk can be traversed when users add policies
with different prefixlen and trigger xfrm_policy node merge.
The issue can also be triggered by policy list traversal while rehashing
and flushing policies.
It can be fixed by skip such "policies" with walk.dead set to 1.
Fixes: 9cf545ebd591 ("xfrm: policy: store inexact policies in a tree ordered by destination address")
Fixes: 12a169e7d8f4 ("ipsec: Put dumpers on the dump list")
Signed-off-by: Dong Chenchen <dongchenchen2(a)huawei.com>
Signed-off-by: Steffen Klassert <steffen.klassert(a)secunet.com>
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d6b405782b63..113fb7e9cdaf 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -851,7 +851,7 @@ static void xfrm_policy_inexact_list_reinsert(struct net *net,
struct hlist_node *newpos = NULL;
bool matches_s, matches_d;
- if (!policy->bydst_reinsert)
+ if (policy->walk.dead || !policy->bydst_reinsert)
continue;
WARN_ON_ONCE(policy->family != family);
@@ -1256,8 +1256,11 @@ static void xfrm_hash_rebuild(struct work_struct *work)
struct xfrm_pol_inexact_bin *bin;
u8 dbits, sbits;
+ if (policy->walk.dead)
+ continue;
+
dir = xfrm_policy_id2dir(policy->index);
- if (policy->walk.dead || dir >= XFRM_POLICY_MAX)
+ if (dir >= XFRM_POLICY_MAX)
continue;
if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
@@ -1823,9 +1826,11 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
again:
list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+ if (pol->walk.dead)
+ continue;
+
dir = xfrm_policy_id2dir(pol->index);
- if (pol->walk.dead ||
- dir >= XFRM_POLICY_MAX ||
+ if (dir >= XFRM_POLICY_MAX ||
pol->type != type)
continue;
@@ -1862,9 +1867,11 @@ int xfrm_dev_policy_flush(struct net *net, struct net_device *dev,
again:
list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+ if (pol->walk.dead)
+ continue;
+
dir = xfrm_policy_id2dir(pol->index);
- if (pol->walk.dead ||
- dir >= XFRM_POLICY_MAX ||
+ if (dir >= XFRM_POLICY_MAX ||
pol->xdo.dev != dev)
continue;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 6d41d4fe28724db16ca1016df0713a07e0cc7448
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102003-shank-happiness-527c@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6d41d4fe28724db16ca1016df0713a07e0cc7448 Mon Sep 17 00:00:00 2001
From: Dong Chenchen <dongchenchen2(a)huawei.com>
Date: Tue, 15 Aug 2023 22:18:34 +0800
Subject: [PATCH] net: xfrm: skip policies marked as dead while reinserting
policies
BUG: KASAN: slab-use-after-free in xfrm_policy_inexact_list_reinsert+0xb6/0x430
Read of size 1 at addr ffff8881051f3bf8 by task ip/668
CPU: 2 PID: 668 Comm: ip Not tainted 6.5.0-rc5-00182-g25aa0bebba72-dirty #64
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13 04/01/2014
Call Trace:
<TASK>
dump_stack_lvl+0x72/0xa0
print_report+0xd0/0x620
kasan_report+0xb6/0xf0
xfrm_policy_inexact_list_reinsert+0xb6/0x430
xfrm_policy_inexact_insert_node.constprop.0+0x537/0x800
xfrm_policy_inexact_alloc_chain+0x23f/0x320
xfrm_policy_inexact_insert+0x6b/0x590
xfrm_policy_insert+0x3b1/0x480
xfrm_add_policy+0x23c/0x3c0
xfrm_user_rcv_msg+0x2d0/0x510
netlink_rcv_skb+0x10d/0x2d0
xfrm_netlink_rcv+0x49/0x60
netlink_unicast+0x3fe/0x540
netlink_sendmsg+0x528/0x970
sock_sendmsg+0x14a/0x160
____sys_sendmsg+0x4fc/0x580
___sys_sendmsg+0xef/0x160
__sys_sendmsg+0xf7/0x1b0
do_syscall_64+0x3f/0x90
entry_SYSCALL_64_after_hwframe+0x73/0xdd
The root cause is:
cpu 0 cpu1
xfrm_dump_policy
xfrm_policy_walk
list_move_tail
xfrm_add_policy
... ...
xfrm_policy_inexact_list_reinsert
list_for_each_entry_reverse
if (!policy->bydst_reinsert)
//read non-existent policy
xfrm_dump_policy_done
xfrm_policy_walk_done
list_del(&walk->walk.all);
If dump_one_policy() returns err (triggered by netlink socket),
xfrm_policy_walk() will move walk initialized by socket to list
net->xfrm.policy_all. so this socket becomes visible in the global
policy list. The head *walk can be traversed when users add policies
with different prefixlen and trigger xfrm_policy node merge.
The issue can also be triggered by policy list traversal while rehashing
and flushing policies.
It can be fixed by skip such "policies" with walk.dead set to 1.
Fixes: 9cf545ebd591 ("xfrm: policy: store inexact policies in a tree ordered by destination address")
Fixes: 12a169e7d8f4 ("ipsec: Put dumpers on the dump list")
Signed-off-by: Dong Chenchen <dongchenchen2(a)huawei.com>
Signed-off-by: Steffen Klassert <steffen.klassert(a)secunet.com>
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d6b405782b63..113fb7e9cdaf 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -851,7 +851,7 @@ static void xfrm_policy_inexact_list_reinsert(struct net *net,
struct hlist_node *newpos = NULL;
bool matches_s, matches_d;
- if (!policy->bydst_reinsert)
+ if (policy->walk.dead || !policy->bydst_reinsert)
continue;
WARN_ON_ONCE(policy->family != family);
@@ -1256,8 +1256,11 @@ static void xfrm_hash_rebuild(struct work_struct *work)
struct xfrm_pol_inexact_bin *bin;
u8 dbits, sbits;
+ if (policy->walk.dead)
+ continue;
+
dir = xfrm_policy_id2dir(policy->index);
- if (policy->walk.dead || dir >= XFRM_POLICY_MAX)
+ if (dir >= XFRM_POLICY_MAX)
continue;
if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
@@ -1823,9 +1826,11 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
again:
list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+ if (pol->walk.dead)
+ continue;
+
dir = xfrm_policy_id2dir(pol->index);
- if (pol->walk.dead ||
- dir >= XFRM_POLICY_MAX ||
+ if (dir >= XFRM_POLICY_MAX ||
pol->type != type)
continue;
@@ -1862,9 +1867,11 @@ int xfrm_dev_policy_flush(struct net *net, struct net_device *dev,
again:
list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+ if (pol->walk.dead)
+ continue;
+
dir = xfrm_policy_id2dir(pol->index);
- if (pol->walk.dead ||
- dir >= XFRM_POLICY_MAX ||
+ if (dir >= XFRM_POLICY_MAX ||
pol->xdo.dev != dev)
continue;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 6d41d4fe28724db16ca1016df0713a07e0cc7448
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102002-overplant-unseated-c388@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6d41d4fe28724db16ca1016df0713a07e0cc7448 Mon Sep 17 00:00:00 2001
From: Dong Chenchen <dongchenchen2(a)huawei.com>
Date: Tue, 15 Aug 2023 22:18:34 +0800
Subject: [PATCH] net: xfrm: skip policies marked as dead while reinserting
policies
BUG: KASAN: slab-use-after-free in xfrm_policy_inexact_list_reinsert+0xb6/0x430
Read of size 1 at addr ffff8881051f3bf8 by task ip/668
CPU: 2 PID: 668 Comm: ip Not tainted 6.5.0-rc5-00182-g25aa0bebba72-dirty #64
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13 04/01/2014
Call Trace:
<TASK>
dump_stack_lvl+0x72/0xa0
print_report+0xd0/0x620
kasan_report+0xb6/0xf0
xfrm_policy_inexact_list_reinsert+0xb6/0x430
xfrm_policy_inexact_insert_node.constprop.0+0x537/0x800
xfrm_policy_inexact_alloc_chain+0x23f/0x320
xfrm_policy_inexact_insert+0x6b/0x590
xfrm_policy_insert+0x3b1/0x480
xfrm_add_policy+0x23c/0x3c0
xfrm_user_rcv_msg+0x2d0/0x510
netlink_rcv_skb+0x10d/0x2d0
xfrm_netlink_rcv+0x49/0x60
netlink_unicast+0x3fe/0x540
netlink_sendmsg+0x528/0x970
sock_sendmsg+0x14a/0x160
____sys_sendmsg+0x4fc/0x580
___sys_sendmsg+0xef/0x160
__sys_sendmsg+0xf7/0x1b0
do_syscall_64+0x3f/0x90
entry_SYSCALL_64_after_hwframe+0x73/0xdd
The root cause is:
cpu 0 cpu1
xfrm_dump_policy
xfrm_policy_walk
list_move_tail
xfrm_add_policy
... ...
xfrm_policy_inexact_list_reinsert
list_for_each_entry_reverse
if (!policy->bydst_reinsert)
//read non-existent policy
xfrm_dump_policy_done
xfrm_policy_walk_done
list_del(&walk->walk.all);
If dump_one_policy() returns err (triggered by netlink socket),
xfrm_policy_walk() will move walk initialized by socket to list
net->xfrm.policy_all. so this socket becomes visible in the global
policy list. The head *walk can be traversed when users add policies
with different prefixlen and trigger xfrm_policy node merge.
The issue can also be triggered by policy list traversal while rehashing
and flushing policies.
It can be fixed by skip such "policies" with walk.dead set to 1.
Fixes: 9cf545ebd591 ("xfrm: policy: store inexact policies in a tree ordered by destination address")
Fixes: 12a169e7d8f4 ("ipsec: Put dumpers on the dump list")
Signed-off-by: Dong Chenchen <dongchenchen2(a)huawei.com>
Signed-off-by: Steffen Klassert <steffen.klassert(a)secunet.com>
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d6b405782b63..113fb7e9cdaf 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -851,7 +851,7 @@ static void xfrm_policy_inexact_list_reinsert(struct net *net,
struct hlist_node *newpos = NULL;
bool matches_s, matches_d;
- if (!policy->bydst_reinsert)
+ if (policy->walk.dead || !policy->bydst_reinsert)
continue;
WARN_ON_ONCE(policy->family != family);
@@ -1256,8 +1256,11 @@ static void xfrm_hash_rebuild(struct work_struct *work)
struct xfrm_pol_inexact_bin *bin;
u8 dbits, sbits;
+ if (policy->walk.dead)
+ continue;
+
dir = xfrm_policy_id2dir(policy->index);
- if (policy->walk.dead || dir >= XFRM_POLICY_MAX)
+ if (dir >= XFRM_POLICY_MAX)
continue;
if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
@@ -1823,9 +1826,11 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
again:
list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+ if (pol->walk.dead)
+ continue;
+
dir = xfrm_policy_id2dir(pol->index);
- if (pol->walk.dead ||
- dir >= XFRM_POLICY_MAX ||
+ if (dir >= XFRM_POLICY_MAX ||
pol->type != type)
continue;
@@ -1862,9 +1867,11 @@ int xfrm_dev_policy_flush(struct net *net, struct net_device *dev,
again:
list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
+ if (pol->walk.dead)
+ continue;
+
dir = xfrm_policy_id2dir(pol->index);
- if (pol->walk.dead ||
- dir >= XFRM_POLICY_MAX ||
+ if (dir >= XFRM_POLICY_MAX ||
pol->xdo.dev != dev)
continue;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x cc9b364bb1d58d3dae270c7a931a8cc717dc2b3b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102005-strained-decompose-2eae@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cc9b364bb1d58d3dae270c7a931a8cc717dc2b3b Mon Sep 17 00:00:00 2001
From: Zhang Changzhong <zhangchangzhong(a)huawei.com>
Date: Fri, 15 Sep 2023 19:20:41 +0800
Subject: [PATCH] xfrm6: fix inet6_dev refcount underflow problem
There are race conditions that may lead to inet6_dev refcount underflow
in xfrm6_dst_destroy() and rt6_uncached_list_flush_dev().
One of the refcount underflow bugs is shown below:
(cpu 1) | (cpu 2)
xfrm6_dst_destroy() |
... |
in6_dev_put() |
| rt6_uncached_list_flush_dev()
... | ...
| in6_dev_put()
rt6_uncached_list_del() | ...
... |
xfrm6_dst_destroy() calls rt6_uncached_list_del() after in6_dev_put(),
so rt6_uncached_list_flush_dev() has a chance to call in6_dev_put()
again for the same inet6_dev.
Fix it by moving in6_dev_put() after rt6_uncached_list_del() in
xfrm6_dst_destroy().
Fixes: 510c321b5571 ("xfrm: reuse uncached_list to track xdsts")
Signed-off-by: Zhang Changzhong <zhangchangzhong(a)huawei.com>
Reviewed-by: Xin Long <lucien.xin(a)gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert(a)secunet.com>
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 188224a76685..45d0f9a8b28c 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -117,10 +117,10 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
- if (likely(xdst->u.rt6.rt6i_idev))
- in6_dev_put(xdst->u.rt6.rt6i_idev);
dst_destroy_metrics_generic(dst);
rt6_uncached_list_del(&xdst->u.rt6);
+ if (likely(xdst->u.rt6.rt6i_idev))
+ in6_dev_put(xdst->u.rt6.rt6i_idev);
xfrm_dst_destroy(xdst);
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x cc9b364bb1d58d3dae270c7a931a8cc717dc2b3b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102004-collision-feminism-9a66@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cc9b364bb1d58d3dae270c7a931a8cc717dc2b3b Mon Sep 17 00:00:00 2001
From: Zhang Changzhong <zhangchangzhong(a)huawei.com>
Date: Fri, 15 Sep 2023 19:20:41 +0800
Subject: [PATCH] xfrm6: fix inet6_dev refcount underflow problem
There are race conditions that may lead to inet6_dev refcount underflow
in xfrm6_dst_destroy() and rt6_uncached_list_flush_dev().
One of the refcount underflow bugs is shown below:
(cpu 1) | (cpu 2)
xfrm6_dst_destroy() |
... |
in6_dev_put() |
| rt6_uncached_list_flush_dev()
... | ...
| in6_dev_put()
rt6_uncached_list_del() | ...
... |
xfrm6_dst_destroy() calls rt6_uncached_list_del() after in6_dev_put(),
so rt6_uncached_list_flush_dev() has a chance to call in6_dev_put()
again for the same inet6_dev.
Fix it by moving in6_dev_put() after rt6_uncached_list_del() in
xfrm6_dst_destroy().
Fixes: 510c321b5571 ("xfrm: reuse uncached_list to track xdsts")
Signed-off-by: Zhang Changzhong <zhangchangzhong(a)huawei.com>
Reviewed-by: Xin Long <lucien.xin(a)gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert(a)secunet.com>
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 188224a76685..45d0f9a8b28c 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -117,10 +117,10 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
- if (likely(xdst->u.rt6.rt6i_idev))
- in6_dev_put(xdst->u.rt6.rt6i_idev);
dst_destroy_metrics_generic(dst);
rt6_uncached_list_del(&xdst->u.rt6);
+ if (likely(xdst->u.rt6.rt6i_idev))
+ in6_dev_put(xdst->u.rt6.rt6i_idev);
xfrm_dst_destroy(xdst);
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x cc9b364bb1d58d3dae270c7a931a8cc717dc2b3b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102003-anagram-decency-29a0@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cc9b364bb1d58d3dae270c7a931a8cc717dc2b3b Mon Sep 17 00:00:00 2001
From: Zhang Changzhong <zhangchangzhong(a)huawei.com>
Date: Fri, 15 Sep 2023 19:20:41 +0800
Subject: [PATCH] xfrm6: fix inet6_dev refcount underflow problem
There are race conditions that may lead to inet6_dev refcount underflow
in xfrm6_dst_destroy() and rt6_uncached_list_flush_dev().
One of the refcount underflow bugs is shown below:
(cpu 1) | (cpu 2)
xfrm6_dst_destroy() |
... |
in6_dev_put() |
| rt6_uncached_list_flush_dev()
... | ...
| in6_dev_put()
rt6_uncached_list_del() | ...
... |
xfrm6_dst_destroy() calls rt6_uncached_list_del() after in6_dev_put(),
so rt6_uncached_list_flush_dev() has a chance to call in6_dev_put()
again for the same inet6_dev.
Fix it by moving in6_dev_put() after rt6_uncached_list_del() in
xfrm6_dst_destroy().
Fixes: 510c321b5571 ("xfrm: reuse uncached_list to track xdsts")
Signed-off-by: Zhang Changzhong <zhangchangzhong(a)huawei.com>
Reviewed-by: Xin Long <lucien.xin(a)gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert(a)secunet.com>
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 188224a76685..45d0f9a8b28c 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -117,10 +117,10 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
- if (likely(xdst->u.rt6.rt6i_idev))
- in6_dev_put(xdst->u.rt6.rt6i_idev);
dst_destroy_metrics_generic(dst);
rt6_uncached_list_del(&xdst->u.rt6);
+ if (likely(xdst->u.rt6.rt6i_idev))
+ in6_dev_put(xdst->u.rt6.rt6i_idev);
xfrm_dst_destroy(xdst);
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x cc9b364bb1d58d3dae270c7a931a8cc717dc2b3b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102002-laurel-arise-0294@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cc9b364bb1d58d3dae270c7a931a8cc717dc2b3b Mon Sep 17 00:00:00 2001
From: Zhang Changzhong <zhangchangzhong(a)huawei.com>
Date: Fri, 15 Sep 2023 19:20:41 +0800
Subject: [PATCH] xfrm6: fix inet6_dev refcount underflow problem
There are race conditions that may lead to inet6_dev refcount underflow
in xfrm6_dst_destroy() and rt6_uncached_list_flush_dev().
One of the refcount underflow bugs is shown below:
(cpu 1) | (cpu 2)
xfrm6_dst_destroy() |
... |
in6_dev_put() |
| rt6_uncached_list_flush_dev()
... | ...
| in6_dev_put()
rt6_uncached_list_del() | ...
... |
xfrm6_dst_destroy() calls rt6_uncached_list_del() after in6_dev_put(),
so rt6_uncached_list_flush_dev() has a chance to call in6_dev_put()
again for the same inet6_dev.
Fix it by moving in6_dev_put() after rt6_uncached_list_del() in
xfrm6_dst_destroy().
Fixes: 510c321b5571 ("xfrm: reuse uncached_list to track xdsts")
Signed-off-by: Zhang Changzhong <zhangchangzhong(a)huawei.com>
Reviewed-by: Xin Long <lucien.xin(a)gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert(a)secunet.com>
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 188224a76685..45d0f9a8b28c 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -117,10 +117,10 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
- if (likely(xdst->u.rt6.rt6i_idev))
- in6_dev_put(xdst->u.rt6.rt6i_idev);
dst_destroy_metrics_generic(dst);
rt6_uncached_list_del(&xdst->u.rt6);
+ if (likely(xdst->u.rt6.rt6i_idev))
+ in6_dev_put(xdst->u.rt6.rt6i_idev);
xfrm_dst_destroy(xdst);
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x cc9b364bb1d58d3dae270c7a931a8cc717dc2b3b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102001-barometer-press-265e@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cc9b364bb1d58d3dae270c7a931a8cc717dc2b3b Mon Sep 17 00:00:00 2001
From: Zhang Changzhong <zhangchangzhong(a)huawei.com>
Date: Fri, 15 Sep 2023 19:20:41 +0800
Subject: [PATCH] xfrm6: fix inet6_dev refcount underflow problem
There are race conditions that may lead to inet6_dev refcount underflow
in xfrm6_dst_destroy() and rt6_uncached_list_flush_dev().
One of the refcount underflow bugs is shown below:
(cpu 1) | (cpu 2)
xfrm6_dst_destroy() |
... |
in6_dev_put() |
| rt6_uncached_list_flush_dev()
... | ...
| in6_dev_put()
rt6_uncached_list_del() | ...
... |
xfrm6_dst_destroy() calls rt6_uncached_list_del() after in6_dev_put(),
so rt6_uncached_list_flush_dev() has a chance to call in6_dev_put()
again for the same inet6_dev.
Fix it by moving in6_dev_put() after rt6_uncached_list_del() in
xfrm6_dst_destroy().
Fixes: 510c321b5571 ("xfrm: reuse uncached_list to track xdsts")
Signed-off-by: Zhang Changzhong <zhangchangzhong(a)huawei.com>
Reviewed-by: Xin Long <lucien.xin(a)gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert(a)secunet.com>
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 188224a76685..45d0f9a8b28c 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -117,10 +117,10 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
- if (likely(xdst->u.rt6.rt6i_idev))
- in6_dev_put(xdst->u.rt6.rt6i_idev);
dst_destroy_metrics_generic(dst);
rt6_uncached_list_del(&xdst->u.rt6);
+ if (likely(xdst->u.rt6.rt6i_idev))
+ in6_dev_put(xdst->u.rt6.rt6i_idev);
xfrm_dst_destroy(xdst);
}
Some Jasperlake Chromebooks overwrite the system vendor DMI value to the
name of the OEM that manufactured the device. This breaks Chromebook
quirk detection as it expects the system vendor to be "Google".
Add another quirk detection entry that looks for "Google" in the BIOS
version.
Cc: stable(a)vger.kernel.org
Signed-off-by: Mark Hasemeyer <markhas(a)chromium.org>
---
sound/hda/intel-dsp-config.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c
index 24a948baf1bc..756fa0aa69bb 100644
--- a/sound/hda/intel-dsp-config.c
+++ b/sound/hda/intel-dsp-config.c
@@ -336,6 +336,12 @@ static const struct config_entry config_table[] = {
DMI_MATCH(DMI_SYS_VENDOR, "Google"),
}
},
+ {
+ .ident = "Google firmware",
+ .matches = {
+ DMI_MATCH(DMI_BIOS_VERSION, "Google"),
+ }
+ },
{}
}
},
--
2.42.0.655.g421f12c284-goog
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 2f3389c73832ad90b63208c0fc281ad080114c7a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102029-display-provolone-fab2@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2f3389c73832ad90b63208c0fc281ad080114c7a Mon Sep 17 00:00:00 2001
From: Manish Chopra <manishc(a)marvell.com>
Date: Fri, 13 Oct 2023 18:48:12 +0530
Subject: [PATCH] qed: fix LL2 RX buffer allocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Driver allocates the LL2 rx buffers from kmalloc()
area to construct the skb using slab_build_skb()
The required size allocation seems to have overlooked
for accounting both skb_shared_info size and device
placement padding bytes which results into the below
panic when doing skb_put() for a standard MTU sized frame.
skbuff: skb_over_panic: text:ffffffffc0b0225f len:1514 put:1514
head:ff3dabceaf39c000 data:ff3dabceaf39c042 tail:0x62c end:0x566
dev:<NULL>
…
skb_panic+0x48/0x4a
skb_put.cold+0x10/0x10
qed_ll2b_complete_rx_packet+0x14f/0x260 [qed]
qed_ll2_rxq_handle_completion.constprop.0+0x169/0x200 [qed]
qed_ll2_rxq_completion+0xba/0x320 [qed]
qed_int_sp_dpc+0x1a7/0x1e0 [qed]
This patch fixes this by accouting skb_shared_info and device
placement padding size bytes when allocating the buffers.
Cc: David S. Miller <davem(a)davemloft.net>
Fixes: 0a7fb11c23c0 ("qed: Add Light L2 support")
Signed-off-by: Manish Chopra <manishc(a)marvell.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index 717a0b3f89bd..ab5ef254a748 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -113,7 +113,10 @@ static void qed_ll2b_complete_tx_packet(void *cxt,
static int qed_ll2_alloc_buffer(struct qed_dev *cdev,
u8 **data, dma_addr_t *phys_addr)
{
- *data = kmalloc(cdev->ll2->rx_size, GFP_ATOMIC);
+ size_t size = cdev->ll2->rx_size + NET_SKB_PAD +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+ *data = kmalloc(size, GFP_ATOMIC);
if (!(*data)) {
DP_INFO(cdev, "Failed to allocate LL2 buffer data\n");
return -ENOMEM;
@@ -2589,7 +2592,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
INIT_LIST_HEAD(&cdev->ll2->list);
spin_lock_init(&cdev->ll2->lock);
- cdev->ll2->rx_size = NET_SKB_PAD + ETH_HLEN +
+ cdev->ll2->rx_size = PRM_DMA_PAD_BYTES_NUM + ETH_HLEN +
L1_CACHE_BYTES + params->mtu;
/* Allocate memory for LL2.
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 2f3389c73832ad90b63208c0fc281ad080114c7a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102030-strict-cake-e8e2@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2f3389c73832ad90b63208c0fc281ad080114c7a Mon Sep 17 00:00:00 2001
From: Manish Chopra <manishc(a)marvell.com>
Date: Fri, 13 Oct 2023 18:48:12 +0530
Subject: [PATCH] qed: fix LL2 RX buffer allocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Driver allocates the LL2 rx buffers from kmalloc()
area to construct the skb using slab_build_skb()
The required size allocation seems to have overlooked
for accounting both skb_shared_info size and device
placement padding bytes which results into the below
panic when doing skb_put() for a standard MTU sized frame.
skbuff: skb_over_panic: text:ffffffffc0b0225f len:1514 put:1514
head:ff3dabceaf39c000 data:ff3dabceaf39c042 tail:0x62c end:0x566
dev:<NULL>
…
skb_panic+0x48/0x4a
skb_put.cold+0x10/0x10
qed_ll2b_complete_rx_packet+0x14f/0x260 [qed]
qed_ll2_rxq_handle_completion.constprop.0+0x169/0x200 [qed]
qed_ll2_rxq_completion+0xba/0x320 [qed]
qed_int_sp_dpc+0x1a7/0x1e0 [qed]
This patch fixes this by accouting skb_shared_info and device
placement padding size bytes when allocating the buffers.
Cc: David S. Miller <davem(a)davemloft.net>
Fixes: 0a7fb11c23c0 ("qed: Add Light L2 support")
Signed-off-by: Manish Chopra <manishc(a)marvell.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index 717a0b3f89bd..ab5ef254a748 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -113,7 +113,10 @@ static void qed_ll2b_complete_tx_packet(void *cxt,
static int qed_ll2_alloc_buffer(struct qed_dev *cdev,
u8 **data, dma_addr_t *phys_addr)
{
- *data = kmalloc(cdev->ll2->rx_size, GFP_ATOMIC);
+ size_t size = cdev->ll2->rx_size + NET_SKB_PAD +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+ *data = kmalloc(size, GFP_ATOMIC);
if (!(*data)) {
DP_INFO(cdev, "Failed to allocate LL2 buffer data\n");
return -ENOMEM;
@@ -2589,7 +2592,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
INIT_LIST_HEAD(&cdev->ll2->list);
spin_lock_init(&cdev->ll2->lock);
- cdev->ll2->rx_size = NET_SKB_PAD + ETH_HLEN +
+ cdev->ll2->rx_size = PRM_DMA_PAD_BYTES_NUM + ETH_HLEN +
L1_CACHE_BYTES + params->mtu;
/* Allocate memory for LL2.
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 3ebebb2c1eca92a15107b2d7aeff34196fd9e217
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102002-womanhood-mushy-54a2@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3ebebb2c1eca92a15107b2d7aeff34196fd9e217 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Tue, 3 Oct 2023 17:55:56 +0200
Subject: [PATCH] ASoC: codecs: wcd938x: fix runtime PM imbalance on remove
Make sure to balance the runtime PM operations, including the disable
count, on driver unbind.
Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver")
Cc: stable(a)vger.kernel.org # 5.14
Cc: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Link: https://lore.kernel.org/r/20231003155558.27079-6-johan+linaro@kernel.org
Signed-off-by: Mark Brown <broonie(a)kernel.org>
diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c
index 679c627f7eaa..d27b919c63b4 100644
--- a/sound/soc/codecs/wcd938x.c
+++ b/sound/soc/codecs/wcd938x.c
@@ -3620,9 +3620,15 @@ static int wcd938x_probe(struct platform_device *pdev)
static void wcd938x_remove(struct platform_device *pdev)
{
- struct wcd938x_priv *wcd938x = dev_get_drvdata(&pdev->dev);
+ struct device *dev = &pdev->dev;
+ struct wcd938x_priv *wcd938x = dev_get_drvdata(dev);
+
+ component_master_del(dev, &wcd938x_comp_ops);
+
+ pm_runtime_disable(dev);
+ pm_runtime_set_suspended(dev);
+ pm_runtime_dont_use_autosuspend(dev);
- component_master_del(&pdev->dev, &wcd938x_comp_ops);
regulator_bulk_disable(WCD938X_MAX_SUPPLY, wcd938x->supplies);
regulator_bulk_free(WCD938X_MAX_SUPPLY, wcd938x->supplies);
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 3ebebb2c1eca92a15107b2d7aeff34196fd9e217
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102002-maximum-tray-450b@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3ebebb2c1eca92a15107b2d7aeff34196fd9e217 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Tue, 3 Oct 2023 17:55:56 +0200
Subject: [PATCH] ASoC: codecs: wcd938x: fix runtime PM imbalance on remove
Make sure to balance the runtime PM operations, including the disable
count, on driver unbind.
Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver")
Cc: stable(a)vger.kernel.org # 5.14
Cc: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Link: https://lore.kernel.org/r/20231003155558.27079-6-johan+linaro@kernel.org
Signed-off-by: Mark Brown <broonie(a)kernel.org>
diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c
index 679c627f7eaa..d27b919c63b4 100644
--- a/sound/soc/codecs/wcd938x.c
+++ b/sound/soc/codecs/wcd938x.c
@@ -3620,9 +3620,15 @@ static int wcd938x_probe(struct platform_device *pdev)
static void wcd938x_remove(struct platform_device *pdev)
{
- struct wcd938x_priv *wcd938x = dev_get_drvdata(&pdev->dev);
+ struct device *dev = &pdev->dev;
+ struct wcd938x_priv *wcd938x = dev_get_drvdata(dev);
+
+ component_master_del(dev, &wcd938x_comp_ops);
+
+ pm_runtime_disable(dev);
+ pm_runtime_set_suspended(dev);
+ pm_runtime_dont_use_autosuspend(dev);
- component_master_del(&pdev->dev, &wcd938x_comp_ops);
regulator_bulk_disable(WCD938X_MAX_SUPPLY, wcd938x->supplies);
regulator_bulk_free(WCD938X_MAX_SUPPLY, wcd938x->supplies);
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x dcc583c225e659d5da34b4ad83914fd6b51e3dbf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102025-copious-thud-be0f@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dcc583c225e659d5da34b4ad83914fd6b51e3dbf Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wenst(a)chromium.org>
Date: Wed, 4 Oct 2023 16:32:24 +0800
Subject: [PATCH] drm/mediatek: Correctly free sg_table in gem prime vmap
The MediaTek DRM driver implements GEM PRIME vmap by fetching the
sg_table for the object, iterating through the pages, and then
vmapping them. In essence, unlike the GEM DMA helpers which vmap
when the object is first created or imported, the MediaTek version
does it on request.
Unfortunately, the code never correctly frees the sg_table contents.
This results in a kernel memory leak. On a Hayato device with a text
console on the internal display, this results in the system running
out of memory in a few days from all the console screen cursor updates.
Add sg_free_table() to correctly free the contents of the sg_table. This
was missing despite explicitly required by mtk_gem_prime_get_sg_table().
Also move the "out" shortcut label to after the kfree() call for the
sg_table. Having sg_free_table() together with kfree() makes more sense.
The shortcut is only used when the object already has a kernel address,
in which case the pointer is NULL and kfree() does nothing. Hence this
change causes no functional change.
Fixes: 3df64d7b0a4f ("drm/mediatek: Implement gem prime vmap/vunmap function")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Chen-Yu Tsai <wenst(a)chromium.org>
Reviewed-by: CK Hu <ck.hu(a)mediatek.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20231004083226.1940055…
Signed-off-by: Chun-Kuang Hu <chunkuang.hu(a)kernel.org>
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
index 9f364df52478..0e0a41b2f57f 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
@@ -239,6 +239,7 @@ int mtk_drm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map)
npages = obj->size >> PAGE_SHIFT;
mtk_gem->pages = kcalloc(npages, sizeof(*mtk_gem->pages), GFP_KERNEL);
if (!mtk_gem->pages) {
+ sg_free_table(sgt);
kfree(sgt);
return -ENOMEM;
}
@@ -248,12 +249,15 @@ int mtk_drm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map)
mtk_gem->kvaddr = vmap(mtk_gem->pages, npages, VM_MAP,
pgprot_writecombine(PAGE_KERNEL));
if (!mtk_gem->kvaddr) {
+ sg_free_table(sgt);
kfree(sgt);
kfree(mtk_gem->pages);
return -ENOMEM;
}
-out:
+ sg_free_table(sgt);
kfree(sgt);
+
+out:
iosys_map_set_vaddr(map, mtk_gem->kvaddr);
return 0;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x dcc583c225e659d5da34b4ad83914fd6b51e3dbf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102024-bagginess-ultra-3e5b@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dcc583c225e659d5da34b4ad83914fd6b51e3dbf Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wenst(a)chromium.org>
Date: Wed, 4 Oct 2023 16:32:24 +0800
Subject: [PATCH] drm/mediatek: Correctly free sg_table in gem prime vmap
The MediaTek DRM driver implements GEM PRIME vmap by fetching the
sg_table for the object, iterating through the pages, and then
vmapping them. In essence, unlike the GEM DMA helpers which vmap
when the object is first created or imported, the MediaTek version
does it on request.
Unfortunately, the code never correctly frees the sg_table contents.
This results in a kernel memory leak. On a Hayato device with a text
console on the internal display, this results in the system running
out of memory in a few days from all the console screen cursor updates.
Add sg_free_table() to correctly free the contents of the sg_table. This
was missing despite explicitly required by mtk_gem_prime_get_sg_table().
Also move the "out" shortcut label to after the kfree() call for the
sg_table. Having sg_free_table() together with kfree() makes more sense.
The shortcut is only used when the object already has a kernel address,
in which case the pointer is NULL and kfree() does nothing. Hence this
change causes no functional change.
Fixes: 3df64d7b0a4f ("drm/mediatek: Implement gem prime vmap/vunmap function")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Chen-Yu Tsai <wenst(a)chromium.org>
Reviewed-by: CK Hu <ck.hu(a)mediatek.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20231004083226.1940055…
Signed-off-by: Chun-Kuang Hu <chunkuang.hu(a)kernel.org>
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
index 9f364df52478..0e0a41b2f57f 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
@@ -239,6 +239,7 @@ int mtk_drm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map)
npages = obj->size >> PAGE_SHIFT;
mtk_gem->pages = kcalloc(npages, sizeof(*mtk_gem->pages), GFP_KERNEL);
if (!mtk_gem->pages) {
+ sg_free_table(sgt);
kfree(sgt);
return -ENOMEM;
}
@@ -248,12 +249,15 @@ int mtk_drm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map)
mtk_gem->kvaddr = vmap(mtk_gem->pages, npages, VM_MAP,
pgprot_writecombine(PAGE_KERNEL));
if (!mtk_gem->kvaddr) {
+ sg_free_table(sgt);
kfree(sgt);
kfree(mtk_gem->pages);
return -ENOMEM;
}
-out:
+ sg_free_table(sgt);
kfree(sgt);
+
+out:
iosys_map_set_vaddr(map, mtk_gem->kvaddr);
return 0;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x dcc583c225e659d5da34b4ad83914fd6b51e3dbf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102023-praising-mumbo-a93b@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dcc583c225e659d5da34b4ad83914fd6b51e3dbf Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wenst(a)chromium.org>
Date: Wed, 4 Oct 2023 16:32:24 +0800
Subject: [PATCH] drm/mediatek: Correctly free sg_table in gem prime vmap
The MediaTek DRM driver implements GEM PRIME vmap by fetching the
sg_table for the object, iterating through the pages, and then
vmapping them. In essence, unlike the GEM DMA helpers which vmap
when the object is first created or imported, the MediaTek version
does it on request.
Unfortunately, the code never correctly frees the sg_table contents.
This results in a kernel memory leak. On a Hayato device with a text
console on the internal display, this results in the system running
out of memory in a few days from all the console screen cursor updates.
Add sg_free_table() to correctly free the contents of the sg_table. This
was missing despite explicitly required by mtk_gem_prime_get_sg_table().
Also move the "out" shortcut label to after the kfree() call for the
sg_table. Having sg_free_table() together with kfree() makes more sense.
The shortcut is only used when the object already has a kernel address,
in which case the pointer is NULL and kfree() does nothing. Hence this
change causes no functional change.
Fixes: 3df64d7b0a4f ("drm/mediatek: Implement gem prime vmap/vunmap function")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Chen-Yu Tsai <wenst(a)chromium.org>
Reviewed-by: CK Hu <ck.hu(a)mediatek.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20231004083226.1940055…
Signed-off-by: Chun-Kuang Hu <chunkuang.hu(a)kernel.org>
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
index 9f364df52478..0e0a41b2f57f 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
@@ -239,6 +239,7 @@ int mtk_drm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map)
npages = obj->size >> PAGE_SHIFT;
mtk_gem->pages = kcalloc(npages, sizeof(*mtk_gem->pages), GFP_KERNEL);
if (!mtk_gem->pages) {
+ sg_free_table(sgt);
kfree(sgt);
return -ENOMEM;
}
@@ -248,12 +249,15 @@ int mtk_drm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map)
mtk_gem->kvaddr = vmap(mtk_gem->pages, npages, VM_MAP,
pgprot_writecombine(PAGE_KERNEL));
if (!mtk_gem->kvaddr) {
+ sg_free_table(sgt);
kfree(sgt);
kfree(mtk_gem->pages);
return -ENOMEM;
}
-out:
+ sg_free_table(sgt);
kfree(sgt);
+
+out:
iosys_map_set_vaddr(map, mtk_gem->kvaddr);
return 0;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 72377ab2d671befd6390a1d5677f5cca61235b65
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102002-grooving-carnivore-aa31@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 72377ab2d671befd6390a1d5677f5cca61235b65 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Wed, 18 Oct 2023 11:23:54 -0700
Subject: [PATCH] mptcp: more conservative check for zero probes
Christoph reported that the MPTCP protocol can find the subflow-level
write queue unexpectedly not empty while crafting a zero-window probe,
hitting a warning:
------------[ cut here ]------------
WARNING: CPU: 0 PID: 188 at net/mptcp/protocol.c:1312 mptcp_sendmsg_frag+0xc06/0xe70
Modules linked in:
CPU: 0 PID: 188 Comm: kworker/0:2 Not tainted 6.6.0-rc2-g1176aa719d7a #47
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014
Workqueue: events mptcp_worker
RIP: 0010:mptcp_sendmsg_frag+0xc06/0xe70 net/mptcp/protocol.c:1312
RAX: 47d0530de347ff6a RBX: 47d0530de347ff6b RCX: ffff8881015d3c00
RDX: ffff8881015d3c00 RSI: 47d0530de347ff6b RDI: 47d0530de347ff6b
RBP: 47d0530de347ff6b R08: ffffffff8243c6a8 R09: ffffffff82042d9c
R10: 0000000000000002 R11: ffffffff82056850 R12: ffff88812a13d580
R13: 0000000000000001 R14: ffff88812b375e50 R15: ffff88812bbf3200
FS: 0000000000000000(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000695118 CR3: 0000000115dfc001 CR4: 0000000000170ef0
Call Trace:
<TASK>
__subflow_push_pending+0xa4/0x420 net/mptcp/protocol.c:1545
__mptcp_push_pending+0x128/0x3b0 net/mptcp/protocol.c:1614
mptcp_release_cb+0x218/0x5b0 net/mptcp/protocol.c:3391
release_sock+0xf6/0x100 net/core/sock.c:3521
mptcp_worker+0x6e8/0x8f0 net/mptcp/protocol.c:2746
process_scheduled_works+0x341/0x690 kernel/workqueue.c:2630
worker_thread+0x3a7/0x610 kernel/workqueue.c:2784
kthread+0x143/0x180 kernel/kthread.c:388
ret_from_fork+0x4d/0x60 arch/x86/kernel/process.c:147
ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:304
</TASK>
The root cause of the issue is that expectations are wrong: e.g. due
to MPTCP-level re-injection we can hit the critical condition.
Explicitly avoid the zero-window probe when the subflow write queue
is not empty and drop the related warnings.
Reported-by: Christoph Paasch <cpaasch(a)apple.com>
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/444
Fixes: f70cad1085d1 ("mptcp: stop relying on tcp_tx_skb_cache")
Cc: stable(a)vger.kernel.org
Reviewed-by: Mat Martineau <martineau(a)kernel.org>
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Signed-off-by: Mat Martineau <martineau(a)kernel.org>
Link: https://lore.kernel.org/r/20231018-send-net-20231018-v1-3-17ecb002e41d@kern…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index d1902373c974..4e30e5ba3795 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1298,7 +1298,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
if (copy == 0) {
u64 snd_una = READ_ONCE(msk->snd_una);
- if (snd_una != msk->snd_nxt) {
+ if (snd_una != msk->snd_nxt || tcp_write_queue_tail(ssk)) {
tcp_remove_empty_skb(ssk);
return 0;
}
@@ -1306,11 +1306,6 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
zero_window_probe = true;
data_seq = snd_una - 1;
copy = 1;
-
- /* all mptcp-level data is acked, no skbs should be present into the
- * ssk write queue
- */
- WARN_ON_ONCE(reuse_skb);
}
copy = min_t(size_t, copy, info->limit - info->sent);
@@ -1339,7 +1334,6 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
if (reuse_skb) {
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
mpext->data_len += copy;
- WARN_ON_ONCE(zero_window_probe);
goto out;
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x b29a2acd36dd7a33c63f260df738fb96baa3d4f8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102044-ice-badass-92b5@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b29a2acd36dd7a33c63f260df738fb96baa3d4f8 Mon Sep 17 00:00:00 2001
From: Roman Kagan <rkagan(a)amazon.de>
Date: Thu, 4 May 2023 14:00:42 +0200
Subject: [PATCH] KVM: x86/pmu: Truncate counter value to allowed width on
write
Performance counters are defined to have width less than 64 bits. The
vPMU code maintains the counters in u64 variables but assumes the value
to fit within the defined width. However, for Intel non-full-width
counters (MSR_IA32_PERFCTRx) the value receieved from the guest is
truncated to 32 bits and then sign-extended to full 64 bits. If a
negative value is set, it's sign-extended to 64 bits, but then in
kvm_pmu_incr_counter() it's incremented, truncated, and compared to the
previous value for overflow detection.
That previous value is not truncated, so it always evaluates bigger than
the truncated new one, and a PMI is injected. If the PMI handler writes
a negative counter value itself, the vCPU never quits the PMI loop.
Turns out that Linux PMI handler actually does write the counter with
the value just read with RDPMC, so when no full-width support is exposed
via MSR_IA32_PERF_CAPABILITIES, and the guest initializes the counter to
a negative value, it locks up.
This has been observed in the field, for example, when the guest configures
atop to use perfevents and runs two instances of it simultaneously.
To address the problem, maintain the invariant that the counter value
always fits in the defined bit width, by truncating the received value
in the respective set_msr methods. For better readability, factor the
out into a helper function, pmc_write_counter(), shared by vmx and svm
parts.
Fixes: 9cd803d496e7 ("KVM: x86: Update vPMCs when retiring instructions")
Cc: stable(a)vger.kernel.org
Signed-off-by: Roman Kagan <rkagan(a)amazon.de>
Link: https://lore.kernel.org/all/20230504120042.785651-1-rkagan@amazon.de
Tested-by: Like Xu <likexu(a)tencent.com>
[sean: tweak changelog, s/set/write in the helper]
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 7d9ba301c090..1d64113de488 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -74,6 +74,12 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc)
return counter & pmc_bitmask(pmc);
}
+static inline void pmc_write_counter(struct kvm_pmc *pmc, u64 val)
+{
+ pmc->counter += val - pmc_read_counter(pmc);
+ pmc->counter &= pmc_bitmask(pmc);
+}
+
static inline void pmc_release_perf_event(struct kvm_pmc *pmc)
{
if (pmc->perf_event) {
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index cef5a3d0abd0..373ff6a6687b 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -160,7 +160,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
/* MSR_PERFCTRn */
pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
if (pmc) {
- pmc->counter += data - pmc_read_counter(pmc);
+ pmc_write_counter(pmc, data);
pmc_update_sample_period(pmc);
return 0;
}
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index f2efa0bf7ae8..820d3e1f6b4f 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -436,11 +436,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!msr_info->host_initiated &&
!(msr & MSR_PMC_FULL_WIDTH_BIT))
data = (s64)(s32)data;
- pmc->counter += data - pmc_read_counter(pmc);
+ pmc_write_counter(pmc, data);
pmc_update_sample_period(pmc);
break;
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
- pmc->counter += data - pmc_read_counter(pmc);
+ pmc_write_counter(pmc, data);
pmc_update_sample_period(pmc);
break;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
When calculating the hotness threshold for lru_prio scheme of
DAMON_LRU_SORT, the module divides some values by the maximum
nr_accesses. However, due to the type of the related variables, simple
division-based calculation of the divisor can return zero. As a result,
divide-by-zero is possible. Fix it by using damon_max_nr_accesses(),
which handles the case.
Reported-by: Jakub Acs <acsjakub(a)amazon.de>
Fixes: 40e983cca927 ("mm/damon: introduce DAMON-based LRU-lists Sorting")
Cc: <stable(a)vger.kernel.org> # 6.0.x
Signed-off-by: SeongJae Park <sj(a)kernel.org>
---
mm/damon/lru_sort.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c
index 3ecdcc029443..f2e5f9431892 100644
--- a/mm/damon/lru_sort.c
+++ b/mm/damon/lru_sort.c
@@ -195,9 +195,7 @@ static int damon_lru_sort_apply_parameters(void)
if (err)
return err;
- /* aggr_interval / sample_interval is the maximum nr_accesses */
- hot_thres = damon_lru_sort_mon_attrs.aggr_interval /
- damon_lru_sort_mon_attrs.sample_interval *
+ hot_thres = damon_max_nr_accesses(&damon_lru_sort_mon_attrs) *
hot_thres_access_freq / 1000;
scheme = damon_lru_sort_new_hot_scheme(hot_thres);
if (!scheme)
--
2.34.1
When calculating the hotness of each region for the under-quota regions
prioritization, DAMON divides some values by the maximum nr_accesses.
However, due to the type of the related variables, simple division-based
calculation of the divisor can return zero. As a result, divide-by-zero
is possible. Fix it by using damon_max_nr_accesses(), which handles the
case.
Reported-by: Jakub Acs <acsjakub(a)amazon.de>
Fixes: 198f0f4c58b9 ("mm/damon/vaddr,paddr: support pageout prioritization")
Cc: <stable(a)vger.kernel.org> # 5.16.x
Signed-off-by: SeongJae Park <sj(a)kernel.org>
---
mm/damon/ops-common.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c
index ac1c3fa80f98..d25d99cb5f2b 100644
--- a/mm/damon/ops-common.c
+++ b/mm/damon/ops-common.c
@@ -73,7 +73,6 @@ void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr
int damon_hot_score(struct damon_ctx *c, struct damon_region *r,
struct damos *s)
{
- unsigned int max_nr_accesses;
int freq_subscore;
unsigned int age_in_sec;
int age_in_log, age_subscore;
@@ -81,8 +80,8 @@ int damon_hot_score(struct damon_ctx *c, struct damon_region *r,
unsigned int age_weight = s->quota.weight_age;
int hotness;
- max_nr_accesses = c->attrs.aggr_interval / c->attrs.sample_interval;
- freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE / max_nr_accesses;
+ freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE /
+ damon_max_nr_accesses(&c->attrs);
age_in_sec = (unsigned long)r->age * c->attrs.aggr_interval / 1000000;
for (age_in_log = 0; age_in_log < DAMON_MAX_AGE_IN_LOG && age_in_sec;
--
2.34.1
When monitoring attributes are changed, DAMON updates access rate of the
monitoring results accordingly. For that, it divides some values by the
maximum nr_accesses. However, due to the type of the related variables,
simple division-based calculation of the divisor can return zero. As a
result, divide-by-zero is possible. Fix it by using
damon_max_nr_accesses(), which handles the case.
Reported-by: Jakub Acs <acsjakub(a)amazon.de>
Fixes: 2f5bef5a590b ("mm/damon/core: update monitoring results for new monitoring attributes")
Cc: <stable(a)vger.kernel.org> # 6.3.x
Signed-off-by: SeongJae Park <sj(a)kernel.org>
---
mm/damon/core.c | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 9f4f7c378cf3..e194c8075235 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -500,20 +500,14 @@ static unsigned int damon_age_for_new_attrs(unsigned int age,
static unsigned int damon_accesses_bp_to_nr_accesses(
unsigned int accesses_bp, struct damon_attrs *attrs)
{
- unsigned int max_nr_accesses =
- attrs->aggr_interval / attrs->sample_interval;
-
- return accesses_bp * max_nr_accesses / 10000;
+ return accesses_bp * damon_max_nr_accesses(attrs) / 10000;
}
/* convert nr_accesses to access ratio in bp (per 10,000) */
static unsigned int damon_nr_accesses_to_accesses_bp(
unsigned int nr_accesses, struct damon_attrs *attrs)
{
- unsigned int max_nr_accesses =
- attrs->aggr_interval / attrs->sample_interval;
-
- return nr_accesses * 10000 / max_nr_accesses;
+ return nr_accesses * 10000 / damon_max_nr_accesses(attrs);
}
static unsigned int damon_nr_accesses_for_new_attrs(unsigned int nr_accesses,
--
2.34.1
The maximum nr_accesses of given DAMON context can be calculated by
dividing the aggregation interval by the sampling interval. Some logics
in DAMON uses the maximum nr_accesses as a divisor. Hence, the value
shouldn't be zero. Such case is avoided since DAMON avoids setting the
agregation interval as samller than the sampling interval. However,
since nr_accesses is unsigned int while the intervals are unsigned long,
the maximum nr_accesses could be zero while casting. Implement a
function that handles the corner case.
Note that this commit is not fixing the real issue since this is only
introducing the safe function that will replaces the problematic
divisions. The replacements will be made by followup commits, to make
backporting on stable series easier.
Reported-by: Jakub Acs <acsjakub(a)amazon.de>
Fixes: 198f0f4c58b9 ("mm/damon/vaddr,paddr: support pageout prioritization")
Cc: <stable(a)vger.kernel.org> # 5.16.x
Signed-off-by: SeongJae Park <sj(a)kernel.org>
---
include/linux/damon.h | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/include/linux/damon.h b/include/linux/damon.h
index 27b995c22497..ab2f17d9926b 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -681,6 +681,13 @@ static inline bool damon_target_has_pid(const struct damon_ctx *ctx)
return ctx->ops.id == DAMON_OPS_VADDR || ctx->ops.id == DAMON_OPS_FVADDR;
}
+static inline unsigned int damon_max_nr_accesses(const struct damon_attrs *attrs)
+{
+ /* {aggr,sample}_interval are unsigned long, hence could overflow */
+ return min(attrs->aggr_interval / attrs->sample_interval,
+ (unsigned long)UINT_MAX);
+}
+
int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive);
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
--
2.34.1
The patch titled
Subject: mm/damon/lru_sort: avoid divide-by-zero in hot threshold calculation
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-damon-lru_sort-avoid-divide-by-zero-in-hot-threshold-calculation.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: SeongJae Park <sj(a)kernel.org>
Subject: mm/damon/lru_sort: avoid divide-by-zero in hot threshold calculation
Date: Thu, 19 Oct 2023 19:49:23 +0000
When calculating the hotness threshold for lru_prio scheme of
DAMON_LRU_SORT, the module divides some values by the maximum nr_accesses.
However, due to the type of the related variables, simple division-based
calculation of the divisor can return zero. As a result, divide-by-zero
is possible. Fix it by using damon_max_nr_accesses(), which handles the
case.
Link: https://lkml.kernel.org/r/20231019194924.100347-5-sj@kernel.org
Fixes: 40e983cca927 ("mm/damon: introduce DAMON-based LRU-lists Sorting")
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Cc: <stable(a)vger.kernel.org> [6.0+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/damon/lru_sort.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
--- a/mm/damon/lru_sort.c~mm-damon-lru_sort-avoid-divide-by-zero-in-hot-threshold-calculation
+++ a/mm/damon/lru_sort.c
@@ -193,9 +193,7 @@ static int damon_lru_sort_apply_paramete
if (err)
return err;
- /* aggr_interval / sample_interval is the maximum nr_accesses */
- hot_thres = damon_lru_sort_mon_attrs.aggr_interval /
- damon_lru_sort_mon_attrs.sample_interval *
+ hot_thres = damon_max_nr_accesses(&damon_lru_sort_mon_attrs) *
hot_thres_access_freq / 1000;
scheme = damon_lru_sort_new_hot_scheme(hot_thres);
if (!scheme)
_
Patches currently in -mm which might be from sj(a)kernel.org are
mm-damon-implement-a-function-for-max-nr_accesses-safe-calculation.patch
mm-damon-core-avoid-divide-by-zero-during-monitoring-results-update.patch
mm-damon-ops-common-avoid-divide-by-zero-during-region-hotness-calculation.patch
mm-damon-lru_sort-avoid-divide-by-zero-in-hot-threshold-calculation.patch
The patch titled
Subject: mm/damon/ops-common: avoid divide-by-zero during region hotness calculation
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-damon-ops-common-avoid-divide-by-zero-during-region-hotness-calculation.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: SeongJae Park <sj(a)kernel.org>
Subject: mm/damon/ops-common: avoid divide-by-zero during region hotness calculation
Date: Thu, 19 Oct 2023 19:49:22 +0000
When calculating the hotness of each region for the under-quota regions
prioritization, DAMON divides some values by the maximum nr_accesses.
However, due to the type of the related variables, simple division-based
calculation of the divisor can return zero. As a result, divide-by-zero
is possible. Fix it by using damon_max_nr_accesses(), which handles the
case.
Link: https://lkml.kernel.org/r/20231019194924.100347-4-sj@kernel.org
Fixes: 198f0f4c58b9 ("mm/damon/vaddr,paddr: support pageout prioritization")
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Cc: <stable(a)vger.kernel.org> [5.16+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/damon/ops-common.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
--- a/mm/damon/ops-common.c~mm-damon-ops-common-avoid-divide-by-zero-during-region-hotness-calculation
+++ a/mm/damon/ops-common.c
@@ -73,7 +73,6 @@ void damon_pmdp_mkold(pmd_t *pmd, struct
int damon_hot_score(struct damon_ctx *c, struct damon_region *r,
struct damos *s)
{
- unsigned int max_nr_accesses;
int freq_subscore;
unsigned int age_in_sec;
int age_in_log, age_subscore;
@@ -81,8 +80,8 @@ int damon_hot_score(struct damon_ctx *c,
unsigned int age_weight = s->quota.weight_age;
int hotness;
- max_nr_accesses = c->attrs.aggr_interval / c->attrs.sample_interval;
- freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE / max_nr_accesses;
+ freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE /
+ damon_max_nr_accesses(&c->attrs);
age_in_sec = (unsigned long)r->age * c->attrs.aggr_interval / 1000000;
for (age_in_log = 0; age_in_log < DAMON_MAX_AGE_IN_LOG && age_in_sec;
_
Patches currently in -mm which might be from sj(a)kernel.org are
mm-damon-implement-a-function-for-max-nr_accesses-safe-calculation.patch
mm-damon-core-avoid-divide-by-zero-during-monitoring-results-update.patch
mm-damon-ops-common-avoid-divide-by-zero-during-region-hotness-calculation.patch
mm-damon-lru_sort-avoid-divide-by-zero-in-hot-threshold-calculation.patch
The patch titled
Subject: mm/damon/core: avoid divide-by-zero during monitoring results update
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-damon-core-avoid-divide-by-zero-during-monitoring-results-update.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: SeongJae Park <sj(a)kernel.org>
Subject: mm/damon/core: avoid divide-by-zero during monitoring results update
Date: Thu, 19 Oct 2023 19:49:21 +0000
When monitoring attributes are changed, DAMON updates access rate of the
monitoring results accordingly. For that, it divides some values by the
maximum nr_accesses. However, due to the type of the related variables,
simple division-based calculation of the divisor can return zero. As a
result, divide-by-zero is possible. Fix it by using
damon_max_nr_accesses(), which handles the case.
Link: https://lkml.kernel.org/r/20231019194924.100347-3-sj@kernel.org
Fixes: 2f5bef5a590b ("mm/damon/core: update monitoring results for new monitoring attributes")
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Cc: <stable(a)vger.kernel.org> [6.3+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/damon/core.c | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)
--- a/mm/damon/core.c~mm-damon-core-avoid-divide-by-zero-during-monitoring-results-update
+++ a/mm/damon/core.c
@@ -476,20 +476,14 @@ static unsigned int damon_age_for_new_at
static unsigned int damon_accesses_bp_to_nr_accesses(
unsigned int accesses_bp, struct damon_attrs *attrs)
{
- unsigned int max_nr_accesses =
- attrs->aggr_interval / attrs->sample_interval;
-
- return accesses_bp * max_nr_accesses / 10000;
+ return accesses_bp * damon_max_nr_accesses(attrs) / 10000;
}
/* convert nr_accesses to access ratio in bp (per 10,000) */
static unsigned int damon_nr_accesses_to_accesses_bp(
unsigned int nr_accesses, struct damon_attrs *attrs)
{
- unsigned int max_nr_accesses =
- attrs->aggr_interval / attrs->sample_interval;
-
- return nr_accesses * 10000 / max_nr_accesses;
+ return nr_accesses * 10000 / damon_max_nr_accesses(attrs);
}
static unsigned int damon_nr_accesses_for_new_attrs(unsigned int nr_accesses,
_
Patches currently in -mm which might be from sj(a)kernel.org are
mm-damon-implement-a-function-for-max-nr_accesses-safe-calculation.patch
mm-damon-core-avoid-divide-by-zero-during-monitoring-results-update.patch
mm-damon-ops-common-avoid-divide-by-zero-during-region-hotness-calculation.patch
mm-damon-lru_sort-avoid-divide-by-zero-in-hot-threshold-calculation.patch
The patch titled
Subject: mm/damon: implement a function for max nr_accesses safe calculation
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-damon-implement-a-function-for-max-nr_accesses-safe-calculation.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: SeongJae Park <sj(a)kernel.org>
Subject: mm/damon: implement a function for max nr_accesses safe calculation
Date: Thu, 19 Oct 2023 19:49:20 +0000
Patch series "avoid divide-by-zero due to max_nr_accesses overflow".
The maximum nr_accesses of given DAMON context can be calculated by
dividing the aggregation interval by the sampling interval. Some logics
in DAMON uses the maximum nr_accesses as a divisor. Hence, the value
shouldn't be zero. Such case is avoided since DAMON avoids setting the
agregation interval as samller than the sampling interval. However, since
nr_accesses is unsigned int while the intervals are unsigned long, the
maximum nr_accesses could be zero while casting.
Avoid the divide-by-zero by implementing a function that handles the
corner case (first patch), and replaces the vulnerable direct max
nr_accesses calculations (remaining patches).
Note that the patches for the replacements are divided for broken commits,
to make backporting on required tres easier. Especially, the last patch
is for a patch that not yet merged into the mainline but in mm tree.
This patch (of 4):
The maximum nr_accesses of given DAMON context can be calculated by
dividing the aggregation interval by the sampling interval. Some logics
in DAMON uses the maximum nr_accesses as a divisor. Hence, the value
shouldn't be zero. Such case is avoided since DAMON avoids setting the
agregation interval as samller than the sampling interval. However, since
nr_accesses is unsigned int while the intervals are unsigned long, the
maximum nr_accesses could be zero while casting. Implement a function
that handles the corner case.
Note that this commit is not fixing the real issue since this is only
introducing the safe function that will replaces the problematic
divisions. The replacements will be made by followup commits, to make
backporting on stable series easier.
Link: https://lkml.kernel.org/r/20231019194924.100347-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20231019194924.100347-2-sj@kernel.org
Fixes: 198f0f4c58b9 ("mm/damon/vaddr,paddr: support pageout prioritization")
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Cc: <stable(a)vger.kernel.org> [5.16+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/damon.h | 7 +++++++
1 file changed, 7 insertions(+)
--- a/include/linux/damon.h~mm-damon-implement-a-function-for-max-nr_accesses-safe-calculation
+++ a/include/linux/damon.h
@@ -642,6 +642,13 @@ static inline bool damon_target_has_pid(
return ctx->ops.id == DAMON_OPS_VADDR || ctx->ops.id == DAMON_OPS_FVADDR;
}
+static inline unsigned int damon_max_nr_accesses(const struct damon_attrs *attrs)
+{
+ /* {aggr,sample}_interval are unsigned long, hence could overflow */
+ return min(attrs->aggr_interval / attrs->sample_interval,
+ (unsigned long)UINT_MAX);
+}
+
int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive);
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
_
Patches currently in -mm which might be from sj(a)kernel.org are
mm-damon-implement-a-function-for-max-nr_accesses-safe-calculation.patch
mm-damon-core-avoid-divide-by-zero-during-monitoring-results-update.patch
mm-damon-ops-common-avoid-divide-by-zero-during-region-hotness-calculation.patch
mm-damon-lru_sort-avoid-divide-by-zero-in-hot-threshold-calculation.patch
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x c68681ae46eaaa1640b52fe366d21a93b2185df5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102058-bullish-chess-e399@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c68681ae46eaaa1640b52fe366d21a93b2185df5 Mon Sep 17 00:00:00 2001
From: Albert Huang <huangjie.albert(a)bytedance.com>
Date: Wed, 11 Oct 2023 15:48:51 +0800
Subject: [PATCH] net/smc: fix smc clc failed issue when netdevice not in
init_net
If the netdevice is within a container and communicates externally
through network technologies such as VxLAN, we won't be able to find
routing information in the init_net namespace. To address this issue,
we need to add a struct net parameter to the smc_ib_find_route function.
This allow us to locate the routing information within the corresponding
net namespace, ensuring the correct completion of the SMC CLC interaction.
Fixes: e5c4744cfb59 ("net/smc: add SMC-Rv2 connection establishment")
Signed-off-by: Albert Huang <huangjie.albert(a)bytedance.com>
Reviewed-by: Dust Li <dust.li(a)linux.alibaba.com>
Reviewed-by: Wenjia Zhang <wenjia(a)linux.ibm.com>
Link: https://lore.kernel.org/r/20231011074851.95280-1-huangjie.albert@bytedance.…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index bacdd971615e..7a874da90c7f 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1201,6 +1201,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
(struct smc_clc_msg_accept_confirm_v2 *)aclc;
struct smc_clc_first_contact_ext *fce =
smc_get_clc_first_contact_ext(clc_v2, false);
+ struct net *net = sock_net(&smc->sk);
int rc;
if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
@@ -1210,7 +1211,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
ini->smcrv2.uses_gateway = false;
} else {
- if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr,
+ if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr,
smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
ini->smcrv2.nexthop_mac,
&ini->smcrv2.uses_gateway))
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 9b66d6aeeb1a..89981dbe46c9 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -193,7 +193,7 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
}
-int smc_ib_find_route(__be32 saddr, __be32 daddr,
+int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
u8 nexthop_mac[], u8 *uses_gateway)
{
struct neighbour *neigh = NULL;
@@ -205,7 +205,7 @@ int smc_ib_find_route(__be32 saddr, __be32 daddr,
if (daddr == cpu_to_be32(INADDR_NONE))
goto out;
- rt = ip_route_output_flow(&init_net, &fl4, NULL);
+ rt = ip_route_output_flow(net, &fl4, NULL);
if (IS_ERR(rt))
goto out;
if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET)
@@ -235,6 +235,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
struct in_device *in_dev = __in_dev_get_rcu(ndev);
+ struct net *net = dev_net(ndev);
const struct in_ifaddr *ifa;
bool subnet_match = false;
@@ -248,7 +249,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
}
if (!subnet_match)
goto out;
- if (smcrv2->daddr && smc_ib_find_route(smcrv2->saddr,
+ if (smcrv2->daddr && smc_ib_find_route(net, smcrv2->saddr,
smcrv2->daddr,
smcrv2->nexthop_mac,
&smcrv2->uses_gateway))
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 4df5f8c8a0a1..ef8ac2b7546d 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -112,7 +112,7 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk,
int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
unsigned short vlan_id, u8 gid[], u8 *sgid_index,
struct smc_init_info_smcrv2 *smcrv2);
-int smc_ib_find_route(__be32 saddr, __be32 daddr,
+int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
u8 nexthop_mac[], u8 *uses_gateway);
bool smc_ib_is_valid_local_systemid(void);
int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
The patch below does not apply to the 6.5-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.5.y
git checkout FETCH_HEAD
git cherry-pick -x c68681ae46eaaa1640b52fe366d21a93b2185df5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102057-arming-autism-6d65@gregkh' --subject-prefix 'PATCH 6.5.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c68681ae46eaaa1640b52fe366d21a93b2185df5 Mon Sep 17 00:00:00 2001
From: Albert Huang <huangjie.albert(a)bytedance.com>
Date: Wed, 11 Oct 2023 15:48:51 +0800
Subject: [PATCH] net/smc: fix smc clc failed issue when netdevice not in
init_net
If the netdevice is within a container and communicates externally
through network technologies such as VxLAN, we won't be able to find
routing information in the init_net namespace. To address this issue,
we need to add a struct net parameter to the smc_ib_find_route function.
This allow us to locate the routing information within the corresponding
net namespace, ensuring the correct completion of the SMC CLC interaction.
Fixes: e5c4744cfb59 ("net/smc: add SMC-Rv2 connection establishment")
Signed-off-by: Albert Huang <huangjie.albert(a)bytedance.com>
Reviewed-by: Dust Li <dust.li(a)linux.alibaba.com>
Reviewed-by: Wenjia Zhang <wenjia(a)linux.ibm.com>
Link: https://lore.kernel.org/r/20231011074851.95280-1-huangjie.albert@bytedance.…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index bacdd971615e..7a874da90c7f 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1201,6 +1201,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
(struct smc_clc_msg_accept_confirm_v2 *)aclc;
struct smc_clc_first_contact_ext *fce =
smc_get_clc_first_contact_ext(clc_v2, false);
+ struct net *net = sock_net(&smc->sk);
int rc;
if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
@@ -1210,7 +1211,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
ini->smcrv2.uses_gateway = false;
} else {
- if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr,
+ if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr,
smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
ini->smcrv2.nexthop_mac,
&ini->smcrv2.uses_gateway))
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 9b66d6aeeb1a..89981dbe46c9 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -193,7 +193,7 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
}
-int smc_ib_find_route(__be32 saddr, __be32 daddr,
+int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
u8 nexthop_mac[], u8 *uses_gateway)
{
struct neighbour *neigh = NULL;
@@ -205,7 +205,7 @@ int smc_ib_find_route(__be32 saddr, __be32 daddr,
if (daddr == cpu_to_be32(INADDR_NONE))
goto out;
- rt = ip_route_output_flow(&init_net, &fl4, NULL);
+ rt = ip_route_output_flow(net, &fl4, NULL);
if (IS_ERR(rt))
goto out;
if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET)
@@ -235,6 +235,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) {
struct in_device *in_dev = __in_dev_get_rcu(ndev);
+ struct net *net = dev_net(ndev);
const struct in_ifaddr *ifa;
bool subnet_match = false;
@@ -248,7 +249,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev,
}
if (!subnet_match)
goto out;
- if (smcrv2->daddr && smc_ib_find_route(smcrv2->saddr,
+ if (smcrv2->daddr && smc_ib_find_route(net, smcrv2->saddr,
smcrv2->daddr,
smcrv2->nexthop_mac,
&smcrv2->uses_gateway))
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 4df5f8c8a0a1..ef8ac2b7546d 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -112,7 +112,7 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk,
int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
unsigned short vlan_id, u8 gid[], u8 *sgid_index,
struct smc_init_info_smcrv2 *smcrv2);
-int smc_ib_find_route(__be32 saddr, __be32 daddr,
+int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr,
u8 nexthop_mac[], u8 *uses_gateway);
bool smc_ib_is_valid_local_systemid(void);
int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 0288c3e709e5fabd51e84715c5c798a02f43061a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102021-polygraph-modular-418a@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0288c3e709e5fabd51e84715c5c798a02f43061a Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg(a)intel.com>
Date: Wed, 11 Oct 2023 16:33:33 -0700
Subject: [PATCH] ice: reset first in crash dump kernels
When the system boots into the crash dump kernel after a panic, the ice
networking device may still have pending transactions that can cause errors
or machine checks when the device is re-enabled. This can prevent the crash
dump kernel from loading the driver or collecting the crash data.
To avoid this issue, perform a function level reset (FLR) on the ice device
via PCIe config space before enabling it on the crash kernel. This will
clear any outstanding transactions and stop all queues and interrupts.
Restore the config space after the FLR, otherwise it was found in testing
that the driver wouldn't load successfully.
The following sequence causes the original issue:
- Load the ice driver with modprobe ice
- Enable SR-IOV with 2 VFs: echo 2 > /sys/class/net/eth0/device/sriov_num_vfs
- Trigger a crash with echo c > /proc/sysrq-trigger
- Load the ice driver again (or let it load automatically) with modprobe ice
- The system crashes again during pcim_enable_device()
Fixes: 837f08fdecbe ("ice: Add basic driver framework for Intel(R) E800 Series")
Reported-by: Vishal Agrawal <vagrawal(a)redhat.com>
Reviewed-by: Jay Vosburgh <jay.vosburgh(a)canonical.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel(a)intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg(a)intel.com>
Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha(a)intel.com> (A Contingent worker at Intel)
Link: https://lore.kernel.org/r/20231011233334.336092-3-jacob.e.keller@intel.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index c8286adae946..6550c46e4e36 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -6,6 +6,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <generated/utsrelease.h>
+#include <linux/crash_dump.h>
#include "ice.h"
#include "ice_base.h"
#include "ice_lib.h"
@@ -5014,6 +5015,20 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
return -EINVAL;
}
+ /* when under a kdump kernel initiate a reset before enabling the
+ * device in order to clear out any pending DMA transactions. These
+ * transactions can cause some systems to machine check when doing
+ * the pcim_enable_device() below.
+ */
+ if (is_kdump_kernel()) {
+ pci_save_state(pdev);
+ pci_clear_master(pdev);
+ err = pcie_flr(pdev);
+ if (err)
+ return err;
+ pci_restore_state(pdev);
+ }
+
/* this driver uses devres, see
* Documentation/driver-api/driver-model/devres.rst
*/
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 0288c3e709e5fabd51e84715c5c798a02f43061a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102020-spyglass-frostlike-54b5@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0288c3e709e5fabd51e84715c5c798a02f43061a Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg(a)intel.com>
Date: Wed, 11 Oct 2023 16:33:33 -0700
Subject: [PATCH] ice: reset first in crash dump kernels
When the system boots into the crash dump kernel after a panic, the ice
networking device may still have pending transactions that can cause errors
or machine checks when the device is re-enabled. This can prevent the crash
dump kernel from loading the driver or collecting the crash data.
To avoid this issue, perform a function level reset (FLR) on the ice device
via PCIe config space before enabling it on the crash kernel. This will
clear any outstanding transactions and stop all queues and interrupts.
Restore the config space after the FLR, otherwise it was found in testing
that the driver wouldn't load successfully.
The following sequence causes the original issue:
- Load the ice driver with modprobe ice
- Enable SR-IOV with 2 VFs: echo 2 > /sys/class/net/eth0/device/sriov_num_vfs
- Trigger a crash with echo c > /proc/sysrq-trigger
- Load the ice driver again (or let it load automatically) with modprobe ice
- The system crashes again during pcim_enable_device()
Fixes: 837f08fdecbe ("ice: Add basic driver framework for Intel(R) E800 Series")
Reported-by: Vishal Agrawal <vagrawal(a)redhat.com>
Reviewed-by: Jay Vosburgh <jay.vosburgh(a)canonical.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel(a)intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg(a)intel.com>
Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha(a)intel.com> (A Contingent worker at Intel)
Link: https://lore.kernel.org/r/20231011233334.336092-3-jacob.e.keller@intel.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index c8286adae946..6550c46e4e36 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -6,6 +6,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <generated/utsrelease.h>
+#include <linux/crash_dump.h>
#include "ice.h"
#include "ice_base.h"
#include "ice_lib.h"
@@ -5014,6 +5015,20 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
return -EINVAL;
}
+ /* when under a kdump kernel initiate a reset before enabling the
+ * device in order to clear out any pending DMA transactions. These
+ * transactions can cause some systems to machine check when doing
+ * the pcim_enable_device() below.
+ */
+ if (is_kdump_kernel()) {
+ pci_save_state(pdev);
+ pci_clear_master(pdev);
+ err = pcie_flr(pdev);
+ if (err)
+ return err;
+ pci_restore_state(pdev);
+ }
+
/* this driver uses devres, see
* Documentation/driver-api/driver-model/devres.rst
*/
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 242e34500a32631f85c2b4eb6cb42a368a39e54f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102051-monsoon-democracy-4b10@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 242e34500a32631f85c2b4eb6cb42a368a39e54f Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg(a)intel.com>
Date: Tue, 10 Oct 2023 13:30:59 -0700
Subject: [PATCH] ice: fix over-shifted variable
Since the introduction of the ice driver the code has been
double-shifting the RSS enabling field, because the define already has
shifts in it and can't have the regular pattern of "a << shiftval &
mask" applied.
Most places in the code got it right, but one line was still wrong. Fix
this one location for easy backports to stable. An in-progress patch
fixes the defines to "standard" and will be applied as part of the
regular -next process sometime after this one.
Fixes: d76a60ba7afb ("ice: Add support for VLANs and offloads")
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel(a)intel.com>
CC: stable(a)vger.kernel.org
Signed-off-by: Jesse Brandeburg <jesse.brandeburg(a)intel.com>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha(a)intel.com> (A Contingent worker at Intel)
Signed-off-by: Jacob Keller <jacob.e.keller(a)intel.com>
Link: https://lore.kernel.org/r/20231010203101.406248-1-jacob.e.keller@intel.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 7bf9b7069754..73bbf06a76db 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1201,8 +1201,7 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
ctxt->info.q_opt_rss = ((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) &
ICE_AQ_VSI_Q_OPT_RSS_LUT_M) |
- ((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) &
- ICE_AQ_VSI_Q_OPT_RSS_HASH_M);
+ (hash_type & ICE_AQ_VSI_Q_OPT_RSS_HASH_M);
}
static void
The patch below does not apply to the 6.5-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.5.y
git checkout FETCH_HEAD
git cherry-pick -x acab8ff29a2a226409cfe04e6d2e0896928c1b3a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102005-stomp-defy-0f8e@gregkh' --subject-prefix 'PATCH 6.5.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From acab8ff29a2a226409cfe04e6d2e0896928c1b3a Mon Sep 17 00:00:00 2001
From: Iulia Tanasescu <iulia.tanasescu(a)nxp.com>
Date: Thu, 28 Sep 2023 10:52:57 +0300
Subject: [PATCH] Bluetooth: ISO: Fix invalid context error
This moves the hci_le_terminate_big_sync call from rx_work
to cmd_sync_work, to avoid calling sleeping function from
an invalid context.
Reported-by: syzbot+c715e1bd8dfbcb1ab176(a)syzkaller.appspotmail.com
Fixes: a0bfde167b50 ("Bluetooth: ISO: Add support for connecting multiple BISes")
Signed-off-by: Iulia Tanasescu <iulia.tanasescu(a)nxp.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 31d02b54eea1..e6cfc65abcb8 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -7021,6 +7021,14 @@ static void hci_le_cis_req_evt(struct hci_dev *hdev, void *data,
hci_dev_unlock(hdev);
}
+static int hci_iso_term_big_sync(struct hci_dev *hdev, void *data)
+{
+ u8 handle = PTR_UINT(data);
+
+ return hci_le_terminate_big_sync(hdev, handle,
+ HCI_ERROR_LOCAL_HOST_TERM);
+}
+
static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -7065,16 +7073,17 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data,
rcu_read_lock();
}
+ rcu_read_unlock();
+
if (!ev->status && !i)
/* If no BISes have been connected for the BIG,
* terminate. This is in case all bound connections
* have been closed before the BIG creation
* has completed.
*/
- hci_le_terminate_big_sync(hdev, ev->handle,
- HCI_ERROR_LOCAL_HOST_TERM);
+ hci_cmd_sync_queue(hdev, hci_iso_term_big_sync,
+ UINT_PTR(ev->handle), NULL);
- rcu_read_unlock();
hci_dev_unlock(hdev);
}
The patch below does not apply to the 6.5-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.5.y
git checkout FETCH_HEAD
git cherry-pick -x a239110ee8e0b0aafa265f0d54f7a16744855e70
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023102019-shove-stagnant-982e@gregkh' --subject-prefix 'PATCH 6.5.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a239110ee8e0b0aafa265f0d54f7a16744855e70 Mon Sep 17 00:00:00 2001
From: Pauli Virtanen <pav(a)iki.fi>
Date: Sat, 30 Sep 2023 15:53:32 +0300
Subject: [PATCH] Bluetooth: hci_sync: always check if connection is alive
before deleting
In hci_abort_conn_sync it is possible that conn is deleted concurrently
by something else, also e.g. when waiting for hdev->lock. This causes
double deletion of the conn, so UAF or conn_hash.list corruption.
Fix by having all code paths check that the connection is still in
conn_hash before deleting it, while holding hdev->lock which prevents
any races.
Log (when powering off while BAP streaming, occurs rarely):
=======================================================================
kernel BUG at lib/list_debug.c:56!
...
? __list_del_entry_valid (lib/list_debug.c:56)
hci_conn_del (net/bluetooth/hci_conn.c:154) bluetooth
hci_abort_conn_sync (net/bluetooth/hci_sync.c:5415) bluetooth
? __pfx_hci_abort_conn_sync+0x10/0x10 [bluetooth]
? lock_release+0x1d5/0x3c0
? hci_disconnect_all_sync.constprop.0+0xb2/0x230 [bluetooth]
? __pfx_lock_release+0x10/0x10
? __kmem_cache_free+0x14d/0x2e0
hci_disconnect_all_sync.constprop.0+0xda/0x230 [bluetooth]
? __pfx_hci_disconnect_all_sync.constprop.0+0x10/0x10 [bluetooth]
? hci_clear_adv_sync+0x14f/0x170 [bluetooth]
? __pfx_set_powered_sync+0x10/0x10 [bluetooth]
hci_set_powered_sync+0x293/0x450 [bluetooth]
=======================================================================
Fixes: 94d9ba9f9888 ("Bluetooth: hci_sync: Fix UAF in hci_disconnect_all_sync")
Signed-off-by: Pauli Virtanen <pav(a)iki.fi>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index d06e07a0ea5a..a15ab0b874a9 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -5369,6 +5369,7 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason)
{
int err = 0;
u16 handle = conn->handle;
+ bool disconnect = false;
struct hci_conn *c;
switch (conn->state) {
@@ -5399,24 +5400,15 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason)
hci_dev_unlock(hdev);
return 0;
case BT_BOUND:
- hci_dev_lock(hdev);
- hci_conn_failed(conn, reason);
- hci_dev_unlock(hdev);
- return 0;
+ break;
default:
- hci_dev_lock(hdev);
- conn->state = BT_CLOSED;
- hci_disconn_cfm(conn, reason);
- hci_conn_del(conn);
- hci_dev_unlock(hdev);
- return 0;
+ disconnect = true;
+ break;
}
hci_dev_lock(hdev);
- /* Check if the connection hasn't been cleanup while waiting
- * commands to complete.
- */
+ /* Check if the connection has been cleaned up concurrently */
c = hci_conn_hash_lookup_handle(hdev, handle);
if (!c || c != conn) {
err = 0;
@@ -5428,7 +5420,13 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason)
* or in case of LE it was still scanning so it can be cleanup
* safely.
*/
- hci_conn_failed(conn, reason);
+ if (disconnect) {
+ conn->state = BT_CLOSED;
+ hci_disconn_cfm(conn, reason);
+ hci_conn_del(conn);
+ } else {
+ hci_conn_failed(conn, reason);
+ }
unlock:
hci_dev_unlock(hdev);
This is the start of the stable review cycle for the 5.4.237 release.
There are 68 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Fri, 17 Mar 2023 11:57:10 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.237-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.4.237-rc1
Stefan Haberland <sth(a)linux.ibm.com>
s390/dasd: add missing discipline function
Masahiro Yamada <masahiroy(a)kernel.org>
UML: define RUNTIME_DISCARD_EXIT
Tom Saeger <tom.saeger(a)oracle.com>
sh: define RUNTIME_DISCARD_EXIT
Masahiro Yamada <masahiroy(a)kernel.org>
s390: define RUNTIME_DISCARD_EXIT to fix link error with GNU ld < 2.36
Michael Ellerman <mpe(a)ellerman.id.au>
powerpc/vmlinux.lds: Don't discard .rela* for relocatable builds
Michael Ellerman <mpe(a)ellerman.id.au>
powerpc/vmlinux.lds: Define RUNTIME_DISCARD_EXIT
Masahiro Yamada <masahiroy(a)kernel.org>
arch: fix broken BuildID for arm64 and riscv
H.J. Lu <hjl.tools(a)gmail.com>
x86, vmlinux.lds: Add RUNTIME_DISCARD_EXIT to generic DISCARDS
John Harrison <John.C.Harrison(a)Intel.com>
drm/i915: Don't use BAR mappings for ring buffers with LLC
Corey Minyard <cminyard(a)mvista.com>
ipmi:watchdog: Set panic count to proper value on a panic
Yejune Deng <yejune.deng(a)gmail.com>
ipmi/watchdog: replace atomic_add() and atomic_sub()
Paul Elder <paul.elder(a)ideasonboard.com>
media: ov5640: Fix analogue gain control
Alvaro Karsz <alvaro.karsz(a)solid-run.com>
PCI: Avoid FLR for SolidRun SNET DPU rev 1
Alvaro Karsz <alvaro.karsz(a)solid-run.com>
PCI: Add SolidRun vendor ID
Nathan Chancellor <nathan(a)kernel.org>
macintosh: windfarm: Use unsigned type for 1-bit bitfields
Edward Humes <aurxenon(a)lunos.org>
alpha: fix R_ALPHA_LITERAL reloc for large modules
Christophe Leroy <christophe.leroy(a)csgroup.eu>
powerpc: Check !irq instead of irq == NO_IRQ and remove NO_IRQ
xurui <xurui(a)kylinos.cn>
MIPS: Fix a compilation issue
Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
clk: qcom: mmcc-apq8084: remove spdm clocks
Jan Kara <jack(a)suse.cz>
ext4: Fix deadlock during directory rename
Alexandre Ghiti <alexghiti(a)rivosinc.com>
riscv: Use READ_ONCE_NOCHECK in imprecise unwinding stack mode
D. Wythe <alibuda(a)linux.alibaba.com>
net/smc: fix fallback failed while sendmsg with fastopen
Chandrakanth Patil <chandrakanth.patil(a)broadcom.com>
scsi: megaraid_sas: Update max supported LD IDs to 240
Lorenz Bauer <lorenz.bauer(a)isovalent.com>
btf: fix resolving BTF_KIND_VAR after ARRAY, STRUCT, UNION, PTR
Florian Westphal <fw(a)strlen.de>
netfilter: tproxy: fix deadlock due to missing BH disable
Michael Chan <michael.chan(a)broadcom.com>
bnxt_en: Avoid order-5 memory allocation for TPA data
Shigeru Yoshida <syoshida(a)redhat.com>
net: caif: Fix use-after-free in cfusbl_device_notify()
Yuiko Oshino <yuiko.oshino(a)microchip.com>
net: lan78xx: fix accessing the LAN7800's internal phy specific registers from the MAC driver
Lee Jones <lee.jones(a)linaro.org>
net: usb: lan78xx: Remove lots of set but unused 'ret' variables
Hangbin Liu <liuhangbin(a)gmail.com>
selftests: nft_nat: ensuring the listening side is up before starting the client
Eric Dumazet <edumazet(a)google.com>
ila: do not generate empty messages in ila_xlat_nl_cmd_get_mapping()
Kang Chen <void0red(a)gmail.com>
nfc: fdp: add null check of devm_kmalloc_array in fdp_nci_i2c_read_device_properties
Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
drm/msm/a5xx: fix setting of the CP_PREEMPT_ENABLE_LOCAL register
Jan Kara <jack(a)suse.cz>
ext4: Fix possible corruption when moving a directory
Bart Van Assche <bvanassche(a)acm.org>
scsi: core: Remove the /proc/scsi/${proc_name} directory earlier
Volker Lendecke <vl(a)samba.org>
cifs: Fix uninitialized memory read in smb3_qfs_tcon()
Amir Goldstein <amir73il(a)gmail.com>
SMB3: Backup intent flag missing from some more ops
Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
ARM: dts: exynos: correct TMU phandle in Odroid XU3 family
Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
ARM: dts: exynos: correct TMU phandle in Odroid HC1
Marek Szyprowski <m.szyprowski(a)samsung.com>
ARM: dts: exynos: Add GPU thermal zone cooling maps for Odroid XU3/XU4/HC1
Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
ARM: dts: exynos: correct TMU phandle in Exynos5250
Krzysztof Kozlowski <krzk(a)kernel.org>
ARM: dts: exynos: Override thermal by label in Exynos5250
Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
ARM: dts: exynos: correct TMU phandle in Exynos4210
Krzysztof Kozlowski <krzk(a)kernel.org>
ARM: dts: exynos: Override thermal by label in Exynos4210
Jacob Pan <jacob.jun.pan(a)linux.intel.com>
iommu/vt-d: Fix PASID directory pointer coherency
Marc Zyngier <maz(a)kernel.org>
irqdomain: Fix domain registration race
Bixuan Cui <cuibixuan(a)huawei.com>
irqdomain: Change the type of 'size' in __irq_domain_add() to be consistent
Corey Minyard <cminyard(a)mvista.com>
ipmi:ssif: Add a timer between request retries
Corey Minyard <cminyard(a)mvista.com>
ipmi:ssif: Increase the message retry time
Corey Minyard <cminyard(a)mvista.com>
ipmi:ssif: Remove rtc_us_timer
Corey Minyard <cminyard(a)mvista.com>
ipmi:ssif: resend_msg() cannot fail
Liguang Zhang <zhangliguang(a)linux.alibaba.com>
ipmi:ssif: make ssif_i2c_send() void
Gavrilov Ilia <Ilia.Gavrilov(a)infotecs.ru>
iommu/amd: Add a length limitation for the ivrs_acpihid command-line parameter
Kim Phillips <kim.phillips(a)amd.com>
iommu/amd: Fix ill-formed ivrs_ioapic, ivrs_hpet and ivrs_acpihid options
Suravee Suthikulpanit <suravee.suthikulpanit(a)amd.com>
iommu/amd: Add PCI segment support for ivrs_[ioapic/hpet/acpihid] commands
Jani Nikula <jani.nikula(a)intel.com>
drm/edid: fix AVI infoframe aspect ratio handling
Wayne Lin <Wayne.Lin(a)amd.com>
drm/edid: Add aspect ratios to HDMI 4K modes
Ville Syrjälä <ville.syrjala(a)linux.intel.com>
drm/edid: Fix HDMI VIC handling
Ville Syrjälä <ville.syrjala(a)linux.intel.com>
drm/edid: Extract drm_mode_cea_vic()
Fedor Pchelkin <pchelkin(a)ispras.ru>
nfc: change order inside nfc_se_io error path
Zhihao Cheng <chengzhihao1(a)huawei.com>
ext4: zero i_disksize when initializing the bootloader inode
Ye Bin <yebin10(a)huawei.com>
ext4: fix WARNING in ext4_update_inline_data
Ye Bin <yebin10(a)huawei.com>
ext4: move where set the MAY_INLINE_DATA flag is set
Darrick J. Wong <djwong(a)kernel.org>
ext4: fix another off-by-one fsmap error on 1k block filesystems
Eric Whitney <enwlinux(a)gmail.com>
ext4: fix RENAME_WHITEOUT handling for inline directories
Harry Wentland <harry.wentland(a)amd.com>
drm/connector: print max_requested_bpc in state debugfs
Andrew Cooper <andrew.cooper3(a)citrix.com>
x86/CPU/AMD: Disable XSAVES on AMD family 0x17
Theodore Ts'o <tytso(a)mit.edu>
fs: prevent out-of-bounds array speculation when closing a file descriptor
-------------
Diffstat:
Documentation/admin-guide/kernel-parameters.txt | 51 +++-
Makefile | 4 +-
arch/alpha/kernel/module.c | 4 +-
arch/arm/boot/dts/exynos4210.dtsi | 35 ++-
arch/arm/boot/dts/exynos5250.dtsi | 38 ++-
arch/arm/boot/dts/exynos5422-odroidhc1.dts | 38 ++-
arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi | 67 ++++-
arch/mips/include/asm/mach-rc32434/pci.h | 2 +-
arch/powerpc/include/asm/irq.h | 3 -
arch/powerpc/kernel/vmlinux.lds.S | 6 +-
arch/powerpc/platforms/44x/fsp2.c | 2 +-
arch/riscv/kernel/stacktrace.c | 2 +-
arch/s390/kernel/vmlinux.lds.S | 2 +
arch/sh/kernel/vmlinux.lds.S | 1 +
arch/um/kernel/vmlinux.lds.S | 2 +-
arch/x86/kernel/cpu/amd.c | 9 +
arch/x86/kernel/vmlinux.lds.S | 2 +
drivers/char/ipmi/ipmi_ssif.c | 146 ++++-------
drivers/char/ipmi/ipmi_watchdog.c | 8 +-
drivers/clk/qcom/mmcc-apq8084.c | 271 ---------------------
drivers/gpu/drm/drm_atomic.c | 1 +
drivers/gpu/drm/drm_edid.c | 130 ++++++----
drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 4 +-
drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 +-
drivers/iommu/amd_iommu_init.c | 105 ++++++--
drivers/iommu/intel-pasid.c | 7 +
drivers/macintosh/windfarm_lm75_sensor.c | 4 +-
drivers/macintosh/windfarm_smu_sensors.c | 4 +-
drivers/media/i2c/ov5640.c | 2 +-
drivers/net/ethernet/broadcom/bnxt/bnxt.c | 23 +-
drivers/net/phy/microchip.c | 32 +++
drivers/net/usb/lan78xx.c | 189 ++++++--------
drivers/nfc/fdp/i2c.c | 4 +
drivers/pci/quirks.c | 8 +
drivers/s390/block/dasd_diag.c | 7 +-
drivers/s390/block/dasd_fba.c | 7 +-
drivers/s390/block/dasd_int.h | 1 -
drivers/scsi/hosts.c | 2 +
drivers/scsi/megaraid/megaraid_sas.h | 2 +
drivers/scsi/megaraid/megaraid_sas_fp.c | 2 +-
fs/cifs/cifsacl.c | 14 +-
fs/cifs/cifsfs.c | 2 +-
fs/cifs/cifsglob.h | 6 +-
fs/cifs/cifsproto.h | 8 +
fs/cifs/connect.c | 2 +-
fs/cifs/dir.c | 5 +-
fs/cifs/file.c | 10 +-
fs/cifs/inode.c | 8 +-
fs/cifs/ioctl.c | 2 +-
fs/cifs/link.c | 18 +-
fs/cifs/smb1ops.c | 19 +-
fs/cifs/smb2inode.c | 9 +-
fs/cifs/smb2ops.c | 92 +++----
fs/cifs/smb2proto.h | 2 +-
fs/ext4/fsmap.c | 2 +
fs/ext4/inline.c | 1 -
fs/ext4/inode.c | 7 +-
fs/ext4/ioctl.c | 1 +
fs/ext4/namei.c | 36 ++-
fs/ext4/xattr.c | 3 +
fs/file.c | 1 +
include/asm-generic/vmlinux.lds.h | 16 +-
include/linux/irqdomain.h | 2 +-
include/linux/pci_ids.h | 2 +
include/net/netfilter/nf_tproxy.h | 7 +
kernel/bpf/btf.c | 1 +
kernel/irq/irqdomain.c | 62 +++--
net/caif/caif_usb.c | 3 +
net/ipv4/netfilter/nf_tproxy_ipv4.c | 2 +-
net/ipv6/ila/ila_xlat.c | 1 +
net/ipv6/netfilter/nf_tproxy_ipv6.c | 2 +-
net/nfc/netlink.c | 2 +-
net/smc/af_smc.c | 13 +-
tools/testing/selftests/netfilter/nft_nat.sh | 2 +
74 files changed, 783 insertions(+), 811 deletions(-)
Dear Stable Maintainers,
I am submitting several individual patches for consideration to be applied to the Linux stable kernel v6.1.
Below, you will find the necessary information for each patch:
Patch 1:
Subject: igc: remove I226 Qbv BaseTime restriction
Commit ID: b8897dc54e3b
Reason for Application: Remove the Qbv BaseTime restriction for I226 so that the BaseTime can be scheduled to the future time
Target Kernel Version: v6.1
Patch 2:
Subject: igc: enable Qbv configuration for 2nd GCL
Commit ID: 5ac1231ac14d
Reason for Application: Make reset task only executes for i225 and Qbv disabling to allow i226 configure for 2nd GCL without resetting the adapter.
Target Kernel Version: v6.1
Patch 3:
Subject: igc: Remove reset adapter task for i226 during disable tsn config
Commit ID: 1d1b4c63ba73
Reason for Application: Removes the power cycle restriction so that when user configure/remove any TSN mode, it would not go into power cycle reset adapter.
Target Kernel Version: v6.1
Patch 4:
Subject: igc: Add qbv_config_change_errors counter
Commit ID: ae4fe4698300
Reason for Application: Add ConfigChangeError(qbv_config_change_errors) when user try to set the AdminBaseTime to past value while the current GCL is still running.
Target Kernel Version: v6.1
Patch 5:
Subject: igc: Add condition for qbv_config_change_errors counter
Commit ID: ed89b74d2dc9
Reason for Application: Fix patch ae4fe4698300 ("igc: Add qbv_config_change_errors counter")
Target Kernel Version: v6.1
Patch 6:
Subject: igc: Fix race condition in PTP Tx code
Commit ID: 9c50e2b150c8
Reason for Application: Fix race condition introduced by patch 2c344ae24501 ("igc: Add support for TX timestamping")
Target Kernel Version: v6.1
Thank you for your time and consideration.
Please let me know if you require any additional information or have any questions regarding these patch submissions.
Best regards,
Faizal
faizal.abdul.rahim(a)intel.com
Add support for the IX-100, IX-200 and IX-400 serial cards.
Cc: stable(a)vger.kernel.org
Signed-off-by: Cameron Williams <cang1(a)live.co.uk>
---
For stable: This patch is only applicable to 5.15 LTS and up, other LTS
kernels dont have the neccesary quirk options available (in patch 11) to
have these cards work correctly.
v3 - v4:
Add Cc: tag.
v2 - v3:
Re-submit patch series using git send-email to make threading work.
v1 - v2:
This is a resubmission series for the patch series below. That series
was lots of changes sent to lots of maintainers, this series is just for
the tty/serial/8250 subsystem.
[1] https://lore.kernel.org/all/DU0PR02MB789950E64D808DB57E9D7312C4F8A@DU0PR02M…
[2] https://lore.kernel.org/all/DU0PR02MB7899DE53DFC900EFB50E53F2C4F8A@DU0PR02M…
[3] https://lore.kernel.org/all/DU0PR02MB7899033E7E81EAF3694BC20AC4F8A@DU0PR02M…
[4] https://lore.kernel.org/all/DU0PR02MB7899EABA8C3DCAC94DCC79D4C4F8A@DU0PR02M…
drivers/tty/serial/8250/8250_pci.c | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index 65ac7fdd361f..af970b372cc2 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -4931,6 +4931,27 @@ static const struct pci_device_id serial_pci_tbl[] = {
{ PCI_VENDOR_ID_INTASHIELD, PCI_DEVICE_ID_INTASHIELD_IS400,
PCI_ANY_ID, PCI_ANY_ID, 0, 0, /* 135a.0dc0 */
pbn_b2_4_115200 },
+ /*
+ * IntaShield IX-100
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x4027,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_oxsemi_1_15625000 },
+ /*
+ * IntaShield IX-200
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x4028,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_oxsemi_2_15625000 },
+ /*
+ * IntaShield IX-400
+ */
+ { PCI_VENDOR_ID_INTASHIELD, 0x4029,
+ PCI_ANY_ID, PCI_ANY_ID,
+ 0, 0,
+ pbn_oxsemi_4_15625000 },
/* Brainboxes Devices */
/*
* Brainboxes UC-101
--
2.42.0
The PX-835 and PX-846 are variants of each other, clarify that
in the comment for the card.
This patch makes no functional difference.
Fixes: ef5a03a26c87 ("tty: 8250: Add support for Brainboxes PX cards.")
Cc: stable(a)vger.kernel.org
Signed-off-by: Cameron Williams <cang1(a)live.co.uk>
---
For stable: This patch is only applicable to 5.15 LTS and up, other LTS
kernels dont have PX card support.
v3 - v4:
Split patch v3 part 5 into multiple Fixes patches and an Additions patch.
Add Fixes: and Cc: tag.
v2 - v3:
Alter commit message a little to make the additions/fixes cleaner
Re-submit patch series using git send-email to make threading work.
v1 - v2:
This is a resubmission series for the patch series below. That series
was lots of changes sent to lots of maintainers, this series is just for
the tty/serial/8250 subsystem.
[1] https://lore.kernel.org/all/DU0PR02MB789950E64D808DB57E9D7312C4F8A@DU0PR02M…
[2] https://lore.kernel.org/all/DU0PR02MB7899DE53DFC900EFB50E53F2C4F8A@DU0PR02M…
[3] https://lore.kernel.org/all/DU0PR02MB7899033E7E81EAF3694BC20AC4F8A@DU0PR02M…
[4] https://lore.kernel.org/all/DU0PR02MB7899EABA8C3DCAC94DCC79D4C4F8A@DU0PR02M…
drivers/tty/serial/8250/8250_pci.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index a68ae56e5578..f1acf5a4704f 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -5246,7 +5246,7 @@ static const struct pci_device_id serial_pci_tbl[] = {
0, 0,
pbn_oxsemi_2_15625000 },
/*
- * Brainboxes PX-846
+ * Brainboxes PX-835/PX-846
*/
{ PCI_VENDOR_ID_INTASHIELD, 0x4008,
PCI_ANY_ID, PCI_ANY_ID,
--
2.42.0
The PX-803/PX-857 are variants of each other, add a note.
Additionally fix up the port counts for the card (2, not 1).
Fixes: ef5a03a26c87 ("tty: 8250: Add support for Brainboxes PX cards.")
Cc: stable(a)vger.kernel.org
Signed-off-by: Cameron Williams <cang1(a)live.co.uk>
---
For stable: This patch is only applicable to 5.15 LTS and up, other LTS
kernels dont have PX card support.
v3 - v4:
Split patch v3 part 5 into multiple Fixes patches and an Additions patch.
Add Fixes: and Cc: tag.
v2 - v3:
Alter commit message a little to make the additions/fixes cleaner
Re-submit patch series using git send-email to make threading work.
v1 - v2:
This is a resubmission series for the patch series below. That series
was lots of changes sent to lots of maintainers, this series is just for
the tty/serial/8250 subsystem.
[1] https://lore.kernel.org/all/DU0PR02MB789950E64D808DB57E9D7312C4F8A@DU0PR02M…
[2] https://lore.kernel.org/all/DU0PR02MB7899DE53DFC900EFB50E53F2C4F8A@DU0PR02M…
[3] https://lore.kernel.org/all/DU0PR02MB7899033E7E81EAF3694BC20AC4F8A@DU0PR02M…
[4] https://lore.kernel.org/all/DU0PR02MB7899EABA8C3DCAC94DCC79D4C4F8A@DU0PR02M…
drivers/tty/serial/8250/8250_pci.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index 59074a709254..a68ae56e5578 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -5235,16 +5235,16 @@ static const struct pci_device_id serial_pci_tbl[] = {
0, 0,
pbn_oxsemi_4_15625000 },
/*
- * Brainboxes PX-803
+ * Brainboxes PX-803/PX-857
*/
{ PCI_VENDOR_ID_INTASHIELD, 0x4009,
PCI_ANY_ID, PCI_ANY_ID,
0, 0,
- pbn_b0_1_115200 },
+ pbn_b0_2_115200 },
{ PCI_VENDOR_ID_INTASHIELD, 0x401E,
PCI_ANY_ID, PCI_ANY_ID,
0, 0,
- pbn_oxsemi_1_15625000 },
+ pbn_oxsemi_2_15625000 },
/*
* Brainboxes PX-846
*/
--
2.42.0
The UC-257 is a serial + LPT card, so remove it from this driver.
A patch has been submitted to add it to parport_serial instead.
Additionaly, the UC-431 does not use this card ID, only the UC-420
does. The 431 is a 3-port card and there is no generic 3-port configuration
available, so remove reference to it from this driver.
Fixes: 152d1afa834c ("tty: Add support for Brainboxes UC cards.")
Cc: stable(a)vger.kernel.org
Signed-off-by: Cameron Williams <cang1(a)live.co.uk>
---
With regards to the parport_serial patch, I mistakenly did not copy in
the LKML, only the maintainer, so I have no link to the patch, my mistake
and apologies.
v3 - v4:
Split patch v3 part 2 into Fixes and Additions
Add Fixes: and Cc: tag.
v2 - v3:
Re-submit patch series using git send-email to make threading work.
v1 - v2:
This is a resubmission series for the patch series below. That series
was lots of changes sent to lots of maintainers, this series is just for
the tty/serial/8250 subsystem.
[1] https://lore.kernel.org/all/DU0PR02MB789950E64D808DB57E9D7312C4F8A@DU0PR02M…
[2] https://lore.kernel.org/all/DU0PR02MB7899DE53DFC900EFB50E53F2C4F8A@DU0PR02M…
[3] https://lore.kernel.org/all/DU0PR02MB7899033E7E81EAF3694BC20AC4F8A@DU0PR02M…
[4] https://lore.kernel.org/all/DU0PR02MB7899EABA8C3DCAC94DCC79D4C4F8A@DU0PR02M…
drivers/tty/serial/8250/8250_pci.c | 9 +--------
1 file changed, 1 insertion(+), 8 deletions(-)
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index ecb4e9acc70d..a59b9b8eaa68 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -4940,13 +4940,6 @@ static const struct pci_device_id serial_pci_tbl[] = {
PCI_ANY_ID, PCI_ANY_ID,
0, 0,
pbn_b2_1_115200 },
- /*
- * Brainboxes UC-257
- */
- { PCI_VENDOR_ID_INTASHIELD, 0x0861,
- PCI_ANY_ID, PCI_ANY_ID,
- 0, 0,
- pbn_b2_2_115200 },
/*
* Brainboxes UC-260/271/701/756
*/
@@ -5026,7 +5019,7 @@ static const struct pci_device_id serial_pci_tbl[] = {
0, 0,
pbn_b2_4_115200 },
/*
- * Brainboxes UC-420/431
+ * Brainboxes UC-420
*/
{ PCI_VENDOR_ID_INTASHIELD, 0x0921,
PCI_ANY_ID, PCI_ANY_ID,
--
2.42.0
The quilt patch titled
Subject: x86/mm: drop 4MB restriction on minimal NUMA node size
has been removed from the -mm tree. Its filename was
x86-mm-drop-4mb-restriction-on-minimal-numa-node-size.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: "Mike Rapoport (IBM)" <rppt(a)kernel.org>
Subject: x86/mm: drop 4MB restriction on minimal NUMA node size
Date: Tue, 17 Oct 2023 09:22:15 +0300
Qi Zheng reports crashes in a production environment and provides a
simplified example as a reproducer:
For example, if we use qemu to start a two NUMA node kernel,
one of the nodes has 2M memory (less than NODE_MIN_SIZE),
and the other node has 2G, then we will encounter the
following panic:
[ 0.149844] BUG: kernel NULL pointer dereference, address: 0000000000000000
[ 0.150783] #PF: supervisor write access in kernel mode
[ 0.151488] #PF: error_code(0x0002) - not-present page
<...>
[ 0.156056] RIP: 0010:_raw_spin_lock_irqsave+0x22/0x40
<...>
[ 0.169781] Call Trace:
[ 0.170159] <TASK>
[ 0.170448] deactivate_slab+0x187/0x3c0
[ 0.171031] ? bootstrap+0x1b/0x10e
[ 0.171559] ? preempt_count_sub+0x9/0xa0
[ 0.172145] ? kmem_cache_alloc+0x12c/0x440
[ 0.172735] ? bootstrap+0x1b/0x10e
[ 0.173236] bootstrap+0x6b/0x10e
[ 0.173720] kmem_cache_init+0x10a/0x188
[ 0.174240] start_kernel+0x415/0x6ac
[ 0.174738] secondary_startup_64_no_verify+0xe0/0xeb
[ 0.175417] </TASK>
[ 0.175713] Modules linked in:
[ 0.176117] CR2: 0000000000000000
The crashes happen because of inconsistency between nodemask that has
nodes with less than 4MB as memoryless and the actual memory fed into
core mm.
The commit 9391a3f9c7f1 ("[PATCH] x86_64: Clear more state when ignoring
empty node in SRAT parsing") that introduced minimal size of a NUMA node
does not explain why a node size cannot be less than 4MB and what boot
failures this restriction might fix.
Since then a lot has changed and core mm won't confuse badly about small
node sizes.
Drop the limitation for the minimal node size.
Link: https://lkml.kernel.org/r/20231017062215.171670-1-rppt@kernel.org
Signed-off-by: Mike Rapoport (IBM) <rppt(a)kernel.org>
Reported-by: Qi Zheng <zhengqi.arch(a)bytedance.com>
Closes: https://lore.kernel.org/all/20230212110305.93670-1-zhengqi.arch@bytedance.c…
Acked-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Tested-by: Mario Casquero <mcasquer(a)redhat.com>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Borislav Petkov (AMD) <bp(a)alien8.de>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Peter Ziljstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/x86/include/asm/numa.h | 7 -------
arch/x86/mm/numa.c | 7 -------
2 files changed, 14 deletions(-)
--- a/arch/x86/include/asm/numa.h~x86-mm-drop-4mb-restriction-on-minimal-numa-node-size
+++ a/arch/x86/include/asm/numa.h
@@ -12,13 +12,6 @@
#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
-/*
- * Too small node sizes may confuse the VM badly. Usually they
- * result from BIOS bugs. So dont recognize nodes as standalone
- * NUMA entities that have less than this amount of RAM listed:
- */
-#define NODE_MIN_SIZE (4*1024*1024)
-
extern int numa_off;
/*
--- a/arch/x86/mm/numa.c~x86-mm-drop-4mb-restriction-on-minimal-numa-node-size
+++ a/arch/x86/mm/numa.c
@@ -601,13 +601,6 @@ static int __init numa_register_memblks(
if (start >= end)
continue;
- /*
- * Don't confuse VM with a node that doesn't have the
- * minimum amount of memory:
- */
- if (end && (end - start) < NODE_MIN_SIZE)
- continue;
-
alloc_node_data(nid);
}
_
Patches currently in -mm which might be from rppt(a)kernel.org are
Several stable branches (4.14 to 5.15 inclusive) belatedly got
backports of commit 1202cdd665315c ("Remove DECnet support from
kernel"). This causes a minor regression for the documentation build,
which was fixed upstream by:
commit 1faa34672f8a17a3e155e74bde9648564e9480d6
Author: Bagas Sanjaya <bagasdotme(a)gmail.com>
Date: Wed Aug 24 10:58:04 2022 +0700
Documentation: sysctl: align cells in second content column
Please apply this to the affected branches.
Ben.
--
Ben Hutchings
Who are all these weirdos? - David Bowie, on joining IRC
Please backport commit 786dee864804 ("mm/memory_hotplug: ratelimit page
migration warnings") to 5.10 stable branch.
Commit message:
"""
When offlining memory the system can attempt to migrate a lot of pages, if
there are problems with migration this can flood the logs. Printing all
the data hogs the CPU and cause some RT threads to run for a long time,
which may have some bad consequences.
Rate limit the page migration warnings in order to avoid this.
"""
We are sometimes seeing RCU stalls while offlining memory with the 5.10
kernel due to the printing of these messages.
Applying this patch solves the problem by ratelimiting the prints.
Thanks,
Puranjay
> If this holds up without regressions than all LTSes. That's what Amir
> and Leah did for some other work. I can add that to the comment for
> clarity.
Unfortunately, this has not held up in LTSes without causing
regressions, specifically in crun:
Crun issue and patch
1. https://github.com/containers/crun/issues/1308
2. https://github.com/containers/crun/pull/1309
Debian bug report
1. https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1053821
I think it should be reverted in LTSes and possibly in upstream.
Yours kindly, Jesse Hathaway
P.S. apologies for not having the correct threading headers. I am not on
the list.
I'm announcing the release of the 6.1.58 kernel.
All users of the 6.1 kernel series must upgrade.
The updated 6.1.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-6.1.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Makefile | 2
fs/nfs/direct.c | 134 ++++++++++++++---------------------------------
fs/nfs/write.c | 23 +++-----
include/linux/nfs_page.h | 4 -
lib/test_meminit.c | 2
5 files changed, 55 insertions(+), 110 deletions(-)
Greg Kroah-Hartman (7):
Revert "NFS: More fixes for nfs_direct_write_reschedule_io()"
Revert "NFS: Use the correct commit info in nfs_join_page_group()"
Revert "NFS: More O_DIRECT accounting fixes for error paths"
Revert "NFS: Fix O_DIRECT locking issues"
Revert "NFS: Fix error handling for O_DIRECT write scheduling"
lib/test_meminit: fix off-by-one error in test_pages()
Linux 6.1.58
Hi,
As reported before[1], we found another regression in 6.1 when doing
performance comparisons with 5.10. This one is caused by CONFIG_DEBUG_PREEMPT
being enabled by default by the following upstream commit if you have the
right config dependencies enabled (commit is introduced in v5.16-rc1):
"""
commit c597bfddc9e9e8a63817252b67c3ca0e544ace26
Author: Frederic Weisbecker <frederic(a)kernel.org>
Date: Tue Sep 14 12:31:34 2021 +0200
sched: Provide Kconfig support for default dynamic preempt mode
"""
We found up to 8% performance improvement with CONFIG_DEBUG_PREEMPT
disabled in different perf benchmarks (including UnixBench process
creation and redis). The root cause is explained in the commit log
below which is merged in 6.3 and applies (almost) clealy on 6.1.59.
"""
commit cc6003916ed46d7a67d91ee32de0f9138047d55f
Author: Hyeonggon Yoo <42.hyeyoo(a)gmail.com>
Date: Sat Jan 21 12:39:42 2023 +0900
lib/Kconfig.debug: do not enable DEBUG_PREEMPT by default
In workloads where this_cpu operations are frequently performed,
enabling DEBUG_PREEMPT may result in significant increase in
runtime overhead due to frequent invocation of
__this_cpu_preempt_check() function.
This can be demonstrated through benchmarks such as hackbench where this
configuration results in a 10% reduction in performance, primarily due to
the added overhead within memcg charging path.
"""
[1] https://lore.kernel.org/stable/010edf5a-453d-4c98-9c07-12e75d3f983c@amazon.…
Igor Raits and Bagas Sanjaya report a RQCF_ACT_SKIP leak warning.
Link: https://lore.kernel.org/all/a5dd536d-041a-2ce9-f4b7-64d8d85c86dc@gmail.com
This warning may be triggered in the following situations:
CPU0 CPU1
__schedule()
*rq->clock_update_flags <<= 1;* unregister_fair_sched_group()
pick_next_task_fair+0x4a/0x410 destroy_cfs_bandwidth()
newidle_balance+0x115/0x3e0 for_each_possible_cpu(i) *i=0*
rq_unpin_lock(this_rq, rf) __cfsb_csd_unthrottle()
raw_spin_rq_unlock(this_rq)
rq_lock(*CPU0_rq*, &rf)
rq_clock_start_loop_update()
rq->clock_update_flags & RQCF_ACT_SKIP <--
raw_spin_rq_lock(this_rq)
The purpose of RQCF_ACT_SKIP is to skip the update rq clock,
but the update is very early in __schedule(), but we clear
RQCF_*_SKIP very late, causing it to span that gap above
and triggering this warning.
In __schedule() we can clear the RQCF_*_SKIP flag immediately
after update_rq_clock() to avoid this RQCF_ACT_SKIP leak warning.
And set rq->clock_update_flags to RQCF_UPDATED to avoid
rq->clock_update_flags < RQCF_ACT_SKIP warning that may be triggered later.
Fixes: ebb83d84e49b ("sched/core: Avoid multiple calling update_rq_clock() in __cfsb_csd_unthrottle()")
Cc: stable(a)vger.kernel.org
Reported-by: Igor Raits <igor.raits(a)gmail.com>
Reported-by: Bagas Sanjaya <bagasdotme(a)gmail.com>
Closes: https://lore.kernel.org/all/20230913082424.73252-1-jiahao.os@bytedance.com
Suggested-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Signed-off-by: Hao Jia <jiahao.os(a)bytedance.com>
---
kernel/sched/core.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 802551e0009b..afb8d213155b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5374,8 +5374,6 @@ context_switch(struct rq *rq, struct task_struct *prev,
/* switch_mm_cid() requires the memory barriers above. */
switch_mm_cid(rq, prev, next);
- rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
-
prepare_lock_switch(rq, next, rf);
/* Here we just switch the register state and the stack. */
@@ -6615,6 +6613,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
/* Promote REQ to ACT */
rq->clock_update_flags <<= 1;
update_rq_clock(rq);
+ rq->clock_update_flags = RQCF_UPDATED;
switch_count = &prev->nivcsw;
@@ -6694,8 +6693,6 @@ static void __sched notrace __schedule(unsigned int sched_mode)
/* Also unlocks the rq: */
rq = context_switch(rq, prev, next, &rf);
} else {
- rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
-
rq_unpin_lock(rq, &rf);
__balance_callbacks(rq);
raw_spin_rq_unlock_irq(rq);
--
2.39.2
If name_show() is non unique, this test will try to install a kprobe on this
function which should fail returning EADDRNOTAVAIL.
On kernel where name_show() is not unique, this test is skipped.
Cc: stable(a)vger.kernel.org
Signed-off-by: Francis Laniel <flaniel(a)linux.microsoft.com>
---
.../ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc | 13 +++++++++++++
1 file changed, 13 insertions(+)
create mode 100644 tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc
new file mode 100644
index 000000000000..bc9514428dba
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc
@@ -0,0 +1,13 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Test failure of registering kprobe on non unique symbol
+# requires: kprobe_events
+
+SYMBOL='name_show'
+
+# We skip this test on kernel where SYMBOL is unique or does not exist.
+if [ "$(grep -c -E "[[:alnum:]]+ t ${SYMBOL}" /proc/kallsyms)" -le '1' ]; then
+ exit_unsupported
+fi
+
+! echo "p:test_non_unique ${SYMBOL}" > kprobe_events
--
2.34.1
When a kprobe is attached to a function that's name is not unique (is
static and shares the name with other functions in the kernel), the
kprobe is attached to the first function it finds. This is a bug as the
function that it is attaching to is not necessarily the one that the
user wants to attach to.
Instead of blindly picking a function to attach to what is ambiguous,
error with EADDRNOTAVAIL to let the user know that this function is not
unique, and that the user must use another unique function with an
address offset to get to the function they want to attach to.
Cc: stable(a)vger.kernel.org
Fixes: 413d37d1eb69 ("tracing: Add kprobe-based event tracer")
Suggested-by: Masami Hiramatsu <mhiramat(a)kernel.org>
Signed-off-by: Francis Laniel <flaniel(a)linux.microsoft.com>
Link: https://lore.kernel.org/lkml/20230819101105.b0c104ae4494a7d1f2eea742@kernel…
---
kernel/trace/trace_kprobe.c | 63 +++++++++++++++++++++++++++++++++++++
kernel/trace/trace_probe.h | 1 +
2 files changed, 64 insertions(+)
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 3d7a180a8427..a8fef6ab0872 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -705,6 +705,25 @@ static struct notifier_block trace_kprobe_module_nb = {
.priority = 1 /* Invoked after kprobe module callback */
};
+static int count_symbols(void *data, unsigned long unused)
+{
+ unsigned int *count = data;
+
+ (*count)++;
+
+ return 0;
+}
+
+static unsigned int number_of_same_symbols(char *func_name)
+{
+ unsigned int count;
+
+ count = 0;
+ kallsyms_on_each_match_symbol(count_symbols, func_name, &count);
+
+ return count;
+}
+
static int __trace_kprobe_create(int argc, const char *argv[])
{
/*
@@ -836,6 +855,31 @@ static int __trace_kprobe_create(int argc, const char *argv[])
}
}
+ if (symbol && !strchr(symbol, ':')) {
+ unsigned int count;
+
+ count = number_of_same_symbols(symbol);
+ if (count > 1) {
+ /*
+ * Users should use ADDR to remove the ambiguity of
+ * using KSYM only.
+ */
+ trace_probe_log_err(0, NON_UNIQ_SYMBOL);
+ ret = -EADDRNOTAVAIL;
+
+ goto error;
+ } else if (count == 0) {
+ /*
+ * We can return ENOENT earlier than when register the
+ * kprobe.
+ */
+ trace_probe_log_err(0, BAD_PROBE_ADDR);
+ ret = -ENOENT;
+
+ goto error;
+ }
+ }
+
trace_probe_log_set_index(0);
if (event) {
ret = traceprobe_parse_event_name(&event, &group, gbuf,
@@ -1695,6 +1739,7 @@ static int unregister_kprobe_event(struct trace_kprobe *tk)
}
#ifdef CONFIG_PERF_EVENTS
+
/* create a trace_kprobe, but don't add it to global lists */
struct trace_event_call *
create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
@@ -1705,6 +1750,24 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
int ret;
char *event;
+ if (func) {
+ unsigned int count;
+
+ count = number_of_same_symbols(func);
+ if (count > 1)
+ /*
+ * Users should use addr to remove the ambiguity of
+ * using func only.
+ */
+ return ERR_PTR(-EADDRNOTAVAIL);
+ else if (count == 0)
+ /*
+ * We can return ENOENT earlier than when register the
+ * kprobe.
+ */
+ return ERR_PTR(-ENOENT);
+ }
+
/*
* local trace_kprobes are not added to dyn_event, so they are never
* searched in find_trace_kprobe(). Therefore, there is no concern of
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 02b432ae7513..850d9ecb6765 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -450,6 +450,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
C(BAD_MAXACT, "Invalid maxactive number"), \
C(MAXACT_TOO_BIG, "Maxactive is too big"), \
C(BAD_PROBE_ADDR, "Invalid probed address or symbol"), \
+ C(NON_UNIQ_SYMBOL, "The symbol is not unique"), \
C(BAD_RETPROBE, "Retprobe address must be an function entry"), \
C(NO_TRACEPOINT, "Tracepoint is not found"), \
C(BAD_ADDR_SUFFIX, "Invalid probed address suffix"), \
--
2.34.1
Hi.
In the kernel source code, it exists different functions which share the same
name but which have, of course, different addresses as they can be defined in
different modules:
# Kernel was compiled with CONFIG_NTFS_FS and CONFIG_NTFS3_FS as built-in.
root@vm-amd64:~# grep ntfs_file_write_iter /proc/kallsyms
ffffffff814ce3c0 t __pfx_ntfs_file_write_iter
ffffffff814ce3d0 t ntfs_file_write_iter
ffffffff814fc8a0 t __pfx_ntfs_file_write_iter
ffffffff814fc8b0 t ntfs_file_write_iter
This can be source of troubles when you create a PMU kprobe for such a function,
as it will only install one for the first address (e.g. 0xffffffff814ce3d0 in
the above).
This could lead to some troubles were BPF based tools does not report any event
because the second function is not called:
root@vm-amd64:/mnt# mount | grep /mnt
/foo.img on /mnt type ntfs3 (rw,relatime,uid=0,gid=0,iocharset=utf8)
# ig is a tool which installs a PMU kprobe on ntfs_file_write_iter().
root@vm-amd64:/mnt# ig trace fsslower -m 0 -f ntfs3 --host &> /tmp/foo &
[1] 207
root@vm-amd64:/mnt# dd if=./foo of=./bar count=3
3+0 records in
3+0 records out
1536 bytes (1.5 kB, 1.5 KiB) copied, 0.00543323 s, 283 kB/s
root@vm-amd64:/mnt# fg
ig trace fsslower -m 0 -f ntfs3 --host &> /tmp/foo
^Croot@vm-amd64:/mnt# more /tmp/foo
RUNTIME.CONTAINERNAME RUNTIME.CONTAIN… PID COMM
T BYTES OFFSET LAT FILE
214 dd
R 512 0 766 foo
214 dd
R 512 512 9 foo
214 dd
As you can see in the above, only read events are reported and no write because
the kprobe is installed for the old ntfs_file_write_iter() and not the ntfs3
one.
The same behavior occurs with sysfs kprobe:
root@vm-amd64:/# echo 'p:probe/ntfs_file_write_iter ntfs_file_write_iter' > /sys/kernel/tracing/kprobe_events
root@vm-amd64:/# cat /sys/kernel/tracing/kprobe_events
p:probe/ntfs_file_write_iter ntfs_file_write_iter
root@vm-amd64:/# mount | grep /mnt
/foo.img on /mnt type ntfs3 (rw,relatime,uid=0,gid=0,iocharset=utf8)
root@vm-amd64:/# perf record -e probe:ntfs_file_write_iter &
[1] 210
root@vm-amd64:/# cd /mnt/
root@vm-amd64:/mnt# dd if=./foo of=./bar count=3
3+0 records in
3+0 records out
1536 bytes (1.5 kB, 1.5 KiB) copied, 0.00234793 s, 654 kB/s
root@vm-amd64:/mnt# cd -
/
root@vm-amd64:/# fg
perf record -e probe:ntfs_file_write_iter
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.056 MB perf.data ]
root@vm-amd64:/# perf report
Error:
The perf.data data has no samples!
# To display the perf.data header info, please use --header/--header-only optio>
#
In this contribution, I modified the functions creating sysfs and PMU kprobes to
test if the function name given as argument matches several symbols.
In this case, these functions return EADDRNOTAVAIL to indicate the user to use
addr and offs to remove this ambiguity.
So, when the above BPF tool is run, the following error message is printed:
root@vm-amd64:~# ig trace fsslower -m 0 -f ntfs3 --host &> /tmp/foo &
[1] 228
root@vm-amd64:~# more /tmp/foo
RUNTIME.CONTAINERNAME RUNTIME.CONTAIN… PID COMM
T BYTES OFFSET LAT FILE
Error: running gadget: running gadget: installing tracer: attaching kprobe: crea
ting perf_kprobe PMU (arch-specific fallback for "ntfs_file_write_iter"): token
ntfs_file_write_iter: opening perf event: cannot assign requested address
And the same with sysfs kprobe:
root@vm-amd64:/# echo 'p:probe/ntfs_file_write_iter ntfs_file_write_iter' > /sys/kernel/tracing/kprobe_events
-bash: echo: write error: Cannot assign requested address
Note that, this does not influence perf as it installs kprobes as offset on
_text:
root@vm-amd64:/# perf probe --add ntfs_file_write_iter
Added new events:
probe:ntfs_file_write_iter (on ntfs_file_write_iter)
probe:ntfs_file_write_iter (on ntfs_file_write_iter)
...
root@vm-amd64:/# cat /sys/kernel/tracing/kprobe_events
p:probe/ntfs_file_write_iter _text+5039088
p:probe/ntfs_file_write_iter _text+5228752
Note that, this contribution is the conclusion of a previous RFC which intended
to install a PMU kprobe for all matching symbols [1, 2].
If you see any way to improve this contribution, particularly if you have an
idea to add tests or documentation for this behavior, please share your
feedback.
Changes since:
v1:
* Use EADDRNOTAVAIL instead of adding a new error code.
* Correct also this behavior for sysfs kprobe.
v2:
* Count the number of symbols corresponding to function name and return
EADDRNOTAVAIL if higher than 1.
* Return ENOENT if above count is 0, as it would be returned later by while
registering the kprobe.
v3:
* Check symbol does not contain ':' before testing its uniqueness.
* Add a selftest to check this is not possible to install a kprobe for a non
unique symbol.
v5:
* No changes, just add linux-stable as recipient.
Francis Laniel (2):
tracing/kprobes: Return EADDRNOTAVAIL when func matches several
symbols
selftests/ftrace: Add new test case which checks non unique symbol
kernel/trace/trace_kprobe.c | 63 +++++++++++++++++++
kernel/trace/trace_probe.h | 1 +
.../test.d/kprobe/kprobe_non_uniq_symbol.tc | 13 ++++
3 files changed, 77 insertions(+)
create mode 100644 tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc
Best regards and thank you in advance.
---
[1]: https://lore.kernel.org/lkml/20230816163517.112518-1-flaniel@linux.microsof…
[2]: https://lore.kernel.org/lkml/20230819101105.b0c104ae4494a7d1f2eea742@kernel…
--
2.34.1
On Sun, Sep 03, 2023 at 09:19:13PM +0900, Kenta Sato wrote:
> Hi,
>
> I am using the FriendlyElec NanoPi R4S board.
> When I update the kernel from 6.4.7 to 6.4.11, 6.4.13, and 6.5.1, it
> doesn't recognize some USB devices.
>
> The board has two USB 3.0 ports. I connected 1) BUFFALO USB Flash Disk
> (high-speed) and 2) NETGEAR A6210 (SuperSpeed) to each port.
> 1) is often not recognized. On the other hand, 2) was working while I
> was testing.
> Regardless of whether a USB device is connected, I could see the below
> message on dmesg:
>
> [ 0.740993] phy phy-ff7c0000.phy.8: phy poweron failed --> -110
> [ 0.741585] dwc3 fe800000.usb: error -ETIMEDOUT: failed to initialize core
> [ 0.742334] dwc3: probe of fe800000.usb failed with error -110
> [ 0.751635] rockchip-usb2phy ff770000.syscon:usb2phy@e460:
> Requested PHY is disabled
>
> Is there any idea on this?
>
> The cause seems to be related to this commit. I tried reverting this
> change and the issue seemed to be solved.
>
> >From 317d6e4c12b46bde61248ea4ab5e19f68cbd1c57 Mon Sep 17 00:00:00 2001
> From: Jisheng Zhang <jszhang(a)kernel.org>
> Date: Wed, 28 Jun 2023 00:20:18 +0800
> Subject: usb: dwc3: don't reset device side if dwc3 was configured as
> host-only
>
> commit e835c0a4e23c38531dcee5ef77e8d1cf462658c7 upstream.
>
> Commit c4a5153e87fd ("usb: dwc3: core: Power-off core/PHYs on
> system_suspend in host mode") replaces check for HOST only dr_mode with
> current_dr_role. But during booting, the current_dr_role isn't
> initialized, thus the device side reset is always issued even if dwc3
> was configured as host-only. What's more, on some platforms with host
> only dwc3, aways issuing device side reset by accessing device register
> block can cause kernel panic.
>
> Fixes: c4a5153e87fd ("usb: dwc3: core: Power-off core/PHYs on
> system_suspend in host mode")
> Cc: stable <stable(a)kernel.org>
> Signed-off-by: Jisheng Zhang <jszhang(a)kernel.org>
> Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
> Link: https://lore.kernel.org/r/20230627162018.739-1-jszhang@kernel.org
> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
> ---
> drivers/usb/dwc3/core.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=…
>
Thanks for the regression report. I'm adding it to regzbot:
#regzbot ^introduced: e835c0a4e23c38
#regzbot title: some USB devices unrecognized caused by not resetting dwc3 device if it is host-only
--
An old man doll... just what I always wanted! - Clara