From: Krzysztof Kozlowski krzysztof.kozlowski@linaro.org
[ Upstream commit 300b5f661eebefb8571841b78091343eb87eca54 ]
The Last Level Cache Controller (LLCC) device does not need to access entire LLCC address space. Currently driver uses only hardware info and status registers which both reside in LLCC0_COMMON range (offset 0x30000, size 0x1000). Narrow the address space to allow binding other drivers to rest of LLCC address space.
Cc: Rajendra Nayak quic_rjendra@quicinc.com Cc: Sibi Sankar quic_sibis@quicinc.com Reported-by: Steev Klimaszewski steev@kali.org Suggested-by: Sibi Sankar quic_sibis@quicinc.com Signed-off-by: Krzysztof Kozlowski krzysztof.kozlowski@linaro.org Tested-by: Steev Klimaszewski steev@kali.org Signed-off-by: Bjorn Andersson bjorn.andersson@linaro.org Link: https://lore.kernel.org/r/20220728113748.170548-11-krzysztof.kozlowski@linar... Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/boot/dts/qcom/sdm845.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index f0e286715d1b..4d5ae5897d1d 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -2138,7 +2138,7 @@ uart15: serial@a9c000 {
llcc: system-cache-controller@1100000 { compatible = "qcom,sdm845-llcc"; - reg = <0 0x01100000 0 0x200000>, <0 0x01300000 0 0x50000>; + reg = <0 0x01100000 0 0x31000>, <0 0x01300000 0 0x50000>; reg-names = "llcc_base", "llcc_broadcast_base"; interrupts = <GIC_SPI 582 IRQ_TYPE_LEVEL_HIGH>; };
From: Alexander Stein alexander.stein@ew.tq-group.com
[ Upstream commit c9d38ff7080b2c4fa6786b82210fa13115895aae ]
In most cases this is related to fsl,err006687-workaround-present, which requires a GPIO interrupt next to a GIC interrupt.
This fixes the dtbs_check warning: imx6dl-mba6a.dtb: ethernet@2188000: More than one condition true in oneOf schema: {'$filename': 'Documentation/devicetree/bindings/net/fsl,fec.yaml', [...]
Signed-off-by: Alexander Stein alexander.stein@ew.tq-group.com Signed-off-by: Shawn Guo shawnguo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/boot/dts/imx6dl-riotboard.dts | 1 + arch/arm/boot/dts/imx6q-arm2.dts | 1 + arch/arm/boot/dts/imx6q-evi.dts | 1 + arch/arm/boot/dts/imx6q-mccmon6.dts | 1 + arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi | 1 + arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi | 1 + arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi | 1 + arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi | 1 + arch/arm/boot/dts/imx6qdl-sabreauto.dtsi | 1 + arch/arm/boot/dts/imx6qdl-tqma6a.dtsi | 1 + arch/arm/boot/dts/imx6qdl-ts7970.dtsi | 1 + 11 files changed, 11 insertions(+)
diff --git a/arch/arm/boot/dts/imx6dl-riotboard.dts b/arch/arm/boot/dts/imx6dl-riotboard.dts index e7d9bfbfd0e4..e7be05f205d3 100644 --- a/arch/arm/boot/dts/imx6dl-riotboard.dts +++ b/arch/arm/boot/dts/imx6dl-riotboard.dts @@ -90,6 +90,7 @@ &fec { pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii-id"; phy-handle = <&rgmii_phy>; + /delete-property/ interrupts; interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>, <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; fsl,err006687-workaround-present; diff --git a/arch/arm/boot/dts/imx6q-arm2.dts b/arch/arm/boot/dts/imx6q-arm2.dts index 0b40f52268b3..75586299d9ca 100644 --- a/arch/arm/boot/dts/imx6q-arm2.dts +++ b/arch/arm/boot/dts/imx6q-arm2.dts @@ -178,6 +178,7 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii"; + /delete-property/ interrupts; interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>, <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; fsl,err006687-workaround-present; diff --git a/arch/arm/boot/dts/imx6q-evi.dts b/arch/arm/boot/dts/imx6q-evi.dts index c63f371ede8b..78d941fef5df 100644 --- a/arch/arm/boot/dts/imx6q-evi.dts +++ b/arch/arm/boot/dts/imx6q-evi.dts @@ -146,6 +146,7 @@ &fec { pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii"; phy-reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>; + /delete-property/ interrupts; interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>, <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; fsl,err006687-workaround-present; diff --git a/arch/arm/boot/dts/imx6q-mccmon6.dts b/arch/arm/boot/dts/imx6q-mccmon6.dts index 55692c73943d..64ab01018b71 100644 --- a/arch/arm/boot/dts/imx6q-mccmon6.dts +++ b/arch/arm/boot/dts/imx6q-mccmon6.dts @@ -100,6 +100,7 @@ &fec { pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii"; phy-reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>; + /delete-property/ interrupts; interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>, <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; status = "okay"; diff --git a/arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi b/arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi index 
0ad4cb4f1e82..a53a5d0766a5 100644 --- a/arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi +++ b/arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi @@ -192,6 +192,7 @@ &fec { phy-mode = "rgmii"; phy-handle = <&ethphy>; phy-reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>; + /delete-property/ interrupts; interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>, <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; fsl,err006687-workaround-present; diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi index beaa2dcd436c..57c21a01f126 100644 --- a/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi +++ b/arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi @@ -334,6 +334,7 @@ &fec { phy-mode = "rgmii"; phy-handle = <&ethphy>; phy-reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>; + /delete-property/ interrupts; interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>, <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; fsl,err006687-workaround-present; diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi index ee7e2371f94b..000e9dc97b1a 100644 --- a/arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi +++ b/arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi @@ -263,6 +263,7 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii"; + /delete-property/ interrupts; interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>, <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; fsl,err006687-workaround-present; diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi index 904d5d051d63..731759bdd7f5 100644 --- a/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi +++ b/arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi @@ -267,6 +267,7 @@ &fec { phy-mode = "rgmii"; phy-handle = <&ethphy>; phy-reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>; + /delete-property/ interrupts; interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>, <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; fsl,err006687-workaround-present; diff --git a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi 
b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi index 1368a4762037..3dbb460ef102 100644 --- a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi @@ -295,6 +295,7 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii-id"; + /delete-property/ interrupts; interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>, <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; fsl,err006687-workaround-present; diff --git a/arch/arm/boot/dts/imx6qdl-tqma6a.dtsi b/arch/arm/boot/dts/imx6qdl-tqma6a.dtsi index 7dc3f0005b0f..0a36e1bce375 100644 --- a/arch/arm/boot/dts/imx6qdl-tqma6a.dtsi +++ b/arch/arm/boot/dts/imx6qdl-tqma6a.dtsi @@ -7,6 +7,7 @@ #include <dt-bindings/gpio/gpio.h>
&fec { + /delete-property/ interrupts; interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>, <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; fsl,err006687-workaround-present; diff --git a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi index d6ba4b2a60f6..c096d25a6f5b 100644 --- a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi +++ b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi @@ -192,6 +192,7 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii"; + /delete-property/ interrupts; interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>, <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; fsl,err006687-workaround-present;
From: Haibo Chen haibo.chen@nxp.com
[ Upstream commit e7c4ebe2f9cd68588eb24ba4ed122e696e2d5272 ]
Use the generic touchscreen property to configure the maximum pressure for the tsc2046 touch controller (the data sheet suggests 8-bit pressure). Otherwise, for ABS_PRESSURE, when the max and min values are configured to be the same, weston hits the following issue:
[17:19:39.183] event1 - ADS7846 Touchscreen: is tagged by udev as: Touchscreen [17:19:39.183] event1 - ADS7846 Touchscreen: kernel bug: device has min == max on ABS_PRESSURE [17:19:39.183] event1 - ADS7846 Touchscreen: was rejected [17:19:39.183] event1 - not using input device '/dev/input/event1'
This in turn prevents the weston-touch-calibrator application from listing touch devices.
root@imx6ul7d:~# weston-touch-calibrator could not load cursor 'dnd-move' could not load cursor 'dnd-copy' could not load cursor 'dnd-none' No devices listed.
And according to the binding doc, "ti,x-max", "ti,y-max" and "ti,pressure-max" are deprecated properties, so remove them. Also, for "ti,x-min", "ti,y-min" and "ti,x-plate-ohms", the values set in the dts equal the default values in the driver, so they are redundant; remove them here as well.
Signed-off-by: Haibo Chen haibo.chen@nxp.com Signed-off-by: Shawn Guo shawnguo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/boot/dts/imx7d-sdb.dts | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts index 78f4224a9bf4..e93b9cd9c27b 100644 --- a/arch/arm/boot/dts/imx7d-sdb.dts +++ b/arch/arm/boot/dts/imx7d-sdb.dts @@ -206,12 +206,7 @@ tsc2046@0 { interrupt-parent = <&gpio2>; interrupts = <29 0>; pendown-gpio = <&gpio2 29 GPIO_ACTIVE_HIGH>; - ti,x-min = /bits/ 16 <0>; - ti,x-max = /bits/ 16 <0>; - ti,y-min = /bits/ 16 <0>; - ti,y-max = /bits/ 16 <0>; - ti,pressure-max = /bits/ 16 <0>; - ti,x-plate-ohms = /bits/ 16 <400>; + touchscreen-max-pressure = <255>; wakeup-source; }; };
From: Jean Delvare jdelvare@suse.de
[ Upstream commit 2778caedb5667239823a29148dfc48b26a8b3c2a ]
With the following configuration options: CONFIG_OF is not set CONFIG_MTK_PMIC_WRAP=y CONFIG_MTK_SCPSYS=y we get the following build warnings:
CC drivers/soc/mediatek/mtk-pmic-wrap.o drivers/soc/mediatek/mtk-pmic-wrap.c:2138:34: warning: ‘of_pwrap_match_tbl’ defined but not used [-Wunused-const-variable=] drivers/soc/mediatek/mtk-pmic-wrap.c:1953:34: warning: ‘of_slave_match_tbl’ defined but not used [-Wunused-const-variable=] CC drivers/soc/mediatek/mtk-scpsys.o drivers/soc/mediatek/mtk-scpsys.c:1084:34: warning: ‘of_scpsys_match_tbl’ defined but not used [-Wunused-const-variable=]
Looking at the code, both drivers can only bind to OF-defined device nodes, so these drivers are useless without OF and should therefore depend on it.
Also drop of_match_ptr() from both drivers. We already know what it will resolve to, so we might as well save cpp some work.
Developers or QA teams who wish to test-build the code can still do so by enabling CONFIG_OF, which is available on all architectures and has no dependencies.
Signed-off-by: Jean Delvare jdelvare@suse.de Reported-by: kernel test robot lkp@intel.com Link: https://lore.kernel.org/all/202207240252.ZY5hSCNB-lkp@intel.com/ Cc: Matthias Brugger matthias.bgg@gmail.com Cc: Chenglin Xu chenglin.xu@mediatek.com Link: https://lore.kernel.org/r/20220730144833.0a0d9825@endymion.delvare Signed-off-by: Matthias Brugger matthias.bgg@gmail.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/soc/mediatek/Kconfig | 2 ++ drivers/soc/mediatek/mtk-pmic-wrap.c | 2 +- drivers/soc/mediatek/mtk-scpsys.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/soc/mediatek/Kconfig b/drivers/soc/mediatek/Kconfig index 3c3eedea35f7..73e63920b1b9 100644 --- a/drivers/soc/mediatek/Kconfig +++ b/drivers/soc/mediatek/Kconfig @@ -37,6 +37,7 @@ config MTK_INFRACFG config MTK_PMIC_WRAP tristate "MediaTek PMIC Wrapper Support" depends on RESET_CONTROLLER + depends on OF select REGMAP help Say yes here to add support for MediaTek PMIC Wrapper found @@ -46,6 +47,7 @@ config MTK_PMIC_WRAP config MTK_SCPSYS bool "MediaTek SCPSYS Support" default ARCH_MEDIATEK + depends on OF select REGMAP select MTK_INFRACFG select PM_GENERIC_DOMAINS if PM diff --git a/drivers/soc/mediatek/mtk-pmic-wrap.c b/drivers/soc/mediatek/mtk-pmic-wrap.c index d8cb0f833645..eb82ae06697f 100644 --- a/drivers/soc/mediatek/mtk-pmic-wrap.c +++ b/drivers/soc/mediatek/mtk-pmic-wrap.c @@ -2316,7 +2316,7 @@ static int pwrap_probe(struct platform_device *pdev) static struct platform_driver pwrap_drv = { .driver = { .name = "mt-pmic-pwrap", - .of_match_table = of_match_ptr(of_pwrap_match_tbl), + .of_match_table = of_pwrap_match_tbl, }, .probe = pwrap_probe, }; diff --git a/drivers/soc/mediatek/mtk-scpsys.c b/drivers/soc/mediatek/mtk-scpsys.c index ca75b14931ec..7a668888111c 100644 --- a/drivers/soc/mediatek/mtk-scpsys.c +++ b/drivers/soc/mediatek/mtk-scpsys.c @@ -1141,7 +1141,7 @@ static struct platform_driver scpsys_drv = { .name = "mtk-scpsys", .suppress_bind_attrs = true, .owner = THIS_MODULE, - .of_match_table = of_match_ptr(of_scpsys_match_tbl), + .of_match_table = of_scpsys_match_tbl, }, }; builtin_platform_driver(scpsys_drv);
Hi Sasha,
On Tue, 11 Oct 2022 10:49:32 -0400, Sasha Levin wrote:
From: Jean Delvare jdelvare@suse.de
[ Upstream commit 2778caedb5667239823a29148dfc48b26a8b3c2a ]
With the following configuration options: CONFIG_OF is not set CONFIG_MTK_PMIC_WRAP=y CONFIG_MTK_SCPSYS=y we get the following build warnings:
CC drivers/soc/mediatek/mtk-pmic-wrap.o drivers/soc/mediatek/mtk-pmic-wrap.c:2138:34: warning: ‘of_pwrap_match_tbl’ defined but not used [-Wunused-const-variable=] drivers/soc/mediatek/mtk-pmic-wrap.c:1953:34: warning: ‘of_slave_match_tbl’ defined but not used [-Wunused-const-variable=] CC drivers/soc/mediatek/mtk-scpsys.o drivers/soc/mediatek/mtk-scpsys.c:1084:34: warning: ‘of_scpsys_match_tbl’ defined but not used [-Wunused-const-variable=] (...)
This is warning only, pretty harmless, so I don't think this qualifies for stable kernel trees.
Thanks,
On Wed, Oct 12, 2022 at 12:49:49AM +0200, Jean Delvare wrote:
Hi Sasha,
On Tue, 11 Oct 2022 10:49:32 -0400, Sasha Levin wrote:
From: Jean Delvare jdelvare@suse.de
[ Upstream commit 2778caedb5667239823a29148dfc48b26a8b3c2a ]
With the following configuration options: CONFIG_OF is not set CONFIG_MTK_PMIC_WRAP=y CONFIG_MTK_SCPSYS=y we get the following build warnings:
CC drivers/soc/mediatek/mtk-pmic-wrap.o drivers/soc/mediatek/mtk-pmic-wrap.c:2138:34: warning: ‘of_pwrap_match_tbl’ defined but not used [-Wunused-const-variable=] drivers/soc/mediatek/mtk-pmic-wrap.c:1953:34: warning: ‘of_slave_match_tbl’ defined but not used [-Wunused-const-variable=] CC drivers/soc/mediatek/mtk-scpsys.o drivers/soc/mediatek/mtk-scpsys.c:1084:34: warning: ‘of_scpsys_match_tbl’ defined but not used [-Wunused-const-variable=] (...)
This is warning only, pretty harmless, so I don't think this qualifies for stable kernel trees.
Ack, I can drop it.
From: Krzysztof Kozlowski krzysztof.kozlowski@linaro.org
[ Upstream commit 5589ffb2da2a66988ab3a68334dad3e68b42e3a9 ]
Correct SPMI PMIC VADC channel node name: 1. Use hyphens instead of underscores, 2. Add missing unit address.
This fixes `make dtbs_check` warnings like:
qcom/sc7280-idp.dtb: pmic@0: adc@3100: 'pmk8350_die_temp', 'pmr735a_die_temp' do not match any of the regexes: '^.*@[0-9a-f]+$', 'pinctrl-[0-9]+'
Signed-off-by: Krzysztof Kozlowski krzysztof.kozlowski@linaro.org Reviewed-by: Stephen Boyd sboyd@kernel.org Reviewed-by: Vinod Koul vkoul@kernel.org Reviewed-by: David Heidelberg david@ixit.cz Signed-off-by: Bjorn Andersson andersson@kernel.org Link: https://lore.kernel.org/r/20220828084341.112146-12-krzysztof.kozlowski@linar... Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/boot/dts/qcom/sc7280-idp.dts | 2 +- arch/arm64/boot/dts/qcom/sc7280-idp.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/boot/dts/qcom/sc7280-idp.dts b/arch/arm64/boot/dts/qcom/sc7280-idp.dts index 6d3ff80582ae..e2e37a0292ad 100644 --- a/arch/arm64/boot/dts/qcom/sc7280-idp.dts +++ b/arch/arm64/boot/dts/qcom/sc7280-idp.dts @@ -78,7 +78,7 @@ &nvme_3v3_regulator { };
&pmk8350_vadc { - pmr735a_die_temp { + pmr735a-die-temp@403 { reg = <PMR735A_ADC7_DIE_TEMP>; label = "pmr735a_die_temp"; qcom,pre-scaling = <1 1>; diff --git a/arch/arm64/boot/dts/qcom/sc7280-idp.dtsi b/arch/arm64/boot/dts/qcom/sc7280-idp.dtsi index a74e0b730db6..27c47ddbdf02 100644 --- a/arch/arm64/boot/dts/qcom/sc7280-idp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280-idp.dtsi @@ -264,7 +264,7 @@ &pcie1_phy { };
&pmk8350_vadc { - pmk8350_die_temp { + pmk8350-die-temp@3 { reg = <PMK8350_ADC7_DIE_TEMP>; label = "pmk8350_die_temp"; qcom,pre-scaling = <1 1>;
From: Alexander Stein alexander.stein@ew.tq-group.com
[ Upstream commit b11d083c5dcec7c42fe982c854706d404ddd3a5f ]
All 3 properties are required by sram.yaml. Fixes the dtbs_check warning: sram@900000: '#address-cells' is a required property sram@900000: '#size-cells' is a required property sram@900000: 'ranges' is a required property
Signed-off-by: Alexander Stein alexander.stein@ew.tq-group.com Signed-off-by: Shawn Guo shawnguo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/boot/dts/imx6q.dtsi | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/arch/arm/boot/dts/imx6q.dtsi b/arch/arm/boot/dts/imx6q.dtsi index 3b77eae40e39..df86049a695b 100644 --- a/arch/arm/boot/dts/imx6q.dtsi +++ b/arch/arm/boot/dts/imx6q.dtsi @@ -163,6 +163,9 @@ soc: soc { ocram: sram@900000 { compatible = "mmio-sram"; reg = <0x00900000 0x40000>; + ranges = <0 0x00900000 0x40000>; + #address-cells = <1>; + #size-cells = <1>; clocks = <&clks IMX6QDL_CLK_OCRAM>; };
From: Alexander Stein alexander.stein@ew.tq-group.com
[ Upstream commit f5848b95633d598bacf0500e0108dc5961af88c0 ]
All 3 properties are required by sram.yaml. Fixes the dtbs_check warning: sram@900000: '#address-cells' is a required property sram@900000: '#size-cells' is a required property sram@900000: 'ranges' is a required property
Signed-off-by: Alexander Stein alexander.stein@ew.tq-group.com Signed-off-by: Shawn Guo shawnguo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/boot/dts/imx6dl.dtsi | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/arch/arm/boot/dts/imx6dl.dtsi b/arch/arm/boot/dts/imx6dl.dtsi index 8e0ed209ede0..dc919e09a505 100644 --- a/arch/arm/boot/dts/imx6dl.dtsi +++ b/arch/arm/boot/dts/imx6dl.dtsi @@ -84,6 +84,9 @@ soc: soc { ocram: sram@900000 { compatible = "mmio-sram"; reg = <0x00900000 0x20000>; + ranges = <0 0x00900000 0x20000>; + #address-cells = <1>; + #size-cells = <1>; clocks = <&clks IMX6QDL_CLK_OCRAM>; };
From: Alexander Stein alexander.stein@ew.tq-group.com
[ Upstream commit 088fe5237435ee2f7ed4450519b2ef58b94c832f ]
All 3 properties are required by sram.yaml. Fixes the dtbs_check warning: sram@940000: '#address-cells' is a required property sram@940000: '#size-cells' is a required property sram@940000: 'ranges' is a required property
Signed-off-by: Alexander Stein alexander.stein@ew.tq-group.com Signed-off-by: Shawn Guo shawnguo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/boot/dts/imx6qp.dtsi | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/arch/arm/boot/dts/imx6qp.dtsi b/arch/arm/boot/dts/imx6qp.dtsi index 050365513836..fc164991d2ae 100644 --- a/arch/arm/boot/dts/imx6qp.dtsi +++ b/arch/arm/boot/dts/imx6qp.dtsi @@ -9,12 +9,18 @@ soc { ocram2: sram@940000 { compatible = "mmio-sram"; reg = <0x00940000 0x20000>; + ranges = <0 0x00940000 0x20000>; + #address-cells = <1>; + #size-cells = <1>; clocks = <&clks IMX6QDL_CLK_OCRAM>; };
ocram3: sram@960000 { compatible = "mmio-sram"; reg = <0x00960000 0x20000>; + ranges = <0 0x00960000 0x20000>; + #address-cells = <1>; + #size-cells = <1>; clocks = <&clks IMX6QDL_CLK_OCRAM>; };
From: Alexander Stein alexander.stein@ew.tq-group.com
[ Upstream commit 60c9213a1d9941a8b33db570796c3f9be8984974 ]
All 3 properties are required by sram.yaml. Fixes the dtbs_check warning: sram@900000: '#address-cells' is a required property sram@900000: '#size-cells' is a required property sram@900000: 'ranges' is a required property
Signed-off-by: Alexander Stein alexander.stein@ew.tq-group.com Signed-off-by: Shawn Guo shawnguo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/boot/dts/imx6sl.dtsi | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/arch/arm/boot/dts/imx6sl.dtsi b/arch/arm/boot/dts/imx6sl.dtsi index 06a515121dfc..cfd6b4972ae7 100644 --- a/arch/arm/boot/dts/imx6sl.dtsi +++ b/arch/arm/boot/dts/imx6sl.dtsi @@ -115,6 +115,9 @@ soc { ocram: sram@900000 { compatible = "mmio-sram"; reg = <0x00900000 0x20000>; + ranges = <0 0x00900000 0x20000>; + #address-cells = <1>; + #size-cells = <1>; clocks = <&clks IMX6SL_CLK_OCRAM>; };
From: Alexander Stein alexander.stein@ew.tq-group.com
[ Upstream commit 7492a83ed9b7a151e2dd11d64b06da7a7f0fa7f9 ]
All 3 properties are required by sram.yaml. Fixes the dtbs_check warning: sram@900000: '#address-cells' is a required property sram@900000: '#size-cells' is a required property sram@900000: 'ranges' is a required property
Signed-off-by: Alexander Stein alexander.stein@ew.tq-group.com Signed-off-by: Shawn Guo shawnguo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/boot/dts/imx6sll.dtsi | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/arch/arm/boot/dts/imx6sll.dtsi b/arch/arm/boot/dts/imx6sll.dtsi index d4a000c3dde7..2873369a57c0 100644 --- a/arch/arm/boot/dts/imx6sll.dtsi +++ b/arch/arm/boot/dts/imx6sll.dtsi @@ -115,6 +115,9 @@ soc { ocram: sram@900000 { compatible = "mmio-sram"; reg = <0x00900000 0x20000>; + ranges = <0 0x00900000 0x20000>; + #address-cells = <1>; + #size-cells = <1>; };
intc: interrupt-controller@a01000 {
From: Alexander Stein alexander.stein@ew.tq-group.com
[ Upstream commit 415432c008b2bce8138841356ba444631cabaa50 ]
All 3 properties are required by sram.yaml. Fixes the dtbs_check warning: sram@900000: '#address-cells' is a required property sram@900000: '#size-cells' is a required property sram@900000: 'ranges' is a required property
Signed-off-by: Alexander Stein alexander.stein@ew.tq-group.com Signed-off-by: Shawn Guo shawnguo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/boot/dts/imx6sx.dtsi | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi index 4d075e2bf749..2611eef3b2a2 100644 --- a/arch/arm/boot/dts/imx6sx.dtsi +++ b/arch/arm/boot/dts/imx6sx.dtsi @@ -164,12 +164,18 @@ soc: soc { ocram_s: sram@8f8000 { compatible = "mmio-sram"; reg = <0x008f8000 0x4000>; + ranges = <0 0x008f8000 0x4000>; + #address-cells = <1>; + #size-cells = <1>; clocks = <&clks IMX6SX_CLK_OCRAM_S>; };
ocram: sram@900000 { compatible = "mmio-sram"; reg = <0x00900000 0x20000>; + ranges = <0 0x00900000 0x20000>; + #address-cells = <1>; + #size-cells = <1>; clocks = <&clks IMX6SX_CLK_OCRAM>; };
From: Marcel Ziswiler marcel.ziswiler@toradex.com
[ Upstream commit 218db824a7519856d0eaaeb5c41ca504ed550210 ]
This fixes the following error:
arch/arm/boot/dts/imx6sl.dtsi:714: error: code indent should use tabs where possible
Signed-off-by: Marcel Ziswiler marcel.ziswiler@toradex.com Signed-off-by: Shawn Guo shawnguo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/boot/dts/imx6sl.dtsi | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/arch/arm/boot/dts/imx6sl.dtsi b/arch/arm/boot/dts/imx6sl.dtsi index cfd6b4972ae7..01122ddfdc0d 100644 --- a/arch/arm/boot/dts/imx6sl.dtsi +++ b/arch/arm/boot/dts/imx6sl.dtsi @@ -61,10 +61,10 @@ cpu0: cpu@0 { <792000 1175000>, <396000 975000>; fsl,soc-operating-points = - /* ARM kHz SOC-PU uV */ - <996000 1225000>, - <792000 1175000>, - <396000 1175000>; + /* ARM kHz SOC-PU uV */ + <996000 1225000>, + <792000 1175000>, + <396000 1175000>; clock-latency = <61036>; /* two CLK32 periods */ #cooling-cells = <2>; clocks = <&clks IMX6SL_CLK_ARM>, <&clks IMX6SL_CLK_PLL2_PFD2>, @@ -225,7 +225,7 @@ ecspi4: spi@2014000 {
uart5: serial@2018000 { compatible = "fsl,imx6sl-uart", - "fsl,imx6q-uart", "fsl,imx21-uart"; + "fsl,imx6q-uart", "fsl,imx21-uart"; reg = <0x02018000 0x4000>; interrupts = <0 30 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6SL_CLK_UART>, @@ -238,7 +238,7 @@ uart5: serial@2018000 {
uart1: serial@2020000 { compatible = "fsl,imx6sl-uart", - "fsl,imx6q-uart", "fsl,imx21-uart"; + "fsl,imx6q-uart", "fsl,imx21-uart"; reg = <0x02020000 0x4000>; interrupts = <0 26 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6SL_CLK_UART>, @@ -251,7 +251,7 @@ uart1: serial@2020000 {
uart2: serial@2024000 { compatible = "fsl,imx6sl-uart", - "fsl,imx6q-uart", "fsl,imx21-uart"; + "fsl,imx6q-uart", "fsl,imx21-uart"; reg = <0x02024000 0x4000>; interrupts = <0 27 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6SL_CLK_UART>, @@ -312,7 +312,7 @@ ssi3: ssi@2030000 {
uart3: serial@2034000 { compatible = "fsl,imx6sl-uart", - "fsl,imx6q-uart", "fsl,imx21-uart"; + "fsl,imx6q-uart", "fsl,imx21-uart"; reg = <0x02034000 0x4000>; interrupts = <0 28 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6SL_CLK_UART>, @@ -325,7 +325,7 @@ uart3: serial@2034000 {
uart4: serial@2038000 { compatible = "fsl,imx6sl-uart", - "fsl,imx6q-uart", "fsl,imx21-uart"; + "fsl,imx6q-uart", "fsl,imx21-uart"; reg = <0x02038000 0x4000>; interrupts = <0 29 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6SL_CLK_UART>, @@ -714,7 +714,7 @@ pd_pu: power-domain@1 { #power-domain-cells = <0>; power-supply = <&reg_pu>; clocks = <&clks IMX6SL_CLK_GPU2D_OVG>, - <&clks IMX6SL_CLK_GPU2D_PODF>; + <&clks IMX6SL_CLK_GPU2D_PODF>; };
pd_disp: power-domain@2 {
From: Marcel Ziswiler marcel.ziswiler@toradex.com
[ Upstream commit fd2dd7077c7498765e7326c1b7f34bde85f1a975 ]
This fixes the following warning:
arch/arm/boot/dts/imx6sx-udoo-neo.dtsi:309: check: Please don't use multiple blank lines
While at it, use tabs indent for some pinctrl entries.
Signed-off-by: Marcel Ziswiler marcel.ziswiler@toradex.com Signed-off-by: Shawn Guo shawnguo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/boot/dts/imx6sx-udoo-neo.dtsi | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/arch/arm/boot/dts/imx6sx-udoo-neo.dtsi b/arch/arm/boot/dts/imx6sx-udoo-neo.dtsi index 35861bbea94e..c84ea1fac5e9 100644 --- a/arch/arm/boot/dts/imx6sx-udoo-neo.dtsi +++ b/arch/arm/boot/dts/imx6sx-udoo-neo.dtsi @@ -226,7 +226,7 @@ lcdc: endpoint { &iomuxc { pinctrl_bt_reg: btreggrp { fsl,pins = - <MX6SX_PAD_KEY_ROW2__GPIO2_IO_17 0x15059>; + <MX6SX_PAD_KEY_ROW2__GPIO2_IO_17 0x15059>; };
pinctrl_enet1: enet1grp { @@ -306,7 +306,6 @@ MX6SX_PAD_LCD1_RESET__GPIO3_IO_27 0x4001b0b0 >; };
- pinctrl_uart1: uart1grp { fsl,pins = <MX6SX_PAD_GPIO1_IO04__UART1_DCE_TX 0x1b0b1>, @@ -347,24 +346,23 @@ pinctrl_uart6: uart6grp {
pinctrl_otg1_reg: otg1grp { fsl,pins = - <MX6SX_PAD_GPIO1_IO09__GPIO1_IO_9 0x10b0>; + <MX6SX_PAD_GPIO1_IO09__GPIO1_IO_9 0x10b0>; };
- pinctrl_otg2_reg: otg2grp { fsl,pins = - <MX6SX_PAD_NAND_RE_B__GPIO4_IO_12 0x10b0>; + <MX6SX_PAD_NAND_RE_B__GPIO4_IO_12 0x10b0>; };
pinctrl_usb_otg1: usbotg1grp { fsl,pins = - <MX6SX_PAD_GPIO1_IO10__ANATOP_OTG1_ID 0x17059>, - <MX6SX_PAD_GPIO1_IO08__USB_OTG1_OC 0x10b0>; + <MX6SX_PAD_GPIO1_IO10__ANATOP_OTG1_ID 0x17059>, + <MX6SX_PAD_GPIO1_IO08__USB_OTG1_OC 0x10b0>; };
pinctrl_usb_otg2: usbot2ggrp { fsl,pins = - <MX6SX_PAD_QSPI1A_DATA0__USB_OTG2_OC 0x10b0>; + <MX6SX_PAD_QSPI1A_DATA0__USB_OTG2_OC 0x10b0>; };
pinctrl_usdhc2: usdhc2grp {
From: Mark Brown broonie@kernel.org
[ Upstream commit 5c152c2f66f9368394b89ac90dc7483476ef7b88 ]
When arm64 signal context data overflows the base struct sigcontext it gets placed in an extra buffer pointed to by a record of type EXTRA_CONTEXT in the base struct sigcontext which is required to be the last record in the base struct sigframe. The current validation code attempts to check this by using GET_RESV_NEXT_HEAD() to step forward from the current record to the next but that is a macro which assumes it is being provided with a struct _aarch64_ctx and uses the size there to skip forward to the next record. Instead validate_extra_context() passes it a struct extra_context which has a separate size field. This compiles but results in us trying to validate a termination record in completely the wrong place, at best failing validation and at worst just segfaulting. Fix this by passing the struct _aarch64_ctx we meant to into the macro.
Signed-off-by: Mark Brown broonie@kernel.org Link: https://lore.kernel.org/r/20220829160703.874492-4-broonie@kernel.org Signed-off-by: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/arm64/signal/testcases/testcases.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c index 84c36bee4d82..d98828cb542b 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.c +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c @@ -33,7 +33,7 @@ bool validate_extra_context(struct extra_context *extra, char **err) return false;
fprintf(stderr, "Validating EXTRA...\n"); - term = GET_RESV_NEXT_HEAD(extra); + term = GET_RESV_NEXT_HEAD(&extra->head); if (!term || term->magic || term->size) { *err = "Missing terminator after EXTRA context"; return false;
From: Mark Brown broonie@kernel.org
[ Upstream commit 38150a6204c731a4846786682e500d132571fd82 ]
In order to allow testing of signal contexts that overflow the base signal frame, allow callers to pass the buffer size for the user context into get_current_context(). No functional change.
Signed-off-by: Mark Brown broonie@kernel.org Link: https://lore.kernel.org/r/20220829160703.874492-10-broonie@kernel.org Signed-off-by: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/arm64/signal/test_signals_utils.h | 5 +++-- .../arm64/signal/testcases/fake_sigreturn_bad_magic.c | 2 +- .../arm64/signal/testcases/fake_sigreturn_bad_size.c | 2 +- .../signal/testcases/fake_sigreturn_bad_size_for_magic0.c | 2 +- .../signal/testcases/fake_sigreturn_duplicated_fpsimd.c | 2 +- .../arm64/signal/testcases/fake_sigreturn_misaligned_sp.c | 2 +- .../arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c | 2 +- .../arm64/signal/testcases/fake_sigreturn_sme_change_vl.c | 2 +- .../arm64/signal/testcases/fake_sigreturn_sve_change_vl.c | 2 +- tools/testing/selftests/arm64/signal/testcases/sme_vl.c | 2 +- tools/testing/selftests/arm64/signal/testcases/ssve_regs.c | 2 +- tools/testing/selftests/arm64/signal/testcases/sve_regs.c | 2 +- tools/testing/selftests/arm64/signal/testcases/sve_vl.c | 2 +- tools/testing/selftests/arm64/signal/testcases/za_regs.c | 2 +- 14 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h index f3aa99ba67bb..222093f51b67 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.h +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h @@ -56,7 +56,8 @@ static inline bool feats_ok(struct tdescr *td) * at sizeof(ucontext_t). */ static __always_inline bool get_current_context(struct tdescr *td, - ucontext_t *dest_uc) + ucontext_t *dest_uc, + size_t dest_sz) { static volatile bool seen_already;
@@ -64,7 +65,7 @@ static __always_inline bool get_current_context(struct tdescr *td, /* it's a genuine invocation..reinit */ seen_already = 0; td->live_uc_valid = 0; - td->live_sz = sizeof(*dest_uc); + td->live_sz = dest_sz; memset(dest_uc, 0x00, td->live_sz); td->live_uc = dest_uc; /* diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c index 8dc600a7d4fd..8c7f00ea9823 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c @@ -21,7 +21,7 @@ static int fake_sigreturn_bad_magic_run(struct tdescr *td, struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
/* just to fill the ucontext_t with something real */ - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1;
/* need at least 2*HDR_SZ space: KSFT_BAD_MAGIC + terminator. */ diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c index b3c362100666..1c03f6b638e0 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c @@ -24,7 +24,7 @@ static int fake_sigreturn_bad_size_run(struct tdescr *td, struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
/* just to fill the ucontext_t with something real */ - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1;
resv_sz = GET_SF_RESV_SIZE(sf); diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c index a44b88bfc81a..bc22f64b544e 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c @@ -21,7 +21,7 @@ static int fake_sigreturn_bad_size_for_magic0_run(struct tdescr *td, struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
/* just to fill the ucontext_t with something real */ - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1;
/* at least HDR_SZ for the badly sized terminator. */ diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c index afe8915f0998..63e3906b631c 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c @@ -21,7 +21,7 @@ static int fake_sigreturn_duplicated_fpsimd_run(struct tdescr *td, struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
/* just to fill the ucontext_t with something real */ - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1;
head = get_starting_head(shead, sizeof(struct fpsimd_context) + HDR_SZ, diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c index 1e089e66f9f3..d00625ff12c2 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c @@ -19,7 +19,7 @@ static int fake_sigreturn_misaligned_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc) { /* just to fill the ucontext_t with something real */ - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1;
/* Forcing sigframe on misaligned SP (16 + 3) */ diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c index 08ecd8073a1a..f805138cb20d 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c @@ -23,7 +23,7 @@ static int fake_sigreturn_missing_fpsimd_run(struct tdescr *td, struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
/* just to fill the ucontext_t with something real */ - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1;
resv_sz = GET_SF_RESV_SIZE(sf); diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c index 7ed762b7202f..ebd5815b54bb 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c @@ -54,7 +54,7 @@ static int fake_sigreturn_ssve_change_vl(struct tdescr *td, struct sve_context *sve;
/* Get a signal context with a SME ZA frame in it */ - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1;
resv_sz = GET_SF_RESV_SIZE(sf); diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c index 915821375b0a..e2a452190511 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c @@ -56,7 +56,7 @@ static int fake_sigreturn_sve_change_vl(struct tdescr *td, struct sve_context *sve;
/* Get a signal context with a SVE frame in it */ - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1;
resv_sz = GET_SF_RESV_SIZE(sf); diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_vl.c b/tools/testing/selftests/arm64/signal/testcases/sme_vl.c index 13ff3b35cbaf..75f387f2db81 100644 --- a/tools/testing/selftests/arm64/signal/testcases/sme_vl.c +++ b/tools/testing/selftests/arm64/signal/testcases/sme_vl.c @@ -34,7 +34,7 @@ static int sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc) struct za_context *za;
/* Get a signal context which should have a ZA frame in it */ - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1;
resv_sz = GET_SF_RESV_SIZE(sf); diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c index 9022a6cab4b3..71f14632c524 100644 --- a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c @@ -73,7 +73,7 @@ static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, * in it. */ setup_ssve_regs(); - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1;
resv_sz = GET_SF_RESV_SIZE(sf); diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c index 4b2418aa08a9..4cdedb706786 100644 --- a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c @@ -71,7 +71,7 @@ static int do_one_sve_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, * in it. */ setup_sve_regs(); - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1;
resv_sz = GET_SF_RESV_SIZE(sf); diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_vl.c b/tools/testing/selftests/arm64/signal/testcases/sve_vl.c index 92904653add1..aa835acec062 100644 --- a/tools/testing/selftests/arm64/signal/testcases/sve_vl.c +++ b/tools/testing/selftests/arm64/signal/testcases/sve_vl.c @@ -34,7 +34,7 @@ static int sve_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc) struct sve_context *sve;
/* Get a signal context which should have a SVE frame in it */ - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1;
resv_sz = GET_SF_RESV_SIZE(sf); diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c index b94e4f99fcac..70c00ca6eded 100644 --- a/tools/testing/selftests/arm64/signal/testcases/za_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/za_regs.c @@ -71,7 +71,7 @@ static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, * in it. */ setup_za_regs(); - if (!get_current_context(td, &sf.uc)) + if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1;
resv_sz = GET_SF_RESV_SIZE(sf);
On Tue, Oct 11, 2022 at 10:49:43AM -0400, Sasha Levin wrote:
From: Mark Brown broonie@kernel.org
[ Upstream commit 38150a6204c731a4846786682e500d132571fd82 ]
In order to allow testing of signal contexts that overflow the base signal frame allow callers to pass the buffer size for the user context into get_signal_context(). No functional change.
This doesn't obviously make sense independently, even by the relaxed standards stable uses these days?
On Tue, Oct 11, 2022 at 04:04:42PM +0100, Mark Brown wrote:
On Tue, Oct 11, 2022 at 10:49:43AM -0400, Sasha Levin wrote:
From: Mark Brown broonie@kernel.org
[ Upstream commit 38150a6204c731a4846786682e500d132571fd82 ]
In order to allow testing of signal contexts that overflow the base signal frame allow callers to pass the buffer size for the user context into get_signal_context(). No functional change.
This doesn't obviously make sense independently, even by the relaxed standards stable uses these days?
I can drop this one, or, are there maybe additional patches we do want to take?
On Thu, Oct 13, 2022 at 01:58:30PM -0400, Sasha Levin wrote:
On Tue, Oct 11, 2022 at 04:04:42PM +0100, Mark Brown wrote:
On Tue, Oct 11, 2022 at 10:49:43AM -0400, Sasha Levin wrote:
From: Mark Brown broonie@kernel.org
In order to allow testing of signal contexts that overflow the base signal frame allow callers to pass the buffer size for the user context into get_signal_context(). No functional change.
This doesn't obviously make sense independently, even by the relaxed standards stable uses these days?
I can drop this one, or, are there maybe additional patches we do want to take?
Unless you're going to start backporting extra tests as well I'd drop it.
From: Linus Walleij linus.walleij@linaro.org
[ Upstream commit 2c230431e1e809270178905974f57cf3878939f5 ]
This enables the Sparc to use <asm-generic/io.h> to fill in the missing (undefined) [read|write]sq I/O accessor functions.
This is needed if Sparc[64] ever wants to use CONFIG_REGMAP_MMIO, which has been patched to use accelerated _noinc accessors such as readsq/writesq that Sparc64, while being a 64-bit platform, does not yet provide.
This comes with the requirement that everything the architecture already provides needs to be defined, rather than just being, say, static inline functions.
Bite the bullet and just provide the definitions and make it work. Compile-tested on sparc32 and sparc64.
Reported-by: kernel test robot lkp@intel.com Signed-off-by: Linus Walleij linus.walleij@linaro.org Cc: David S. Miller davem@davemloft.net Cc: sparclinux@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: Mark Brown broonie@kernel.org Cc: Arnd Bergmann arnd@arndb.de Link: https://lore.kernel.org/linux-arm-kernel/202208201639.HXye3ke4-lkp@intel.com... Signed-off-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Sasha Levin sashal@kernel.org --- arch/sparc/include/asm/io.h | 2 ++ arch/sparc/include/asm/io_64.h | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+)
diff --git a/arch/sparc/include/asm/io.h b/arch/sparc/include/asm/io.h index 2eefa526b38f..2dad9be9ec75 100644 --- a/arch/sparc/include/asm/io.h +++ b/arch/sparc/include/asm/io.h @@ -19,4 +19,6 @@ #define writel_be(__w, __addr) __raw_writel(__w, __addr) #define writew_be(__l, __addr) __raw_writew(__l, __addr)
+#include <asm-generic/io.h> + #endif diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h index 5ffa820dcd4d..9303270b22f3 100644 --- a/arch/sparc/include/asm/io_64.h +++ b/arch/sparc/include/asm/io_64.h @@ -9,6 +9,7 @@ #include <asm/page.h> /* IO address mapping routines need this */ #include <asm/asi.h> #include <asm-generic/pci_iomap.h> +#define pci_iomap pci_iomap
/* BIO layer definitions. */ extern unsigned long kern_base, kern_size; @@ -239,38 +240,51 @@ static inline void outl(u32 l, unsigned long addr) void outsb(unsigned long, const void *, unsigned long); void outsw(unsigned long, const void *, unsigned long); void outsl(unsigned long, const void *, unsigned long); +#define outsb outsb +#define outsw outsw +#define outsl outsl void insb(unsigned long, void *, unsigned long); void insw(unsigned long, void *, unsigned long); void insl(unsigned long, void *, unsigned long); +#define insb insb +#define insw insw +#define insl insl
static inline void readsb(void __iomem *port, void *buf, unsigned long count) { insb((unsigned long __force)port, buf, count); } +#define readsb readsb + static inline void readsw(void __iomem *port, void *buf, unsigned long count) { insw((unsigned long __force)port, buf, count); } +#define readsw readsw
static inline void readsl(void __iomem *port, void *buf, unsigned long count) { insl((unsigned long __force)port, buf, count); } +#define readsl readsl
static inline void writesb(void __iomem *port, const void *buf, unsigned long count) { outsb((unsigned long __force)port, buf, count); } +#define writesb writesb
static inline void writesw(void __iomem *port, const void *buf, unsigned long count) { outsw((unsigned long __force)port, buf, count); } +#define writesw writesw
static inline void writesl(void __iomem *port, const void *buf, unsigned long count) { outsl((unsigned long __force)port, buf, count); } +#define writesl writesl
#define ioread8_rep(p,d,l) readsb(p,d,l) #define ioread16_rep(p,d,l) readsw(p,d,l) @@ -344,6 +358,7 @@ static inline void memset_io(volatile void __iomem *dst, int c, __kernel_size_t d++; } } +#define memset_io memset_io
static inline void sbus_memcpy_fromio(void *dst, const volatile void __iomem *src, __kernel_size_t n) @@ -369,6 +384,7 @@ static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, src++; } } +#define memcpy_fromio memcpy_fromio
static inline void sbus_memcpy_toio(volatile void __iomem *dst, const void *src, __kernel_size_t n) @@ -395,6 +411,7 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, d++; } } +#define memcpy_toio memcpy_toio
#ifdef __KERNEL__
@@ -412,7 +429,9 @@ static inline void __iomem *ioremap(unsigned long offset, unsigned long size) static inline void __iomem *ioremap_np(unsigned long offset, unsigned long size) { return NULL; + } +#define ioremap_np ioremap_np
static inline void iounmap(volatile void __iomem *addr) { @@ -432,10 +451,13 @@ static inline void iounmap(volatile void __iomem *addr) /* Create a virtual mapping cookie for an IO port range */ void __iomem *ioport_map(unsigned long port, unsigned int nr); void ioport_unmap(void __iomem *); +#define ioport_map ioport_map +#define ioport_unmap ioport_unmap
/* Create a virtual mapping cookie for a PCI BAR (memory or IO) */ struct pci_dev; void pci_iounmap(struct pci_dev *dev, void __iomem *); +#define pci_iounmap pci_iounmap
static inline int sbus_can_dma_64bit(void) {
From: Mark Rutland mark.rutland@arm.com
[ Upstream commit b2c3ccbd0011bb3b51d0fec24cb3a5812b1ec8ea ]
When CONFIG_ARM64_LSE_ATOMICS=y, each use of an LL/SC atomic results in a fragment of code being generated in a subsection without a clear association with its caller. A trampoline in the caller branches to the LL/SC atomic with a direct branch, and the atomic directly branches back into its trampoline.
This breaks backtracing, as any PC within the out-of-line fragment will be symbolized as an offset from the nearest prior symbol (which may not be the function using the atomic), and since the atomic returns with a direct branch, the caller's PC may be missing from the backtrace.
For example, with secondary_start_kernel() hacked to contain atomic_inc(NULL), the resulting exception can be reported as being taken from cpus_are_stuck_in_kernel():
| Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 | Mem abort info: | ESR = 0x0000000096000004 | EC = 0x25: DABT (current EL), IL = 32 bits | SET = 0, FnV = 0 | EA = 0, S1PTW = 0 | FSC = 0x04: level 0 translation fault | Data abort info: | ISV = 0, ISS = 0x00000004 | CM = 0, WnR = 0 | [0000000000000000] user address but active_mm is swapper | Internal error: Oops: 96000004 [#1] PREEMPT SMP | Modules linked in: | CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.19.0-11219-geb555cb5b794-dirty #3 | Hardware name: linux,dummy-virt (DT) | pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : cpus_are_stuck_in_kernel+0xa4/0x120 | lr : secondary_start_kernel+0x164/0x170 | sp : ffff80000a4cbe90 | x29: ffff80000a4cbe90 x28: 0000000000000000 x27: 0000000000000000 | x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 | x23: 0000000000000000 x22: 0000000000000000 x21: 0000000000000000 | x20: 0000000000000001 x19: 0000000000000001 x18: 0000000000000008 | x17: 3030383832343030 x16: 3030303030307830 x15: ffff80000a4cbab0 | x14: 0000000000000001 x13: 5d31666130663133 x12: 3478305b20313030 | x11: 3030303030303078 x10: 3020726f73736563 x9 : 726f737365636f72 | x8 : ffff800009ff2ef0 x7 : 0000000000000003 x6 : 0000000000000000 | x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000100 | x2 : 0000000000000000 x1 : ffff0000029bd880 x0 : 0000000000000000 | Call trace: | cpus_are_stuck_in_kernel+0xa4/0x120 | __secondary_switched+0xb0/0xb4 | Code: 35ffffa3 17fffc6c d53cd040 f9800011 (885f7c01) | ---[ end trace 0000000000000000 ]---
This is confusing and hinders debugging, and will be problematic for CONFIG_LIVEPATCH as these cases cannot be unwound reliably.
This is very similar to recent issues with out-of-line exception fixups, which were removed in commits:
35d67794b8828333 ("arm64: lib: __arch_clear_user(): fold fixups into body") 4012e0e22739eef9 ("arm64: lib: __arch_copy_from_user(): fold fixups into body") 139f9ab73d60cf76 ("arm64: lib: __arch_copy_to_user(): fold fixups into body")
When the trampolines were introduced in commit:
addfc38672c73efd ("arm64: atomics: avoid out-of-line ll/sc atomics")
The rationale was to improve icache performance by grouping the LL/SC atomics together. This has never been measured, and this theoretical benefit is outweighed by other factors:
* As the subsections are collapsed into sections at object file granularity, these are spread out throughout the kernel and can share cachelines with unrelated code regardless.
* GCC 12.1.0 has been observed to place the trampoline out-of-line in specialised __ll_sc_*() functions, introducing more branching than was intended.
* Removing the trampolines has been observed to shrink a defconfig kernel Image by 64KiB when building with GCC 12.1.0.
This patch removes the LL/SC trampolines, meaning that the LL/SC atomics will be inlined into their callers (or placed in out-of-line functions using regular BL/RET pairs). When CONFIG_ARM64_LSE_ATOMICS=y, the LL/SC atomics are always called in an unlikely branch, and will be placed in a cold portion of the function, so this should have minimal impact on the hot paths.
Other than the improved backtracing, there should be no functional change as a result of this patch.
Signed-off-by: Mark Rutland mark.rutland@arm.com Cc: Will Deacon will@kernel.org Link: https://lore.kernel.org/r/20220817155914.3975112-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/include/asm/atomic_ll_sc.h | 40 ++++++--------------------- 1 file changed, 9 insertions(+), 31 deletions(-)
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index fe0db8d416fb..906e2d8c254c 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -12,19 +12,6 @@
#include <linux/stringify.h>
-#ifdef CONFIG_ARM64_LSE_ATOMICS -#define __LL_SC_FALLBACK(asm_ops) \ -" b 3f\n" \ -" .subsection 1\n" \ -"3:\n" \ -asm_ops "\n" \ -" b 4f\n" \ -" .previous\n" \ -"4:\n" -#else -#define __LL_SC_FALLBACK(asm_ops) asm_ops -#endif - #ifndef CONFIG_CC_HAS_K_CONSTRAINT #define K #endif @@ -43,12 +30,11 @@ __ll_sc_atomic_##op(int i, atomic_t *v) \ int result; \ \ asm volatile("// atomic_" #op "\n" \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %2\n" \ "1: ldxr %w0, %2\n" \ " " #asm_op " %w0, %w0, %w3\n" \ " stxr %w1, %w0, %2\n" \ - " cbnz %w1, 1b\n") \ + " cbnz %w1, 1b\n" \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : __stringify(constraint) "r" (i)); \ } @@ -61,13 +47,12 @@ __ll_sc_atomic_##op##_return##name(int i, atomic_t *v) \ int result; \ \ asm volatile("// atomic_" #op "_return" #name "\n" \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %2\n" \ "1: ld" #acq "xr %w0, %2\n" \ " " #asm_op " %w0, %w0, %w3\n" \ " st" #rel "xr %w1, %w0, %2\n" \ " cbnz %w1, 1b\n" \ - " " #mb ) \ + " " #mb \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : __stringify(constraint) "r" (i) \ : cl); \ @@ -83,13 +68,12 @@ __ll_sc_atomic_fetch_##op##name(int i, atomic_t *v) \ int val, result; \ \ asm volatile("// atomic_fetch_" #op #name "\n" \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %3\n" \ "1: ld" #acq "xr %w0, %3\n" \ " " #asm_op " %w1, %w0, %w4\n" \ " st" #rel "xr %w2, %w1, %3\n" \ " cbnz %w2, 1b\n" \ - " " #mb ) \ + " " #mb \ : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ : __stringify(constraint) "r" (i) \ : cl); \ @@ -142,12 +126,11 @@ __ll_sc_atomic64_##op(s64 i, atomic64_t *v) \ unsigned long tmp; \ \ asm volatile("// atomic64_" #op "\n" \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %2\n" \ "1: ldxr %0, %2\n" \ " " #asm_op " %0, %0, %3\n" \ " stxr %w1, %0, %2\n" \ - " cbnz %w1, 1b") \ + " cbnz %w1, 1b" \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : __stringify(constraint) "r" (i)); \ } @@ -160,13 +143,12 @@ __ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t 
*v) \ unsigned long tmp; \ \ asm volatile("// atomic64_" #op "_return" #name "\n" \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %2\n" \ "1: ld" #acq "xr %0, %2\n" \ " " #asm_op " %0, %0, %3\n" \ " st" #rel "xr %w1, %0, %2\n" \ " cbnz %w1, 1b\n" \ - " " #mb ) \ + " " #mb \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : __stringify(constraint) "r" (i) \ : cl); \ @@ -182,13 +164,12 @@ __ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \ unsigned long tmp; \ \ asm volatile("// atomic64_fetch_" #op #name "\n" \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %3\n" \ "1: ld" #acq "xr %0, %3\n" \ " " #asm_op " %1, %0, %4\n" \ " st" #rel "xr %w2, %1, %3\n" \ " cbnz %w2, 1b\n" \ - " " #mb ) \ + " " #mb \ : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ : __stringify(constraint) "r" (i) \ : cl); \ @@ -240,7 +221,6 @@ __ll_sc_atomic64_dec_if_positive(atomic64_t *v) unsigned long tmp;
asm volatile("// atomic64_dec_if_positive\n" - __LL_SC_FALLBACK( " prfm pstl1strm, %2\n" "1: ldxr %0, %2\n" " subs %0, %0, #1\n" @@ -248,7 +228,7 @@ __ll_sc_atomic64_dec_if_positive(atomic64_t *v) " stlxr %w1, %0, %2\n" " cbnz %w1, 1b\n" " dmb ish\n" - "2:") + "2:" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : : "cc", "memory"); @@ -274,7 +254,6 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \ old = (u##sz)old; \ \ asm volatile( \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %[v]\n" \ "1: ld" #acq "xr" #sfx "\t%" #w "[oldval], %[v]\n" \ " eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \ @@ -282,7 +261,7 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \ " st" #rel "xr" #sfx "\t%w[tmp], %" #w "[new], %[v]\n" \ " cbnz %w[tmp], 1b\n" \ " " #mb "\n" \ - "2:") \ + "2:" \ : [tmp] "=&r" (tmp), [oldval] "=&r" (oldval), \ [v] "+Q" (*(u##sz *)ptr) \ : [old] __stringify(constraint) "r" (old), [new] "r" (new) \ @@ -326,7 +305,6 @@ __ll_sc__cmpxchg_double##name(unsigned long old1, \ unsigned long tmp, ret; \ \ asm volatile("// __cmpxchg_double" #name "\n" \ - __LL_SC_FALLBACK( \ " prfm pstl1strm, %2\n" \ "1: ldxp %0, %1, %2\n" \ " eor %0, %0, %3\n" \ @@ -336,7 +314,7 @@ __ll_sc__cmpxchg_double##name(unsigned long old1, \ " st" #rel "xp %w0, %5, %6, %2\n" \ " cbnz %w0, 1b\n" \ " " #mb "\n" \ - "2:") \ + "2:" \ : "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr) \ : "r" (old1), "r" (old2), "r" (new1), "r" (new2) \ : cl); \
On Tue, Oct 11, 2022 at 10:49:45AM -0400, Sasha Levin wrote:
From: Mark Rutland mark.rutland@arm.com
[ Upstream commit b2c3ccbd0011bb3b51d0fec24cb3a5812b1ec8ea ]
When CONFIG_ARM64_LSE_ATOMICS=y, each use of an LL/SC atomic results in a fragment of code being generated in a subsection without a clear association with its caller. A trampoline in the caller branches to the LL/SC atomic with a direct branch, and the atomic directly branches back into its trampoline.
This breaks backtracing, as any PC within the out-of-line fragment will be symbolized as an offset from the nearest prior symbol (which may not be the function using the atomic), and since the atomic returns with a direct branch, the caller's PC may be missing from the backtrace.
For example, with secondary_start_kernel() hacked to contain atomic_inc(NULL), the resulting exception can be reported as being taken from cpus_are_stuck_in_kernel():
| Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 | Mem abort info: | ESR = 0x0000000096000004 | EC = 0x25: DABT (current EL), IL = 32 bits | SET = 0, FnV = 0 | EA = 0, S1PTW = 0 | FSC = 0x04: level 0 translation fault | Data abort info: | ISV = 0, ISS = 0x00000004 | CM = 0, WnR = 0 | [0000000000000000] user address but active_mm is swapper | Internal error: Oops: 96000004 [#1] PREEMPT SMP | Modules linked in: | CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.19.0-11219-geb555cb5b794-dirty #3 | Hardware name: linux,dummy-virt (DT) | pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : cpus_are_stuck_in_kernel+0xa4/0x120 | lr : secondary_start_kernel+0x164/0x170 | sp : ffff80000a4cbe90 | x29: ffff80000a4cbe90 x28: 0000000000000000 x27: 0000000000000000 | x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 | x23: 0000000000000000 x22: 0000000000000000 x21: 0000000000000000 | x20: 0000000000000001 x19: 0000000000000001 x18: 0000000000000008 | x17: 3030383832343030 x16: 3030303030307830 x15: ffff80000a4cbab0 | x14: 0000000000000001 x13: 5d31666130663133 x12: 3478305b20313030 | x11: 3030303030303078 x10: 3020726f73736563 x9 : 726f737365636f72 | x8 : ffff800009ff2ef0 x7 : 0000000000000003 x6 : 0000000000000000 | x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000100 | x2 : 0000000000000000 x1 : ffff0000029bd880 x0 : 0000000000000000 | Call trace: | cpus_are_stuck_in_kernel+0xa4/0x120 | __secondary_switched+0xb0/0xb4 | Code: 35ffffa3 17fffc6c d53cd040 f9800011 (885f7c01) | ---[ end trace 0000000000000000 ]---
This is confusing and hinders debugging, and will be problematic for CONFIG_LIVEPATCH as these cases cannot be unwound reliably.
This is very similar to recent issues with out-of-line exception fixups, which were removed in commits:
35d67794b8828333 ("arm64: lib: __arch_clear_user(): fold fixups into body") 4012e0e22739eef9 ("arm64: lib: __arch_copy_from_user(): fold fixups into body") 139f9ab73d60cf76 ("arm64: lib: __arch_copy_to_user(): fold fixups into body")
When the trampolines were introduced in commit:
addfc38672c73efd ("arm64: atomics: avoid out-of-line ll/sc atomics")
The rationale was to improve icache performance by grouping the LL/SC atomics together. This has never been measured, and this theoretical benefit is outweighed by other factors:
As the subsections are collapsed into sections at object file granularity, these are spread out throughout the kernel and can share cachelines with unrelated code regardless.
GCC 12.1.0 has been observed to place the trampoline out-of-line in specialised __ll_sc_*() functions, introducing more branching than was intended.
Removing the trampolines has been observed to shrink a defconfig kernel Image by 64KiB when building with GCC 12.1.0.
This patch removes the LL/SC trampolines, meaning that the LL/SC atomics will be inlined into their callers (or placed in out-of line functions using regular BL/RET pairs). When CONFIG_ARM64_LSE_ATOMICS=y, the LL/SC atomics are always called in an unlikely branch, and will be placed in a cold portion of the function, so this should have minimal impact to the hot paths.
Other than the improved backtracing, there should be no functional change as a result of this patch.
Signed-off-by: Mark Rutland mark.rutland@arm.com Cc: Will Deacon will@kernel.org Link: https://lore.kernel.org/r/20220817155914.3975112-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Sasha Levin sashal@kernel.org
Please also drop this here. Thanks.
From: Qi Zheng zhengqi.arch@bytedance.com
[ Upstream commit 8eb858c44b98e0326bb32fca34ae671995cd73bb ]
Currently arm64 supports per-CPU IRQ stack, but softirqs are still handled in the task context.
Since any call to local_bh_enable() at any level in the task's call stack may trigger a softirq processing run, which could potentially cause a task stack overflow if the combined stack footprints exceed the stack's size, let's run these softirqs on the IRQ stack as well.
Signed-off-by: Qi Zheng zhengqi.arch@bytedance.com Reviewed-by: Arnd Bergmann arnd@arndb.de Acked-by: Will Deacon will@kernel.org Link: https://lore.kernel.org/r/20220815124739.15948-1-zhengqi.arch@bytedance.com Signed-off-by: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/Kconfig | 1 + arch/arm64/kernel/irq.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1ce7685ad5de..68ee7146bdb9 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -230,6 +230,7 @@ config ARM64 select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD select TRACE_IRQFLAGS_SUPPORT select TRACE_IRQFLAGS_NMI_SUPPORT + select HAVE_SOFTIRQ_ON_OWN_STACK help ARM 64-bit (AArch64) Linux support.
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index bda49430c9ea..38dbd3828f13 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -21,7 +21,9 @@ #include <linux/seq_file.h> #include <linux/vmalloc.h> #include <asm/daifflags.h> +#include <asm/exception.h> #include <asm/vmap_stack.h> +#include <asm/softirq_stack.h>
/* Only access this in an NMI enter/exit */ DEFINE_PER_CPU(struct nmi_ctx, nmi_contexts); @@ -71,6 +73,18 @@ static void init_irq_stacks(void) } #endif
+#ifndef CONFIG_PREEMPT_RT +static void ____do_softirq(struct pt_regs *regs) +{ + __do_softirq(); +} + +void do_softirq_own_stack(void) +{ + call_on_irq_stack(NULL, ____do_softirq); +} +#endif + static void default_handle_irq(struct pt_regs *regs) { panic("IRQ taken without a root IRQ handler\n");
From: Frieder Schrempf frieder.schrempf@kontron.de
[ Upstream commit eef2c0217e02b6c7ed5b10b82ea944127145e113 ]
It turns out that it is not necessary to declare the VSELECT signal as GPIO and let the PMIC driver set it to a fixed high level. This switches the voltage between 3.3V and 1.8V by setting the PMIC register for LDO5 accordingly.
Instead we can do it like other boards already do and simply mux the VSELECT signal of the USDHC interface to the pin. This makes sure that the correct voltage is selected by setting the PMIC's SD_VSEL input to high or low accordingly.
Reported-by: Heiko Thiery heiko.thiery@gmail.com Signed-off-by: Frieder Schrempf frieder.schrempf@kontron.de Reviewed-by: Heiko Thiery heiko.thiery@gmail.com Signed-off-by: Shawn Guo shawnguo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts | 3 +++ arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts index 23be1ec538ba..c54536c0a2ba 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts @@ -321,6 +321,7 @@ MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d0 MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d0 MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d0 MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019 + MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0 >; };
@@ -333,6 +334,7 @@ MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d4 MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d4 MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d4 MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019 + MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0 >; };
@@ -345,6 +347,7 @@ MX8MM_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x1d6 MX8MM_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x1d6 MX8MM_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x1d6 MX8MM_IOMUXC_SD2_CD_B_GPIO2_IO12 0x019 + MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x1d0 >; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi index 8f90eb02550d..6307af803429 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi @@ -86,7 +86,6 @@ pca9450: pmic@25 { pinctrl-0 = <&pinctrl_pmic>; interrupt-parent = <&gpio1>; interrupts = <0 IRQ_TYPE_LEVEL_LOW>; - sd-vsel-gpios = <&gpio1 4 GPIO_ACTIVE_HIGH>;
regulators { reg_vdd_soc: BUCK1 { @@ -229,7 +228,6 @@ MX8MM_IOMUXC_I2C1_SDA_I2C1_SDA 0x400001c3 pinctrl_pmic: pmicgrp { fsl,pins = < MX8MM_IOMUXC_GPIO1_IO00_GPIO1_IO0 0x141 - MX8MM_IOMUXC_GPIO1_IO04_GPIO1_IO4 0x141 >; };
From: Marcel Ziswiler marcel.ziswiler@toradex.com
[ Upstream commit 7db9905d48e1b9a97a28224c5a201262ebce7489 ]
This fixes the following error:
arch/arm64/boot/dts/freescale/imx8ulp-pinfunc.h: error: do not set execute permissions for source files
Signed-off-by: Marcel Ziswiler marcel.ziswiler@toradex.com Acked-by: Peng Fan peng.fan@nxp.com Signed-off-by: Shawn Guo shawnguo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/boot/dts/freescale/imx8ulp-pinfunc.h | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 arch/arm64/boot/dts/freescale/imx8ulp-pinfunc.h
diff --git a/arch/arm64/boot/dts/freescale/imx8ulp-pinfunc.h b/arch/arm64/boot/dts/freescale/imx8ulp-pinfunc.h old mode 100755 new mode 100644
From: Sebastian Krzyszkowiak sebastian.krzyszkowiak@puri.sm
[ Upstream commit 6effe295e1a87408033c29dbcea9d5a5c8b937d5 ]
This allows the userspace to notice that there's not enough current provided to charge the battery, and also fixes issues with 0% SOC values being considered invalid.
Signed-off-by: Sebastian Krzyszkowiak sebastian.krzyszkowiak@puri.sm Signed-off-by: Martin Kepplinger martin.kepplinger@puri.sm Signed-off-by: Shawn Guo shawnguo@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi | 1 + 1 file changed, 1 insertion(+)
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi index 9eec8a7eecfc..127fc7f904c8 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi @@ -1077,6 +1077,7 @@ bat: fuel-gauge@36 { interrupts = <20 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gauge>; + power-supplies = <&bq25895>; maxim,over-heat-temp = <700>; maxim,over-volt = <4500>; maxim,rsns-microohm = <5000>;
From: Arnd Bergmann arnd@arndb.de
[ Upstream commit 63872304bdb3decd5454f4dd210c25395278ed13 ]
Now that CONFIG_ARCH_MULTIPLATFORM can be disabled anywhere, there is a build failure for plat-orion:
arch/arm/plat-orion/irq.c:19:10: fatal error: plat/irq.h: No such file or directory
Make the include path unconditional.
Reported-by: kernel test robot lkp@intel.com Signed-off-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/plat-orion/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm/plat-orion/Makefile b/arch/arm/plat-orion/Makefile index 4e3f25de13c1..830b0be038c6 100644 --- a/arch/arm/plat-orion/Makefile +++ b/arch/arm/plat-orion/Makefile @@ -2,7 +2,7 @@ # # Makefile for the linux kernel. # -ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include +ccflags-y := -I$(srctree)/$(src)/include
orion-gpio-$(CONFIG_GPIOLIB) += gpio.o obj-$(CONFIG_PLAT_ORION_LEGACY) += irq.o pcie.o time.o common.o mpp.o
From: Qu Wenruo wqu@suse.com
[ Upstream commit 62cd9d4474282a1eb84f945955c56cbfc42e1ffe ]
There is an internal report on hitting the following ASSERT() in recalculate_thresholds():
ASSERT(ctl->total_bitmaps <= max_bitmaps);
Above @max_bitmaps is calculated using the following variables:
- bytes_per_bg: 8 * 4096 * 4096 (128M) for x86_64/x86.
- block_group->length: The length of the block group.
@max_bitmaps is the rounded up value of block_group->length / 128M.
Normally one free space cache should not have more bitmaps than above value, but when it happens the ASSERT() can be triggered if CONFIG_BTRFS_ASSERT is also enabled.
But the ASSERT() itself won't provide enough info to know what is going wrong. Is the bg so small that it only allows one bitmap? Or is there something else wrong?
So although I haven't found extra reports or crash dumps to do further investigation, add the extra info to make it more helpful to debug.
Reviewed-by: Anand Jain anand.jain@oracle.com Signed-off-by: Qu Wenruo wqu@suse.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/free-space-cache.c | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 996da650ecdc..85404c62a1c2 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -693,6 +693,12 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
max_bitmaps = max_t(u64, max_bitmaps, 1);
+ if (ctl->total_bitmaps > max_bitmaps) + btrfs_err(block_group->fs_info, +"invalid free space control: bg start=%llu len=%llu total_bitmaps=%u unit=%u max_bitmaps=%llu bytes_per_bg=%llu", + block_group->start, block_group->length, + ctl->total_bitmaps, ctl->unit, max_bitmaps, + bytes_per_bg); ASSERT(ctl->total_bitmaps <= max_bitmaps);
/*
From: Ioannis Angelakopoulos iangelak@fb.com
[ Upstream commit ab9a323f9ab576000795285dd7ac6afeedf29e32 ]
Introduce four macros that are used to annotate wait events in btrfs code with lockdep:
1) the btrfs_lockdep_init_map, 2) the btrfs_lockdep_acquire, 3) the btrfs_lockdep_release, and 4) the btrfs_might_wait_for_event macros.
The btrfs_lockdep_init_map macro is used to initialize a lockdep map.
The btrfs_lockdep_<acquire,release> macros are used by threads to take the lockdep map as readers (shared lock) and release it, respectively.
The btrfs_might_wait_for_event macro is used by threads to take the lockdep map as writers (exclusive lock) and release it.
In general, the lockdep annotation for wait events works as follows:
The condition for a wait event can be modified and signaled at the same time by multiple threads. These threads hold the lockdep map as readers when they enter a context in which blocking would prevent signaling the condition. Frequently, this occurs when a thread violates a condition (lockdep map acquire), before restoring it and signaling it at a later point (lockdep map release).
The threads that block on the wait event take the lockdep map as writers (exclusive lock). These threads have to block until all the threads that hold the lockdep map as readers signal the condition for the wait event and release the lockdep map.
The lockdep annotation is used to warn about potential deadlock scenarios that involve the threads that modify and signal the wait event condition and threads that block on the wait event. A simple example is illustrated below:
Without lockdep:
TA                                        TB
cond = false
                                          lock(A)
                                          wait_event(w, cond)
                                          unlock(A)
lock(A)
cond = true
signal(w)
unlock(A)
With lockdep:
TA                                        TB
rwsem_acquire_read(lockdep_map)
cond = false
                                          lock(A)
                                          rwsem_acquire(lockdep_map)
                                          rwsem_release(lockdep_map)
                                          wait_event(w, cond)
                                          unlock(A)
lock(A)
cond = true
signal(w)
unlock(A)
rwsem_release(lockdep_map)
In the second case, with the lockdep annotation, lockdep would warn about an ABBA deadlock, while the first case would just deadlock at some point.
Reviewed-by: Josef Bacik josef@toxicpanda.com Signed-off-by: Ioannis Angelakopoulos iangelak@fb.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/ctree.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index df8c99c99df9..dfeb7174219e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1174,6 +1174,51 @@ enum { BTRFS_ROOT_RESET_LOCKDEP_CLASS, };
+/* + * Lockdep annotation for wait events. + * + * @owner: The struct where the lockdep map is defined + * @lock: The lockdep map corresponding to a wait event + * + * This macro is used to annotate a wait event. In this case a thread acquires + * the lockdep map as writer (exclusive lock) because it has to block until all + * the threads that hold the lock as readers signal the condition for the wait + * event and release their locks. + */ +#define btrfs_might_wait_for_event(owner, lock) \ + do { \ + rwsem_acquire(&owner->lock##_map, 0, 0, _THIS_IP_); \ + rwsem_release(&owner->lock##_map, _THIS_IP_); \ + } while (0) + +/* + * Protection for the resource/condition of a wait event. + * + * @owner: The struct where the lockdep map is defined + * @lock: The lockdep map corresponding to a wait event + * + * Many threads can modify the condition for the wait event at the same time + * and signal the threads that block on the wait event. The threads that modify + * the condition and do the signaling acquire the lock as readers (shared + * lock). + */ +#define btrfs_lockdep_acquire(owner, lock) \ + rwsem_acquire_read(&owner->lock##_map, 0, 0, _THIS_IP_) + +/* + * Used after signaling the condition for a wait event to release the lockdep + * map held by a reader thread. + */ +#define btrfs_lockdep_release(owner, lock) \ + rwsem_release(&owner->lock##_map, _THIS_IP_) + +/* Initialization of the lockdep map */ +#define btrfs_lockdep_init_map(owner, lock) \ + do { \ + static struct lock_class_key lock##_key; \ + lockdep_init_map(&owner->lock##_map, #lock, &lock##_key, 0); \ + } while (0) + static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info) { clear_and_wake_up_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags);
From: Ioannis Angelakopoulos iangelak@fb.com
[ Upstream commit e1489b4fe6045a79a5e9c658eed65311977e230a ]
Annotate the num_writers wait event in fs/btrfs/transaction.c with lockdep in order to catch deadlocks involving this wait event.
Reviewed-by: Josef Bacik josef@toxicpanda.com Signed-off-by: Ioannis Angelakopoulos iangelak@fb.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/ctree.h | 6 ++++++ fs/btrfs/disk-io.c | 2 ++ fs/btrfs/transaction.c | 38 +++++++++++++++++++++++++++++++++----- 3 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index dfeb7174219e..707e644bab92 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1092,6 +1092,12 @@ struct btrfs_fs_info { /* Updates are not protected by any lock */ struct btrfs_commit_stats commit_stats;
+ /* + * Annotations for transaction events (structures are empty when + * compiled without lockdep). + */ + struct lockdep_map btrfs_trans_num_writers_map; + #ifdef CONFIG_BTRFS_FS_REF_VERIFY spinlock_t ref_verify_lock; struct rb_root block_tree; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2633137c3e9f..a04b32f7df9d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2990,6 +2990,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) mutex_init(&fs_info->zoned_data_reloc_io_lock); seqlock_init(&fs_info->profiles_lock);
+ btrfs_lockdep_init_map(fs_info, btrfs_trans_num_writers); + INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); INIT_LIST_HEAD(&fs_info->space_info); INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 0bec10740ad3..b3cb54d852f8 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -313,6 +313,7 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info, atomic_inc(&cur_trans->num_writers); extwriter_counter_inc(cur_trans, type); spin_unlock(&fs_info->trans_lock); + btrfs_lockdep_acquire(fs_info, btrfs_trans_num_writers); return 0; } spin_unlock(&fs_info->trans_lock); @@ -334,16 +335,20 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info, if (!cur_trans) return -ENOMEM;
+ btrfs_lockdep_acquire(fs_info, btrfs_trans_num_writers); + spin_lock(&fs_info->trans_lock); if (fs_info->running_transaction) { /* * someone started a transaction after we unlocked. Make sure * to redo the checks above */ + btrfs_lockdep_release(fs_info, btrfs_trans_num_writers); kfree(cur_trans); goto loop; } else if (BTRFS_FS_ERROR(fs_info)) { spin_unlock(&fs_info->trans_lock); + btrfs_lockdep_release(fs_info, btrfs_trans_num_writers); kfree(cur_trans); return -EROFS; } @@ -1022,6 +1027,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, extwriter_counter_dec(cur_trans, trans->type);
cond_wake_up(&cur_trans->writer_wait); + + btrfs_lockdep_release(info, btrfs_trans_num_writers); + btrfs_put_transaction(cur_trans);
if (current->journal_info == trans) @@ -1994,6 +2002,12 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err) if (cur_trans == fs_info->running_transaction) { cur_trans->state = TRANS_STATE_COMMIT_DOING; spin_unlock(&fs_info->trans_lock); + + /* + * The thread has already released the lockdep map as reader + * already in btrfs_commit_transaction(). + */ + btrfs_might_wait_for_event(fs_info, btrfs_trans_num_writers); wait_event(cur_trans->writer_wait, atomic_read(&cur_trans->num_writers) == 1);
@@ -2222,7 +2236,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
btrfs_put_transaction(prev_trans); if (ret) - goto cleanup_transaction; + goto lockdep_release; } else { spin_unlock(&fs_info->trans_lock); } @@ -2236,7 +2250,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) */ if (BTRFS_FS_ERROR(fs_info)) { ret = -EROFS; - goto cleanup_transaction; + goto lockdep_release; } }
@@ -2250,19 +2264,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
ret = btrfs_start_delalloc_flush(fs_info); if (ret) - goto cleanup_transaction; + goto lockdep_release;
ret = btrfs_run_delayed_items(trans); if (ret) - goto cleanup_transaction; + goto lockdep_release;
wait_event(cur_trans->writer_wait, extwriter_counter_read(cur_trans) == 0);
/* some pending stuffs might be added after the previous flush. */ ret = btrfs_run_delayed_items(trans); - if (ret) + if (ret) { + btrfs_lockdep_release(fs_info, btrfs_trans_num_writers); goto cleanup_transaction; + }
btrfs_wait_delalloc_flush(fs_info);
@@ -2284,6 +2300,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) add_pending_snapshot(trans); cur_trans->state = TRANS_STATE_COMMIT_DOING; spin_unlock(&fs_info->trans_lock); + + /* + * The thread has started/joined the transaction thus it holds the + * lockdep map as a reader. It has to release it before acquiring the + * lockdep map as a writer. + */ + btrfs_lockdep_release(fs_info, btrfs_trans_num_writers); + btrfs_might_wait_for_event(fs_info, btrfs_trans_num_writers); wait_event(cur_trans->writer_wait, atomic_read(&cur_trans->num_writers) == 1);
@@ -2515,6 +2539,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) cleanup_transaction(trans, ret);
return ret; + +lockdep_release: + btrfs_lockdep_release(fs_info, btrfs_trans_num_writers); + goto cleanup_transaction; }
/*
From: Ioannis Angelakopoulos iangelak@fb.com
[ Upstream commit 5a9ba6709f13313984900d635b4c73c9eb7d644e ]
Similarly to the num_writers wait event in fs/btrfs/transaction.c add a lockdep annotation for the num_extwriters wait event.
Reviewed-by: Josef Bacik josef@toxicpanda.com Signed-off-by: Ioannis Angelakopoulos iangelak@fb.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/transaction.c | 13 +++++++++++++ 3 files changed, 15 insertions(+)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 707e644bab92..e886cf639c0f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1097,6 +1097,7 @@ struct btrfs_fs_info { * compiled without lockdep). */ struct lockdep_map btrfs_trans_num_writers_map; + struct lockdep_map btrfs_trans_num_extwriters_map;
#ifdef CONFIG_BTRFS_FS_REF_VERIFY spinlock_t ref_verify_lock; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a04b32f7df9d..811d743e26e6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2991,6 +2991,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) seqlock_init(&fs_info->profiles_lock);
btrfs_lockdep_init_map(fs_info, btrfs_trans_num_writers); + btrfs_lockdep_init_map(fs_info, btrfs_trans_num_extwriters);
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); INIT_LIST_HEAD(&fs_info->space_info); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b3cb54d852f8..44e47db4c8e8 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -314,6 +314,7 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info, extwriter_counter_inc(cur_trans, type); spin_unlock(&fs_info->trans_lock); btrfs_lockdep_acquire(fs_info, btrfs_trans_num_writers); + btrfs_lockdep_acquire(fs_info, btrfs_trans_num_extwriters); return 0; } spin_unlock(&fs_info->trans_lock); @@ -336,6 +337,7 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info, return -ENOMEM;
btrfs_lockdep_acquire(fs_info, btrfs_trans_num_writers); + btrfs_lockdep_acquire(fs_info, btrfs_trans_num_extwriters);
spin_lock(&fs_info->trans_lock); if (fs_info->running_transaction) { @@ -343,11 +345,13 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info, * someone started a transaction after we unlocked. Make sure * to redo the checks above */ + btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters); btrfs_lockdep_release(fs_info, btrfs_trans_num_writers); kfree(cur_trans); goto loop; } else if (BTRFS_FS_ERROR(fs_info)) { spin_unlock(&fs_info->trans_lock); + btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters); btrfs_lockdep_release(fs_info, btrfs_trans_num_writers); kfree(cur_trans); return -EROFS; @@ -1028,6 +1032,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
cond_wake_up(&cur_trans->writer_wait);
+ btrfs_lockdep_release(info, btrfs_trans_num_extwriters); btrfs_lockdep_release(info, btrfs_trans_num_writers);
btrfs_put_transaction(cur_trans); @@ -2270,6 +2275,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) if (ret) goto lockdep_release;
+ /* + * The thread has started/joined the transaction thus it holds the + * lockdep map as a reader. It has to release it before acquiring the + * lockdep map as a writer. + */ + btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters); + btrfs_might_wait_for_event(fs_info, btrfs_trans_num_extwriters); wait_event(cur_trans->writer_wait, extwriter_counter_read(cur_trans) == 0);
@@ -2541,6 +2553,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) return ret;
lockdep_release: + btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters); btrfs_lockdep_release(fs_info, btrfs_trans_num_writers); goto cleanup_transaction; }
From: Ioannis Angelakopoulos iangelak@fb.com
[ Upstream commit 3e738c531aad8caa7f3d20ab878a8a0d3574e730 ]
Add lockdep annotations for the transaction states that have wait events:
1) TRANS_STATE_COMMIT_START 2) TRANS_STATE_UNBLOCKED 3) TRANS_STATE_SUPER_COMMITTED 4) TRANS_STATE_COMPLETED
The new macros introduced here to annotate the transaction states wait events have the same effect as the generic lockdep annotation macros.
With the exception of the lockdep annotation for TRANS_STATE_COMMIT_START, the transaction thread has to acquire the lockdep maps for the transaction states as reader after the lockdep map for num_writers is released so that lockdep does not complain.
Reviewed-by: Josef Bacik josef@toxicpanda.com Signed-off-by: Ioannis Angelakopoulos iangelak@fb.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/ctree.h | 32 +++++++++++++++++++++++++ fs/btrfs/disk-io.c | 8 +++++++ fs/btrfs/transaction.c | 53 ++++++++++++++++++++++++++++++++++-------- 3 files changed, 83 insertions(+), 10 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e886cf639c0f..f8172e269f03 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1098,6 +1098,7 @@ struct btrfs_fs_info { */ struct lockdep_map btrfs_trans_num_writers_map; struct lockdep_map btrfs_trans_num_extwriters_map; + struct lockdep_map btrfs_state_change_map[4];
#ifdef CONFIG_BTRFS_FS_REF_VERIFY spinlock_t ref_verify_lock; @@ -1181,6 +1182,13 @@ enum { BTRFS_ROOT_RESET_LOCKDEP_CLASS, };
+enum btrfs_lockdep_trans_states { + BTRFS_LOCKDEP_TRANS_COMMIT_START, + BTRFS_LOCKDEP_TRANS_UNBLOCKED, + BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED, + BTRFS_LOCKDEP_TRANS_COMPLETED, +}; + /* * Lockdep annotation for wait events. * @@ -1219,6 +1227,22 @@ enum { #define btrfs_lockdep_release(owner, lock) \ rwsem_release(&owner->lock##_map, _THIS_IP_)
+/* + * Macros for the transaction states wait events, similar to the generic wait + * event macros. + */ +#define btrfs_might_wait_for_state(owner, i) \ + do { \ + rwsem_acquire(&owner->btrfs_state_change_map[i], 0, 0, _THIS_IP_); \ + rwsem_release(&owner->btrfs_state_change_map[i], _THIS_IP_); \ + } while (0) + +#define btrfs_trans_state_lockdep_acquire(owner, i) \ + rwsem_acquire_read(&owner->btrfs_state_change_map[i], 0, 0, _THIS_IP_) + +#define btrfs_trans_state_lockdep_release(owner, i) \ + rwsem_release(&owner->btrfs_state_change_map[i], _THIS_IP_) + /* Initialization of the lockdep map */ #define btrfs_lockdep_init_map(owner, lock) \ do { \ @@ -1226,6 +1250,14 @@ enum { lockdep_init_map(&owner->lock##_map, #lock, &lock##_key, 0); \ } while (0)
+/* Initialization of the transaction states lockdep maps. */ +#define btrfs_state_lockdep_init_map(owner, lock, state) \ + do { \ + static struct lock_class_key lock##_key; \ + lockdep_init_map(&owner->btrfs_state_change_map[state], #lock, \ + &lock##_key, 0); \ + } while (0) + static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info) { clear_and_wake_up_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 811d743e26e6..68c6cb4e9283 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2992,6 +2992,14 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
btrfs_lockdep_init_map(fs_info, btrfs_trans_num_writers); btrfs_lockdep_init_map(fs_info, btrfs_trans_num_extwriters); + btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_start, + BTRFS_LOCKDEP_TRANS_COMMIT_START); + btrfs_state_lockdep_init_map(fs_info, btrfs_trans_unblocked, + BTRFS_LOCKDEP_TRANS_UNBLOCKED); + btrfs_state_lockdep_init_map(fs_info, btrfs_trans_super_committed, + BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED); + btrfs_state_lockdep_init_map(fs_info, btrfs_trans_completed, + BTRFS_LOCKDEP_TRANS_COMPLETED);
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); INIT_LIST_HEAD(&fs_info->space_info); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 44e47db4c8e8..d3576f84020d 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -550,6 +550,7 @@ static void wait_current_trans(struct btrfs_fs_info *fs_info) refcount_inc(&cur_trans->use_count); spin_unlock(&fs_info->trans_lock);
+ btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED); wait_event(fs_info->transaction_wait, cur_trans->state >= TRANS_STATE_UNBLOCKED || TRANS_ABORTED(cur_trans)); @@ -868,6 +869,15 @@ static noinline void wait_for_commit(struct btrfs_transaction *commit, u64 transid = commit->transid; bool put = false;
+ /* + * At the moment this function is called with min_state either being + * TRANS_STATE_COMPLETED or TRANS_STATE_SUPER_COMMITTED. + */ + if (min_state == TRANS_STATE_COMPLETED) + btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED); + else + btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED); + while (1) { wait_event(commit->commit_wait, commit->state >= min_state); if (put) @@ -1980,6 +1990,7 @@ void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans) * Wait for the current transaction commit to start and block * subsequent transaction joins */ + btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START); wait_event(fs_info->transaction_blocked_wait, cur_trans->state >= TRANS_STATE_COMMIT_START || TRANS_ABORTED(cur_trans)); @@ -2137,12 +2148,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) ktime_t interval;
ASSERT(refcount_read(&trans->use_count) == 1); + btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
/* Stop the commit early if ->aborted is set */ if (TRANS_ABORTED(cur_trans)) { ret = cur_trans->aborted; - btrfs_end_transaction(trans); - return ret; + goto lockdep_trans_commit_start_release; }
btrfs_trans_release_metadata(trans); @@ -2159,10 +2170,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) * Any running threads may add more while we are here. */ ret = btrfs_run_delayed_refs(trans, 0); - if (ret) { - btrfs_end_transaction(trans); - return ret; - } + if (ret) + goto lockdep_trans_commit_start_release; }
btrfs_create_pending_block_groups(trans); @@ -2191,10 +2200,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
if (run_it) { ret = btrfs_start_dirty_block_groups(trans); - if (ret) { - btrfs_end_transaction(trans); - return ret; - } + if (ret) + goto lockdep_trans_commit_start_release; } }
@@ -2209,6 +2216,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
if (trans->in_fsync) want_state = TRANS_STATE_SUPER_COMMITTED; + + btrfs_trans_state_lockdep_release(fs_info, + BTRFS_LOCKDEP_TRANS_COMMIT_START); ret = btrfs_end_transaction(trans); wait_for_commit(cur_trans, want_state);
@@ -2222,6 +2232,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
cur_trans->state = TRANS_STATE_COMMIT_START; wake_up(&fs_info->transaction_blocked_wait); + btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
if (cur_trans->list.prev != &fs_info->trans_list) { enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED; @@ -2323,6 +2334,16 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) wait_event(cur_trans->writer_wait, atomic_read(&cur_trans->num_writers) == 1);
+ /* + * Make lockdep happy by acquiring the state locks after + * btrfs_trans_num_writers is released. If we acquired the state locks + * before releasing the btrfs_trans_num_writers lock then lockdep would + * complain because we did not follow the reverse order unlocking rule. + */ + btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED); + btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED); + btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED); + /* * We've started the commit, clear the flag in case we were triggered to * do an async commit but somebody else started before the transaction @@ -2332,6 +2353,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
if (TRANS_ABORTED(cur_trans)) { ret = cur_trans->aborted; + btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED); goto scrub_continue; } /* @@ -2466,6 +2488,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) mutex_unlock(&fs_info->reloc_mutex);
wake_up(&fs_info->transaction_wait); + btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
ret = btrfs_write_and_wait_transaction(trans); if (ret) { @@ -2497,6 +2520,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) */ cur_trans->state = TRANS_STATE_SUPER_COMMITTED; wake_up(&cur_trans->commit_wait); + btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
btrfs_finish_extent_commit(trans);
@@ -2510,6 +2534,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) */ cur_trans->state = TRANS_STATE_COMPLETED; wake_up(&cur_trans->commit_wait); + btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED);
spin_lock(&fs_info->trans_lock); list_del_init(&cur_trans->list); @@ -2538,7 +2563,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
unlock_reloc: mutex_unlock(&fs_info->reloc_mutex); + btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED); scrub_continue: + btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED); + btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED); btrfs_scrub_continue(fs_info); cleanup_transaction: btrfs_trans_release_metadata(trans); @@ -2556,6 +2584,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters); btrfs_lockdep_release(fs_info, btrfs_trans_num_writers); goto cleanup_transaction; + +lockdep_trans_commit_start_release: + btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START); + btrfs_end_transaction(trans); + return ret; }
/*
From: Ioannis Angelakopoulos iangelak@fb.com
[ Upstream commit 8b53779eaa98b55f4cccadd4d12b3233e9633140 ]
In contrast to the num_writers and num_extwriters wait events, the condition for the pending ordered wait event is signaled in a different context from the wait event itself. The condition signaling occurs in btrfs_remove_ordered_extent() in fs/btrfs/ordered-data.c while the wait event is implemented in btrfs_commit_transaction() in fs/btrfs/transaction.c.
Thus the thread signaling the condition has to acquire the lockdep map as a reader at the start of btrfs_remove_ordered_extent() and release it after it has signaled the condition. In this case some dependencies might be left out due to the placement of the annotation, but it is better than no annotation at all.
Reviewed-by: Josef Bacik josef@toxicpanda.com Signed-off-by: Ioannis Angelakopoulos iangelak@fb.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/ordered-data.c | 3 +++ fs/btrfs/transaction.c | 1 + 4 files changed, 6 insertions(+)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f8172e269f03..8bd9a6d5ade6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1099,6 +1099,7 @@ struct btrfs_fs_info { struct lockdep_map btrfs_trans_num_writers_map; struct lockdep_map btrfs_trans_num_extwriters_map; struct lockdep_map btrfs_state_change_map[4]; + struct lockdep_map btrfs_trans_pending_ordered_map;
#ifdef CONFIG_BTRFS_FS_REF_VERIFY spinlock_t ref_verify_lock; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 68c6cb4e9283..393553fdfed6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2992,6 +2992,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
btrfs_lockdep_init_map(fs_info, btrfs_trans_num_writers); btrfs_lockdep_init_map(fs_info, btrfs_trans_num_extwriters); + btrfs_lockdep_init_map(fs_info, btrfs_trans_pending_ordered); btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_start, BTRFS_LOCKDEP_TRANS_COMMIT_START); btrfs_state_lockdep_init_map(fs_info, btrfs_trans_unblocked, diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 1952ac85222c..2a4cb6db42d1 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -525,6 +525,7 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode, struct rb_node *node; bool pending;
+ btrfs_lockdep_acquire(fs_info, btrfs_trans_pending_ordered); /* This is paired with btrfs_add_ordered_extent. */ spin_lock(&btrfs_inode->lock); btrfs_mod_outstanding_extents(btrfs_inode, -1); @@ -580,6 +581,8 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode, } }
+ btrfs_lockdep_release(fs_info, btrfs_trans_pending_ordered); + spin_lock(&root->ordered_extent_lock); list_del_init(&entry->root_extent_list); root->nr_ordered_extents--; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index d3576f84020d..6e3b2cb6a04a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2310,6 +2310,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) * transaction. Otherwise if this transaction commits before the ordered * extents complete we lose logged data after a power failure. */ + btrfs_might_wait_for_event(fs_info, btrfs_trans_pending_ordered); wait_event(cur_trans->pending_wait, atomic_read(&cur_trans->pending_ordered) == 0);
From: Ioannis Angelakopoulos iangelak@fb.com
[ Upstream commit 9d7464c87b159bbf763c24faeb7a2dcaac96e4a1 ]
Reinitialize the class of the lockdep map for struct inode's mapping->invalidate_lock in load_free_space_cache() function in fs/btrfs/free-space-cache.c. This will prevent lockdep from producing false positives related to execution paths that make use of free space inodes and paths that make use of normal inodes.
Specifically, with this change lockdep will create separate lock dependencies that include the invalidate_lock, in the case that free space inodes are used and in the case that normal inodes are used.
The lockdep class for this lock was first initialized in inode_init_always() in fs/inode.c.
Reviewed-by: Josef Bacik josef@toxicpanda.com Signed-off-by: Ioannis Angelakopoulos iangelak@fb.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/free-space-cache.c | 10 ++++++++++ 1 file changed, 10 insertions(+)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 85404c62a1c2..835071fa39a9 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -920,6 +920,8 @@ static int copy_free_space_cache(struct btrfs_block_group *block_group, return ret; }
+static struct lock_class_key btrfs_free_space_inode_key; + int load_free_space_cache(struct btrfs_block_group *block_group) { struct btrfs_fs_info *fs_info = block_group->fs_info; @@ -989,6 +991,14 @@ int load_free_space_cache(struct btrfs_block_group *block_group) } spin_unlock(&block_group->lock);
+ /* + * Reinitialize the class of struct inode's mapping->invalidate_lock for + * free space inodes to prevent false positives related to locks for normal + * inodes. + */ + lockdep_set_class(&(&inode->i_data)->invalidate_lock, + &btrfs_free_space_inode_key); + ret = __load_free_space_cache(fs_info->tree_root, inode, &tmp_ctl, path, block_group->start); btrfs_free_path(path);
From: Ioannis Angelakopoulos iangelak@fb.com
[ Upstream commit 5f4403e10f9b75b081bcc763b98d73e29de8c248 ]
This wait event is very similar to the pending ordered wait event in the sense that it occurs in a different context than the condition signaling for the event. The signaling occurs in btrfs_remove_ordered_extent() while the wait event is implemented in btrfs_start_ordered_extent() in fs/btrfs/ordered-data.c
However, in this case a thread must not acquire the lockdep map for the ordered extents wait event when the ordered extent is related to a free space inode. That is because lockdep creates dependencies between locks acquired both in execution paths related to normal inodes and paths related to free space inodes, thus leading to false positives.
Reviewed-by: Josef Bacik josef@toxicpanda.com Signed-off-by: Ioannis Angelakopoulos iangelak@fb.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/inode.c | 13 +++++++++++++ fs/btrfs/ordered-data.c | 18 ++++++++++++++++++ 4 files changed, 33 insertions(+)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8bd9a6d5ade6..804962f97452 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1100,6 +1100,7 @@ struct btrfs_fs_info { struct lockdep_map btrfs_trans_num_extwriters_map; struct lockdep_map btrfs_state_change_map[4]; struct lockdep_map btrfs_trans_pending_ordered_map; + struct lockdep_map btrfs_ordered_extent_map;
#ifdef CONFIG_BTRFS_FS_REF_VERIFY spinlock_t ref_verify_lock; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 393553fdfed6..e0e1730e67d7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2993,6 +2993,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) btrfs_lockdep_init_map(fs_info, btrfs_trans_num_writers); btrfs_lockdep_init_map(fs_info, btrfs_trans_num_extwriters); btrfs_lockdep_init_map(fs_info, btrfs_trans_pending_ordered); + btrfs_lockdep_init_map(fs_info, btrfs_ordered_extent); btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_start, BTRFS_LOCKDEP_TRANS_COMMIT_START); btrfs_state_lockdep_init_map(fs_info, btrfs_trans_unblocked, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1372210869b1..b06955727055 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3225,6 +3225,8 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) clear_bits |= EXTENT_DELALLOC_NEW;
freespace_inode = btrfs_is_free_space_inode(inode); + if (!freespace_inode) + btrfs_lockdep_acquire(fs_info, btrfs_ordered_extent);
if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) { ret = -EIO; @@ -8959,6 +8961,7 @@ void btrfs_destroy_inode(struct inode *vfs_inode) struct btrfs_ordered_extent *ordered; struct btrfs_inode *inode = BTRFS_I(vfs_inode); struct btrfs_root *root = inode->root; + bool freespace_inode;
WARN_ON(!hlist_empty(&vfs_inode->i_dentry)); WARN_ON(vfs_inode->i_data.nrpages); @@ -8980,6 +8983,12 @@ void btrfs_destroy_inode(struct inode *vfs_inode) if (!root) return;
+ /* + * If this is a free space inode do not take the ordered extents lockdep + * map. + */ + freespace_inode = btrfs_is_free_space_inode(inode); + while (1) { ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); if (!ordered) @@ -8988,6 +8997,10 @@ void btrfs_destroy_inode(struct inode *vfs_inode) btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup", ordered->file_offset, ordered->num_bytes); + + if (!freespace_inode) + btrfs_lockdep_acquire(root->fs_info, btrfs_ordered_extent); + btrfs_remove_ordered_extent(inode, ordered); btrfs_put_ordered_extent(ordered); btrfs_put_ordered_extent(ordered); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 2a4cb6db42d1..eb24a6d20ff8 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -524,6 +524,13 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode, struct btrfs_fs_info *fs_info = root->fs_info; struct rb_node *node; bool pending; + bool freespace_inode; + + /* + * If this is a free space inode the thread has not acquired the ordered + * extents lockdep map. + */ + freespace_inode = btrfs_is_free_space_inode(btrfs_inode);
btrfs_lockdep_acquire(fs_info, btrfs_trans_pending_ordered); /* This is paired with btrfs_add_ordered_extent. */ @@ -597,6 +604,8 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode, } spin_unlock(&root->ordered_extent_lock); wake_up(&entry->wait); + if (!freespace_inode) + btrfs_lockdep_release(fs_info, btrfs_ordered_extent); }
static void btrfs_run_ordered_extent_work(struct btrfs_work *work) @@ -715,9 +724,16 @@ void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry, int wait) u64 start = entry->file_offset; u64 end = start + entry->num_bytes - 1; struct btrfs_inode *inode = BTRFS_I(entry->inode); + bool freespace_inode;
trace_btrfs_ordered_extent_start(inode, entry);
+ /* + * If this is a free space inode do not take the ordered extents lockdep + * map. + */ + freespace_inode = btrfs_is_free_space_inode(inode); + /* * pages in the range can be dirty, clean or writeback. We * start IO on any dirty ones so the wait doesn't stall waiting @@ -726,6 +742,8 @@ void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry, int wait) if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags)) filemap_fdatawrite_range(inode->vfs_inode.i_mapping, start, end); if (wait) { + if (!freespace_inode) + btrfs_might_wait_for_event(inode->root->fs_info, btrfs_ordered_extent); wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags)); }
From: Qu Wenruo wqu@suse.com
[ Upstream commit e69bf81c9a339f1b2c041b112a6fbb9f60fc9340 ]
[PROBLEM]
Unlike data/metadata corruption, if scrub detected some error in the super block, the only error message is from the updated device status:
BTRFS info (device dm-1): scrub: started on devid 2 BTRFS error (device dm-1): bdev /dev/mapper/test-scratch2 errs: wr 0, rd 0, flush 0, corrupt 1, gen 0 BTRFS info (device dm-1): scrub: finished on devid 2 with status: 0
This is not helpful at all.
[CAUSE] Unlike data/metadata error reporting, there is no visible report in kernel dmesg for super block errors.
In fact, return value of scrub_checksum_super() is intentionally skipped, thus scrub_handle_errored_block() will never be called for super blocks.
[FIX] Make super block errors output an error message. The full dmesg now looks like this:
BTRFS info (device dm-1): scrub: started on devid 2 BTRFS warning (device dm-1): super block error on device /dev/mapper/test-scratch2, physical 67108864 BTRFS error (device dm-1): bdev /dev/mapper/test-scratch2 errs: wr 0, rd 0, flush 0, corrupt 1, gen 0 BTRFS info (device dm-1): scrub: finished on devid 2 with status: 0 BTRFS info (device dm-1): scrub: started on devid 2
This fix involves:
- Move the super_errors reporting to scrub_handle_errored_block(). This allows the device status message to show after the super block error message. However, we no longer distinguish between super block corruption and generation mismatch; both are now counted as corruption.
- Properly check the return value from scrub_checksum_super() - Add extra super block error reporting for scrub_print_warning().
Signed-off-by: Qu Wenruo wqu@suse.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/scrub.c | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 3afe5fa50a63..0fe7c4882e1f 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -729,6 +729,13 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) dev = sblock->sectors[0]->dev; fs_info = sblock->sctx->fs_info;
+ /* Super block error, no need to search extent tree. */ + if (sblock->sectors[0]->flags & BTRFS_EXTENT_FLAG_SUPER) { + btrfs_warn_in_rcu(fs_info, "%s on device %s, physical %llu", + errstr, rcu_str_deref(dev->name), + sblock->sectors[0]->physical); + return; + } path = btrfs_alloc_path(); if (!path) return; @@ -804,7 +811,7 @@ static inline void scrub_put_recover(struct btrfs_fs_info *fs_info, static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) { struct scrub_ctx *sctx = sblock_to_check->sctx; - struct btrfs_device *dev; + struct btrfs_device *dev = sblock_to_check->sectors[0]->dev; struct btrfs_fs_info *fs_info; u64 logical; unsigned int failed_mirror_index; @@ -825,13 +832,15 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) fs_info = sctx->fs_info; if (sblock_to_check->sectors[0]->flags & BTRFS_EXTENT_FLAG_SUPER) { /* - * if we find an error in a super block, we just report it. + * If we find an error in a super block, we just report it. * They will get written with the next transaction commit * anyway */ + scrub_print_warning("super block error", sblock_to_check); spin_lock(&sctx->stat_lock); ++sctx->stat.super_errors; spin_unlock(&sctx->stat_lock); + btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS); return 0; } logical = sblock_to_check->sectors[0]->logical; @@ -840,7 +849,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) is_metadata = !(sblock_to_check->sectors[0]->flags & BTRFS_EXTENT_FLAG_DATA); have_csum = sblock_to_check->sectors[0]->have_csum; - dev = sblock_to_check->sectors[0]->dev;
if (!sctx->is_dev_replace && btrfs_repair_one_zone(fs_info, logical)) return 0; @@ -1762,7 +1770,7 @@ static int scrub_checksum(struct scrub_block *sblock) else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) ret = scrub_checksum_tree_block(sblock); else if (flags & BTRFS_EXTENT_FLAG_SUPER) - (void)scrub_checksum_super(sblock); + ret = scrub_checksum_super(sblock); else WARN_ON(1); if (ret) @@ -1901,23 +1909,6 @@ static int scrub_checksum_super(struct scrub_block *sblock) if (memcmp(calculated_csum, s->csum, sctx->fs_info->csum_size)) ++fail_cor;
- if (fail_cor + fail_gen) { - /* - * if we find an error in a super block, we just report it. - * They will get written with the next transaction commit - * anyway - */ - spin_lock(&sctx->stat_lock); - ++sctx->stat.super_errors; - spin_unlock(&sctx->stat_lock); - if (fail_cor) - btrfs_dev_stat_inc_and_print(sector->dev, - BTRFS_DEV_STAT_CORRUPTION_ERRS); - else - btrfs_dev_stat_inc_and_print(sector->dev, - BTRFS_DEV_STAT_GENERATION_ERRS); - } - return fail_cor + fail_gen; }
From: Qu Wenruo wqu@suse.com
[ Upstream commit f9eab5f0bba76742af654f33d517bf62a0db8f12 ]
[BUG] The following script shows that, although scrub can detect super block errors, it never tries to fix them:
mkfs.btrfs -f -d raid1 -m raid1 $dev1 $dev2 xfs_io -c "pwrite 67108864 4k" $dev2
mount $dev1 $mnt btrfs scrub start -B $dev2 btrfs scrub start -Br $dev2 umount $mnt
The first scrub reports the super error correctly:
scrub done for f3289218-abd3-41ac-a630-202f766c0859 Scrub started: Tue Aug 2 14:44:11 2022 Status: finished Duration: 0:00:00 Total to scrub: 1.26GiB Rate: 0.00B/s Error summary: super=1 Corrected: 0 Uncorrectable: 0 Unverified: 0
But the second read-only scrub still reports the same super error:
Scrub started: Tue Aug 2 14:44:11 2022 Status: finished Duration: 0:00:00 Total to scrub: 1.26GiB Rate: 0.00B/s Error summary: super=1 Corrected: 0 Uncorrectable: 0 Unverified: 0
[CAUSE] The comment already shows that a super block error can be easily fixed by committing a transaction:
/* * If we find an error in a super block, we just report it. * They will get written with the next transaction commit * anyway */
But the truth is, such an assumption is not always true, and since scrub should try to repair every error it finds (except for read-only scrub), we should really actively commit a transaction to fix this.
[FIX] Just commit a transaction if we found any super block errors, after everything else is done.
We cannot do this just after scrub_supers(), as btrfs_commit_transaction() will try to pause and wait for the running scrub, thus we cannot call it with scrub_lock held.
Signed-off-by: Qu Wenruo wqu@suse.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/scrub.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 0fe7c4882e1f..7d9b09e3ca70 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -4093,6 +4093,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, int ret; struct btrfs_device *dev; unsigned int nofs_flag; + bool need_commit = false;
if (btrfs_fs_closing(fs_info)) return -EAGAIN; @@ -4196,6 +4197,12 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, */ nofs_flag = memalloc_nofs_save(); if (!is_dev_replace) { + u64 old_super_errors; + + spin_lock(&sctx->stat_lock); + old_super_errors = sctx->stat.super_errors; + spin_unlock(&sctx->stat_lock); + btrfs_info(fs_info, "scrub: started on devid %llu", devid); /* * by holding device list mutex, we can @@ -4204,6 +4211,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, mutex_lock(&fs_info->fs_devices->device_list_mutex); ret = scrub_supers(sctx, dev); mutex_unlock(&fs_info->fs_devices->device_list_mutex); + + spin_lock(&sctx->stat_lock); + /* + * Super block errors found, but we can not commit transaction + * at current context, since btrfs_commit_transaction() needs + * to pause the current running scrub (hold by ourselves). + */ + if (sctx->stat.super_errors > old_super_errors && !sctx->readonly) + need_commit = true; + spin_unlock(&sctx->stat_lock); }
if (!ret) @@ -4230,6 +4247,25 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, scrub_workers_put(fs_info); scrub_put_ctx(sctx);
+ /* + * We found some super block errors before, now try to force a + * transaction commit, as scrub has finished. + */ + if (need_commit) { + struct btrfs_trans_handle *trans; + + trans = btrfs_start_transaction(fs_info->tree_root, 0); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + btrfs_err(fs_info, + "scrub: failed to start transaction to fix super block errors: %d", ret); + return ret; + } + ret = btrfs_commit_transaction(trans); + if (ret < 0) + btrfs_err(fs_info, + "scrub: failed to commit transaction to fix super block errors: %d", ret); + } return ret; out: scrub_workers_put(fs_info);
From: Omar Sandoval osandov@fb.com
[ Upstream commit 48ff70830bec1ccc714f4e31059df737f17ec909 ]
struct btrfs_caching_ctl::progress and struct btrfs_block_group::last_byte_to_unpin were previously needed to ensure that unpin_extent_range() didn't return a range to the free space cache before the caching thread had a chance to cache that range. However, the commit "btrfs: fix space cache corruption and potential double allocations" made it so that we always synchronously cache the block group at the time that we pin the extent, so this machinery is no longer necessary.
Reviewed-by: Filipe Manana fdmanana@suse.com Signed-off-by: Omar Sandoval osandov@fb.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/block-group.c | 13 ------------ fs/btrfs/block-group.h | 2 -- fs/btrfs/extent-tree.c | 9 ++------- fs/btrfs/free-space-tree.c | 8 -------- fs/btrfs/transaction.c | 41 -------------------------------------- fs/btrfs/zoned.c | 1 - 6 files changed, 2 insertions(+), 72 deletions(-)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index e0375ba9d0fe..943da54df80e 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -593,8 +593,6 @@ static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
if (need_resched() || rwsem_is_contended(&fs_info->commit_root_sem)) { - if (wakeup) - caching_ctl->progress = last; btrfs_release_path(path); up_read(&fs_info->commit_root_sem); mutex_unlock(&caching_ctl->mutex); @@ -618,9 +616,6 @@ static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl) key.objectid = last; key.offset = 0; key.type = BTRFS_EXTENT_ITEM_KEY; - - if (wakeup) - caching_ctl->progress = last; btrfs_release_path(path); goto next; } @@ -655,7 +650,6 @@ static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
total_found += add_new_free_space(block_group, last, block_group->start + block_group->length); - caching_ctl->progress = (u64)-1;
out: btrfs_free_path(path); @@ -725,8 +719,6 @@ static noinline void caching_thread(struct btrfs_work *work) } #endif
- caching_ctl->progress = (u64)-1; - up_read(&fs_info->commit_root_sem); btrfs_free_excluded_extents(block_group); mutex_unlock(&caching_ctl->mutex); @@ -755,7 +747,6 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait) mutex_init(&caching_ctl->mutex); init_waitqueue_head(&caching_ctl->wait); caching_ctl->block_group = cache; - caching_ctl->progress = cache->start; refcount_set(&caching_ctl->count, 2); btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
@@ -2078,11 +2069,9 @@ static int read_one_block_group(struct btrfs_fs_info *info, /* Should not have any excluded extents. Just in case, though. */ btrfs_free_excluded_extents(cache); } else if (cache->length == cache->used) { - cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; btrfs_free_excluded_extents(cache); } else if (cache->used == 0) { - cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; add_new_free_space(cache, cache->start, cache->start + cache->length); @@ -2146,7 +2135,6 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info) /* Fill dummy cache as FULL */ bg->length = em->len; bg->flags = map->type; - bg->last_byte_to_unpin = (u64)-1; bg->cached = BTRFS_CACHE_FINISHED; bg->used = em->len; bg->flags = map->type; @@ -2494,7 +2482,6 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran set_free_space_tree_thresholds(cache); cache->used = bytes_used; cache->flags = type; - cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; cache->global_root_id = calculate_global_root_id(fs_info, cache->start);
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 6b3cdc4cbc41..817b52ff4f7a 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -52,7 +52,6 @@ struct btrfs_caching_control { wait_queue_head_t wait; struct btrfs_work work; struct btrfs_block_group *block_group; - u64 progress; refcount_t count; };
@@ -111,7 +110,6 @@ struct btrfs_block_group { /* Cache tracking stuff */ int cached; struct btrfs_caching_control *caching_ctl; - u64 last_byte_to_unpin;
struct btrfs_space_info *space_info;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6914cd8024ba..43db25475f4b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2686,13 +2686,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info, len = cache->start + cache->length - start; len = min(len, end + 1 - start);
- down_read(&fs_info->commit_root_sem); - if (start < cache->last_byte_to_unpin && return_free_space) { - u64 add_len = min(len, cache->last_byte_to_unpin - start); - - btrfs_add_free_space(cache, start, add_len); - } - up_read(&fs_info->commit_root_sem); + if (return_free_space) + btrfs_add_free_space(cache, start, len);
start += len; total_unpinned += len; diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 1bf89aa67216..367bcfcf68f5 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1453,8 +1453,6 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl, ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY); ASSERT(key.objectid < end && key.objectid + key.offset <= end);
- caching_ctl->progress = key.objectid; - offset = key.objectid; while (offset < key.objectid + key.offset) { bit = free_space_test_bit(block_group, path, offset); @@ -1490,8 +1488,6 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl, goto out; }
- caching_ctl->progress = (u64)-1; - ret = 0; out: return ret; @@ -1531,8 +1527,6 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl, ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY); ASSERT(key.objectid < end && key.objectid + key.offset <= end);
- caching_ctl->progress = key.objectid; - total_found += add_new_free_space(block_group, key.objectid, key.objectid + key.offset); if (total_found > CACHING_CTL_WAKE_UP) { @@ -1552,8 +1546,6 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl, goto out; }
- caching_ctl->progress = (u64)-1; - ret = 0; out: return ret; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 6e3b2cb6a04a..4c87bf2abc14 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -161,7 +161,6 @@ static noinline void switch_commit_roots(struct btrfs_trans_handle *trans) struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root, *tmp; - struct btrfs_caching_control *caching_ctl, *next;
/* * At this point no one can be using this transaction to modify any tree @@ -196,46 +195,6 @@ static noinline void switch_commit_roots(struct btrfs_trans_handle *trans) } spin_unlock(&cur_trans->dropped_roots_lock);
- /* - * We have to update the last_byte_to_unpin under the commit_root_sem, - * at the same time we swap out the commit roots. - * - * This is because we must have a real view of the last spot the caching - * kthreads were while caching. Consider the following views of the - * extent tree for a block group - * - * commit root - * +----+----+----+----+----+----+----+ - * |\\| |\\|\\| |\\|\\| - * +----+----+----+----+----+----+----+ - * 0 1 2 3 4 5 6 7 - * - * new commit root - * +----+----+----+----+----+----+----+ - * | | | |\\| | |\\| - * +----+----+----+----+----+----+----+ - * 0 1 2 3 4 5 6 7 - * - * If the cache_ctl->progress was at 3, then we are only allowed to - * unpin [0,1) and [2,3], because the caching thread has already - * processed those extents. We are not allowed to unpin [5,6), because - * the caching thread will re-start it's search from 3, and thus find - * the hole from [4,6) to add to the free space cache. - */ - write_lock(&fs_info->block_group_cache_lock); - list_for_each_entry_safe(caching_ctl, next, - &fs_info->caching_block_groups, list) { - struct btrfs_block_group *cache = caching_ctl->block_group; - - if (btrfs_block_group_done(cache)) { - cache->last_byte_to_unpin = (u64)-1; - list_del_init(&caching_ctl->list); - btrfs_put_caching_control(caching_ctl); - } else { - cache->last_byte_to_unpin = caching_ctl->progress; - } - } - write_unlock(&fs_info->block_group_cache_lock); up_write(&fs_info->commit_root_sem); }
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 73c6929f7be6..252b41e066cd 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1563,7 +1563,6 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) free = cache->zone_capacity - cache->alloc_offset;
/* We only need ->free_space in ALLOC_SEQ block groups */ - cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; cache->free_space_ctl->free_space = free; cache->zone_unusable = unusable;
On Tue, Oct 11, 2022 at 10:50:01AM -0400, Sasha Levin wrote:
From: Omar Sandoval osandov@fb.com
[ Upstream commit 48ff70830bec1ccc714f4e31059df737f17ec909 ]
struct btrfs_caching_ctl::progress and struct btrfs_block_group::last_byte_to_unpin were previously needed to ensure that unpin_extent_range() didn't return a range to the free space cache before the caching thread had a chance to cache that range. However, the commit "btrfs: fix space cache corruption and potential double allocations" made it so that we always synchronously cache the block group at the time that we pin the extent, so this machinery is no longer necessary.
Reviewed-by: Filipe Manana fdmanana@suse.com Signed-off-by: Omar Sandoval osandov@fb.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org
fs/btrfs/block-group.c | 13 ------------ fs/btrfs/block-group.h | 2 -- fs/btrfs/extent-tree.c | 9 ++------- fs/btrfs/free-space-tree.c | 8 -------- fs/btrfs/transaction.c | 41 -------------------------------------- fs/btrfs/zoned.c | 1 - 6 files changed, 2 insertions(+), 72 deletions(-)
Hi, Sasha,
This commit is a cleanup. Please drop it from 6.0 and 5.19.
Thanks, Omar
On Tue, Oct 11, 2022 at 04:46:49PM -0700, Omar Sandoval wrote:
On Tue, Oct 11, 2022 at 10:50:01AM -0400, Sasha Levin wrote:
From: Omar Sandoval osandov@fb.com
[ Upstream commit 48ff70830bec1ccc714f4e31059df737f17ec909 ]
struct btrfs_caching_ctl::progress and struct btrfs_block_group::last_byte_to_unpin were previously needed to ensure that unpin_extent_range() didn't return a range to the free space cache before the caching thread had a chance to cache that range. However, the commit "btrfs: fix space cache corruption and potential double allocations" made it so that we always synchronously cache the block group at the time that we pin the extent, so this machinery is no longer necessary.
Reviewed-by: Filipe Manana fdmanana@suse.com Signed-off-by: Omar Sandoval osandov@fb.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org
fs/btrfs/block-group.c | 13 ------------ fs/btrfs/block-group.h | 2 -- fs/btrfs/extent-tree.c | 9 ++------- fs/btrfs/free-space-tree.c | 8 -------- fs/btrfs/transaction.c | 41 -------------------------------------- fs/btrfs/zoned.c | 1 - 6 files changed, 2 insertions(+), 72 deletions(-)
Hi, Sasha,
This commit is a cleanup. Please drop it from 6.0 and 5.19.
Ack, thanks!
From: "Maciej S. Szmigiero" maciej.szmigiero@oracle.com
[ Upstream commit dbecac26630014d336a8e5ea67096ff18210fb9c ]
btrfs currently prints information about space cache or free space tree being in use on every remount, regardless whether such remount actually enabled or disabled one of these features.
This is actually unnecessary since providing remount options changing the state of these features will explicitly print the appropriate notice.
Let's instead print such unconditional information just on an initial mount to avoid filling the kernel log when, for example, laptop-mode-tools remount the fs on some events.
Signed-off-by: Maciej S. Szmigiero maciej.szmigiero@oracle.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/super.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f89beac3c665..f1c6ca59299e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -626,6 +626,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, int saved_compress_level; bool saved_compress_force; int no_compress = 0; + const bool remounting = test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state);
if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE); @@ -1137,10 +1138,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, } if (!ret) ret = btrfs_check_mountopts_zoned(info); - if (!ret && btrfs_test_opt(info, SPACE_CACHE)) - btrfs_info(info, "disk space caching is enabled"); - if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE)) - btrfs_info(info, "using free space tree"); + if (!ret && !remounting) { + if (btrfs_test_opt(info, SPACE_CACHE)) + btrfs_info(info, "disk space caching is enabled"); + if (btrfs_test_opt(info, FREE_SPACE_TREE)) + btrfs_info(info, "using free space tree"); + } return ret; }
From: Josef Bacik josef@toxicpanda.com
[ Upstream commit 8a1ae2781dee9fc21ca82db682d37bea4bd074ad ]
Now that lockdep is staying enabled through our entire CI runs I started seeing the following stack in generic/475
------------[ cut here ]------------ WARNING: CPU: 1 PID: 2171864 at fs/btrfs/discard.c:604 btrfs_discard_update_discardable+0x98/0xb0 CPU: 1 PID: 2171864 Comm: kworker/u4:0 Not tainted 5.19.0-rc8+ #789 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 Workqueue: btrfs-cache btrfs_work_helper RIP: 0010:btrfs_discard_update_discardable+0x98/0xb0 RSP: 0018:ffffb857c2f7bad0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8c85c605c200 RCX: 0000000000000001 RDX: 0000000000000000 RSI: ffffffff86807c5b RDI: ffffffff868a831e RBP: ffff8c85c4c54000 R08: 0000000000000000 R09: 0000000000000000 R10: ffff8c85c66932f0 R11: 0000000000000001 R12: ffff8c85c3899010 R13: ffff8c85d5be4f40 R14: ffff8c85c4c54000 R15: ffff8c86114bfa80 FS: 0000000000000000(0000) GS:ffff8c863bd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f2e7f168160 CR3: 000000010289a004 CR4: 0000000000370ee0 Call Trace:
__btrfs_remove_free_space_cache+0x27/0x30 load_free_space_cache+0xad2/0xaf0 caching_thread+0x40b/0x650 ? lock_release+0x137/0x2d0 btrfs_work_helper+0xf2/0x3e0 ? lock_is_held_type+0xe2/0x140 process_one_work+0x271/0x590 ? process_one_work+0x590/0x590 worker_thread+0x52/0x3b0 ? process_one_work+0x590/0x590 kthread+0xf0/0x120 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30
This is the code
ctl = block_group->free_space_ctl; discard_ctl = &block_group->fs_info->discard_ctl;
lockdep_assert_held(&ctl->tree_lock);
We have a temporary free space ctl for loading the free space cache in order to avoid having allocations happening while we're loading the cache. When we hit an error we free it all up, however this also calls btrfs_discard_update_discardable, which requires block_group->free_space_ctl->tree_lock to be held. However this is our temporary ctl so this lock isn't held. Fix this by calling __btrfs_remove_free_space_cache_locked instead so that we only clean up the entries and do not mess with the discardable stats.
Signed-off-by: Josef Bacik josef@toxicpanda.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/free-space-cache.c | 53 +++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 20 deletions(-)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 835071fa39a9..2f88053cfc5e 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -48,6 +48,25 @@ static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info, u64 offset, u64 bytes, bool update_stats);
+static void __btrfs_remove_free_space_cache_locked( + struct btrfs_free_space_ctl *ctl) +{ + struct btrfs_free_space *info; + struct rb_node *node; + + while ((node = rb_last(&ctl->free_space_offset)) != NULL) { + info = rb_entry(node, struct btrfs_free_space, offset_index); + if (!info->bitmap) { + unlink_free_space(ctl, info, true); + kmem_cache_free(btrfs_free_space_cachep, info); + } else { + free_bitmap(ctl, info); + } + + cond_resched_lock(&ctl->tree_lock); + } +} + static struct inode *__lookup_free_space_inode(struct btrfs_root *root, struct btrfs_path *path, u64 offset) @@ -881,7 +900,14 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, return ret; free_cache: io_ctl_drop_pages(&io_ctl); - __btrfs_remove_free_space_cache(ctl); + + /* + * We need to call the _locked variant so we don't try to update the + * discard counters. + */ + spin_lock(&ctl->tree_lock); + __btrfs_remove_free_space_cache_locked(ctl); + spin_unlock(&ctl->tree_lock); goto out; }
@@ -1017,7 +1043,13 @@ int load_free_space_cache(struct btrfs_block_group *block_group) if (ret == 0) ret = 1; } else { + /* + * We need to call the _locked variant so we don't try to update + * the discard counters. + */ + spin_lock(&tmp_ctl.tree_lock); __btrfs_remove_free_space_cache(&tmp_ctl); + spin_unlock(&tmp_ctl.tree_lock); btrfs_warn(fs_info, "block group %llu has wrong amount of free space", block_group->start); @@ -2980,25 +3012,6 @@ static void __btrfs_return_cluster_to_free_space( btrfs_put_block_group(block_group); }
-static void __btrfs_remove_free_space_cache_locked( - struct btrfs_free_space_ctl *ctl) -{ - struct btrfs_free_space *info; - struct rb_node *node; - - while ((node = rb_last(&ctl->free_space_offset)) != NULL) { - info = rb_entry(node, struct btrfs_free_space, offset_index); - if (!info->bitmap) { - unlink_free_space(ctl, info, true); - kmem_cache_free(btrfs_free_space_cachep, info); - } else { - free_bitmap(ctl, info); - } - - cond_resched_lock(&ctl->tree_lock); - } -} - void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) { spin_lock(&ctl->tree_lock);
From: Qu Wenruo wqu@suse.com
[ Upstream commit a05d3c9153145283ce9c58a1d7a9056fbb85f6a1 ]
[BACKGROUND] There is an incident report that one user hibernated the system with a btrfs on a removable device still mounted.
Then, by accident, the btrfs got mounted and modified by another system/OS, and was then returned to the hibernated system.
After resuming from hibernation, new writes were made to the victim btrfs.
Now the fs is completely broken, since the underlying btrfs is no longer the same as it was before the hibernation, and the user lost their data due to various transid mismatches.
[REPRODUCER] We can emulate the situation using the following small script:
truncate -s 1G $dev
mkfs.btrfs -f $dev
mount $dev $mnt
fsstress -w -d $mnt -n 500
sync
xfs_freeze -f $mnt
cp $dev $dev.backup
# There is no way to mount the same cloned fs on the same system,
# as the conflicting fsid will be rejected by btrfs.
# Thus here we have to wipe the fs using a different btrfs.
mkfs.btrfs -f $dev.backup
dd if=$dev.backup of=$dev bs=1M
xfs_freeze -u $mnt
fsstress -w -d $mnt -n 20
umount $mnt
btrfs check $dev
The final fsck will fail because some tree blocks have an incorrect fsid.
This is enough to emulate the problem hit by the unfortunate user.
[ENHANCEMENT] Although such case should not be that common, it can still happen from time to time.
From the view of btrfs, we can detect any unexpected super block change, and if there is any unexpected change, we just mark the fs read-only, and thaw the fs.
This way we can keep the damage to a minimum, and I hope no one will lose their data to this anymore.
Suggested-by: Goffredo Baroncelli kreijack@libero.it Link: https://lore.kernel.org/linux-btrfs/83bf3b4b-7f4c-387a-b286-9251e3991e34@blu... Reviewed-by: Anand Jain anand.jain@oracle.com Signed-off-by: Qu Wenruo wqu@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/disk-io.c | 25 ++++++++++++++----- fs/btrfs/disk-io.h | 4 +++- fs/btrfs/super.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/volumes.c | 2 +- 4 files changed, 83 insertions(+), 8 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e0e1730e67d7..d9881b54efd1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2600,8 +2600,8 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info) * 1, 2 2nd and 3rd backup copy * -1 skip bytenr check */ -static int validate_super(struct btrfs_fs_info *fs_info, - struct btrfs_super_block *sb, int mirror_num) +int btrfs_validate_super(struct btrfs_fs_info *fs_info, + struct btrfs_super_block *sb, int mirror_num) { u64 nodesize = btrfs_super_nodesize(sb); u64 sectorsize = btrfs_super_sectorsize(sb); @@ -2785,7 +2785,7 @@ static int validate_super(struct btrfs_fs_info *fs_info, */ static int btrfs_validate_mount_super(struct btrfs_fs_info *fs_info) { - return validate_super(fs_info, fs_info->super_copy, 0); + return btrfs_validate_super(fs_info, fs_info->super_copy, 0); }
/* @@ -2799,7 +2799,7 @@ static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info, { int ret;
- ret = validate_super(fs_info, sb, -1); + ret = btrfs_validate_super(fs_info, sb, -1); if (ret < 0) goto out; if (!btrfs_supported_super_csum(btrfs_super_csum_type(sb))) { @@ -3846,7 +3846,7 @@ static void btrfs_end_super_write(struct bio *bio) }
struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev, - int copy_num) + int copy_num, bool drop_cache) { struct btrfs_super_block *super; struct page *page; @@ -3864,6 +3864,19 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev, if (bytenr + BTRFS_SUPER_INFO_SIZE >= bdev_nr_bytes(bdev)) return ERR_PTR(-EINVAL);
+ if (drop_cache) { + /* This should only be called with the primary sb. */ + ASSERT(copy_num == 0); + + /* + * Drop the page of the primary superblock, so later read will + * always read from the device. + */ + invalidate_inode_pages2_range(mapping, + bytenr >> PAGE_SHIFT, + (bytenr + BTRFS_SUPER_INFO_SIZE) >> PAGE_SHIFT); + } + page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS); if (IS_ERR(page)) return ERR_CAST(page); @@ -3895,7 +3908,7 @@ struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev) * later supers, using BTRFS_SUPER_MIRROR_MAX instead */ for (i = 0; i < 1; i++) { - super = btrfs_read_dev_one_super(bdev, i); + super = btrfs_read_dev_one_super(bdev, i, false); if (IS_ERR(super)) continue;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 47ad8e0a2d33..aef981de672c 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -46,10 +46,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices, char *options); void __cold close_ctree(struct btrfs_fs_info *fs_info); +int btrfs_validate_super(struct btrfs_fs_info *fs_info, + struct btrfs_super_block *sb, int mirror_num); int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors); struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev); struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev, - int copy_num); + int copy_num, bool drop_cache); int btrfs_commit_super(struct btrfs_fs_info *fs_info); struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, struct btrfs_key *key); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f1c6ca59299e..692983e69ba1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2553,11 +2553,71 @@ static int btrfs_freeze(struct super_block *sb) return btrfs_commit_transaction(trans); }
+static int check_dev_super(struct btrfs_device *dev) +{ + struct btrfs_fs_info *fs_info = dev->fs_info; + struct btrfs_super_block *sb; + int ret = 0; + + /* This should be called with fs still frozen. */ + ASSERT(test_bit(BTRFS_FS_FROZEN, &fs_info->flags)); + + /* Missing dev, no need to check. */ + if (!dev->bdev) + return 0; + + /* Only need to check the primary super block. */ + sb = btrfs_read_dev_one_super(dev->bdev, 0, true); + if (IS_ERR(sb)) + return PTR_ERR(sb); + + /* Btrfs_validate_super() includes fsid check against super->fsid. */ + ret = btrfs_validate_super(fs_info, sb, 0); + if (ret < 0) + goto out; + + if (btrfs_super_generation(sb) != fs_info->last_trans_committed) { + btrfs_err(fs_info, "transid mismatch, has %llu expect %llu", + btrfs_super_generation(sb), + fs_info->last_trans_committed); + ret = -EUCLEAN; + goto out; + } +out: + btrfs_release_disk_super(sb); + return ret; +} + static int btrfs_unfreeze(struct super_block *sb) { struct btrfs_fs_info *fs_info = btrfs_sb(sb); + struct btrfs_device *device; + int ret = 0;
+ /* + * Make sure the fs is not changed by accident (like hibernation then + * modified by other OS). + * If we found anything wrong, we mark the fs error immediately. + * + * And since the fs is frozen, no one can modify the fs yet, thus + * we don't need to hold device_list_mutex. + */ + list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { + ret = check_dev_super(device); + if (ret < 0) { + btrfs_handle_fs_error(fs_info, ret, + "super block on devid %llu got modified unexpectedly", + device->devid); + break; + } + } clear_bit(BTRFS_FS_FROZEN, &fs_info->flags); + + /* + * We still return 0, to allow VFS layer to unfreeze the fs even the + * above checks failed. Since the fs is either fine or read-only, we're + * safe to continue, without causing further damage. + */ return 0; }
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f63ff91e2883..b4df6f74855c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2017,7 +2017,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, struct page *page; int ret;
- disk_super = btrfs_read_dev_one_super(bdev, copy_num); + disk_super = btrfs_read_dev_one_super(bdev, copy_num, false); if (IS_ERR(disk_super)) continue;
From: zhang songyi zhang.songyi@zte.com.cn
[ Upstream commit bd64f6221a98fb1857485c63fd3d8da8d47406c6 ]
Return the sysfs_emit() and iterate_object_props() directly instead of using unnecessary variables.
Reported-by: Zeal Robot zealci@zte.com.cn Reviewed-by: Anand Jain anand.jain@oracle.com Signed-off-by: zhang songyi zhang.songyi@zte.com.cn Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/props.c | 5 +---- fs/btrfs/sysfs.c | 10 ++-------- 2 files changed, 3 insertions(+), 12 deletions(-)
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index a2ec8ecae8de..055a631276ce 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -270,11 +270,8 @@ int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 ino = btrfs_ino(BTRFS_I(inode)); - int ret; - - ret = iterate_object_props(root, path, ino, inode_prop_iterator, inode);
- return ret; + return iterate_object_props(root, path, ino, inode_prop_iterator, inode); }
static int prop_compression_validate(const struct btrfs_inode *inode, diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index d5d0717fd09a..484a4ed3ecf8 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -837,11 +837,8 @@ static ssize_t btrfs_sinfo_bg_reclaim_threshold_show(struct kobject *kobj, char *buf) { struct btrfs_space_info *space_info = to_space_info(kobj); - ssize_t ret; - - ret = sysfs_emit(buf, "%d\n", READ_ONCE(space_info->bg_reclaim_threshold));
- return ret; + return sysfs_emit(buf, "%d\n", READ_ONCE(space_info->bg_reclaim_threshold)); }
static ssize_t btrfs_sinfo_bg_reclaim_threshold_store(struct kobject *kobj, @@ -1222,11 +1219,8 @@ static ssize_t btrfs_bg_reclaim_threshold_show(struct kobject *kobj, char *buf) { struct btrfs_fs_info *fs_info = to_fs_info(kobj); - ssize_t ret; - - ret = sysfs_emit(buf, "%d\n", READ_ONCE(fs_info->bg_reclaim_threshold));
- return ret; + return sysfs_emit(buf, "%d\n", READ_ONCE(fs_info->bg_reclaim_threshold)); }
static ssize_t btrfs_bg_reclaim_threshold_store(struct kobject *kobj,
On Tue, Oct 11, 2022 at 10:50:05AM -0400, Sasha Levin wrote:
From: zhang songyi zhang.songyi@zte.com.cn
[ Upstream commit bd64f6221a98fb1857485c63fd3d8da8d47406c6 ]
Return the sysfs_emit() and iterate_object_props() directly instead of using unnecessary variables.
Reported-by: Zeal Robot zealci@zte.com.cn Reviewed-by: Anand Jain anand.jain@oracle.com Signed-off-by: zhang songyi zhang.songyi@zte.com.cn Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org
fs/btrfs/props.c | 5 +---- fs/btrfs/sysfs.c | 10 ++-------- 2 files changed, 3 insertions(+), 12 deletions(-)
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index a2ec8ecae8de..055a631276ce 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -270,11 +270,8 @@ int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 ino = btrfs_ino(BTRFS_I(inode));
- int ret;
- ret = iterate_object_props(root, path, ino, inode_prop_iterator, inode);
- return ret;
+ return iterate_object_props(root, path, ino, inode_prop_iterator, inode);
Please drop the patch from stable queues, it's an obvious cleanup.
On Wed, Oct 12, 2022 at 01:54:18PM +0200, David Sterba wrote:
On Tue, Oct 11, 2022 at 10:50:05AM -0400, Sasha Levin wrote:
From: zhang songyi zhang.songyi@zte.com.cn
[ Upstream commit bd64f6221a98fb1857485c63fd3d8da8d47406c6 ]
Return the sysfs_emit() and iterate_object_props() directly instead of using unnecessary variables.
Reported-by: Zeal Robot zealci@zte.com.cn Reviewed-by: Anand Jain anand.jain@oracle.com Signed-off-by: zhang songyi zhang.songyi@zte.com.cn Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org
fs/btrfs/props.c | 5 +---- fs/btrfs/sysfs.c | 10 ++-------- 2 files changed, 3 insertions(+), 12 deletions(-)
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index a2ec8ecae8de..055a631276ce 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -270,11 +270,8 @@ int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 ino = btrfs_ino(BTRFS_I(inode));
- int ret;
- ret = iterate_object_props(root, path, ino, inode_prop_iterator, inode);
- return ret;
+ return iterate_object_props(root, path, ino, inode_prop_iterator, inode);
Please drop the patch from stable queues, it's an obvious cleanup.
Ack, I'll drop this and the other btrfs commits you've pointed out.
From: Qu Wenruo wqu@suse.com
[ Upstream commit e562a8bdf652b010ce2525bcf15d145c9d3932bf ]
Introduce a new runtime flag, BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN, which will inform qgroup rescan to cancel its work asynchronously.
This is to address the window when an operation makes qgroup numbers inconsistent (like qgroup inheriting) while a qgroup rescan is running.
In that case, qgroup inconsistent flag will be cleared when qgroup rescan finishes. But we changed the ownership of some extents, which means the rescan is already meaningless, and the qgroup inconsistent flag should not be cleared.
With the new flag, each time we set INCONSISTENT flag, we also set this new flag to inform any running qgroup rescan to exit immediately, and leaving the INCONSISTENT flag there.
The new runtime flag can only be cleared when a new rescan is started.
Signed-off-by: Qu Wenruo wqu@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/qgroup.c | 43 ++++++++++++++++++++++++++----------------- fs/btrfs/qgroup.h | 2 ++ 2 files changed, 28 insertions(+), 17 deletions(-)
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index db723c0026bd..8e7b188d1dc1 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -333,6 +333,12 @@ int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, } #endif
+static void qgroup_mark_inconsistent(struct btrfs_fs_info *fs_info) +{ + fs_info->qgroup_flags |= (BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT | + BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN); +} + /* * The full config is read in one go, only called from open_ctree() * It doesn't use any locking, as at this point we're still single-threaded @@ -401,7 +407,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) } if (btrfs_qgroup_status_generation(l, ptr) != fs_info->generation) { - flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + qgroup_mark_inconsistent(fs_info); btrfs_err(fs_info, "qgroup generation mismatch, marked as inconsistent"); } @@ -419,7 +425,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) || (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) { btrfs_err(fs_info, "inconsistent qgroup config"); - flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + qgroup_mark_inconsistent(fs_info); } if (!qgroup) { qgroup = add_qgroup_rb(fs_info, found_key.offset); @@ -1717,7 +1723,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
ret = update_qgroup_limit_item(trans, qgroup); if (ret) { - fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + qgroup_mark_inconsistent(fs_info); btrfs_info(fs_info, "unable to update quota limit for %llu", qgroupid); } @@ -1793,7 +1799,7 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans, ret = btrfs_find_all_roots(NULL, trans->fs_info, bytenr, 0, &old_root, true); if (ret < 0) { - trans->fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + qgroup_mark_inconsistent(trans->fs_info); btrfs_warn(trans->fs_info, "error accounting new delayed refs extent (err code: %d), quota inconsistent", ret); @@ -2269,7 +2275,7 @@ static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans, out: btrfs_free_path(dst_path); if (ret < 0) - fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + qgroup_mark_inconsistent(fs_info); return ret; }
@@ -2773,12 +2779,10 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans) spin_unlock(&fs_info->qgroup_lock); ret = update_qgroup_info_item(trans, qgroup); if (ret) - fs_info->qgroup_flags |= - BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + qgroup_mark_inconsistent(fs_info); ret = update_qgroup_limit_item(trans, qgroup); if (ret) - fs_info->qgroup_flags |= - BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + qgroup_mark_inconsistent(fs_info); spin_lock(&fs_info->qgroup_lock); } if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) @@ -2789,7 +2793,7 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
ret = update_qgroup_status_item(trans); if (ret) - fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + qgroup_mark_inconsistent(fs_info);
return ret; } @@ -2907,7 +2911,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
ret = update_qgroup_limit_item(trans, dstgroup); if (ret) { - fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + qgroup_mark_inconsistent(fs_info); btrfs_info(fs_info, "unable to update quota limit for %llu", dstgroup->qgroupid); @@ -3013,7 +3017,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, if (!committing) mutex_unlock(&fs_info->qgroup_ioctl_lock); if (need_rescan) - fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + qgroup_mark_inconsistent(fs_info); return ret; }
@@ -3286,7 +3290,8 @@ static bool rescan_should_stop(struct btrfs_fs_info *fs_info) { return btrfs_fs_closing(fs_info) || test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state) || - !test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); + !test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || + fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN; }
static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) @@ -3351,7 +3356,8 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) }
mutex_lock(&fs_info->qgroup_rescan_lock); - if (!stopped) + if (!stopped || + fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN) fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; if (trans) { ret = update_qgroup_status_item(trans); @@ -3362,6 +3368,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) } } fs_info->qgroup_rescan_running = false; + fs_info->qgroup_flags &= ~BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN; complete_all(&fs_info->qgroup_rescan_completion); mutex_unlock(&fs_info->qgroup_rescan_lock);
@@ -3372,6 +3379,8 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
if (stopped) { btrfs_info(fs_info, "qgroup scan paused"); + } else if (fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN) { + btrfs_info(fs_info, "qgroup scan cancelled"); } else if (err >= 0) { btrfs_info(fs_info, "qgroup scan completed%s", err > 0 ? " (inconsistency flag cleared)" : ""); @@ -3434,6 +3443,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
memset(&fs_info->qgroup_rescan_progress, 0, sizeof(fs_info->qgroup_rescan_progress)); + fs_info->qgroup_flags &= ~BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN; fs_info->qgroup_rescan_progress.objectid = progress_objectid; init_completion(&fs_info->qgroup_rescan_completion); mutex_unlock(&fs_info->qgroup_rescan_lock); @@ -4231,8 +4241,7 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans, spin_unlock(&blocks->lock); out: if (ret < 0) - fs_info->qgroup_flags |= - BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + qgroup_mark_inconsistent(fs_info); return ret; }
@@ -4319,7 +4328,7 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, btrfs_err_rl(fs_info, "failed to account subtree at bytenr %llu: %d", subvol_eb->start, ret); - fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + qgroup_mark_inconsistent(fs_info); } return ret; } diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 0c4dd2a9af96..90d3632c5524 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -100,6 +100,8 @@ * subtree rescan for them. */
+#define BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN (1UL << 3) + /* * Record a dirty extent, and info qgroup to update quota on it * TODO: Use kmem cache to alloc it.
On Tue, Oct 11, 2022 at 10:50:06AM -0400, Sasha Levin wrote:
From: Qu Wenruo wqu@suse.com
[ Upstream commit e562a8bdf652b010ce2525bcf15d145c9d3932bf ]
Introduce a new runtime flag, BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN, which will inform qgroup rescan to cancel its work asynchronously.
This is to address the window when an operation makes qgroup numbers inconsistent (like qgroup inheriting) while a qgroup rescan is running.
In that case, qgroup inconsistent flag will be cleared when qgroup rescan finishes. But we changed the ownership of some extents, which means the rescan is already meaningless, and the qgroup inconsistent flag should not be cleared.
With the new flag, each time we set INCONSISTENT flag, we also set this new flag to inform any running qgroup rescan to exit immediately, and leaving the INCONSISTENT flag there.
The new runtime flag can only be cleared when a new rescan is started.
Qu, does this patch make sense for stable on itself? It was part of a series adding some new flags and the sysfs knob. As I read it there's a case where it can affect how the rescan is done and that it can be cancelled but still am not sure if it's worth the backport.
On 2022/10/12 20:56, David Sterba wrote:
On Tue, Oct 11, 2022 at 10:50:06AM -0400, Sasha Levin wrote:
From: Qu Wenruo wqu@suse.com
[ Upstream commit e562a8bdf652b010ce2525bcf15d145c9d3932bf ]
Introduce a new runtime flag, BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN, which will inform qgroup rescan to cancel its work asynchronously.
This is to address the window when an operation makes qgroup numbers inconsistent (like qgroup inheriting) while a qgroup rescan is running.
In that case, qgroup inconsistent flag will be cleared when qgroup rescan finishes. But we changed the ownership of some extents, which means the rescan is already meaningless, and the qgroup inconsistent flag should not be cleared.
With the new flag, each time we set INCONSISTENT flag, we also set this new flag to inform any running qgroup rescan to exit immediately, and leaving the INCONSISTENT flag there.
The new runtime flag can only be cleared when a new rescan is started.
Qu, does this patch make sense for stable on itself? It was part of a series adding some new flags and the sysfs knob. As I read it there's a case where it can affect how the rescan is done and that it can be cancelled but still am not sure if it's worth the backport.
Considering the qgroup still lacks a way to handle large subvolume drop, and a lot of things can mark qgroup inconsistent halfway, I think backporting this patch itself is not that bad.
The problem is, why only backporting this one?
To me, it would make more sense to backport either all or none.
Sure, if we can cancel rescan it's an improvement, but rescan itself is already relatively cheap compared to other qgroup operations. Thus I prefer to backport all the qgroup patches.
Thanks, Qu
On Thu, Oct 13, 2022 at 07:12:10AM +0800, Qu Wenruo wrote:
On 2022/10/12 20:56, David Sterba wrote:
On Tue, Oct 11, 2022 at 10:50:06AM -0400, Sasha Levin wrote:
From: Qu Wenruo wqu@suse.com
[ Upstream commit e562a8bdf652b010ce2525bcf15d145c9d3932bf ]
Introduce a new runtime flag, BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN, which will inform qgroup rescan to cancel its work asynchronously.
This is to address the window when an operation makes qgroup numbers inconsistent (like qgroup inheriting) while a qgroup rescan is running.
In that case, qgroup inconsistent flag will be cleared when qgroup rescan finishes. But we changed the ownership of some extents, which means the rescan is already meaningless, and the qgroup inconsistent flag should not be cleared.
With the new flag, each time we set INCONSISTENT flag, we also set this new flag to inform any running qgroup rescan to exit immediately, and leaving the INCONSISTENT flag there.
The new runtime flag can only be cleared when a new rescan is started.
Qu, does this patch make sense for stable on itself? It was part of a series adding some new flags and the sysfs knob. As I read it there's a case where it can affect how the rescan is done and that it can be cancelled but still am not sure if it's worth the backport.
Considering the qgroup still lacks a way to handle large subvolume drop, and a lot of things can mark qgroup inconsistent halfway, I think backporting this patch itself is not that bad.
The problem is, why only backporting this one?
To me, it would make more sense to backport either all or none.
Sure, if we can cancel rescan it's an improvement, but rescan itself is already relatively cheap compared to other qgroup operations. Thus I prefer to backport all the qgroup patches.
I'll drop this one and happily take a series if you want to send one out.
From: David Sterba dsterba@suse.com
[ Upstream commit 748f553c3c4c4f175c6c834358632aff802d72cf ]
KCSAN reports that there's unlocked access mixed with locked access, which is technically correct but is not a bug. To avoid false alerts at least from KCSAN, add annotation and use a wrapper whenever ->full is accessed for read outside of lock.
It is used as a fast check and only advisory. In the worst case the block reserve is found !full and becomes full in the meantime, but properly handled.
Depending on the value of ->full, btrfs_block_rsv_release decides where to return the reservation, and block_rsv_release_bytes handles a NULL pointer for block_rsv and if it's not NULL then it double checks the full status under a lock.
Link: https://lore.kernel.org/linux-btrfs/CAAwBoOJDjei5Hnem155N_cJwiEkVwJYvgN-tQrw... Link: https://lore.kernel.org/linux-btrfs/YvHU/vsXd7uz5V6j@hungrycats.org Reported-by: Zygo Blaxell ce3g8jdj@umail.furryterror.org Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/block-rsv.c | 2 +- fs/btrfs/block-rsv.h | 9 +++++++++ fs/btrfs/transaction.c | 4 ++-- 3 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c index 06be0644dd37..046caf14a4bb 100644 --- a/fs/btrfs/block-rsv.c +++ b/fs/btrfs/block-rsv.c @@ -286,7 +286,7 @@ u64 btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, */ if (block_rsv == delayed_rsv) target = global_rsv; - else if (block_rsv != global_rsv && !delayed_rsv->full) + else if (block_rsv != global_rsv && !btrfs_block_rsv_full(delayed_rsv)) target = delayed_rsv;
if (target && block_rsv->space_info != target->space_info) diff --git a/fs/btrfs/block-rsv.h b/fs/btrfs/block-rsv.h index 0c183709be00..578c3497a455 100644 --- a/fs/btrfs/block-rsv.h +++ b/fs/btrfs/block-rsv.h @@ -92,4 +92,13 @@ static inline void btrfs_unuse_block_rsv(struct btrfs_fs_info *fs_info, btrfs_block_rsv_release(fs_info, block_rsv, 0, NULL); }
+/* + * Fast path to check if the reserve is full, may be carefully used outside of + * locks. + */ +static inline bool btrfs_block_rsv_full(const struct btrfs_block_rsv *rsv) +{ + return data_race(rsv->full); +} + #endif /* BTRFS_BLOCK_RSV_H */ diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4c87bf2abc14..49570ad23f2e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -594,7 +594,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, */ num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items); if (flush == BTRFS_RESERVE_FLUSH_ALL && - delayed_refs_rsv->full == 0) { + btrfs_block_rsv_full(delayed_refs_rsv) == 0) { delayed_refs_bytes = num_bytes; num_bytes <<= 1; } @@ -619,7 +619,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, if (rsv->space_info->force_alloc) do_chunk_alloc = true; } else if (num_items == 0 && flush == BTRFS_RESERVE_FLUSH_ALL && - !delayed_refs_rsv->full) { + !btrfs_block_rsv_full(delayed_refs_rsv)) { /* * Some people call with btrfs_start_transaction(root, 0) * because they can be throttled, but have some other mechanism
From: Josef Bacik josef@toxicpanda.com
[ Upstream commit a40246e8afc0af3ffdee21854fb755c9364b8346 ]
Currently we have the add/del functions generic so that we can use them for both extent buffers and extent states. We want to separate this code however, so separate these helpers into per-object helpers in anticipation of the split.
Signed-off-by: Josef Bacik josef@toxicpanda.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/extent_io.c | 58 +++++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 20 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index cf4f19e80e2f..d9d254b59bd1 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -44,25 +44,42 @@ static inline bool extent_state_in_tree(const struct extent_state *state) static LIST_HEAD(states); static DEFINE_SPINLOCK(leak_lock);
-static inline void btrfs_leak_debug_add(spinlock_t *lock, - struct list_head *new, - struct list_head *head) +static inline void btrfs_leak_debug_add_eb(struct extent_buffer *eb) +{ + struct btrfs_fs_info *fs_info = eb->fs_info; + unsigned long flags; + + spin_lock_irqsave(&fs_info->eb_leak_lock, flags); + list_add(&eb->leak_list, &fs_info->allocated_ebs); + spin_unlock_irqrestore(&fs_info->eb_leak_lock, flags); +} + +static inline void btrfs_leak_debug_add_state(struct extent_state *state) { unsigned long flags;
- spin_lock_irqsave(lock, flags); - list_add(new, head); - spin_unlock_irqrestore(lock, flags); + spin_lock_irqsave(&leak_lock, flags); + list_add(&state->leak_list, &states); + spin_unlock_irqrestore(&leak_lock, flags); +} + +static inline void btrfs_leak_debug_del_eb(struct extent_buffer *eb) +{ + struct btrfs_fs_info *fs_info = eb->fs_info; + unsigned long flags; + + spin_lock_irqsave(&fs_info->eb_leak_lock, flags); + list_del(&eb->leak_list); + spin_unlock_irqrestore(&fs_info->eb_leak_lock, flags); }
-static inline void btrfs_leak_debug_del(spinlock_t *lock, - struct list_head *entry) +static inline void btrfs_leak_debug_del_state(struct extent_state *state) { unsigned long flags;
- spin_lock_irqsave(lock, flags); - list_del(entry); - spin_unlock_irqrestore(lock, flags); + spin_lock_irqsave(&leak_lock, flags); + list_del(&state->leak_list); + spin_unlock_irqrestore(&leak_lock, flags); }
void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info) @@ -126,9 +143,11 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller, } } #else -#define btrfs_leak_debug_add(lock, new, head) do {} while (0) -#define btrfs_leak_debug_del(lock, entry) do {} while (0) -#define btrfs_extent_state_leak_debug_check() do {} while (0) +#define btrfs_leak_debug_add_eb(eb) do {} while (0) +#define btrfs_leak_debug_add_state(state) do {} while (0) +#define btrfs_leak_debug_del_eb(eb) do {} while (0) +#define btrfs_leak_debug_del_state(state) do {} while (0) +#define btrfs_extent_state_leak_debug_check() do {} while (0) #define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0) #endif
@@ -353,7 +372,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask) state->state = 0; state->failrec = NULL; RB_CLEAR_NODE(&state->rb_node); - btrfs_leak_debug_add(&leak_lock, &state->leak_list, &states); + btrfs_leak_debug_add_state(state); refcount_set(&state->refs, 1); init_waitqueue_head(&state->wq); trace_alloc_extent_state(state, mask, _RET_IP_); @@ -366,7 +385,7 @@ void free_extent_state(struct extent_state *state) return; if (refcount_dec_and_test(&state->refs)) { WARN_ON(extent_state_in_tree(state)); - btrfs_leak_debug_del(&leak_lock, &state->leak_list); + btrfs_leak_debug_del_state(state); trace_free_extent_state(state, _RET_IP_); kmem_cache_free(extent_state_cache, state); } @@ -5856,7 +5875,7 @@ static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb) static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) { btrfs_release_extent_buffer_pages(eb); - btrfs_leak_debug_del(&eb->fs_info->eb_leak_lock, &eb->leak_list); + btrfs_leak_debug_del_eb(eb); __free_extent_buffer(eb); }
@@ -5873,8 +5892,7 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, eb->bflags = 0; init_rwsem(&eb->lock);
- btrfs_leak_debug_add(&fs_info->eb_leak_lock, &eb->leak_list, - &fs_info->allocated_ebs); + btrfs_leak_debug_add_eb(eb); INIT_LIST_HEAD(&eb->release_list);
spin_lock_init(&eb->refs_lock); @@ -6342,7 +6360,7 @@ static int release_extent_buffer(struct extent_buffer *eb) spin_unlock(&eb->refs_lock); }
- btrfs_leak_debug_del(&eb->fs_info->eb_leak_lock, &eb->leak_list); + btrfs_leak_debug_del_eb(eb); /* Should be safe to release our pages at this point */ btrfs_release_extent_buffer_pages(eb); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
From: Qu Wenruo wqu@suse.com
[ Upstream commit d7f67ac9a928fa158a95573406eac0a887bbc28c ]
[BUG] When a user mistakenly attempts to clear the block group tree (which cannot be done through a mount option) by using "-o clear_cache,space_cache=v2", it will cause the following error on a fs with the block-group-tree feature:
BTRFS info (device dm-1): force clearing of disk cache BTRFS info (device dm-1): using free space tree BTRFS info (device dm-1): clearing free space tree BTRFS info (device dm-1): clearing compat-ro feature flag for FREE_SPACE_TREE (0x1) BTRFS info (device dm-1): clearing compat-ro feature flag for FREE_SPACE_TREE_VALID (0x2) BTRFS error (device dm-1): block-group-tree feature requires fres-space-tree and no-holes BTRFS error (device dm-1): super block corruption detected before writing it to disk BTRFS: error (device dm-1) in write_all_supers:4318: errno=-117 Filesystem corrupted (unexpected superblock corruption detected) BTRFS warning (device dm-1: state E): Skipping commit of aborted transaction.
[CAUSE] Although the dependency for block-group-tree feature is just an artificial one (to reduce test matrix), we put the dependency check into btrfs_validate_super().
This is too strict, and during space cache clearing, we will have a window where free space tree is cleared, and we need to commit the super block.
In that window, we had block group tree without v2 cache, and triggered the artificial dependency check.
This is not necessary at all, especially for such a soft dependency.
[FIX] Introduce a new helper, btrfs_check_features(), to do all the runtime limitation checks, including:
- Unsupported incompat flags check
- Unsupported compat RO flags check
- Setting missing incompat flags
- Artificial feature dependency checks Currently only block group tree will rely on this.
- Subpage runtime check for v1 cache
With this helper, we can move quite some checks from open_ctree()/btrfs_remount() into it, and just call it after btrfs_parse_options().
Now "-o clear_cache,space_cache=v2" will not trigger the above error anymore.
Signed-off-by: Qu Wenruo wqu@suse.com Reviewed-by: David Sterba dsterba@suse.com [ edit messages ] Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/disk-io.c | 172 ++++++++++++++++++++++++++++----------------- fs/btrfs/disk-io.h | 1 + fs/btrfs/super.c | 10 +-- 3 files changed, 113 insertions(+), 70 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d9881b54efd1..0161f294f025 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3292,6 +3292,112 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info) return ret; }
+/* + * Do various sanity and dependency checks of different features. + * + * This is the place for less strict checks (like for subpage or artificial + * feature dependencies). + * + * For strict checks or possible corruption detection, see + * btrfs_validate_super(). + * + * This should be called after btrfs_parse_options(), as some mount options + * (space cache related) can modify on-disk format like free space tree and + * screw up certain feature dependencies. + */ +int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb) +{ + struct btrfs_super_block *disk_super = fs_info->super_copy; + u64 incompat = btrfs_super_incompat_flags(disk_super); + const u64 compat_ro = btrfs_super_compat_ro_flags(disk_super); + const u64 compat_ro_unsupp = (compat_ro & ~BTRFS_FEATURE_COMPAT_RO_SUPP); + + if (incompat & ~BTRFS_FEATURE_INCOMPAT_SUPP) { + btrfs_err(fs_info, + "cannot mount because of unknown incompat features (0x%llx)", + incompat); + return -EINVAL; + } + + /* Runtime limitation for mixed block groups. */ + if ((incompat & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && + (fs_info->sectorsize != fs_info->nodesize)) { + btrfs_err(fs_info, +"unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups", + fs_info->nodesize, fs_info->sectorsize); + return -EINVAL; + } + + /* Mixed backref is an always-enabled feature. */ + incompat |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; + + /* Set compression related flags just in case. */ + if (fs_info->compress_type == BTRFS_COMPRESS_LZO) + incompat |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; + else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD) + incompat |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD; + + /* + * An ancient flag, which should really be marked deprecated. + * Such runtime limitation doesn't really need a incompat flag. 
+ */ + if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) + incompat |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; + + if (compat_ro_unsupp && !sb_rdonly(sb)) { + btrfs_err(fs_info, + "cannot mount read-write because of unknown compat_ro features (0x%llx)", + compat_ro); + return -EINVAL; + } + + /* + * We have unsupported RO compat features, although RO mounted, we + * should not cause any metadata writes, including log replay. + * Or we could screw up whatever the new feature requires. + */ + if (compat_ro_unsupp && btrfs_super_log_root(disk_super) && + !btrfs_test_opt(fs_info, NOLOGREPLAY)) { + btrfs_err(fs_info, +"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay", + compat_ro); + return -EINVAL; + } + + /* + * Artificial limitations for block group tree, to force + * block-group-tree to rely on no-holes and free-space-tree. + */ + if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE) && + (!btrfs_fs_incompat(fs_info, NO_HOLES) || + !btrfs_test_opt(fs_info, FREE_SPACE_TREE))) { + btrfs_err(fs_info, +"block-group-tree feature requires no-holes and free-space-tree features"); + return -EINVAL; + } + + /* + * Subpage runtime limitation on v1 cache. + * + * V1 space cache still has some hard codeed PAGE_SIZE usage, while + * we're already defaulting to v2 cache, no need to bother v1 as it's + * going to be deprecated anyway. + */ + if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) { + btrfs_warn(fs_info, + "v1 space cache is not supported for page size %lu with sectorsize %u", + PAGE_SIZE, fs_info->sectorsize); + return -EINVAL; + } + + /* This can be called by remount, we need to protect the super block. 
*/ + spin_lock(&fs_info->super_lock); + btrfs_set_super_incompat_flags(disk_super, incompat); + spin_unlock(&fs_info->super_lock); + + return 0; +} + int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices, char *options) { @@ -3441,72 +3547,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device goto fail_alloc; }
- features = btrfs_super_incompat_flags(disk_super) & - ~BTRFS_FEATURE_INCOMPAT_SUPP; - if (features) { - btrfs_err(fs_info, - "cannot mount because of unsupported optional features (0x%llx)", - features); - err = -EINVAL; - goto fail_alloc; - } - - features = btrfs_super_incompat_flags(disk_super); - features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; - if (fs_info->compress_type == BTRFS_COMPRESS_LZO) - features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; - else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD) - features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD; - - /* - * Flag our filesystem as having big metadata blocks if they are bigger - * than the page size. - */ - if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) - features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; - - /* - * mixed block groups end up with duplicate but slightly offset - * extent buffers for the same range. It leads to corruptions - */ - if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && - (sectorsize != nodesize)) { - btrfs_err(fs_info, -"unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups", - nodesize, sectorsize); - goto fail_alloc; - } - - /* - * Needn't use the lock because there is no other task which will - * update the flag. - */ - btrfs_set_super_incompat_flags(disk_super, features); - - features = btrfs_super_compat_ro_flags(disk_super) & - ~BTRFS_FEATURE_COMPAT_RO_SUPP; - if (!sb_rdonly(sb) && features) { - btrfs_err(fs_info, - "cannot mount read-write because of unsupported optional features (0x%llx)", - features); - err = -EINVAL; - goto fail_alloc; - } - /* - * We have unsupported RO compat features, although RO mounted, we - * should not cause any metadata write, including log replay. - * Or we could screw up whatever the new feature requires. 
- */ - if (unlikely(features && btrfs_super_log_root(disk_super) && - !btrfs_test_opt(fs_info, NOLOGREPLAY))) { - btrfs_err(fs_info, -"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay", - features); - err = -EINVAL; + ret = btrfs_check_features(fs_info, sb); + if (ret < 0) { + err = ret; goto fail_alloc; }
- if (sectorsize < PAGE_SIZE) { struct btrfs_subpage_info *subpage_info;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index aef981de672c..5b615b371589 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -48,6 +48,7 @@ int __cold open_ctree(struct super_block *sb, void __cold close_ctree(struct btrfs_fs_info *fs_info); int btrfs_validate_super(struct btrfs_fs_info *fs_info, struct btrfs_super_block *sb, int mirror_num); +int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb); int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors); struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev); struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 692983e69ba1..b6f3f24ac95d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2012,14 +2012,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) if (ret) goto restore;
- /* V1 cache is not supported for subpage mount. */ - if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) { - btrfs_warn(fs_info, - "v1 space cache is not supported for page size %lu with sectorsize %u", - PAGE_SIZE, fs_info->sectorsize); - ret = -EINVAL; + ret = btrfs_check_features(fs_info, sb); + if (ret < 0) goto restore; - } + btrfs_remount_begin(fs_info, old_opts, *flags); btrfs_resize_thread_pool(fs_info, fs_info->thread_pool_size, old_thread_pool_size);
On Tue, Oct 11, 2022 at 10:50:09AM -0400, Sasha Levin wrote:
From: Qu Wenruo wqu@suse.com
[ Upstream commit d7f67ac9a928fa158a95573406eac0a887bbc28c ]
[BUG] When a user mistakenly attempts to clear the block group tree (which cannot be done through a mount option) by using "-o clear_cache,space_cache=v2", it will cause the following error on a fs with the block-group-tree feature:
BTRFS info (device dm-1): force clearing of disk cache BTRFS info (device dm-1): using free space tree BTRFS info (device dm-1): clearing free space tree BTRFS info (device dm-1): clearing compat-ro feature flag for FREE_SPACE_TREE (0x1) BTRFS info (device dm-1): clearing compat-ro feature flag for FREE_SPACE_TREE_VALID (0x2) BTRFS error (device dm-1): block-group-tree feature requires fres-space-tree and no-holes BTRFS error (device dm-1): super block corruption detected before writing it to disk BTRFS: error (device dm-1) in write_all_supers:4318: errno=-117 Filesystem corrupted (unexpected superblock corruption detected) BTRFS warning (device dm-1: state E): Skipping commit of aborted transaction.
[CAUSE] Although the dependency for block-group-tree feature is just an artificial one (to reduce test matrix), we put the dependency check into btrfs_validate_super().
This is too strict, and during space cache clearing, we will have a window where free space tree is cleared, and we need to commit the super block.
In that window, we had block group tree without v2 cache, and triggered the artificial dependency check.
This is not necessary at all, especially for such a soft dependency.
[FIX] Introduce a new helper, btrfs_check_features(), to do all the runtime limitation checks, including:
Unsupported incompat flags check
Unsupported compat RO flags check
Setting missing incompat flags
Artificial feature dependency checks Currently only block group tree will rely on this.
Subpage runtime check for v1 cache
With this helper, we can move quite some checks from open_ctree()/btrfs_remount() into it, and just call it after btrfs_parse_options().
Now "-o clear_cache,space_cache=v2" will not trigger the above error anymore.
Signed-off-by: Qu Wenruo wqu@suse.com Reviewed-by: David Sterba dsterba@suse.com [ edit messages ] Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org
Please drop this from the stable queue, it's for an unreleased feature (ETA 6.1).
From: Kunihiko Hayashi hayashi.kunihiko@socionext.com
[ Upstream commit 19fee1a1096d21ab1f1e712148b5417bda2939a2 ]
The PXs3 reference board can switch each of USB ports 0 and 1 to device mode with jumpers. Prepare devicetree sources for USB ports 0 and 1.
This specifies dr_mode, pinctrl, and some quirks and removes nodes for unused phys and vbus-supply properties.
Signed-off-by: Kunihiko Hayashi hayashi.kunihiko@socionext.com Link: https://lore.kernel.org/r/20220913042321.4817-8-hayashi.kunihiko@socionext.c...' Signed-off-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/boot/dts/uniphier-pinctrl.dtsi | 10 +++++ arch/arm64/boot/dts/socionext/Makefile | 4 +- .../socionext/uniphier-pxs3-ref-gadget0.dts | 41 +++++++++++++++++++ .../socionext/uniphier-pxs3-ref-gadget1.dts | 40 ++++++++++++++++++ 4 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/boot/dts/socionext/uniphier-pxs3-ref-gadget0.dts create mode 100644 arch/arm64/boot/dts/socionext/uniphier-pxs3-ref-gadget1.dts
diff --git a/arch/arm/boot/dts/uniphier-pinctrl.dtsi b/arch/arm/boot/dts/uniphier-pinctrl.dtsi index c0fd029b37e5..f909ec2e5333 100644 --- a/arch/arm/boot/dts/uniphier-pinctrl.dtsi +++ b/arch/arm/boot/dts/uniphier-pinctrl.dtsi @@ -196,11 +196,21 @@ pinctrl_usb0: usb0 { function = "usb0"; };
+ pinctrl_usb0_device: usb0-device { + groups = "usb0_device"; + function = "usb0"; + }; + pinctrl_usb1: usb1 { groups = "usb1"; function = "usb1"; };
+ pinctrl_usb1_device: usb1-device { + groups = "usb1_device"; + function = "usb1"; + }; + pinctrl_usb2: usb2 { groups = "usb2"; function = "usb2"; diff --git a/arch/arm64/boot/dts/socionext/Makefile b/arch/arm64/boot/dts/socionext/Makefile index dda3da33614b..33989a9643ac 100644 --- a/arch/arm64/boot/dts/socionext/Makefile +++ b/arch/arm64/boot/dts/socionext/Makefile @@ -5,4 +5,6 @@ dtb-$(CONFIG_ARCH_UNIPHIER) += \ uniphier-ld20-akebi96.dtb \ uniphier-ld20-global.dtb \ uniphier-ld20-ref.dtb \ - uniphier-pxs3-ref.dtb + uniphier-pxs3-ref.dtb \ + uniphier-pxs3-ref-gadget0.dtb \ + uniphier-pxs3-ref-gadget1.dtb diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref-gadget0.dts b/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref-gadget0.dts new file mode 100644 index 000000000000..7069f51bc120 --- /dev/null +++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref-gadget0.dts @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0-or-later OR MIT +// +// Device Tree Source for UniPhier PXs3 Reference Board (for USB-Device #0) +// +// Copyright (C) 2021 Socionext Inc. 
+// Author: Kunihiko Hayashi hayashi.kunihiko@socionext.com + +/dts-v1/; +#include "uniphier-pxs3-ref.dts" + +/ { + model = "UniPhier PXs3 Reference Board (USB-Device #0)"; +}; + +/* I2C3 pinctrl is shared with USB*VBUSIN */ +&i2c3 { + status = "disabled"; +}; + +&usb0 { + status = "okay"; + dr_mode = "peripheral"; + pinctrl-0 = <&pinctrl_usb0_device>; + snps,dis_enblslpm_quirk; + snps,dis_u2_susphy_quirk; + snps,dis_u3_susphy_quirk; + snps,usb2_gadget_lpm_disable; + phy-names = "usb2-phy", "usb3-phy"; + phys = <&usb0_hsphy0>, <&usb0_ssphy0>; +}; + +&usb0_hsphy0 { + /delete-property/ vbus-supply; +}; + +&usb0_ssphy0 { + /delete-property/ vbus-supply; +}; + +/delete-node/ &usb0_hsphy1; +/delete-node/ &usb0_ssphy1; diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref-gadget1.dts b/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref-gadget1.dts new file mode 100644 index 000000000000..a3cfa8113ffb --- /dev/null +++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref-gadget1.dts @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0-or-later OR MIT +// +// Device Tree Source for UniPhier PXs3 Reference Board (for USB-Device #1) +// +// Copyright (C) 2021 Socionext Inc. +// Author: Kunihiko Hayashi hayashi.kunihiko@socionext.com + +/dts-v1/; +#include "uniphier-pxs3-ref.dts" + +/ { + model = "UniPhier PXs3 Reference Board (USB-Device #1)"; +}; + +/* I2C3 pinctrl is shared with USB*VBUSIN */ +&i2c3 { + status = "disabled"; +}; + +&usb1 { + status = "okay"; + dr_mode = "peripheral"; + pinctrl-0 = <&pinctrl_usb1_device>; + snps,dis_enblslpm_quirk; + snps,dis_u2_susphy_quirk; + snps,dis_u3_susphy_quirk; + snps,usb2_gadget_lpm_disable; + phy-names = "usb2-phy", "usb3-phy"; + phys = <&usb1_hsphy0>, <&usb1_ssphy0>; +}; + +&usb1_hsphy0 { + /delete-property/ vbus-supply; +}; + +&usb1_ssphy0 { + /delete-property/ vbus-supply; +}; + +/delete-node/ &usb1_hsphy1;
From: Li Huafei lihuafei1@huawei.com
[ Upstream commit 5854e4d8530e6ed4c2532a71a6b0474e199d44dd ]
When using the frame pointer unwinder, it was found that the stack trace output of stack_trace_save() is incomplete if the stack contains call_with_stack():
[0x7f00002c] dump_stack_task+0x2c/0x90 [hrtimer] [0x7f0000a0] hrtimer_hander+0x10/0x18 [hrtimer] [0x801a67f0] __hrtimer_run_queues+0x1b0/0x3b4 [0x801a7350] hrtimer_run_queues+0xc4/0xd8 [0x801a597c] update_process_times+0x3c/0x88 [0x801b5a98] tick_periodic+0x50/0xd8 [0x801b5bf4] tick_handle_periodic+0x24/0x84 [0x8010ffc4] twd_handler+0x38/0x48 [0x8017d220] handle_percpu_devid_irq+0xa8/0x244 [0x80176e9c] generic_handle_domain_irq+0x2c/0x3c [0x8052e3a8] gic_handle_irq+0x7c/0x90 [0x808ab15c] generic_handle_arch_irq+0x60/0x80 [0x8051191c] call_with_stack+0x1c/0x20
For the frame pointer unwinder, unwind_frame() checks stackframe::fp by stackframe::sp. Since call_with_stack() switches the SP from one stack to another, stackframe::fp and stackframe::sp will point to different stacks, so we can no longer check stackframe::fp by stackframe::sp. Skip checking stackframe::fp at this point to avoid this problem.
Signed-off-by: Li Huafei lihuafei1@huawei.com Reviewed-by: Linus Walleij linus.walleij@linaro.org Signed-off-by: Russell King (Oracle) rmk+kernel@armlinux.org.uk Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/kernel/stacktrace.c | 40 ++++++++++++++++++++++++++++------ arch/arm/lib/call_with_stack.S | 2 ++ 2 files changed, 35 insertions(+), 7 deletions(-)
diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index d0fa2037460a..af87040b0353 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -9,6 +9,8 @@ #include <asm/stacktrace.h> #include <asm/traps.h>
+#include "reboot.h" + #if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) /* * Unwind the current stack frame and store the new register values in the @@ -39,29 +41,53 @@ * Note that with framepointer enabled, even the leaf functions have the same * prologue and epilogue, therefore we can ignore the LR value in this case. */ -int notrace unwind_frame(struct stackframe *frame) + +extern unsigned long call_with_stack_end; + +static int frame_pointer_check(struct stackframe *frame) { unsigned long high, low; unsigned long fp = frame->fp; + unsigned long pc = frame->pc; + + /* + * call_with_stack() is the only place we allow SP to jump from one + * stack to another, with FP and SP pointing to different stacks, + * skipping the FP boundary check at this point. + */ + if (pc >= (unsigned long)&call_with_stack && + pc < (unsigned long)&call_with_stack_end) + return 0;
/* only go to a higher address on the stack */ low = frame->sp; high = ALIGN(low, THREAD_SIZE);
-#ifdef CONFIG_CC_IS_CLANG /* check current frame pointer is within bounds */ +#ifdef CONFIG_CC_IS_CLANG if (fp < low + 4 || fp > high - 4) return -EINVAL; - - frame->sp = frame->fp; - frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); - frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 4)); #else - /* check current frame pointer is within bounds */ if (fp < low + 12 || fp > high - 4) return -EINVAL; +#endif + + return 0; +} + +int notrace unwind_frame(struct stackframe *frame) +{ + unsigned long fp = frame->fp; + + if (frame_pointer_check(frame)) + return -EINVAL;
/* restore the registers from the stack frame */ +#ifdef CONFIG_CC_IS_CLANG + frame->sp = frame->fp; + frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); + frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 4)); +#else frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 12)); frame->sp = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 8)); frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 4)); diff --git a/arch/arm/lib/call_with_stack.S b/arch/arm/lib/call_with_stack.S index 0a268a6c513c..5030d4e8d126 100644 --- a/arch/arm/lib/call_with_stack.S +++ b/arch/arm/lib/call_with_stack.S @@ -46,4 +46,6 @@ UNWIND( .setfp fpreg, sp ) pop {fpreg, pc} UNWIND( .fnend ) #endif + .globl call_with_stack_end +call_with_stack_end: ENDPROC(call_with_stack)
From: Li Huafei lihuafei1@huawei.com
[ Upstream commit 752ec621ef5c30777958cc5eb5f1cf394f7733f4 ]
Because an exception stack frame is not created in the exception entry, save_trace() does special handling for the exception PC, but this is only needed when CONFIG_FRAME_POINTER_UNWIND=y. When CONFIG_ARM_UNWIND=y, unwind annotations have been added to the exception entry and save_trace() will repeatedly save the exception PC:
[0x7f000090] hrtimer_hander+0x8/0x10 [hrtimer] [0x8019ec50] __hrtimer_run_queues+0x18c/0x394 [0x8019f760] hrtimer_run_queues+0xbc/0xd0 [0x8019def0] update_process_times+0x34/0x80 [0x801ad2a4] tick_periodic+0x48/0xd0 [0x801ad3dc] tick_handle_periodic+0x1c/0x7c [0x8010f2e0] twd_handler+0x30/0x40 [0x80177620] handle_percpu_devid_irq+0xa0/0x23c [0x801718d0] generic_handle_domain_irq+0x24/0x34 [0x80502d28] gic_handle_irq+0x74/0x88 [0x8085817c] generic_handle_arch_irq+0x58/0x78 [0x80100ba8] __irq_svc+0x88/0xc8 [0x80108114] arch_cpu_idle+0x38/0x3c [0x80108114] arch_cpu_idle+0x38/0x3c <==== duplicate saved exception PC [0x80861bf8] default_idle_call+0x38/0x130 [0x8015d5cc] do_idle+0x150/0x214 [0x8015d978] cpu_startup_entry+0x18/0x1c [0x808589c0] rest_init+0xd8/0xdc [0x80c00a44] arch_post_acpi_subsys_init+0x0/0x8
We can move the special handling of the exception PC in save_trace() to the unwind_frame() of the frame pointer unwinder.
Signed-off-by: Li Huafei lihuafei1@huawei.com Reviewed-by: Linus Walleij linus.walleij@linaro.org Signed-off-by: Russell King (Oracle) rmk+kernel@armlinux.org.uk Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/include/asm/stacktrace.h | 6 +++++ arch/arm/kernel/return_address.c | 1 + arch/arm/kernel/stacktrace.c | 44 +++++++++++++++++++++---------- 3 files changed, 37 insertions(+), 14 deletions(-)
diff --git a/arch/arm/include/asm/stacktrace.h b/arch/arm/include/asm/stacktrace.h index 3e78f921b8b2..39be2d1aa27b 100644 --- a/arch/arm/include/asm/stacktrace.h +++ b/arch/arm/include/asm/stacktrace.h @@ -21,6 +21,9 @@ struct stackframe { struct llist_node *kr_cur; struct task_struct *tsk; #endif +#ifdef CONFIG_UNWINDER_FRAME_POINTER + bool ex_frame; +#endif };
static __always_inline @@ -34,6 +37,9 @@ void arm_get_current_stackframe(struct pt_regs *regs, struct stackframe *frame) frame->kr_cur = NULL; frame->tsk = current; #endif +#ifdef CONFIG_UNWINDER_FRAME_POINTER + frame->ex_frame = in_entry_text(frame->pc); +#endif }
extern int unwind_frame(struct stackframe *frame); diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c index 8aac1e10b117..38f1ea9c724d 100644 --- a/arch/arm/kernel/return_address.c +++ b/arch/arm/kernel/return_address.c @@ -47,6 +47,7 @@ void *return_address(unsigned int level) frame.kr_cur = NULL; frame.tsk = current; #endif + frame.ex_frame = false;
walk_stackframe(&frame, save_return_addr, &data);
diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index af87040b0353..85443b5d1922 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -82,6 +82,27 @@ int notrace unwind_frame(struct stackframe *frame) if (frame_pointer_check(frame)) return -EINVAL;
+ /* + * When we unwind through an exception stack, include the saved PC + * value into the stack trace. + */ + if (frame->ex_frame) { + struct pt_regs *regs = (struct pt_regs *)frame->sp; + + /* + * We check that 'regs + sizeof(struct pt_regs)' (that is, + * &regs[1]) does not exceed the bottom of the stack to avoid + * accessing data outside the task's stack. This may happen + * when frame->ex_frame is a false positive. + */ + if ((unsigned long)&regs[1] > ALIGN(frame->sp, THREAD_SIZE)) + return -EINVAL; + + frame->pc = regs->ARM_pc; + frame->ex_frame = false; + return 0; + } + /* restore the registers from the stack frame */ #ifdef CONFIG_CC_IS_CLANG frame->sp = frame->fp; @@ -98,6 +119,9 @@ int notrace unwind_frame(struct stackframe *frame) (void *)frame->fp, &frame->kr_cur); #endif
+ if (in_entry_text(frame->pc)) + frame->ex_frame = true; + return 0; } #endif @@ -128,7 +152,6 @@ static int save_trace(struct stackframe *frame, void *d) { struct stack_trace_data *data = d; struct stack_trace *trace = data->trace; - struct pt_regs *regs; unsigned long addr = frame->pc;
if (data->no_sched_functions && in_sched_functions(addr)) @@ -139,19 +162,6 @@ static int save_trace(struct stackframe *frame, void *d) }
trace->entries[trace->nr_entries++] = addr; - - if (trace->nr_entries >= trace->max_entries) - return 1; - - if (!in_entry_text(frame->pc)) - return 0; - - regs = (struct pt_regs *)frame->sp; - if ((unsigned long)&regs[1] > ALIGN(frame->sp, THREAD_SIZE)) - return 0; - - trace->entries[trace->nr_entries++] = regs->ARM_pc; - return trace->nr_entries >= trace->max_entries; }
@@ -193,6 +203,9 @@ static noinline void __save_stack_trace(struct task_struct *tsk, frame.kr_cur = NULL; frame.tsk = tsk; #endif +#ifdef CONFIG_UNWINDER_FRAME_POINTER + frame.ex_frame = false; +#endif
walk_stackframe(&frame, save_trace, &data); } @@ -214,6 +227,9 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) frame.kr_cur = NULL; frame.tsk = current; #endif +#ifdef CONFIG_UNWINDER_FRAME_POINTER + frame.ex_frame = in_entry_text(frame.pc); +#endif
walk_stackframe(&frame, save_trace, &data); }
From: Alex Sverdlin alexander.sverdlin@nokia.com
[ Upstream commit 823f606ab6b4759a1faf0388abcf4fb0776710d2 ]
In case CONFIG_KASAN_VMALLOC=y kasan_populate_vmalloc() allocates the shadow pages dynamically. But even worse is that kasan_release_vmalloc() releases them, which is not compatible with create_mapping() of MODULES_VADDR..MODULES_END range:
BUG: Bad page state in process kworker/9:1 pfn:2068b page:e5e06160 refcount:0 mapcount:0 mapping:00000000 index:0x0 flags: 0x1000(reserved) raw: 00001000 e5e06164 e5e06164 00000000 00000000 00000000 ffffffff 00000000 page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set bad because of flags: 0x1000(reserved) Modules linked in: ip_tables CPU: 9 PID: 154 Comm: kworker/9:1 Not tainted 5.4.188-... #1 Hardware name: LSI Axxia AXM55XX Workqueue: events do_free_init unwind_backtrace show_stack dump_stack bad_page free_pcp_prepare free_unref_page kasan_depopulate_vmalloc_pte __apply_to_page_range apply_to_existing_page_range kasan_release_vmalloc __purge_vmap_area_lazy _vm_unmap_aliases.part.0 __vunmap do_free_init process_one_work worker_thread kthread
Reviewed-by: Linus Walleij linus.walleij@linaro.org Signed-off-by: Alexander Sverdlin alexander.sverdlin@nokia.com Signed-off-by: Russell King (Oracle) rmk+kernel@armlinux.org.uk Signed-off-by: Sasha Levin sashal@kernel.org --- arch/arm/mm/kasan_init.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/arch/arm/mm/kasan_init.c b/arch/arm/mm/kasan_init.c index 29caee9c79ce..46d9f4a622cb 100644 --- a/arch/arm/mm/kasan_init.c +++ b/arch/arm/mm/kasan_init.c @@ -268,12 +268,17 @@ void __init kasan_init(void)
/* * 1. The module global variables are in MODULES_VADDR ~ MODULES_END, - * so we need to map this area. + * so we need to map this area if CONFIG_KASAN_VMALLOC=n. With + * VMALLOC support KASAN will manage this region dynamically, + * refer to kasan_populate_vmalloc() and ARM's implementation of + * module_alloc(). * 2. PKMAP_BASE ~ PKMAP_BASE+PMD_SIZE's shadow and MODULES_VADDR * ~ MODULES_END's shadow is in the same PMD_SIZE, so we can't * use kasan_populate_zero_shadow. */ - create_mapping((void *)MODULES_VADDR, (void *)(PKMAP_BASE + PMD_SIZE)); + if (!IS_ENABLED(CONFIG_KASAN_VMALLOC) && IS_ENABLED(CONFIG_MODULES)) + create_mapping((void *)MODULES_VADDR, (void *)(MODULES_END)); + create_mapping((void *)PKMAP_BASE, (void *)(PKMAP_BASE + PMD_SIZE));
/* * KAsan may reuse the contents of kasan_early_shadow_pte directly, so
From: Zhao Gongyi zhaogongyi@huawei.com
[ Upstream commit 972cf4ce51ef5532d56822af17defb148aac0ccb ]
Some cpus will be left in offline state when online function exits in some error conditions. Use return instead of exit to fix it.
Signed-off-by: Zhao Gongyi zhaogongyi@huawei.com Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- .../selftests/cpu-hotplug/cpu-on-off-test.sh | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh index 0d26b5e3f966..940b68c940bb 100755 --- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh +++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh @@ -4,6 +4,7 @@ SYSFS= # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +retval=0
prerequisite() { @@ -102,10 +103,10 @@ online_cpu_expect_success()
if ! online_cpu $cpu; then echo $FUNCNAME $cpu: unexpected fail >&2 - exit 1 + retval=1 elif ! cpu_is_online $cpu; then echo $FUNCNAME $cpu: unexpected offline >&2 - exit 1 + retval=1 fi }
@@ -128,10 +129,10 @@ offline_cpu_expect_success()
if ! offline_cpu $cpu; then echo $FUNCNAME $cpu: unexpected fail >&2 - exit 1 + retval=1 elif ! cpu_is_offline $cpu; then echo $FUNCNAME $cpu: unexpected offline >&2 - exit 1 + retval=1 fi }
@@ -201,7 +202,7 @@ if [ $allcpus -eq 0 ]; then offline_cpu_expect_success $present_max online_cpu $present_max fi - exit 0 + exit $retval else echo "Full scope test: all hotplug cpus" echo -e "\t online all offline cpus" @@ -291,3 +292,5 @@ done
echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error /sbin/modprobe -q -r cpu-notifier-error-inject + +exit $retval
linux-stable-mirror@lists.linaro.org