From: Srinivas Pandruvada srinivas.pandruvada@linux.intel.com
[ Upstream commit 17da2d5f93692086dd096a975225ffd5622d0bf8 ]
As reported:
[ 256.104522] ====================================================== [ 256.113783] WARNING: possible circular locking dependency detected [ 256.120093] 5.16.0-rc6-yocto-standard+ #99 Not tainted [ 256.125362] ------------------------------------------------------ [ 256.131673] intel-speed-sel/844 is trying to acquire lock: [ 256.137290] ffffffffc036f0d0 (punit_misc_dev_lock){+.+.}-{3:3}, at: isst_if_open+0x18/0x90 [isst_if_common] [ 256.147171] [ 256.147171] but task is already holding lock: [ 256.153135] ffffffff8ee7cb50 (misc_mtx){+.+.}-{3:3}, at: misc_open+0x2a/0x170 [ 256.160407] [ 256.160407] which lock already depends on the new lock. [ 256.160407] [ 256.168712] [ 256.168712] the existing dependency chain (in reverse order) is: [ 256.176327] [ 256.176327] -> #1 (misc_mtx){+.+.}-{3:3}: [ 256.181946] lock_acquire+0x1e6/0x330 [ 256.186265] __mutex_lock+0x9b/0x9b0 [ 256.190497] mutex_lock_nested+0x1b/0x20 [ 256.195075] misc_register+0x32/0x1a0 [ 256.199390] isst_if_cdev_register+0x65/0x180 [isst_if_common] [ 256.205878] isst_if_probe+0x144/0x16e [isst_if_mmio] ... [ 256.241976] [ 256.241976] -> #0 (punit_misc_dev_lock){+.+.}-{3:3}: [ 256.248552] validate_chain+0xbc6/0x1750 [ 256.253131] __lock_acquire+0x88c/0xc10 [ 256.257618] lock_acquire+0x1e6/0x330 [ 256.261933] __mutex_lock+0x9b/0x9b0 [ 256.266165] mutex_lock_nested+0x1b/0x20 [ 256.270739] isst_if_open+0x18/0x90 [isst_if_common] [ 256.276356] misc_open+0x100/0x170 [ 256.280409] chrdev_open+0xa5/0x1e0 ...
The call sequence suggested that misc_device /dev file can be opened before misc device is yet to be registered, which is done only once.
Here punit_misc_dev_lock was used as common lock, to protect the registration by multiple ISST HW drivers, one time setup, prevent duplicate registry of misc device and prevent load/unload when device is open.
We can split into locks: - One which just prevent duplicate call to misc_register() and one time setup. Also never call again if the misc_register() failed or required one time setup is failed. This lock is not shared with any misc device callbacks.
- The other lock protects registry, load and unload of HW drivers.
Sequence in isst_if_cdev_register() - Register callbacks under punit_misc_dev_open_lock - Call isst_misc_reg() which registers misc_device on the first registry which is under punit_misc_dev_reg_lock, which is not shared with callbacks.
Sequence in isst_if_cdev_unregister Just opposite of isst_if_cdev_register
Reported-and-tested-by: Liwei Song liwei.song@windriver.com Signed-off-by: Srinivas Pandruvada srinivas.pandruvada@linux.intel.com Link: https://lore.kernel.org/r/20220112022521.54669-1-srinivas.pandruvada@linux.i... Reviewed-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- .../intel_speed_select_if/isst_if_common.c | 97 ++++++++++++------- 1 file changed, 63 insertions(+), 34 deletions(-)
diff --git a/drivers/platform/x86/intel_speed_select_if/isst_if_common.c b/drivers/platform/x86/intel_speed_select_if/isst_if_common.c index 3de5a3c66529d..cf7b6dee82191 100644 --- a/drivers/platform/x86/intel_speed_select_if/isst_if_common.c +++ b/drivers/platform/x86/intel_speed_select_if/isst_if_common.c @@ -529,7 +529,10 @@ static long isst_if_def_ioctl(struct file *file, unsigned int cmd, return ret; }
-static DEFINE_MUTEX(punit_misc_dev_lock); +/* Lock to prevent module registration when already opened by user space */ +static DEFINE_MUTEX(punit_misc_dev_open_lock); +/* Lock to allow one share misc device for all ISST interace */ +static DEFINE_MUTEX(punit_misc_dev_reg_lock); static int misc_usage_count; static int misc_device_ret; static int misc_device_open; @@ -539,7 +542,7 @@ static int isst_if_open(struct inode *inode, struct file *file) int i, ret = 0;
/* Fail open, if a module is going away */ - mutex_lock(&punit_misc_dev_lock); + mutex_lock(&punit_misc_dev_open_lock); for (i = 0; i < ISST_IF_DEV_MAX; ++i) { struct isst_if_cmd_cb *cb = &punit_callbacks[i];
@@ -561,7 +564,7 @@ static int isst_if_open(struct inode *inode, struct file *file) } else { misc_device_open++; } - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock);
return ret; } @@ -570,7 +573,7 @@ static int isst_if_relase(struct inode *inode, struct file *f) { int i;
- mutex_lock(&punit_misc_dev_lock); + mutex_lock(&punit_misc_dev_open_lock); misc_device_open--; for (i = 0; i < ISST_IF_DEV_MAX; ++i) { struct isst_if_cmd_cb *cb = &punit_callbacks[i]; @@ -578,7 +581,7 @@ static int isst_if_relase(struct inode *inode, struct file *f) if (cb->registered) module_put(cb->owner); } - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock);
return 0; } @@ -595,6 +598,43 @@ static struct miscdevice isst_if_char_driver = { .fops = &isst_if_char_driver_ops, };
+static int isst_misc_reg(void) +{ + mutex_lock(&punit_misc_dev_reg_lock); + if (misc_device_ret) + goto unlock_exit; + + if (!misc_usage_count) { + misc_device_ret = isst_if_cpu_info_init(); + if (misc_device_ret) + goto unlock_exit; + + misc_device_ret = misc_register(&isst_if_char_driver); + if (misc_device_ret) { + isst_if_cpu_info_exit(); + goto unlock_exit; + } + } + misc_usage_count++; + +unlock_exit: + mutex_unlock(&punit_misc_dev_reg_lock); + + return misc_device_ret; +} + +static void isst_misc_unreg(void) +{ + mutex_lock(&punit_misc_dev_reg_lock); + if (misc_usage_count) + misc_usage_count--; + if (!misc_usage_count && !misc_device_ret) { + misc_deregister(&isst_if_char_driver); + isst_if_cpu_info_exit(); + } + mutex_unlock(&punit_misc_dev_reg_lock); +} + /** * isst_if_cdev_register() - Register callback for IOCTL * @device_type: The device type this callback handling. @@ -612,38 +652,31 @@ static struct miscdevice isst_if_char_driver = { */ int isst_if_cdev_register(int device_type, struct isst_if_cmd_cb *cb) { - if (misc_device_ret) - return misc_device_ret; + int ret;
if (device_type >= ISST_IF_DEV_MAX) return -EINVAL;
- mutex_lock(&punit_misc_dev_lock); + mutex_lock(&punit_misc_dev_open_lock); + /* Device is already open, we don't want to add new callbacks */ if (misc_device_open) { - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); return -EAGAIN; } - if (!misc_usage_count) { - int ret; - - misc_device_ret = misc_register(&isst_if_char_driver); - if (misc_device_ret) - goto unlock_exit; - - ret = isst_if_cpu_info_init(); - if (ret) { - misc_deregister(&isst_if_char_driver); - misc_device_ret = ret; - goto unlock_exit; - } - } memcpy(&punit_callbacks[device_type], cb, sizeof(*cb)); punit_callbacks[device_type].registered = 1; - misc_usage_count++; -unlock_exit: - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock);
- return misc_device_ret; + ret = isst_misc_reg(); + if (ret) { + /* + * No need of mutex as the misc device register failed + * as no one can open device yet. Hence no contention. + */ + punit_callbacks[device_type].registered = 0; + return ret; + } + return 0; } EXPORT_SYMBOL_GPL(isst_if_cdev_register);
@@ -658,16 +691,12 @@ EXPORT_SYMBOL_GPL(isst_if_cdev_register); */ void isst_if_cdev_unregister(int device_type) { - mutex_lock(&punit_misc_dev_lock); - misc_usage_count--; + isst_misc_unreg(); + mutex_lock(&punit_misc_dev_open_lock); punit_callbacks[device_type].registered = 0; if (device_type == ISST_IF_DEV_MBOX) isst_delete_hash(); - if (!misc_usage_count && !misc_device_ret) { - misc_deregister(&isst_if_char_driver); - isst_if_cpu_info_exit(); - } - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); } EXPORT_SYMBOL_GPL(isst_if_cdev_unregister);
From: Nícolas F. R. A. Prado nfraprado@collabora.com
[ Upstream commit f034cc1301e7d83d4ec428dd6b8ffb57ca446efb ]
The timeout setting for the rtc kselftest is currently 90 seconds. This setting is used by the kselftest runner to stop running a test if it takes longer than the assigned value.
However, two of the test cases inside rtc set alarms. These alarms are set to the next beginning of the minute, so each of these test cases may take up to, in the worst case, 60 seconds.
In order to allow for all test cases in rtc to run, even in the worst case, when using the kselftest runner, the timeout value should be increased to at least 120. Set it to 180, so there's some additional slack.
Correct operation can be tested by running the following command right after the start of a minute (low second count), and checking that all test cases run:
./run_kselftest.sh -c rtc
Signed-off-by: Nícolas F. R. A. Prado nfraprado@collabora.com Acked-by: Alexandre Belloni alexandre.belloni@bootlin.com Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/rtc/settings | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/rtc/settings b/tools/testing/selftests/rtc/settings index ba4d85f74cd6b..a953c96aa16e1 100644 --- a/tools/testing/selftests/rtc/settings +++ b/tools/testing/selftests/rtc/settings @@ -1 +1 @@ -timeout=90 +timeout=180
From: Miquel Raynal miquel.raynal@bootlin.com
[ Upstream commit e5ce576d45bf72fd0e3dc37eff897bfcc488f6a9 ]
Upon error the ieee802154_xmit_complete() helper is not called. Only ieee802154_wake_queue() is called manually. In the Tx case we then leak the skb structure.
Free the skb structure upon error before returning when appropriate.
As the 'is_tx = 0' cannot be moved in the complete handler because of a possible race between the delay in switching to STATE_RX_AACK_ON and a new interrupt, we introduce an intermediate 'was_tx' boolean just for this purpose.
There is no Fixes tag applying here, many changes have been made on this area and the issue kind of always existed.
Suggested-by: Alexander Aring alex.aring@gmail.com Signed-off-by: Miquel Raynal miquel.raynal@bootlin.com Acked-by: Alexander Aring aahringo@redhat.com Link: https://lore.kernel.org/r/20220125121426.848337-4-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt stefan@datenfreihafen.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/net/ieee802154/at86rf230.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 7d67f41387f55..4f5ef8a9a9a87 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -100,6 +100,7 @@ struct at86rf230_local { unsigned long cal_timeout; bool is_tx; bool is_tx_from_off; + bool was_tx; u8 tx_retry; struct sk_buff *tx_skb; struct at86rf230_state_change tx; @@ -343,7 +344,11 @@ at86rf230_async_error_recover_complete(void *context) if (ctx->free) kfree(ctx);
- ieee802154_wake_queue(lp->hw); + if (lp->was_tx) { + lp->was_tx = 0; + dev_kfree_skb_any(lp->tx_skb); + ieee802154_wake_queue(lp->hw); + } }
static void @@ -352,7 +357,11 @@ at86rf230_async_error_recover(void *context) struct at86rf230_state_change *ctx = context; struct at86rf230_local *lp = ctx->lp;
- lp->is_tx = 0; + if (lp->is_tx) { + lp->was_tx = 1; + lp->is_tx = 0; + } + at86rf230_async_state_change(lp, ctx, STATE_RX_AACK_ON, at86rf230_async_error_recover_complete); }
From: Yang Xu xuyang2018.jy@fujitsu.com
[ Upstream commit fc4eb486a59d70bd35cf1209f0e68c2d8b979193 ]
Since commit 43209ea2d17a ("zram: remove max_comp_streams internals"), zram has switched to per-cpu streams. Even kernel still keep this interface for some reasons, but writing to max_comp_stream doesn't take any effect. So skip it on newer kernel ie 4.7.
The code that comparing kernel version is from xfstests testsuite ext4/053.
Signed-off-by: Yang Xu xuyang2018.jy@fujitsu.com Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/zram/zram_lib.sh | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+)
diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh index 6f872f266fd11..f47fc0f27e99e 100755 --- a/tools/testing/selftests/zram/zram_lib.sh +++ b/tools/testing/selftests/zram/zram_lib.sh @@ -11,6 +11,9 @@ dev_mounted=-1
# Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +kernel_version=`uname -r | cut -d'.' -f1,2` +kernel_major=${kernel_version%.*} +kernel_minor=${kernel_version#*.}
trap INT
@@ -25,6 +28,20 @@ check_prereqs() fi }
+kernel_gte() +{ + major=${1%.*} + minor=${1#*.} + + if [ $kernel_major -gt $major ]; then + return 0 + elif [[ $kernel_major -eq $major && $kernel_minor -ge $minor ]]; then + return 0 + fi + + return 1 +} + zram_cleanup() { echo "zram cleanup" @@ -86,6 +103,13 @@ zram_max_streams() { echo "set max_comp_streams to zram device(s)"
+ kernel_gte 4.7 + if [ $? -eq 0 ]; then + echo "The device attribute max_comp_streams was"\ + "deprecated in 4.7" + return 0 + fi + local i=0 for max_s in $zram_max_streams; do local sys_path="/sys/block/zram${i}/max_comp_streams"
From: Yang Xu xuyang2018.jy@fujitsu.com
[ Upstream commit d18da7ec3719559d6e74937266d0416e6c7e0b31 ]
zram01 uses `free -m` to measure zram memory usage. The results are no sense because they are polluted by all running processes on the system.
We Should only calculate the free memory delta for the current process. So use the third field of /sys/block/zram<id>/mm_stat to measure memory usage instead. The file is available since kernel 4.1.
orig_data_size(first): uncompressed size of data stored in this disk. compr_data_size(second): compressed size of data stored in this disk mem_used_total(third): the amount of memory allocated for this disk
Also remove useless zram cleanup call in zram_fill_fs and so we don't need to cleanup zram twice if fails.
Signed-off-by: Yang Xu xuyang2018.jy@fujitsu.com Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/zram/zram01.sh | 30 +++++++------------------- 1 file changed, 8 insertions(+), 22 deletions(-)
diff --git a/tools/testing/selftests/zram/zram01.sh b/tools/testing/selftests/zram/zram01.sh index 114863d9fb876..e9e9eb777e2c7 100755 --- a/tools/testing/selftests/zram/zram01.sh +++ b/tools/testing/selftests/zram/zram01.sh @@ -33,8 +33,6 @@ zram_algs="lzo"
zram_fill_fs() { - local mem_free0=$(free -m | awk 'NR==2 {print $4}') - for i in $(seq 0 $(($dev_num - 1))); do echo "fill zram$i..." local b=0 @@ -45,29 +43,17 @@ zram_fill_fs() b=$(($b + 1)) done echo "zram$i can be filled with '$b' KB" - done
- local mem_free1=$(free -m | awk 'NR==2 {print $4}') - local used_mem=$(($mem_free0 - $mem_free1)) + local mem_used_total=`awk '{print $3}' "/sys/block/zram$i/mm_stat"` + local v=$((100 * 1024 * $b / $mem_used_total)) + if [ "$v" -lt 100 ]; then + echo "FAIL compression ratio: 0.$v:1" + ERR_CODE=-1 + return + fi
- local total_size=0 - for sm in $zram_sizes; do - local s=$(echo $sm | sed 's/M//') - total_size=$(($total_size + $s)) + echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK" done - - echo "zram used ${used_mem}M, zram disk sizes ${total_size}M" - - local v=$((100 * $total_size / $used_mem)) - - if [ "$v" -lt 100 ]; then - echo "FAIL compression ratio: 0.$v:1" - ERR_CODE=-1 - zram_cleanup - return - fi - - echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK" }
check_prereqs
From: Yang Xu xuyang2018.jy@fujitsu.com
[ Upstream commit 01dabed20573804750af5c7bf8d1598a6bf7bf6e ]
If zram-generator package is installed and works, then we can not remove zram module because zram swap is being used. This case needs a clean zram environment, change this test by using hot_add/hot_remove interface. So even zram device is being used, we still can add zram device and remove them in cleanup.
The two interface was introduced since kernel commit 6566d1a32bf7("zram: add dynamic device add/remove functionality") in v4.2-rc1. If kernel supports these two interface, we use hot_add/hot_remove to slove this problem, if not, just check whether zram is being used or built in, then skip it on old kernel.
Signed-off-by: Yang Xu xuyang2018.jy@fujitsu.com Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/zram/zram.sh | 15 +--- tools/testing/selftests/zram/zram01.sh | 3 +- tools/testing/selftests/zram/zram02.sh | 1 - tools/testing/selftests/zram/zram_lib.sh | 110 +++++++++++++---------- 4 files changed, 66 insertions(+), 63 deletions(-)
diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh index 232e958ec4547..b0b91d9b0dc21 100755 --- a/tools/testing/selftests/zram/zram.sh +++ b/tools/testing/selftests/zram/zram.sh @@ -2,9 +2,6 @@ # SPDX-License-Identifier: GPL-2.0 TCID="zram.sh"
-# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - . ./zram_lib.sh
run_zram () { @@ -18,14 +15,4 @@ echo ""
check_prereqs
-# check zram module exists -MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko -if [ -f $MODULE_PATH ]; then - run_zram -elif [ -b /dev/zram0 ]; then - run_zram -else - echo "$TCID : No zram.ko module or /dev/zram0 device file not found" - echo "$TCID : CONFIG_ZRAM is not set" - exit $ksft_skip -fi +run_zram diff --git a/tools/testing/selftests/zram/zram01.sh b/tools/testing/selftests/zram/zram01.sh index e9e9eb777e2c7..8f4affe34f3e4 100755 --- a/tools/testing/selftests/zram/zram01.sh +++ b/tools/testing/selftests/zram/zram01.sh @@ -33,7 +33,7 @@ zram_algs="lzo"
zram_fill_fs() { - for i in $(seq 0 $(($dev_num - 1))); do + for i in $(seq $dev_start $dev_end); do echo "fill zram$i..." local b=0 while [ true ]; do @@ -67,7 +67,6 @@ zram_mount
zram_fill_fs zram_cleanup -zram_unload
if [ $ERR_CODE -ne 0 ]; then echo "$TCID : [FAIL]" diff --git a/tools/testing/selftests/zram/zram02.sh b/tools/testing/selftests/zram/zram02.sh index e83b404807c09..2418b0c4ed136 100755 --- a/tools/testing/selftests/zram/zram02.sh +++ b/tools/testing/selftests/zram/zram02.sh @@ -36,7 +36,6 @@ zram_set_memlimit zram_makeswap zram_swapoff zram_cleanup -zram_unload
if [ $ERR_CODE -ne 0 ]; then echo "$TCID : [FAIL]" diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh index f47fc0f27e99e..21ec1966de76c 100755 --- a/tools/testing/selftests/zram/zram_lib.sh +++ b/tools/testing/selftests/zram/zram_lib.sh @@ -5,10 +5,12 @@ # Author: Alexey Kodanev alexey.kodanev@oracle.com # Modified: Naresh Kamboju naresh.kamboju@linaro.org
-MODULE=0 dev_makeswap=-1 dev_mounted=-1 - +dev_start=0 +dev_end=-1 +module_load=-1 +sys_control=-1 # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 kernel_version=`uname -r | cut -d'.' -f1,2` @@ -46,57 +48,72 @@ zram_cleanup() { echo "zram cleanup" local i= - for i in $(seq 0 $dev_makeswap); do + for i in $(seq $dev_start $dev_makeswap); do swapoff /dev/zram$i done
- for i in $(seq 0 $dev_mounted); do + for i in $(seq $dev_start $dev_mounted); do umount /dev/zram$i done
- for i in $(seq 0 $(($dev_num - 1))); do + for i in $(seq $dev_start $dev_end); do echo 1 > /sys/block/zram${i}/reset rm -rf zram$i done
-} + if [ $sys_control -eq 1 ]; then + for i in $(seq $dev_start $dev_end); do + echo $i > /sys/class/zram-control/hot_remove + done + fi
-zram_unload() -{ - if [ $MODULE -ne 0 ] ; then - echo "zram rmmod zram" + if [ $module_load -eq 1 ]; then rmmod zram > /dev/null 2>&1 fi }
zram_load() { - # check zram module exists - MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko - if [ -f $MODULE_PATH ]; then - MODULE=1 - echo "create '$dev_num' zram device(s)" - modprobe zram num_devices=$dev_num - if [ $? -ne 0 ]; then - echo "failed to insert zram module" - exit 1 - fi - - dev_num_created=$(ls /dev/zram* | wc -w) + echo "create '$dev_num' zram device(s)" + + # zram module loaded, new kernel + if [ -d "/sys/class/zram-control" ]; then + echo "zram modules already loaded, kernel supports" \ + "zram-control interface" + dev_start=$(ls /dev/zram* | wc -w) + dev_end=$(($dev_start + $dev_num - 1)) + sys_control=1 + + for i in $(seq $dev_start $dev_end); do + cat /sys/class/zram-control/hot_add > /dev/null + done + + echo "all zram devices (/dev/zram$dev_start~$dev_end" \ + "successfully created" + return 0 + fi
- if [ "$dev_num_created" -ne "$dev_num" ]; then - echo "unexpected num of devices: $dev_num_created" - ERR_CODE=-1 + # detect old kernel or built-in + modprobe zram num_devices=$dev_num + if [ ! -d "/sys/class/zram-control" ]; then + if grep -q '^zram' /proc/modules; then + rmmod zram > /dev/null 2>&1 + if [ $? -ne 0 ]; then + echo "zram module is being used on old kernel" \ + "without zram-control interface" + exit $ksft_skip + fi else - echo "zram load module successful" + echo "test needs CONFIG_ZRAM=m on old kernel without" \ + "zram-control interface" + exit $ksft_skip fi - elif [ -b /dev/zram0 ]; then - echo "/dev/zram0 device file found: OK" - else - echo "ERROR: No zram.ko module or no /dev/zram0 device found" - echo "$TCID : CONFIG_ZRAM is not set" - exit 1 + modprobe zram num_devices=$dev_num fi + + module_load=1 + dev_end=$(($dev_num - 1)) + echo "all zram devices (/dev/zram0~$dev_end) successfully created" }
zram_max_streams() @@ -110,7 +127,7 @@ zram_max_streams() return 0 fi
- local i=0 + local i=$dev_start for max_s in $zram_max_streams; do local sys_path="/sys/block/zram${i}/max_comp_streams" echo $max_s > $sys_path || \ @@ -122,7 +139,7 @@ zram_max_streams() echo "FAIL can't set max_streams '$max_s', get $max_stream"
i=$(($i + 1)) - echo "$sys_path = '$max_streams' ($i/$dev_num)" + echo "$sys_path = '$max_streams'" done
echo "zram max streams: OK" @@ -132,15 +149,16 @@ zram_compress_alg() { echo "test that we can set compression algorithm"
- local algs=$(cat /sys/block/zram0/comp_algorithm) + local i=$dev_start + local algs=$(cat /sys/block/zram${i}/comp_algorithm) echo "supported algs: $algs" - local i=0 + for alg in $zram_algs; do local sys_path="/sys/block/zram${i}/comp_algorithm" echo "$alg" > $sys_path || \ echo "FAIL can't set '$alg' to $sys_path" i=$(($i + 1)) - echo "$sys_path = '$alg' ($i/$dev_num)" + echo "$sys_path = '$alg'" done
echo "zram set compression algorithm: OK" @@ -149,14 +167,14 @@ zram_compress_alg() zram_set_disksizes() { echo "set disk size to zram device(s)" - local i=0 + local i=$dev_start for ds in $zram_sizes; do local sys_path="/sys/block/zram${i}/disksize" echo "$ds" > $sys_path || \ echo "FAIL can't set '$ds' to $sys_path"
i=$(($i + 1)) - echo "$sys_path = '$ds' ($i/$dev_num)" + echo "$sys_path = '$ds'" done
echo "zram set disksizes: OK" @@ -166,14 +184,14 @@ zram_set_memlimit() { echo "set memory limit to zram device(s)"
- local i=0 + local i=$dev_start for ds in $zram_mem_limits; do local sys_path="/sys/block/zram${i}/mem_limit" echo "$ds" > $sys_path || \ echo "FAIL can't set '$ds' to $sys_path"
i=$(($i + 1)) - echo "$sys_path = '$ds' ($i/$dev_num)" + echo "$sys_path = '$ds'" done
echo "zram set memory limit: OK" @@ -182,8 +200,8 @@ zram_set_memlimit() zram_makeswap() { echo "make swap with zram device(s)" - local i=0 - for i in $(seq 0 $(($dev_num - 1))); do + local i=$dev_start + for i in $(seq $dev_start $dev_end); do mkswap /dev/zram$i > err.log 2>&1 if [ $? -ne 0 ]; then cat err.log @@ -206,7 +224,7 @@ zram_makeswap() zram_swapoff() { local i= - for i in $(seq 0 $dev_makeswap); do + for i in $(seq $dev_start $dev_end); do swapoff /dev/zram$i > err.log 2>&1 if [ $? -ne 0 ]; then cat err.log @@ -220,7 +238,7 @@ zram_swapoff()
zram_makefs() { - local i=0 + local i=$dev_start for fs in $zram_filesystems; do # if requested fs not supported default it to ext2 which mkfs.$fs > /dev/null 2>&1 || fs=ext2 @@ -239,7 +257,7 @@ zram_makefs() zram_mount() { local i=0 - for i in $(seq 0 $(($dev_num - 1))); do + for i in $(seq $dev_start $dev_end); do echo "mount /dev/zram$i" mkdir zram$i mount /dev/zram$i zram$i > /dev/null || \
From: Duoming Zhou duoming@zju.edu.cn
[ Upstream commit 4e0f718daf97d47cf7dec122da1be970f145c809 ]
The previous commit 1ade48d0c27d ("ax25: NPD bug when detaching AX25 device") introduce lock_sock() into ax25_kill_by_device to prevent NPD bug. But the concurrency NPD or UAF bug will occur, when lock_sock() or release_sock() dereferences the ax25_cb->sock.
The NULL pointer dereference bug can be shown as below:
ax25_kill_by_device() | ax25_release() | ax25_destroy_socket() | ax25_cb_del() ... | ... | ax25->sk=NULL; lock_sock(s->sk); //(1) | s->ax25_dev = NULL; | ... release_sock(s->sk); //(2) | ... |
The root cause is that the sock is set to null before dereference site (1) or (2). Therefore, this patch extracts the ax25_cb->sock in advance, and uses ax25_list_lock to protect it, which can synchronize with ax25_cb_del() and ensure the value of sock is not null before dereference sites.
The concurrency UAF bug can be shown as below:
ax25_kill_by_device() | ax25_release() | ax25_destroy_socket() ... | ... | sock_put(sk); //FREE lock_sock(s->sk); //(1) | s->ax25_dev = NULL; | ... release_sock(s->sk); //(2) | ... |
The root cause is that the sock is released before dereference site (1) or (2). Therefore, this patch uses sock_hold() to increase the refcount of sock and uses ax25_list_lock to protect it, which can synchronize with ax25_cb_del() in ax25_destroy_socket() and ensure the sock wil not be released before dereference sites.
Signed-off-by: Duoming Zhou duoming@zju.edu.cn Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- net/ax25/af_ax25.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 1f84d41e22c36..184af6da0defc 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -77,6 +77,7 @@ static void ax25_kill_by_device(struct net_device *dev) { ax25_dev *ax25_dev; ax25_cb *s; + struct sock *sk;
if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) return; @@ -85,13 +86,15 @@ static void ax25_kill_by_device(struct net_device *dev) again: ax25_for_each(s, &ax25_list) { if (s->ax25_dev == ax25_dev) { + sk = s->sk; + sock_hold(sk); spin_unlock_bh(&ax25_list_lock); - lock_sock(s->sk); + lock_sock(sk); s->ax25_dev = NULL; - release_sock(s->sk); + release_sock(sk); ax25_disconnect(s, ENETUNREACH); spin_lock_bh(&ax25_list_lock); - + sock_put(sk); /* The entry could have been deleted from the * list meanwhile and thus the next pointer is * no longer valid. Play it safe and restart
From: "Darrick J. Wong" djwong@kernel.org
[ Upstream commit 2719c7160dcfaae1f73a1c0c210ad3281c19022e ]
If we fail to synchronize the filesystem while preparing to freeze the fs, abort the freeze.
Signed-off-by: Darrick J. Wong djwong@kernel.org Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Christoph Hellwig hch@lst.de Acked-by: Christian Brauner brauner@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/super.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/fs/super.c b/fs/super.c index b289356f302fc..e255c18fa2c88 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1691,11 +1691,9 @@ static void lockdep_sb_freeze_acquire(struct super_block *sb) percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_); }
-static void sb_freeze_unlock(struct super_block *sb) +static void sb_freeze_unlock(struct super_block *sb, int level) { - int level; - - for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--) + for (level--; level >= 0; level--) percpu_up_write(sb->s_writers.rw_sem + level); }
@@ -1766,7 +1764,14 @@ int freeze_super(struct super_block *sb) sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
/* All writers are done so after syncing there won't be dirty data */ - sync_filesystem(sb); + ret = sync_filesystem(sb); + if (ret) { + sb->s_writers.frozen = SB_UNFROZEN; + sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT); + wake_up(&sb->s_writers.wait_unfrozen); + deactivate_locked_super(sb); + return ret; + }
/* Now wait for internal filesystem counter */ sb->s_writers.frozen = SB_FREEZE_FS; @@ -1778,7 +1783,7 @@ int freeze_super(struct super_block *sb) printk(KERN_ERR "VFS:Filesystem freeze failed\n"); sb->s_writers.frozen = SB_UNFROZEN; - sb_freeze_unlock(sb); + sb_freeze_unlock(sb, SB_FREEZE_FS); wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); return ret; @@ -1829,7 +1834,7 @@ static int thaw_super_locked(struct super_block *sb) }
sb->s_writers.frozen = SB_UNFROZEN; - sb_freeze_unlock(sb); + sb_freeze_unlock(sb, SB_FREEZE_FS); out: wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb);
From: "Darrick J. Wong" djwong@kernel.org
[ Upstream commit dd5532a4994bfda0386eb2286ec00758cee08444 ]
Strangely, dquot_quota_sync ignores the return code from the ->sync_fs call, which means that quotacalls like Q_SYNC never see the error. This doesn't seem right, so fix that.
Signed-off-by: Darrick J. Wong djwong@kernel.org Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Christoph Hellwig hch@lst.de Acked-by: Christian Brauner brauner@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- fs/quota/dquot.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 7abc3230c21a4..dc5f8654b277d 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -693,9 +693,14 @@ int dquot_quota_sync(struct super_block *sb, int type) /* This is not very clever (and fast) but currently I don't know about * any other simple way of getting quota data to disk and we must get * them there for userspace to be visible... */ - if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, 1); - sync_blockdev(sb->s_bdev); + if (sb->s_op->sync_fs) { + ret = sb->s_op->sync_fs(sb, 1); + if (ret) + return ret; + } + ret = sync_blockdev(sb->s_bdev); + if (ret) + return ret;
/* * Now when everything is written we can discard the pagecache so
From: Su Yue l@damenly.su
[ Upstream commit ea1d1ca4025ac6c075709f549f9aa036b5b6597d ]
Check item size before accessing the device item to avoid out of bound access, similar to inode_item check.
Signed-off-by: Su Yue l@damenly.su Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/tree-checker.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 368c43c6cbd08..d6e0eeb82fa9e 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -764,6 +764,7 @@ static int check_dev_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { struct btrfs_dev_item *ditem; + const u32 item_size = btrfs_item_size(leaf, slot);
if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) { dev_item_err(leaf, slot, @@ -771,6 +772,13 @@ static int check_dev_item(struct extent_buffer *leaf, key->objectid, BTRFS_DEV_ITEMS_OBJECTID); return -EUCLEAN; } + + if (unlikely(item_size != sizeof(*ditem))) { + dev_item_err(leaf, slot, "invalid item size: has %u expect %zu", + item_size, sizeof(*ditem)); + return -EUCLEAN; + } + ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); if (btrfs_device_id(leaf, ditem) != key->offset) { dev_item_err(leaf, slot,
From: Sagi Grimberg sagi@grimberg.me
[ Upstream commit 0fa0f99fc84e41057cbdd2efbfe91c6b2f47dd9d ]
Unlike .queue_rq, in .submit_async_event drivers may not check the ctrl readiness for AER submission. This may lead to a use-after-free condition that was observed with nvme-tcp.
The race condition may happen in the following scenario: 1. driver executes its reset_ctrl_work 2. -> nvme_stop_ctrl - flushes ctrl async_event_work 3. ctrl sends AEN which is received by the host, which in turn schedules AEN handling 4. teardown admin queue (which releases the queue socket) 5. AEN processed, submits another AER, calling the driver to submit 6. driver attempts to send the cmd ==> use-after-free
In order to fix that, add ctrl state check to validate the ctrl is actually able to accept the AER submission.
This addresses the above race in controller resets because the driver during teardown should: 1. change ctrl state to RESETTING 2. flush async_event_work (as well as other async work elements)
So after 1,2, any other AER command will find the ctrl state to be RESETTING and bail out without submitting the AER.
Signed-off-by: Sagi Grimberg sagi@grimberg.me Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a5b5a2305791d..6a9a42809f972 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3896,7 +3896,14 @@ static void nvme_async_event_work(struct work_struct *work) container_of(work, struct nvme_ctrl, async_event_work);
nvme_aen_uevent(ctrl); - ctrl->ops->submit_async_event(ctrl); + + /* + * The transport drivers must guarantee AER submission here is safe by + * flushing ctrl async_event_work after changing the controller state + * from LIVE and before freeing the admin queue. + */ + if (ctrl->state == NVME_CTRL_LIVE) + ctrl->ops->submit_async_event(ctrl); }
static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl)
From: Sagi Grimberg sagi@grimberg.me
[ Upstream commit ff9fc7ebf5c06de1ef72a69f9b1ab40af8b07f9e ]
While nvme_tcp_submit_async_event_work is checking the ctrl and queue state before preparing the AER command and scheduling io_work, in order to fully prevent a race where this check is not reliable the error recovery work must flush async_event_work before continuing to destroy the admin queue after setting the ctrl state to RESETTING such that there is no race .submit_async_event and the error recovery handler itself changing the ctrl state.
Tested-by: Chris Leech cleech@redhat.com Signed-off-by: Sagi Grimberg sagi@grimberg.me Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/tcp.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index ff0d06e8ebb53..284d5e756bd6d 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1947,6 +1947,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
nvme_stop_keep_alive(ctrl); + flush_work(&ctrl->async_event_work); nvme_tcp_teardown_io_queues(ctrl, false); /* unquiesce to fail fast pending requests */ nvme_start_queues(ctrl);
From: Sagi Grimberg sagi@grimberg.me
[ Upstream commit b6bb1722f34bbdbabed27acdceaf585d300c5fd2 ]
While nvme_rdma_submit_async_event_work is checking the ctrl and queue state before preparing the AER command and scheduling io_work, in order to fully prevent a race where this check is not reliable the error recovery work must flush async_event_work before continuing to destroy the admin queue after setting the ctrl state to RESETTING such that there is no race .submit_async_event and the error recovery handler itself changing the ctrl state.
Signed-off-by: Sagi Grimberg sagi@grimberg.me Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/nvme/host/rdma.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 08a23bb4b8b57..4213c71b02a4b 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1110,6 +1110,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) struct nvme_rdma_ctrl, err_work);
nvme_stop_keep_alive(&ctrl->ctrl); + flush_work(&ctrl->ctrl.async_event_work); nvme_rdma_teardown_io_queues(ctrl, false); nvme_start_queues(&ctrl->ctrl); nvme_rdma_teardown_admin_queue(ctrl, false);
From: Christian König christian.koenig@amd.com
[ Upstream commit e8ae38720e1a685fd98cfa5ae118c9d07b45ca79 ]
We probably never trigger this, but the logic inside the check is inverted.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Felix Kuehling Felix.Kuehling@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 58e14d3040f03..870dd78d5a21a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1976,7 +1976,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, unsigned i; int r;
- if (direct_submit && !ring->sched.ready) { + if (!direct_submit && !ring->sched.ready) { DRM_ERROR("Trying to move memory with ring turned off.\n"); return -EINVAL; }
From: Igor Pylypiv ipylypiv@google.com
[ Upstream commit 67d6212afda218d564890d1674bab28e8612170f ]
This reverts commit 774a1221e862b343388347bac9b318767336b20b.
We need to finish all async code before the module init sequence is done. In the reverted commit the PF_USED_ASYNC flag was added to mark a thread that called async_schedule(). Then the PF_USED_ASYNC flag was used to determine whether or not async_synchronize_full() needs to be invoked. This works when modprobe thread is calling async_schedule(), but it does not work if module dispatches init code to a worker thread which then calls async_schedule().
For example, PCI driver probing is invoked from a worker thread based on a node where device is attached:
if (cpu < nr_cpu_ids) error = work_on_cpu(cpu, local_pci_probe, &ddi); else error = local_pci_probe(&ddi);
We end up in a situation where a worker thread gets the PF_USED_ASYNC flag set instead of the modprobe thread. As a result, async_synchronize_full() is not invoked and modprobe completes without waiting for the async code to finish.
The issue was discovered while loading the pm80xx driver: (scsi_mod.scan=async)
modprobe pm80xx worker ... do_init_module() ... pci_call_probe() work_on_cpu(local_pci_probe) local_pci_probe() pm8001_pci_probe() scsi_scan_host() async_schedule() worker->flags |= PF_USED_ASYNC; ... < return from worker > ... if (current->flags & PF_USED_ASYNC) <--- false async_synchronize_full();
Commit 21c3c5d28007 ("block: don't request module during elevator init") fixed the deadlock issue which the reverted commit 774a1221e862 ("module, async: async_synchronize_full() on module init iff async is used") tried to fix.
Since commit 0fdff3ec6d87 ("async, kmod: warn on synchronous request_module() from async workers") synchronous module loading from async is not allowed.
Given that the original deadlock issue is fixed and it is no longer allowed to call synchronous request_module() from async we can remove PF_USED_ASYNC flag to make module init consistently invoke async_synchronize_full() unless async module probe is requested.
Signed-off-by: Igor Pylypiv ipylypiv@google.com Reviewed-by: Changyuan Lyu changyuanl@google.com Reviewed-by: Luis Chamberlain mcgrof@kernel.org Acked-by: Tejun Heo tj@kernel.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- include/linux/sched.h | 1 - kernel/async.c | 3 --- kernel/module.c | 25 +++++-------------------- 3 files changed, 5 insertions(+), 24 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h index afee5d5eb9458..b341471de9d60 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1454,7 +1454,6 @@ extern struct pid *cad_pid; #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ -#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ #define PF_FROZEN 0x00010000 /* Frozen for system suspend */ #define PF_KSWAPD 0x00020000 /* I am kswapd */ diff --git a/kernel/async.c b/kernel/async.c index 4f9c1d6140168..74660f611b97d 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -205,9 +205,6 @@ async_cookie_t async_schedule_node_domain(async_func_t func, void *data, atomic_inc(&entry_count); spin_unlock_irqrestore(&async_lock, flags);
- /* mark that this task has queued an async job, used by module init */ - current->flags |= PF_USED_ASYNC; - /* schedule for execution */ queue_work_node(node, system_unbound_wq, &entry->work);
diff --git a/kernel/module.c b/kernel/module.c index 59d487b8d8dad..e7656cf1652c9 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3711,12 +3711,6 @@ static noinline int do_init_module(struct module *mod) } freeinit->module_init = mod->init_layout.base;
- /* - * We want to find out whether @mod uses async during init. Clear - * PF_USED_ASYNC. async_schedule*() will set it. - */ - current->flags &= ~PF_USED_ASYNC; - do_mod_ctors(mod); /* Start the module */ if (mod->init != NULL) @@ -3742,22 +3736,13 @@ static noinline int do_init_module(struct module *mod)
/* * We need to finish all async code before the module init sequence - * is done. This has potential to deadlock. For example, a newly - * detected block device can trigger request_module() of the - * default iosched from async probing task. Once userland helper - * reaches here, async_synchronize_full() will wait on the async - * task waiting on request_module() and deadlock. - * - * This deadlock is avoided by perfomring async_synchronize_full() - * iff module init queued any async jobs. This isn't a full - * solution as it will deadlock the same if module loading from - * async jobs nests more than once; however, due to the various - * constraints, this hack seems to be the best option for now. - * Please refer to the following thread for details. + * is done. This has potential to deadlock if synchronous module + * loading is requested from async (which is not allowed!). * - * http://thread.gmane.org/gmane.linux.kernel/1420814 + * See commit 0fdff3ec6d87 ("async, kmod: warn on synchronous + * request_module() from async workers") for more details. */ - if (!mod->async_probe_requested && (current->flags & PF_USED_ASYNC)) + if (!mod->async_probe_requested) async_synchronize_full();
ftrace_free_mem(mod, mod->init_layout.base, mod->init_layout.base +
linux-stable-mirror@lists.linaro.org