Syzbot reported a BUG_ON:
==================================================================
EXT4-fs (loop0): mounted filesystem without journal. Quota mode: none.
EXT4-fs error (device loop0): ext4_mb_generate_buddy:1098: group 0, block
bitmap and bg descriptor inconsistent: 25 vs 150994969 free clusters
------------[ cut here ]------------
kernel BUG at fs/ext4/ext4_jbd2.c:53!
invalid opcode: 0000 [#1] PREEMPT SMP KASAN
CPU: 1 PID: 494 Comm: syz-executor.0 6.1.0-rc7-syzkaller-ga4412fdd49dc #0
RIP: 0010:__ext4_journal_stop+0x1b3/0x1c0
[...]
Call Trace:
ext4_write_inline_data_end+0xa39/0xdf0
ext4_da_write_end+0x1e2/0x950
generic_perform_write+0x401/0x5f0
ext4_buffered_write_iter+0x35f/0x640
ext4_file_write_iter+0x198/0x1cd0
vfs_write+0x8b5/0xef0
[...]
==================================================================
The above BUG_ON is triggered by the following race:
                cpu1                               cpu2
________________________|________________________
ksys_write
 vfs_write
  new_sync_write
   ext4_file_write_iter
    ext4_buffered_write_iter
     generic_perform_write
      ext4_da_write_begin
                                do_fault
                                 do_page_mkwrite
                                  ext4_page_mkwrite
                                   ext4_convert_inline_data
                                    ext4_convert_inline_data_nolock
                                     ext4_destroy_inline_data_nolock
                                      // clear EXT4_STATE_MAY_INLINE_DATA
                                     ext4_map_blocks --> return error
       ext4_test_inode_state(inode,
                EXT4_STATE_MAY_INLINE_DATA)
       ext4_block_write_begin
                                     ext4_restore_inline_data
                                      // set EXT4_STATE_MAY_INLINE_DATA
      ext4_da_write_end
       ext4_test_inode_state(inode,
                EXT4_STATE_MAY_INLINE_DATA)
       ext4_write_inline_data_end
        handle=NULL
        ext4_journal_stop(handle)
         __ext4_journal_stop
          ext4_put_nojournal(handle)
           ref_cnt = (unsigned long)handle
           BUG_ON(ref_cnt == 0) ---> BUG_ON
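For context, on a filesystem mounted without a journal the "handle" is not
a real transaction handle at all: ext4_get_nojournal()/ext4_put_nojournal()
just encode a small reference count in the pointer value, so stopping a
NULL handle hits the BUG_ON at fs/ext4/ext4_jbd2.c:53. A simplified sketch
from my reading of that file (not part of this patch):
static void ext4_put_nojournal(handle_t *handle)
{
	unsigned long ref_cnt = (unsigned long)handle;

	/* A NULL handle means ref_cnt == 0, which is exactly this report. */
	BUG_ON(ref_cnt == 0);

	ref_cnt--;
	handle = (handle_t *)ref_cnt;

	current->journal_info = handle;
}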
The root cause is that ext4_convert_inline_data() in ext4_page_mkwrite()
is called without i_rwsem, so it can race with ext4_buffered_write_iter()
and leave write_begin() and write_end() with inconsistent views of the
inline data state, triggering the BUG_ON.
We cannot fix this by simply taking inode_lock in ext4_page_mkwrite():
that would not only hurt performance but also introduce an ABBA deadlock
(see Link). Instead, move ext4_convert_inline_data() to ext4_file_mmap()
and do the conversion under i_rwsem, and only when the inline_data feature
is enabled and a writeable file is mmapped in shared mode. This avoids
both problems.
Link: https://lore.kernel.org/r/20230530102804.6t7np7om6tczscuo@quack3/
Reported-by: Jun Nie <jun.nie(a)linaro.org>
Closes: https://lore.kernel.org/lkml/63903521.5040307@huawei.com/t/
Reported-by: syzbot+a158d886ca08a3fecca4(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?id=899b37f20ce4072bcdfecfe1647b39602e956e…
Fixes: 7b4cc9787fe3 ("ext4: evict inline data when writing to memory map")
CC: stable(a)vger.kernel.org # 4.12+
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
---
fs/ext4/file.c | 24 +++++++++++++++++++++++-
fs/ext4/inode.c | 4 ----
2 files changed, 23 insertions(+), 5 deletions(-)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d101b3b0c7da..9df82d72eb90 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -795,7 +795,8 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file->f_mapping->host;
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ struct super_block *sb = inode->i_sb;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
struct dax_device *dax_dev = sbi->s_daxdev;
if (unlikely(ext4_forced_shutdown(sbi)))
@@ -808,6 +809,27 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
if (!daxdev_mapping_supported(vma, dax_dev))
return -EOPNOTSUPP;
+ /*
+ * Writing via mmap has no logic to handle inline data, so we
+ * need to call ext4_convert_inline_data() to convert the inode
+ * to normal format before doing so, otherwise a BUG_ON will be
+ * triggered in ext4_writepages() due to the
+ * EXT4_STATE_MAY_INLINE_DATA flag. Moreover, we need to grab
+ * i_rwsem during conversion, since clearing and setting the
+ * inline data flag may race with ext4_buffered_write_iter()
+ * to trigger a BUG_ON.
+ */
+ if (ext4_has_feature_inline_data(sb) &&
+ vma->vm_flags & VM_SHARED && vma->vm_flags & VM_MAYWRITE) {
+ int err;
+
+ inode_lock(inode);
+ err = ext4_convert_inline_data(inode);
+ inode_unlock(inode);
+ if (err)
+ return err;
+ }
+
file_accessed(file);
if (IS_DAX(file_inode(file))) {
vma->vm_ops = &ext4_dax_vm_ops;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ce5f21b6c2b3..31844c4ec9fe 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -6043,10 +6043,6 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
filemap_invalidate_lock_shared(mapping);
- err = ext4_convert_inline_data(inode);
- if (err)
- goto out_ret;
-
/*
* On data journalling we skip straight to the transaction handle:
* there's no delalloc; page truncated will be checked later; the
--
2.31.1
Hi Linus
sorry for the unusual procedure of requesting a patch to be pulled myself.
For several months I have asked the maintainers (David: asymmetric keys,
Jarkko: keys subsystem) to pick up my patch, but without any luck.
I signed the tag, but that probably does not matter, since my key is not
among your trusted keys.
The following changes since commit 921bdc72a0d68977092d6a64855a1b8967acc1d9:
Merge tag 'mmc-v6.4-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc (2023-06-02 08:35:13 -0400)
are available in the Git repository at:
https://github.com/robertosassu/linux.git tags/asym-keys-fix-for-linus-v6.4-rc5
for you to fetch changes up to c3d03e8e35e005e1a614e51bb59053eeb5857f76:
KEYS: asymmetric: Copy sig and digest in public_key_verify_signature() (2023-06-02 15:36:23 +0200)
----------------------------------------------------------------
Asymmetric keys fix for v6.4-rc5
Here is a small fix that makes an unconditional copy of the buffers passed
to crypto operations, to handle the case where the stack is not in the
linear mapping area.
It has been tested and verified to fix the bug.
Signed-off-by: Roberto Sassu <roberto.sassu(a)huawei.com>
----------------------------------------------------------------
Roberto Sassu (1):
KEYS: asymmetric: Copy sig and digest in public_key_verify_signature()
crypto/asymmetric_keys/public_key.c | 38 +++++++++++++++++++++-----------------
1 file changed, 21 insertions(+), 17 deletions(-)
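For reference, a minimal sketch of the approach described above (my own
illustration, not the upstream hunk; the helper name copy_for_sg() is
hypothetical): sg_set_buf()/virt_to_page() need linearly-mapped memory, so
a caller buffer that may live on a CONFIG_VMAP_STACK stack is duplicated
with kmemdup() before being attached to a scatterlist.
#include <linux/scatterlist.h>
#include <linux/slab.h>

/* Copy a caller buffer (possibly on a vmalloc'ed stack) into the linear
 * mapping before building a scatterlist entry for it.
 */
static void *copy_for_sg(struct scatterlist *sg, const void *src, size_t len)
{
	void *buf = kmemdup(src, len, GFP_KERNEL);

	if (buf)
		sg_init_one(sg, buf, len);
	return buf;	/* caller must kfree() after the crypto operation */
}
In public_key_verify_signature() this would be applied to both sig->s and
sig->digest before the verification request is set up.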
NULL the dangling pipe reference while clearing watch_queue.
If not done, a reference to a freed pipe remains in the watch_queue, since
watch_queue_clear() is called before the pipe is freed in free_pipe_info()
(see line 834 of fs/pipe.c).
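For illustration, a condensed sketch of that ordering in fs/pipe.c (not the
verbatim source):
void free_pipe_info(struct pipe_inode_info *pipe)
{
#ifdef CONFIG_WATCH_QUEUE
	if (pipe->watch_queue)
		watch_queue_clear(pipe->watch_queue); /* must drop wqueue->pipe here */
#endif
	/* ... release pipe buffers and the watch_queue reference ... */
	kfree(pipe);	/* any remaining wqueue->pipe pointer now dangles */
}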
The sole use of wqueue->defunct is to check whether the watch queue has
been cleared, but wqueue->pipe is also set to NULL while clearing.
Checking wqueue->pipe for NULL therefore makes wqueue->defunct superfluous,
so it can be removed.
Tested with keyutils testsuite.
Cc: stable(a)vger.kernel.org # 6.1
Signed-off-by: Siddh Raman Pant <code(a)siddh.me>
---
Changes in v5:
- Rebased to latest mainline.
- Added Cc to stable.
- Specify tests passing. Note that all tests in the keyutils testsuite
passed, except tests/features/builtin_trusted, which we should not
worry about as it requires some kernel preparation according to
David Howells in v4 discussion.
Changes in v4 (11 Jan 2023):
- Drop preceding kerneldoc-changes patch and change appropriately.
Changes in v3 (8 Jan 2023):
- Minor rephrase of comment before NULLing in watch_queue_clear().
Changes in v2 (6 Aug 2022):
- Merged the NULLing and removing defunct patches.
- Removed READ_ONCE barrier in lock_wqueue().
- Better commit messages.
include/linux/watch_queue.h | 3 +--
kernel/watch_queue.c | 12 ++++++------
2 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h
index fc6bba20273b..45cd42f55d49 100644
--- a/include/linux/watch_queue.h
+++ b/include/linux/watch_queue.h
@@ -38,7 +38,7 @@ struct watch_filter {
struct watch_queue {
struct rcu_head rcu;
struct watch_filter __rcu *filter;
- struct pipe_inode_info *pipe; /* The pipe we're using as a buffer */
+ struct pipe_inode_info *pipe; /* Pipe we use as a buffer, NULL if queue closed */
struct hlist_head watches; /* Contributory watches */
struct page **notes; /* Preallocated notifications */
unsigned long *notes_bitmap; /* Allocation bitmap for notes */
@@ -46,7 +46,6 @@ struct watch_queue {
spinlock_t lock;
unsigned int nr_notes; /* Number of notes */
unsigned int nr_pages; /* Number of pages in notes[] */
- bool defunct; /* T when queues closed */
};
/*
diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
index e91cb4c2833f..d0b6b390ee42 100644
--- a/kernel/watch_queue.c
+++ b/kernel/watch_queue.c
@@ -42,7 +42,7 @@ MODULE_AUTHOR("Red Hat, Inc.");
static inline bool lock_wqueue(struct watch_queue *wqueue)
{
spin_lock_bh(&wqueue->lock);
- if (unlikely(wqueue->defunct)) {
+ if (unlikely(!wqueue->pipe)) {
spin_unlock_bh(&wqueue->lock);
return false;
}
@@ -104,9 +104,6 @@ static bool post_one_notification(struct watch_queue *wqueue,
unsigned int head, tail, mask, note, offset, len;
bool done = false;
- if (!pipe)
- return false;
-
spin_lock_irq(&pipe->rd_wait.lock);
mask = pipe->ring_size - 1;
@@ -603,8 +600,11 @@ void watch_queue_clear(struct watch_queue *wqueue)
rcu_read_lock();
spin_lock_bh(&wqueue->lock);
- /* Prevent new notifications from being stored. */
- wqueue->defunct = true;
+ /*
+ * This pipe can be freed by callers like free_pipe_info().
+ * Removing this reference also prevents new notifications.
+ */
+ wqueue->pipe = NULL;
while (!hlist_empty(&wqueue->watches)) {
watch = hlist_entry(wqueue->watches.first, struct watch, queue_node);
--
2.39.2
Hello, the following patch cherry-picked cleanly during my testing.
Subject of Patch: xfs: verify buffer contents when we skip log replay
Commit Hash: 22ed903eee23a5b174e240f1cdfa9acf393a5210
Reason why it should be applied: This fixes CVE-2023-2124.
Kernel Versions to be applied to: 6.1, 5.15, 5.10
Thank You,
Michael Kochera
From: Xiubo Li <xiubli(a)redhat.com>
Blindly expanding the readahead window causes unnecessary pagecache
thrashing and adds network load. Do not expand the window when readahead
is disabled, and do not expand it too far.
Expand forward first, rather than backward, to favour possible sequential
reads.
Bound `rreq->len` to the actual file size to restore the previous page
cache usage.
Note that posix_fadvise() may change a file's maximum readahead size.
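As a userspace illustration of that last point (my example, not part of
the patch; the mount path is hypothetical), an application can disable or
restore per-file readahead with posix_fadvise(), and the expansion logic
below honours the result:
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/cephfs/data", O_RDONLY);

	if (fd < 0)
		return 1;

	/* Random access: the kernel stops doing readahead for this file,
	 * so ceph_netfs_expand_readahead() must not expand the request. */
	posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM);

	/* Sequential access: the readahead window is restored/enlarged,
	 * and expansion is capped by it rather than by the stripe size. */
	posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);

	close(fd);
	return 0;
}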
Cc: stable(a)vger.kernel.org
Fixes: 49870056005c ("ceph: convert ceph_readpages to ceph_readahead")
URL: https://lore.kernel.org/ceph-devel/20230504082510.247-1-sehuww@mail.scut.ed…
URL: https://www.spinics.net/lists/ceph-users/msg76183.html
Cc: Hu Weiwen <sehuww(a)mail.scut.edu.cn>
Reviewed-by: Hu Weiwen <sehuww(a)mail.scut.edu.cn>
Tested-by: Hu Weiwen <sehuww(a)mail.scut.edu.cn>
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
---
fs/ceph/addr.c | 40 +++++++++++++++++++++++++++++++++-------
1 file changed, 33 insertions(+), 7 deletions(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 93fff1a7373f..0c4fb3d23078 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -188,16 +188,42 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
struct inode *inode = rreq->inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_layout *lo = &ci->i_layout;
+ unsigned long max_pages = inode->i_sb->s_bdi->ra_pages;
+ loff_t end = rreq->start + rreq->len, new_end;
+ struct ceph_netfs_request_data *priv = rreq->netfs_priv;
+ unsigned long max_len;
u32 blockoff;
- u64 blockno;
- /* Expand the start downward */
- blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
- rreq->start = blockno * lo->stripe_unit;
- rreq->len += blockoff;
+ if (priv) {
+ /* Readahead is disabled by posix_fadvise POSIX_FADV_RANDOM */
+ if (priv->file_ra_disabled)
+ max_pages = 0;
+ else
+ max_pages = priv->file_ra_pages;
+
+ }
+
+ /* Readahead is disabled */
+ if (!max_pages)
+ return;
- /* Now, round up the length to the next block */
- rreq->len = roundup(rreq->len, lo->stripe_unit);
+ max_len = max_pages << PAGE_SHIFT;
+
+ /*
+ * Try to expand the length forward by rounding up it to the next
+ * block, but do not exceed the file size, unless the original
+ * request already exceeds it.
+ */
+ new_end = min(round_up(end, lo->stripe_unit), rreq->i_size);
+ if (new_end > end && new_end <= rreq->start + max_len)
+ rreq->len = new_end - rreq->start;
+
+ /* Try to expand the start downward */
+ div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
+ if (rreq->len + blockoff <= max_len) {
+ rreq->start -= blockoff;
+ rreq->len += blockoff;
+ }
}
static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
--
2.40.1