- Linux-stable-mirror - lists.linaro.org

[PATCH 1/2] [v2] hfs/hfsplus: follow MacOS time behavior

by Arnd Bergmann

According to the official documentation for HFS+ [1], inode timestamps are supposed to cover the time range from 1904 to 2040 as originally used in classic MacOS. The traditional Linux usage is to convert the timestamps into an unsigned 32-bit number based on the Unix epoch and from there to a time_t. On 32-bit systems, that wraps the time from 2038 to 1902, so the last two years of the valid time range become garbled. On 64-bit systems, all times before 1970 get turned into timestamps between 2038 and 2106, which is more convenient but also different from the documented behavior. Looking at the Darwin sources [2], it seems that MacOS is inconsistent in yet another way: all timestamps are wrapped around to a 32-bit unsigned number when written to the disk, but when read back, all numeric values lower than 2082844800U are assumed to be invalid, so we cannot represent the times before 1970 or the times after 2040. While all implementations seem to agree on the interpretation of values between 1970 and 2038, they often differ on the exact range they support when reading back values outside of the common range: MacOS (traditional): 1904-2040 Apple Documentation: 1904-2040 MacOS X source comments: 1970-2040 MacOS X source code: 1970-2038 32-bit Linux: 1902-2038 64-bit Linux: 1970-2106 hfsfuse: 1970-2040 hfsutils (32 bit, old libc) 1902-2038 hfsutils (32 bit, new libc) 1970-2106 hfsutils (64 bit) 1904-2040 hfsplus-utils 1904-2040 hfsexplorer 1904-2040 7-zip 1904-2040 This changes Linux over to mostly the same behavior as described in the code comment in MacOS X, disallowing all times before 1970 and after 2040, while still allowing times between 2038 and 2040 like most other implementations do. Most importantly, it means we can have the same behavior on 32-bit and 64-bit. Cc: stable(a)vger.kernel.org Link: [1] https://developer.apple.com/library/archive/technotes/tn/tn1150.html Link: [2] https://opensource.apple.com/source/hfs/hfs-407.30.1/core/MacOSStubs.c.auto… Suggested-by: Viacheslav Dubeyko <slava(a)dubeyko.com> Signed-off-by: Arnd Bergmann <arnd(a)arndb.de> --- v2: treat pre-1970 dates as invalid following MacOS X behavior, reword and expand changelog text --- fs/hfs/hfs_fs.h | 29 +++++++++++++++++++++++++---- fs/hfsplus/hfsplus_fs.h | 26 +++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 6d0783e2e276..1af998fb522e 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -246,14 +246,35 @@ extern void hfs_mark_mdb_dirty(struct super_block *sb); * mac: unsigned big-endian since 00:00 GMT, Jan. 1, 1904 * */ -#define __hfs_u_to_mtime(sec) cpu_to_be32(sec + 2082844800U - sys_tz.tz_minuteswest * 60) -#define __hfs_m_to_utime(sec) (be32_to_cpu(sec) - 2082844800U + sys_tz.tz_minuteswest * 60) +static inline time64_t __hfs_m_to_utime(__be32 mt) +{ + time64_t ut = (u32)(be32_to_cpu(mt) - 2082844800U); + + /* + * Times past 2040-02-06 06:28 are assumed to be invalid, + * matching the MacOS behavior. + */ + if (ut > 2082844800U + UINT_MAX) + ut = 0; + + return ut + sys_tz.tz_minuteswest * 60; +} +static inline __be32 __hfs_u_to_mtime(time64_t ut) +{ + ut -= - sys_tz.tz_minuteswest * 60; + + /* + * MacOS wraps "invalid" times after 2040 when writing back, so + * let's do the same here. + */ + return cpu_to_be32(lower_32_bits(ut + 2082844800U)); +} #define HFS_I(inode) (container_of(inode, struct hfs_inode_info, vfs_inode)) #define HFS_SB(sb) ((struct hfs_sb_info *)(sb)->s_fs_info) -#define hfs_m_to_utime(time) (struct timespec){ .tv_sec = __hfs_m_to_utime(time) } -#define hfs_u_to_mtime(time) __hfs_u_to_mtime((time).tv_sec) +#define hfs_m_to_utime(time) (struct timespec){ .tv_sec = __hfs_m_to_utime(time) } +#define hfs_u_to_mtime(time) __hfs_u_to_mtime((time).tv_sec) #define hfs_mtime() __hfs_u_to_mtime(get_seconds()) static inline const char *hfs_mdb_name(struct super_block *sb) diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index d9255abafb81..7f0943e540a0 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -530,9 +530,29 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, void *buf, void **data, int op, int op_flags); int hfsplus_read_wrapper(struct super_block *sb); -/* time macros */ -#define __hfsp_mt2ut(t) (be32_to_cpu(t) - 2082844800U) -#define __hfsp_ut2mt(t) (cpu_to_be32(t + 2082844800U)) +/* time helpers */ +static inline time64_t __hfsp_mt2ut(__be32 mt) +{ + time64_t ut = (u32)(be32_to_cpu(mt) - 2082844800U); + + /* + * Times past 2040-02-06 06:28 are assumed to be invalid, + * matching the MacOS behavior. + */ + if (ut > 2082844800U + UINT_MAX) + ut = 0; + + return ut; +} + +static inline __be32 __hfsp_ut2mt(time64_t ut) +{ + /* + * MacOS wraps "invalid" times after 2040 when writing back, so + * let's do the same here. + */ + return cpu_to_be32(lower_32_bits(ut + 2082844800U)); +} /* compatibility */ #define hfsp_mt2ut(t) (struct timespec){ .tv_sec = __hfsp_mt2ut(t) } -- 2.9.0

7 years, 1 month

2
2
0 0

[PATCH 2/3] [BUGFIX] ring_buffer: tracing: Inherit the tracing setting to next ring buffer

by Masami Hiramatsu

Inherit the tracing on/off setting on ring_buffer to next trace buffer when taking a snapshot. Taking a snapshot is done by swapping with backup ring buffer (max_tr_buffer). But since the tracing on/off setting is set in the ring buffer, when swapping it, tracing on/off setting can also be changed. This causes a strange result like below; /sys/kernel/debug/tracing # cat tracing_on 1 /sys/kernel/debug/tracing # echo 0 > tracing_on /sys/kernel/debug/tracing # echo 1 > snapshot /sys/kernel/debug/tracing # cat tracing_on 1 /sys/kernel/debug/tracing # echo 1 > snapshot /sys/kernel/debug/tracing # cat tracing_on 0 We don't touch tracing_on, but snapshot changes tracing_on setting each time. This must be a bug, because user never know that each "ring_buffer" stores tracing-enable state and snapshot is done by swapping ring buffers. This patch fixes above strange behavior. Fixes: commit debdd57f5145 ("tracing: Make a snapshot feature available from userspace") Signed-off-by: Masami Hiramatsu <mhiramat(a)kernel.org> Cc: Steven Rostedt <rostedt(a)goodmis.org> Cc: Ingo Molnar <mingo(a)redhat.com> Cc: Hiraku Toyooka <hiraku.toyooka(a)cybertrust.co.jp> Cc: stable(a)vger.kernel.org --- include/linux/ring_buffer.h | 1 + kernel/trace/ring_buffer.c | 12 ++++++++++++ kernel/trace/trace.c | 6 ++++++ 3 files changed, 19 insertions(+) diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index b72ebdff0b77..003d09ab308d 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -165,6 +165,7 @@ void ring_buffer_record_enable(struct ring_buffer *buffer); void ring_buffer_record_off(struct ring_buffer *buffer); void ring_buffer_record_on(struct ring_buffer *buffer); int ring_buffer_record_is_on(struct ring_buffer *buffer); +int ring_buffer_record_is_set_on(struct ring_buffer *buffer); void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu); void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 6a46af21765c..4038ed74ab95 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3227,6 +3227,18 @@ int ring_buffer_record_is_on(struct ring_buffer *buffer) } /** + * ring_buffer_record_is_set_on - return true if the ring buffer is set writable + * @buffer: The ring buffer to see if write is set enabled + * + * Returns true if the ring buffer is set writable by ring_buffer_record_on(). + * Note that this does NOT mean it is in a writable state. + */ +int ring_buffer_record_is_set_on(struct ring_buffer *buffer) +{ + return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF); +} + +/** * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer * @buffer: The ring buffer to stop writes to. * @cpu: The CPU buffer to stop diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2556d8c097d2..bbd5a94a7ef1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1378,6 +1378,12 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) arch_spin_lock(&tr->max_lock); + /* Inherit the recordable setting from trace_buffer */ + if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer)) + ring_buffer_record_on(tr->max_buffer.buffer); + else + ring_buffer_record_off(tr->max_buffer.buffer); + swap(tr->trace_buffer.buffer, tr->max_buffer.buffer); __update_max_tr(tr, tsk, cpu);

7 years, 1 month

2
2
0 0

[PATCHv3 3/3] mm: Fix vma_is_anonymous() false-positives

by Kirill A. Shutemov

vma_is_anonymous() relies on ->vm_ops being NULL to detect anonymous VMA. This is unreliable as ->mmap may not set ->vm_ops. False-positive vma_is_anonymous() may lead to crashes: next ffff8801ce5e7040 prev ffff8801d20eca50 mm ffff88019c1e13c0 prot 27 anon_vma ffff88019680cdd8 vm_ops 0000000000000000 pgoff 0 file ffff8801b2ec2d00 private_data 0000000000000000 flags: 0xff(read|write|exec|shared|mayread|maywrite|mayexec|mayshare) ------------[ cut here ]------------ kernel BUG at mm/memory.c:1422! invalid opcode: 0000 [#1] SMP KASAN CPU: 0 PID: 18486 Comm: syz-executor3 Not tainted 4.18.0-rc3+ #136 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:zap_pmd_range mm/memory.c:1421 [inline] RIP: 0010:zap_pud_range mm/memory.c:1466 [inline] RIP: 0010:zap_p4d_range mm/memory.c:1487 [inline] RIP: 0010:unmap_page_range+0x1c18/0x2220 mm/memory.c:1508 Code: ff 31 ff 4c 89 e6 42 c6 04 33 f8 e8 92 dd d0 ff 4d 85 e4 0f 85 4a eb ff ff e8 54 dc d0 ff 48 8b bd 10 fc ff ff e8 82 95 fe ff <0f> 0b e8 41 dc d0 ff 0f 0b 4c 89 ad 18 fc ff ff c7 85 7c fb ff ff RSP: 0018:ffff8801b0587330 EFLAGS: 00010286 RAX: 000000000000013c RBX: 1ffff100360b0e9c RCX: ffffc90002620000 RDX: 0000000000000000 RSI: ffffffff81631851 RDI: 0000000000000001 RBP: ffff8801b05877c8 R08: ffff880199d40300 R09: ffffed003b5c4fc0 R10: ffffed003b5c4fc0 R11: ffff8801dae27e07 R12: 0000000000000000 R13: ffff88019c1e13c0 R14: dffffc0000000000 R15: 0000000020e01000 FS: 00007fca32251700(0000) GS:ffff8801dae00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f04c540d000 CR3: 00000001ac1f0000 CR4: 00000000001426f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: unmap_single_vma+0x1a0/0x310 mm/memory.c:1553 zap_page_range_single+0x3cc/0x580 mm/memory.c:1644 unmap_mapping_range_vma mm/memory.c:2792 [inline] unmap_mapping_range_tree mm/memory.c:2813 [inline] unmap_mapping_pages+0x3a7/0x5b0 mm/memory.c:2845 unmap_mapping_range+0x48/0x60 mm/memory.c:2880 truncate_pagecache+0x54/0x90 mm/truncate.c:800 truncate_setsize+0x70/0xb0 mm/truncate.c:826 simple_setattr+0xe9/0x110 fs/libfs.c:409 notify_change+0xf13/0x10f0 fs/attr.c:335 do_truncate+0x1ac/0x2b0 fs/open.c:63 do_sys_ftruncate+0x492/0x560 fs/open.c:205 __do_sys_ftruncate fs/open.c:215 [inline] __se_sys_ftruncate fs/open.c:213 [inline] __x64_sys_ftruncate+0x59/0x80 fs/open.c:213 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Reproducer: #include <stdio.h> #include <stddef.h> #include <stdint.h> #include <stdlib.h> #include <string.h> #include <sys/types.h> #include <sys/stat.h> #include <sys/ioctl.h> #include <sys/mman.h> #include <unistd.h> #include <fcntl.h> #define KCOV_INIT_TRACE _IOR('c', 1, unsigned long) #define KCOV_ENABLE _IO('c', 100) #define KCOV_DISABLE _IO('c', 101) #define COVER_SIZE (1024<<10) #define KCOV_TRACE_PC 0 #define KCOV_TRACE_CMP 1 int main(int argc, char **argv) { int fd; unsigned long *cover; system("mount -t debugfs none /sys/kernel/debug"); fd = open("/sys/kernel/debug/kcov", O_RDWR); ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE); cover = mmap(NULL, COVER_SIZE * sizeof(unsigned long), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); munmap(cover, COVER_SIZE * sizeof(unsigned long)); cover = mmap(NULL, COVER_SIZE * sizeof(unsigned long), PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); memset(cover, 0, COVER_SIZE * sizeof(unsigned long)); ftruncate(fd, 3UL << 20); return 0; } This can be fixed by assigning anonymous VMAs own vm_ops and not relying on it being NULL. If ->mmap() failed to set ->vm_ops, mmap_region() will set it to dummy_vm_ops. This way we will have non-NULL ->vm_ops for all VMAs. Signed-off-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com> Reported-by: syzbot+3f84280d52be9b7083cc(a)syzkaller.appspotmail.com Cc: stable(a)vger.kernel.org Cc: Dmitry Vyukov <dvyukov(a)google.com> Cc: Oleg Nesterov <oleg(a)redhat.com> Cc: Andrea Arcangeli <aarcange(a)redhat.com> --- drivers/char/mem.c | 1 + fs/exec.c | 1 + include/linux/mm.h | 8 ++++++++ mm/mmap.c | 3 +++ mm/nommu.c | 2 ++ 5 files changed, 15 insertions(+) diff --git a/drivers/char/mem.c b/drivers/char/mem.c index ffeb60d3434c..df66a9dd0aae 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -708,6 +708,7 @@ static int mmap_zero(struct file *file, struct vm_area_struct *vma) #endif if (vma->vm_flags & VM_SHARED) return shmem_zero_setup(vma); + vma_set_anonymous(vma); return 0; } diff --git a/fs/exec.c b/fs/exec.c index 72e961a62adb..bdd0eacefdf5 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -293,6 +293,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) bprm->vma = vma = vm_area_alloc(mm); if (!vma) return -ENOMEM; + vma_set_anonymous(vma); if (down_write_killable(&mm->mmap_sem)) { err = -EINTR; diff --git a/include/linux/mm.h b/include/linux/mm.h index 31540f166987..7ba6d356d18f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -454,10 +454,18 @@ struct vm_operations_struct { static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) { + static const struct vm_operations_struct dummy_vm_ops = {}; + vma->vm_mm = mm; + vma->vm_ops = &dummy_vm_ops; INIT_LIST_HEAD(&vma->anon_vma_chain); } +static inline void vma_set_anonymous(struct vm_area_struct *vma) +{ + vma->vm_ops = NULL; +} + struct mmu_gather; struct inode; diff --git a/mm/mmap.c b/mm/mmap.c index ff1944d8d458..17bbf4d3e24f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1778,6 +1778,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr, error = shmem_zero_setup(vma); if (error) goto free_vma; + } else { + vma_set_anonymous(vma); } vma_link(mm, vma, prev, rb_link, rb_parent); @@ -2983,6 +2985,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla return -ENOMEM; } + vma_set_anonymous(vma); vma->vm_start = addr; vma->vm_end = addr + len; vma->vm_pgoff = pgoff; diff --git a/mm/nommu.c b/mm/nommu.c index 1d22fdbf7d7c..9fc9e43335b6 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1145,6 +1145,8 @@ static int do_mmap_private(struct vm_area_struct *vma, if (ret < len) memset(base + ret, 0, len - ret); + } else { + vma_set_anonymous(vma); } return 0; -- 2.18.0

7 years, 1 month

1
0
0 0

[PATCH] nvmet-fc: fix target sgl list on large transfers

by James Smart

The existing code to carve up the sg list expected an sg element-per-page which can be very incorrect with iommu's remapping multiple memory pages to fewer bus addresses. To hit this error required a large io payload (greater than 256k) and a system that maps on a per-page basis. It's possible that large ios could get by fine if the system condensed the sgl list into the first 64 elements. This patch corrects the sg list handling by specifically walking the sg list element by element and attempting to divide the transfer up on a per-sg element boundary. While doing so, it still tries to keep sequences under 256k, but will exceed that rule if a single sg element is larger than 256k. Fixes: 48fa362b6c3f ("nvmet-fc: simplify sg list handling") Cc: <stable(a)vger.kernel.org> # 4.14 Signed-off-by: James Smart <james.smart(a)broadcom.com> --- drivers/nvme/target/fc.c | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 408279cb6f2c..29b4b236afd8 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -58,8 +58,8 @@ struct nvmet_fc_ls_iod { struct work_struct work; } __aligned(sizeof(unsigned long long)); +/* desired maximum for a single sequence - if sg list allows it */ #define NVMET_FC_MAX_SEQ_LENGTH (256 * 1024) -#define NVMET_FC_MAX_XFR_SGENTS (NVMET_FC_MAX_SEQ_LENGTH / PAGE_SIZE) enum nvmet_fcp_datadir { NVMET_FCP_NODATA, @@ -74,6 +74,7 @@ struct nvmet_fc_fcp_iod { struct nvme_fc_cmd_iu cmdiubuf; struct nvme_fc_ersp_iu rspiubuf; dma_addr_t rspdma; + struct scatterlist *next_sg; struct scatterlist *data_sg; int data_sg_cnt; u32 offset; @@ -1025,8 +1026,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, INIT_LIST_HEAD(&newrec->assoc_list); kref_init(&newrec->ref); ida_init(&newrec->assoc_cnt); - newrec->max_sg_cnt = min_t(u32, NVMET_FC_MAX_XFR_SGENTS, - template->max_sgl_segments); + newrec->max_sg_cnt = template->max_sgl_segments; ret = nvmet_fc_alloc_ls_iodlist(newrec); if (ret) { @@ -1722,6 +1722,7 @@ nvmet_fc_alloc_tgt_pgs(struct nvmet_fc_fcp_iod *fod) ((fod->io_dir == NVMET_FCP_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE)); /* note: write from initiator perspective */ + fod->next_sg = fod->data_sg; return 0; @@ -1866,24 +1867,49 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod, u8 op) { struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; + struct scatterlist *sg = fod->next_sg; unsigned long flags; - u32 tlen; + u32 remaininglen = fod->req.transfer_len - fod->offset; + u32 tlen = 0; int ret; fcpreq->op = op; fcpreq->offset = fod->offset; fcpreq->timeout = NVME_FC_TGTOP_TIMEOUT_SEC; - tlen = min_t(u32, tgtport->max_sg_cnt * PAGE_SIZE, - (fod->req.transfer_len - fod->offset)); + /* + * for next sequence: + * break at a sg element boundary + * attempt to keep sequence length capped at + * NVMET_FC_MAX_SEQ_LENGTH but allow sequence to + * be longer if a single sg element is larger + * than that amount. This is done to avoid creating + * a new sg list to use for the tgtport api. + */ + fcpreq->sg = sg; + fcpreq->sg_cnt = 0; + while (tlen < remaininglen && + fcpreq->sg_cnt < tgtport->max_sg_cnt && + tlen + sg_dma_len(sg) < NVMET_FC_MAX_SEQ_LENGTH) { + fcpreq->sg_cnt++; + tlen += sg_dma_len(sg); + sg = sg_next(sg); + } + if (tlen < remaininglen && fcpreq->sg_cnt == 0) { + fcpreq->sg_cnt++; + tlen += min_t(u32, sg_dma_len(sg), remaininglen); + sg = sg_next(sg); + } + if (tlen < remaininglen) + fod->next_sg = sg; + else + fod->next_sg = NULL; + fcpreq->transfer_length = tlen; fcpreq->transferred_length = 0; fcpreq->fcp_error = 0; fcpreq->rsplen = 0; - fcpreq->sg = &fod->data_sg[fod->offset / PAGE_SIZE]; - fcpreq->sg_cnt = DIV_ROUND_UP(tlen, PAGE_SIZE); - /* * If the last READDATA request: check if LLDD supports * combined xfr with response. -- 2.13.1

7 years, 1 month

2
1
0 0

editing for your photos

by Roland

I would like to speak with the person that managing photos for your company? We provide image editing like – photos cutting out and retouching. Enhancing your images is just a part of what we can do for your business. Whether you’re an ecommerce store or portrait photographer, real estate professional, or an e-Retailer, we are your personal team of photo editors that integrate seamlessly with your business. Our mainly services are: . Cut out, masking, clipping path, deep etching, transparent background . Colour correction, black and white, light and shadows etc. . Dust cleaning, spot cleaning . Beauty retouching, skin retouching, face retouching, body retouching . Fashion/Beauty Image Retouching . Product image Retouching . Real estate image Retouching . Wedding & Event Album Design. . Restoration and repair old images . Vector Conversion . Portrait image Retouching We can provide you editing test on your photos. Please reply if you are interested. Thanks, Roland

7 years, 1 month

1
0
0 0

[PATCH] perf/core: fix a possible deadlock scenario

by Cong Wang

hrtimer_cancel() busy-waits for the hrtimer callback to stop, pretty much like del_timer_sync(). This creates a possible deadlock scenario where we hold a spinlock before calling hrtimer_cancel() while in trying to acquire the same spinlock in the callback. This kind of deadlock is already known and is catchable by lockdep, like for del_timer_sync(), we can add lockdep annotations. However, it is still missing for hrtimer_cancel(). (I have a WIP patch to make it complete for hrtimer_cancel() but it breaks booting.) And there is such a deadlock scenario in kernel/events/core.c too, well actually, it is a simpler version: the hrtimer callback waits for itself to finish on the same CPU! It sounds stupid but it is not obvious at all, it hides very deeply in the perf event code: cpu_clock_event_init(): perf_swevent_init_hrtimer(): hwc->hrtimer.function = perf_swevent_hrtimer; perf_swevent_hrtimer(): __perf_event_overflow(): __perf_event_account_interrupt(): perf_adjust_period(): pmu->stop(): cpu_clock_event_stop(): perf_swevent_cancel(): hrtimer_cancel() Getting stuck in a timer doesn't sound very scary, however, in this case, its consequences are a disaster: perf_event_overflow() which calls __perf_event_overflow() is called in NMI handler too, so it is racy with hrtimer callback as disabling IRQ can't possibly disable NMI. This means this hrtimer callback once interrupted by an NMI handler could deadlock within NMI! As a further consequence, other IRQ handling is blocked too, notably the IPI handler, especially when smp_call_function_*() waits for their callbacks synchronously. This is why we saw so many soft lockup's in smp_call_function_single() given how widely they are used in kernel. Ironically, perf event code uses synchronous smp_call_function_single() heavily too. The fix is not easy. To minimize the impact, ideally we should just avoid busy waiting when it is called within the hrtimer callback on the same CPU, there is no reason to wait for itself to finish anyway. Probably it doesn't even need to cancel itself either since it will restart by pmu->start() later. There are two possible fixes here: 1. Modify hrtimer API to detect if a hrtimer callback is running on the same CPU now. This does not look pretty though. 2. Passing some information from perf_swevent_hrtimer() down to perf_swevent_cancel(). So I pick the latter approach, it is simple and straightforward. Note, currently perf_swevent_hrtimer() still races with perf_event_overflow() in NMI on the same CPU anyway, given there is no lock around and probably locking does not even help. But it is nothing new, and the race itself is not bad either, at most we have some inconsistent updates on the event sample period. Fixes: abd50713944c ("perf: Reimplement frequency driven sampling") Cc: Peter Zijlstra <peterz(a)infradead.org> Cc: Ingo Molnar <mingo(a)redhat.com> Cc: Linus Torvalds <torvalds(a)linux-foundation.org> Cc: Arnaldo Carvalho de Melo <acme(a)kernel.org> Cc: Alexander Shishkin <alexander.shishkin(a)linux.intel.com> Cc: Jiri Olsa <jolsa(a)redhat.com> Cc: Namhyung Kim <namhyung(a)kernel.org> Cc: <stable(a)vger.kernel.org> Signed-off-by: Cong Wang <xiyou.wangcong(a)gmail.com> --- include/linux/perf_event.h | 3 +++ kernel/events/core.c | 43 +++++++++++++++++++++++++++---------------- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1fa12887ec02..aab39b8aa720 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -310,6 +310,9 @@ struct pmu { #define PERF_EF_START 0x01 /* start the counter when adding */ #define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ #define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ +#define PERF_EF_NO_WAIT 0x08 /* do not wait when stopping, for + * example, waiting for a timer + */ /* * Adds/Removes a counter to/from the PMU, can be done inside a diff --git a/kernel/events/core.c b/kernel/events/core.c index 8f0434a9951a..f15832346b35 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3555,7 +3555,8 @@ do { \ static DEFINE_PER_CPU(int, perf_throttled_count); static DEFINE_PER_CPU(u64, perf_throttled_seq); -static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bool disable) +static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, + bool disable, bool nowait) { struct hw_perf_event *hwc = &event->hw; s64 period, sample_period; @@ -3574,8 +3575,13 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bo hwc->sample_period = sample_period; if (local64_read(&hwc->period_left) > 8*sample_period) { - if (disable) - event->pmu->stop(event, PERF_EF_UPDATE); + if (disable) { + int flags = PERF_EF_UPDATE; + + if (nowait) + flags |= PERF_EF_NO_WAIT; + event->pmu->stop(event, flags); + } local64_set(&hwc->period_left, 0); @@ -3645,7 +3651,7 @@ static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx, * twice. */ if (delta > 0) - perf_adjust_period(event, period, delta, false); + perf_adjust_period(event, period, delta, false, false); event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0); next: @@ -7681,7 +7687,8 @@ static void perf_log_itrace_start(struct perf_event *event) } static int -__perf_event_account_interrupt(struct perf_event *event, int throttle) +__perf_event_account_interrupt(struct perf_event *event, int throttle, + bool nowait) { struct hw_perf_event *hwc = &event->hw; int ret = 0; @@ -7710,7 +7717,8 @@ __perf_event_account_interrupt(struct perf_event *event, int throttle) hwc->freq_time_stamp = now; if (delta > 0 && delta < 2*TICK_NSEC) - perf_adjust_period(event, delta, hwc->last_period, true); + perf_adjust_period(event, delta, hwc->last_period, true, + nowait); } return ret; @@ -7718,7 +7726,7 @@ __perf_event_account_interrupt(struct perf_event *event, int throttle) int perf_event_account_interrupt(struct perf_event *event) { - return __perf_event_account_interrupt(event, 1); + return __perf_event_account_interrupt(event, 1, false); } /* @@ -7727,7 +7735,7 @@ int perf_event_account_interrupt(struct perf_event *event) static int __perf_event_overflow(struct perf_event *event, int throttle, struct perf_sample_data *data, - struct pt_regs *regs) + struct pt_regs *regs, bool nowait) { int events = atomic_read(&event->event_limit); int ret = 0; @@ -7739,7 +7747,7 @@ static int __perf_event_overflow(struct perf_event *event, if (unlikely(!is_sampling_event(event))) return 0; - ret = __perf_event_account_interrupt(event, throttle); + ret = __perf_event_account_interrupt(event, throttle, nowait); /* * XXX event_limit might not quite work as expected on inherited @@ -7768,7 +7776,7 @@ int perf_event_overflow(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) { - return __perf_event_overflow(event, 1, data, regs); + return __perf_event_overflow(event, 1, data, regs, true); } /* @@ -7831,7 +7839,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow, for (; overflow; overflow--) { if (__perf_event_overflow(event, throttle, - data, regs)) { + data, regs, false)) { /* * We inhibit the overflow from happening when * hwc->interrupts == MAX_INTERRUPTS. @@ -9110,7 +9118,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) if (regs && !perf_exclude_event(event, regs)) { if (!(event->attr.exclude_idle && is_idle_task(current))) - if (__perf_event_overflow(event, 1, &data, regs)) + if (__perf_event_overflow(event, 1, &data, regs, true)) ret = HRTIMER_NORESTART; } @@ -9141,7 +9149,7 @@ static void perf_swevent_start_hrtimer(struct perf_event *event) HRTIMER_MODE_REL_PINNED); } -static void perf_swevent_cancel_hrtimer(struct perf_event *event) +static void perf_swevent_cancel_hrtimer(struct perf_event *event, bool sync) { struct hw_perf_event *hwc = &event->hw; @@ -9149,7 +9157,10 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event) ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); local64_set(&hwc->period_left, ktime_to_ns(remaining)); - hrtimer_cancel(&hwc->hrtimer); + if (sync) + hrtimer_cancel(&hwc->hrtimer); + else + hrtimer_try_to_cancel(&hwc->hrtimer); } } @@ -9200,7 +9211,7 @@ static void cpu_clock_event_start(struct perf_event *event, int flags) static void cpu_clock_event_stop(struct perf_event *event, int flags) { - perf_swevent_cancel_hrtimer(event); + perf_swevent_cancel_hrtimer(event, flags & PERF_EF_NO_WAIT); cpu_clock_event_update(event); } @@ -9277,7 +9288,7 @@ static void task_clock_event_start(struct perf_event *event, int flags) static void task_clock_event_stop(struct perf_event *event, int flags) { - perf_swevent_cancel_hrtimer(event); + perf_swevent_cancel_hrtimer(event, flags & PERF_EF_NO_WAIT); task_clock_event_update(event, event->ctx->time); } -- 2.14.4

7 years, 1 month

3
7
0 0

[PATCH v2] bcache: set max writeback rate when I/O request is idle

by Coly Li

Commit b1092c9af9ed ("bcache: allow quick writeback when backing idle") allows the writeback rate to be faster if there is no I/O request on a bcache device. It works well if there is only one bcache device attached to the cache set. If there are many bcache devices attached to a cache set, it may introduce performance regression because multiple faster writeback threads of the idle bcache devices will compete the btree level locks with the bcache device who have I/O requests coming. This patch fixes the above issue by only permitting fast writebac when all bcache devices attached on the cache set are idle. And if one of the bcache devices has new I/O request coming, minimized all writeback throughput immediately and let PI controller __update_writeback_rate() to decide the upcoming writeback rate for each bcache device. Also when all bcache devices are idle, limited wrieback rate to a small number is wast of thoughput, especially when backing devices are slower non-rotation devices (e.g. SATA SSD). This patch sets a max writeback rate for each backing device if the whole cache set is idle. A faster writeback rate in idle time means new I/Os may have more available space for dirty data, and people may observe a better write performance then. Please note bcache may change its cache mode in run time, and this patch still works if the cache mode is switched from writeback mode and there is still dirty data on cache. Fixes: Commit b1092c9af9ed ("bcache: allow quick writeback when backing idle") Cc: stable(a)vger.kernel.org #4.16+ Signed-off-by: Coly Li <colyli(a)suse.de> Tested-by: Kai Krakow <kai(a)kaishome.de> Cc: Michael Lyle <mlyle(a)lyle.org> Cc: Stefan Priebe <s.priebe(a)profihost.ag> --- Channgelog: v2, Fix a deadlock reported by Stefan Priebe. v1, Initial version. drivers/md/bcache/bcache.h | 11 ++-- drivers/md/bcache/request.c | 51 ++++++++++++++- drivers/md/bcache/super.c | 1 + drivers/md/bcache/sysfs.c | 14 +++-- drivers/md/bcache/util.c | 2 +- drivers/md/bcache/util.h | 2 +- drivers/md/bcache/writeback.c | 115 ++++++++++++++++++++++++++-------- 7 files changed, 155 insertions(+), 41 deletions(-) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index d6bf294f3907..469ab1a955e0 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -328,13 +328,6 @@ struct cached_dev { */ atomic_t has_dirty; - /* - * Set to zero by things that touch the backing volume-- except - * writeback. Incremented by writeback. Used to determine when to - * accelerate idle writeback. - */ - atomic_t backing_idle; - struct bch_ratelimit writeback_rate; struct delayed_work writeback_rate_update; @@ -514,6 +507,8 @@ struct cache_set { struct cache_accounting accounting; unsigned long flags; + atomic_t idle_counter; + atomic_t at_max_writeback_rate; struct cache_sb sb; @@ -523,6 +518,8 @@ struct cache_set { struct bcache_device **devices; unsigned devices_max_used; + /* See set_at_max_writeback_rate() for how it is used */ + unsigned previous_dirty_dc_nr; struct list_head cached_devs; uint64_t cached_dev_sectors; struct closure caching; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index ae67f5fa8047..1af3d96abfa5 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1104,6 +1104,43 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio) /* Cached devices - read & write stuff */ +static void quit_max_writeback_rate(struct cache_set *c, + struct cached_dev *this_dc) +{ + int i; + struct bcache_device *d; + struct cached_dev *dc; + + /* + * If bch_register_lock is acquired by other attach/detach operations, + * waiting here will increase I/O request latency for seconds or more. + * To avoid such situation, only writeback rate of current cached device + * is set to 1, and __update_write_back() will decide writeback rate + * of other cached devices (remember c->idle_counter is 0 now). + */ + if (mutex_trylock(&bch_register_lock)){ + for (i = 0; i < c->devices_max_used; i++) { + if (!c->devices[i]) + continue; + + if (UUID_FLASH_ONLY(&c->uuids[i])) + continue; + + d = c->devices[i]; + dc = container_of(d, struct cached_dev, disk); + /* + * set writeback rate to default minimum value, + * then let update_writeback_rate() to decide the + * upcoming rate. + */ + atomic64_set(&dc->writeback_rate.rate, 1); + } + + mutex_unlock(&bch_register_lock); + } else + atomic64_set(&this_dc->writeback_rate.rate, 1); +} + static blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio) { @@ -1119,7 +1156,19 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q, return BLK_QC_T_NONE; } - atomic_set(&dc->backing_idle, 0); + if (d->c) { + atomic_set(&d->c->idle_counter, 0); + /* + * If at_max_writeback_rate of cache set is true and new I/O + * comes, quit max writeback rate of all cached devices + * attached to this cache set, and set at_max_writeback_rate + * to false. + */ + if (unlikely(atomic_read(&d->c->at_max_writeback_rate) == 1)) { + atomic_set(&d->c->at_max_writeback_rate, 0); + quit_max_writeback_rate(d->c, dc); + } + } generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0); bio_set_dev(bio, dc->bdev); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index fa4058e43202..fa532d9f9353 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1687,6 +1687,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) c->block_bits = ilog2(sb->block_size); c->nr_uuids = bucket_bytes(c) / sizeof(struct uuid_entry); c->devices_max_used = 0; + c->previous_dirty_dc_nr = 0; c->btree_pages = bucket_pages(c); if (c->btree_pages > BTREE_MAX_PAGES) c->btree_pages = max_t(int, c->btree_pages / 4, diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 225b15aa0340..d719021bff81 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -170,7 +170,8 @@ SHOW(__bch_cached_dev) var_printf(writeback_running, "%i"); var_print(writeback_delay); var_print(writeback_percent); - sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9); + sysfs_hprint(writeback_rate, + atomic64_read(&dc->writeback_rate.rate) << 9); sysfs_hprint(io_errors, atomic_read(&dc->io_errors)); sysfs_printf(io_error_limit, "%i", dc->error_limit); sysfs_printf(io_disable, "%i", dc->io_disable); @@ -188,7 +189,8 @@ SHOW(__bch_cached_dev) char change[20]; s64 next_io; - bch_hprint(rate, dc->writeback_rate.rate << 9); + bch_hprint(rate, + atomic64_read(&dc->writeback_rate.rate) << 9); bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9); bch_hprint(target, dc->writeback_rate_target << 9); bch_hprint(proportional,dc->writeback_rate_proportional << 9); @@ -255,8 +257,12 @@ STORE(__cached_dev) sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40); - sysfs_strtoul_clamp(writeback_rate, - dc->writeback_rate.rate, 1, INT_MAX); + if (attr == &sysfs_writeback_rate) { + int v; + + sysfs_strtoul_clamp(writeback_rate, v, 1, INT_MAX); + atomic64_set(&dc->writeback_rate.rate, v); + } sysfs_strtoul_clamp(writeback_rate_update_seconds, dc->writeback_rate_update_seconds, diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index fc479b026d6d..84f90c3d996d 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -200,7 +200,7 @@ uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done) { uint64_t now = local_clock(); - d->next += div_u64(done * NSEC_PER_SEC, d->rate); + d->next += div_u64(done * NSEC_PER_SEC, atomic64_read(&d->rate)); /* Bound the time. Don't let us fall further than 2 seconds behind * (this prevents unnecessary backlog that would make it impossible diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index cced87f8eb27..7e17f32ab563 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -442,7 +442,7 @@ struct bch_ratelimit { * Rate at which we want to do work, in units per second * The units here correspond to the units passed to bch_next_delay() */ - uint32_t rate; + atomic64_t rate; }; static inline void bch_ratelimit_reset(struct bch_ratelimit *d) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index ad45ebe1a74b..11ffadc3cf8f 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -49,6 +49,80 @@ static uint64_t __calc_target_rate(struct cached_dev *dc) return (cache_dirty_target * bdev_share) >> WRITEBACK_SHARE_SHIFT; } +static bool set_at_max_writeback_rate(struct cache_set *c, + struct cached_dev *dc) +{ + int i, dirty_dc_nr = 0; + struct bcache_device *d; + + /* + * bch_register_lock is acquired in cached_dev_detach_finish() before + * calling cancel_writeback_rate_update_dwork() to stop the delayed + * kworker writeback_rate_update (where the context we are for now). + * Therefore call mutex_lock() here may introduce deadlock when shut + * down the bcache device. + * c->previous_dirty_dc_nr is used to record previous calculated + * dirty_dc_nr when mutex_trylock() last time succeeded. Then if + * mutex_trylock() failed here, use c->previous_dirty_dc_nr as dirty + * cached device number. Of cause it might be inaccurate, but a few more + * or less loop before setting c->at_max_writeback_rate is much better + * then a deadlock here. + */ + if (mutex_trylock(&bch_register_lock)) { + for (i = 0; i < c->devices_max_used; i++) { + if (!c->devices[i]) + continue; + if (UUID_FLASH_ONLY(&c->uuids[i])) + continue; + d = c->devices[i]; + dc = container_of(d, struct cached_dev, disk); + if (atomic_read(&dc->has_dirty)) + dirty_dc_nr++; + } + c->previous_dirty_dc_nr = dirty_dc_nr; + + mutex_unlock(&bch_register_lock); + } else + dirty_dc_nr = c->previous_dirty_dc_nr; + + /* + * Idle_counter is increased everytime when update_writeback_rate() + * is rescheduled in. If all backing devices attached to the same + * cache set has same dc->writeback_rate_update_seconds value, it + * is about 10 rounds of update_writeback_rate() is called on each + * backing device, then the code will fall through at set 1 to + * c->at_max_writeback_rate, and a max wrteback rate to each + * dc->writeback_rate.rate. This is not very accurate but works well + * to make sure the whole cache set has no new I/O coming before + * writeback rate is set to a max number. + */ + if (atomic_inc_return(&c->idle_counter) < dirty_dc_nr * 10) + return false; + + if (atomic_read(&c->at_max_writeback_rate) != 1) + atomic_set(&c->at_max_writeback_rate, 1); + + + atomic64_set(&dc->writeback_rate.rate, INT_MAX); + + /* keep writeback_rate_target as existing value */ + dc->writeback_rate_proportional = 0; + dc->writeback_rate_integral_scaled = 0; + dc->writeback_rate_change = 0; + + /* + * Check c->idle_counter and c->at_max_writeback_rate agagain in case + * new I/O arrives during before set_at_max_writeback_rate() returns. + * Then the writeback rate is set to 1, and its new value should be + * decided via __update_writeback_rate(). + */ + if (atomic_read(&c->idle_counter) < dirty_dc_nr * 10 || + !atomic_read(&c->at_max_writeback_rate)) + return false; + + return true; +} + static void __update_writeback_rate(struct cached_dev *dc) { /* @@ -104,8 +178,9 @@ static void __update_writeback_rate(struct cached_dev *dc) dc->writeback_rate_proportional = proportional_scaled; dc->writeback_rate_integral_scaled = integral_scaled; - dc->writeback_rate_change = new_rate - dc->writeback_rate.rate; - dc->writeback_rate.rate = new_rate; + dc->writeback_rate_change = new_rate - + atomic64_read(&dc->writeback_rate.rate); + atomic64_set(&dc->writeback_rate.rate, new_rate); dc->writeback_rate_target = target; } @@ -138,9 +213,16 @@ static void update_writeback_rate(struct work_struct *work) down_read(&dc->writeback_lock); - if (atomic_read(&dc->has_dirty) && - dc->writeback_percent) - __update_writeback_rate(dc); + if (atomic_read(&dc->has_dirty) && dc->writeback_percent) { + /* + * If the whole cache set is idle, set_at_max_writeback_rate() + * will set writeback rate to a max number. Then it is + * unncessary to update writeback rate for an idle cache set + * in maximum writeback rate number(s). + */ + if (!set_at_max_writeback_rate(c, dc)) + __update_writeback_rate(dc); + } up_read(&dc->writeback_lock); @@ -422,27 +504,6 @@ static void read_dirty(struct cached_dev *dc) delay = writeback_delay(dc, size); - /* If the control system would wait for at least half a - * second, and there's been no reqs hitting the backing disk - * for awhile: use an alternate mode where we have at most - * one contiguous set of writebacks in flight at a time. If - * someone wants to do IO it will be quick, as it will only - * have to contend with one operation in flight, and we'll - * be round-tripping data to the backing disk as quickly as - * it can accept it. - */ - if (delay >= HZ / 2) { - /* 3 means at least 1.5 seconds, up to 7.5 if we - * have slowed way down. - */ - if (atomic_inc_return(&dc->backing_idle) >= 3) { - /* Wait for current I/Os to finish */ - closure_sync(&cl); - /* And immediately launch a new set. */ - delay = 0; - } - } - while (!kthread_should_stop() && !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) && delay) { @@ -715,7 +776,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc) dc->writeback_running = true; dc->writeback_percent = 10; dc->writeback_delay = 30; - dc->writeback_rate.rate = 1024; + atomic64_set(&dc->writeback_rate.rate, 1024); dc->writeback_rate_minimum = 8; dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT; -- 2.17.1

7 years, 1 month

2
3
0 0

[PATCH v6 00/13] mm: Teach memory_failure() about ZONE_DEVICE pages

by Dan Williams

Changes since v5 [1]: * Move put_page() before memory_failure() in madvise_inject_error() (Naoya) * The previous change uncovered a latent bug / broken assumption in __put_devmap_managed_page(). We need to preserve page->mapping for dax pages when they go idle. * Rename mapping_size() to dev_pagemap_mapping_size() (Naoya) * Catch and fail attempts to soft-offline dax pages (Naoya) * Collect Naoya's ack on "mm, memory_failure: Collect mapping size in collect_procs()" [1]: https://lists.01.org/pipermail/linux-nvdimm/2018-July/016682.html --- As it stands, memory_failure() gets thoroughly confused by dev_pagemap backed mappings. The recovery code has specific enabling for several possible page states and needs new enabling to handle poison in dax mappings. In order to support reliable reverse mapping of user space addresses: 1/ Add new locking in the memory_failure() rmap path to prevent races that would typically be handled by the page lock. 2/ Since dev_pagemap pages are hidden from the page allocator and the "compound page" accounting machinery, add a mechanism to determine the size of the mapping that encompasses a given poisoned pfn. 3/ Given pmem errors can be repaired, change the speculatively accessed poison protection, mce_unmap_kpfn(), to be reversible and otherwise allow ongoing access from the kernel. A side effect of this enabling is that MADV_HWPOISON becomes usable for dax mappings, however the primary motivation is to allow the system to survive userspace consumption of hardware-poison via dax. Specifically the current behavior is: mce: Uncorrected hardware memory error in user-access at af34214200 {1}[Hardware Error]: It has been corrected by h/w and requires no further action mce: [Hardware Error]: Machine check events logged {1}[Hardware Error]: event severity: corrected Memory failure: 0xaf34214: reserved kernel page still referenced by 1 users [..] Memory failure: 0xaf34214: recovery action for reserved kernel page: Failed mce: Memory error not recovered <reboot> ...and with these changes: Injecting memory failure for pfn 0x20cb00 at process virtual address 0x7f763dd00000 Memory failure: 0x20cb00: Killing dax-pmd:5421 due to hardware memory corruption Memory failure: 0x20cb00: recovery action for dax page: Recovered Given all the cross dependencies I propose taking this through nvdimm.git with acks from Naoya, x86/core, x86/RAS, and of course dax folks. --- Dan Williams (13): device-dax: Convert to vmf_insert_mixed and vm_fault_t device-dax: Enable page_mapping() device-dax: Set page->index filesystem-dax: Set page->index mm, madvise_inject_error: Disable MADV_SOFT_OFFLINE for ZONE_DEVICE pages mm, dev_pagemap: Do not clear ->mapping on final put mm, madvise_inject_error: Let memory_failure() optionally take a page reference mm, memory_failure: Collect mapping size in collect_procs() filesystem-dax: Introduce dax_lock_mapping_entry() mm, memory_failure: Teach memory_failure() about dev_pagemap pages x86/mm/pat: Prepare {reserve,free}_memtype() for "decoy" addresses x86/memory_failure: Introduce {set,clear}_mce_nospec() libnvdimm, pmem: Restore page attributes when clearing errors arch/x86/include/asm/set_memory.h | 42 ++++++ arch/x86/kernel/cpu/mcheck/mce-internal.h | 15 -- arch/x86/kernel/cpu/mcheck/mce.c | 38 ----- arch/x86/mm/pat.c | 16 ++ drivers/dax/device.c | 75 +++++++--- drivers/nvdimm/pmem.c | 26 ++++ drivers/nvdimm/pmem.h | 13 ++ fs/dax.c | 125 ++++++++++++++++- include/linux/dax.h | 13 ++ include/linux/huge_mm.h | 5 - include/linux/mm.h | 1 include/linux/set_memory.h | 14 ++ kernel/memremap.c | 1 mm/hmm.c | 2 mm/huge_memory.c | 4 - mm/madvise.c | 16 ++ mm/memory-failure.c | 210 +++++++++++++++++++++++------ 17 files changed, 481 insertions(+), 135 deletions(-)

7 years, 1 month

4
5
0 0

[PATCH] x86/entry/64: Remove %ebx handling from error_entry/exit

by Andy Lutomirski

error_entry and error_exit communicate the user vs kernel status of the frame using %ebx. This is unnecessary -- the information is in regs->cs. Just use regs->cs. This makes error_entry simpler and makes error_exit more robust. It also fixes a nasty bug. Before all the Spectre nonsense, The xen_failsafe_callback entry point returned like this: ALLOC_PT_GPREGS_ON_STACK SAVE_C_REGS SAVE_EXTRA_REGS ENCODE_FRAME_POINTER jmp error_exit And it did not go through error_entry. This was bogus: RBX contained garbage, and error_exit expected a flag in RBX. Fortunately, it generally contained *nonzero* garbage, so the correct code path was used. As part of the Spectre fixes, code was added to clear RBX to mitigate certain speculation attacks. Now, depending on kernel configuration, RBX got zeroed and, when running some Wine workloads, the kernel crashes. This was introduced by: commit 3ac6d8c787b8 ("x86/entry/64: Clear registers for exceptions/interrupts, to reduce speculation attack surface") With this patch applied, RBX is no longer needed as a flag, and the problem goes away. I suspect that malicious userspace could use this bug to crash the kernel even without the offending patch applied, though. [Historical note: I wrote this patch as a cleanup before I was aware of the bug it fixed.] [Note to stable maintainers: this should probably get applied to all kernels. If you're nervous about that, a more conservative fix to add xorl %ebx,%ebx; incl %ebx before the jump to error_exit should also fix the problem.] Cc: Brian Gerst <brgerst(a)gmail.com> Cc: Borislav Petkov <bp(a)alien8.de> Cc: Dominik Brodowski <linux(a)dominikbrodowski.net> Cc: Ingo Molnar <mingo(a)redhat.com> Cc: "H. Peter Anvin" <hpa(a)zytor.com> Cc: Thomas Gleixner <tglx(a)linutronix.de> Cc: Boris Ostrovsky <boris.ostrovsky(a)oracle.com> Cc: Juergen Gross <jgross(a)suse.com> Cc: xen-devel(a)lists.xenproject.org Cc: x86(a)kernel.org Cc: stable(a)vger.kernel.org Fixes: 3ac6d8c787b8 ("x86/entry/64: Clear registers for exceptions/interrupts, to reduce speculation attack surface") Reported-and-tested-by: "M. Vefa Bicakci" <m.v.b(a)runbox.com> Signed-off-by: Andy Lutomirski <luto(a)kernel.org> --- I could also submit the conservative fix tagged for -stable and respin this on top of it. Ingo, Greg, what do you prefer? arch/x86/entry/entry_64.S | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 73a522d53b53..8ae7ffda8f98 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -981,7 +981,7 @@ ENTRY(\sym) call \do_sym - jmp error_exit /* %ebx: no swapgs flag */ + jmp error_exit .endif END(\sym) .endm @@ -1222,7 +1222,6 @@ END(paranoid_exit) /* * Save all registers in pt_regs, and switch GS if needed. - * Return: EBX=0: came from user mode; EBX=1: otherwise */ ENTRY(error_entry) UNWIND_HINT_FUNC @@ -1269,7 +1268,6 @@ ENTRY(error_entry) * for these here too. */ .Lerror_kernelspace: - incl %ebx leaq native_irq_return_iret(%rip), %rcx cmpq %rcx, RIP+8(%rsp) je .Lerror_bad_iret @@ -1303,28 +1301,20 @@ ENTRY(error_entry) /* * Pretend that the exception came from user mode: set up pt_regs - * as if we faulted immediately after IRET and clear EBX so that - * error_exit knows that we will be returning to user mode. + * as if we faulted immediately after IRET. */ mov %rsp, %rdi call fixup_bad_iret mov %rax, %rsp - decl %ebx jmp .Lerror_entry_from_usermode_after_swapgs END(error_entry) - -/* - * On entry, EBX is a "return to kernel mode" flag: - * 1: already in kernel mode, don't need SWAPGS - * 0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode - */ ENTRY(error_exit) UNWIND_HINT_REGS DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF - testl %ebx, %ebx - jnz retint_kernel + testb $3, CS(%rsp) + jz retint_kernel jmp retint_user END(error_exit) -- 2.17.1

7 years, 1 month

2
2
0 0

[PATCH] USB: option: add support for DW5821e

by Aleksander Morgado

The device exposes AT, NMEA and DIAG ports in both USB configurations. The patch explicitly ignores interfaces 0 and 1, as they're bound to other drivers already; and also interface 6, which is a GNSS interface for which we don't have a driver yet. T: Bus=01 Lev=03 Prnt=04 Port=00 Cnt=01 Dev#= 18 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 2 P: Vendor=413c ProdID=81d7 Rev=03.18 S: Manufacturer=DELL S: Product=DW5821e Snapdragon X20 LTE S: SerialNumber=0123456789ABCDEF C: #Ifs= 7 Cfg#= 2 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 6 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) T: Bus=01 Lev=03 Prnt=04 Port=00 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 2 P: Vendor=413c ProdID=81d7 Rev=03.18 S: Manufacturer=DELL S: Product=DW5821e Snapdragon X20 LTE S: SerialNumber=0123456789ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 1 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=00 Prot=00 Driver=usbhid I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option Signed-off-by: Aleksander Morgado <aleksander(a)aleksander.es> Cc: stable <stable(a)vger.kernel.org> --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 664e61f16b6a..0215b70c4efc 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -196,6 +196,8 @@ static void option_instat_callback(struct urb *urb); #define DELL_PRODUCT_5800_V2_MINICARD_VZW 0x8196 /* Novatel E362 */ #define DELL_PRODUCT_5804_MINICARD_ATT 0x819b /* Novatel E371 */ +#define DELL_PRODUCT_5821E 0x81d7 + #define KYOCERA_VENDOR_ID 0x0c88 #define KYOCERA_PRODUCT_KPC650 0x17da #define KYOCERA_PRODUCT_KPC680 0x180a @@ -1030,6 +1032,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5800_MINICARD_VZW, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5800_V2_MINICARD_VZW, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5804_MINICARD_ATT, 0xff, 0xff, 0xff) }, + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5821E), + .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) }, /* ADU-E100, ADU-310 */ { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) }, -- 2.18.0

7 years, 1 month

1
0
0 0

[merged] mm-memcg-fix-use-after-free-in-mem_cgroup_iter.patch removed from -mm tree

by akpm＠linux-foundation.org

The patch titled Subject: mm: memcg: fix use after free in mem_cgroup_iter() has been removed from the -mm tree. Its filename was mm-memcg-fix-use-after-free-in-mem_cgroup_iter.patch This patch was dropped because it was merged into mainline or a subsystem tree ------------------------------------------------------ From: Jing Xia <jing.xia.mail(a)gmail.com> Subject: mm: memcg: fix use after free in mem_cgroup_iter() It was reported that a kernel crash happened in mem_cgroup_iter(), which can be triggered if the legacy cgroup-v1 non-hierarchical mode is used. Unable to handle kernel paging request at virtual address 6b6b6b6b6b6b8f ...... Call trace: mem_cgroup_iter+0x2e0/0x6d4 shrink_zone+0x8c/0x324 balance_pgdat+0x450/0x640 kswapd+0x130/0x4b8 kthread+0xe8/0xfc ret_from_fork+0x10/0x20 mem_cgroup_iter(): ...... if (css_tryget(css)) <-- crash here break; ...... The crashing reason is that mem_cgroup_iter() uses the memcg object whose pointer is stored in iter->position, which has been freed before and filled with POISON_FREE(0x6b). And the root cause of the use-after-free issue is that invalidate_reclaim_iterators() fails to reset the value of iter->position to NULL when the css of the memcg is released in non- hierarchical mode. Link: http://lkml.kernel.org/r/1531994807-25639-1-git-send-email-jing.xia@unisoc.… Fixes: 6df38689e0e9 ("mm: memcontrol: fix possible memcg leak due to interrupted reclaim") Signed-off-by: Jing Xia <jing.xia.mail(a)gmail.com> Acked-by: Michal Hocko <mhocko(a)suse.com> Cc: Johannes Weiner <hannes(a)cmpxchg.org> Cc: Vladimir Davydov <vdavydov.dev(a)gmail.com> Cc: <chunyan.zhang(a)unisoc.com> Cc: Shakeel Butt <shakeelb(a)google.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff -puN mm/memcontrol.c~mm-memcg-fix-use-after-free-in-mem_cgroup_iter mm/memcontrol.c --- a/mm/memcontrol.c~mm-memcg-fix-use-after-free-in-mem_cgroup_iter +++ a/mm/memcontrol.c @@ -850,7 +850,7 @@ static void invalidate_reclaim_iterators int nid; int i; - while ((memcg = parent_mem_cgroup(memcg))) { + for (; memcg; memcg = parent_mem_cgroup(memcg)) { for_each_node(nid) { mz = mem_cgroup_nodeinfo(memcg, nid); for (i = 0; i <= DEF_PRIORITY; i++) { _ Patches currently in -mm which might be from jing.xia.mail(a)gmail.com are

7 years, 1 month

1
0
0 0

[merged] thp-fix-data-loss-when-splitting-a-file-pmd.patch removed from -mm tree

by akpm＠linux-foundation.org

The patch titled Subject: mm/huge_memory.c: fix data loss when splitting a file pmd has been removed from the -mm tree. Its filename was thp-fix-data-loss-when-splitting-a-file-pmd.patch This patch was dropped because it was merged into mainline or a subsystem tree ------------------------------------------------------ From: Hugh Dickins <hughd(a)google.com> Subject: mm/huge_memory.c: fix data loss when splitting a file pmd __split_huge_pmd_locked() must check if the cleared huge pmd was dirty, and propagate that to PageDirty: otherwise, data may be lost when a huge tmpfs page is modified then split then reclaimed. How has this taken so long to be noticed? Because there was no problem when the huge page is written by a write system call (shmem_write_end() calls set_page_dirty()), nor when the page is allocated for a write fault (fault_dirty_shared_page() calls set_page_dirty()); but when allocated for a read fault (which MAP_POPULATE simulates), no set_page_dirty(). Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1807111741430.1106@eggly.anvils Fixes: d21b9e57c74c ("thp: handle file pages in split_huge_pmd()") Signed-off-by: Hugh Dickins <hughd(a)google.com> Reported-by: Ashwin Chaugule <ashwinch(a)google.com> Reviewed-by: Yang Shi <yang.shi(a)linux.alibaba.com> Reviewed-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com> Cc: "Huang, Ying" <ying.huang(a)intel.com> Cc: <stable(a)vger.kernel.org> [4.8+] Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/huge_memory.c | 2 ++ 1 file changed, 2 insertions(+) diff -puN mm/huge_memory.c~thp-fix-data-loss-when-splitting-a-file-pmd mm/huge_memory.c --- a/mm/huge_memory.c~thp-fix-data-loss-when-splitting-a-file-pmd +++ a/mm/huge_memory.c @@ -2084,6 +2084,8 @@ static void __split_huge_pmd_locked(stru if (vma_is_dax(vma)) return; page = pmd_page(_pmd); + if (!PageDirty(page) && pmd_dirty(_pmd)) + set_page_dirty(page); if (!PageReferenced(page) && pmd_young(_pmd)) SetPageReferenced(page); page_remove_rmap(page, true); _ Patches currently in -mm which might be from hughd(a)google.com are

7 years, 1 month

1
0
0 0

[merged] fat-fix-memory-allocation-failure-handling-of-match_strdup.patch removed from -mm tree

by akpm＠linux-foundation.org

The patch titled Subject: fat: fix memory allocation failure handling of match_strdup() has been removed from the -mm tree. Its filename was fat-fix-memory-allocation-failure-handling-of-match_strdup.patch This patch was dropped because it was merged into mainline or a subsystem tree ------------------------------------------------------ From: OGAWA Hirofumi <hirofumi(a)mail.parknet.co.jp> Subject: fat: fix memory allocation failure handling of match_strdup() In parse_options(), if match_strdup() failed, parse_options() leaves opts->iocharset in unexpected state (i.e. still pointing the freed string). And this can be the cause of double free. To fix, this initialize opts->iocharset always when freeing. Link: http://lkml.kernel.org/r/8736wp9dzc.fsf@mail.parknet.co.jp Signed-off-by: OGAWA Hirofumi <hirofumi(a)mail.parknet.co.jp> Reported-by: syzbot+90b8e10515ae88228a92(a)syzkaller.appspotmail.com Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- fs/fat/inode.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff -puN fs/fat/inode.c~fat-fix-memory-allocation-failure-handling-of-match_strdup fs/fat/inode.c --- a/fs/fat/inode.c~fat-fix-memory-allocation-failure-handling-of-match_strdup +++ a/fs/fat/inode.c @@ -707,13 +707,21 @@ static void fat_set_state(struct super_b brelse(bh); } +static void fat_reset_iocharset(struct fat_mount_options *opts) +{ + if (opts->iocharset != fat_default_iocharset) { + /* Note: opts->iocharset can be NULL here */ + kfree(opts->iocharset); + opts->iocharset = fat_default_iocharset; + } +} + static void delayed_free(struct rcu_head *p) { struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu); unload_nls(sbi->nls_disk); unload_nls(sbi->nls_io); - if (sbi->options.iocharset != fat_default_iocharset) - kfree(sbi->options.iocharset); + fat_reset_iocharset(&sbi->options); kfree(sbi); } @@ -1132,7 +1140,7 @@ static int parse_options(struct super_bl opts->fs_fmask = opts->fs_dmask = current_umask(); opts->allow_utime = -1; opts->codepage = fat_default_codepage; - opts->iocharset = fat_default_iocharset; + fat_reset_iocharset(opts); if (is_vfat) { opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95; opts->rodir = 0; @@ -1289,8 +1297,7 @@ static int parse_options(struct super_bl /* vfat specific */ case Opt_charset: - if (opts->iocharset != fat_default_iocharset) - kfree(opts->iocharset); + fat_reset_iocharset(opts); iocharset = match_strdup(&args[0]); if (!iocharset) return -ENOMEM; @@ -1881,8 +1888,7 @@ out_fail: iput(fat_inode); unload_nls(sbi->nls_io); unload_nls(sbi->nls_disk); - if (sbi->options.iocharset != fat_default_iocharset) - kfree(sbi->options.iocharset); + fat_reset_iocharset(&sbi->options); sb->s_fs_info = NULL; kfree(sbi); return error; _ Patches currently in -mm which might be from hirofumi(a)mail.parknet.co.jp are fat-validate-i_start-before-using.patch

7 years, 1 month

1
0
0 0

[alternative-merged] mm-fix-vma_is_anonymous-false-positives.patch removed from -mm tree

by akpm＠linux-foundation.org

The patch titled Subject: mm: fix vma_is_anonymous() false-positives has been removed from the -mm tree. Its filename was mm-fix-vma_is_anonymous-false-positives.patch This patch was dropped because an alternative patch was merged ------------------------------------------------------ From: "Kirill A. Shutemov" <kirill.shutemov(a)linux.intel.com> Subject: mm: fix vma_is_anonymous() false-positives vma_is_anonymous() relies on ->vm_ops being NULL to detect anonymous VMA. This is unreliable as ->mmap may not set ->vm_ops. False-positive vma_is_anonymous() may lead to crashes: next ffff8801ce5e7040 prev ffff8801d20eca50 mm ffff88019c1e13c0 prot 27 anon_vma ffff88019680cdd8 vm_ops 0000000000000000 pgoff 0 file ffff8801b2ec2d00 private_data 0000000000000000 flags: 0xff(read|write|exec|shared|mayread|maywrite|mayexec|mayshare) ------------[ cut here ]------------ kernel BUG at mm/memory.c:1422! invalid opcode: 0000 [#1] SMP KASAN CPU: 0 PID: 18486 Comm: syz-executor3 Not tainted 4.18.0-rc3+ #136 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:zap_pmd_range mm/memory.c:1421 [inline] RIP: 0010:zap_pud_range mm/memory.c:1466 [inline] RIP: 0010:zap_p4d_range mm/memory.c:1487 [inline] RIP: 0010:unmap_page_range+0x1c18/0x2220 mm/memory.c:1508 Code: ff 31 ff 4c 89 e6 42 c6 04 33 f8 e8 92 dd d0 ff 4d 85 e4 0f 85 4a eb ff ff e8 54 dc d0 ff 48 8b bd 10 fc ff ff e8 82 95 fe ff <0f> 0b e8 41 dc d0 ff 0f 0b 4c 89 ad 18 fc ff ff c7 85 7c fb ff ff RSP: 0018:ffff8801b0587330 EFLAGS: 00010286 RAX: 000000000000013c RBX: 1ffff100360b0e9c RCX: ffffc90002620000 RDX: 0000000000000000 RSI: ffffffff81631851 RDI: 0000000000000001 RBP: ffff8801b05877c8 R08: ffff880199d40300 R09: ffffed003b5c4fc0 R10: ffffed003b5c4fc0 R11: ffff8801dae27e07 R12: 0000000000000000 R13: ffff88019c1e13c0 R14: dffffc0000000000 R15: 0000000020e01000 FS: 00007fca32251700(0000) GS:ffff8801dae00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f04c540d000 CR3: 00000001ac1f0000 CR4: 00000000001426f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: unmap_single_vma+0x1a0/0x310 mm/memory.c:1553 zap_page_range_single+0x3cc/0x580 mm/memory.c:1644 unmap_mapping_range_vma mm/memory.c:2792 [inline] unmap_mapping_range_tree mm/memory.c:2813 [inline] unmap_mapping_pages+0x3a7/0x5b0 mm/memory.c:2845 unmap_mapping_range+0x48/0x60 mm/memory.c:2880 truncate_pagecache+0x54/0x90 mm/truncate.c:800 truncate_setsize+0x70/0xb0 mm/truncate.c:826 simple_setattr+0xe9/0x110 fs/libfs.c:409 notify_change+0xf13/0x10f0 fs/attr.c:335 do_truncate+0x1ac/0x2b0 fs/open.c:63 do_sys_ftruncate+0x492/0x560 fs/open.c:205 __do_sys_ftruncate fs/open.c:215 [inline] __se_sys_ftruncate fs/open.c:213 [inline] __x64_sys_ftruncate+0x59/0x80 fs/open.c:213 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Reproducer: #include <stdio.h> #include <stddef.h> #include <stdint.h> #include <stdlib.h> #include <string.h> #include <sys/types.h> #include <sys/stat.h> #include <sys/ioctl.h> #include <sys/mman.h> #include <unistd.h> #include <fcntl.h> #define KCOV_INIT_TRACE _IOR('c', 1, unsigned long) #define KCOV_ENABLE _IO('c', 100) #define KCOV_DISABLE _IO('c', 101) #define COVER_SIZE (1024<<10) #define KCOV_TRACE_PC 0 #define KCOV_TRACE_CMP 1 int main(int argc, char **argv) { int fd; unsigned long *cover; system("mount -t debugfs none /sys/kernel/debug"); fd = open("/sys/kernel/debug/kcov", O_RDWR); ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE); cover = mmap(NULL, COVER_SIZE * sizeof(unsigned long), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); munmap(cover, COVER_SIZE * sizeof(unsigned long)); cover = mmap(NULL, COVER_SIZE * sizeof(unsigned long), PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); memset(cover, 0, COVER_SIZE * sizeof(unsigned long)); ftruncate(fd, 3UL << 20); return 0; } This can be fixed by assigning anonymous VMAs own vm_ops and not relying on it being NULL. If ->mmap() failed to set ->vm_ops, mmap_region() will set it to dummy_vm_ops. This way we will have non-NULL ->vm_ops for all VMAs. [kirill(a)shutemov.name: add comments] Link: http://lkml.kernel.org/r/20180711121521.omugjfpuuyxscjjf@kshutemo-mobl1 [kirill.shutemov(a)linux.intel.com: v2] Link: http://lkml.kernel.org/r/20180712145626.41665-2-kirill.shutemov@linux.intel… [kirill.shutemov(a)linux.intel.com: fix splat reported by Marcel] Link: http://lkml.kernel.org/r/20180716142049.ioa2irsd2d7sphn4@black.fi.intel.com Link: http://lkml.kernel.org/r/20180710134821.84709-2-kirill.shutemov@linux.intel… Signed-off-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com> Reported-by: syzbot+3f84280d52be9b7083cc(a)syzkaller.appspotmail.com Reviewed-by: Andrew Morton <akpm(a)linux-foundation.org> Cc: Dmitry Vyukov <dvyukov(a)google.com> Cc: Oleg Nesterov <oleg(a)redhat.com> Cc: Marcel Ziswiler <marcel.ziswiler(a)toradex.com> Cc: Andrea Arcangeli <aarcange(a)redhat.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- arch/arm/kernel/process.c | 1 + arch/ia64/kernel/perfmon.c | 1 + arch/ia64/mm/init.c | 2 ++ drivers/char/mem.c | 1 + fs/exec.c | 1 + fs/hugetlbfs/inode.c | 1 + include/linux/mm.h | 5 ++++- mm/khugepaged.c | 4 ++-- mm/mmap.c | 11 +++++++++++ mm/nommu.c | 9 ++++++++- mm/shmem.c | 1 + mm/util.c | 12 ++++++++++++ 12 files changed, 45 insertions(+), 4 deletions(-) diff -puN arch/arm/kernel/process.c~mm-fix-vma_is_anonymous-false-positives arch/arm/kernel/process.c --- a/arch/arm/kernel/process.c~mm-fix-vma_is_anonymous-false-positives +++ a/arch/arm/kernel/process.c @@ -334,6 +334,7 @@ static struct vm_area_struct gate_vma = .vm_start = 0xffff0000, .vm_end = 0xffff0000 + PAGE_SIZE, .vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC, + .vm_ops = &dummy_vm_ops, }; static int __init gate_vma_init(void) diff -puN arch/ia64/kernel/perfmon.c~mm-fix-vma_is_anonymous-false-positives arch/ia64/kernel/perfmon.c --- a/arch/ia64/kernel/perfmon.c~mm-fix-vma_is_anonymous-false-positives +++ a/arch/ia64/kernel/perfmon.c @@ -2292,6 +2292,7 @@ pfm_smpl_buffer_alloc(struct task_struct vma->vm_file = get_file(filp); vma->vm_flags = VM_READ|VM_MAYREAD|VM_DONTEXPAND|VM_DONTDUMP; vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ + vma->vm_ops = &dummy_vm_ops; /* * Now we have everything we need and we can initialize diff -puN arch/ia64/mm/init.c~mm-fix-vma_is_anonymous-false-positives arch/ia64/mm/init.c --- a/arch/ia64/mm/init.c~mm-fix-vma_is_anonymous-false-positives +++ a/arch/ia64/mm/init.c @@ -122,6 +122,7 @@ ia64_init_addr_space (void) vma->vm_end = vma->vm_start + PAGE_SIZE; vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + vma->vm_ops = &dummy_vm_ops; down_write(&current->mm->mmap_sem); if (insert_vm_struct(current->mm, vma)) { up_write(&current->mm->mmap_sem); @@ -141,6 +142,7 @@ ia64_init_addr_space (void) vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_ops = &dummy_vm_ops; down_write(&current->mm->mmap_sem); if (insert_vm_struct(current->mm, vma)) { up_write(&current->mm->mmap_sem); diff -puN drivers/char/mem.c~mm-fix-vma_is_anonymous-false-positives drivers/char/mem.c --- a/drivers/char/mem.c~mm-fix-vma_is_anonymous-false-positives +++ a/drivers/char/mem.c @@ -708,6 +708,7 @@ static int mmap_zero(struct file *file, #endif if (vma->vm_flags & VM_SHARED) return shmem_zero_setup(vma); + vma->vm_ops = &anon_vm_ops; return 0; } diff -puN fs/exec.c~mm-fix-vma_is_anonymous-false-positives fs/exec.c --- a/fs/exec.c~mm-fix-vma_is_anonymous-false-positives +++ a/fs/exec.c @@ -307,6 +307,7 @@ static int __bprm_mm_init(struct linux_b * configured yet. */ BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); + vma->vm_ops = &anon_vm_ops; vma->vm_end = STACK_TOP_MAX; vma->vm_start = vma->vm_end - PAGE_SIZE; vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; diff -puN fs/hugetlbfs/inode.c~mm-fix-vma_is_anonymous-false-positives fs/hugetlbfs/inode.c --- a/fs/hugetlbfs/inode.c~mm-fix-vma_is_anonymous-false-positives +++ a/fs/hugetlbfs/inode.c @@ -597,6 +597,7 @@ static long hugetlbfs_fallocate(struct f memset(&pseudo_vma, 0, sizeof(struct vm_area_struct)); pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED); pseudo_vma.vm_file = file; + pseudo_vma.vm_ops = &dummy_vm_ops; for (index = start; index < end; index++) { /* diff -puN include/linux/mm.h~mm-fix-vma_is_anonymous-false-positives include/linux/mm.h --- a/include/linux/mm.h~mm-fix-vma_is_anonymous-false-positives +++ a/include/linux/mm.h @@ -1536,9 +1536,12 @@ int clear_page_dirty_for_io(struct page int get_cmdline(struct task_struct *task, char *buffer, int buflen); +extern const struct vm_operations_struct anon_vm_ops; +extern const struct vm_operations_struct dummy_vm_ops; + static inline bool vma_is_anonymous(struct vm_area_struct *vma) { - return !vma->vm_ops; + return vma->vm_ops == &anon_vm_ops; } #ifdef CONFIG_SHMEM diff -puN mm/khugepaged.c~mm-fix-vma_is_anonymous-false-positives mm/khugepaged.c --- a/mm/khugepaged.c~mm-fix-vma_is_anonymous-false-positives +++ a/mm/khugepaged.c @@ -440,7 +440,7 @@ int khugepaged_enter_vma_merge(struct vm * page fault if needed. */ return 0; - if (vma->vm_ops || (vm_flags & VM_NO_KHUGEPAGED)) + if (!vma_is_anonymous(vma) || (vm_flags & VM_NO_KHUGEPAGED)) /* khugepaged not yet working on file or special mappings */ return 0; hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; @@ -831,7 +831,7 @@ static bool hugepage_vma_check(struct vm return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, HPAGE_PMD_NR); } - if (!vma->anon_vma || vma->vm_ops) + if (!vma->anon_vma || !vma_is_anonymous(vma)) return false; if (is_vma_temporary_stack(vma)) return false; diff -puN mm/mmap.c~mm-fix-vma_is_anonymous-false-positives mm/mmap.c --- a/mm/mmap.c~mm-fix-vma_is_anonymous-false-positives +++ a/mm/mmap.c @@ -561,6 +561,8 @@ static unsigned long count_vma_pages_ran void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, struct rb_node **rb_link, struct rb_node *rb_parent) { + WARN_ONCE(!vma->vm_ops, "missing vma->vm_ops"); + /* Update tracking information for the gap following the new vma. */ if (vma->vm_next) vma_gap_update(vma->vm_next); @@ -1762,6 +1764,11 @@ unsigned long mmap_region(struct file *f */ vma->vm_file = get_file(file); error = call_mmap(file, vma); + + /* All mappings must have ->vm_ops set */ + if (!vma->vm_ops) + vma->vm_ops = &dummy_vm_ops; + if (error) goto unmap_and_free_vma; @@ -1780,6 +1787,9 @@ unsigned long mmap_region(struct file *f error = shmem_zero_setup(vma); if (error) goto free_vma; + } else { + /* vma_is_anonymous() relies on this. */ + vma->vm_ops = &anon_vm_ops; } vma_link(mm, vma, prev, rb_link, rb_parent); @@ -2992,6 +3002,7 @@ static int do_brk_flags(unsigned long ad INIT_LIST_HEAD(&vma->anon_vma_chain); vma->vm_mm = mm; + vma->vm_ops = &anon_vm_ops; vma->vm_start = addr; vma->vm_end = addr + len; vma->vm_pgoff = pgoff; diff -puN mm/nommu.c~mm-fix-vma_is_anonymous-false-positives mm/nommu.c --- a/mm/nommu.c~mm-fix-vma_is_anonymous-false-positives +++ a/mm/nommu.c @@ -1058,6 +1058,8 @@ static int do_mmap_shared_file(struct vm int ret; ret = call_mmap(vma->vm_file, vma); + if (!vma->vm_ops) + vma->vm_ops = &dummy_vm_ops; if (ret == 0) { vma->vm_region->vm_top = vma->vm_region->vm_end; return 0; @@ -1089,6 +1091,8 @@ static int do_mmap_private(struct vm_are */ if (capabilities & NOMMU_MAP_DIRECT) { ret = call_mmap(vma->vm_file, vma); + if (!vma->vm_ops) + vma->vm_ops = &dummy_vm_ops; if (ret == 0) { /* shouldn't return success if we're not sharing */ BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); @@ -1137,6 +1141,8 @@ static int do_mmap_private(struct vm_are fpos = vma->vm_pgoff; fpos <<= PAGE_SHIFT; + vma->vm_ops = &dummy_vm_ops; + ret = kernel_read(vma->vm_file, base, len, &fpos); if (ret < 0) goto error_free; @@ -1144,7 +1150,8 @@ static int do_mmap_private(struct vm_are /* clear the last little bit */ if (ret < len) memset(base + ret, 0, len - ret); - + } else { + vma->vm_ops = &anon_vm_ops; } return 0; diff -puN mm/shmem.c~mm-fix-vma_is_anonymous-false-positives mm/shmem.c --- a/mm/shmem.c~mm-fix-vma_is_anonymous-false-positives +++ a/mm/shmem.c @@ -1424,6 +1424,7 @@ static void shmem_pseudo_vma_init(struct /* Bias interleave by inode number to distribute better across nodes */ vma->vm_pgoff = index + info->vfs_inode.i_ino; vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index); + vma->vm_ops = &dummy_vm_ops; } static void shmem_pseudo_vma_destroy(struct vm_area_struct *vma) diff -puN mm/util.c~mm-fix-vma_is_anonymous-false-positives mm/util.c --- a/mm/util.c~mm-fix-vma_is_anonymous-false-positives +++ a/mm/util.c @@ -20,6 +20,18 @@ #include "internal.h" +/* + * All anonymous VMAs have ->vm_ops set to anon_vm_ops. + * vma_is_anonymous() reiles on anon_vm_ops to detect anonymous VMA. + */ +const struct vm_operations_struct anon_vm_ops = {}; + +/* + * All VMAs have to have ->vm_ops set. dummy_vm_ops can be used if the VMA + * doesn't need to handle any of the operations. + */ +const struct vm_operations_struct dummy_vm_ops = {}; + static inline int is_kernel_rodata(unsigned long addr) { return addr >= (unsigned long)__start_rodata && _ Patches currently in -mm which might be from kirill.shutemov(a)linux.intel.com are mm-page_ext-drop-definition-of-unused-page_ext_debug_poison.patch mm-page_ext-constify-lookup_page_ext-argument.patch mm-drop-unneeded-vm_ops-checks-v2.patch

7 years, 1 month

1
0
0 0

[PATCH] MIPS: Change definition of cpu_relax() for Loongson-3

by Huacai Chen

Linux expects that if a CPU modifies a memory location, then that modification will eventually become visible to other CPUs in the system. On Loongson-3 processor with SFB (Store Fill Buffer), loads may be prioritised over stores so it is possible for a store operation to be postponed if a polling loop immediately follows it. If the variable being polled indirectly depends on the outstanding store [for example, another CPU may be polling the variable that is pending modification] then there is the potential for deadlock if interrupts are disabled. This deadlock occurs in qspinlock code. This patch changes the definition of cpu_relax() to smp_mb() for Loongson-3, forcing a flushing of the SFB on SMP systems before the next load takes place. If the Kernel is not compiled for SMP support, this will expand to a barrier() as before. References: 534be1d5a2da940 (ARM: 6194/1: change definition of cpu_relax() for ARM11MPCore) Cc: stable(a)vger.kernel.org Signed-off-by: Huacai Chen <chenhc(a)lemote.com> --- arch/mips/include/asm/processor.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index af34afb..a8c4a3a 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -386,7 +386,17 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) (task_pt_regs(tsk)->regs[29]) #define KSTK_STATUS(tsk) (task_pt_regs(tsk)->cp0_status) +#ifdef CONFIG_CPU_LOONGSON3 +/* + * Loongson-3's SFB (Store-Fill-Buffer) may get starved when stuck in a read + * loop. Since spin loops of any kind should have a cpu_relax() in them, force + * a Store-Fill-Buffer flush from cpu_relax() such that any pending writes will + * become available as expected. + */ +#define cpu_relax() smp_mb() +#else #define cpu_relax() barrier() +#endif /* * Return_address is a replacement for __builtin_return_address(count) -- 2.7.0

7 years, 1 month

5
7
0 0

FAILED: patch "[PATCH] drm/nouveau/drm/nouveau: Fix runtime PM leak in" failed to apply to 4.17-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.17-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From e5d54f1935722f83df7619f3978f774c2b802cd8 Mon Sep 17 00:00:00 2001 From: Lyude Paul <lyude(a)redhat.com> Date: Thu, 12 Jul 2018 13:02:53 -0400 Subject: [PATCH] drm/nouveau/drm/nouveau: Fix runtime PM leak in nv50_disp_atomic_commit() A CRTC being enabled doesn't mean it's on! It doesn't even necessarily mean it's being used. This fixes runtime PM leaks on the P50 I've got next to me. Signed-off-by: Lyude Paul <lyude(a)redhat.com> Cc: stable(a)vger.kernel.org Signed-off-by: Ben Skeggs <bskeggs(a)redhat.com> diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 9382e99a0bc7..31b12b4f321a 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -1878,7 +1878,7 @@ nv50_disp_atomic_commit(struct drm_device *dev, nv50_disp_atomic_commit_tail(state); drm_for_each_crtc(crtc, dev) { - if (crtc->state->enable) { + if (crtc->state->active) { if (!drm->have_disp_power_ref) { drm->have_disp_power_ref = true; return 0;

7 years, 1 month

2
1
0 0

[stable-4.14 00/23] block/scsi multiqueue performance enhancement and

by Jack Wang

Hi Greg, Please consider this patchset, which include block/scsi multiqueue performance enhancement and bugfix. We've run multiple benchmark and different tests for over one week, looks good. These patches are also included in Oracle UEK5. They're almost just simple cherry-pick, only 2 patches need minor adjust. They can apply cleanly on 4.14.57. Jens Axboe (3): Revert "blk-mq: don't handle TAG_SHARED in restart" blk-mq: fix issue with shared tag queue re-running blk-mq: only run the hardware queue if IO is pending Jianchao Wang (1): blk-mq: put the driver tag of nxt rq before first one is requeued Ming Lei (19): blk-mq-sched: move actual dispatching into one helper blk-mq: introduce .get_budget and .put_budget in blk_mq_ops sbitmap: introduce __sbitmap_for_each_set() blk-mq-sched: improve dispatching from sw queue scsi: allow passing in null rq to scsi_prep_state_check() scsi: implement .get_budget and .put_budget for blk-mq SCSI: don't get target/host busy_count in scsi_mq_get_budget() blk-mq: don't handle TAG_SHARED in restart blk-mq: don't restart queue when .get_budget returns BLK_STS_RESOURCE blk-mq: don't handle failure in .get_budget blk-flush: don't run queue for requests bypassing flush block: pass 'run_queue' to blk_mq_request_bypass_insert blk-flush: use blk_mq_request_bypass_insert() blk-mq-sched: decide how to handle flush rq via RQF_FLUSH_SEQ blk-mq: move blk_mq_put_driver_tag*() into blk-mq.h blk-mq: don't allocate driver tag upfront for flush rq blk-mq: put driver tag if dispatch budget can't be got blk-mq: quiesce queue during switching io sched and updating nr_requests scsi: core: run queue if SCSI device queue isn't ready and queue is idle block/blk-core.c | 2 +- block/blk-flush.c | 37 +++++-- block/blk-mq-debugfs.c | 1 - block/blk-mq-sched.c | 203 ++++++++++++++++++++++------------- block/blk-mq.c | 278 +++++++++++++++++++++++++++--------------------- block/blk-mq.h | 58 +++++++++- block/elevator.c | 2 + drivers/scsi/scsi_lib.c | 53 ++++++--- include/linux/blk-mq.h | 20 +++- include/linux/sbitmap.h | 64 ++++++++--- 10 files changed, 475 insertions(+), 243 deletions(-) -- 2.7.4

7 years, 1 month

4
31
0 0

[PATCH] cxl: Fix wrong comparison in cxl_adapter_context_get()

by Vaibhav Jain

Function atomic_inc_unless_negative() returns a bool to indicate success/failure. However cxl_adapter_context_get() wrongly compares the return value against '>=0' which will always be true. The patch fixes this comparison to '==0' there by also fixing this compile time warning: drivers/misc/cxl/main.c:290 cxl_adapter_context_get() warn: 'atomic_inc_unless_negative(&adapter->contexts_num)' is unsigned Cc: stable(a)vger.kernel.org Fixes: 70b565bbdb91 ("cxl: Prevent adapter reset if an active context exists") Reported-by: Dan Carpenter <dan.carpenter(a)oracle.com> Signed-off-by: Vaibhav Jain <vaibhav(a)linux.ibm.com> --- drivers/misc/cxl/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index c1ba0d42cbc8..e0f29b8a872d 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -287,7 +287,7 @@ int cxl_adapter_context_get(struct cxl *adapter) int rc; rc = atomic_inc_unless_negative(&adapter->contexts_num); - return rc >= 0 ? 0 : -EBUSY; + return rc ? 0 : -EBUSY; } void cxl_adapter_context_put(struct cxl *adapter) -- 2.17.1

7 years, 1 month

4
3
0 0

[PATCH] bcache: set max writeback rate when I/O request is idle

by Coly Li

Commit b1092c9af9ed ("bcache: allow quick writeback when backing idle") allows the writeback rate to be faster if there is no I/O request on a bcache device. It works well if there is only one bcache device attached to the cache set. If there are many bcache devices attached to a cache set, it may introduce performance regression because multiple faster writeback threads of the idle bcache devices will compete the btree level locks with the bcache device who have I/O requests coming. This patch fixes the above issue by only permitting fast writebac when all bcache devices attached on the cache set are idle. And if one of the bcache devices has new I/O request coming, minimized all writeback throughput immediately and let PI controller __update_writeback_rate() to decide the upcoming writeback rate for each bcache device. Also when all bcache devices are idle, limited wrieback rate to a small number is wast of thoughput, especially when backing devices are slower non-rotation devices (e.g. SATA SSD). This patch sets a max writeback rate for each backing device if the whole cache set is idle. A faster writeback rate in idle time means new I/Os may have more available space for dirty data, and people may observe a better write performance then. Please note bcache may change its cache mode in run time, and this patch still works if the cache mode is switched from writeback mode and there is still dirty data on cache. Fixes: Commit b1092c9af9ed ("bcache: allow quick writeback when backing idle") Cc: stable(a)vger.kernel.org #4.16+ Signed-off-by: Coly Li <colyli(a)suse.de> Cc: Michael Lyle <mlyle(a)lyle.org> --- drivers/md/bcache/bcache.h | 9 +--- drivers/md/bcache/request.c | 42 ++++++++++++++- drivers/md/bcache/sysfs.c | 14 +++-- drivers/md/bcache/util.c | 2 +- drivers/md/bcache/util.h | 2 +- drivers/md/bcache/writeback.c | 98 +++++++++++++++++++++++++---------- 6 files changed, 126 insertions(+), 41 deletions(-) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index d6bf294f3907..f7451e8be03c 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -328,13 +328,6 @@ struct cached_dev { */ atomic_t has_dirty; - /* - * Set to zero by things that touch the backing volume-- except - * writeback. Incremented by writeback. Used to determine when to - * accelerate idle writeback. - */ - atomic_t backing_idle; - struct bch_ratelimit writeback_rate; struct delayed_work writeback_rate_update; @@ -514,6 +507,8 @@ struct cache_set { struct cache_accounting accounting; unsigned long flags; + atomic_t idle_counter; + atomic_t at_max_writeback_rate; struct cache_sb sb; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index ae67f5fa8047..fe45f561a054 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1104,6 +1104,34 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio) /* Cached devices - read & write stuff */ +static void quit_max_writeback_rate(struct cache_set *c) +{ + int i; + struct bcache_device *d; + struct cached_dev *dc; + + mutex_lock(&bch_register_lock); + + for (i = 0; i < c->devices_max_used; i++) { + if (!c->devices[i]) + continue; + + if (UUID_FLASH_ONLY(&c->uuids[i])) + continue; + + d = c->devices[i]; + dc = container_of(d, struct cached_dev, disk); + /* + * set writeback rate to default minimum value, + * then let update_writeback_rate() to decide the + * upcoming rate. + */ + atomic64_set(&dc->writeback_rate.rate, 1); + } + + mutex_unlock(&bch_register_lock); +} + static blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio) { @@ -1119,7 +1147,19 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q, return BLK_QC_T_NONE; } - atomic_set(&dc->backing_idle, 0); + if (d->c) { + atomic_set(&d->c->idle_counter, 0); + /* + * If at_max_writeback_rate of cache set is true and new I/O + * comes, quit max writeback rate of all cached devices + * attached to this cache set, and set at_max_writeback_rate + * to false. + */ + if (unlikely(atomic_read(&d->c->at_max_writeback_rate) == 1)) { + atomic_set(&d->c->at_max_writeback_rate, 0); + quit_max_writeback_rate(d->c); + } + } generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0); bio_set_dev(bio, dc->bdev); diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 225b15aa0340..d719021bff81 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -170,7 +170,8 @@ SHOW(__bch_cached_dev) var_printf(writeback_running, "%i"); var_print(writeback_delay); var_print(writeback_percent); - sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9); + sysfs_hprint(writeback_rate, + atomic64_read(&dc->writeback_rate.rate) << 9); sysfs_hprint(io_errors, atomic_read(&dc->io_errors)); sysfs_printf(io_error_limit, "%i", dc->error_limit); sysfs_printf(io_disable, "%i", dc->io_disable); @@ -188,7 +189,8 @@ SHOW(__bch_cached_dev) char change[20]; s64 next_io; - bch_hprint(rate, dc->writeback_rate.rate << 9); + bch_hprint(rate, + atomic64_read(&dc->writeback_rate.rate) << 9); bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9); bch_hprint(target, dc->writeback_rate_target << 9); bch_hprint(proportional,dc->writeback_rate_proportional << 9); @@ -255,8 +257,12 @@ STORE(__cached_dev) sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40); - sysfs_strtoul_clamp(writeback_rate, - dc->writeback_rate.rate, 1, INT_MAX); + if (attr == &sysfs_writeback_rate) { + int v; + + sysfs_strtoul_clamp(writeback_rate, v, 1, INT_MAX); + atomic64_set(&dc->writeback_rate.rate, v); + } sysfs_strtoul_clamp(writeback_rate_update_seconds, dc->writeback_rate_update_seconds, diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index fc479b026d6d..84f90c3d996d 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -200,7 +200,7 @@ uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done) { uint64_t now = local_clock(); - d->next += div_u64(done * NSEC_PER_SEC, d->rate); + d->next += div_u64(done * NSEC_PER_SEC, atomic64_read(&d->rate)); /* Bound the time. Don't let us fall further than 2 seconds behind * (this prevents unnecessary backlog that would make it impossible diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index cced87f8eb27..7e17f32ab563 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -442,7 +442,7 @@ struct bch_ratelimit { * Rate at which we want to do work, in units per second * The units here correspond to the units passed to bch_next_delay() */ - uint32_t rate; + atomic64_t rate; }; static inline void bch_ratelimit_reset(struct bch_ratelimit *d) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index ad45ebe1a74b..72059f910230 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -49,6 +49,63 @@ static uint64_t __calc_target_rate(struct cached_dev *dc) return (cache_dirty_target * bdev_share) >> WRITEBACK_SHARE_SHIFT; } +static bool set_at_max_writeback_rate(struct cache_set *c, + struct cached_dev *dc) +{ + int i, dirty_dc_nr = 0; + struct bcache_device *d; + + mutex_lock(&bch_register_lock); + for (i = 0; i < c->devices_max_used; i++) { + if (!c->devices[i]) + continue; + if (UUID_FLASH_ONLY(&c->uuids[i])) + continue; + d = c->devices[i]; + dc = container_of(d, struct cached_dev, disk); + if (atomic_read(&dc->has_dirty)) + dirty_dc_nr++; + } + mutex_unlock(&bch_register_lock); + + /* + * Idle_counter is increased everytime when update_writeback_rate() + * is rescheduled in. If all backing devices attached to the same + * cache set has same dc->writeback_rate_update_seconds value, it + * is about 10 rounds of update_writeback_rate() is called on each + * backing device, then the code will fall through at set 1 to + * c->at_max_writeback_rate, and a max wrteback rate to each + * dc->writeback_rate.rate. This is not very accurate but works well + * to make sure the whole cache set has no new I/O coming before + * writeback rate is set to a max number. + */ + if (atomic_inc_return(&c->idle_counter) < dirty_dc_nr * 10) + return false; + + if (atomic_read(&c->at_max_writeback_rate) != 1) + atomic_set(&c->at_max_writeback_rate, 1); + + + atomic64_set(&dc->writeback_rate.rate, INT_MAX); + + /* keep writeback_rate_target as existing value */ + dc->writeback_rate_proportional = 0; + dc->writeback_rate_integral_scaled = 0; + dc->writeback_rate_change = 0; + + /* + * Check c->idle_counter and c->at_max_writeback_rate agagain in case + * new I/O arrives during before set_at_max_writeback_rate() returns. + * Then the writeback rate is set to 1, and its new value should be + * decided via __update_writeback_rate(). + */ + if (atomic_read(&c->idle_counter) < dirty_dc_nr * 10 || + !atomic_read(&c->at_max_writeback_rate)) + return false; + + return true; +} + static void __update_writeback_rate(struct cached_dev *dc) { /* @@ -104,8 +161,9 @@ static void __update_writeback_rate(struct cached_dev *dc) dc->writeback_rate_proportional = proportional_scaled; dc->writeback_rate_integral_scaled = integral_scaled; - dc->writeback_rate_change = new_rate - dc->writeback_rate.rate; - dc->writeback_rate.rate = new_rate; + dc->writeback_rate_change = new_rate - + atomic64_read(&dc->writeback_rate.rate); + atomic64_set(&dc->writeback_rate.rate, new_rate); dc->writeback_rate_target = target; } @@ -138,9 +196,16 @@ static void update_writeback_rate(struct work_struct *work) down_read(&dc->writeback_lock); - if (atomic_read(&dc->has_dirty) && - dc->writeback_percent) - __update_writeback_rate(dc); + if (atomic_read(&dc->has_dirty) && dc->writeback_percent) { + /* + * If the whole cache set is idle, set_at_max_writeback_rate() + * will set writeback rate to a max number. Then it is + * unncessary to update writeback rate for an idle cache set + * in maximum writeback rate number(s). + */ + if (!set_at_max_writeback_rate(c, dc)) + __update_writeback_rate(dc); + } up_read(&dc->writeback_lock); @@ -422,27 +487,6 @@ static void read_dirty(struct cached_dev *dc) delay = writeback_delay(dc, size); - /* If the control system would wait for at least half a - * second, and there's been no reqs hitting the backing disk - * for awhile: use an alternate mode where we have at most - * one contiguous set of writebacks in flight at a time. If - * someone wants to do IO it will be quick, as it will only - * have to contend with one operation in flight, and we'll - * be round-tripping data to the backing disk as quickly as - * it can accept it. - */ - if (delay >= HZ / 2) { - /* 3 means at least 1.5 seconds, up to 7.5 if we - * have slowed way down. - */ - if (atomic_inc_return(&dc->backing_idle) >= 3) { - /* Wait for current I/Os to finish */ - closure_sync(&cl); - /* And immediately launch a new set. */ - delay = 0; - } - } - while (!kthread_should_stop() && !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) && delay) { @@ -715,7 +759,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc) dc->writeback_running = true; dc->writeback_percent = 10; dc->writeback_delay = 30; - dc->writeback_rate.rate = 1024; + atomic64_set(&dc->writeback_rate.rate, 1024); dc->writeback_rate_minimum = 8; dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT; -- 2.17.1

7 years, 1 month

3
5
0 0

[PATCH 12/12] can: xilinx_can: fix power management handling

by Marc Kleine-Budde

From: Anssi Hannula <anssi.hannula(a)bitwise.fi> There are several issues with the suspend/resume handling code of the driver: - The device is attached and detached in the runtime_suspend() and runtime_resume() callbacks if the interface is running. However, during xcan_chip_start() the interface is considered running, causing the resume handler to incorrectly call netif_start_queue() at the beginning of xcan_chip_start(), and on xcan_chip_start() error return the suspend handler detaches the device leaving the user unable to bring-up the device anymore. - The device is not brought properly up on system resume. A reset is done and the code tries to determine the bus state after that. However, after reset the device is always in Configuration mode (down), so the state checking code does not make sense and communication will also not work. - The suspend callback tries to set the device to sleep mode (low-power mode which monitors the bus and brings the device back to normal mode on activity), but then immediately disables the clocks (possibly before the device reaches the sleep mode), which does not make sense to me. If a clean shutdown is wanted before disabling clocks, we can just bring it down completely instead of only sleep mode. Reorganize the PM code so that only the clock logic remains in the runtime PM callbacks and the system PM callbacks contain the device bring-up/down logic. This makes calling the runtime PM callbacks during e.g. xcan_chip_start() safe. The system PM callbacks now simply call common code to start/stop the HW if the interface was running, replacing the broken code from before. xcan_chip_stop() is updated to use the common reset code so that it will wait for the reset to complete. Reset also disables all interrupts so do not do that separately. Also, the device_may_wakeup() checks are removed as the driver does not have wakeup support. Tested on Zynq-7000 integrated CAN. Signed-off-by: Anssi Hannula <anssi.hannula(a)bitwise.fi> Cc: Michal Simek <michal.simek(a)xilinx.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de> --- drivers/net/can/xilinx_can.c | 69 +++++++++++++++--------------------- 1 file changed, 28 insertions(+), 41 deletions(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index cb80a9aa7281..5a24039733ef 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -984,13 +984,9 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id) static void xcan_chip_stop(struct net_device *ndev) { struct xcan_priv *priv = netdev_priv(ndev); - u32 ier; /* Disable interrupts and leave the can in configuration mode */ - ier = priv->read_reg(priv, XCAN_IER_OFFSET); - ier &= ~XCAN_INTR_ALL; - priv->write_reg(priv, XCAN_IER_OFFSET, ier); - priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK); + set_reset_mode(ndev); priv->can.state = CAN_STATE_STOPPED; } @@ -1123,10 +1119,15 @@ static const struct net_device_ops xcan_netdev_ops = { */ static int __maybe_unused xcan_suspend(struct device *dev) { - if (!device_may_wakeup(dev)) - return pm_runtime_force_suspend(dev); + struct net_device *ndev = dev_get_drvdata(dev); - return 0; + if (netif_running(ndev)) { + netif_stop_queue(ndev); + netif_device_detach(ndev); + xcan_chip_stop(ndev); + } + + return pm_runtime_force_suspend(dev); } /** @@ -1138,11 +1139,27 @@ static int __maybe_unused xcan_suspend(struct device *dev) */ static int __maybe_unused xcan_resume(struct device *dev) { - if (!device_may_wakeup(dev)) - return pm_runtime_force_resume(dev); + struct net_device *ndev = dev_get_drvdata(dev); + int ret; - return 0; + ret = pm_runtime_force_resume(dev); + if (ret) { + dev_err(dev, "pm_runtime_force_resume failed on resume\n"); + return ret; + } + + if (netif_running(ndev)) { + ret = xcan_chip_start(ndev); + if (ret) { + dev_err(dev, "xcan_chip_start failed on resume\n"); + return ret; + } + netif_device_attach(ndev); + netif_start_queue(ndev); + } + + return 0; } /** @@ -1157,14 +1174,6 @@ static int __maybe_unused xcan_runtime_suspend(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct xcan_priv *priv = netdev_priv(ndev); - if (netif_running(ndev)) { - netif_stop_queue(ndev); - netif_device_detach(ndev); - } - - priv->write_reg(priv, XCAN_MSR_OFFSET, XCAN_MSR_SLEEP_MASK); - priv->can.state = CAN_STATE_SLEEPING; - clk_disable_unprepare(priv->bus_clk); clk_disable_unprepare(priv->can_clk); @@ -1183,7 +1192,6 @@ static int __maybe_unused xcan_runtime_resume(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct xcan_priv *priv = netdev_priv(ndev); int ret; - u32 isr, status; ret = clk_prepare_enable(priv->bus_clk); if (ret) { @@ -1197,27 +1205,6 @@ static int __maybe_unused xcan_runtime_resume(struct device *dev) return ret; } - priv->write_reg(priv, XCAN_SRR_OFFSET, XCAN_SRR_RESET_MASK); - isr = priv->read_reg(priv, XCAN_ISR_OFFSET); - status = priv->read_reg(priv, XCAN_SR_OFFSET); - - if (netif_running(ndev)) { - if (isr & XCAN_IXR_BSOFF_MASK) { - priv->can.state = CAN_STATE_BUS_OFF; - priv->write_reg(priv, XCAN_SRR_OFFSET, - XCAN_SRR_RESET_MASK); - } else if ((status & XCAN_SR_ESTAT_MASK) == - XCAN_SR_ESTAT_MASK) { - priv->can.state = CAN_STATE_ERROR_PASSIVE; - } else if (status & XCAN_SR_ERRWRN_MASK) { - priv->can.state = CAN_STATE_ERROR_WARNING; - } else { - priv->can.state = CAN_STATE_ERROR_ACTIVE; - } - netif_device_attach(ndev); - netif_start_queue(ndev); - } - return 0; } -- 2.18.0

7 years, 1 month

1
0
0 0

[PATCH 11/12] can: xilinx_can: fix incorrect clear of non-processed interrupts

by Marc Kleine-Budde

From: Anssi Hannula <anssi.hannula(a)bitwise.fi> xcan_interrupt() clears ERROR|RXOFLV|BSOFF|ARBLST interrupts if any of them is asserted. This does not take into account that some of them could have been asserted between interrupt status read and interrupt clear, therefore clearing them without handling them. Fix the code to only clear those interrupts that it knows are asserted and therefore going to be processed in xcan_err_interrupt(). Fixes: b1201e44f50b ("can: xilinx CAN controller support") Signed-off-by: Anssi Hannula <anssi.hannula(a)bitwise.fi> Cc: Michal Simek <michal.simek(a)xilinx.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de> --- drivers/net/can/xilinx_can.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index ea9f9d1a5ba7..cb80a9aa7281 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -938,6 +938,7 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id) struct net_device *ndev = (struct net_device *)dev_id; struct xcan_priv *priv = netdev_priv(ndev); u32 isr, ier; + u32 isr_errors; /* Get the interrupt status from Xilinx CAN */ isr = priv->read_reg(priv, XCAN_ISR_OFFSET); @@ -956,11 +957,10 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id) xcan_tx_interrupt(ndev, isr); /* Check for the type of error interrupt and Processing it */ - if (isr & (XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK | - XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK)) { - priv->write_reg(priv, XCAN_ICR_OFFSET, (XCAN_IXR_ERROR_MASK | - XCAN_IXR_RXOFLW_MASK | XCAN_IXR_BSOFF_MASK | - XCAN_IXR_ARBLST_MASK)); + isr_errors = isr & (XCAN_IXR_ERROR_MASK | XCAN_IXR_RXOFLW_MASK | + XCAN_IXR_BSOFF_MASK | XCAN_IXR_ARBLST_MASK); + if (isr_errors) { + priv->write_reg(priv, XCAN_ICR_OFFSET, isr_errors); xcan_err_interrupt(ndev, isr); } -- 2.18.0

7 years, 1 month

1
0
0 0

[PATCH 10/12] can: xilinx_can: fix RX overflow interrupt not being enabled

by Marc Kleine-Budde

From: Anssi Hannula <anssi.hannula(a)bitwise.fi> RX overflow interrupt (RXOFLW) is disabled even though xcan_interrupt() processes it. This means that an RX overflow interrupt will only be processed when another interrupt gets asserted (e.g. for RX/TX). Fix that by enabling the RXOFLW interrupt. Fixes: b1201e44f50b ("can: xilinx CAN controller support") Signed-off-by: Anssi Hannula <anssi.hannula(a)bitwise.fi> Cc: Michal Simek <michal.simek(a)xilinx.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de> --- drivers/net/can/xilinx_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index dcbdc3cd651c..ea9f9d1a5ba7 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -104,7 +104,7 @@ enum xcan_reg { #define XCAN_INTR_ALL (XCAN_IXR_TXOK_MASK | XCAN_IXR_BSOFF_MASK |\ XCAN_IXR_WKUP_MASK | XCAN_IXR_SLP_MASK | \ XCAN_IXR_RXNEMP_MASK | XCAN_IXR_ERROR_MASK | \ - XCAN_IXR_ARBLST_MASK) + XCAN_IXR_RXOFLW_MASK | XCAN_IXR_ARBLST_MASK) /* CAN register bit shift - XCAN_<REG>_<BIT>_SHIFT */ #define XCAN_BTR_SJW_SHIFT 7 /* Synchronous jump width */ -- 2.18.0

7 years, 1 month

1
0
0 0

[PATCH 09/12] can: xilinx_can: keep only 1-2 frames in TX FIFO to fix TX accounting

by Marc Kleine-Budde

From: Anssi Hannula <anssi.hannula(a)bitwise.fi> The xilinx_can driver assumes that the TXOK interrupt only clears after it has been acknowledged as many times as there have been successfully sent frames. However, the documentation does not mention such behavior, instead saying just that the interrupt is cleared when the clear bit is set. Similarly, testing seems to also suggest that it is immediately cleared regardless of the amount of frames having been sent. Performing some heavy TX load and then going back to idle has the tx_head drifting further away from tx_tail over time, steadily reducing the amount of frames the driver keeps in the TX FIFO (but not to zero, as the TXOK interrupt always frees up space for 1 frame from the driver's perspective, so frames continue to be sent) and delaying the local echo frames. The TX FIFO tracking is also otherwise buggy as it does not account for TX FIFO being cleared after software resets, causing BUG!, TX FIFO full when queue awake! messages to be output. There does not seem to be any way to accurately track the state of the TX FIFO for local echo support while using the full TX FIFO. The Zynq version of the HW (but not the soft-AXI version) has watermark programming support and with it an additional TX-FIFO-empty interrupt bit. Modify the driver to only put 1 frame into TX FIFO at a time on soft-AXI and 2 frames at a time on Zynq. On Zynq the TXFEMP interrupt bit is used to detect whether 1 or 2 frames have been sent at interrupt processing time. Tested with the integrated CAN on Zynq-7000 SoC. The 1-frame-FIFO mode was also tested. An alternative way to solve this would be to drop local echo support but keep using the full TX FIFO. v2: Add FIFO space check before TX queue wake with locking to synchronize with queue stop. This avoids waking the queue when xmit() had just filled it. v3: Keep local echo support and reduce the amount of frames in FIFO instead as suggested by Marc Kleine-Budde. Fixes: b1201e44f50b ("can: xilinx CAN controller support") Signed-off-by: Anssi Hannula <anssi.hannula(a)bitwise.fi> Cc: <stable(a)vger.kernel.org> Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de> --- drivers/net/can/xilinx_can.c | 139 +++++++++++++++++++++++++++++++---- 1 file changed, 123 insertions(+), 16 deletions(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 763408a3eafb..dcbdc3cd651c 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -26,8 +26,10 @@ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/of.h> +#include <linux/of_device.h> #include <linux/platform_device.h> #include <linux/skbuff.h> +#include <linux/spinlock.h> #include <linux/string.h> #include <linux/types.h> #include <linux/can/dev.h> @@ -119,6 +121,7 @@ enum xcan_reg { /** * struct xcan_priv - This definition define CAN driver instance * @can: CAN private data structure. + * @tx_lock: Lock for synchronizing TX interrupt handling * @tx_head: Tx CAN packets ready to send on the queue * @tx_tail: Tx CAN packets successfully sended on the queue * @tx_max: Maximum number packets the driver can send @@ -133,6 +136,7 @@ enum xcan_reg { */ struct xcan_priv { struct can_priv can; + spinlock_t tx_lock; unsigned int tx_head; unsigned int tx_tail; unsigned int tx_max; @@ -160,6 +164,11 @@ static const struct can_bittiming_const xcan_bittiming_const = { .brp_inc = 1, }; +#define XCAN_CAP_WATERMARK 0x0001 +struct xcan_devtype_data { + unsigned int caps; +}; + /** * xcan_write_reg_le - Write a value to the device register little endian * @priv: Driver private data structure @@ -239,6 +248,10 @@ static int set_reset_mode(struct net_device *ndev) usleep_range(500, 10000); } + /* reset clears FIFOs */ + priv->tx_head = 0; + priv->tx_tail = 0; + return 0; } @@ -393,6 +406,7 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev) struct net_device_stats *stats = &ndev->stats; struct can_frame *cf = (struct can_frame *)skb->data; u32 id, dlc, data[2] = {0, 0}; + unsigned long flags; if (can_dropped_invalid_skb(ndev, skb)) return NETDEV_TX_OK; @@ -440,6 +454,9 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev) data[1] = be32_to_cpup((__be32 *)(cf->data + 4)); can_put_echo_skb(skb, ndev, priv->tx_head % priv->tx_max); + + spin_lock_irqsave(&priv->tx_lock, flags); + priv->tx_head++; /* Write the Frame to Xilinx CAN TX FIFO */ @@ -455,10 +472,16 @@ static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev) stats->tx_bytes += cf->can_dlc; } + /* Clear TX-FIFO-empty interrupt for xcan_tx_interrupt() */ + if (priv->tx_max > 1) + priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXFEMP_MASK); + /* Check if the TX buffer is full */ if ((priv->tx_head - priv->tx_tail) == priv->tx_max) netif_stop_queue(ndev); + spin_unlock_irqrestore(&priv->tx_lock, flags); + return NETDEV_TX_OK; } @@ -832,19 +855,71 @@ static void xcan_tx_interrupt(struct net_device *ndev, u32 isr) { struct xcan_priv *priv = netdev_priv(ndev); struct net_device_stats *stats = &ndev->stats; + unsigned int frames_in_fifo; + int frames_sent = 1; /* TXOK => at least 1 frame was sent */ + unsigned long flags; + int retries = 0; + + /* Synchronize with xmit as we need to know the exact number + * of frames in the FIFO to stay in sync due to the TXFEMP + * handling. + * This also prevents a race between netif_wake_queue() and + * netif_stop_queue(). + */ + spin_lock_irqsave(&priv->tx_lock, flags); + + frames_in_fifo = priv->tx_head - priv->tx_tail; - while ((priv->tx_head - priv->tx_tail > 0) && - (isr & XCAN_IXR_TXOK_MASK)) { + if (WARN_ON_ONCE(frames_in_fifo == 0)) { + /* clear TXOK anyway to avoid getting back here */ priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK); + spin_unlock_irqrestore(&priv->tx_lock, flags); + return; + } + + /* Check if 2 frames were sent (TXOK only means that at least 1 + * frame was sent). + */ + if (frames_in_fifo > 1) { + WARN_ON(frames_in_fifo > priv->tx_max); + + /* Synchronize TXOK and isr so that after the loop: + * (1) isr variable is up-to-date at least up to TXOK clear + * time. This avoids us clearing a TXOK of a second frame + * but not noticing that the FIFO is now empty and thus + * marking only a single frame as sent. + * (2) No TXOK is left. Having one could mean leaving a + * stray TXOK as we might process the associated frame + * via TXFEMP handling as we read TXFEMP *after* TXOK + * clear to satisfy (1). + */ + while ((isr & XCAN_IXR_TXOK_MASK) && !WARN_ON(++retries == 100)) { + priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK); + isr = priv->read_reg(priv, XCAN_ISR_OFFSET); + } + + if (isr & XCAN_IXR_TXFEMP_MASK) { + /* nothing in FIFO anymore */ + frames_sent = frames_in_fifo; + } + } else { + /* single frame in fifo, just clear TXOK */ + priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_TXOK_MASK); + } + + while (frames_sent--) { can_get_echo_skb(ndev, priv->tx_tail % priv->tx_max); priv->tx_tail++; stats->tx_packets++; - isr = priv->read_reg(priv, XCAN_ISR_OFFSET); } + + netif_wake_queue(ndev); + + spin_unlock_irqrestore(&priv->tx_lock, flags); + can_led_event(ndev, CAN_LED_EVENT_TX); xcan_update_error_state_after_rxtx(ndev); - netif_wake_queue(ndev); } /** @@ -1151,6 +1226,18 @@ static const struct dev_pm_ops xcan_dev_pm_ops = { SET_RUNTIME_PM_OPS(xcan_runtime_suspend, xcan_runtime_resume, NULL) }; +static const struct xcan_devtype_data xcan_zynq_data = { + .caps = XCAN_CAP_WATERMARK, +}; + +/* Match table for OF platform binding */ +static const struct of_device_id xcan_of_match[] = { + { .compatible = "xlnx,zynq-can-1.0", .data = &xcan_zynq_data }, + { .compatible = "xlnx,axi-can-1.00.a", }, + { /* end of list */ }, +}; +MODULE_DEVICE_TABLE(of, xcan_of_match); + /** * xcan_probe - Platform registration call * @pdev: Handle to the platform device structure @@ -1165,8 +1252,10 @@ static int xcan_probe(struct platform_device *pdev) struct resource *res; /* IO mem resources */ struct net_device *ndev; struct xcan_priv *priv; + const struct of_device_id *of_id; + int caps = 0; void __iomem *addr; - int ret, rx_max, tx_max; + int ret, rx_max, tx_max, tx_fifo_depth; /* Get the virtual base address for the device */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -1176,7 +1265,8 @@ static int xcan_probe(struct platform_device *pdev) goto err; } - ret = of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth", &tx_max); + ret = of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth", + &tx_fifo_depth); if (ret < 0) goto err; @@ -1184,6 +1274,30 @@ static int xcan_probe(struct platform_device *pdev) if (ret < 0) goto err; + of_id = of_match_device(xcan_of_match, &pdev->dev); + if (of_id) { + const struct xcan_devtype_data *devtype_data = of_id->data; + + if (devtype_data) + caps = devtype_data->caps; + } + + /* There is no way to directly figure out how many frames have been + * sent when the TXOK interrupt is processed. If watermark programming + * is supported, we can have 2 frames in the FIFO and use TXFEMP + * to determine if 1 or 2 frames have been sent. + * Theoretically we should be able to use TXFWMEMP to determine up + * to 3 frames, but it seems that after putting a second frame in the + * FIFO, with watermark at 2 frames, it can happen that TXFWMEMP (less + * than 2 frames in FIFO) is set anyway with no TXOK (a frame was + * sent), which is not a sensible state - possibly TXFWMEMP is not + * completely synchronized with the rest of the bits? + */ + if (caps & XCAN_CAP_WATERMARK) + tx_max = min(tx_fifo_depth, 2); + else + tx_max = 1; + /* Create a CAN device instance */ ndev = alloc_candev(sizeof(struct xcan_priv), tx_max); if (!ndev) @@ -1198,6 +1312,7 @@ static int xcan_probe(struct platform_device *pdev) CAN_CTRLMODE_BERR_REPORTING; priv->reg_base = addr; priv->tx_max = tx_max; + spin_lock_init(&priv->tx_lock); /* Get IRQ for the device */ ndev->irq = platform_get_irq(pdev, 0); @@ -1262,9 +1377,9 @@ static int xcan_probe(struct platform_device *pdev) pm_runtime_put(&pdev->dev); - netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth:%d\n", + netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx fifo depth: actual %d, using %d\n", priv->reg_base, ndev->irq, priv->can.clock.freq, - priv->tx_max); + tx_fifo_depth, priv->tx_max); return 0; @@ -1298,14 +1413,6 @@ static int xcan_remove(struct platform_device *pdev) return 0; } -/* Match table for OF platform binding */ -static const struct of_device_id xcan_of_match[] = { - { .compatible = "xlnx,zynq-can-1.0", }, - { .compatible = "xlnx,axi-can-1.00.a", }, - { /* end of list */ }, -}; -MODULE_DEVICE_TABLE(of, xcan_of_match); - static struct platform_driver xcan_driver = { .probe = xcan_probe, .remove = xcan_remove, -- 2.18.0

7 years, 1 month

1
0
0 0

[PATCH 08/12] can: xilinx_can: fix recovery from error states not being propagated

by Marc Kleine-Budde

From: Anssi Hannula <anssi.hannula(a)bitwise.fi> The xilinx_can driver contains no mechanism for propagating recovery from CAN_STATE_ERROR_WARNING and CAN_STATE_ERROR_PASSIVE. Add such a mechanism by factoring the handling of XCAN_STATE_ERROR_PASSIVE and XCAN_STATE_ERROR_WARNING out of xcan_err_interrupt and checking for recovery after RX and TX if the interface is in one of those states. Tested with the integrated CAN on Zynq-7000 SoC. Fixes: b1201e44f50b ("can: xilinx CAN controller support") Signed-off-by: Anssi Hannula <anssi.hannula(a)bitwise.fi> Cc: <stable(a)vger.kernel.org> Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de> --- drivers/net/can/xilinx_can.c | 155 ++++++++++++++++++++++++++++------- 1 file changed, 127 insertions(+), 28 deletions(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 1bda47aa62f5..763408a3eafb 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -2,6 +2,7 @@ * * Copyright (C) 2012 - 2014 Xilinx, Inc. * Copyright (C) 2009 PetaLogix. All rights reserved. + * Copyright (C) 2017 Sandvik Mining and Construction Oy * * Description: * This driver is developed for Axi CAN IP and for Zynq CANPS Controller. @@ -529,6 +530,123 @@ static int xcan_rx(struct net_device *ndev) return 1; } +/** + * xcan_current_error_state - Get current error state from HW + * @ndev: Pointer to net_device structure + * + * Checks the current CAN error state from the HW. Note that this + * only checks for ERROR_PASSIVE and ERROR_WARNING. + * + * Return: + * ERROR_PASSIVE or ERROR_WARNING if either is active, ERROR_ACTIVE + * otherwise. + */ +static enum can_state xcan_current_error_state(struct net_device *ndev) +{ + struct xcan_priv *priv = netdev_priv(ndev); + u32 status = priv->read_reg(priv, XCAN_SR_OFFSET); + + if ((status & XCAN_SR_ESTAT_MASK) == XCAN_SR_ESTAT_MASK) + return CAN_STATE_ERROR_PASSIVE; + else if (status & XCAN_SR_ERRWRN_MASK) + return CAN_STATE_ERROR_WARNING; + else + return CAN_STATE_ERROR_ACTIVE; +} + +/** + * xcan_set_error_state - Set new CAN error state + * @ndev: Pointer to net_device structure + * @new_state: The new CAN state to be set + * @cf: Error frame to be populated or NULL + * + * Set new CAN error state for the device, updating statistics and + * populating the error frame if given. + */ +static void xcan_set_error_state(struct net_device *ndev, + enum can_state new_state, + struct can_frame *cf) +{ + struct xcan_priv *priv = netdev_priv(ndev); + u32 ecr = priv->read_reg(priv, XCAN_ECR_OFFSET); + u32 txerr = ecr & XCAN_ECR_TEC_MASK; + u32 rxerr = (ecr & XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT; + + priv->can.state = new_state; + + if (cf) { + cf->can_id |= CAN_ERR_CRTL; + cf->data[6] = txerr; + cf->data[7] = rxerr; + } + + switch (new_state) { + case CAN_STATE_ERROR_PASSIVE: + priv->can.can_stats.error_passive++; + if (cf) + cf->data[1] = (rxerr > 127) ? + CAN_ERR_CRTL_RX_PASSIVE : + CAN_ERR_CRTL_TX_PASSIVE; + break; + case CAN_STATE_ERROR_WARNING: + priv->can.can_stats.error_warning++; + if (cf) + cf->data[1] |= (txerr > rxerr) ? + CAN_ERR_CRTL_TX_WARNING : + CAN_ERR_CRTL_RX_WARNING; + break; + case CAN_STATE_ERROR_ACTIVE: + if (cf) + cf->data[1] |= CAN_ERR_CRTL_ACTIVE; + break; + default: + /* non-ERROR states are handled elsewhere */ + WARN_ON(1); + break; + } +} + +/** + * xcan_update_error_state_after_rxtx - Update CAN error state after RX/TX + * @ndev: Pointer to net_device structure + * + * If the device is in a ERROR-WARNING or ERROR-PASSIVE state, check if + * the performed RX/TX has caused it to drop to a lesser state and set + * the interface state accordingly. + */ +static void xcan_update_error_state_after_rxtx(struct net_device *ndev) +{ + struct xcan_priv *priv = netdev_priv(ndev); + enum can_state old_state = priv->can.state; + enum can_state new_state; + + /* changing error state due to successful frame RX/TX can only + * occur from these states + */ + if (old_state != CAN_STATE_ERROR_WARNING && + old_state != CAN_STATE_ERROR_PASSIVE) + return; + + new_state = xcan_current_error_state(ndev); + + if (new_state != old_state) { + struct sk_buff *skb; + struct can_frame *cf; + + skb = alloc_can_err_skb(ndev, &cf); + + xcan_set_error_state(ndev, new_state, skb ? cf : NULL); + + if (skb) { + struct net_device_stats *stats = &ndev->stats; + + stats->rx_packets++; + stats->rx_bytes += cf->can_dlc; + netif_rx(skb); + } + } +} + /** * xcan_err_interrupt - error frame Isr * @ndev: net_device pointer @@ -544,16 +662,12 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr) struct net_device_stats *stats = &ndev->stats; struct can_frame *cf; struct sk_buff *skb; - u32 err_status, status, txerr = 0, rxerr = 0; + u32 err_status; skb = alloc_can_err_skb(ndev, &cf); err_status = priv->read_reg(priv, XCAN_ESR_OFFSET); priv->write_reg(priv, XCAN_ESR_OFFSET, err_status); - txerr = priv->read_reg(priv, XCAN_ECR_OFFSET) & XCAN_ECR_TEC_MASK; - rxerr = ((priv->read_reg(priv, XCAN_ECR_OFFSET) & - XCAN_ECR_REC_MASK) >> XCAN_ESR_REC_SHIFT); - status = priv->read_reg(priv, XCAN_SR_OFFSET); if (isr & XCAN_IXR_BSOFF_MASK) { priv->can.state = CAN_STATE_BUS_OFF; @@ -563,28 +677,10 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr) can_bus_off(ndev); if (skb) cf->can_id |= CAN_ERR_BUSOFF; - } else if ((status & XCAN_SR_ESTAT_MASK) == XCAN_SR_ESTAT_MASK) { - priv->can.state = CAN_STATE_ERROR_PASSIVE; - priv->can.can_stats.error_passive++; - if (skb) { - cf->can_id |= CAN_ERR_CRTL; - cf->data[1] = (rxerr > 127) ? - CAN_ERR_CRTL_RX_PASSIVE : - CAN_ERR_CRTL_TX_PASSIVE; - cf->data[6] = txerr; - cf->data[7] = rxerr; - } - } else if (status & XCAN_SR_ERRWRN_MASK) { - priv->can.state = CAN_STATE_ERROR_WARNING; - priv->can.can_stats.error_warning++; - if (skb) { - cf->can_id |= CAN_ERR_CRTL; - cf->data[1] |= (txerr > rxerr) ? - CAN_ERR_CRTL_TX_WARNING : - CAN_ERR_CRTL_RX_WARNING; - cf->data[6] = txerr; - cf->data[7] = rxerr; - } + } else { + enum can_state new_state = xcan_current_error_state(ndev); + + xcan_set_error_state(ndev, new_state, skb ? cf : NULL); } /* Check for Arbitration lost interrupt */ @@ -713,8 +809,10 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota) isr = priv->read_reg(priv, XCAN_ISR_OFFSET); } - if (work_done) + if (work_done) { can_led_event(ndev, CAN_LED_EVENT_RX); + xcan_update_error_state_after_rxtx(ndev); + } if (work_done < quota) { napi_complete_done(napi, work_done); @@ -745,6 +843,7 @@ static void xcan_tx_interrupt(struct net_device *ndev, u32 isr) isr = priv->read_reg(priv, XCAN_ISR_OFFSET); } can_led_event(ndev, CAN_LED_EVENT_TX); + xcan_update_error_state_after_rxtx(ndev); netif_wake_queue(ndev); } -- 2.18.0

7 years, 1 month

1
0
0 0

[PATCH 07/12] can: xilinx_can: fix RX loop if RXNEMP is asserted without RXOK

by Marc Kleine-Budde

From: Anssi Hannula <anssi.hannula(a)bitwise.fi> If the device gets into a state where RXNEMP (RX FIFO not empty) interrupt is asserted without RXOK (new frame received successfully) interrupt being asserted, xcan_rx_poll() will continue to try to clear RXNEMP without actually reading frames from RX FIFO. If the RX FIFO is not empty, the interrupt will not be cleared and napi_schedule() will just be called again. This situation can occur when: (a) xcan_rx() returns without reading RX FIFO due to an error condition. The code tries to clear both RXOK and RXNEMP but RXNEMP will not clear due to a frame still being in the FIFO. The frame will never be read from the FIFO as RXOK is no longer set. (b) A frame is received between xcan_rx_poll() reading interrupt status and clearing RXOK. RXOK will be cleared, but RXNEMP will again remain set as the new message is still in the FIFO. I'm able to trigger case (b) by flooding the bus with frames under load. There does not seem to be any benefit in using both RXNEMP and RXOK in the way the driver does, and the polling example in the reference manual (UG585 v1.10 18.3.7 Read Messages from RxFIFO) also says that either RXOK or RXNEMP can be used for detecting incoming messages. Fix the issue and simplify the RX processing by only using RXNEMP without RXOK. Tested with the integrated CAN on Zynq-7000 SoC. Fixes: b1201e44f50b ("can: xilinx CAN controller support") Signed-off-by: Anssi Hannula <anssi.hannula(a)bitwise.fi> Cc: <stable(a)vger.kernel.org> Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de> --- drivers/net/can/xilinx_can.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 389a9603db8c..1bda47aa62f5 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -101,7 +101,7 @@ enum xcan_reg { #define XCAN_INTR_ALL (XCAN_IXR_TXOK_MASK | XCAN_IXR_BSOFF_MASK |\ XCAN_IXR_WKUP_MASK | XCAN_IXR_SLP_MASK | \ XCAN_IXR_RXNEMP_MASK | XCAN_IXR_ERROR_MASK | \ - XCAN_IXR_ARBLST_MASK | XCAN_IXR_RXOK_MASK) + XCAN_IXR_ARBLST_MASK) /* CAN register bit shift - XCAN_<REG>_<BIT>_SHIFT */ #define XCAN_BTR_SJW_SHIFT 7 /* Synchronous jump width */ @@ -708,15 +708,7 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota) isr = priv->read_reg(priv, XCAN_ISR_OFFSET); while ((isr & XCAN_IXR_RXNEMP_MASK) && (work_done < quota)) { - if (isr & XCAN_IXR_RXOK_MASK) { - priv->write_reg(priv, XCAN_ICR_OFFSET, - XCAN_IXR_RXOK_MASK); - work_done += xcan_rx(ndev); - } else { - priv->write_reg(priv, XCAN_ICR_OFFSET, - XCAN_IXR_RXNEMP_MASK); - break; - } + work_done += xcan_rx(ndev); priv->write_reg(priv, XCAN_ICR_OFFSET, XCAN_IXR_RXNEMP_MASK); isr = priv->read_reg(priv, XCAN_ISR_OFFSET); } @@ -727,7 +719,7 @@ static int xcan_rx_poll(struct napi_struct *napi, int quota) if (work_done < quota) { napi_complete_done(napi, work_done); ier = priv->read_reg(priv, XCAN_IER_OFFSET); - ier |= (XCAN_IXR_RXOK_MASK | XCAN_IXR_RXNEMP_MASK); + ier |= XCAN_IXR_RXNEMP_MASK; priv->write_reg(priv, XCAN_IER_OFFSET, ier); } return work_done; @@ -799,9 +791,9 @@ static irqreturn_t xcan_interrupt(int irq, void *dev_id) } /* Check for the type of receive interrupt and Processing it */ - if (isr & (XCAN_IXR_RXNEMP_MASK | XCAN_IXR_RXOK_MASK)) { + if (isr & XCAN_IXR_RXNEMP_MASK) { ier = priv->read_reg(priv, XCAN_IER_OFFSET); - ier &= ~(XCAN_IXR_RXNEMP_MASK | XCAN_IXR_RXOK_MASK); + ier &= ~XCAN_IXR_RXNEMP_MASK; priv->write_reg(priv, XCAN_IER_OFFSET, ier); napi_schedule(&priv->napi); } -- 2.18.0

7 years, 1 month

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror