The patch titled
Subject: Revert "mm: page_alloc: skip over regions of invalid pfns where possible"
has been added to the -mm tree. Its filename is
revert-mm-page_alloc-skip-over-regions-of-invalid-pfns-where-possible.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/revert-mm-page_alloc-skip-over-reg…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/revert-mm-page_alloc-skip-over-reg…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Daniel Vacek <neelx(a)redhat.com>
Subject: Revert "mm: page_alloc: skip over regions of invalid pfns where possible"
This reverts b92df1de5d289c0b ("mm: page_alloc: skip over regions of
invalid pfns where possible"). The commit is meant to be a boot init
speed up skipping the loop in memmap_init_zone() for invalid pfns. But
given some specific memory mapping on x86_64 (or more generally
theoretically anywhere but on arm with CONFIG_HAVE_ARCH_PFN_VALID) the
implementation also skips valid pfns which is plain wrong and causes
'kernel BUG at mm/page_alloc.c:1389!'
crash> log | grep -e BUG -e RIP -e Call.Trace -e move_freepages_block -e rmqueue -e freelist -A1
kernel BUG at mm/page_alloc.c:1389!
invalid opcode: 0000 [#1] SMP
--
RIP: 0010:[<ffffffff8118833e>] [<ffffffff8118833e>] move_freepages+0x15e/0x160
RSP: 0018:ffff88054d727688 EFLAGS: 00010087
--
Call Trace:
[<ffffffff811883b3>] move_freepages_block+0x73/0x80
[<ffffffff81189e63>] __rmqueue+0x263/0x460
[<ffffffff8118c781>] get_page_from_freelist+0x7e1/0x9e0
[<ffffffff8118caf6>] __alloc_pages_nodemask+0x176/0x420
--
RIP [<ffffffff8118833e>] move_freepages+0x15e/0x160
RSP <ffff88054d727688>
crash> page_init_bug -v | grep RAM
<struct resource 0xffff88067fffd2f8> 1000 - 9bfff System RAM (620.00 KiB)
<struct resource 0xffff88067fffd3a0> 100000 - 430bffff System RAM ( 1.05 GiB = 1071.75 MiB = 1097472.00 KiB)
<struct resource 0xffff88067fffd410> 4b0c8000 - 4bf9cfff System RAM ( 14.83 MiB = 15188.00 KiB)
<struct resource 0xffff88067fffd480> 4bfac000 - 646b1fff System RAM (391.02 MiB = 400408.00 KiB)
<struct resource 0xffff88067fffd560> 7b788000 - 7b7fffff System RAM (480.00 KiB)
<struct resource 0xffff88067fffd640> 100000000 - 67fffffff System RAM ( 22.00 GiB)
crash> page_init_bug | head -6
<struct resource 0xffff88067fffd560> 7b788000 - 7b7fffff System RAM (480.00 KiB)
<struct page 0xffffea0001ede200> 1fffff00000000 0 <struct pglist_data 0xffff88047ffd9000> 1 <struct zone 0xffff88047ffd9800> DMA32 4096 1048575
<struct page 0xffffea0001ede200> 505736 505344 <struct page 0xffffea0001ed8000> 505855 <struct page 0xffffea0001edffc0>
<struct page 0xffffea0001ed8000> 0 0 <struct pglist_data 0xffff88047ffd9000> 0 <struct zone 0xffff88047ffd9000> DMA 1 4095
<struct page 0xffffea0001edffc0> 1fffff00000400 0 <struct pglist_data 0xffff88047ffd9000> 1 <struct zone 0xffff88047ffd9800> DMA32 4096 1048575
BUG, zones differ!
crash> kmem -p 77fff000 78000000 7b5ff000 7b600000 7b787000 7b788000
PAGE PHYSICAL MAPPING INDEX CNT FLAGS
ffffea0001e00000 78000000 0 0 0 0
ffffea0001ed7fc0 7b5ff000 0 0 0 0
ffffea0001ed8000 7b600000 0 0 0 0 <<<<
ffffea0001ede1c0 7b787000 0 0 0 0
ffffea0001ede200 7b788000 0 0 1 1fffff00000000
Link: http://lkml.kernel.org/r/20180316143855.29838-1-neelx@redhat.com
Fixes: b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns where possible")
Signed-off-by: Daniel Vacek <neelx(a)redhat.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel(a)linaro.org>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Reviewed-by: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Mel Gorman <mgorman(a)techsingularity.net>
Cc: Pavel Tatashin <pasha.tatashin(a)oracle.com>
Cc: Paul Burton <paul.burton(a)imgtec.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/memblock.h | 1 -
mm/memblock.c | 28 ----------------------------
mm/page_alloc.c | 11 +----------
3 files changed, 1 insertion(+), 39 deletions(-)
diff -puN include/linux/memblock.h~revert-mm-page_alloc-skip-over-regions-of-invalid-pfns-where-possible include/linux/memblock.h
--- a/include/linux/memblock.h~revert-mm-page_alloc-skip-over-regions-of-invalid-pfns-where-possible
+++ a/include/linux/memblock.h
@@ -187,7 +187,6 @@ int memblock_search_pfn_nid(unsigned lon
unsigned long *end_pfn);
void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
unsigned long *out_end_pfn, int *out_nid);
-unsigned long memblock_next_valid_pfn(unsigned long pfn, unsigned long max_pfn);
/**
* for_each_mem_pfn_range - early memory pfn range iterator
diff -puN mm/memblock.c~revert-mm-page_alloc-skip-over-regions-of-invalid-pfns-where-possible mm/memblock.c
--- a/mm/memblock.c~revert-mm-page_alloc-skip-over-regions-of-invalid-pfns-where-possible
+++ a/mm/memblock.c
@@ -1101,34 +1101,6 @@ void __init_memblock __next_mem_pfn_rang
*out_nid = r->nid;
}
-unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn,
- unsigned long max_pfn)
-{
- struct memblock_type *type = &memblock.memory;
- unsigned int right = type->cnt;
- unsigned int mid, left = 0;
- phys_addr_t addr = PFN_PHYS(++pfn);
-
- do {
- mid = (right + left) / 2;
-
- if (addr < type->regions[mid].base)
- right = mid;
- else if (addr >= (type->regions[mid].base +
- type->regions[mid].size))
- left = mid + 1;
- else {
- /* addr is within the region, so pfn is valid */
- return pfn;
- }
- } while (left < right);
-
- if (right == type->cnt)
- return -1UL;
- else
- return PHYS_PFN(type->regions[right].base);
-}
-
/**
* memblock_set_node - set node ID on memblock regions
* @base: base of area to set node ID for
diff -puN mm/page_alloc.c~revert-mm-page_alloc-skip-over-regions-of-invalid-pfns-where-possible mm/page_alloc.c
--- a/mm/page_alloc.c~revert-mm-page_alloc-skip-over-regions-of-invalid-pfns-where-possible
+++ a/mm/page_alloc.c
@@ -5356,17 +5356,8 @@ void __meminit memmap_init_zone(unsigned
if (context != MEMMAP_EARLY)
goto not_early;
- if (!early_pfn_valid(pfn)) {
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
- /*
- * Skip to the pfn preceding the next valid one (or
- * end_pfn), such that we hit a valid pfn (or end_pfn)
- * on our next iteration of the loop.
- */
- pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1;
-#endif
+ if (!early_pfn_valid(pfn))
continue;
- }
if (!early_pfn_in_nid(pfn, nid))
continue;
if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised))
_
Patches currently in -mm which might be from neelx(a)redhat.com are
revert-mm-page_alloc-skip-over-regions-of-invalid-pfns-where-possible.patch
The patch titled
Subject: mm/shmem: do not wait for lock_page() in shmem_unused_huge_shrink()
has been added to the -mm tree. Its filename is
mm-shmem-do-not-wait-for-lock_page-in-shmem_unused_huge_shrink.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-shmem-do-not-wait-for-lock_page…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-shmem-do-not-wait-for-lock_page…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: "Kirill A. Shutemov" <kirill.shutemov(a)linux.intel.com>
Subject: mm/shmem: do not wait for lock_page() in shmem_unused_huge_shrink()
shmem_unused_huge_shrink() gets called from reclaim path. Waiting for
page lock may lead to deadlock there.
There was a bug report that may be attributed to this:
http://lkml.kernel.org/r/alpine.LRH.2.11.1801242349220.30642@mail.ewheeler.…
Replace lock_page() with trylock_page() and skip the page if we failed to
lock it. We will get to the page on the next scan.
We can test for the PageTransHuge() outside the page lock as we only need
protection against splitting the page under us. Holding a pin on the page
is enough for this.
Link: http://lkml.kernel.org/r/20180316210830.43738-1-kirill.shutemov@linux.intel…
Fixes: 779750d20b93 ("shmem: split huge pages beyond i_size under memory pressure")
Signed-off-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Reported-by: Eric Wheeler <linux-mm(a)lists.ewheeler.net>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Reviewed-by: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: <stable(a)vger.kernel.org> [4.8+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/shmem.c | 31 ++++++++++++++++++++-----------
1 file changed, 20 insertions(+), 11 deletions(-)
diff -puN mm/shmem.c~mm-shmem-do-not-wait-for-lock_page-in-shmem_unused_huge_shrink mm/shmem.c
--- a/mm/shmem.c~mm-shmem-do-not-wait-for-lock_page-in-shmem_unused_huge_shrink
+++ a/mm/shmem.c
@@ -493,36 +493,45 @@ next:
info = list_entry(pos, struct shmem_inode_info, shrinklist);
inode = &info->vfs_inode;
- if (nr_to_split && split >= nr_to_split) {
- iput(inode);
- continue;
- }
+ if (nr_to_split && split >= nr_to_split)
+ goto leave;
- page = find_lock_page(inode->i_mapping,
+ page = find_get_page(inode->i_mapping,
(inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
if (!page)
goto drop;
+ /* No huge page at the end of the file: nothing to split */
if (!PageTransHuge(page)) {
- unlock_page(page);
put_page(page);
goto drop;
}
+ /*
+ * Leave the inode on the list if we failed to lock
+ * the page at this time.
+ *
+ * Waiting for the lock may lead to deadlock in the
+ * reclaim path.
+ */
+ if (!trylock_page(page)) {
+ put_page(page);
+ goto leave;
+ }
+
ret = split_huge_page(page);
unlock_page(page);
put_page(page);
- if (ret) {
- /* split failed: leave it on the list */
- iput(inode);
- continue;
- }
+ /* If split failed leave the inode on the list */
+ if (ret)
+ goto leave;
split++;
drop:
list_del_init(&info->shrinklist);
removed++;
+leave:
iput(inode);
}
_
Patches currently in -mm which might be from kirill.shutemov(a)linux.intel.com are
mm-khugepaged-convert-vm_bug_on-to-collapse-fail.patch
mm-thp-do-not-wait-for-lock_page-in-deferred_split_scan.patch
mm-shmem-do-not-wait-for-lock_page-in-shmem_unused_huge_shrink.patch
The patch titled
Subject: mm/hmm: HMM should have a callback before MM is destroyed
has been added to the -mm tree. Its filename is
mm-hmm-hmm-should-have-a-callback-before-mm-is-destroyed-v2.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-hmm-hmm-should-have-a-callback-…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-hmm-hmm-should-have-a-callback-…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Ralph Campbell <rcampbell(a)nvidia.com>
Subject: mm/hmm: HMM should have a callback before MM is destroyed
hmm_mirror_register() registers a callback for when the CPU pagetable is
modified. Normally, the device driver will call hmm_mirror_unregister()
when the process using the device is finished. However, if the process
exits uncleanly, the mm_struct can be destroyed with no warning to the
device driver.
Link: http://lkml.kernel.org/r/20180316191414.3223-4-jglisse@redhat.com
Signed-off-by: Ralph Campbell <rcampbell(a)nvidia.com>
Signed-off-by: Jérôme Glisse <jglisse(a)redhat.com>
Cc: Evgeny Baskakov <ebaskakov(a)nvidia.com>
Cc: Mark Hairgrove <mhairgrove(a)nvidia.com>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/hmm.h | 10 ++++++++++
mm/hmm.c | 18 +++++++++++++++++-
2 files changed, 27 insertions(+), 1 deletion(-)
diff -puN include/linux/hmm.h~mm-hmm-hmm-should-have-a-callback-before-mm-is-destroyed-v2 include/linux/hmm.h
--- a/include/linux/hmm.h~mm-hmm-hmm-should-have-a-callback-before-mm-is-destroyed-v2
+++ a/include/linux/hmm.h
@@ -218,6 +218,16 @@ enum hmm_update_type {
* @update: callback to update range on a device
*/
struct hmm_mirror_ops {
+ /* release() - release hmm_mirror
+ *
+ * @mirror: pointer to struct hmm_mirror
+ *
+ * This is called when the mm_struct is being released.
+ * The callback should make sure no references to the mirror occur
+ * after the callback returns.
+ */
+ void (*release)(struct hmm_mirror *mirror);
+
/* sync_cpu_device_pagetables() - synchronize page tables
*
* @mirror: pointer to struct hmm_mirror
diff -puN mm/hmm.c~mm-hmm-hmm-should-have-a-callback-before-mm-is-destroyed-v2 mm/hmm.c
--- a/mm/hmm.c~mm-hmm-hmm-should-have-a-callback-before-mm-is-destroyed-v2
+++ a/mm/hmm.c
@@ -160,6 +160,21 @@ static void hmm_invalidate_range(struct
up_read(&hmm->mirrors_sem);
}
+static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+ struct hmm *hmm = mm->hmm;
+ struct hmm_mirror *mirror;
+ struct hmm_mirror *mirror_next;
+
+ down_write(&hmm->mirrors_sem);
+ list_for_each_entry_safe(mirror, mirror_next, &hmm->mirrors, list) {
+ list_del_init(&mirror->list);
+ if (mirror->ops->release)
+ mirror->ops->release(mirror);
+ }
+ up_write(&hmm->mirrors_sem);
+}
+
static void hmm_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
@@ -185,6 +200,7 @@ static void hmm_invalidate_range_end(str
}
static const struct mmu_notifier_ops hmm_mmu_notifier_ops = {
+ .release = hmm_release,
.invalidate_range_start = hmm_invalidate_range_start,
.invalidate_range_end = hmm_invalidate_range_end,
};
@@ -230,7 +246,7 @@ void hmm_mirror_unregister(struct hmm_mi
struct hmm *hmm = mirror->hmm;
down_write(&hmm->mirrors_sem);
- list_del(&mirror->list);
+ list_del_init(&mirror->list);
up_write(&hmm->mirrors_sem);
}
EXPORT_SYMBOL(hmm_mirror_unregister);
_
Patches currently in -mm which might be from rcampbell(a)nvidia.com are
mm-hmm-documentation-editorial-update-to-hmm-documentation.patch
mm-hmm-hmm-should-have-a-callback-before-mm-is-destroyed-v2.patch
The patch titled
Subject: mm/hmm: fix header file if/else/endif maze
has been added to the -mm tree. Its filename is
mm-hmm-fix-header-file-if-else-endif-maze.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/mm-hmm-fix-header-file-if-else-end…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/mm-hmm-fix-header-file-if-else-end…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Jérôme Glisse <jglisse(a)redhat.com>
Subject: mm/hmm: fix header file if/else/endif maze
The #if/#else/#endif for IS_ENABLED(CONFIG_HMM) were wrong.
Link: http://lkml.kernel.org/r/20180316191414.3223-3-jglisse@redhat.com
Signed-off-by: Jérôme Glisse <jglisse(a)redhat.com>
Acked-by: Balbir Singh <bsingharora(a)gmail.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Ralph Campbell <rcampbell(a)nvidia.com>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Evgeny Baskakov <ebaskakov(a)nvidia.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/hmm.h | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff -puN include/linux/hmm.h~mm-hmm-fix-header-file-if-else-endif-maze include/linux/hmm.h
--- a/include/linux/hmm.h~mm-hmm-fix-header-file-if-else-endif-maze
+++ a/include/linux/hmm.h
@@ -498,6 +498,9 @@ struct hmm_device {
struct hmm_device *hmm_device_new(void *drvdata);
void hmm_device_put(struct hmm_device *hmm_device);
#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
+#else /* IS_ENABLED(CONFIG_HMM) */
+static inline void hmm_mm_destroy(struct mm_struct *mm) {}
+static inline void hmm_mm_init(struct mm_struct *mm) {}
#endif /* IS_ENABLED(CONFIG_HMM) */
/* Below are for HMM internal use only! Not to be used by device driver! */
@@ -513,8 +516,4 @@ static inline void hmm_mm_destroy(struct
static inline void hmm_mm_init(struct mm_struct *mm) {}
#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
-
-#else /* IS_ENABLED(CONFIG_HMM) */
-static inline void hmm_mm_destroy(struct mm_struct *mm) {}
-static inline void hmm_mm_init(struct mm_struct *mm) {}
#endif /* LINUX_HMM_H */
_
Patches currently in -mm which might be from jglisse(a)redhat.com are
mm-hmm-fix-header-file-if-else-endif-maze.patch
mm-hmm-hmm_pfns_bad-was-accessing-wrong-struct.patch
mm-hmm-use-struct-for-hmm_vma_fault-hmm_vma_get_pfns-parameters.patch
mm-hmm-remove-hmm_pfn_read-flag-and-ignore-peculiar-architecture.patch
mm-hmm-use-uint64_t-for-hmm-pfn-instead-of-defining-hmm_pfn_t-to-ulong.patch
mm-hmm-cleanup-special-vma-handling-vm_special.patch
mm-hmm-do-not-differentiate-between-empty-entry-or-missing-directory.patch
mm-hmm-rename-hmm_pfn_device_unaddressable-to-hmm_pfn_device_private.patch
mm-hmm-move-hmm_pfns_clear-closer-to-where-it-is-use.patch
mm-hmm-factor-out-pte-and-pmd-handling-to-simplify-hmm_vma_walk_pmd.patch
mm-hmm-change-hmm_vma_fault-to-allow-write-fault-on-page-basis.patch
mm-hmm-use-device-driver-encoding-for-hmm-pfn.patch
shmem_unused_huge_shrink() gets called from reclaim path. Waiting for page
lock may lead to deadlock there.
There was a bug report that may be attributed to this:
http://lkml.kernel.org/r/alpine.LRH.2.11.1801242349220.30642@mail.ewheeler.…
Replace lock_page() with trylock_page() and skip the page if we failed
to lock it. We will get to the page on the next scan.
We can test for the PageTransHuge() outside the page lock as we only
need protection against splitting the page under us. Holding a pin on
the page is enough for this.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Reported-by: Eric Wheeler <linux-mm(a)lists.ewheeler.net>
Fixes: 779750d20b93 ("shmem: split huge pages beyond i_size under memory pressure")
Cc: stable(a)vger.kernel.org # v4.8+
---
mm/shmem.c | 31 ++++++++++++++++++++-----------
1 file changed, 20 insertions(+), 11 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 1907688b75ee..b85919243399 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -493,36 +493,45 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
info = list_entry(pos, struct shmem_inode_info, shrinklist);
inode = &info->vfs_inode;
- if (nr_to_split && split >= nr_to_split) {
- iput(inode);
- continue;
- }
+ if (nr_to_split && split >= nr_to_split)
+ goto leave;
- page = find_lock_page(inode->i_mapping,
+ page = find_get_page(inode->i_mapping,
(inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
if (!page)
goto drop;
+ /* No huge page at the end of the file: nothing to split */
if (!PageTransHuge(page)) {
- unlock_page(page);
put_page(page);
goto drop;
}
+ /*
+ * Leave the inode on the list if we failed to lock
+ * the page at this time.
+ *
+ * Waiting for the lock may lead to deadlock in the
+ * reclaim path.
+ */
+ if (!trylock_page(page)) {
+ put_page(page);
+ goto leave;
+ }
+
ret = split_huge_page(page);
unlock_page(page);
put_page(page);
- if (ret) {
- /* split failed: leave it on the list */
- iput(inode);
- continue;
- }
+ /* If split failed leave the inode on the list */
+ if (ret)
+ goto leave;
split++;
drop:
list_del_init(&info->shrinklist);
removed++;
+leave:
iput(inode);
}
--
2.16.1
From: Mathias Kresin <dev(a)kresin.me>
Enable syscon to use it for the RCU MFD on Amazon SE as well.
The Amazon SE also has a reset controller system similar to that of Danube
and XWAY and mostly uses their drivers. As these drivers now need syscon,
also activate the syscon subsystem for Amazon SE.
Fixes: 2b6639d4c794 ("MIPS: lantiq: Enable MFD_SYSCON to be able to use it for the RCU MFD")
Cc: <stable(a)vger.kernel.org> # 4.14+
Signed-off-by: Mathias Kresin <dev(a)kresin.me>
Acked-by: Martin Blumenstingl <martin.blumenstingl(a)googlemail.com>
Signed-off-by: Hauke Mehrtens <hauke(a)hauke-m.de>
---
arch/mips/lantiq/Kconfig | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/mips/lantiq/Kconfig b/arch/mips/lantiq/Kconfig
index 692ae85a3e3d..8e3a1fc2bc39 100644
--- a/arch/mips/lantiq/Kconfig
+++ b/arch/mips/lantiq/Kconfig
@@ -13,6 +13,8 @@ choice
config SOC_AMAZON_SE
bool "Amazon SE"
select SOC_TYPE_XWAY
+ select MFD_SYSCON
+ select MFD_CORE
config SOC_XWAY
bool "XWAY"
--
2.11.0
From: Mathias Kresin <dev(a)kresin.me>
On Danube the USB0 controller registers are at 1e101000 and the USB0 PHY
register is at 1f203018 similar to all other lantiq SoCs. Activate the
USB controller gating clock through the USB controller driver and not
the PHY.
This fixes a problem introduced in a previous commit.
Fixes: dea54fbad332 ("phy: Add an USB PHY driver for the Lantiq SoCs using the RCU module")
Cc: <stable(a)vger.kernel.org> # 4.14+
Signed-off-by: Mathias Kresin <dev(a)kresin.me>
Acked-by: Martin Blumenstingl <martin.blumenstingl(a)googlemail.com>
Signed-off-by: Hauke Mehrtens <hauke(a)hauke-m.de>
---
arch/mips/lantiq/xway/sysctrl.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c
index 52500d3b7004..f11f1dd10493 100644
--- a/arch/mips/lantiq/xway/sysctrl.c
+++ b/arch/mips/lantiq/xway/sysctrl.c
@@ -560,7 +560,7 @@ void __init ltq_soc_init(void)
} else {
clkdev_add_static(ltq_danube_cpu_hz(), ltq_danube_fpi_hz(),
ltq_danube_fpi_hz(), ltq_danube_pp32_hz());
- clkdev_add_pmu("1f203018.usb2-phy", "ctrl", 1, 0, PMU_USB0);
+ clkdev_add_pmu("1e101000.usb", "otg", 1, 0, PMU_USB0);
clkdev_add_pmu("1f203018.usb2-phy", "phy", 1, 0, PMU_USB0_P);
clkdev_add_pmu("1e103000.sdio", NULL, 1, 0, PMU_SDIO);
clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU);
--
2.11.0
A vma with vm_pgoff large enough to overflow a loff_t type when
converted to a byte offset can be passed via the remap_file_pages
system call. The hugetlbfs mmap routine uses the byte offset to
calculate reservations and file size.
A sequence such as:
mmap(0x20a00000, 0x600000, 0, 0x66033, -1, 0);
remap_file_pages(0x20a00000, 0x600000, 0, 0x20000000000000, 0);
will result in the following when task exits/file closed,
kernel BUG at mm/hugetlb.c:749!
Call Trace:
hugetlbfs_evict_inode+0x2f/0x40
evict+0xcb/0x190
__dentry_kill+0xcb/0x150
__fput+0x164/0x1e0
task_work_run+0x84/0xa0
exit_to_usermode_loop+0x7d/0x80
do_syscall_64+0x18b/0x190
entry_SYSCALL_64_after_hwframe+0x3d/0xa2
The overflowed pgoff value causes hugetlbfs to try to set up a
mapping with a negative range (end < start) that leaves invalid
state which causes the BUG.
The previous overflow fix to this code was incomplete and did not
take the remap_file_pages system call into account.
Fixes: 045c7a3f53d9 ("hugetlbfs: fix offset overflow in hugetlbfs mmap")
Cc: <stable(a)vger.kernel.org>
Reported-by: Nic Losby <blurbdust(a)gmail.com>
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
---
Changes in v3
* Use a simpler mask computation as suggested by Andrew Morton
Changes in v2
* Use bitmask for overflow check as suggested by Yisheng Xie
* Add explicit (from > to) check when setting up reservations
* Cc stable
fs/hugetlbfs/inode.c | 16 +++++++++++++---
mm/hugetlb.c | 6 ++++++
2 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 8fe1b0aa2896..e46117dc006a 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -108,6 +108,15 @@ static void huge_pagevec_release(struct pagevec *pvec)
pagevec_reinit(pvec);
}
+/*
+ * Mask used when checking the page offset value passed in via system
+ * calls. This value will be converted to a loff_t which is signed.
+ * Therefore, we want to check the upper PAGE_SHIFT + 1 bits of the
+ * value. The extra bit (- 1 in the shift value) is to take the sign
+ * bit into account.
+ */
+#define PGOFF_LOFFT_MAX (PAGE_MASK << (BITS_PER_LONG - (2 * PAGE_SHIFT) - 1))
+
static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file_inode(file);
@@ -127,12 +136,13 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vma->vm_ops = &hugetlb_vm_ops;
/*
- * Offset passed to mmap (before page shift) could have been
- * negative when represented as a (l)off_t.
+ * page based offset in vm_pgoff could be sufficiently large to
+ * overflow a (l)off_t when converted to byte offset.
*/
- if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0)
+ if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
return -EINVAL;
+ /* must be huge page aligned */
if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
return -EINVAL;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7c204e3d132b..8eeade0a0b7a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4374,6 +4374,12 @@ int hugetlb_reserve_pages(struct inode *inode,
struct resv_map *resv_map;
long gbl_reserve;
+ /* This should never happen */
+ if (from > to) {
+ VM_WARN(1, "%s called with a negative range\n", __func__);
+ return -EINVAL;
+ }
+
/*
* Only apply hugepage reservation if asked. At fault time, an
* attempt will be made for VM_NORESERVE to allocate a page
--
2.13.6
This is the start of the stable review cycle for the 4.4.121 release.
There are 36 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Mon Mar 12 00:17:54 UTC 2018.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.4.121-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.4.121-rc1
Ernesto A. Fernández <ernesto.mnd.fernandez(a)gmail.com>
btrfs: preserve i_mode if __btrfs_set_acl() fails
Daniel Borkmann <daniel(a)iogearbox.net>
bpf, x64: implement retpoline for tail call
Mike Snitzer <snitzer(a)redhat.com>
dm io: fix duplicate bio completion due to missing ref count
Dan Williams <dan.j.williams(a)intel.com>
mpls, nospec: Sanitize array index in mpls_label_ok()
David Ahern <dsahern(a)gmail.com>
net: mpls: Pull common label check into helper
Alexey Kodanev <alexey.kodanev(a)oracle.com>
sctp: verify size of a new chunk in _sctp_make_chunk()
Julian Wiedmann <jwi(a)linux.vnet.ibm.com>
s390/qeth: fix IPA command submission race
Julian Wiedmann <jwi(a)linux.vnet.ibm.com>
s390/qeth: fix SETIP command handling
Alexey Kodanev <alexey.kodanev(a)oracle.com>
sctp: fix dst refcnt leak in sctp_v6_get_dst()
Tommi Rantala <tommi.t.rantala(a)nokia.com>
sctp: fix dst refcnt leak in sctp_v4_get_dst
Alexey Kodanev <alexey.kodanev(a)oracle.com>
udplite: fix partial checksum initialization
Guillaume Nault <g.nault(a)alphalink.fr>
ppp: prevent unregistered channels from connecting to PPP units
Nicolas Dichtel <nicolas.dichtel(a)6wind.com>
netlink: ensure to loop over all netns in genlmsg_multicast_allns()
Sabrina Dubroca <sd(a)queasysnail.net>
net: ipv4: don't allow setting net.ipv4.route.min_pmtu below 68
Jakub Kicinski <jakub.kicinski(a)netronome.com>
net: fix race on decreasing number of TX queues
Arnd Bergmann <arnd(a)arndb.de>
ipv6 sit: work around bogus gcc-8 -Wrestrict warning
Denis Du <dudenis2000(a)yahoo.ca>
hdlc_ppp: carrier detect ok, don't turn off negotiation
Stefano Brivio <sbrivio(a)redhat.com>
fib_semantics: Don't match route with mismatching tclassid
Xin Long <lucien.xin(a)gmail.com>
bridge: check brport attr show in brport_show
Ben Hutchings <ben.hutchings(a)codethink.co.uk>
Revert "led: core: Fix brightness setting when setting delay_off=0"
Dan Carpenter <dan.carpenter(a)oracle.com>
x86/spectre: Fix an error message
Nathan Sullivan <nathan.sullivan(a)ni.com>
leds: do not overflow sysfs buffer in led_trigger_show
Thomas Gleixner <tglx(a)linutronix.de>
x86/apic/vector: Handle legacy irq data correctly
Adam Ford <aford173(a)gmail.com>
ARM: dts: LogicPD Torpedo: Fix I2C1 pinmux
Jan Kara <jack(a)suse.cz>
btrfs: Don't clear SGID when inheriting ACLs
Jiri Slaby <jslaby(a)suse.cz>
x86/syscall: Sanitize syscall table de-references under speculation fix
Wanpeng Li <wanpeng.li(a)hotmail.com>
KVM: mmu: Fix overlap between public and private memslots
Ulf Magnusson <ulfalizer(a)gmail.com>
ARM: mvebu: Fix broken PL310_ERRATA_753970 selects
Rasmus Villemoes <linux(a)rasmusvillemoes.dk>
nospec: Allow index argument to have const-qualified type
Mauro Carvalho Chehab <mchehab(a)s-opensource.com>
media: m88ds3103: don't call a non-initalized function
Viresh Kumar <viresh.kumar(a)linaro.org>
cpufreq: s3c24xx: Fix broken s3c_cpufreq_init()
Hans de Goede <hdegoede(a)redhat.com>
ALSA: hda: Add a power_save blacklist
Erik Veijola <erik.veijola(a)gmail.com>
ALSA: usb-audio: Add a quirck for B&W PX headphones
Jeremy Boone <jeremy.boone(a)nccgroup.trust>
tpm_i2c_nuvoton: fix potential buffer overruns caused by bit glitches on the bus
Jeremy Boone <jeremy.boone(a)nccgroup.trust>
tpm_i2c_infineon: fix potential buffer overruns caused by bit glitches on the bus
Jeremy Boone <jeremy.boone(a)nccgroup.trust>
tpm: st33zp24: fix potential buffer overruns caused by bit glitches on the bus
-------------
Diffstat:
Makefile | 4 +--
arch/arm/boot/dts/logicpd-torpedo-som.dtsi | 8 +++++
arch/arm/mach-mvebu/Kconfig | 4 +--
arch/x86/entry/entry_64.S | 16 ++++++----
arch/x86/include/asm/nospec-branch.h | 37 +++++++++++++++++++++++
arch/x86/kernel/apic/vector.c | 14 ++++-----
arch/x86/kernel/cpu/bugs.c | 3 +-
arch/x86/net/bpf_jit_comp.c | 9 +++---
drivers/char/tpm/st33zp24/st33zp24.c | 4 +--
drivers/char/tpm/tpm_i2c_infineon.c | 5 ++--
drivers/char/tpm/tpm_i2c_nuvoton.c | 8 +++--
drivers/cpufreq/s3c24xx-cpufreq.c | 8 ++++-
drivers/leds/led-core.c | 2 +-
drivers/leds/led-triggers.c | 12 ++++----
drivers/md/dm-io.c | 1 +
drivers/media/dvb-frontends/m88ds3103.c | 7 +++--
drivers/net/ppp/ppp_generic.c | 9 ++++++
drivers/net/wan/hdlc_ppp.c | 5 +++-
drivers/s390/net/qeth_core.h | 5 ++++
drivers/s390/net/qeth_core_main.c | 33 +++++++++++----------
fs/btrfs/acl.c | 19 +++++++-----
include/linux/nospec.h | 3 +-
include/net/udplite.h | 1 +
net/bridge/br_sysfs_if.c | 3 ++
net/core/dev.c | 11 +++++--
net/ipv4/fib_semantics.c | 5 ++++
net/ipv4/route.c | 8 +++--
net/ipv4/udp.c | 5 ++++
net/ipv6/ip6_checksum.c | 5 ++++
net/ipv6/sit.c | 2 +-
net/mpls/af_mpls.c | 36 +++++++++++++----------
net/netlink/genetlink.c | 12 ++++++--
net/sctp/ipv6.c | 10 +++++--
net/sctp/protocol.c | 10 +++----
net/sctp/sm_make_chunk.c | 8 +++--
sound/pci/hda/hda_intel.c | 38 ++++++++++++++++++++++--
sound/usb/quirks-table.h | 47 ++++++++++++++++++++++++++++++
virt/kvm/kvm_main.c | 3 +-
38 files changed, 320 insertions(+), 100 deletions(-)