The following patches fix the swapfile page-to-sector mapping on block devices that implement rw_page, for all stable kernels.
This is related to the upstream fix of commit caf6912f3f4a ("swap: fix swapfile read/write offset"), but for kernels prior to v5.12-rc1 the bug only affects swapfiles that sit on top of block devices which provide a rw_page operation.
commit caf6912f3f4af7232340d500a4a2008f81b93f14 upstream.
Fix block device sector offset calculation for swap page io on top of blockdevs that provide a rw_page operation and do page-sized io directly (without the block layer).
Currently swap_page_sector() maps a swap page into a blockdev sector by obtaining the swap page offset (swap map slot), but ignores the swapfile starting offset into the blockdev.
In setups where swapfiles are sitting on top of a filesystem, this results in swap-out activity potentially overwriting filesystem blocks that fall outside the swapfile region.
[This issue only affects swapfiles on filesystems on top of blockdevs that implement rw_page ops (brd, zram, btt, pmem), and not on top of any other block devices, in contrast to the upstream commit fix.]
Fixes: dd6bd0d9c7db ("swap: use bdev_read_page() / bdev_write_page()") Cc: stable@vger.kernel.org # 4.14 4.19
Signed-off-by: Anthony Iliopoulos ailiop@suse.com --- mm/page_io.c | 11 +++-------- mm/swapfile.c | 2 +- 2 files changed, 4 insertions(+), 9 deletions(-)
diff --git a/mm/page_io.c b/mm/page_io.c index 08d2eae58fce..9b646f07f47f 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -38,7 +38,6 @@ static struct bio *get_swap_bio(gfp_t gfp_flags,
bio->bi_iter.bi_sector = map_swap_page(page, &bdev); bio_set_dev(bio, bdev); - bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9; bio->bi_end_io = end_io;
for (i = 0; i < nr; i++) @@ -262,11 +261,6 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) return ret; }
-static sector_t swap_page_sector(struct page *page) -{ - return (sector_t)__page_file_index(page) << (PAGE_SHIFT - 9); -} - static inline void count_swpout_vm_event(struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -325,7 +319,8 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, return ret; }
- ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc); + ret = bdev_write_page(sis->bdev, map_swap_page(page, &sis->bdev), + page, wbc); if (!ret) { count_swpout_vm_event(page); return 0; @@ -376,7 +371,7 @@ int swap_readpage(struct page *page, bool synchronous) return ret; }
- ret = bdev_read_page(sis->bdev, swap_page_sector(page), page); + ret = bdev_read_page(sis->bdev, map_swap_page(page, &sis->bdev), page); if (!ret) { if (trylock_page(page)) { swap_slot_free_notify(page); diff --git a/mm/swapfile.c b/mm/swapfile.c index 130e2e41a48c..057e6907bf7b 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2305,7 +2305,7 @@ sector_t map_swap_page(struct page *page, struct block_device **bdev) { swp_entry_t entry; entry.val = page_private(page); - return map_swap_entry(entry, bdev); + return map_swap_entry(entry, bdev) << (PAGE_SHIFT - 9); }
/*
commit caf6912f3f4af7232340d500a4a2008f81b93f14 upstream.
Fix block device sector offset calculation for swap page io on top of blockdevs that provide a rw_page operation and do page-sized io directly (without the block layer).
Currently swap_page_sector() maps a swap page into a blockdev sector by obtaining the swap page offset (swap map slot), but ignores the swapfile starting offset into the blockdev.
In setups where swapfiles are sitting on top of a filesystem, this results in swap-out activity potentially overwriting filesystem blocks that fall outside the swapfile region.
[This issue only affects swapfiles on filesystems on top of blockdevs that implement rw_page ops (brd, zram, btt, pmem), and not on top of any other block devices, in contrast to the upstream commit fix.]
Fixes: dd6bd0d9c7db ("swap: use bdev_read_page() / bdev_write_page()") Cc: stable@vger.kernel.org # 4.4
Signed-off-by: Anthony Iliopoulos ailiop@suse.com --- mm/page_io.c | 11 +++-------- mm/swapfile.c | 2 +- 2 files changed, 4 insertions(+), 9 deletions(-)
diff --git a/mm/page_io.c b/mm/page_io.c index b995a5ba5e8f..ab92cd559404 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -32,7 +32,6 @@ static struct bio *get_swap_bio(gfp_t gfp_flags, bio = bio_alloc(gfp_flags, 1); if (bio) { bio->bi_iter.bi_sector = map_swap_page(page, &bio->bi_bdev); - bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9; bio->bi_end_io = end_io;
bio_add_page(bio, page, PAGE_SIZE, 0); @@ -244,11 +243,6 @@ out: return ret; }
-static sector_t swap_page_sector(struct page *page) -{ - return (sector_t)__page_file_index(page) << (PAGE_CACHE_SHIFT - 9); -} - int __swap_writepage(struct page *page, struct writeback_control *wbc, bio_end_io_t end_write_func) { @@ -297,7 +291,8 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, return ret; }
- ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc); + ret = bdev_write_page(sis->bdev, map_swap_page(page, &sis->bdev), + page, wbc); if (!ret) { count_vm_event(PSWPOUT); return 0; @@ -345,7 +340,7 @@ int swap_readpage(struct page *page) return ret; }
- ret = bdev_read_page(sis->bdev, swap_page_sector(page), page); + ret = bdev_read_page(sis->bdev, map_swap_page(page, &sis->bdev), page); if (!ret) { count_vm_event(PSWPIN); return 0; diff --git a/mm/swapfile.c b/mm/swapfile.c index 8e25ff2b693a..b338d8829239 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1653,7 +1653,7 @@ sector_t map_swap_page(struct page *page, struct block_device **bdev) { swp_entry_t entry; entry.val = page_private(page); - return map_swap_entry(entry, bdev); + return map_swap_entry(entry, bdev) << (PAGE_SHIFT - 9); }
/*
commit caf6912f3f4af7232340d500a4a2008f81b93f14 upstream.
Fix block device sector offset calculation for swap page io on top of blockdevs that provide a rw_page operation and do page-sized io directly (without the block layer).
Currently swap_page_sector() maps a swap page into a blockdev sector by obtaining the swap page offset (swap map slot), but ignores the swapfile starting offset into the blockdev.
In setups where swapfiles are sitting on top of a filesystem, this results in swap-out activity potentially overwriting filesystem blocks that fall outside the swapfile region.
[This issue only affects swapfiles on filesystems on top of blockdevs that implement rw_page ops (brd, zram, btt, pmem), and not on top of any other block devices, in contrast to the upstream commit fix.]
Fixes: dd6bd0d9c7db ("swap: use bdev_read_page() / bdev_write_page()") Cc: stable@vger.kernel.org # 4.9
Signed-off-by: Anthony Iliopoulos ailiop@suse.com --- mm/page_io.c | 11 +++-------- mm/swapfile.c | 2 +- 2 files changed, 4 insertions(+), 9 deletions(-)
diff --git a/mm/page_io.c b/mm/page_io.c index a2651f58c86a..ad0e0ce31090 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -32,7 +32,6 @@ static struct bio *get_swap_bio(gfp_t gfp_flags, bio = bio_alloc(gfp_flags, 1); if (bio) { bio->bi_iter.bi_sector = map_swap_page(page, &bio->bi_bdev); - bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9; bio->bi_end_io = end_io;
bio_add_page(bio, page, PAGE_SIZE, 0); @@ -252,11 +251,6 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) return ret; }
-static sector_t swap_page_sector(struct page *page) -{ - return (sector_t)__page_file_index(page) << (PAGE_SHIFT - 9); -} - int __swap_writepage(struct page *page, struct writeback_control *wbc, bio_end_io_t end_write_func) { @@ -306,7 +300,8 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, return ret; }
- ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc); + ret = bdev_write_page(sis->bdev, map_swap_page(page, &sis->bdev), + page, wbc); if (!ret) { count_vm_event(PSWPOUT); return 0; @@ -357,7 +352,7 @@ int swap_readpage(struct page *page) return ret; }
- ret = bdev_read_page(sis->bdev, swap_page_sector(page), page); + ret = bdev_read_page(sis->bdev, map_swap_page(page, &sis->bdev), page); if (!ret) { if (trylock_page(page)) { swap_slot_free_notify(page); diff --git a/mm/swapfile.c b/mm/swapfile.c index 855f62ab8c1b..8a0d969a6ebd 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1666,7 +1666,7 @@ sector_t map_swap_page(struct page *page, struct block_device **bdev) { swp_entry_t entry; entry.val = page_private(page); - return map_swap_entry(entry, bdev); + return map_swap_entry(entry, bdev) << (PAGE_SHIFT - 9); }
/*
commit caf6912f3f4af7232340d500a4a2008f81b93f14 upstream.
Fix block device sector offset calculation for swap page io on top of blockdevs that provide a rw_page operation and do page-sized io directly (without the block layer).
Currently swap_page_sector() maps a swap page into a blockdev sector by obtaining the swap page offset (swap map slot), but ignores the swapfile starting offset into the blockdev.
In setups where swapfiles are sitting on top of a filesystem, this results in swap-out activity potentially overwriting filesystem blocks that fall outside the swapfile region.
[This issue only affects swapfiles on filesystems on top of blockdevs that implement rw_page ops (brd, zram, btt, pmem), and not on top of any other block devices, in contrast to the upstream commit fix.]
Fixes: dd6bd0d9c7db ("swap: use bdev_read_page() / bdev_write_page()") Cc: stable@vger.kernel.org # 5.10+
Signed-off-by: Anthony Iliopoulos ailiop@suse.com --- mm/page_io.c | 12 ++++-------- mm/swapfile.c | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-)
diff --git a/mm/page_io.c b/mm/page_io.c index 433df1263349..1541c0d6ad6e 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -37,7 +37,6 @@ static struct bio *get_swap_bio(gfp_t gfp_flags,
bio->bi_iter.bi_sector = map_swap_page(page, &bdev); bio_set_dev(bio, bdev); - bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9; bio->bi_end_io = end_io;
bio_add_page(bio, page, thp_size(page), 0); @@ -273,11 +272,6 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) return ret; }
-static sector_t swap_page_sector(struct page *page) -{ - return (sector_t)__page_file_index(page) << (PAGE_SHIFT - 9); -} - static inline void count_swpout_vm_event(struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -353,7 +347,8 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, return ret; }
- ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc); + ret = bdev_write_page(sis->bdev, map_swap_page(page, &sis->bdev), + page, wbc); if (!ret) { count_swpout_vm_event(page); return 0; @@ -412,7 +407,8 @@ int swap_readpage(struct page *page, bool synchronous) }
if (sis->flags & SWP_SYNCHRONOUS_IO) { - ret = bdev_read_page(sis->bdev, swap_page_sector(page), page); + ret = bdev_read_page(sis->bdev, map_swap_page(page, &sis->bdev), + page); if (!ret) { if (trylock_page(page)) { swap_slot_free_notify(page); diff --git a/mm/swapfile.c b/mm/swapfile.c index 16db9d1ebcbf..4adbb2a4a2ad 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2311,7 +2311,7 @@ sector_t map_swap_page(struct page *page, struct block_device **bdev) { swp_entry_t entry; entry.val = page_private(page); - return map_swap_entry(entry, bdev); + return map_swap_entry(entry, bdev) << (PAGE_SHIFT - 9); }
/*
On Thu, Mar 04, 2021 at 04:08:24PM +0100, Anthony Iliopoulos wrote:
commit caf6912f3f4af7232340d500a4a2008f81b93f14 upstream.
No, this does not look like that commit.
Why can I not just take caf6912f3f4a ("swap: fix swapfile read/write offset") directly for 5.10 and 5.11? What has changed to prevent that?
thanks,
greg k-h
On Thu, Mar 04, 2021 at 04:16:26PM +0100, Greg Kroah-Hartman wrote:
On Thu, Mar 04, 2021 at 04:08:24PM +0100, Anthony Iliopoulos wrote:
commit caf6912f3f4af7232340d500a4a2008f81b93f14 upstream.
No, this does not look like that commit.
Why can I not just take caf6912f3f4a ("swap: fix swapfile read/write offset") directly for 5.10 and 5.11? What has changed to prevent that?
You're right of course, the upstream fix applies even on v5.4 so you could just take it directly for those branches if this is preferable.
Regards, Anthony
On Thu, Mar 04, 2021 at 05:30:00PM +0100, Anthony Iliopoulos wrote:
On Thu, Mar 04, 2021 at 04:16:26PM +0100, Greg Kroah-Hartman wrote:
On Thu, Mar 04, 2021 at 04:08:24PM +0100, Anthony Iliopoulos wrote:
commit caf6912f3f4af7232340d500a4a2008f81b93f14 upstream.
No, this does not look like that commit.
Why can I not just take caf6912f3f4a ("swap: fix swapfile read/write offset") directly for 5.10 and 5.11? What has changed to prevent that?
You're right of course, the upstream fix applies even on v5.4 so you could just take it directly for those branches if this is preferable.
But, that commit says it fixes 48d15436fde6 ("mm: remove get_swap_bio"), which is NOT what you are saying here in these patches.
So which is it? Is there a problem in 5.11 and older kernels (48d15436fde6 ("mm: remove get_swap_bio") showed up in 5.12-rc1), that requires this fix, or is there nothing needed to be backported?
As a note, I've been running swapfiles on 5.11 and earlier just fine for a very long time now, so is this really an issue?
confused,
greg k-h
On Thu, Mar 04, 2021 at 05:58:49PM +0100, Greg Kroah-Hartman wrote:
On Thu, Mar 04, 2021 at 05:30:00PM +0100, Anthony Iliopoulos wrote:
On Thu, Mar 04, 2021 at 04:16:26PM +0100, Greg Kroah-Hartman wrote:
On Thu, Mar 04, 2021 at 04:08:24PM +0100, Anthony Iliopoulos wrote:
commit caf6912f3f4af7232340d500a4a2008f81b93f14 upstream.
No, this does not look like that commit.
Why can I not just take caf6912f3f4a ("swap: fix swapfile read/write offset") directly for 5.10 and 5.11? What has changed to prevent that?
You're right of course, the upstream fix applies even on v5.4 so you could just take it directly for those branches if this is preferable.
But, that commit says it fixes 48d15436fde6 ("mm: remove get_swap_bio"), which is NOT what you are saying here in these patches.
It is admittedly a bit confusing, as the upstream commit fixes two issues in one fell swoop:
- the bug which was introduced in v5.12-rc1 via 48d15436fde6 ("mm: remove get_swap_bio"), which affected swapfiles running on regular block devices, in addition to:
- an identical bug which up until 48d15436fde6 was only applicable to swapfiles on top of blockdevs that can do page io without the block layer, which was introduced with dd6bd0d9c7db ("swap: use bdev_read_page() / bdev_write_page()")
So which is it? Is there a problem in 5.11 and older kernels (48d15436fde6 ("mm: remove get_swap_bio") showed up in 5.12-rc1), that requires this fix, or is there nothing needed to be backported?
The second point/bug mentioned above is present on 5.11 and all older kernels, so some form of this fix is required.
As a note, I've been running swapfiles on 5.11 and earlier just fine for a very long time now, so is this really an issue?
Yes there is an issue on all kernels since v3.16-rc1 when dd6bd0d9c7db was introduced, but it is applicable only to setups with swapfiles on filesystems sitting on top of brd, zram, btt or pmem.
I can trivially reproduce this e.g. on v5.11 by creating a swapfile on top of a zram or pmem blockdev and pushing the system to swap out pages, at which point it corrupts filesystem blocks that don't belong to the swapfile.
Regards, Anthony
On Thu, Mar 04, 2021 at 10:17:08PM +0100, Anthony Iliopoulos wrote:
On Thu, Mar 04, 2021 at 05:58:49PM +0100, Greg Kroah-Hartman wrote:
On Thu, Mar 04, 2021 at 05:30:00PM +0100, Anthony Iliopoulos wrote:
On Thu, Mar 04, 2021 at 04:16:26PM +0100, Greg Kroah-Hartman wrote:
On Thu, Mar 04, 2021 at 04:08:24PM +0100, Anthony Iliopoulos wrote:
commit caf6912f3f4af7232340d500a4a2008f81b93f14 upstream.
No, this does not look like that commit.
Why can I not just take caf6912f3f4a ("swap: fix swapfile read/write offset") directly for 5.10 and 5.11? What has changed to prevent that?
You're right of course, the upstream fix applies even on v5.4 so you could just take it directly for those branches if this is preferable.
But, that commit says it fixes 48d15436fde6 ("mm: remove get_swap_bio"), which is NOT what you are saying here in these patches.
It is admittedly a bit confusing, as the upstream commit fixes two issues in one fell swoop:
- the bug which was introduced in v5.12-rc1 via 48d15436fde6 ("mm: remove get_swap_bio"), which affected swapfiles running on regular block devices, in addition to:
- an identical bug which up until 48d15436fde6 was only applicable to swapfiles on top of blockdevs that can do page io without the block layer, which was introduced with dd6bd0d9c7db ("swap: use bdev_read_page() / bdev_write_page()")
So which is it? Is there a problem in 5.11 and older kernels (48d15436fde6 ("mm: remove get_swap_bio") showed up in 5.12-rc1), that requires this fix, or is there nothing needed to be backported?
The second point/bug mentioned above is present on 5.11 and all older kernels, so some form of this fix is required.
As a note, I've been running swapfiles on 5.11 and earlier just fine for a very long time now, so is this really an issue?
Yes there is an issue on all kernels since v3.16-rc1 when dd6bd0d9c7db was introduced, but it is applicable only to setups with swapfiles on filesystems sitting on top of brd, zram, btt or pmem.
I can trivially reproduce this e.g. on v5.11 by creating a swapfile on top of a zram or pmem blockdev and pushing the system to swap out pages, at which point it corrupts filesystem blocks that don't belong to the swapfile.
Ok, thanks for the detailed description, all now queued up.
greg k-h
commit caf6912f3f4af7232340d500a4a2008f81b93f14 upstream.
Fix block device sector offset calculation for swap page io on top of blockdevs that provide a rw_page operation and do page-sized io directly (without the block layer).
Currently swap_page_sector() maps a swap page into a blockdev sector by obtaining the swap page offset (swap map slot), but ignores the swapfile starting offset into the blockdev.
In setups where swapfiles are sitting on top of a filesystem, this results in swap-out activity potentially overwriting filesystem blocks that fall outside the swapfile region.
[This issue only affects swapfiles on filesystems on top of blockdevs that implement rw_page ops (brd, zram, btt, pmem), and not on top of any other block devices, in contrast to the upstream commit fix.]
Fixes: dd6bd0d9c7db ("swap: use bdev_read_page() / bdev_write_page()") Cc: stable@vger.kernel.org # 5.4
Signed-off-by: Anthony Iliopoulos ailiop@suse.com --- mm/page_io.c | 11 +++-------- mm/swapfile.c | 2 +- 2 files changed, 4 insertions(+), 9 deletions(-)
diff --git a/mm/page_io.c b/mm/page_io.c index 60a66a58b9bf..f03dca3f43d9 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -37,7 +37,6 @@ static struct bio *get_swap_bio(gfp_t gfp_flags,
bio->bi_iter.bi_sector = map_swap_page(page, &bdev); bio_set_dev(bio, bdev); - bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9; bio->bi_end_io = end_io;
bio_add_page(bio, page, PAGE_SIZE * hpage_nr_pages(page), 0); @@ -260,11 +259,6 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) return ret; }
-static sector_t swap_page_sector(struct page *page) -{ - return (sector_t)__page_file_index(page) << (PAGE_SHIFT - 9); -} - static inline void count_swpout_vm_event(struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -323,7 +317,8 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, return ret; }
- ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc); + ret = bdev_write_page(sis->bdev, map_swap_page(page, &sis->bdev), + page, wbc); if (!ret) { count_swpout_vm_event(page); return 0; @@ -374,7 +369,7 @@ int swap_readpage(struct page *page, bool synchronous) return ret; }
- ret = bdev_read_page(sis->bdev, swap_page_sector(page), page); + ret = bdev_read_page(sis->bdev, map_swap_page(page, &sis->bdev), page); if (!ret) { if (trylock_page(page)) { swap_slot_free_notify(page); diff --git a/mm/swapfile.c b/mm/swapfile.c index 7947633d3ced..2434387194e6 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2275,7 +2275,7 @@ sector_t map_swap_page(struct page *page, struct block_device **bdev) { swp_entry_t entry; entry.val = page_private(page); - return map_swap_entry(entry, bdev); + return map_swap_entry(entry, bdev) << (PAGE_SHIFT - 9); }
/*
linux-stable-mirror@lists.linaro.org