To accommodate varying hardware performance and use cases,
the default kunit test case timeout (currently 300 seconds)
is now configurable. Users can adjust the timeout by
either setting the 'timeout' module parameter or the
KUNIT_DEFAULT_TIMEOUT Kconfig option to their desired
timeout in seconds.
Signed-off-by: Marie Zhussupova <marievic(a)google.com>
---
lib/kunit/Kconfig | 13 +++++++++++++
lib/kunit/test.c | 15 ++++++++-------
2 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/lib/kunit/Kconfig b/lib/kunit/Kconfig
index a97897edd964..c10ede4b1d22 100644
--- a/lib/kunit/Kconfig
+++ b/lib/kunit/Kconfig
@@ -93,4 +93,17 @@ config KUNIT_AUTORUN_ENABLED
In most cases this should be left as Y. Only if additional opt-in
behavior is needed should this be set to N.
+config KUNIT_DEFAULT_TIMEOUT
+ int "Default value of the timeout module parameter"
+ default 300
+ help
+ Sets the default timeout, in seconds, for Kunit test cases. This value
+ is further multiplied by a factor determined by the assigned speed
+ setting: 1x for `DEFAULT`, 3x for `KUNIT_SPEED_SLOW`, and 12x for
+ `KUNIT_SPEED_VERY_SLOW`. This allows slower tests on slower machines
+ sufficient time to complete.
+
+ If unsure, the default timeout of 300 seconds is suitable for most
+ cases.
+
endif # KUNIT
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index 002121675605..f3c6b11f12b8 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -69,6 +69,13 @@ static bool enable_param;
module_param_named(enable, enable_param, bool, 0);
MODULE_PARM_DESC(enable, "Enable KUnit tests");
+/*
+ * Configure the base timeout.
+ */
+static unsigned long kunit_base_timeout = CONFIG_KUNIT_DEFAULT_TIMEOUT;
+module_param_named(timeout, kunit_base_timeout, ulong, 0644);
+MODULE_PARM_DESC(timeout, "Set the base timeout for Kunit test cases");
+
/*
* KUnit statistic mode:
* 0 - disabled
@@ -393,12 +400,6 @@ static int kunit_timeout_mult(enum kunit_speed speed)
static unsigned long kunit_test_timeout(struct kunit_suite *suite, struct kunit_case *test_case)
{
int mult = 1;
- /*
- * TODO: Make the default (base) timeout configurable, so that users with
- * particularly slow or fast machines can successfully run tests, while
- * still taking advantage of the relative speed.
- */
- unsigned long default_timeout = 300;
/*
* The default test timeout is 300 seconds and will be adjusted by mult
@@ -409,7 +410,7 @@ static unsigned long kunit_test_timeout(struct kunit_suite *suite, struct kunit_
mult = kunit_timeout_mult(suite->attr.speed);
if (test_case->attr.speed != KUNIT_SPEED_UNSET)
mult = kunit_timeout_mult(test_case->attr.speed);
- return mult * default_timeout * msecs_to_jiffies(MSEC_PER_SEC);
+ return mult * kunit_base_timeout * msecs_to_jiffies(MSEC_PER_SEC);
}
--
2.50.0.rc2.761.g2dc52ea45b-goog
A few selftest harness changes being merged to v6.16 exposed some
bugs and vulnerabilities in the iommufd selftest code. Fix them properly.
Note that the patch fixing the build warnings at mfd is not ideal, as it
has possibly hit some corner case in the gcc:
https://lore.kernel.org/all/aEi8DV+ReF3v3Rlf@nvidia.com/
This is on github:
https://github.com/nicolinc/iommufd/commits/iommufd_selftest_fixes-v6.16
Changelog:
v2
* Add "Reviewed-by" from Jason
* Only use kfree() in the teardown()
* Add an mmap_buffer_size for readability
v1
https://lore.kernel.org/all/cover.1750049883.git.nicolinc@nvidia.com/
Thanks
Nicolin
Nicolin Chen (4):
iommufd/selftest: Fix iommufd_dirty_tracking with large hugepage sizes
iommufd/selftest: Add missing close(mfd) in memfd_mmap()
iommufd/selftest: Add asserts testing global mfd
iommufd/selftest: Fix build warnings due to uninitialized mfd
tools/testing/selftests/iommu/iommufd_utils.h | 9 ++++-
tools/testing/selftests/iommu/iommufd.c | 40 ++++++++++++++-----
2 files changed, 36 insertions(+), 13 deletions(-)
--
2.43.0
This patch series fixes some of the false positives in generic
mm selftests and skips tests that cannot run correctly due to
missing features or system limitations.
Please let us know if you have any feedback.
Thanks,
Aboorva
Aboorva Devarajan (2):
selftests/mm: Fix child process exit codes in KSM tests
selftests/mm: Mark thuge-gen as skipped if shmmax is too small or no
1G pages
Donet Tom (4):
mm/selftests: Fix virtual_address_range test issues.
selftest/mm: Fix ksm_functional_test failures
selftests/mm : fix test_prctl_fork_exec failure
mm/selftests: Fix split_huge_page_test failure on systems with 64KB
page size
.../selftests/mm/ksm_functional_tests.c | 24 +++++++++++++------
.../selftests/mm/split_huge_page_test.c | 23 ++++++++++++++----
tools/testing/selftests/mm/thuge-gen.c | 11 +++++----
.../selftests/mm/virtual_address_range.c | 14 +++--------
4 files changed, 45 insertions(+), 27 deletions(-)
--
2.43.5
Add support for SuperH/"sh" to nolibc.
Only sh4 is tested for now.
This is only tested on QEMU so far.
Additional testing would be very welcome.
Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net>
---
Thomas Weißschuh (3):
selftests/nolibc: fix EXTRACONFIG variables ordering
selftests/nolibc: use file driver for QEMU serial
tools/nolibc: add support for SuperH
tools/include/nolibc/arch-sh.h | 162 ++++++++++++++++++++++++++++
tools/include/nolibc/arch.h | 2 +
tools/testing/selftests/nolibc/Makefile | 15 ++-
tools/testing/selftests/nolibc/run-tests.sh | 3 +-
4 files changed, 177 insertions(+), 5 deletions(-)
---
base-commit: 6275a61db2f0586b8a5d651dfc7b4aacf9d0b2d6
change-id: 20250528-nolibc-sh-8b4e3bb8efcb
Best regards,
--
Thomas Weißschuh <linux(a)weissschuh.net>
Reading /proc/pid/maps requires read-locking mmap_lock which prevents any
other task from concurrently modifying the address space. This guarantees
coherent reporting of virtual address ranges, however it can block
important updates from happening. Oftentimes /proc/pid/maps readers are
low priority monitoring tasks and them blocking high priority tasks
results in priority inversion.
Locking the entire address space is required to present a fully coherent
picture of the address space, however even the current implementation does not
strictly guarantee that by outputting vmas in page-size chunks and
dropping mmap_lock in between each chunk. Address space modifications are
possible while mmap_lock is dropped and userspace reading the content is
expected to deal with possible concurrent address space modifications.
Considering these relaxed rules, holding mmap_lock is not strictly needed
as long as we can guarantee that a concurrently modified vma is reported
either in its original form or after it was modified.
This patchset switches from holding mmap_lock while reading /proc/pid/maps
to taking per-vma locks as we walk the vma tree. This reduces the
contention with tasks modifying the address space because they would have
to contend for the same vma as opposed to the entire address space. Same
is done for PROCMAP_QUERY ioctl which locks only the vma that fell into
the requested range instead of the entire address space. Previous version
of this patchset [1] tried to perform /proc/pid/maps reading under RCU,
however its implementation is quite complex and the results are worse than
the new version because it still relied on mmap_lock speculation which
retries if any part of the address space gets modified. New implementation
is both simpler and results in less contention. Note that similar approach
would not work for /proc/pid/smaps reading as it also walks the page table
and that's not RCU-safe.
Paul McKenney designed a test [2] to measure mmap/munmap latencies while
concurrently reading /proc/pid/maps. The test has a pair of processes
scanning /proc/PID/maps, and another process unmapping and remapping 4K
pages from a 128MB range of anonymous memory. At the end of each 10
second run, the latency of each mmap() or munmap() operation is measured,
and for each run the maximum and mean latency is printed. The map/unmap
process is started first, its PID is passed to the scanners, and then the
map/unmap process waits until both scanners are running before starting
its timed test. The scanners keep scanning until the specified
/proc/PID/maps file disappears. This test registered close to 10x
improvement in update latencies:
Before the change:
./run-proc-vs-map.sh --nsamples 100 --rawdata -- --busyduration 2
0.011 0.008 0.455
0.011 0.008 0.472
0.011 0.008 0.535
0.011 0.009 0.545
...
0.011 0.014 2.875
0.011 0.014 2.913
0.011 0.014 3.007
0.011 0.015 3.018
After the change:
./run-proc-vs-map.sh --nsamples 100 --rawdata -- --busyduration 2
0.006 0.005 0.036
0.006 0.005 0.039
0.006 0.005 0.039
0.006 0.005 0.039
...
0.006 0.006 0.403
0.006 0.006 0.474
0.006 0.006 0.479
0.006 0.006 0.498
The patchset also adds a number of tests to check for /proc/pid/maps data
coherency. They are designed to detect any unexpected data tearing while
performing some common address space modifications (vma split, resize and
remap). Even before these changes, reading /proc/pid/maps might have
inconsistent data because the file is read page-by-page with mmap_lock
being dropped between the pages. An example of user-visible inconsistency
can be that the same vma is printed twice: once before it was modified and
then after the modifications. For example if vma was extended, it might be
found and reported twice. What is not expected is to see a gap where there
should have been a vma both before and after modification. This patchset
increases the chances of such tearing, therefore it's even more important
now to test for unexpected inconsistencies.
In [3] Lorenzo identified the following possible vma merging/splitting
scenarios:
Merges with changes to existing vmas:
1. Merge both - mapping a vma over another one and between two vmas which
can be merged after this replacement;
2. Merge left full - mapping a vma at the end of an existing one and
completely over its right neighbor;
3. Merge left partial - mapping a vma at the end of an existing one and
partially over its right neighbor;
4. Merge right full - mapping a vma before the start of an existing one
and completely over its left neighbor;
5. Merge right partial - mapping a vma before the start of an existing one
and partially over its left neighbor;
Merges without changes to existing vmas:
6. Merge both - mapping a vma into a gap between two vmas which can be
merged after the insertion;
7. Merge left - mapping a vma at the end of an existing one;
8. Merge right - mapping a vma before the start of an existing one;
Splits
9. Split with new vma at the lower address;
10. Split with new vma at the higher address;
If such merges or splits happen concurrently with the /proc/maps reading
we might report a vma twice, once before the modification and once after
it is modified:
Case 1 might report overwritten and previous vma along with the final
merged vma;
Case 2 might report previous and the final merged vma;
Case 3 might cause us to retry once we detect the temporary gap caused by
shrinking of the right neighbor;
Case 4 might report overwritten and the final merged vma;
Case 5 might cause us to retry once we detect the temporary gap caused by
shrinking of the left neighbor;
Case 6 might report previous vma and the gap along with the final merged
vma;
Case 7 might report previous and the final merged vma;
Case 8 might report the original gap and the final merged vma covering the
gap;
Case 9 might cause us to retry once we detect the temporary gap caused by
shrinking of the original vma at the vma start;
Case 10 might cause us to retry once we detect the temporary gap caused by
shrinking of the original vma at the vma end;
In all these cases the retry mechanism prevents us from reporting possible
temporary gaps.
Changes from v4 [4]:
- refactored trylock_vma() and other locking parts into mmap_lock.c, per
Lorenzo
- renamed {lock|unlock}_content() into {lock|unlock}_vma_range(), per
Lorenzo
- added clarifying comments for sentinels, per Lorenzo
- introduced is_sentinel_pos() helper function
- fixed position reset logic when last_addr is a sentinel, per Lorenzo
- added Acked-by to the last patch, per Andrii Nakryiko
[1] https://lore.kernel.org/all/20250418174959.1431962-1-surenb@google.com/
[2] https://github.com/paulmckrcu/proc-mmap_sem-test
[3] https://lore.kernel.org/all/e1863f40-39ab-4e5b-984a-c48765ffde1c@lucifer.lo…
[4] https://lore.kernel.org/all/20250604231151.799834-1-surenb@google.com/
Suren Baghdasaryan (7):
selftests/proc: add /proc/pid/maps tearing from vma split test
selftests/proc: extend /proc/pid/maps tearing test to include vma
resizing
selftests/proc: extend /proc/pid/maps tearing test to include vma
remapping
selftests/proc: test PROCMAP_QUERY ioctl while vma is concurrently
modified
selftests/proc: add verbose mode for tests to facilitate debugging
mm/maps: read proc/pid/maps under per-vma lock
mm/maps: execute PROCMAP_QUERY ioctl under per-vma locks
fs/proc/internal.h | 5 +
fs/proc/task_mmu.c | 179 ++++-
include/linux/mmap_lock.h | 11 +
mm/mmap_lock.c | 88 +++
tools/testing/selftests/proc/proc-pid-vm.c | 793 ++++++++++++++++++++-
5 files changed, 1053 insertions(+), 23 deletions(-)
base-commit: 0b2a863368fb0cf674b40925c55dc8898c5a33af
--
2.50.0.714.g196bf9f422-goog
On Tue, Jun 24, 2025 at 11:45:09AM +0530, Dev Jain wrote:
>
> On 23/06/25 11:02 pm, Donet Tom wrote:
> > On Mon, Jun 23, 2025 at 10:23:02AM +0530, Dev Jain wrote:
> > > On 21/06/25 11:25 pm, Donet Tom wrote:
> > > > On Fri, Jun 20, 2025 at 08:15:25PM +0530, Dev Jain wrote:
> > > > > On 19/06/25 1:53 pm, Donet Tom wrote:
> > > > > > On Wed, Jun 18, 2025 at 08:13:54PM +0530, Dev Jain wrote:
> > > > > > > On 18/06/25 8:05 pm, Lorenzo Stoakes wrote:
> > > > > > > > On Wed, Jun 18, 2025 at 07:47:18PM +0530, Dev Jain wrote:
> > > > > > > > > On 18/06/25 7:37 pm, Lorenzo Stoakes wrote:
> > > > > > > > > > On Wed, Jun 18, 2025 at 07:28:16PM +0530, Dev Jain wrote:
> > > > > > > > > > > On 18/06/25 5:27 pm, Lorenzo Stoakes wrote:
> > > > > > > > > > > > On Wed, Jun 18, 2025 at 05:15:50PM +0530, Dev Jain wrote:
> > > > > > > > > > > > Are you accounting for sys.max_map_count? If not, then you'll be hitting that
> > > > > > > > > > > > first.
> > > > > > > > > > > run_vmtests.sh will run the test in overcommit mode so that won't be an issue.
> > > > > > > > > > Umm, what? You mean overcommit all mode, and that has no bearing on the max
> > > > > > > > > > mapping count check.
> > > > > > > > > >
> > > > > > > > > > In do_mmap():
> > > > > > > > > >
> > > > > > > > > > /* Too many mappings? */
> > > > > > > > > > if (mm->map_count > sysctl_max_map_count)
> > > > > > > > > > return -ENOMEM;
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > As well as numerous other checks in mm/vma.c.
> > > > > > > > > Ah sorry, didn't look at the code properly just assumed that overcommit_always meant overriding
> > > > > > > > > this.
> > > > > > > > No problem! It's hard to be aware of everything in mm :)
> > > > > > > >
> > > > > > > > > > I'm not sure why an overcommit toggle is even necessary when you could use
> > > > > > > > > > MAP_NORESERVE or simply map PROT_NONE to avoid the OVERCOMMIT_GUESS limits?
> > > > > > > > > >
> > > > > > > > > > I'm pretty confused as to what this test is really achieving honestly. This
> > > > > > > > > > isn't a useful way of asserting mmap() behaviour as far as I can tell.
> > > > > > > > > Well, seems like a useful way to me at least : ) Not sure if you are in the mood
> > > > > > > > > to discuss that but if you'd like me to explain from start to end what the test
> > > > > > > > > is doing, I can do that : )
> > > > > > > > >
> > > > > > > > I just don't have time right now, I guess I'll have to come back to it
> > > > > > > > later... it's not the end of the world for it to be iffy in my view as long as
> > > > > > > > it passes, but it might just not be of great value.
> > > > > > > >
> > > > > > > > Philosophically I'd rather we didn't assert internal implementation details like
> > > > > > > > where we place mappings in userland memory. At no point do we promise to not
> > > > > > > > leave larger gaps if we feel like it :)
> > > > > > > You have a fair point. Anyhow a debate for another day.
> > > > > > >
> > > > > > > > I'm guessing, reading more, the _real_ test here is some mathematical assertion
> > > > > > > > about layout from HIGH_ADDR_SHIFT -> end of address space when using hints.
> > > > > > > >
> > > > > > > > But again I'm not sure that achieves much and again also is asserting internal
> > > > > > > > implementation details.
> > > > > > > >
> > > > > > > > Correct behaviour of this kind of thing probably better belongs to tests in the
> > > > > > > > userland VMA testing I'd say.
> > > > > > > >
> > > > > > > > Sorry I don't mean to do down work you've done before, just giving an honest
> > > > > > > > technical appraisal!
> > > > > > > Nah, it will be rather hilarious to see it all go down the drain xD
> > > > > > >
> > > > > > > > Anyway don't let this block work to fix the test if it's failing. We can revisit
> > > > > > > > this later.
> > > > > > > Sure. @Aboorva and Donet, I still believe that the correct approach is to elide
> > > > > > > the gap check at the crossing boundary. What do you think?
> > > > > > >
> > > > > > One problem I am seeing with this approach is that, since the hint address
> > > > > > is generated randomly, the VMAs are also being created at randomly based on
> > > > > > the hint address.So, for the VMAs created at high addresses, we cannot guarantee
> > > > > > that the gaps between them will be aligned to MAP_CHUNK_SIZE.
> > > > > >
> > > > > > High address VMAs
> > > > > > -----------------
> > > > > > 1000000000000-1000040000000 r--p 00000000 00:00 0
> > > > > > 2000000000000-2000040000000 r--p 00000000 00:00 0
> > > > > > 4000000000000-4000040000000 r--p 00000000 00:00 0
> > > > > > 8000000000000-8000040000000 r--p 00000000 00:00 0
> > > > > > e80009d260000-fffff9d260000 r--p 00000000 00:00 0
> > > > > >
> > > > > > I have a different approach to solve this issue.
> > > > > It is really weird that such a large amount of VA space
> > > > > is left between the two VMAs yet mmap is failing.
> > > > >
> > > > >
> > > > >
> > > > > Can you please do the following:
> > > > > set /proc/sys/vm/max_map_count to the highest value possible.
> > > > > If running without run_vmtests.sh, set /proc/sys/vm/overcommit_memory to 1.
> > > > > In validate_complete_va_space:
> > > > >
> > > > > if (start_addr >= HIGH_ADDR_MARK && found == false) {
> > > > > found = true;
> > > > > continue;
> > > > > }
> > > > Thanks Dev for the suggestion. I set max_map_count and set overcommit
> > > > memory to 1, added this code change as well, and then tried. Still, the
> > > > test is failing
> > > >
> > > > > where found is initialized to false. This will skip the check
> > > > > for the boundary.
> > > > >
> > > > > After this can you tell whether the test is still failing.
> > > > >
> > > > > Also can you give me the complete output of proc/pid/maps
> > > > > after putting a sleep at the end of the test.
> > > > >
> > > > on powerpc support DEFAULT_MAP_WINDOW is 128TB and with
> > > > total address space size is 4PB With hint it can map upto
> > > > 4PB. Since the hint addres is random in this test random hing VMAs
> > > > are getting created. IIUC this is expected only.
> > > >
> > > >
> > > > 10000000-10010000 r-xp 00000000 fd:05 134226638 /home/donet/linux/tools/testing/selftests/mm/virtual_address_range
> > > > 10010000-10020000 r--p 00000000 fd:05 134226638 /home/donet/linux/tools/testing/selftests/mm/virtual_address_range
> > > > 10020000-10030000 rw-p 00010000 fd:05 134226638 /home/donet/linux/tools/testing/selftests/mm/virtual_address_range
> > > > 30000000-10030000000 r--p 00000000 00:00 0 [anon:virtual_address_range]
> > > > 10030770000-100307a0000 rw-p 00000000 00:00 0 [heap]
> > > > 1004f000000-7fff8f000000 r--p 00000000 00:00 0 [anon:virtual_address_range]
> > > > 7fff8faf0000-7fff8fe00000 rw-p 00000000 00:00 0
> > > > 7fff8fe00000-7fff90030000 r-xp 00000000 fd:00 792355 /usr/lib64/libc.so.6
> > > > 7fff90030000-7fff90040000 r--p 00230000 fd:00 792355 /usr/lib64/libc.so.6
> > > > 7fff90040000-7fff90050000 rw-p 00240000 fd:00 792355 /usr/lib64/libc.so.6
> > > > 7fff90050000-7fff90130000 r-xp 00000000 fd:00 792358 /usr/lib64/libm.so.6
> > > > 7fff90130000-7fff90140000 r--p 000d0000 fd:00 792358 /usr/lib64/libm.so.6
> > > > 7fff90140000-7fff90150000 rw-p 000e0000 fd:00 792358 /usr/lib64/libm.so.6
> > > > 7fff90160000-7fff901a0000 r--p 00000000 00:00 0 [vvar]
> > > > 7fff901a0000-7fff901b0000 r-xp 00000000 00:00 0 [vdso]
> > > > 7fff901b0000-7fff90200000 r-xp 00000000 fd:00 792351 /usr/lib64/ld64.so.2
> > > > 7fff90200000-7fff90210000 r--p 00040000 fd:00 792351 /usr/lib64/ld64.so.2
> > > > 7fff90210000-7fff90220000 rw-p 00050000 fd:00 792351 /usr/lib64/ld64.so.2
> > > > 7fffc9770000-7fffc9880000 rw-p 00000000 00:00 0 [stack]
> > > > 1000000000000-1000040000000 r--p 00000000 00:00 0 [anon:virtual_address_range]
> > > > 2000000000000-2000040000000 r--p 00000000 00:00 0 [anon:virtual_address_range]
> > > > 4000000000000-4000040000000 r--p 00000000 00:00 0 [anon:virtual_address_range]
> > > > 8000000000000-8000040000000 r--p 00000000 00:00 0 [anon:virtual_address_range]
> > > > eb95410220000-fffff90220000 r--p 00000000 00:00 0 [anon:virtual_address_range]
> > > >
> > > >
> > > >
> > > >
> > > > If I give the hint address serially from 128TB then the address
> > > > space is contigous and gap is also MAP_SIZE, the test is passing.
> > > >
> > > > 10000000-10010000 r-xp 00000000 fd:05 134226638 /home/donet/linux/tools/testing/selftests/mm/virtual_address_range
> > > > 10010000-10020000 r--p 00000000 fd:05 134226638 /home/donet/linux/tools/testing/selftests/mm/virtual_address_range
> > > > 10020000-10030000 rw-p 00010000 fd:05 134226638 /home/donet/linux/tools/testing/selftests/mm/virtual_address_range
> > > > 33000000-10033000000 r--p 00000000 00:00 0 [anon:virtual_address_range]
> > > > 10033380000-100333b0000 rw-p 00000000 00:00 0 [heap]
> > > > 1006f0f0000-10071000000 rw-p 00000000 00:00 0
> > > > 10071000000-7fffb1000000 r--p 00000000 00:00 0 [anon:virtual_address_range]
> > > > 7fffb15d0000-7fffb1800000 r-xp 00000000 fd:00 792355 /usr/lib64/libc.so.6
> > > > 7fffb1800000-7fffb1810000 r--p 00230000 fd:00 792355 /usr/lib64/libc.so.6
> > > > 7fffb1810000-7fffb1820000 rw-p 00240000 fd:00 792355 /usr/lib64/libc.so.6
> > > > 7fffb1820000-7fffb1900000 r-xp 00000000 fd:00 792358 /usr/lib64/libm.so.6
> > > > 7fffb1900000-7fffb1910000 r--p 000d0000 fd:00 792358 /usr/lib64/libm.so.6
> > > > 7fffb1910000-7fffb1920000 rw-p 000e0000 fd:00 792358 /usr/lib64/libm.so.6
> > > > 7fffb1930000-7fffb1970000 r--p 00000000 00:00 0 [vvar]
> > > > 7fffb1970000-7fffb1980000 r-xp 00000000 00:00 0 [vdso]
> > > > 7fffb1980000-7fffb19d0000 r-xp 00000000 fd:00 792351 /usr/lib64/ld64.so.2
> > > > 7fffb19d0000-7fffb19e0000 r--p 00040000 fd:00 792351 /usr/lib64/ld64.so.2
> > > > 7fffb19e0000-7fffb19f0000 rw-p 00050000 fd:00 792351 /usr/lib64/ld64.so.2
> > > > 7fffc5470000-7fffc5580000 rw-p 00000000 00:00 0 [stack]
> > > > 800000000000-2aab000000000 r--p 00000000 00:00 0 [anon:virtual_address_range]
> > > >
> > > >
> > > Thank you for this output. I can't wrap my head around why this behaviour changes
> > > when you generate the hint sequentially. The mmap() syscall is supposed to do the
> > > following (irrespective of high VA space or not) - if the allocation at the hint
> > Yes, it is working as expected. On PowerPC, the DEFAULT_MAP_WINDOW is
> > 128TB, and the system can map up to 4PB.
> >
> > In the test, the first mmap call maps memory up to 128TB without any
> > hint, so the VMAs are created below the 128TB boundary.
> >
> > In the second mmap call, we provide a hint starting from 256TB, and
> > the hint address is generated randomly above 256TB. The mappings are
> > correctly created at these hint addresses. Since the hint addresses
> > are random, the resulting VMAs are also created at random locations.
> >
> > So, what I tried is: mapping from 0 to 128TB without any hint, and
> > then for the second mmap, instead of starting the hint from 256TB, I
> > started from 128TB. Instead of using random hint addresses, I used
> > sequential hint addresses from 128TB up to 512TB. With this change,
> > the VMAs are created in order, and the test passes.
> >
> > 800000000000-2aab000000000 r--p 00000000 00:00 0 128TB to 512TB VMA
> >
> > I think we will see same behaviour on x86 with X86_FEATURE_LA57.
> >
> > I will send the updated patch in V2.
>
> Since you say it fails on both radix and hash, it means that the generic
> code path is failing. I see that on my system, when I run the test with
> LPA2 config, write() fails with errno set to -ENOMEM. Can you apply
> the following diff and check whether the test fails still. Doing this
> fixed it for arm64.
>
> diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c
>
> index b380e102b22f..3032902d01f2 100644
>
> --- a/tools/testing/selftests/mm/virtual_address_range.c
>
> +++ b/tools/testing/selftests/mm/virtual_address_range.c
>
> @@ -173,10 +173,6 @@ static int validate_complete_va_space(void)
>
> */
>
> hop = 0;
>
> while (start_addr + hop < end_addr) {
>
> - if (write(fd, (void *)(start_addr + hop), 1) != 1)
>
> - return 1;
>
> - lseek(fd, 0, SEEK_SET);
>
> -
>
> if (is_marked_vma(vma_name))
>
> munmap((char *)(start_addr + hop), MAP_CHUNK_SIZE);
>
Even with this change, the test is still failing. In this case,
we are allocating physical memory and writing into it, but our
issue seems to be with the gap between VMAs, so I believe this
might not be directly related.
I will send the next revision where the test passes and no
issues are observed.
Just curious — with LPA2, is the second mmap() call successful?
And are the VMAs being created at the hint address as expected?
> >
> > > addr succeeds, then all is well, otherwise, do a top-down search for a large
> > > enough gap. I am not aware of the nuances in powerpc but I really am suspecting
> > > a bug in powerpc mmap code. Can you try to do some tracing - which function
> > > eventually fails to find the empty gap?
> > >
> > > Through my limited code tracing - we should end up in slice_find_area_topdown,
> > > then we ask the generic code to find the gap using vm_unmapped_area. So I
> > > suspect something is happening between this, probably slice_scan_available().
> > >
> > > > > > From 0 to 128TB, we map memory directly without using any hint. For the range above
> > > > > > 256TB up to 512TB, we perform the mapping using hint addresses. In the current test,
> > > > > > we use random hint addresses, but I have modified it to generate hint addresses linearly
> > > > > > starting from 128TB.
> > > > > >
> > > > > > With this change:
> > > > > >
> > > > > > The 0–128TB range is mapped without hints and verified accordingly.
> > > > > >
> > > > > > The 128TB–512TB range is mapped using linear hint addresses and then verified.
> > > > > >
> > > > > > Below are the VMAs obtained with this approach:
> > > > > >
> > > > > > 10000000-10010000 r-xp 00000000 fd:05 135019531
> > > > > > 10010000-10020000 r--p 00000000 fd:05 135019531
> > > > > > 10020000-10030000 rw-p 00010000 fd:05 135019531
> > > > > > 20000000-10020000000 r--p 00000000 00:00 0
> > > > > > 10020800000-10020830000 rw-p 00000000 00:00 0
> > > > > > 1004bcf0000-1004c000000 rw-p 00000000 00:00 0
> > > > > > 1004c000000-7fff8c000000 r--p 00000000 00:00 0
> > > > > > 7fff8c130000-7fff8c360000 r-xp 00000000 fd:00 792355
> > > > > > 7fff8c360000-7fff8c370000 r--p 00230000 fd:00 792355
> > > > > > 7fff8c370000-7fff8c380000 rw-p 00240000 fd:00 792355
> > > > > > 7fff8c380000-7fff8c460000 r-xp 00000000 fd:00 792358
> > > > > > 7fff8c460000-7fff8c470000 r--p 000d0000 fd:00 792358
> > > > > > 7fff8c470000-7fff8c480000 rw-p 000e0000 fd:00 792358
> > > > > > 7fff8c490000-7fff8c4d0000 r--p 00000000 00:00 0
> > > > > > 7fff8c4d0000-7fff8c4e0000 r-xp 00000000 00:00 0
> > > > > > 7fff8c4e0000-7fff8c530000 r-xp 00000000 fd:00 792351
> > > > > > 7fff8c530000-7fff8c540000 r--p 00040000 fd:00 792351
> > > > > > 7fff8c540000-7fff8c550000 rw-p 00050000 fd:00 792351
> > > > > > 7fff8d000000-7fffcd000000 r--p 00000000 00:00 0
> > > > > > 7fffe9c80000-7fffe9d90000 rw-p 00000000 00:00 0
> > > > > > 800000000000-2000000000000 r--p 00000000 00:00 0 -> High Address (128TB to 512TB)
> > > > > >
> > > > > > diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c
> > > > > > index 4c4c35eac15e..0be008cba4b0 100644
> > > > > > --- a/tools/testing/selftests/mm/virtual_address_range.c
> > > > > > +++ b/tools/testing/selftests/mm/virtual_address_range.c
> > > > > > @@ -56,21 +56,21 @@
> > > > > > #ifdef __aarch64__
> > > > > > #define HIGH_ADDR_MARK ADDR_MARK_256TB
> > > > > > -#define HIGH_ADDR_SHIFT 49
> > > > > > +#define HIGH_ADDR_SHIFT 48
> > > > > > #define NR_CHUNKS_LOW NR_CHUNKS_256TB
> > > > > > #define NR_CHUNKS_HIGH NR_CHUNKS_3840TB
> > > > > > #else
> > > > > > #define HIGH_ADDR_MARK ADDR_MARK_128TB
> > > > > > -#define HIGH_ADDR_SHIFT 48
> > > > > > +#define HIGH_ADDR_SHIFT 47
> > > > > > #define NR_CHUNKS_LOW NR_CHUNKS_128TB
> > > > > > #define NR_CHUNKS_HIGH NR_CHUNKS_384TB
> > > > > > #endif
> > > > > > -static char *hint_addr(void)
> > > > > > +static char *hint_addr(int hint)
> > > > > > {
> > > > > > - int bits = HIGH_ADDR_SHIFT + rand() % (63 - HIGH_ADDR_SHIFT);
> > > > > > + unsigned long addr = ((1UL << HIGH_ADDR_SHIFT) + (hint * MAP_CHUNK_SIZE));
> > > > > > - return (char *) (1UL << bits);
> > > > > > + return (char *) (addr);
> > > > > > }
> > > > > > static void validate_addr(char *ptr, int high_addr)
> > > > > > @@ -217,7 +217,7 @@ int main(int argc, char *argv[])
> > > > > > }
> > > > > > for (i = 0; i < NR_CHUNKS_HIGH; i++) {
> > > > > > - hint = hint_addr();
> > > > > > + hint = hint_addr(i);
> > > > > > hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ,
> > > > > > MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> > > > > >
> > > > > >
> > > > > >
> > > > > > Can we fix it this way?