From: Vincent Whitchurch vincent.whitchurch@axis.com
[ Upstream commit 1beaef29c34154ccdcb3f1ae557f6883eda18840 ]
For memcpy, the source pages are memset to zero only when --cycles is used. This leads to wildly different results with or without --cycles, since all sources pages are likely to be mapped to the same zero page without explicit writes.
Before this fix:
$ export cmd="./perf stat -e LLC-loads -- ./perf bench \ mem memcpy -s 1024MB -l 100 -f default" $ $cmd
2,935,826 LLC-loads 3.821677452 seconds time elapsed
$ $cmd --cycles
217,533,436 LLC-loads 8.616725985 seconds time elapsed
After this fix:
$ $cmd
214,459,686 LLC-loads 8.674301124 seconds time elapsed
$ $cmd --cycles
214,758,651 LLC-loads 8.644480006 seconds time elapsed
Fixes: 47b5757bac03c338 ("perf bench mem: Move boilerplate memory allocation to the infrastructure") Signed-off-by: Vincent Whitchurch vincent.whitchurch@axis.com Cc: Alexander Shishkin alexander.shishkin@linux.intel.com Cc: Jiri Olsa jolsa@redhat.com Cc: Mark Rutland mark.rutland@arm.com Cc: Namhyung Kim namhyung@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: kernel@axis.com Link: http://lore.kernel.org/lkml/20200810133404.30829-1-vincent.whitchurch@axis.c... Signed-off-by: Arnaldo Carvalho de Melo acme@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- tools/perf/bench/mem-functions.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index 9235b76501be8..19d45c377ac18 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -223,12 +223,8 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info * return 0; }
-static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst) +static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst) { - u64 cycle_start = 0ULL, cycle_end = 0ULL; - memcpy_t fn = r->fn.memcpy; - int i; - /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ memset(src, 0, size);
@@ -237,6 +233,15 @@ static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, vo * to not measure page fault overhead: */ fn(dst, src, size); +} + +static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst) +{ + u64 cycle_start = 0ULL, cycle_end = 0ULL; + memcpy_t fn = r->fn.memcpy; + int i; + + memcpy_prefault(fn, size, src, dst);
cycle_start = get_cycles(); for (i = 0; i < nr_loops; ++i) @@ -252,11 +257,7 @@ static double do_memcpy_gettimeofday(const struct function *r, size_t size, void memcpy_t fn = r->fn.memcpy; int i;
- /* - * We prefault the freshly allocated memory range here, - * to not measure page fault overhead: - */ - fn(dst, src, size); + memcpy_prefault(fn, size, src, dst);
BUG_ON(gettimeofday(&tv_start, NULL)); for (i = 0; i < nr_loops; ++i)