From c0f6ff51ae3dbeae5ba4ee0fea0b2c151759f02a Mon Sep 17 00:00:00 2001 From: Timo Kokkonen Date: Fri, 8 Oct 2010 21:42:20 +0300 Subject: [PATCH] Vectorize memory transfers Modern CPUs have instructions that can operate on several words of data at once. To allow the compiler to take advantage of such instructions, the memory copies need to take place from array to array. This makes the memory copy less CPU-bound. Signed-off-by: Timo Kokkonen --- membench.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/membench.c b/membench.c index e99e6ea..6167b6c 100644 --- a/membench.c +++ b/membench.c @@ -15,7 +15,7 @@ long long usec_diff(const struct timeval *a, const struct timeval *b) int main(int argc, char *argv[]) { int iterations, j, k, count, size, mask, latcount; - unsigned long *buf, i, tmp = 0; + unsigned long *buf, i, tmp[8] = {0}; struct timeval start, end; printf("Benchmark sequential access bandwidth " @@ -85,9 +85,17 @@ int main(int argc, char *argv[]) gettimeofday(&start, 0); for (k = 0; k < count; k++) { - buf[0] = tmp; - for (i = 0; i < size; i++) - tmp += buf[i]; + buf[0] = tmp[0]; + for (i = 0; i < size; i += 8) { + tmp[0] += buf[i]; + tmp[1] += buf[i+1]; + tmp[2] += buf[i+2]; + tmp[3] += buf[i+3]; + tmp[4] += buf[i+4]; + tmp[5] += buf[i+5]; + tmp[6] += buf[i+6]; + tmp[7] += buf[i+7]; + } } gettimeofday(&end, 0); @@ -102,9 +110,9 @@ int main(int argc, char *argv[]) gettimeofday(&start, 0); for (k = 0; k < latcount; k++) { - buf[0] = tmp; + buf[0] = tmp[0]; for (i = 0; i < size; i++) - tmp += buf[random() & mask]; + tmp[0] += buf[random() & mask]; } gettimeofday(&end, 0); @@ -119,8 +127,16 @@ int main(int argc, char *argv[]) gettimeofday(&start, 0); for (k = 0; k < count; k++) - for (i = 0; i < size; i++) - buf[i] = i; + for (i = 0; i < size; i += 8) { + buf[i ] = tmp[0]; + buf[i+1] = tmp[1]; + buf[i+2] = tmp[2]; + buf[i+3] = tmp[3]; + buf[i+4] = tmp[4]; + buf[i+5] = tmp[5]; + buf[i+6] = tmp[6]; 
+ buf[i+7] = tmp[7]; + } gettimeofday(&end, 0); printf("% 9.2f ", (double)(size * lsize) * -- 2.44.0