From: Timo Kokkonen Date: Fri, 8 Oct 2010 18:42:20 +0000 (+0300) Subject: Vectorize memory transfers X-Git-Url: http://git.itanic.dy.fi/?p=membench;a=commitdiff_plain Vectorize memory transfers Modern CPUs have instructions that can operate on several words of data at once. In order to allow the compiler to take advantage of such instructions, the memory copies need to take place from array to array. This makes the memory copy less CPU-bound. Signed-off-by: Timo Kokkonen --- diff --git a/membench.c b/membench.c index e99e6ea..6167b6c 100644 --- a/membench.c +++ b/membench.c @@ -15,7 +15,7 @@ long long usec_diff(const struct timeval *a, const struct timeval *b) int main(int argc, char *argv[]) { int iterations, j, k, count, size, mask, latcount; - unsigned long *buf, i, tmp = 0; + unsigned long *buf, i, tmp[8] = {0}; struct timeval start, end; printf("Benchmark sequential access bandwidth " @@ -85,9 +85,17 @@ int main(int argc, char *argv[]) gettimeofday(&start, 0); for (k = 0; k < count; k++) { - buf[0] = tmp; - for (i = 0; i < size; i++) - tmp += buf[i]; + buf[0] = tmp[0]; + for (i = 0; i < size; i += 8) { + tmp[0] += buf[i]; + tmp[1] += buf[i+1]; + tmp[2] += buf[i+2]; + tmp[3] += buf[i+3]; + tmp[4] += buf[i+4]; + tmp[5] += buf[i+5]; + tmp[6] += buf[i+6]; + tmp[7] += buf[i+7]; + } } gettimeofday(&end, 0); @@ -102,9 +110,9 @@ int main(int argc, char *argv[]) gettimeofday(&start, 0); for (k = 0; k < latcount; k++) { - buf[0] = tmp; + buf[0] = tmp[0]; for (i = 0; i < size; i++) - tmp += buf[random() & mask]; + tmp[0] += buf[random() & mask]; } gettimeofday(&end, 0); @@ -119,8 +127,16 @@ int main(int argc, char *argv[]) gettimeofday(&start, 0); for (k = 0; k < count; k++) - for (i = 0; i < size; i++) - buf[i] = i; + for (i = 0; i < size; i += 8) { + buf[i ] = tmp[0]; + buf[i+1] = tmp[1]; + buf[i+2] = tmp[2]; + buf[i+3] = tmp[3]; + buf[i+4] = tmp[4]; + buf[i+5] = tmp[5]; + buf[i+6] = tmp[6]; + buf[i+7] = tmp[7]; + } gettimeofday(&end, 0); printf("% 9.2f ", 
(double)(size * lsize) *