From: Timo Kokkonen Date: Fri, 8 Oct 2010 18:42:20 +0000 (+0300) Subject: Vectorize memory transfers X-Git-Url: http://git.itanic.dy.fi/?p=membench;a=commitdiff_plain;h=HEAD;hp=7a82a5bfb95c27a658e1ee3ed3b893aa46a25e0b Vectorize memory transfers Modern CPUs have instructions that can operate on several words of data at once. To allow the compiler to take advantage of such instructions, the memory copies need to take place from array to array. This makes the memory copy less CPU-bound. Signed-off-by: Timo Kokkonen --- diff --git a/membench.c b/membench.c index e99e6ea..6167b6c 100644 --- a/membench.c +++ b/membench.c @@ -15,7 +15,7 @@ long long usec_diff(const struct timeval *a, const struct timeval *b) int main(int argc, char *argv[]) { int iterations, j, k, count, size, mask, latcount; - unsigned long *buf, i, tmp = 0; + unsigned long *buf, i, tmp[8] = {0}; struct timeval start, end; printf("Benchmark sequential access bandwidth " @@ -85,9 +85,17 @@ int main(int argc, char *argv[]) gettimeofday(&start, 0); for (k = 0; k < count; k++) { - buf[0] = tmp; - for (i = 0; i < size; i++) - tmp += buf[i]; + buf[0] = tmp[0]; + for (i = 0; i < size; i += 8) { + tmp[0] += buf[i]; + tmp[1] += buf[i+1]; + tmp[2] += buf[i+2]; + tmp[3] += buf[i+3]; + tmp[4] += buf[i+4]; + tmp[5] += buf[i+5]; + tmp[6] += buf[i+6]; + tmp[7] += buf[i+7]; + } } gettimeofday(&end, 0); @@ -102,9 +110,9 @@ int main(int argc, char *argv[]) gettimeofday(&start, 0); for (k = 0; k < latcount; k++) { - buf[0] = tmp; + buf[0] = tmp[0]; for (i = 0; i < size; i++) - tmp += buf[random() & mask]; + tmp[0] += buf[random() & mask]; } gettimeofday(&end, 0); @@ -119,8 +127,16 @@ int main(int argc, char *argv[]) gettimeofday(&start, 0); for (k = 0; k < count; k++) - for (i = 0; i < size; i++) - buf[i] = i; + for (i = 0; i < size; i += 8) { + buf[i ] = tmp[0]; + buf[i+1] = tmp[1]; + buf[i+2] = tmp[2]; + buf[i+3] = tmp[3]; + buf[i+4] = tmp[4]; + buf[i+5] = tmp[5]; + buf[i+6] = tmp[6]; + buf[i+7] = tmp[7]; + } 
gettimeofday(&end, 0); printf("% 9.2f ", (double)(size * lsize) *