]> git.itanic.dy.fi Git - membench/commitdiff
Vectorize memory transfers master
author Timo Kokkonen <kaapeli@itanic.dy.fi>
Fri, 8 Oct 2010 18:42:20 +0000 (21:42 +0300)
committer Timo Kokkonen <kaapeli@itanic.dy.fi>
Fri, 8 Oct 2010 18:42:20 +0000 (21:42 +0300)
Modern CPUs have instructions that can operate on several words of
data at once. In order to allow the compiler to take advantage of such
instructions, the memory copies need to take place from array to
array. This makes the memory copy less CPU bound.

Signed-off-by: Timo Kokkonen <kaapeli@itanic.dy.fi>
membench.c

index e99e6ea047ff2b0512e73b8497b5184d6427b4f5..6167b6cb639928bee8a42746997e4b9f35ff2c53 100644 (file)
@@ -15,7 +15,7 @@ long long usec_diff(const struct timeval *a, const struct timeval *b)
 int main(int argc, char *argv[])
 {
        int iterations, j, k, count, size, mask, latcount;
-       unsigned long *buf, i, tmp = 0;
+       unsigned long *buf, i, tmp[8] = {0};
        struct timeval start, end;
 
        printf("Benchmark sequential access bandwidth "
@@ -85,9 +85,17 @@ int main(int argc, char *argv[])
 
                gettimeofday(&start, 0);
                for (k = 0; k < count; k++) {
-                       buf[0] = tmp;
-                       for (i = 0; i < size; i++)
-                               tmp += buf[i];
+                       buf[0] = tmp[0];
+                       for (i = 0; i < size; i += 8) {
+                               tmp[0] += buf[i];
+                               tmp[1] += buf[i+1];
+                               tmp[2] += buf[i+2];
+                               tmp[3] += buf[i+3];
+                               tmp[4] += buf[i+4];
+                               tmp[5] += buf[i+5];
+                               tmp[6] += buf[i+6];
+                               tmp[7] += buf[i+7];
+                       }
                }
 
                gettimeofday(&end, 0);
@@ -102,9 +110,9 @@ int main(int argc, char *argv[])
 
                gettimeofday(&start, 0);
                for (k = 0; k < latcount; k++) {
-                       buf[0] = tmp;
+                       buf[0] = tmp[0];
                        for (i = 0; i < size; i++)
-                               tmp += buf[random() & mask];
+                               tmp[0] += buf[random() & mask];
                }
 
                gettimeofday(&end, 0);
@@ -119,8 +127,16 @@ int main(int argc, char *argv[])
 
                gettimeofday(&start, 0);
                for (k = 0; k < count; k++)
-                       for (i = 0; i < size; i++)
-                                buf[i] = i;
+                       for (i = 0; i < size; i += 8) {
+                               buf[i  ] = tmp[0];
+                               buf[i+1] = tmp[1];
+                               buf[i+2] = tmp[2];
+                               buf[i+3] = tmp[3];
+                               buf[i+4] = tmp[4];
+                               buf[i+5] = tmp[5];
+                               buf[i+6] = tmp[6];
+                               buf[i+7] = tmp[7];
+                       }
                gettimeofday(&end, 0);
 
                printf("% 9.2f  ", (double)(size * lsize) *