git.itanic.dy.fi Git - membench/blobdiff - membench.c
Vectorize memory transfers
[membench] / membench.c
index 7cad40b1af76c5950155602b4a9d435c1cca74d4..6167b6cb639928bee8a42746997e4b9f35ff2c53 100644 (file)
@@ -4,9 +4,9 @@
 
 #define lsize sizeof(unsigned long)
 
-long usec_diff(const struct timeval *a, const struct timeval *b)
+long long usec_diff(const struct timeval *a, const struct timeval *b)
 {
-       long usec_a, usec_b;
+       long long usec_a, usec_b;
        usec_a = a->tv_sec * 1000000 + a->tv_usec;
        usec_b = b->tv_sec * 1000000 + b->tv_usec;
        return usec_b - usec_a;
@@ -14,10 +14,13 @@ long usec_diff(const struct timeval *a, const struct timeval *b)
 
 int main(int argc, char *argv[])
 {
-       int iterations, j, k, count, size;
-       unsigned long *buf, i, tmp = 0;
+       int iterations, j, k, count, size, mask, latcount;
+       unsigned long *buf, i, tmp[8] = {0};
        struct timeval start, end;
 
+       printf("Benchmark sequential access bandwidth "
+               "and random access latency\n\n");
+
        if (argc > 1)
                iterations = atoi(argv[1]);
        else
@@ -26,12 +29,13 @@ int main(int argc, char *argv[])
        if (argc > 2)
                count = atoi(argv[2]) * 1024;
        else
-               count = 1024;
+               count = 1024 * 64;
+       latcount = count / 10;
 
        printf("Doing %d runs with buffer size goin up to %dk\n",
               iterations, 1 << (iterations - 10));
        printf("Running each round %d times\n", count);
-       printf("Size of unsigned long is %lu bits\n", lsize * 8);
+       printf("Size of unsigned long is %d bits\n\n", (int)lsize * 8);
 
        switch (lsize) {
        case 4:
@@ -45,9 +49,24 @@ int main(int argc, char *argv[])
                exit(1);
        }
 
+       printf("          |"
+              "         Read         |"
+              "         Write        |\n"
+              "          |"
+              "-----------------------"
+              "-----------------------\n"
+              "    Size  |"
+              "    MiB/s         ns  |"
+              "    MiB/s         ns  |\n"
+              "----------|"
+              "-----------------------"
+              "-----------------------\n"
+               );
+
        for (; j < iterations + 1; j++) {
                size = (1 << j) / lsize;
                buf = malloc(size * lsize);
+               mask = size - 1;
 
                if (buf == NULL) {
                        printf("Malloc failed\n");
@@ -58,40 +77,92 @@ int main(int argc, char *argv[])
                for (i = 0; i < size; i++)
                        buf[i] = i;
 
-               printf("%luk\t", size * lsize / 1024);
+               printf("%8dk |", (int)(size * lsize / 1024));
+
+               /* Read bandwidth*/
 
-               printf("read: ");
                fflush(stdout);
 
                gettimeofday(&start, 0);
                for (k = 0; k < count; k++) {
-                       buf[0] = tmp;
-                       for (i = 0; i < size; i++)
-                               tmp += buf[i];
+                       buf[0] = tmp[0];
+                       for (i = 0; i < size; i += 8) {
+                               tmp[0] += buf[i];
+                               tmp[1] += buf[i+1];
+                               tmp[2] += buf[i+2];
+                               tmp[3] += buf[i+3];
+                               tmp[4] += buf[i+4];
+                               tmp[5] += buf[i+5];
+                               tmp[6] += buf[i+6];
+                               tmp[7] += buf[i+7];
+                       }
                }
 
                gettimeofday(&end, 0);
 
-               printf("% 9.2f MB/s ", (double)(size * lsize) * 
+               printf("% 9.2f  ", (double)(size * lsize) * 
                       (1000000 / (1024.0 * 1024.0)) * count / 
                       (double) usec_diff(&start, &end));
 
+               fflush(stdout);
+
+               /* Read latency */
+
+               gettimeofday(&start, 0);
+               for (k = 0; k < latcount; k++) {
+                       buf[0] = tmp[0];
+                       for (i = 0; i < size; i++)
+                               tmp[0] += buf[random() & mask];
+               }
+
+               gettimeofday(&end, 0);
+
+               printf("% 9.3f  |", usec_diff(&start, &end) * 1000 / 
+                      ((double) (size * latcount )));
+
 
-               printf("write: ");
                fflush(stdout);
 
+               /* Write bandwidth */
+
                gettimeofday(&start, 0);
                for (k = 0; k < count; k++)
-                       for (i = 0; i < size; i++)
-                                buf[i] = i;
+                       for (i = 0; i < size; i += 8) {
+                               buf[i  ] = tmp[0];
+                               buf[i+1] = tmp[1];
+                               buf[i+2] = tmp[2];
+                               buf[i+3] = tmp[3];
+                               buf[i+4] = tmp[4];
+                               buf[i+5] = tmp[5];
+                               buf[i+6] = tmp[6];
+                               buf[i+7] = tmp[7];
+                       }
                gettimeofday(&end, 0);
 
-               printf("% 9.2f MB/s ", (double)(size * lsize) * 
+               printf("% 9.2f  ", (double)(size * lsize) * 
                       (1000000 / (1024.0 * 1024.0)) * count / 
                       (double) usec_diff(&start, &end));
        
+               fflush(stdout);
+
+               /* Write latency */
+
+               gettimeofday(&start, 0);
+               for (k = 0; k < latcount; k++)
+                       for (i = 0; i < size; i++)
+                               buf[random() % mask] = i;
+               gettimeofday(&end, 0);
+
+               printf("% 9.3f  |", usec_diff(&start, &end) * 1000 / 
+                      ((double) (size * latcount )));
+       
                printf("\n");
 
+               count /= 2;
+               count = count ? count : 1;
+               latcount = count / 10;
+               latcount = latcount ? latcount : 1;
+
                free(buf);
        }
        return 0;