logoalt Hacker News

wang_li · yesterday at 4:53 PM · 0 replies · view on HN

Yes, all the M-series have more cores, they often have better thermal management, and they have more memory bandwidth. (The Neo still has crazy high bandwidth.) But for a single-threaded, strictly compute task that runs in 10 seconds, it outperforms the M4 cores. I don't know why; I'm just sharing my experience.

The actual code I am using for this is:

    #include <stdio.h>
    #include <sys/time.h>

    /*
     * Current wall-clock time in seconds, as a double with microsecond
     * resolution.
     *
     * Relies on the gettimeofday() declaration supplied by <sys/time.h>.
     */
    static double wall_seconds(void) {
      struct timeval tv;

      gettimeofday(&tv, NULL);
      return (tv.tv_sec * 1000000.0 + tv.tv_usec) / 1000000.0;
    }

    /*
     * Single-threaded Mandelbrot timing loop.
     *
     * Repeatedly iterates z = z*z + c for every point of a 200x200 grid that
     * starts at (-2, -1.5) and stops short of (1, 1.5), with at most 1000
     * iterations per point, until at least 10 seconds of wall-clock time have
     * elapsed.  It then prints how many complete passes over the grid were
     * made and the elapsed time.
     *
     * The clock is sampled only between passes, so a run can overshoot the
     * 10 second budget by up to one pass.
     *
     * argc and argv are not used.  Returns 0.
     */
    int main(int argc, char *argv[]) {
      /* Region of the complex plane that is sampled. */
      const double xmin = -2.0, xmax = 1.0;
      const double ymin = -1.5, ymax = 1.5;
      /* Grid resolution and per-point iteration cap. */
      const int width = 200, height = 200;
      const int max_iter = 1000;
      /* Wall-clock budget for the whole run, in seconds. */
      const double run_seconds = 10.0;
      /* Distance between neighbouring grid points along each axis. */
      const double xs = (xmax - xmin) / (double) width;
      const double ys = (ymax - ymin) / (double) height;
      /*
       * volatile forces every read and write of the iteration state to be
       * performed.  The values are never consumed after the loop, so this is
       * presumably what keeps an optimizer from discarding the computation.
       */
      volatile double wx, wy, t;
      double x, y;
      double start, now;
      int i, px, py;
      int count = 0;

      (void) argc;
      (void) argv;

      start = wall_seconds();
      now = start;
      while (now - start < run_seconds) {
        /* One full pass; coordinates advance by repeated addition of the step. */
        for (y = ymin, py = 0; py < height; py++, y += ys) {
          for (x = xmin, px = 0; px < width; px++, x += xs) {
            wx = 0.0;
            wy = 0.0;
            /* Stop on escape (|z|^2 >= 4) or when the iteration cap is hit. */
            for (i = 0; i < max_iter && (wx * wx + wy * wy) < 4; i++) {
              t = wx * wx - wy * wy + x;
              wy = 2 * wx * wy + y;
              wx = t;
            }
          }
        }
        now = wall_seconds();
        count++;
      }
      printf("%d iterations in %.2f seconds.\n", count, now - start);
      return 0;
    }