numactl --interleave=all ./testing_cgeqrf -N 100 -N 1000 --range 10:90:10 --range 100:900:100 --range 1000:9000:1000 --range 10000:20000:2000
MAGMA 1.6.0  compiled for CUDA capability >= 3.5
CUDA runtime 7000, driver 7000. OpenMP threads 16. MKL 11.2.3, MKL threads 16. 
device 0: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 1: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 2: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
Usage: ./testing_cgeqrf [options] [-h|--help]

ngpu 1
    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)   ||R||_F / ||A||_F
=======================================================================
  100   100     ---   (  ---  )      2.53 (   0.00)     ---
 1000  1000     ---   (  ---  )    200.33 (   0.03)     ---
   10    10     ---   (  ---  )      0.11 (   0.00)     ---
   20    20     ---   (  ---  )      0.60 (   0.00)     ---
   30    30     ---   (  ---  )      1.48 (   0.00)     ---
   40    40     ---   (  ---  )      2.56 (   0.00)     ---
   50    50     ---   (  ---  )      3.45 (   0.00)     ---
   60    60     ---   (  ---  )      4.57 (   0.00)     ---
   70    70     ---   (  ---  )      1.65 (   0.00)     ---
   80    80     ---   (  ---  )      2.46 (   0.00)     ---
   90    90     ---   (  ---  )      3.21 (   0.00)     ---
  100   100     ---   (  ---  )      4.29 (   0.00)     ---
  200   200     ---   (  ---  )     15.56 (   0.00)     ---
  300   300     ---   (  ---  )     33.84 (   0.00)     ---
  400   400     ---   (  ---  )     54.11 (   0.01)     ---
  500   500     ---   (  ---  )     78.57 (   0.01)     ---
  600   600     ---   (  ---  )    122.03 (   0.01)     ---
  700   700     ---   (  ---  )    146.82 (   0.01)     ---
  800   800     ---   (  ---  )    184.33 (   0.01)     ---
  900   900     ---   (  ---  )    211.19 (   0.02)     ---
 1000  1000     ---   (  ---  )    245.28 (   0.02)     ---
 2000  2000     ---   (  ---  )    636.57 (   0.07)     ---
 3000  3000     ---   (  ---  )   1023.85 (   0.14)     ---
 4000  4000     ---   (  ---  )   1380.78 (   0.25)     ---
 5000  5000     ---   (  ---  )   1525.60 (   0.44)     ---
 6000  6000     ---   (  ---  )   1796.98 (   0.64)     ---
 7000  7000     ---   (  ---  )   1953.79 (   0.94)     ---
 8000  8000     ---   (  ---  )   2092.46 (   1.31)     ---
 9000  9000     ---   (  ---  )   2176.85 (   1.79)     ---
10000 10000     ---   (  ---  )   2225.92 (   2.40)     ---
12000 12000     ---   (  ---  )   2276.70 (   4.05)     ---
14000 14000     ---   (  ---  )   2386.88 (   6.13)     ---
16000 16000     ---   (  ---  )   2410.47 (   9.06)     ---
18000 18000     ---   (  ---  )   2423.50 (  12.84)     ---
20000 20000     ---   (  ---  )   2463.29 (  17.32)     ---

numactl --interleave=all ./testing_cgeqrf_gpu -N 100 -N 1000 --range 10:90:10 --range 100:900:100 --range 1000:9000:1000 --range 10000:20000:2000
MAGMA 1.6.0  compiled for CUDA capability >= 3.5
CUDA runtime 7000, driver 7000. OpenMP threads 16. MKL 11.2.3, MKL threads 16. 
device 0: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 1: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 2: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
Usage: ./testing_cgeqrf_gpu [options] [-h|--help]

version 1
    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)   ||Ax-b||_F/(N*||A||_F*||x||_F)
====================================================================================
  100   100     ---   (  ---  )      3.13 (   0.00)     ---
 1000  1000     ---   (  ---  )    227.70 (   0.02)     ---
   10    10     ---   (  ---  )      0.01 (   0.00)     ---
   20    20     ---   (  ---  )      0.05 (   0.00)     ---
   30    30     ---   (  ---  )      0.17 (   0.00)     ---
   40    40     ---   (  ---  )      0.34 (   0.00)     ---
   50    50     ---   (  ---  )      0.63 (   0.00)     ---
   60    60     ---   (  ---  )      1.03 (   0.00)     ---
   70    70     ---   (  ---  )      1.18 (   0.00)     ---
   80    80     ---   (  ---  )      1.74 (   0.00)     ---
   90    90     ---   (  ---  )      2.58 (   0.00)     ---
  100   100     ---   (  ---  )      6.99 (   0.00)     ---
  200   200     ---   (  ---  )     14.59 (   0.00)     ---
  300   300     ---   (  ---  )     33.09 (   0.00)     ---
  400   400     ---   (  ---  )     53.73 (   0.01)     ---
  500   500     ---   (  ---  )     81.98 (   0.01)     ---
  600   600     ---   (  ---  )    109.58 (   0.01)     ---
  700   700     ---   (  ---  )    141.41 (   0.01)     ---
  800   800     ---   (  ---  )    172.23 (   0.02)     ---
  900   900     ---   (  ---  )    202.47 (   0.02)     ---
 1000  1000     ---   (  ---  )    237.28 (   0.02)     ---
 2000  2000     ---   (  ---  )    426.74 (   0.10)     ---
 3000  3000     ---   (  ---  )    848.40 (   0.17)     ---
 4000  4000     ---   (  ---  )   1218.24 (   0.28)     ---
 5000  5000     ---   (  ---  )   1410.25 (   0.47)     ---
 6000  6000     ---   (  ---  )   1683.70 (   0.68)     ---
 7000  7000     ---   (  ---  )   1878.09 (   0.97)     ---
 8000  8000     ---   (  ---  )   2008.94 (   1.36)     ---
 9000  9000     ---   (  ---  )   2109.85 (   1.84)     ---
10000 10000     ---   (  ---  )   2179.43 (   2.45)     ---
12000 12000     ---   (  ---  )   2288.47 (   4.03)     ---
14000 14000     ---   (  ---  )   2330.71 (   6.28)     ---
16000 16000     ---   (  ---  )   2389.16 (   9.14)     ---
18000 18000     ---   (  ---  )   2418.19 (  12.86)     ---
20000 20000     ---   (  ---  )   2455.86 (  17.38)     ---
