numactl --interleave=all ./testing_dgeqrf -N 100 -N 1000 --range 10:90:10 --range 100:900:100 --range 1000:9000:1000 --range 10000:20000:2000
MAGMA 1.6.1  compiled for CUDA capability >= 3.5
CUDA runtime 7000, driver 7000. OpenMP threads 16. MKL 11.2.3, MKL threads 16. 
ndevices 3
device 0: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 1: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 2: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
Usage: ./testing_dgeqrf [options] [-h|--help]

ngpu 1
    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)   |R - Q^H*A|   |I - Q^H*Q|
===============================================================================
  100   100     ---   (  ---  )      0.33 (   0.00)       ---
 1000  1000     ---   (  ---  )     69.55 (   0.02)       ---
   10    10     ---   (  ---  )      0.03 (   0.00)       ---
   20    20     ---   (  ---  )      0.17 (   0.00)       ---
   30    30     ---   (  ---  )      0.46 (   0.00)       ---
   40    40     ---   (  ---  )      0.84 (   0.00)       ---
   50    50     ---   (  ---  )      1.25 (   0.00)       ---
   60    60     ---   (  ---  )      1.61 (   0.00)       ---
   70    70     ---   (  ---  )      0.47 (   0.00)       ---
   80    80     ---   (  ---  )      0.71 (   0.00)       ---
   90    90     ---   (  ---  )      0.96 (   0.00)       ---
  100   100     ---   (  ---  )      1.24 (   0.00)       ---
  200   200     ---   (  ---  )      4.54 (   0.00)       ---
  300   300     ---   (  ---  )     10.16 (   0.00)       ---
  400   400     ---   (  ---  )     16.67 (   0.01)       ---
  500   500     ---   (  ---  )     24.70 (   0.01)       ---
  600   600     ---   (  ---  )     32.95 (   0.01)       ---
  700   700     ---   (  ---  )     42.56 (   0.01)       ---
  800   800     ---   (  ---  )     51.44 (   0.01)       ---
  900   900     ---   (  ---  )     59.56 (   0.02)       ---
 1000  1000     ---   (  ---  )     71.06 (   0.02)       ---
 2000  2000     ---   (  ---  )    186.61 (   0.06)       ---
 3000  3000     ---   (  ---  )    305.99 (   0.12)       ---
 4000  4000     ---   (  ---  )    378.07 (   0.23)       ---
 5000  5000     ---   (  ---  )    483.83 (   0.34)       ---
 6000  6000     ---   (  ---  )    584.61 (   0.49)       ---
 7000  7000     ---   (  ---  )    707.00 (   0.65)       ---
 8000  8000     ---   (  ---  )    733.32 (   0.93)       ---
 9000  9000     ---   (  ---  )    779.32 (   1.25)       ---
10000 10000     ---   (  ---  )    840.03 (   1.59)       ---
12000 12000     ---   (  ---  )    901.33 (   2.56)       ---
14000 14000     ---   (  ---  )    947.07 (   3.86)       ---
16000 16000     ---   (  ---  )    962.32 (   5.68)       ---
18000 18000     ---   (  ---  )    999.23 (   7.78)       ---
20000 20000     ---   (  ---  )   1014.13 (  10.52)       ---

numactl --interleave=all ./testing_dgeqrf_gpu -N 100 -N 1000 --range 10:90:10 --range 100:900:100 --range 1000:9000:1000 --range 10000:20000:2000
MAGMA 1.6.1  compiled for CUDA capability >= 3.5
CUDA runtime 7000, driver 7000. OpenMP threads 16. MKL 11.2.3, MKL threads 16. 
ndevices 3
device 0: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 1: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
device 2: Tesla K40c, 745.0 MHz clock, 11519.6 MB memory, capability 3.5
Usage: ./testing_dgeqrf_gpu [options] [-h|--help]

version 1
    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)    |b - A*x|
================================================================
  100   100     ---   (  ---  )      0.48 (   0.00)       ---
 1000  1000     ---   (  ---  )     70.28 (   0.02)       ---
   10    10     ---   (  ---  )      0.00 (   0.00)       ---
   20    20     ---   (  ---  )      0.01 (   0.00)       ---
   30    30     ---   (  ---  )      0.04 (   0.00)       ---
   40    40     ---   (  ---  )      0.08 (   0.00)       ---
   50    50     ---   (  ---  )      0.16 (   0.00)       ---
   60    60     ---   (  ---  )      0.27 (   0.00)       ---
   70    70     ---   (  ---  )      0.31 (   0.00)       ---
   80    80     ---   (  ---  )      0.46 (   0.00)       ---
   90    90     ---   (  ---  )      0.70 (   0.00)       ---
  100   100     ---   (  ---  )      2.03 (   0.00)       ---
  200   200     ---   (  ---  )      4.09 (   0.00)       ---
  300   300     ---   (  ---  )      9.35 (   0.00)       ---
  400   400     ---   (  ---  )     15.81 (   0.01)       ---
  500   500     ---   (  ---  )     24.34 (   0.01)       ---
  600   600     ---   (  ---  )     32.75 (   0.01)       ---
  700   700     ---   (  ---  )     42.67 (   0.01)       ---
  800   800     ---   (  ---  )     52.59 (   0.01)       ---
  900   900     ---   (  ---  )     61.51 (   0.02)       ---
 1000  1000     ---   (  ---  )     73.34 (   0.02)       ---
 2000  2000     ---   (  ---  )    195.61 (   0.05)       ---
 3000  3000     ---   (  ---  )    311.99 (   0.12)       ---
 4000  4000     ---   (  ---  )    400.75 (   0.21)       ---
 5000  5000     ---   (  ---  )    517.73 (   0.32)       ---
 6000  6000     ---   (  ---  )    622.53 (   0.46)       ---
 7000  7000     ---   (  ---  )    688.27 (   0.66)       ---
 8000  8000     ---   (  ---  )    749.37 (   0.91)       ---
 9000  9000     ---   (  ---  )    790.68 (   1.23)       ---
10000 10000     ---   (  ---  )    825.02 (   1.62)       ---
12000 12000     ---   (  ---  )    890.99 (   2.59)       ---
14000 14000     ---   (  ---  )    938.17 (   3.90)       ---
16000 16000     ---   (  ---  )    970.53 (   5.63)       ---
18000 18000     ---   (  ---  )    985.13 (   7.89)       ---
20000 20000     ---   (  ---  )   1002.88 (  10.64)       ---
