-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathflops.h
351 lines (281 loc) · 23 KB
/
flops.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
/**
*
* @file flops.h
*
* File provided by Univ. of Tennessee,
*
* @version 1.0.0
* @author Mathieu Faverge
* @date 2010-12-20
*
**/
/*
* This file provide the flops formula for all Level 3 BLAS and some
* Lapack routines. Each macro uses the same size parameters as the
* function associated and provide one formula for additions and one
* for multiplications. Example to use these macros:
*
* FLOPS_ZGEMM( m, n, k )
*
* All the formula are reported in the LAPACK Lawn 41:
* http://www.netlib.org/lapack/lawns/lawn41.ps
*/
#ifndef _FLOPS_H_
#define _FLOPS_H_
/************************************************************************
* Generic formula coming from LAWN 41
***********************************************************************/
/*
* Level 2 BLAS
*/
#define FMULS_GEMV(__m, __n) ((double)(__m) * (double)(__n) + 2. * (double)(__m))
#define FADDS_GEMV(__m, __n) ((double)(__m) * (double)(__n) )
#define FMULS_SYMV(__n) FMULS_GEMV( (__n), (__n) )
#define FADDS_SYMV(__n) FADDS_GEMV( (__n), (__n) )
#define FMULS_HEMV FMULS_SYMV
#define FADDS_HEMV FADDS_SYMV
/*
* Level 3 BLAS
*/
#define FMULS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k))
#define FADDS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k))
#define FMULS_SYMM(__side, __m, __n) ( ( (__side) == Left ) ? FMULS_GEMM((__m), (__m), (__n)) : FMULS_GEMM((__m), (__n), (__n)) )
#define FADDS_SYMM(__side, __m, __n) ( ( (__side) == Left ) ? FADDS_GEMM((__m), (__m), (__n)) : FADDS_GEMM((__m), (__n), (__n)) )
#define FMULS_HEMM FMULS_SYMM
#define FADDS_HEMM FADDS_SYMM
#define FMULS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.))
#define FADDS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.))
#define FMULS_HERK FMULS_SYRK
#define FADDS_HERK FADDS_SYRK
#define FMULS_SYR2K(__k, __n) ((double)(__k) * (double)(__n) * (double)(__n) )
#define FADDS_SYR2K(__k, __n) ((double)(__k) * (double)(__n) * (double)(__n) + (double)(__n))
#define FMULS_HER2K FMULS_SYR2K
#define FADDS_HER2K FADDS_SYR2K
#define FMULS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)+1.))
#define FADDS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)-1.))
#define FMULS_TRMM(__side, __m, __n) ( ( (__side) == 'L' ) ? FMULS_TRMM_2((__m), (__n)) : FMULS_TRMM_2((__n), (__m)) )
#define FADDS_TRMM(__side, __m, __n) ( ( (__side) == 'L' ) ? FADDS_TRMM_2((__m), (__n)) : FADDS_TRMM_2((__n), (__m)) )
#define FMULS_TRSM FMULS_TRMM
#define FADDS_TRSM FMULS_TRMM
/*
* Lapack
*/
#define FMULS_GETRF(__m, __n) ( ((__m) < (__n)) ? (0.5 * (double)(__m) * ((double)(__m) * ((double)(__n) - (1./3.) * (__m) - 1. ) + (double)(__n)) + (2. / 3.) * (__m)) \
: (0.5 * (double)(__n) * ((double)(__n) * ((double)(__m) - (1./3.) * (__n) - 1. ) + (double)(__m)) + (2. / 3.) * (__n)) )
#define FADDS_GETRF(__m, __n) ( ((__m) < (__n)) ? (0.5 * (double)(__m) * ((double)(__m) * ((double)(__n) - (1./3.) * (__m) ) - (double)(__n)) + (1. / 6.) * (__m)) \
: (0.5 * (double)(__n) * ((double)(__n) * ((double)(__m) - (1./3.) * (__n) ) - (double)(__m)) + (1. / 6.) * (__n)) )
#define FMULS_GETRI(__n) ( (double)(__n) * ((5. / 6.) + (double)(__n) * ((2. / 3.) * (double)(__n) + 0.5)) )
#define FADDS_GETRI(__n) ( (double)(__n) * ((5. / 6.) + (double)(__n) * ((2. / 3.) * (double)(__n) - 1.5)) )
#define FMULS_GETRS(__n, __nrhs) ((double)(__nrhs) * (double)(__n) * (double)(__n) )
#define FADDS_GETRS(__n, __nrhs) ((double)(__nrhs) * (double)(__n) * ((double)(__n) - 1. ))
#define FMULS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) + 0.5) * (double)(__n) + (1. / 3.)))
#define FADDS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) ) * (double)(__n) - (1. / 6.)))
#define FMULS_POTRI(__n) ( (double)(__n) * ((2. / 3.) + (double)(__n) * ((1. / 3.) * (double)(__n) + 1. )) )
#define FADDS_POTRI(__n) ( (double)(__n) * ((1. / 6.) + (double)(__n) * ((1. / 3.) * (double)(__n) - 0.5)) )
#define FMULS_POTRS(__n, __nrhs) ((double)(__nrhs) * (double)(__n) * ((double)(__n) + 1. ))
#define FADDS_POTRS(__n, __nrhs) ((double)(__nrhs) * (double)(__n) * ((double)(__n) - 1. ))
//SPBTRF
//SPBTRS
//SSYTRF
//SSYTRI
//SSYTRS
#define FMULS_GEQRF(__m, __n) (((__m) > (__n)) ? ((double)(__n) * ((double)(__n) * ( 0.5-(1./3.) * (double)(__n) + (double)(__m)) + (double)(__m) + 23. / 6.)) \
: ((double)(__m) * ((double)(__m) * ( -0.5-(1./3.) * (double)(__m) + (double)(__n)) + 2.*(double)(__n) + 23. / 6.)) )
#define FADDS_GEQRF(__m, __n) (((__m) > (__n)) ? ((double)(__n) * ((double)(__n) * ( 0.5-(1./3.) * (double)(__n) + (double)(__m)) + 5. / 6.)) \
: ((double)(__m) * ((double)(__m) * ( -0.5-(1./3.) * (double)(__m) + (double)(__n)) + (double)(__n) + 5. / 6.)) )
#define FMULS_GEQLF(__m, __n) FMULS_GEQRF(__m, __n)
#define FADDS_GEQLF(__m, __n) FADDS_GEQRF(__m, __n)
#define FMULS_GERQF(__m, __n) (((__m) > (__n)) ? ((double)(__n) * ((double)(__n) * ( 0.5-(1./3.) * (double)(__n) + (double)(__m)) + (double)(__m) + 29. / 6.)) \
: ((double)(__m) * ((double)(__m) * ( -0.5-(1./3.) * (double)(__m) + (double)(__n)) + 2.*(double)(__n) + 29. / 6.)) )
#define FADDS_GERQF(__m, __n) (((__m) > (__n)) ? ((double)(__n) * ((double)(__n) * ( -0.5-(1./3.) * (double)(__n) + (double)(__m)) + (double)(__m) + 5. / 6.)) \
: ((double)(__m) * ((double)(__m) * ( 0.5-(1./3.) * (double)(__m) + (double)(__n)) + + 5. / 6.)) )
#define FMULS_GELQF(__m, __n) FMULS_GERQF(__m, __n)
#define FADDS_GELQF(__m, __n) FADDS_GERQF(__m, __n)
#define FMULS_UNGQR(__m, __n, __k) ((double)(__k) * (2.* (double)(__m) * (double)(__n) + 2. * (double)(__n) - 5./3. + (double)(__k) * ( 2./3. * (double)(__k) - ((double)(__m) + (double)(__n)) - 1.)))
#define FADDS_UNGQR(__m, __n, __k) ((double)(__k) * (2.* (double)(__m) * (double)(__n) + (double)(__n) - (double)(__m) + 1./3. + (double)(__k) * ( 2./3. * (double)(__k) - ((double)(__m) + (double)(__n)) )))
#define FMULS_UNGQL FMULS_UNGQR
#define FMULS_ORGQR FMULS_UNGQR
#define FMULS_ORGQL FMULS_UNGQR
#define FADDS_UNGQL FADDS_UNGQR
#define FADDS_ORGQR FADDS_UNGQR
#define FADDS_ORGQL FADDS_UNGQR
#define FMULS_UNGRQ(__m, __n, __k) ((double)(__k) * (2.* (double)(__m) * (double)(__n) + (double)(__m) + (double)(__n) - 2./3. + (double)(__k) * ( 2./3. * (double)(__k) - ((double)(__m) + (double)(__n)) - 1.)))
#define FADDS_UNGRQ(__m, __n, __k) ((double)(__k) * (2.* (double)(__m) * (double)(__n) + (double)(__m) - (double)(__n) + 1./3. + (double)(__k) * ( 2./3. * (double)(__k) - ((double)(__m) + (double)(__n)) )))
#define FMULS_UNGLQ FMULS_UNGRQ
#define FMULS_ORGRQ FMULS_UNGRQ
#define FMULS_ORGLQ FMULS_UNGRQ
#define FADDS_UNGLQ FADDS_UNGRQ
#define FADDS_ORGRQ FADDS_UNGRQ
#define FADDS_ORGLQ FADDS_UNGRQ
#define FMULS_GEQRS(__m, __n, __nrhs) ((double)(__nrhs) * ((double)(__n) * ( 2.* (double)(__m) - 0.5 * (double)(__n) + 2.5)))
#define FADDS_GEQRS(__m, __n, __nrhs) ((double)(__nrhs) * ((double)(__n) * ( 2.* (double)(__m) - 0.5 * (double)(__n) + 0.5)))
//UNMQR, UNMLQ, UNMQL, UNMRQ (Left)
//UNMQR, UNMLQ, UNMQL, UNMRQ (Right)
#define FMULS_TRTRI(__n) ((double)(__n) * ((double)(__n) * ( 1./6. * (double)(__n) + 0.5 ) + 1./3.))
#define FADDS_TRTRI(__n) ((double)(__n) * ((double)(__n) * ( 1./6. * (double)(__n) - 0.5 ) + 1./3.))
#define FMULS_GEHRD(__n) ( (double)(__n) * ((double)(__n) * (5./3. *(double)(__n) + 0.5) - 7./6.) - 13. )
#define FADDS_GEHRD(__n) ( (double)(__n) * ((double)(__n) * (5./3. *(double)(__n) - 1. ) - 2./3.) - 8. )
#define FMULS_SYTRD(__n) ( (double)(__n) * ( (double)(__n) * ( 2./3. * (double)(__n) + 2.5 ) - 1./6. ) - 15.)
#define FADDS_SYTRD(__n) ( (double)(__n) * ( (double)(__n) * ( 2./3. * (double)(__n) + 1. ) - 8./3. ) - 4.)
#define FMULS_HETRD FMULS_SYTRD
#define FADDS_HETRD FADDS_SYTRD
#define FMULS_GEBRD(__m, __n) ( ((__m) >= (__n)) ? ((double)(__n) * ((double)(__n) * (2. * (double)(__m) - 2./3. * (double)(__n) + 2. ) + 20./3.)) \
: ((double)(__m) * ((double)(__m) * (2. * (double)(__n) - 2./3. * (double)(__m) + 2. ) + 20./3.)) )
#define FADDS_GEBRD(__m, __n) ( ((__m) >= (__n)) ? ((double)(__n) * ((double)(__n) * (2. * (double)(__m) - 2./3. * (double)(__n) + 1. ) - (double)(__m) + 5./3.)) \
: ((double)(__m) * ((double)(__m) * (2. * (double)(__n) - 2./3. * (double)(__m) + 1. ) - (double)(__n) + 5./3.)) )
/*******************************************************************************
* Users functions
******************************************************************************/
/*
* Level 2 BLAS
*/
#define FLOPS_ZGEMV(__m, __n) (6. * FMULS_GEMV((__m), (__n)) + 2.0 * FADDS_GEMV((__m), (__n)) )
#define FLOPS_CGEMV(__m, __n) (6. * FMULS_GEMV((__m), (__n)) + 2.0 * FADDS_GEMV((__m), (__n)) )
#define FLOPS_DGEMV(__m, __n) ( FMULS_GEMV((__m), (__n)) + FADDS_GEMV((__m), (__n)) )
#define FLOPS_SGEMV(__m, __n) ( FMULS_GEMV((__m), (__n)) + FADDS_GEMV((__m), (__n)) )
#define FLOPS_ZHEMV(__n) (6. * FMULS_HEMV((__n)) + 2.0 * FADDS_HEMV((__n)) )
#define FLOPS_CHEMV(__n) (6. * FMULS_HEMV((__n)) + 2.0 * FADDS_HEMV((__n)) )
#define FLOPS_ZSYMV(__n) (6. * FMULS_SYMV((__n)) + 2.0 * FADDS_SYMV((__n)) )
#define FLOPS_CSYMV(__n) (6. * FMULS_SYMV((__n)) + 2.0 * FADDS_SYMV((__n)) )
#define FLOPS_DSYMV(__n) ( FMULS_SYMV((__n)) + FADDS_SYMV((__n)) )
#define FLOPS_SSYMV(__n) ( FMULS_SYMV((__n)) + FADDS_SYMV((__n)) )
/*
* Level 3 BLAS
*/
#define FLOPS_ZGEMM(__m, __n, __k) (6. * FMULS_GEMM((__m), (__n), (__k)) + 2.0 * FADDS_GEMM((__m), (__n), (__k)) )
#define FLOPS_CGEMM(__m, __n, __k) (6. * FMULS_GEMM((__m), (__n), (__k)) + 2.0 * FADDS_GEMM((__m), (__n), (__k)) )
#define FLOPS_DGEMM(__m, __n, __k) ( FMULS_GEMM((__m), (__n), (__k)) + FADDS_GEMM((__m), (__n), (__k)) )
#define FLOPS_SGEMM(__m, __n, __k) ( FMULS_GEMM((__m), (__n), (__k)) + FADDS_GEMM((__m), (__n), (__k)) )
#define FLOPS_ZHEMM(__side, __m, __n) (6. * FMULS_HEMM(__side, (__m), (__n)) + 2.0 * FADDS_HEMM(__side, (__m), (__n)) )
#define FLOPS_CHEMM(__side, __m, __n) (6. * FMULS_HEMM(__side, (__m), (__n)) + 2.0 * FADDS_HEMM(__side, (__m), (__n)) )
#define FLOPS_ZSYMM(__side, __m, __n) (6. * FMULS_SYMM(__side, (__m), (__n)) + 2.0 * FADDS_SYMM(__side, (__m), (__n)) )
#define FLOPS_CSYMM(__side, __m, __n) (6. * FMULS_SYMM(__side, (__m), (__n)) + 2.0 * FADDS_SYMM(__side, (__m), (__n)) )
#define FLOPS_DSYMM(__side, __m, __n) ( FMULS_SYMM(__side, (__m), (__n)) + FADDS_SYMM(__side, (__m), (__n)) )
#define FLOPS_SSYMM(__side, __m, __n) ( FMULS_SYMM(__side, (__m), (__n)) + FADDS_SYMM(__side, (__m), (__n)) )
#define FLOPS_ZHERK(__k, __n) (6. * FMULS_HERK((__k), (__n)) + 2.0 * FADDS_HERK((__k), (__n)) )
#define FLOPS_CHERK(__k, __n) (6. * FMULS_HERK((__k), (__n)) + 2.0 * FADDS_HERK((__k), (__n)) )
#define FLOPS_ZSYRK(__k, __n) (6. * FMULS_SYRK((__k), (__n)) + 2.0 * FADDS_SYRK((__k), (__n)) )
#define FLOPS_CSYRK(__k, __n) (6. * FMULS_SYRK((__k), (__n)) + 2.0 * FADDS_SYRK((__k), (__n)) )
#define FLOPS_DSYRK(__k, __n) ( FMULS_SYRK((__k), (__n)) + FADDS_SYRK((__k), (__n)) )
#define FLOPS_SSYRK(__k, __n) ( FMULS_SYRK((__k), (__n)) + FADDS_SYRK((__k), (__n)) )
#define FLOPS_ZHER2K(__k, __n) (6. * FMULS_HER2K((__k), (__n)) + 2.0 * FADDS_HER2K((__k), (__n)) )
#define FLOPS_CHER2K(__k, __n) (6. * FMULS_HER2K((__k), (__n)) + 2.0 * FADDS_HER2K((__k), (__n)) )
#define FLOPS_ZSYR2K(__k, __n) (6. * FMULS_SYR2K((__k), (__n)) + 2.0 * FADDS_SYR2K((__k), (__n)) )
#define FLOPS_CSYR2K(__k, __n) (6. * FMULS_SYR2K((__k), (__n)) + 2.0 * FADDS_SYR2K((__k), (__n)) )
#define FLOPS_DSYR2K(__k, __n) ( FMULS_SYR2K((__k), (__n)) + FADDS_SYR2K((__k), (__n)) )
#define FLOPS_SSYR2K(__k, __n) ( FMULS_SYR2K((__k), (__n)) + FADDS_SYR2K((__k), (__n)) )
#define FLOPS_ZTRMM(__side, __m, __n) (6. * FMULS_TRMM(__side, (__m), (__n)) + 2.0 * FADDS_TRMM(__side, (__m), (__n)) )
#define FLOPS_CTRMM(__side, __m, __n) (6. * FMULS_TRMM(__side, (__m), (__n)) + 2.0 * FADDS_TRMM(__side, (__m), (__n)) )
#define FLOPS_DTRMM(__side, __m, __n) ( FMULS_TRMM(__side, (__m), (__n)) + FADDS_TRMM(__side, (__m), (__n)) )
#define FLOPS_STRMM(__side, __m, __n) ( FMULS_TRMM(__side, (__m), (__n)) + FADDS_TRMM(__side, (__m), (__n)) )
#define FLOPS_ZTRSM(__side, __m, __n) (6. * FMULS_TRSM(__side, (__m), (__n)) + 2.0 * FADDS_TRSM(__side, (__m), (__n)) )
#define FLOPS_CTRSM(__side, __m, __n) (6. * FMULS_TRSM(__side, (__m), (__n)) + 2.0 * FADDS_TRSM(__side, (__m), (__n)) )
#define FLOPS_DTRSM(__side, __m, __n) ( FMULS_TRSM(__side, (__m), (__n)) + FADDS_TRSM(__side, (__m), (__n)) )
#define FLOPS_STRSM(__side, __m, __n) ( FMULS_TRSM(__side, (__m), (__n)) + FADDS_TRSM(__side, (__m), (__n)) )
/*
* Lapack
*/
#define FLOPS_ZGETRF(__m, __n) (6. * FMULS_GETRF((__m), (__n)) + 2.0 * FADDS_GETRF((__m), (__n)) )
#define FLOPS_CGETRF(__m, __n) (6. * FMULS_GETRF((__m), (__n)) + 2.0 * FADDS_GETRF((__m), (__n)) )
#define FLOPS_DGETRF(__m, __n) ( FMULS_GETRF((__m), (__n)) + FADDS_GETRF((__m), (__n)) )
#define FLOPS_SGETRF(__m, __n) ( FMULS_GETRF((__m), (__n)) + FADDS_GETRF((__m), (__n)) )
#define FLOPS_ZGETRI(__n) (6. * FMULS_GETRI((__n)) + 2.0 * FADDS_GETRI((__n)) )
#define FLOPS_CGETRI(__n) (6. * FMULS_GETRI((__n)) + 2.0 * FADDS_GETRI((__n)) )
#define FLOPS_DGETRI(__n) ( FMULS_GETRI((__n)) + FADDS_GETRI((__n)) )
#define FLOPS_SGETRI(__n) ( FMULS_GETRI((__n)) + FADDS_GETRI((__n)) )
#define FLOPS_ZGETRS(__n, __nrhs) (6. * FMULS_GETRS((__n), (__nrhs)) + 2.0 * FADDS_GETRS((__n), (__nrhs)) )
#define FLOPS_CGETRS(__n, __nrhs) (6. * FMULS_GETRS((__n), (__nrhs)) + 2.0 * FADDS_GETRS((__n), (__nrhs)) )
#define FLOPS_DGETRS(__n, __nrhs) ( FMULS_GETRS((__n), (__nrhs)) + FADDS_GETRS((__n), (__nrhs)) )
#define FLOPS_SGETRS(__n, __nrhs) ( FMULS_GETRS((__n), (__nrhs)) + FADDS_GETRS((__n), (__nrhs)) )
#define FLOPS_ZPOTRF(__n) (6. * FMULS_POTRF((__n)) + 2.0 * FADDS_POTRF((__n)) )
#define FLOPS_CPOTRF(__n) (6. * FMULS_POTRF((__n)) + 2.0 * FADDS_POTRF((__n)) )
#define FLOPS_DPOTRF(__n) ( FMULS_POTRF((__n)) + FADDS_POTRF((__n)) )
#define FLOPS_SPOTRF(__n) ( FMULS_POTRF((__n)) + FADDS_POTRF((__n)) )
#define FLOPS_ZPOTRI(__n) (6. * FMULS_POTRI((__n)) + 2.0 * FADDS_POTRI((__n)) )
#define FLOPS_CPOTRI(__n) (6. * FMULS_POTRI((__n)) + 2.0 * FADDS_POTRI((__n)) )
#define FLOPS_DPOTRI(__n) ( FMULS_POTRI((__n)) + FADDS_POTRI((__n)) )
#define FLOPS_SPOTRI(__n) ( FMULS_POTRI((__n)) + FADDS_POTRI((__n)) )
#define FLOPS_ZPOTRS(__n, __nrhs) (6. * FMULS_POTRS((__n), (__nrhs)) + 2.0 * FADDS_POTRS((__n), (__nrhs)) )
#define FLOPS_CPOTRS(__n, __nrhs) (6. * FMULS_POTRS((__n), (__nrhs)) + 2.0 * FADDS_POTRS((__n), (__nrhs)) )
#define FLOPS_DPOTRS(__n, __nrhs) ( FMULS_POTRS((__n), (__nrhs)) + FADDS_POTRS((__n), (__nrhs)) )
#define FLOPS_SPOTRS(__n, __nrhs) ( FMULS_POTRS((__n), (__nrhs)) + FADDS_POTRS((__n), (__nrhs)) )
#define FLOPS_ZGEQRF(__m, __n) (6. * FMULS_GEQRF((__m), (__n)) + 2.0 * FADDS_GEQRF((__m), (__n)) )
#define FLOPS_CGEQRF(__m, __n) (6. * FMULS_GEQRF((__m), (__n)) + 2.0 * FADDS_GEQRF((__m), (__n)) )
#define FLOPS_DGEQRF(__m, __n) ( FMULS_GEQRF((__m), (__n)) + FADDS_GEQRF((__m), (__n)) )
#define FLOPS_SGEQRF(__m, __n) ( FMULS_GEQRF((__m), (__n)) + FADDS_GEQRF((__m), (__n)) )
#define FLOPS_ZGEQLF(__m, __n) (6. * FMULS_GEQLF((__m), (__n)) + 2.0 * FADDS_GEQLF((__m), (__n)) )
#define FLOPS_CGEQLF(__m, __n) (6. * FMULS_GEQLF((__m), (__n)) + 2.0 * FADDS_GEQLF((__m), (__n)) )
#define FLOPS_DGEQLF(__m, __n) ( FMULS_GEQLF((__m), (__n)) + FADDS_GEQLF((__m), (__n)) )
#define FLOPS_SGEQLF(__m, __n) ( FMULS_GEQLF((__m), (__n)) + FADDS_GEQLF((__m), (__n)) )
#define FLOPS_ZGERQF(__m, __n) (6. * FMULS_GERQF((__m), (__n)) + 2.0 * FADDS_GERQF((__m), (__n)) )
#define FLOPS_CGERQF(__m, __n) (6. * FMULS_GERQF((__m), (__n)) + 2.0 * FADDS_GERQF((__m), (__n)) )
#define FLOPS_DGERQF(__m, __n) ( FMULS_GERQF((__m), (__n)) + FADDS_GERQF((__m), (__n)) )
#define FLOPS_SGERQF(__m, __n) ( FMULS_GERQF((__m), (__n)) + FADDS_GERQF((__m), (__n)) )
#define FLOPS_ZGELQF(__m, __n) (6. * FMULS_GELQF((__m), (__n)) + 2.0 * FADDS_GELQF((__m), (__n)) )
#define FLOPS_CGELQF(__m, __n) (6. * FMULS_GELQF((__m), (__n)) + 2.0 * FADDS_GELQF((__m), (__n)) )
#define FLOPS_DGELQF(__m, __n) ( FMULS_GELQF((__m), (__n)) + FADDS_GELQF((__m), (__n)) )
#define FLOPS_SGELQF(__m, __n) ( FMULS_GELQF((__m), (__n)) + FADDS_GELQF((__m), (__n)) )
#define FLOPS_ZUNGQR(__m, __n, __k) (6. * FMULS_UNGQR((__m), (__n), (__k)) + 2.0 * FADDS_UNGQR((__m), (__n), (__k)) )
#define FLOPS_CUNGQR(__m, __n, __k) (6. * FMULS_UNGQR((__m), (__n), (__k)) + 2.0 * FADDS_UNGQR((__m), (__n), (__k)) )
#define FLOPS_DUNGQR(__m, __n, __k) ( FMULS_UNGQR((__m), (__n), (__k)) + FADDS_UNGQR((__m), (__n), (__k)) )
#define FLOPS_SUNGQR(__m, __n, __k) ( FMULS_UNGQR((__m), (__n), (__k)) + FADDS_UNGQR((__m), (__n), (__k)) )
#define FLOPS_ZUNGQL(__m, __n, __k) (6. * FMULS_UNGQL((__m), (__n), (__k)) + 2.0 * FADDS_UNGQL((__m), (__n), (__k)) )
#define FLOPS_CUNGQL(__m, __n, __k) (6. * FMULS_UNGQL((__m), (__n), (__k)) + 2.0 * FADDS_UNGQL((__m), (__n), (__k)) )
#define FLOPS_DUNGQL(__m, __n, __k) ( FMULS_UNGQL((__m), (__n), (__k)) + FADDS_UNGQL((__m), (__n), (__k)) )
#define FLOPS_SUNGQL(__m, __n, __k) ( FMULS_UNGQL((__m), (__n), (__k)) + FADDS_UNGQL((__m), (__n), (__k)) )
#define FLOPS_ZORGQR(__m, __n, __k) (6. * FMULS_ORGQR((__m), (__n), (__k)) + 2.0 * FADDS_ORGQR((__m), (__n), (__k)) )
#define FLOPS_CORGQR(__m, __n, __k) (6. * FMULS_ORGQR((__m), (__n), (__k)) + 2.0 * FADDS_ORGQR((__m), (__n), (__k)) )
#define FLOPS_DORGQR(__m, __n, __k) ( FMULS_ORGQR((__m), (__n), (__k)) + FADDS_ORGQR((__m), (__n), (__k)) )
#define FLOPS_SORGQR(__m, __n, __k) ( FMULS_ORGQR((__m), (__n), (__k)) + FADDS_ORGQR((__m), (__n), (__k)) )
#define FLOPS_ZORGQL(__m, __n, __k) (6. * FMULS_ORGQL((__m), (__n), (__k)) + 2.0 * FADDS_ORGQL((__m), (__n), (__k)) )
#define FLOPS_CORGQL(__m, __n, __k) (6. * FMULS_ORGQL((__m), (__n), (__k)) + 2.0 * FADDS_ORGQL((__m), (__n), (__k)) )
#define FLOPS_DORGQL(__m, __n, __k) ( FMULS_ORGQL((__m), (__n), (__k)) + FADDS_ORGQL((__m), (__n), (__k)) )
#define FLOPS_SORGQL(__m, __n, __k) ( FMULS_ORGQL((__m), (__n), (__k)) + FADDS_ORGQL((__m), (__n), (__k)) )
#define FLOPS_ZUNGRQ(__m, __n, __k) (6. * FMULS_UNGRQ((__m), (__n), (__k)) + 2.0 * FADDS_UNGRQ((__m), (__n), (__k)) )
#define FLOPS_CUNGRQ(__m, __n, __k) (6. * FMULS_UNGRQ((__m), (__n), (__k)) + 2.0 * FADDS_UNGRQ((__m), (__n), (__k)) )
#define FLOPS_DUNGRQ(__m, __n, __k) ( FMULS_UNGRQ((__m), (__n), (__k)) + FADDS_UNGRQ((__m), (__n), (__k)) )
#define FLOPS_SUNGRQ(__m, __n, __k) ( FMULS_UNGRQ((__m), (__n), (__k)) + FADDS_UNGRQ((__m), (__n), (__k)) )
#define FLOPS_ZUNGLQ(__m, __n, __k) (6. * FMULS_UNGLQ((__m), (__n), (__k)) + 2.0 * FADDS_UNGLQ((__m), (__n), (__k)) )
#define FLOPS_CUNGLQ(__m, __n, __k) (6. * FMULS_UNGLQ((__m), (__n), (__k)) + 2.0 * FADDS_UNGLQ((__m), (__n), (__k)) )
#define FLOPS_DUNGLQ(__m, __n, __k) ( FMULS_UNGLQ((__m), (__n), (__k)) + FADDS_UNGLQ((__m), (__n), (__k)) )
#define FLOPS_SUNGLQ(__m, __n, __k) ( FMULS_UNGLQ((__m), (__n), (__k)) + FADDS_UNGLQ((__m), (__n), (__k)) )
#define FLOPS_ZORGRQ(__m, __n, __k) (6. * FMULS_ORGRQ((__m), (__n), (__k)) + 2.0 * FADDS_ORGRQ((__m), (__n), (__k)) )
#define FLOPS_CORGRQ(__m, __n, __k) (6. * FMULS_ORGRQ((__m), (__n), (__k)) + 2.0 * FADDS_ORGRQ((__m), (__n), (__k)) )
#define FLOPS_DORGRQ(__m, __n, __k) ( FMULS_ORGRQ((__m), (__n), (__k)) + FADDS_ORGRQ((__m), (__n), (__k)) )
#define FLOPS_SORGRQ(__m, __n, __k) ( FMULS_ORGRQ((__m), (__n), (__k)) + FADDS_ORGRQ((__m), (__n), (__k)) )
#define FLOPS_ZORGLQ(__m, __n, __k) (6. * FMULS_ORGLQ((__m), (__n), (__k)) + 2.0 * FADDS_ORGLQ((__m), (__n), (__k)) )
#define FLOPS_CORGLQ(__m, __n, __k) (6. * FMULS_ORGLQ((__m), (__n), (__k)) + 2.0 * FADDS_ORGLQ((__m), (__n), (__k)) )
#define FLOPS_DORGLQ(__m, __n, __k) ( FMULS_ORGLQ((__m), (__n), (__k)) + FADDS_ORGLQ((__m), (__n), (__k)) )
#define FLOPS_SORGLQ(__m, __n, __k) ( FMULS_ORGLQ((__m), (__n), (__k)) + FADDS_ORGLQ((__m), (__n), (__k)) )
///////
#define FMULS_UNMQR(m_, n_, k_, side_) (( (side_) == Left ) \
? (2.*(n_)*(m_)*(k_) - (n_)*(k_)*(k_) + 2.*(n_)*(k_)) \
: (2.*(n_)*(m_)*(k_) - (m_)*(k_)*(k_) + (m_)*(k_) + (n_)*(k_) - 0.5*(k_)*(k_) + 0.5*(k_)))
#define FADDS_UNMQR(m_, n_, k_, side_) (( ((side_)) == Left ) \
? (2.*(n_)*(m_)*(k_) - (n_)*(k_)*(k_) + (n_)*(k_)) \
: (2.*(n_)*(m_)*(k_) - (m_)*(k_)*(k_) + (m_)*(k_)))
#define FLOPS_DORMQR(m_, n_, k_, side_) ( FMULS_UNMQR((double)(m_), (double)(n_), (double)(k_), (side_)) + FADDS_UNMQR((double)(m_), (double)(n_), (double)(k_), (side_)) )
//////
#define FLOPS_ZGEQRS(__m, __n, __nrhs) (6. * FMULS_GEQRS((__m), (__n), (__nrhs)) + 2.0 * FADDS_GEQRS((__m), (__n), (__nrhs)) )
#define FLOPS_CGEQRS(__m, __n, __nrhs) (6. * FMULS_GEQRS((__m), (__n), (__nrhs)) + 2.0 * FADDS_GEQRS((__m), (__n), (__nrhs)) )
#define FLOPS_DGEQRS(__m, __n, __nrhs) ( FMULS_GEQRS((__m), (__n), (__nrhs)) + FADDS_GEQRS((__m), (__n), (__nrhs)) )
#define FLOPS_SGEQRS(__m, __n, __nrhs) ( FMULS_GEQRS((__m), (__n), (__nrhs)) + FADDS_GEQRS((__m), (__n), (__nrhs)) )
#define FLOPS_ZTRTRI(__n) (6. * FMULS_TRTRI((__n)) + 2.0 * FADDS_TRTRI((__n)) )
#define FLOPS_CTRTRI(__n) (6. * FMULS_TRTRI((__n)) + 2.0 * FADDS_TRTRI((__n)) )
#define FLOPS_DTRTRI(__n) ( FMULS_TRTRI((__n)) + FADDS_TRTRI((__n)) )
#define FLOPS_STRTRI(__n) ( FMULS_TRTRI((__n)) + FADDS_TRTRI((__n)) )
#define FLOPS_ZGEHRD(__n) (6. * FMULS_GEHRD((__n)) + 2.0 * FADDS_GEHRD((__n)) )
#define FLOPS_CGEHRD(__n) (6. * FMULS_GEHRD((__n)) + 2.0 * FADDS_GEHRD((__n)) )
#define FLOPS_DGEHRD(__n) ( FMULS_GEHRD((__n)) + FADDS_GEHRD((__n)) )
#define FLOPS_SGEHRD(__n) ( FMULS_GEHRD((__n)) + FADDS_GEHRD((__n)) )
#define FLOPS_ZHETRD(__n) (6. * FMULS_HETRD((__n)) + 2.0 * FADDS_HETRD((__n)) )
#define FLOPS_CHETRD(__n) (6. * FMULS_HETRD((__n)) + 2.0 * FADDS_HETRD((__n)) )
#define FLOPS_ZSYTRD(__n) (6. * FMULS_SYTRD((__n)) + 2.0 * FADDS_SYTRD((__n)) )
#define FLOPS_CSYTRD(__n) (6. * FMULS_SYTRD((__n)) + 2.0 * FADDS_SYTRD((__n)) )
#define FLOPS_DSYTRD(__n) ( FMULS_SYTRD((__n)) + FADDS_SYTRD((__n)) )
#define FLOPS_SSYTRD(__n) ( FMULS_SYTRD((__n)) + FADDS_SYTRD((__n)) )
#define FLOPS_ZGEBRD(__m, __n) (6. * FMULS_GEBRD((__m), (__n)) + 2.0 * FADDS_GEBRD((__m), (__n)) )
#define FLOPS_CGEBRD(__m, __n) (6. * FMULS_GEBRD((__m), (__n)) + 2.0 * FADDS_GEBRD((__m), (__n)) )
#define FLOPS_DGEBRD(__m, __n) ( FMULS_GEBRD((__m), (__n)) + FADDS_GEBRD((__m), (__n)) )
#define FLOPS_SGEBRD(__m, __n) ( FMULS_GEBRD((__m), (__n)) + FADDS_GEBRD((__m), (__n)) )
#endif /* _FLOPS_H_ */