Go to the documentation of this file.
17 #ifndef INC_PASO_BLOCKOPS
18 #define INC_PASO_BLOCKOPS
25 #include <mkl_lapack.h>
26 #include <mkl_cblas.h>
38 #define PASO_MISSING_CLAPACK Esys_setError(TYPE_ERROR, "You need to install a CLAPACK version to run a block size > 3.")
42 #define Paso_BlockOps_Cpy_2(R, V) \
48 #define Paso_BlockOps_Cpy_3(R, V) \
55 #define Paso_BlockOps_Cpy_N(N,R,V) memcpy((void*)R, (void*)V, ( (size_t) N ) * sizeof(double) )
60 #define Paso_BlockOps_SMV_2(R,MAT, V) \
62 register double S1=*(V+0); \
63 register double S2=*(V+1);\
64 register double A11=*(MAT+0);\
65 register double A12=*(MAT+2);\
66 register double A21=*(MAT+1);\
67 register double A22=*(MAT+3);\
68 *(R+0)-=A11 * S1 + A12 * S2;\
69 *(R+1)-=A21 * S1 + A22 * S2;\
72 #define Paso_BlockOps_SMV_3(R, MAT, V) \
74 register double S1=*(V+0);\
75 register double S2=*(V+1);\
76 register double S3=*(V+2);\
77 register double A11=*(MAT+0);\
78 register double A21=*(MAT+1);\
79 register double A31=*(MAT+2);\
80 register double A12=*(MAT+3);\
81 register double A22=*(MAT+4);\
82 register double A32=*(MAT+5);\
83 register double A13=*(MAT+6);\
84 register double A23=*(MAT+7);\
85 register double A33=*(MAT+8);\
86 *(R+0)-=A11 * S1 + A12 * S2 + A13 * S3; \
87 *(R+1)-=A21 * S1 + A22 * S2 + A23 * S3;\
88 *(R+2)-=A31 * S1 + A32 * S2 + A33 * S3;\
95 #define Paso_BlockOps_SMV_N(_N_, _R_, _MAT_, _V_) cblas_dgemv(CblasColMajor,CblasNoTrans,_N_,_N_, -1., _MAT_, _N_, _V_, 1, (1.), _R_, 1)
97 #define Paso_BlockOps_SMV_N(N, R, MAT, V) PASO_MISSING_CLAPACK
101 #define Paso_BlockOps_MV_N(_N_, _R_, _MAT_, _V_) cblas_dgemv(CblasColMajor,CblasNoTrans,_N_,_N_, 1., _MAT_, _N_, _V_, 1, (0.), _R_, 1)
103 #define Paso_BlockOps_MV_N(N, R, MAT, V) PASO_MISSING_CLAPACK
106 #define Paso_BlockOps_invM_2(invA, A, failed) \
108 register double A11=*(A+0);\
109 register double A12=*(A+2);\
110 register double A21=*(A+1);\
111 register double A22=*(A+3);\
112 register double D = A11*A22-A12*A21; \
124 #define Paso_BlockOps_invM_3(invA, A, failed) \
126 register double A11=*(A+0);\
127 register double A21=*(A+1);\
128 register double A31=*(A+2);\
129 register double A12=*(A+3);\
130 register double A22=*(A+4);\
131 register double A32=*(A+5);\
132 register double A13=*(A+6);\
133 register double A23=*(A+7);\
134 register double A33=*(A+8);\
135 register double D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);\
138 *(invA )=(A22*A33-A23*A32)*D;\
139 *(invA+1)=(A31*A23-A21*A33)*D;\
140 *(invA+2)=(A21*A32-A31*A22)*D;\
141 *(invA+3)=(A13*A32-A12*A33)*D;\
142 *(invA+4)=(A11*A33-A31*A13)*D;\
143 *(invA+5)=(A12*A31-A11*A32)*D;\
144 *(invA+6)=(A12*A23-A13*A22)*D;\
145 *(invA+7)=(A13*A21-A11*A23)*D;\
146 *(invA+8)=(A11*A22-A12*A21)*D;\
154 #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) \
158 dgetrf(&NN,&NN,MAT,&NN,PIVOT,&res); \
159 if (res!=0) *failed=1;\
162 #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) \
164 int res=clapack_dgetrf(CblasColMajor, N,N,MAT,N, PIVOT); \
165 if (res!=0) *failed=1;\
169 #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) PASO_MISSING_CLAPACK
174 #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) \
179 dgetrs("N", &NN, &ONE, MAT, &NN, PIVOT, X, &NN, &res); \
180 if (res!=0) *failed=1;\
183 #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) \
185 int res=clapack_dgetrs(CblasColMajor, CblasNoTrans, N,1,MAT,N, PIVOT, X, N); \
186 if (res!=0) *failed=1;\
190 #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) PASO_MISSING_CLAPACK
196 #define Paso_BlockOps_MViP_2(MAT, V) \
198 register double S1=*(V+0);\
199 register double S2=*(V+1);\
200 register double A11=*(MAT+0);\
201 register double A12=*(MAT+2);\
202 register double A21=*(MAT+1);\
203 register double A22=*(MAT+3);\
204 *(V+0) = A11 * S1 + A12 * S2;\
205 *(V+1) = A21 * S1 + A22 * S2;\
209 #define Paso_BlockOps_MViP_3(MAT, V) \
211 register double S1=*(V+0);\
212 register double S2=*(V+1);\
213 register double S3=*(V+2);\
214 register double A11=*(MAT+0);\
215 register double A21=*(MAT+1);\
216 register double A31=*(MAT+2);\
217 register double A12=*(MAT+3);\
218 register double A22=*(MAT+4);\
219 register double A32=*(MAT+5);\
220 register double A13=*(MAT+6);\
221 register double A23=*(MAT+7);\
222 register double A33=*(MAT+8);\
223 *(V+0)=A11 * S1 + A12 * S2 + A13 * S3; \
224 *(V+1)=A21 * S1 + A22 * S2 + A23 * S3;\
225 *(V+2)=A31 * S1 + A32 * S2 + A33 * S3;\