Go to the documentation of this file.
17 #ifndef INC_PASO_BLOCKOPS
18 #define INC_PASO_BLOCKOPS
25 #include <mkl_lapack.h>
26 #include <mkl_cblas.h>
35 #define PASO_MISSING_CLAPACK Esys_setError(TYPE_ERROR, "You need to install a CLAPACK version to run a block size > 3.")
39 #define Paso_BlockOps_Cpy_2(R, V) \
45 #define Paso_BlockOps_Cpy_3(R, V) \
52 #define Paso_BlockOps_Cpy_N(N,R,V) memcpy((void*)R, (void*)V, ( (size_t) N ) * sizeof(double) )
57 #define Paso_BlockOps_SMV_2(R,MAT, V) \
59 register double S1=*(V+0); \
60 register double S2=*(V+1);\
61 register double A11=*(MAT+0);\
62 register double A12=*(MAT+2);\
63 register double A21=*(MAT+1);\
64 register double A22=*(MAT+3);\
65 *(R+0)-=A11 * S1 + A12 * S2;\
66 *(R+1)-=A21 * S1 + A22 * S2;\
69 #define Paso_BlockOps_SMV_3(R, MAT, V) \
71 register double S1=*(V+0);\
72 register double S2=*(V+1);\
73 register double S3=*(V+2);\
74 register double A11=*(MAT+0);\
75 register double A21=*(MAT+1);\
76 register double A31=*(MAT+2);\
77 register double A12=*(MAT+3);\
78 register double A22=*(MAT+4);\
79 register double A32=*(MAT+5);\
80 register double A13=*(MAT+6);\
81 register double A23=*(MAT+7);\
82 register double A33=*(MAT+8);\
83 *(R+0)-=A11 * S1 + A12 * S2 + A13 * S3; \
84 *(R+1)-=A21 * S1 + A22 * S2 + A23 * S3;\
85 *(R+2)-=A31 * S1 + A32 * S2 + A33 * S3;\
92 #define Paso_BlockOps_SMV_N(_N_, _R_, _MAT_, _V_) cblas_dgemv(CblasColMajor,CblasNoTrans,_N_,_N_, -1., _MAT_, _N_, _V_, 1, (1.), _R_, 1)
94 #define Paso_BlockOps_SMV_N(N, R, MAT, V) PASO_MISSING_CLAPACK
98 #define Paso_BlockOps_MV_N(_N_, _R_, _MAT_, _V_) cblas_dgemv(CblasColMajor,CblasNoTrans,_N_,_N_, 1., _MAT_, _N_, _V_, 1, (0.), _R_, 1)
100 #define Paso_BlockOps_MV_N(N, R, MAT, V) PASO_MISSING_CLAPACK
103 #define Paso_BlockOps_invM_2(invA, A, failed) \
105 register double A11=*(A+0);\
106 register double A12=*(A+2);\
107 register double A21=*(A+1);\
108 register double A22=*(A+3);\
109 register double D = A11*A22-A12*A21; \
121 #define Paso_BlockOps_invM_3(invA, A, failed) \
123 register double A11=*(A+0);\
124 register double A21=*(A+1);\
125 register double A31=*(A+2);\
126 register double A12=*(A+3);\
127 register double A22=*(A+4);\
128 register double A32=*(A+5);\
129 register double A13=*(A+6);\
130 register double A23=*(A+7);\
131 register double A33=*(A+8);\
132 register double D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);\
135 *(invA )=(A22*A33-A23*A32)*D;\
136 *(invA+1)=(A31*A23-A21*A33)*D;\
137 *(invA+2)=(A21*A32-A31*A22)*D;\
138 *(invA+3)=(A13*A32-A12*A33)*D;\
139 *(invA+4)=(A11*A33-A31*A13)*D;\
140 *(invA+5)=(A12*A31-A11*A32)*D;\
141 *(invA+6)=(A12*A23-A13*A22)*D;\
142 *(invA+7)=(A13*A21-A11*A23)*D;\
143 *(invA+8)=(A11*A22-A12*A21)*D;\
151 #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) \
155 dgetrf(&NN,&NN,MAT,&NN,PIVOT,&res); \
156 if (res!=0) *failed=1;\
159 #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) \
161 int res=clapack_dgetrf(CblasColMajor, N,N,MAT,N, PIVOT); \
162 if (res!=0) *failed=1;\
166 #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) PASO_MISSING_CLAPACK
171 #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) \
176 dgetrs("N", &NN, &ONE, MAT, &NN, PIVOT, X, &NN, &res); \
177 if (res!=0) *failed=1;\
180 #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) \
182 int res=clapack_dgetrs(CblasColMajor, CblasNoTrans, N,1,MAT,N, PIVOT, X, N); \
183 if (res!=0) *failed=1;\
187 #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) PASO_MISSING_CLAPACK
193 #define Paso_BlockOps_MViP_2(MAT, V) \
195 register double S1=*(V+0);\
196 register double S2=*(V+1);\
197 register double A11=*(MAT+0);\
198 register double A12=*(MAT+2);\
199 register double A21=*(MAT+1);\
200 register double A22=*(MAT+3);\
201 *(V+0) = A11 * S1 + A12 * S2;\
202 *(V+1) = A21 * S1 + A22 * S2;\
206 #define Paso_BlockOps_MViP_3(MAT, V) \
208 register double S1=*(V+0);\
209 register double S2=*(V+1);\
210 register double S3=*(V+2);\
211 register double A11=*(MAT+0);\
212 register double A21=*(MAT+1);\
213 register double A31=*(MAT+2);\
214 register double A12=*(MAT+3);\
215 register double A22=*(MAT+4);\
216 register double A32=*(MAT+5);\
217 register double A13=*(MAT+6);\
218 register double A23=*(MAT+7);\
219 register double A33=*(MAT+8);\
220 *(V+0)=A11 * S1 + A12 * S2 + A13 * S3; \
221 *(V+1)=A21 * S1 + A22 * S2 + A23 * S3;\
222 *(V+2)=A31 * S1 + A32 * S2 + A33 * S3;\