Escript  Revision_4320
BlockOps.h
Go to the documentation of this file.
1 
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2013 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development since 2012 by School of Earth Sciences
13 *
14 *****************************************************************************/
15 
16 
17 #ifndef INC_PASO_BLOCKOPS
18 #define INC_PASO_BLOCKOPS
19 
20 #include "Common.h"
21 #include "Paso.h"
22 
23 #ifdef USE_LAPACK
24  #ifdef MKL_LAPACK
25  #include <mkl_lapack.h>
26  #include <mkl_cblas.h>
27  #else
28  #include <clapack.h>
29  #include <cblas.h>
30  #endif
31 #endif
32 
33 void Paso_BlockOps_solveAll(dim_t n_block,dim_t n,double* D,index_t* pivot,double* x);
34 
35 #define PASO_MISSING_CLAPACK Esys_setError(TYPE_ERROR, "You need to install a CLAPACK version to run a block size > 3.")
36 
37 
38 /* copy functions: */
39 #define Paso_BlockOps_Cpy_2(R, V) \
40 {\
41  *(R+0)=*(V+0);\
42  *(R+1)=*(V+1);\
43 }
44 
45 #define Paso_BlockOps_Cpy_3(R, V) \
46 {\
47  *(R+0)=*(V+0);\
48  *(R+1)=*(V+1);\
49  *(R+2)=*(V+2);\
50 }
51 
52 #define Paso_BlockOps_Cpy_N(N,R,V) memcpy((void*)R, (void*)V, ( (size_t) N ) * sizeof(double) )
53 
54 
55 /* operations R=R-MAT*V (V and R are not overlapping) */
56 
57 #define Paso_BlockOps_SMV_2(R,MAT, V) \
58 {\
59 register double S1=*(V+0); \
60 register double S2=*(V+1);\
61 register double A11=*(MAT+0);\
62 register double A12=*(MAT+2);\
63 register double A21=*(MAT+1);\
64 register double A22=*(MAT+3);\
65 *(R+0)-=A11 * S1 + A12 * S2;\
66 *(R+1)-=A21 * S1 + A22 * S2;\
67 }
68 
69 #define Paso_BlockOps_SMV_3(R, MAT, V) \
70 {\
71 register double S1=*(V+0);\
72 register double S2=*(V+1);\
73 register double S3=*(V+2);\
74 register double A11=*(MAT+0);\
75 register double A21=*(MAT+1);\
76 register double A31=*(MAT+2);\
77 register double A12=*(MAT+3);\
78 register double A22=*(MAT+4);\
79 register double A32=*(MAT+5);\
80 register double A13=*(MAT+6);\
81 register double A23=*(MAT+7);\
82 register double A33=*(MAT+8);\
83 *(R+0)-=A11 * S1 + A12 * S2 + A13 * S3; \
84 *(R+1)-=A21 * S1 + A22 * S2 + A23 * S3;\
85 *(R+2)-=A31 * S1 + A32 * S2 + A33 * S3;\
86 }
87 
88 
89 /* Paso_BlockOps_SMV_N */
90 
91 #ifdef USE_LAPACK
92  #define Paso_BlockOps_SMV_N(_N_, _R_, _MAT_, _V_) cblas_dgemv(CblasColMajor,CblasNoTrans,_N_,_N_, -1., _MAT_, _N_, _V_, 1, (1.), _R_, 1)
93 #else
94  #define Paso_BlockOps_SMV_N(N, R, MAT, V) PASO_MISSING_CLAPACK
95 #endif
96 
97 #ifdef USE_LAPACK
98  #define Paso_BlockOps_MV_N(_N_, _R_, _MAT_, _V_) cblas_dgemv(CblasColMajor,CblasNoTrans,_N_,_N_, 1., _MAT_, _N_, _V_, 1, (0.), _R_, 1)
99  #else
100  #define Paso_BlockOps_MV_N(N, R, MAT, V) PASO_MISSING_CLAPACK
101 #endif
102 
103 #define Paso_BlockOps_invM_2(invA, A, failed) \
104 {\
105  register double A11=*(A+0);\
106  register double A12=*(A+2);\
107  register double A21=*(A+1);\
108  register double A22=*(A+3);\
109  register double D = A11*A22-A12*A21; \
110  if (ABS(D) > 0 ){\
111  D=1./D;\
112  *(invA)= A22*D;\
113  *(invA+1)=-A21*D;\
114  *(invA+2)=-A12*D;\
115  *(invA+3)= A11*D;\
116  } else {\
117  *failed=1;\
118  }\
119 }
120 
121 #define Paso_BlockOps_invM_3(invA, A, failed) \
122 {\
123  register double A11=*(A+0);\
124  register double A21=*(A+1);\
125  register double A31=*(A+2);\
126  register double A12=*(A+3);\
127  register double A22=*(A+4);\
128  register double A32=*(A+5);\
129  register double A13=*(A+6);\
130  register double A23=*(A+7);\
131  register double A33=*(A+8);\
132  register double D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);\
133  if (ABS(D) > 0 ){\
134  D=1./D;\
135  *(invA )=(A22*A33-A23*A32)*D;\
136  *(invA+1)=(A31*A23-A21*A33)*D;\
137  *(invA+2)=(A21*A32-A31*A22)*D;\
138  *(invA+3)=(A13*A32-A12*A33)*D;\
139  *(invA+4)=(A11*A33-A31*A13)*D;\
140  *(invA+5)=(A12*A31-A11*A32)*D;\
141  *(invA+6)=(A12*A23-A13*A22)*D;\
142  *(invA+7)=(A13*A21-A11*A23)*D;\
143  *(invA+8)=(A11*A22-A12*A21)*D;\
144  } else {\
145  *failed=1;\
146  }\
147 }
148 
149 #ifdef USE_LAPACK
150  #ifdef MKL_LAPACK
151  #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) \
152  {\
153  int NN=N; \
154  int res =0; \
155  dgetrf(&NN,&NN,MAT,&NN,PIVOT,&res); \
156  if (res!=0) *failed=1;\
157  }
158  #else
159  #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) \
160  {\
161  int res=clapack_dgetrf(CblasColMajor, N,N,MAT,N, PIVOT); \
162  if (res!=0) *failed=1;\
163  }
164  #endif
165 #else
166  #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) PASO_MISSING_CLAPACK
167 #endif
168 
169 #ifdef USE_LAPACK
170  #ifdef MKL_LAPACK
171  #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) \
172  {\
173  int NN=N; \
174  int res =0; \
175  int ONE=1; \
176  dgetrs("N", &NN, &ONE, MAT, &NN, PIVOT, X, &NN, &res); \
177  if (res!=0) *failed=1;\
178  }
179  #else
180  #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) \
181  {\
182  int res=clapack_dgetrs(CblasColMajor, CblasNoTrans, N,1,MAT,N, PIVOT, X, N); \
183  if (res!=0) *failed=1;\
184  }
185  #endif
186 #else
187  #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) PASO_MISSING_CLAPACK
188 #endif
189 
190 
191 /* inplace matrix vector product */
192 
193 #define Paso_BlockOps_MViP_2(MAT, V) \
194 { \
195  register double S1=*(V+0);\
196  register double S2=*(V+1);\
197  register double A11=*(MAT+0);\
198  register double A12=*(MAT+2);\
199  register double A21=*(MAT+1);\
200  register double A22=*(MAT+3);\
201  *(V+0) = A11 * S1 + A12 * S2;\
202  *(V+1) = A21 * S1 + A22 * S2;\
203 }
204 
205 
206 #define Paso_BlockOps_MViP_3(MAT, V) \
207 { \
208  register double S1=*(V+0);\
209  register double S2=*(V+1);\
210  register double S3=*(V+2);\
211  register double A11=*(MAT+0);\
212  register double A21=*(MAT+1);\
213  register double A31=*(MAT+2);\
214  register double A12=*(MAT+3);\
215  register double A22=*(MAT+4);\
216  register double A32=*(MAT+5);\
217  register double A13=*(MAT+6);\
218  register double A23=*(MAT+7);\
219  register double A33=*(MAT+8);\
220  *(V+0)=A11 * S1 + A12 * S2 + A13 * S3; \
221  *(V+1)=A21 * S1 + A22 * S2 + A23 * S3;\
222  *(V+2)=A31 * S1 + A32 * S2 + A33 * S3;\
223 }
224 
225 
226 #endif /* #INC_Paso_BlockOpsOPS */