ESScript  Revision_
BlockOps.h
Go to the documentation of this file.
1 
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2013 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development since 2012 by School of Earth Sciences
13 *
14 *****************************************************************************/
15 
16 
17 #ifndef INC_PASO_BLOCKOPS
18 #define INC_PASO_BLOCKOPS
19 
20 #include "Common.h"
21 #include "Paso.h"
22 
23 #ifdef USE_LAPACK
24  #ifdef MKL_LAPACK
25  #include <mkl_lapack.h>
26  #include <mkl_cblas.h>
27  #else
28  extern "C"
29  {
30  #include <clapack.h>
31  #include <cblas.h>
32  }
33  #endif
34 #endif
35 
36 void Paso_BlockOps_solveAll(dim_t n_block,dim_t n,double* D,index_t* pivot,double* x);
37 
38 #define PASO_MISSING_CLAPACK Esys_setError(TYPE_ERROR, "You need to install a CLAPACK version to run a block size > 3.")
39 
40 
41 /* copy functions: */
42 #define Paso_BlockOps_Cpy_2(R, V) \
43 {\
44  *(R+0)=*(V+0);\
45  *(R+1)=*(V+1);\
46 }
47 
48 #define Paso_BlockOps_Cpy_3(R, V) \
49 {\
50  *(R+0)=*(V+0);\
51  *(R+1)=*(V+1);\
52  *(R+2)=*(V+2);\
53 }
54 
55 #define Paso_BlockOps_Cpy_N(N,R,V) memcpy((void*)R, (void*)V, ( (size_t) N ) * sizeof(double) )
56 
57 
58 /* operations R=R-MAT*V (V and R are not overlapping) */
59 
60 #define Paso_BlockOps_SMV_2(R,MAT, V) \
61 {\
62 register double S1=*(V+0); \
63 register double S2=*(V+1);\
64 register double A11=*(MAT+0);\
65 register double A12=*(MAT+2);\
66 register double A21=*(MAT+1);\
67 register double A22=*(MAT+3);\
68 *(R+0)-=A11 * S1 + A12 * S2;\
69 *(R+1)-=A21 * S1 + A22 * S2;\
70 }
71 
72 #define Paso_BlockOps_SMV_3(R, MAT, V) \
73 {\
74 register double S1=*(V+0);\
75 register double S2=*(V+1);\
76 register double S3=*(V+2);\
77 register double A11=*(MAT+0);\
78 register double A21=*(MAT+1);\
79 register double A31=*(MAT+2);\
80 register double A12=*(MAT+3);\
81 register double A22=*(MAT+4);\
82 register double A32=*(MAT+5);\
83 register double A13=*(MAT+6);\
84 register double A23=*(MAT+7);\
85 register double A33=*(MAT+8);\
86 *(R+0)-=A11 * S1 + A12 * S2 + A13 * S3; \
87 *(R+1)-=A21 * S1 + A22 * S2 + A23 * S3;\
88 *(R+2)-=A31 * S1 + A32 * S2 + A33 * S3;\
89 }
90 
91 
92 /* Paso_BlockOps_SMV_N */
93 
94 #ifdef USE_LAPACK
95  #define Paso_BlockOps_SMV_N(_N_, _R_, _MAT_, _V_) cblas_dgemv(CblasColMajor,CblasNoTrans,_N_,_N_, -1., _MAT_, _N_, _V_, 1, (1.), _R_, 1)
96 #else
97  #define Paso_BlockOps_SMV_N(N, R, MAT, V) PASO_MISSING_CLAPACK
98 #endif
99 
100 #ifdef USE_LAPACK
101  #define Paso_BlockOps_MV_N(_N_, _R_, _MAT_, _V_) cblas_dgemv(CblasColMajor,CblasNoTrans,_N_,_N_, 1., _MAT_, _N_, _V_, 1, (0.), _R_, 1)
102  #else
103  #define Paso_BlockOps_MV_N(N, R, MAT, V) PASO_MISSING_CLAPACK
104 #endif
105 
106 #define Paso_BlockOps_invM_2(invA, A, failed) \
107 {\
108  register double A11=*(A+0);\
109  register double A12=*(A+2);\
110  register double A21=*(A+1);\
111  register double A22=*(A+3);\
112  register double D = A11*A22-A12*A21; \
113  if (ABS(D) > 0 ){\
114  D=1./D;\
115  *(invA)= A22*D;\
116  *(invA+1)=-A21*D;\
117  *(invA+2)=-A12*D;\
118  *(invA+3)= A11*D;\
119  } else {\
120  *failed=1;\
121  }\
122 }
123 
124 #define Paso_BlockOps_invM_3(invA, A, failed) \
125 {\
126  register double A11=*(A+0);\
127  register double A21=*(A+1);\
128  register double A31=*(A+2);\
129  register double A12=*(A+3);\
130  register double A22=*(A+4);\
131  register double A32=*(A+5);\
132  register double A13=*(A+6);\
133  register double A23=*(A+7);\
134  register double A33=*(A+8);\
135  register double D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);\
136  if (ABS(D) > 0 ){\
137  D=1./D;\
138  *(invA )=(A22*A33-A23*A32)*D;\
139  *(invA+1)=(A31*A23-A21*A33)*D;\
140  *(invA+2)=(A21*A32-A31*A22)*D;\
141  *(invA+3)=(A13*A32-A12*A33)*D;\
142  *(invA+4)=(A11*A33-A31*A13)*D;\
143  *(invA+5)=(A12*A31-A11*A32)*D;\
144  *(invA+6)=(A12*A23-A13*A22)*D;\
145  *(invA+7)=(A13*A21-A11*A23)*D;\
146  *(invA+8)=(A11*A22-A12*A21)*D;\
147  } else {\
148  *failed=1;\
149  }\
150 }
151 
152 #ifdef USE_LAPACK
153  #ifdef MKL_LAPACK
154  #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) \
155  {\
156  int NN=N; \
157  int res =0; \
158  dgetrf(&NN,&NN,MAT,&NN,PIVOT,&res); \
159  if (res!=0) *failed=1;\
160  }
161  #else
162  #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) \
163  {\
164  int res=clapack_dgetrf(CblasColMajor, N,N,MAT,N, PIVOT); \
165  if (res!=0) *failed=1;\
166  }
167  #endif
168 #else
169  #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) PASO_MISSING_CLAPACK
170 #endif
171 
172 #ifdef USE_LAPACK
173  #ifdef MKL_LAPACK
174  #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) \
175  {\
176  int NN=N; \
177  int res =0; \
178  int ONE=1; \
179  dgetrs("N", &NN, &ONE, MAT, &NN, PIVOT, X, &NN, &res); \
180  if (res!=0) *failed=1;\
181  }
182  #else
183  #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) \
184  {\
185  int res=clapack_dgetrs(CblasColMajor, CblasNoTrans, N,1,MAT,N, PIVOT, X, N); \
186  if (res!=0) *failed=1;\
187  }
188  #endif
189 #else
190  #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) PASO_MISSING_CLAPACK
191 #endif
192 
193 
194 /* inplace matrix vector product */
195 
196 #define Paso_BlockOps_MViP_2(MAT, V) \
197 { \
198  register double S1=*(V+0);\
199  register double S2=*(V+1);\
200  register double A11=*(MAT+0);\
201  register double A12=*(MAT+2);\
202  register double A21=*(MAT+1);\
203  register double A22=*(MAT+3);\
204  *(V+0) = A11 * S1 + A12 * S2;\
205  *(V+1) = A21 * S1 + A22 * S2;\
206 }
207 
208 
209 #define Paso_BlockOps_MViP_3(MAT, V) \
210 { \
211  register double S1=*(V+0);\
212  register double S2=*(V+1);\
213  register double S3=*(V+2);\
214  register double A11=*(MAT+0);\
215  register double A21=*(MAT+1);\
216  register double A31=*(MAT+2);\
217  register double A12=*(MAT+3);\
218  register double A22=*(MAT+4);\
219  register double A32=*(MAT+5);\
220  register double A13=*(MAT+6);\
221  register double A23=*(MAT+7);\
222  register double A33=*(MAT+8);\
223  *(V+0)=A11 * S1 + A12 * S2 + A13 * S3; \
224  *(V+1)=A21 * S1 + A22 * S2 + A23 * S3;\
225  *(V+2)=A31 * S1 + A32 * S2 + A33 * S3;\
226 }
227 
228 
229 #endif /* #INC_Paso_BlockOpsOPS */