00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #if !defined escript_LocalOps_H
00016 #define escript_LocalOps_H
00017 #if defined(_WIN32) && defined(__INTEL_COMPILER)
00018 # include <mathimf.h>
00019 #else
00020 # include <math.h>
00021 #endif
00022 #ifndef M_PI
00023 # define M_PI 3.14159265358979323846
00024 #endif
00025
00026
00035 namespace escript {
00036
00037
00045 inline
00046 void eigenvalues1(const double A00,double* ev0) {
00047
00048 *ev0=A00;
00049
00050 }
00061 inline
00062 void eigenvalues2(const double A00,const double A01,const double A11,
00063 double* ev0, double* ev1) {
00064 const register double trA=(A00+A11)/2.;
00065 const register double A_00=A00-trA;
00066 const register double A_11=A11-trA;
00067 const register double s=sqrt(A01*A01-A_00*A_11);
00068 *ev0=trA-s;
00069 *ev1=trA+s;
00070 }
00085 inline
00086 void eigenvalues3(const double A00, const double A01, const double A02,
00087 const double A11, const double A12,
00088 const double A22,
00089 double* ev0, double* ev1,double* ev2) {
00090
00091 const register double trA=(A00+A11+A22)/3.;
00092 const register double A_00=A00-trA;
00093 const register double A_11=A11-trA;
00094 const register double A_22=A22-trA;
00095 const register double A01_2=A01*A01;
00096 const register double A02_2=A02*A02;
00097 const register double A12_2=A12*A12;
00098 const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.;
00099 if (p<=0.) {
00100 *ev2=trA;
00101 *ev1=trA;
00102 *ev0=trA;
00103
00104 } else {
00105 const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02);
00106 const register double sq_p=sqrt(p/3.);
00107 register double z=-q/(2*pow(sq_p,3));
00108 if (z<-1.) {
00109 z=-1.;
00110 } else if (z>1.) {
00111 z=1.;
00112 }
00113 const register double alpha_3=acos(z)/3.;
00114 *ev2=trA+2.*sq_p*cos(alpha_3);
00115 *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.);
00116 *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.);
00117 }
00118 }
00128 inline
00129 void eigenvalues_and_eigenvectors1(const double A00,double* ev0,double* V00,const double tol)
00130 {
00131 eigenvalues1(A00,ev0);
00132 *V00=1.;
00133 return;
00134 }
00146 inline
00147 void vectorInKernel2(const double A00,const double A10,const double A01,const double A11,
00148 double* V0, double*V1)
00149 {
00150 register double absA00=fabs(A00);
00151 register double absA10=fabs(A10);
00152 register double absA01=fabs(A01);
00153 register double absA11=fabs(A11);
00154 register double m=absA11>absA10 ? absA11 : absA10;
00155 if (absA00>m || absA01>m) {
00156 *V0=-A01;
00157 *V1=A00;
00158 } else {
00159 if (m<=0) {
00160 *V0=1.;
00161 *V1=0.;
00162 } else {
00163 *V0=A11;
00164 *V1=-A10;
00165 }
00166 }
00167 }
00186 inline
00187 void vectorInKernel3__nonZeroA00(const double A00,const double A10,const double A20,
00188 const double A01,const double A11,const double A21,
00189 const double A02,const double A12,const double A22,
00190 double* V0,double* V1,double* V2)
00191 {
00192 double TEMP0,TEMP1;
00193 register const double I00=1./A00;
00194 register const double IA10=I00*A10;
00195 register const double IA20=I00*A20;
00196 vectorInKernel2(A11-IA10*A01,A12-IA10*A02,
00197 A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1);
00198 *V0=-(A10*TEMP0+A20*TEMP1);
00199 *V1=A00*TEMP0;
00200 *V2=A00*TEMP1;
00201 }
00202
00220 inline
00221 void eigenvalues_and_eigenvectors2(const double A00,const double A01,const double A11,
00222 double* ev0, double* ev1,
00223 double* V00, double* V10, double* V01, double* V11,
00224 const double tol)
00225 {
00226 double TEMP0,TEMP1;
00227 eigenvalues2(A00,A01,A11,ev0,ev1);
00228 const register double absev0=fabs(*ev0);
00229 const register double absev1=fabs(*ev1);
00230 register double max_ev=absev0>absev1 ? absev0 : absev1;
00231 if (fabs((*ev0)-(*ev1))<tol*max_ev) {
00232 *V00=1.;
00233 *V10=0.;
00234 *V01=0.;
00235 *V11=1.;
00236 } else {
00237 vectorInKernel2(A00-(*ev0),A01,A01,A11-(*ev0),&TEMP0,&TEMP1);
00238 const register double scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1);
00239 if (TEMP0<0.) {
00240 *V00=-TEMP0*scale;
00241 *V10=-TEMP1*scale;
00242 if (TEMP1<0.) {
00243 *V01= *V10;
00244 *V11=-(*V00);
00245 } else {
00246 *V01=-(*V10);
00247 *V11= (*V10);
00248 }
00249 } else if (TEMP0>0.) {
00250 *V00=TEMP0*scale;
00251 *V10=TEMP1*scale;
00252 if (TEMP1<0.) {
00253 *V01=-(*V10);
00254 *V11= (*V00);
00255 } else {
00256 *V01= (*V10);
00257 *V11=-(*V00);
00258 }
00259 } else {
00260 *V00=0.;
00261 *V10=1;
00262 *V11=0.;
00263 *V01=1.;
00264 }
00265 }
00266 }
00275 inline
00276 void normalizeVector3(double* V0,double* V1,double* V2)
00277 {
00278 register double s;
00279 if (*V0>0) {
00280 s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
00281 *V0*=s;
00282 *V1*=s;
00283 *V2*=s;
00284 } else if (*V0<0) {
00285 s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
00286 *V0*=s;
00287 *V1*=s;
00288 *V2*=s;
00289 } else {
00290 if (*V1>0) {
00291 s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
00292 *V1*=s;
00293 *V2*=s;
00294 } else if (*V1<0) {
00295 s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
00296 *V1*=s;
00297 *V2*=s;
00298 } else {
00299 *V2=1.;
00300 }
00301 }
00302 }
00329 inline
00330 void eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02,
00331 const double A11, const double A12, const double A22,
00332 double* ev0, double* ev1, double* ev2,
00333 double* V00, double* V10, double* V20,
00334 double* V01, double* V11, double* V21,
00335 double* V02, double* V12, double* V22,
00336 const double tol)
00337 {
00338 register const double absA01=fabs(A01);
00339 register const double absA02=fabs(A02);
00340 register const double m=absA01>absA02 ? absA01 : absA02;
00341 if (m<=0) {
00342 double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1;
00343 eigenvalues_and_eigenvectors2(A11,A12,A22,
00344 &TEMP_EV0,&TEMP_EV1,
00345 &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol);
00346 if (A00<=TEMP_EV0) {
00347 *V00=1.;
00348 *V10=0.;
00349 *V20=0.;
00350 *V01=0.;
00351 *V11=TEMP_V00;
00352 *V21=TEMP_V10;
00353 *V02=0.;
00354 *V12=TEMP_V01;
00355 *V22=TEMP_V11;
00356 *ev0=A00;
00357 *ev1=TEMP_EV0;
00358 *ev2=TEMP_EV1;
00359 } else if (A00>TEMP_EV1) {
00360 *V02=1.;
00361 *V12=0.;
00362 *V22=0.;
00363 *V00=0.;
00364 *V10=TEMP_V00;
00365 *V20=TEMP_V10;
00366 *V01=0.;
00367 *V11=TEMP_V01;
00368 *V21=TEMP_V11;
00369 *ev0=TEMP_EV0;
00370 *ev1=TEMP_EV1;
00371 *ev2=A00;
00372 } else {
00373 *V01=1.;
00374 *V11=0.;
00375 *V21=0.;
00376 *V00=0.;
00377 *V10=TEMP_V00;
00378 *V20=TEMP_V10;
00379 *V02=0.;
00380 *V12=TEMP_V01;
00381 *V22=TEMP_V11;
00382 *ev0=TEMP_EV0;
00383 *ev1=A00;
00384 *ev2=TEMP_EV1;
00385 }
00386 } else {
00387 eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2);
00388 const register double absev0=fabs(*ev0);
00389 const register double absev1=fabs(*ev1);
00390 const register double absev2=fabs(*ev2);
00391 register double max_ev=absev0>absev1 ? absev0 : absev1;
00392 max_ev=max_ev>absev2 ? max_ev : absev2;
00393 const register double d_01=fabs((*ev0)-(*ev1));
00394 const register double d_12=fabs((*ev1)-(*ev2));
00395 const register double max_d=d_01>d_12 ? d_01 : d_12;
00396 if (max_d<=tol*max_ev) {
00397 *V00=1.;
00398 *V10=0;
00399 *V20=0;
00400 *V01=0;
00401 *V11=1.;
00402 *V21=0;
00403 *V02=0;
00404 *V12=0;
00405 *V22=1.;
00406 } else {
00407 const register double S00=A00-(*ev0);
00408 const register double absS00=fabs(S00);
00409 if (absS00>m) {
00410 vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20);
00411 } else if (absA02<m) {
00412 vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20);
00413 } else {
00414 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20);
00415 }
00416 normalizeVector3(V00,V10,V20);;
00417 const register double T00=A00-(*ev2);
00418 const register double absT00=fabs(T00);
00419 if (absT00>m) {
00420 vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22);
00421 } else if (absA02<m) {
00422 vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22);
00423 } else {
00424 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22);
00425 }
00426 const register double dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20);
00427 *V02-=dot*(*V00);
00428 *V12-=dot*(*V10);
00429 *V22-=dot*(*V20);
00430 normalizeVector3(V02,V12,V22);
00431 *V01=(*V10)*(*V22)-(*V12)*(*V20);
00432 *V11=(*V20)*(*V02)-(*V00)*(*V22);
00433 *V21=(*V00)*(*V12)-(*V02)*(*V10);
00434 normalizeVector3(V01,V11,V21);
00435 }
00436 }
00437 }
00438
00439
00440
00441 inline
00442 void matrix_matrix_product(const int SL, const int SM, const int SR, const double* A, const double* B, double* C, int transpose)
00443 {
00444 if (transpose == 0) {
00445 for (int i=0; i<SL; i++) {
00446 for (int j=0; j<SR; j++) {
00447 double sum = 0.0;
00448 for (int l=0; l<SM; l++) {
00449 sum += A[i+SL*l] * B[l+SM*j];
00450 }
00451 C[i+SL*j] = sum;
00452 }
00453 }
00454 }
00455 else if (transpose == 1) {
00456 for (int i=0; i<SL; i++) {
00457 for (int j=0; j<SR; j++) {
00458 double sum = 0.0;
00459 for (int l=0; l<SM; l++) {
00460 sum += A[i*SM+l] * B[l+SM*j];
00461 }
00462 C[i+SL*j] = sum;
00463 }
00464 }
00465 }
00466 else if (transpose == 2) {
00467 for (int i=0; i<SL; i++) {
00468 for (int j=0; j<SR; j++) {
00469 double sum = 0.0;
00470 for (int l=0; l<SM; l++) {
00471 sum += A[i+SL*l] * B[l*SR+j];
00472 }
00473 C[i+SL*j] = sum;
00474 }
00475 }
00476 }
00477 }
00478
00479 template <typename UnaryFunction>
00480 inline void tensor_unary_operation(const int size,
00481 const double *arg1,
00482 double * argRes,
00483 UnaryFunction operation)
00484 {
00485 for (int i = 0; i < size; ++i) {
00486 argRes[i] = operation(arg1[i]);
00487 }
00488 return;
00489 }
00490
00491 template <typename BinaryFunction>
00492 inline void tensor_binary_operation(const int size,
00493 const double *arg1,
00494 const double *arg2,
00495 double * argRes,
00496 BinaryFunction operation)
00497 {
00498 for (int i = 0; i < size; ++i) {
00499 argRes[i] = operation(arg1[i], arg2[i]);
00500 }
00501 return;
00502 }
00503
00504 template <typename BinaryFunction>
00505 inline void tensor_binary_operation(const int size,
00506 double arg1,
00507 const double *arg2,
00508 double *argRes,
00509 BinaryFunction operation)
00510 {
00511 for (int i = 0; i < size; ++i) {
00512 argRes[i] = operation(arg1, arg2[i]);
00513 }
00514 return;
00515 }
00516
00517 template <typename BinaryFunction>
00518 inline void tensor_binary_operation(const int size,
00519 const double *arg1,
00520 double arg2,
00521 double *argRes,
00522 BinaryFunction operation)
00523 {
00524 for (int i = 0; i < size; ++i) {
00525 argRes[i] = operation(arg1[i], arg2);
00526 }
00527 return;
00528 }
00529
00530 }
00531 #endif