17 #if !defined escript_LocalOps_H
18 #define escript_LocalOps_H
19 #if defined(_WIN32) && defined(__INTEL_COMPILER)
25 # define M_PI 3.14159265358979323846
98 void eigenvalues2(
const double A00,
const double A01,
const double A11,
99 double* ev0,
double* ev1) {
100 const register double trA=(A00+A11)/2.;
101 const register double A_00=A00-trA;
102 const register double A_11=A11-trA;
103 const register double s=sqrt(A01*A01-A_00*A_11);
122 void eigenvalues3(
const double A00,
const double A01,
const double A02,
123 const double A11,
const double A12,
125 double* ev0,
double* ev1,
double* ev2) {
127 const register double trA=(A00+A11+A22)/3.;
128 const register double A_00=A00-trA;
129 const register double A_11=A11-trA;
130 const register double A_22=A22-trA;
131 const register double A01_2=A01*A01;
132 const register double A02_2=A02*A02;
133 const register double A12_2=A12*A12;
134 const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.;
141 const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02);
142 const register double sq_p=sqrt(p/3.);
143 register double z=-q/(2*pow(sq_p,3));
149 const register double alpha_3=acos(z)/3.;
150 *ev2=trA+2.*sq_p*cos(alpha_3);
151 *ev1=trA-2.*sq_p*cos(alpha_3+
M_PI/3.);
152 *ev0=trA-2.*sq_p*cos(alpha_3-
M_PI/3.);
183 void vectorInKernel2(
const double A00,
const double A10,
const double A01,
const double A11,
184 double* V0,
double*V1)
186 register double absA00=fabs(A00);
187 register double absA10=fabs(A10);
188 register double absA01=fabs(A01);
189 register double absA11=fabs(A11);
190 register double m=absA11>absA10 ? absA11 : absA10;
191 if (absA00>m || absA01>m) {
224 const double A01,
const double A11,
const double A21,
225 const double A02,
const double A12,
const double A22,
226 double* V0,
double* V1,
double* V2)
229 register const double I00=1./A00;
230 register const double IA10=I00*A10;
231 register const double IA20=I00*A20;
233 A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1);
234 *V0=-(A10*TEMP0+A20*TEMP1);
258 double* ev0,
double* ev1,
259 double* V00,
double* V10,
double* V01,
double* V11,
264 const register double absev0=fabs(*ev0);
265 const register double absev1=fabs(*ev1);
266 register double max_ev=absev0>absev1 ? absev0 : absev1;
267 if (fabs((*ev0)-(*ev1))<tol*max_ev) {
274 const register double scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1);
285 }
else if (TEMP0>0.) {
316 s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
321 s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
327 s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
331 s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
367 const double A11,
const double A12,
const double A22,
368 double* ev0,
double* ev1,
double* ev2,
369 double* V00,
double* V10,
double* V20,
370 double* V01,
double* V11,
double* V21,
371 double* V02,
double* V12,
double* V22,
374 register const double absA01=fabs(A01);
375 register const double absA02=fabs(A02);
376 register const double m=absA01>absA02 ? absA01 : absA02;
378 double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1;
381 &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol);
395 }
else if (A00>TEMP_EV1) {
424 const register double absev0=fabs(*ev0);
425 const register double absev1=fabs(*ev1);
426 const register double absev2=fabs(*ev2);
427 register double max_ev=absev0>absev1 ? absev0 : absev1;
428 max_ev=max_ev>absev2 ? max_ev : absev2;
429 const register double d_01=fabs((*ev0)-(*ev1));
430 const register double d_12=fabs((*ev1)-(*ev2));
431 const register double max_d=d_01>d_12 ? d_01 : d_12;
432 if (max_d<=tol*max_ev) {
443 const register double S00=A00-(*ev0);
444 const register double absS00=fabs(S00);
446 vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20);
447 }
else if (absA02<m) {
448 vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20);
450 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20);
453 const register double T00=A00-(*ev2);
454 const register double absT00=fabs(T00);
456 vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22);
457 }
else if (absA02<m) {
458 vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22);
460 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22);
462 const register double dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20);
467 *V01=(*V10)*(*V22)-(*V12)*(*V20);
468 *V11=(*V20)*(*V02)-(*V00)*(*V22);
469 *V21=(*V00)*(*V12)-(*V02)*(*V10);
480 if (transpose == 0) {
481 for (
int i=0; i<SL; i++) {
482 for (
int j=0; j<SR; j++) {
484 for (
int l=0; l<SM; l++) {
485 sum += A[i+SL*l] * B[l+SM*j];
491 else if (transpose == 1) {
492 for (
int i=0; i<SL; i++) {
493 for (
int j=0; j<SR; j++) {
495 for (
int l=0; l<SM; l++) {
496 sum += A[i*SM+l] * B[l+SM*j];
502 else if (transpose == 2) {
503 for (
int i=0; i<SL; i++) {
504 for (
int j=0; j<SR; j++) {
506 for (
int l=0; l<SM; l++) {
507 sum += A[i+SL*l] * B[l*SR+j];
515 template <
typename UnaryFunction>
519 UnaryFunction operation)
521 for (
int i = 0; i < size; ++i) {
522 argRes[i] = operation(arg1[i]);
527 template <
typename BinaryFunction>
532 BinaryFunction operation)
534 for (
int i = 0; i < size; ++i) {
535 argRes[i] = operation(arg1[i], arg2[i]);
540 template <
typename BinaryFunction>
545 BinaryFunction operation)
547 for (
int i = 0; i < size; ++i) {
548 argRes[i] = operation(arg1, arg2[i]);
553 template <
typename BinaryFunction>
558 BinaryFunction operation)
560 for (
int i = 0; i < size; ++i) {
561 argRes[i] = operation(arg1[i], arg2);