escript  Revision_4925
LocalOps.h
Go to the documentation of this file.
1 
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16 
17 
18 #if !defined escript_LocalOps_H
19 #define escript_LocalOps_H
20 #if defined(_WIN32) && defined(__INTEL_COMPILER)
21 # include <mathimf.h>
22 #else
23 # include <cmath>
24 #endif
25 #ifndef M_PI
26 # define M_PI 3.14159265358979323846 /* pi */
27 #endif
28 
29 
38 namespace escript {
39 
44 inline
45 bool nancheck(double d)
46 {
47  // Q: so why not just test d!=d?
48  // A: Coz it doesn't always work [I've checked].
49  // One theory is that the optimizer skips the test.
50 #ifdef isnan
51  return isnan(d);
52 #elif defined _isnan
53  return _isnan(d);
54 #else
55  return false;
56 #endif
57 }
58 
63 inline
64 double makeNaN()
65 {
66 #ifdef nan
67  return nan("");
68 #else
69  return sqrt(-1.);
70 #endif
71 
72 }
73 
74 
82 inline
83 void eigenvalues1(const double A00,double* ev0) {
84 
85  *ev0=A00;
86 
87 }
98 inline
99 void eigenvalues2(const double A00,const double A01,const double A11,
100  double* ev0, double* ev1) {
101  const register double trA=(A00+A11)/2.;
102  const register double A_00=A00-trA;
103  const register double A_11=A11-trA;
104  const register double s=sqrt(A01*A01-A_00*A_11);
105  *ev0=trA-s;
106  *ev1=trA+s;
107 }
122 inline
123 void eigenvalues3(const double A00, const double A01, const double A02,
124  const double A11, const double A12,
125  const double A22,
126  double* ev0, double* ev1,double* ev2) {
127 
128  const register double trA=(A00+A11+A22)/3.;
129  const register double A_00=A00-trA;
130  const register double A_11=A11-trA;
131  const register double A_22=A22-trA;
132  const register double A01_2=A01*A01;
133  const register double A02_2=A02*A02;
134  const register double A12_2=A12*A12;
135  const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.;
136  if (p<=0.) {
137  *ev2=trA;
138  *ev1=trA;
139  *ev0=trA;
140 
141  } else {
142  const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02);
143  const register double sq_p=sqrt(p/3.);
144  register double z=-q/(2*pow(sq_p,3));
145  if (z<-1.) {
146  z=-1.;
147  } else if (z>1.) {
148  z=1.;
149  }
150  const register double alpha_3=acos(z)/3.;
151  *ev2=trA+2.*sq_p*cos(alpha_3);
152  *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.);
153  *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.);
154  }
155 }
165 inline
166 void eigenvalues_and_eigenvectors1(const double A00,double* ev0,double* V00,const double tol)
167 {
168  eigenvalues1(A00,ev0);
169  *V00=1.;
170  return;
171 }
183 inline
184 void vectorInKernel2(const double A00,const double A10,const double A01,const double A11,
185  double* V0, double*V1)
186 {
187  register double absA00=fabs(A00);
188  register double absA10=fabs(A10);
189  register double absA01=fabs(A01);
190  register double absA11=fabs(A11);
191  register double m=absA11>absA10 ? absA11 : absA10;
192  if (absA00>m || absA01>m) {
193  *V0=-A01;
194  *V1=A00;
195  } else {
196  if (m<=0) {
197  *V0=1.;
198  *V1=0.;
199  } else {
200  *V0=A11;
201  *V1=-A10;
202  }
203  }
204 }
223 inline
224 void vectorInKernel3__nonZeroA00(const double A00,const double A10,const double A20,
225  const double A01,const double A11,const double A21,
226  const double A02,const double A12,const double A22,
227  double* V0,double* V1,double* V2)
228 {
229  double TEMP0,TEMP1;
230  register const double I00=1./A00;
231  register const double IA10=I00*A10;
232  register const double IA20=I00*A20;
233  vectorInKernel2(A11-IA10*A01,A12-IA10*A02,
234  A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1);
235  *V0=-(A10*TEMP0+A20*TEMP1);
236  *V1=A00*TEMP0;
237  *V2=A00*TEMP1;
238 }
239 
257 inline
258 void eigenvalues_and_eigenvectors2(const double A00,const double A01,const double A11,
259  double* ev0, double* ev1,
260  double* V00, double* V10, double* V01, double* V11,
261  const double tol)
262 {
263  double TEMP0,TEMP1;
264  eigenvalues2(A00,A01,A11,ev0,ev1);
265  const register double absev0=fabs(*ev0);
266  const register double absev1=fabs(*ev1);
267  register double max_ev=absev0>absev1 ? absev0 : absev1;
268  if (fabs((*ev0)-(*ev1))<tol*max_ev) {
269  *V00=1.;
270  *V10=0.;
271  *V01=0.;
272  *V11=1.;
273  } else {
274  vectorInKernel2(A00-(*ev0),A01,A01,A11-(*ev0),&TEMP0,&TEMP1);
275  const register double scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1);
276  if (TEMP0<0.) {
277  *V00=-TEMP0*scale;
278  *V10=-TEMP1*scale;
279  if (TEMP1<0.) {
280  *V01= *V10;
281  *V11=-(*V00);
282  } else {
283  *V01=-(*V10);
284  *V11= (*V00);
285  }
286  } else if (TEMP0>0.) {
287  *V00=TEMP0*scale;
288  *V10=TEMP1*scale;
289  if (TEMP1<0.) {
290  *V01=-(*V10);
291  *V11= (*V00);
292  } else {
293  *V01= (*V10);
294  *V11=-(*V00);
295  }
296  } else {
297  *V00=0.;
298  *V10=1;
299  *V11=0.;
300  *V01=1.;
301  }
302  }
303 }
312 inline
313 void normalizeVector3(double* V0,double* V1,double* V2)
314 {
315  register double s;
316  if (*V0>0) {
317  s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
318  *V0*=s;
319  *V1*=s;
320  *V2*=s;
321  } else if (*V0<0) {
322  s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
323  *V0*=s;
324  *V1*=s;
325  *V2*=s;
326  } else {
327  if (*V1>0) {
328  s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
329  *V1*=s;
330  *V2*=s;
331  } else if (*V1<0) {
332  s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
333  *V1*=s;
334  *V2*=s;
335  } else {
336  *V2=1.;
337  }
338  }
339 }
366 inline
367 void eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02,
368  const double A11, const double A12, const double A22,
369  double* ev0, double* ev1, double* ev2,
370  double* V00, double* V10, double* V20,
371  double* V01, double* V11, double* V21,
372  double* V02, double* V12, double* V22,
373  const double tol)
374 {
375  register const double absA01=fabs(A01);
376  register const double absA02=fabs(A02);
377  register const double m=absA01>absA02 ? absA01 : absA02;
378  if (m<=0) {
379  double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1;
380  eigenvalues_and_eigenvectors2(A11,A12,A22,
381  &TEMP_EV0,&TEMP_EV1,
382  &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol);
383  if (A00<=TEMP_EV0) {
384  *V00=1.;
385  *V10=0.;
386  *V20=0.;
387  *V01=0.;
388  *V11=TEMP_V00;
389  *V21=TEMP_V10;
390  *V02=0.;
391  *V12=TEMP_V01;
392  *V22=TEMP_V11;
393  *ev0=A00;
394  *ev1=TEMP_EV0;
395  *ev2=TEMP_EV1;
396  } else if (A00>TEMP_EV1) {
397  *V02=1.;
398  *V12=0.;
399  *V22=0.;
400  *V00=0.;
401  *V10=TEMP_V00;
402  *V20=TEMP_V10;
403  *V01=0.;
404  *V11=TEMP_V01;
405  *V21=TEMP_V11;
406  *ev0=TEMP_EV0;
407  *ev1=TEMP_EV1;
408  *ev2=A00;
409  } else {
410  *V01=1.;
411  *V11=0.;
412  *V21=0.;
413  *V00=0.;
414  *V10=TEMP_V00;
415  *V20=TEMP_V10;
416  *V02=0.;
417  *V12=TEMP_V01;
418  *V22=TEMP_V11;
419  *ev0=TEMP_EV0;
420  *ev1=A00;
421  *ev2=TEMP_EV1;
422  }
423  } else {
424  eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2);
425  const register double absev0=fabs(*ev0);
426  const register double absev1=fabs(*ev1);
427  const register double absev2=fabs(*ev2);
428  register double max_ev=absev0>absev1 ? absev0 : absev1;
429  max_ev=max_ev>absev2 ? max_ev : absev2;
430  const register double d_01=fabs((*ev0)-(*ev1));
431  const register double d_12=fabs((*ev1)-(*ev2));
432  const register double max_d=d_01>d_12 ? d_01 : d_12;
433  if (max_d<=tol*max_ev) {
434  *V00=1.;
435  *V10=0;
436  *V20=0;
437  *V01=0;
438  *V11=1.;
439  *V21=0;
440  *V02=0;
441  *V12=0;
442  *V22=1.;
443  } else {
444  const register double S00=A00-(*ev0);
445  const register double absS00=fabs(S00);
446  if (absS00>m) {
447  vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20);
448  } else if (absA02<m) {
449  vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20);
450  } else {
451  vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20);
452  }
453  normalizeVector3(V00,V10,V20);;
454  const register double T00=A00-(*ev2);
455  const register double absT00=fabs(T00);
456  if (absT00>m) {
457  vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22);
458  } else if (absA02<m) {
459  vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22);
460  } else {
461  vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22);
462  }
463  const register double dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20);
464  *V02-=dot*(*V00);
465  *V12-=dot*(*V10);
466  *V22-=dot*(*V20);
467  normalizeVector3(V02,V12,V22);
468  *V01=(*V10)*(*V22)-(*V12)*(*V20);
469  *V11=(*V20)*(*V02)-(*V00)*(*V22);
470  *V21=(*V00)*(*V12)-(*V02)*(*V10);
471  normalizeVector3(V01,V11,V21);
472  }
473  }
474 }
475 
476 // General tensor product: arg_2(SL x SR) = arg_0(SL x SM) * arg_1(SM x SR)
477 // SM is the product of the last axis_offset entries in arg_0.getShape().
478 inline
479 void matrix_matrix_product(const int SL, const int SM, const int SR, const double* A, const double* B, double* C, int transpose)
480 {
481  if (transpose == 0) {
482  for (int i=0; i<SL; i++) {
483  for (int j=0; j<SR; j++) {
484  double sum = 0.0;
485  for (int l=0; l<SM; l++) {
486  sum += A[i+SL*l] * B[l+SM*j];
487  }
488  C[i+SL*j] = sum;
489  }
490  }
491  }
492  else if (transpose == 1) {
493  for (int i=0; i<SL; i++) {
494  for (int j=0; j<SR; j++) {
495  double sum = 0.0;
496  for (int l=0; l<SM; l++) {
497  sum += A[i*SM+l] * B[l+SM*j];
498  }
499  C[i+SL*j] = sum;
500  }
501  }
502  }
503  else if (transpose == 2) {
504  for (int i=0; i<SL; i++) {
505  for (int j=0; j<SR; j++) {
506  double sum = 0.0;
507  for (int l=0; l<SM; l++) {
508  sum += A[i+SL*l] * B[l*SR+j];
509  }
510  C[i+SL*j] = sum;
511  }
512  }
513  }
514 }
515 
516 template <typename UnaryFunction>
517 inline void tensor_unary_operation(const int size,
518  const double *arg1,
519  double * argRes,
520  UnaryFunction operation)
521 {
522  for (int i = 0; i < size; ++i) {
523  argRes[i] = operation(arg1[i]);
524  }
525  return;
526 }
527 
528 template <typename BinaryFunction>
529 inline void tensor_binary_operation(const int size,
530  const double *arg1,
531  const double *arg2,
532  double * argRes,
533  BinaryFunction operation)
534 {
535  for (int i = 0; i < size; ++i) {
536  argRes[i] = operation(arg1[i], arg2[i]);
537  }
538  return;
539 }
540 
541 template <typename BinaryFunction>
542 inline void tensor_binary_operation(const int size,
543  double arg1,
544  const double *arg2,
545  double *argRes,
546  BinaryFunction operation)
547 {
548  for (int i = 0; i < size; ++i) {
549  argRes[i] = operation(arg1, arg2[i]);
550  }
551  return;
552 }
553 
554 template <typename BinaryFunction>
555 inline void tensor_binary_operation(const int size,
556  const double *arg1,
557  double arg2,
558  double *argRes,
559  BinaryFunction operation)
560 {
561  for (int i = 0; i < size; ++i) {
562  argRes[i] = operation(arg1[i], arg2);
563  }
564  return;
565 }
566 
567 } // end of namespace
568 #endif
void tensor_unary_operation(const int size, const double *arg1, double *argRes, UnaryFunction operation)
Definition: LocalOps.h:517
void transpose(const DataTypes::ValueType &in, const DataTypes::ShapeType &inShape, DataTypes::ValueType::size_type inOffset, DataTypes::ValueType &ev, const DataTypes::ShapeType &evShape, DataTypes::ValueType::size_type evOffset, int axis_offset)
Transpose each data point of this Data object around the given axis.
Definition: DataMaths.h:394
void vectorInKernel2(const double A00, const double A10, const double A01, const double A11, double *V0, double *V1)
returns a non-zero vector in the kernel of [[A00,A01],[A01,A11]] assuming that the kernel dimension i...
Definition: LocalOps.h:184
double makeNaN()
returns a NaN.
Definition: LocalOps.h:64
#define M_PI
Definition: LocalOps.h:26
void eigenvalues2(const double A00, const double A01, const double A11, double *ev0, double *ev1)
solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A
Definition: LocalOps.h:99
bool nancheck(double d)
acts as a wrapper to isnan.
Definition: LocalOps.h:45
void scale(dim_t N, double *x, double a)
x = a*x
Definition: PasoUtil.h:93
void matrix_matrix_product(const int SL, const int SM, const int SR, const double *A, const double *B, double *C, int transpose)
Definition: LocalOps.h:479
void eigenvalues_and_eigenvectors1(const double A00, double *ev0, double *V00, const double tol)
solves a 1x1 eigenvalue A*V=ev*V problem for symmetric A
Definition: LocalOps.h:166
void eigenvalues1(const double A00, double *ev0)
solves a 1x1 eigenvalue A*V=ev*V problem
Definition: LocalOps.h:83
void normalizeVector3(double *V0, double *V1, double *V2)
nomalizes a 3-d vector such that length is one and first non-zero component is positive.
Definition: LocalOps.h:313
void eigenvalues_and_eigenvectors2(const double A00, const double A01, const double A11, double *ev0, double *ev1, double *V00, double *V10, double *V01, double *V11, const double tol)
solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are ordered by increasing valu...
Definition: LocalOps.h:258
void eigenvalues3(const double A00, const double A01, const double A02, const double A11, const double A12, const double A22, double *ev0, double *ev1, double *ev2)
solves a 3x3 eigenvalue A*V=ev*V problem for symmetric A
Definition: LocalOps.h:123
void eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02, const double A11, const double A12, const double A22, double *ev0, double *ev1, double *ev2, double *V00, double *V10, double *V20, double *V01, double *V11, double *V21, double *V02, double *V12, double *V22, const double tol)
solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are ordered by increasing valu...
Definition: LocalOps.h:367
void vectorInKernel3__nonZeroA00(const double A00, const double A10, const double A20, const double A01, const double A11, const double A21, const double A02, const double A12, const double A22, double *V0, double *V1, double *V2)
returns a non-zero vector in the kernel of [[A00,A01,A02],[A10,A11,A12],[A20,A21,A22]] assuming that ...
Definition: LocalOps.h:224
void tensor_binary_operation(const int size, const double *arg1, const double *arg2, double *argRes, BinaryFunction operation)
Definition: LocalOps.h:529