Escript  Revision_4320
LocalOps.h
Go to the documentation of this file.
1 
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2013 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development since 2012 by School of Earth Sciences
13 *
14 *****************************************************************************/
15 
16 
17 #if !defined escript_LocalOps_H
18 #define escript_LocalOps_H
19 #if defined(_WIN32) && defined(__INTEL_COMPILER)
20 # include <mathimf.h>
21 #else
22 # include <math.h>
23 #endif
24 #ifndef M_PI
25 # define M_PI 3.14159265358979323846 /* pi */
26 #endif
27 
28 
37 namespace escript {
38 
43 inline
44 bool nancheck(double d)
45 {
46  // Q: so why not just test d!=d?
47  // A: Coz it doesn't always work [I've checked].
48  // One theory is that the optimizer skips the test.
49 #ifdef isnan
50  return isnan(d);
51 #elif defined _isnan
52  return _isnan(d);
53 #else
54  return false;
55 #endif
56 }
57 
62 inline
63 double makeNaN()
64 {
65 #ifdef nan
66  return nan("");
67 #else
68  return sqrt(-1.);
69 #endif
70 
71 }
72 
73 
81 inline
82 void eigenvalues1(const double A00,double* ev0) {
83 
84  *ev0=A00;
85 
86 }
97 inline
98 void eigenvalues2(const double A00,const double A01,const double A11,
99  double* ev0, double* ev1) {
100  const register double trA=(A00+A11)/2.;
101  const register double A_00=A00-trA;
102  const register double A_11=A11-trA;
103  const register double s=sqrt(A01*A01-A_00*A_11);
104  *ev0=trA-s;
105  *ev1=trA+s;
106 }
121 inline
122 void eigenvalues3(const double A00, const double A01, const double A02,
123  const double A11, const double A12,
124  const double A22,
125  double* ev0, double* ev1,double* ev2) {
126 
127  const register double trA=(A00+A11+A22)/3.;
128  const register double A_00=A00-trA;
129  const register double A_11=A11-trA;
130  const register double A_22=A22-trA;
131  const register double A01_2=A01*A01;
132  const register double A02_2=A02*A02;
133  const register double A12_2=A12*A12;
134  const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.;
135  if (p<=0.) {
136  *ev2=trA;
137  *ev1=trA;
138  *ev0=trA;
139 
140  } else {
141  const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02);
142  const register double sq_p=sqrt(p/3.);
143  register double z=-q/(2*pow(sq_p,3));
144  if (z<-1.) {
145  z=-1.;
146  } else if (z>1.) {
147  z=1.;
148  }
149  const register double alpha_3=acos(z)/3.;
150  *ev2=trA+2.*sq_p*cos(alpha_3);
151  *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.);
152  *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.);
153  }
154 }
164 inline
165 void eigenvalues_and_eigenvectors1(const double A00,double* ev0,double* V00,const double tol)
166 {
167  eigenvalues1(A00,ev0);
168  *V00=1.;
169  return;
170 }
182 inline
183 void vectorInKernel2(const double A00,const double A10,const double A01,const double A11,
184  double* V0, double*V1)
185 {
186  register double absA00=fabs(A00);
187  register double absA10=fabs(A10);
188  register double absA01=fabs(A01);
189  register double absA11=fabs(A11);
190  register double m=absA11>absA10 ? absA11 : absA10;
191  if (absA00>m || absA01>m) {
192  *V0=-A01;
193  *V1=A00;
194  } else {
195  if (m<=0) {
196  *V0=1.;
197  *V1=0.;
198  } else {
199  *V0=A11;
200  *V1=-A10;
201  }
202  }
203 }
222 inline
223 void vectorInKernel3__nonZeroA00(const double A00,const double A10,const double A20,
224  const double A01,const double A11,const double A21,
225  const double A02,const double A12,const double A22,
226  double* V0,double* V1,double* V2)
227 {
228  double TEMP0,TEMP1;
229  register const double I00=1./A00;
230  register const double IA10=I00*A10;
231  register const double IA20=I00*A20;
232  vectorInKernel2(A11-IA10*A01,A12-IA10*A02,
233  A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1);
234  *V0=-(A10*TEMP0+A20*TEMP1);
235  *V1=A00*TEMP0;
236  *V2=A00*TEMP1;
237 }
238 
256 inline
257 void eigenvalues_and_eigenvectors2(const double A00,const double A01,const double A11,
258  double* ev0, double* ev1,
259  double* V00, double* V10, double* V01, double* V11,
260  const double tol)
261 {
262  double TEMP0,TEMP1;
263  eigenvalues2(A00,A01,A11,ev0,ev1);
264  const register double absev0=fabs(*ev0);
265  const register double absev1=fabs(*ev1);
266  register double max_ev=absev0>absev1 ? absev0 : absev1;
267  if (fabs((*ev0)-(*ev1))<tol*max_ev) {
268  *V00=1.;
269  *V10=0.;
270  *V01=0.;
271  *V11=1.;
272  } else {
273  vectorInKernel2(A00-(*ev0),A01,A01,A11-(*ev0),&TEMP0,&TEMP1);
274  const register double scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1);
275  if (TEMP0<0.) {
276  *V00=-TEMP0*scale;
277  *V10=-TEMP1*scale;
278  if (TEMP1<0.) {
279  *V01= *V10;
280  *V11=-(*V00);
281  } else {
282  *V01=-(*V10);
283  *V11= (*V00);
284  }
285  } else if (TEMP0>0.) {
286  *V00=TEMP0*scale;
287  *V10=TEMP1*scale;
288  if (TEMP1<0.) {
289  *V01=-(*V10);
290  *V11= (*V00);
291  } else {
292  *V01= (*V10);
293  *V11=-(*V00);
294  }
295  } else {
296  *V00=0.;
297  *V10=1;
298  *V11=0.;
299  *V01=1.;
300  }
301  }
302 }
311 inline
312 void normalizeVector3(double* V0,double* V1,double* V2)
313 {
314  register double s;
315  if (*V0>0) {
316  s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
317  *V0*=s;
318  *V1*=s;
319  *V2*=s;
320  } else if (*V0<0) {
321  s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
322  *V0*=s;
323  *V1*=s;
324  *V2*=s;
325  } else {
326  if (*V1>0) {
327  s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
328  *V1*=s;
329  *V2*=s;
330  } else if (*V1<0) {
331  s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
332  *V1*=s;
333  *V2*=s;
334  } else {
335  *V2=1.;
336  }
337  }
338 }
365 inline
366 void eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02,
367  const double A11, const double A12, const double A22,
368  double* ev0, double* ev1, double* ev2,
369  double* V00, double* V10, double* V20,
370  double* V01, double* V11, double* V21,
371  double* V02, double* V12, double* V22,
372  const double tol)
373 {
374  register const double absA01=fabs(A01);
375  register const double absA02=fabs(A02);
376  register const double m=absA01>absA02 ? absA01 : absA02;
377  if (m<=0) {
378  double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1;
379  eigenvalues_and_eigenvectors2(A11,A12,A22,
380  &TEMP_EV0,&TEMP_EV1,
381  &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol);
382  if (A00<=TEMP_EV0) {
383  *V00=1.;
384  *V10=0.;
385  *V20=0.;
386  *V01=0.;
387  *V11=TEMP_V00;
388  *V21=TEMP_V10;
389  *V02=0.;
390  *V12=TEMP_V01;
391  *V22=TEMP_V11;
392  *ev0=A00;
393  *ev1=TEMP_EV0;
394  *ev2=TEMP_EV1;
395  } else if (A00>TEMP_EV1) {
396  *V02=1.;
397  *V12=0.;
398  *V22=0.;
399  *V00=0.;
400  *V10=TEMP_V00;
401  *V20=TEMP_V10;
402  *V01=0.;
403  *V11=TEMP_V01;
404  *V21=TEMP_V11;
405  *ev0=TEMP_EV0;
406  *ev1=TEMP_EV1;
407  *ev2=A00;
408  } else {
409  *V01=1.;
410  *V11=0.;
411  *V21=0.;
412  *V00=0.;
413  *V10=TEMP_V00;
414  *V20=TEMP_V10;
415  *V02=0.;
416  *V12=TEMP_V01;
417  *V22=TEMP_V11;
418  *ev0=TEMP_EV0;
419  *ev1=A00;
420  *ev2=TEMP_EV1;
421  }
422  } else {
423  eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2);
424  const register double absev0=fabs(*ev0);
425  const register double absev1=fabs(*ev1);
426  const register double absev2=fabs(*ev2);
427  register double max_ev=absev0>absev1 ? absev0 : absev1;
428  max_ev=max_ev>absev2 ? max_ev : absev2;
429  const register double d_01=fabs((*ev0)-(*ev1));
430  const register double d_12=fabs((*ev1)-(*ev2));
431  const register double max_d=d_01>d_12 ? d_01 : d_12;
432  if (max_d<=tol*max_ev) {
433  *V00=1.;
434  *V10=0;
435  *V20=0;
436  *V01=0;
437  *V11=1.;
438  *V21=0;
439  *V02=0;
440  *V12=0;
441  *V22=1.;
442  } else {
443  const register double S00=A00-(*ev0);
444  const register double absS00=fabs(S00);
445  if (absS00>m) {
446  vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20);
447  } else if (absA02<m) {
448  vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20);
449  } else {
450  vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20);
451  }
452  normalizeVector3(V00,V10,V20);;
453  const register double T00=A00-(*ev2);
454  const register double absT00=fabs(T00);
455  if (absT00>m) {
456  vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22);
457  } else if (absA02<m) {
458  vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22);
459  } else {
460  vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22);
461  }
462  const register double dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20);
463  *V02-=dot*(*V00);
464  *V12-=dot*(*V10);
465  *V22-=dot*(*V20);
466  normalizeVector3(V02,V12,V22);
467  *V01=(*V10)*(*V22)-(*V12)*(*V20);
468  *V11=(*V20)*(*V02)-(*V00)*(*V22);
469  *V21=(*V00)*(*V12)-(*V02)*(*V10);
470  normalizeVector3(V01,V11,V21);
471  }
472  }
473 }
474 
475 // General tensor product: arg_2(SL x SR) = arg_0(SL x SM) * arg_1(SM x SR)
476 // SM is the product of the last axis_offset entries in arg_0.getShape().
477 inline
478 void matrix_matrix_product(const int SL, const int SM, const int SR, const double* A, const double* B, double* C, int transpose)
479 {
480  if (transpose == 0) {
481  for (int i=0; i<SL; i++) {
482  for (int j=0; j<SR; j++) {
483  double sum = 0.0;
484  for (int l=0; l<SM; l++) {
485  sum += A[i+SL*l] * B[l+SM*j];
486  }
487  C[i+SL*j] = sum;
488  }
489  }
490  }
491  else if (transpose == 1) {
492  for (int i=0; i<SL; i++) {
493  for (int j=0; j<SR; j++) {
494  double sum = 0.0;
495  for (int l=0; l<SM; l++) {
496  sum += A[i*SM+l] * B[l+SM*j];
497  }
498  C[i+SL*j] = sum;
499  }
500  }
501  }
502  else if (transpose == 2) {
503  for (int i=0; i<SL; i++) {
504  for (int j=0; j<SR; j++) {
505  double sum = 0.0;
506  for (int l=0; l<SM; l++) {
507  sum += A[i+SL*l] * B[l*SR+j];
508  }
509  C[i+SL*j] = sum;
510  }
511  }
512  }
513 }
514 
515 template <typename UnaryFunction>
516 inline void tensor_unary_operation(const int size,
517  const double *arg1,
518  double * argRes,
519  UnaryFunction operation)
520 {
521  for (int i = 0; i < size; ++i) {
522  argRes[i] = operation(arg1[i]);
523  }
524  return;
525 }
526 
527 template <typename BinaryFunction>
528 inline void tensor_binary_operation(const int size,
529  const double *arg1,
530  const double *arg2,
531  double * argRes,
532  BinaryFunction operation)
533 {
534  for (int i = 0; i < size; ++i) {
535  argRes[i] = operation(arg1[i], arg2[i]);
536  }
537  return;
538 }
539 
540 template <typename BinaryFunction>
541 inline void tensor_binary_operation(const int size,
542  double arg1,
543  const double *arg2,
544  double *argRes,
545  BinaryFunction operation)
546 {
547  for (int i = 0; i < size; ++i) {
548  argRes[i] = operation(arg1, arg2[i]);
549  }
550  return;
551 }
552 
553 template <typename BinaryFunction>
554 inline void tensor_binary_operation(const int size,
555  const double *arg1,
556  double arg2,
557  double *argRes,
558  BinaryFunction operation)
559 {
560  for (int i = 0; i < size; ++i) {
561  argRes[i] = operation(arg1[i], arg2);
562  }
563  return;
564 }
565 
566 } // end of namespace
567 #endif