--- scalapack-doc-1.5.orig/man/manl/pcheevx.l +++ scalapack-doc-1.5/man/manl/pcheevx.l @@ -1,6 +1,8 @@ .TH PCHEEVX l "12 May 1997" "LAPACK version 1.5" "LAPACK routine (version 1.5)" .SH NAME - +PCHEEVX - compute selected eigenvalues and, optionally, eigenvectors +of a complex hermitian matrix A by calling the recommended sequence +of ScaLAPACK routines .SH SYNOPSIS .TP 20 SUBROUTINE PCHEEVX( @@ -121,1251 +123,449 @@ .ti +4 INTRINSIC ABS, CMPLX, ICHAR, MAX, MIN, MOD, REAL, SQRT -.TP 20 -.ti +4 -IF( -BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* -RSRC_.LT.0 )RETURN -.TP 20 -.ti +4 -QUICKRETURN -= ( N.EQ.0 ) -.TP 20 -.ti +4 -CALL -BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) -.TP 20 -.ti +4 -INFO -= 0 -.TP 20 -.ti +4 -IF( -NPROW.EQ.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -( 800+CTXT_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+CTXT_ ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -CALL -CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, INFO ) -.TP 20 -.ti +4 -CALL -CHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 21, INFO ) -.TP 20 -.ti +4 -IF( -INFO.EQ.0 ) THEN -.TP 20 -.ti +4 -SAFMIN -= PSLAMCH( DESCA( CTXT_ ), 'Safe minimum' ) -.TP 20 -.ti +4 -EPS -= PSLAMCH( DESCA( CTXT_ ), 'Precision' ) -.TP 20 -.ti +4 -SMLNUM -= SAFMIN / EPS -.TP 20 -.ti +4 -BIGNUM -= ONE / SMLNUM -.TP 20 -.ti +4 -RMIN -= SQRT( SMLNUM ) -.TP 20 -.ti +4 -RMAX -= MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) -.TP 20 -.ti +4 -NPROCS -= NPROW*NPCOL -.TP 20 -.ti +4 -LOWER -= LSAME( UPLO, 'L' ) -.TP 20 -.ti +4 -WANTZ -= LSAME( JOBZ, 'V' ) -.TP 20 -.ti +4 -ALLEIG -= LSAME( RANGE, 'A' ) -.TP 20 -.ti +4 -VALEIG -= LSAME( RANGE, 'V' ) -.TP 20 -.ti +4 -INDEIG -= LSAME( RANGE, 'I' ) -.TP 20 -.ti +4 -INDTAU -= 1 -.TP 20 -.ti +4 -INDWORK -= INDTAU + N -.TP 20 -.ti +4 -LLWORK -= LWORK - INDWORK + 1 -.TP 20 -.ti +4 -INDE -= 1 -.TP 20 -.ti +4 -INDD -= INDE + N -.TP 20 -.ti +4 -INDD2 -= INDD + N -.TP 20 -.ti +4 -INDE2 -= INDD2 + N -.TP 20 -.ti +4 
-INDRWORK -= INDE2 + N -.TP 20 -.ti +4 -LLRWORK -= LRWORK - INDRWORK + 1 -.TP 20 -.ti +4 -ISIZESTEIN -= 3*N + NPROCS + 1 -.TP 20 -.ti +4 -ISIZESTEBZ -= MAX( 4*N, 14, NPROCS ) -.TP 20 -.ti +4 -INDIBL -= ( MAX( ISIZESTEIN, ISIZESTEBZ ) ) + 1 -.TP 20 -.ti +4 -INDISP -= INDIBL + N -.TP 20 -.ti +4 -LQUERY -= .FALSE. -.TP 20 -.ti +4 -IF( -LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) -LQUERY = .TRUE. -.TP 20 -.ti +4 -NNP -= MAX( N, NPROCS+1, 4 ) -.TP 20 -.ti +4 -LIWMIN -= 6*NNP -.TP 20 -.ti +4 -NPROCS -= NPROW*NPCOL -.TP 20 -.ti +4 -NB_A -= DESCA( NB_ ) -.TP 20 -.ti +4 -MB_A -= DESCA( MB_ ) -.TP 20 -.ti +4 -NB_Z -= DESCZ( NB_ ) -.TP 20 -.ti +4 -MB_Z -= DESCZ( MB_ ) -.TP 20 -.ti +4 -NB -= NB_A -.TP 20 -.ti +4 -NN -= MAX( N, NB, 2 ) -.TP 20 -.ti +4 -RSRC_A -= DESCA( RSRC_ ) -.TP 20 -.ti +4 -CSRC_A -= DESCA( CSRC_ ) -.TP 20 -.ti +4 -RSRC_Z -= DESCZ( RSRC_ ) -.TP 20 -.ti +4 -IROFFA -= MOD( IA-1, MB_A ) -.TP 20 -.ti +4 -ICOFFA -= MOD( JA-1, NB_A ) -.TP 20 -.ti +4 -IROFFZ -= MOD( IZ-1, MB_A ) -.TP 20 -.ti +4 -IAROW -= INDXG2P( 1, NB_A, MYROW, RSRC_A, NPROW ) -.TP 20 -.ti +4 -IACOL -= INDXG2P( 1, MB_A, MYCOL, CSRC_A, NPCOL ) -.TP 20 -.ti +4 -IZROW -= INDXG2P( 1, NB_A, MYROW, RSRC_Z, NPROW ) -.TP 20 -.ti +4 -NP0 -= NUMROC( N+IROFFA, NB_Z, MYROW, IAROW, NPROW ) -.TP 20 -.ti +4 -MQ0 -= NUMROC( N+ICOFFA, NB_Z, MYCOL, IACOL, NPCOL ) -.TP 20 -.ti +4 -IF( -( .NOT.WANTZ ) .OR. ( VALEIG .AND. ( .NOT.LQUERY ) ) ) -THEN -.TP 20 -.ti +4 -LWMIN -= N + MAX( NB*( NP0+1 ), 3 ) -.TP 20 -.ti +4 -LRWMIN -= 5*NN + 4*N -.TP 20 -.ti +4 -NEIG -= 0 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IF( -ALLEIG .OR. 
VALEIG ) THEN -.TP 20 -.ti +4 -NEIG -= N -.TP 20 -.ti +4 -ELSE -IF( INDEIG ) THEN -.TP 20 -.ti +4 -NEIG -= IU - IL + 1 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -MQ0 -= NUMROC( MAX( NEIG, NB, 2 ), NB, MYCOL, IACOL, -NPCOL ) -.TP 20 -.ti +4 -NQ0 -= NUMROC( NN, NB, 0, 0, NPCOL ) -.TP 20 -.ti +4 -LWMIN -= N + ( NP0+NQ0+NB )*NB -.TP 20 -.ti +4 -LRWMIN -= 4*N + MAX( 5*NN, NP0*MQ0 ) + -ICEIL( NEIG, NPROW*NPCOL )*NN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -INFO.EQ.0 ) THEN -.TP 20 -.ti +4 -IF( -MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN -.TP 20 -.ti +4 -RWORK( -1 ) = ABSTOL -.TP 20 -.ti +4 -IF( -VALEIG ) THEN -.TP 20 -.ti +4 -RWORK( -2 ) = VL -.TP 20 -.ti +4 -RWORK( -3 ) = VU -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -RWORK( -2 ) = ZERO -.TP 20 -.ti +4 -RWORK( -3 ) = ZERO -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -SGEBS2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, RWORK, -3 ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -CALL -SGEBR2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, RWORK, -3, 0, 0 ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -.NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -1 -.TP 20 -.ti +4 -ELSE -IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN -.TP 20 -.ti +4 -INFO -= -2 -.TP 20 -.ti +4 -ELSE -IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -3 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN -.TP 20 -.ti +4 -INFO -= -10 -.TP 20 -.ti +4 -ELSE -IF( INDEIG .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) -THEN -.TP 20 -.ti +4 -INFO -= -11 -.TP 20 -.ti +4 -ELSE -IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) -THEN -.TP 20 -.ti +4 -INFO -= -12 -.TP 20 -.ti +4 -ELSE -IF( LWORK.LT.LWMIN .AND. LWORK.NE.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -23 -.TP 20 -.ti +4 -ELSE -IF( LRWORK.LT.LRWMIN .AND. LRWORK.NE.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -25 -.TP 20 -.ti +4 -ELSE -IF( LIWORK.LT.LIWMIN .AND. LIWORK.NE.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -27 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. 
( ABS( RWORK( 2 )-VL ).GT.FIVE*EPS* -ABS( VL ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -9 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. ( ABS( RWORK( 3 )-VU ).GT.FIVE*EPS* -ABS( VU ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -10 -.TP 20 -.ti +4 -ELSE -IF( ABS( RWORK( 1 )-ABSTOL ).GT.FIVE*EPS* -ABS( ABSTOL ) ) THEN -.TP 20 -.ti +4 -INFO -= -13 -.TP 20 -.ti +4 -ELSE -IF( IROFFA.NE.IROFFZ ) THEN -.TP 20 -.ti +4 -INFO -= -19 -.TP 20 -.ti +4 -ELSE -IF( IROFFA.NE.0 ) THEN -.TP 20 -.ti +4 -INFO -= -6 -.TP 20 -.ti +4 -ELSE -IF( IAROW.NE.IZROW ) THEN -.TP 20 -.ti +4 -INFO -= -19 -.TP 20 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 800+NB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+M_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+N_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+MB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+NB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+RSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+CSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+CTXT_ ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -IDUM1( -1 ) = ICHAR( 'V' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -1 ) = ICHAR( 'N' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -1 ) = 1 -.TP 20 -.ti +4 -IF( -LOWER ) THEN -.TP 20 -.ti +4 -IDUM1( -2 ) = ICHAR( 'L' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -2 ) = ICHAR( 'U' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -2 ) = 2 -.TP 20 -.ti +4 -IF( -ALLEIG ) THEN -.TP 20 -.ti +4 -IDUM1( -3 ) = ICHAR( 'A' ) -.TP 20 -.ti +4 -ELSE -IF( INDEIG ) THEN -.TP 20 -.ti +4 -IDUM1( -3 ) = ICHAR( 
'I' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -3 ) = ICHAR( 'V' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -3 ) = 3 -.TP 20 -.ti +4 -IF( -LQUERY ) THEN -.TP 20 -.ti +4 -IDUM1( -4 ) = -1 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -4 ) = 1 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -4 ) = 4 -.TP 20 -.ti +4 -CALL -PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 8, N, 4, N, 4, IZ, -JZ, DESCZ, 21, 4, IDUM1, IDUM2, INFO ) -.TP 20 -.ti +4 -WORK( -1 ) = CMPLX( LWMIN ) -.TP 20 -.ti +4 -RWORK( -1 ) = REAL( LRWMIN ) -.TP 20 -.ti +4 -IWORK( -1 ) = LIWMIN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -INFO.NE.0 ) THEN -.TP 20 -.ti +4 -CALL -PXERBLA( DESCA( CTXT_ ), 'PCHEEVX', -INFO ) -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -ELSE -IF( LQUERY ) THEN -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -QUICKRETURN ) THEN -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -NZ -= 0 -.TP 20 -.ti +4 -ICLUSTR( -1 ) = 0 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -M -= 0 -.TP 20 -.ti +4 -WORK( -1 ) = CMPLX( LWMIN ) -.TP 20 -.ti +4 -RWORK( -1 ) = REAL( LRWMIN ) -.TP 20 -.ti +4 -IWORK( -1 ) = LIWMIN -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -ABSTLL -= ABSTOL -.TP 20 -.ti +4 -ISCALE -= 0 -.TP 20 -.ti +4 -IF( -VALEIG ) THEN -.TP 20 -.ti +4 -VLL -= VL -.TP 20 -.ti +4 -VUU -= VU -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -VLL -= ZERO -.TP 20 -.ti +4 -VUU -= ZERO -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -ANRM -= PCLANHE( '1', UPLO, N, A, IA, JA, DESCA, -RWORK( INDRWORK ) ) -.TP 20 -.ti +4 -IF( -ANRM.GT.ZERO .AND. 
ANRM.LT.RMIN ) THEN -.TP 20 -.ti +4 -ISCALE -= 1 -.TP 20 -.ti +4 -SIGMA -= RMIN / ANRM -.TP 20 -.ti +4 -ANRM -= ANRM*SIGMA -.TP 20 -.ti +4 -ELSE -IF( ANRM.GT.RMAX ) THEN -.TP 20 -.ti +4 -ISCALE -= 1 -.TP 20 -.ti +4 -SIGMA -= RMAX / ANRM -.TP 20 -.ti +4 -ANRM -= ANRM*SIGMA -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -ISCALE.EQ.1 ) THEN -.TP 20 -.ti +4 -CALL -PCLASCL( UPLO, ONE, SIGMA, N, N, A, IA, JA, DESCA, -IINFO ) -.TP 20 -.ti +4 -IF( -ABSTOL.GT.0 ) -ABSTLL = ABSTOL*SIGMA -.TP 20 -.ti +4 -IF( -VALEIG ) THEN -.TP 20 -.ti +4 -VLL -= VL*SIGMA -.TP 20 -.ti +4 -VUU -= VU*SIGMA -.TP 20 -.ti +4 -IF( -VUU.EQ.VLL ) THEN -.TP 20 -.ti +4 -VUU -= VUU + 2*MAX( ABS( VUU )*EPS, SAFMIN ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -LALLWORK -= LLRWORK -.TP 20 -.ti +4 -CALL -PCHETRD( UPLO, N, A, IA, JA, DESCA, RWORK( INDD ), -RWORK( INDE ), WORK( INDTAU ), WORK( INDWORK ), -LLWORK, IINFO ) -.TP 20 -.ti +4 -OFFSET -= 0 -.TP 20 -.ti +4 -IF( -IA.EQ.1 .AND. JA.EQ.1 .AND. RSRC_A.EQ.0 .AND. 
CSRC_A.EQ.0 ) -THEN -.TP 20 -.ti +4 -CALL -PSLARED1D( N, IA, JA, DESCA, RWORK( INDD ), -RWORK( INDD2 ), RWORK( INDRWORK ), LLRWORK ) -.TP 20 -.ti +4 -CALL -PSLARED1D( N, IA, JA, DESCA, RWORK( INDE ), -RWORK( INDE2 ), RWORK( INDRWORK ), LLRWORK ) -.TP 20 -.ti +4 -IF( -.NOT.LOWER ) -OFFSET = 1 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -DO -10 I = 1, N -.TP 20 -.ti +4 -CALL -PCELGET( 'A', ' ', WORK( INDD2+I-1 ), A, I+IA-1, -I+JA-1, DESCA ) -.TP 20 -.ti +4 -RWORK( -INDD2+I-1 ) = REAL( WORK( INDD2+I-1 ) ) -.TP 20 -.ti +4 -10 -CONTINUE -.TP 20 -.ti +4 -IF( -LSAME( UPLO, 'U' ) ) THEN -.TP 20 -.ti +4 -DO -20 I = 1, N - 1 -.TP 20 -.ti +4 -CALL -PCELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA-1, -I+JA, DESCA ) -.TP 20 -.ti +4 -RWORK( -INDE2+I-1 ) = REAL( WORK( INDE2+I-1 ) ) -.TP 20 -.ti +4 -20 -CONTINUE -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -DO -30 I = 1, N - 1 -.TP 20 -.ti +4 -CALL -PCELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA, -I+JA-1, DESCA ) -.TP 20 -.ti +4 -RWORK( -INDE2+I-1 ) = REAL( WORK( INDE2+I-1 ) ) -.TP 20 -.ti +4 -30 -CONTINUE -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -ORDER -= 'b' -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -ORDER -= 'e' -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PSSTEBZ( DESCA( CTXT_ ), RANGE, ORDER, N, VLL, VUU, IL, IU, -ABSTLL, RWORK( INDD2 ), RWORK( INDE2+OFFSET ), M, -NSPLIT, W, IWORK( INDIBL ), IWORK( INDISP ), -RWORK( INDRWORK ), LLRWORK, IWORK( 1 ), ISIZESTEBZ, -IINFO ) -.TP 20 -.ti +4 -IF( -IINFO.NE.0 ) THEN -.TP 20 -.ti +4 -INFO -= INFO + IERREBZ -.TP 20 -.ti +4 -DO -40 I = 1, M -.TP 20 -.ti +4 -IWORK( -INDIBL+I-1 ) = ABS( IWORK( INDIBL+I-1 ) ) -.TP 20 -.ti +4 -40 -CONTINUE -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -IF( -VALEIG ) THEN -.TP 20 -.ti +4 -CALL -IGAMN2D( DESCA( CTXT_ ), 'A', ' ', 1, 1, LALLWORK, 1, -1, 1, -1, -1, -1 ) -.TP 20 -.ti +4 -MAXEIGS -= DESCZ( N_ ) -.TP 20 -.ti +4 -DO -50 NZ = MIN( MAXEIGS, M ), 0, -1 -.TP 20 -.ti +4 -MQ0 -= 
NUMROC( NZ, NB, 0, 0, NPCOL ) -.TP 20 -.ti +4 -SIZESTEIN -= ICEIL( NZ, NPROCS )*N + MAX( 5*N, NP0*MQ0 ) -.TP 20 -.ti +4 -SIZEORMTR -= MAX( ( NB*( NB-1 ) ) / 2, ( MQ0+NP0 )*NB ) + -NB*NB -.TP 20 -.ti +4 -SIZEHEEVX -= MAX( SIZESTEIN, SIZEORMTR ) -.TP 20 -.ti +4 -IF( -SIZEHEEVX.LE.LALLWORK ) -GO TO 60 -.TP 20 -.ti +4 -50 -CONTINUE -.TP 20 -.ti +4 -60 -CONTINUE -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -NZ -= M -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -NZ -= MAX( NZ, 0 ) -.TP 20 -.ti +4 -IF( -NZ.NE.M ) THEN -.TP 20 -.ti +4 -INFO -= INFO + IERRSPC -.TP 20 -.ti +4 -DO -70 I = 1, M -.TP 20 -.ti +4 -IFAIL( -I ) = 0 -.TP 20 -.ti +4 -70 -CONTINUE -.TP 20 -.ti +4 -IF( -NSPLIT.GT.1 ) THEN -.TP 20 -.ti +4 -CALL -SLASRT( 'I', M, W, IINFO ) -.TP 20 -.ti +4 -IF( -NZ.GT.0 ) THEN -.TP 20 -.ti +4 -VUU -= W( NZ ) - TEN*( EPS*ANRM+SAFMIN ) -.TP 20 -.ti +4 -IF( -VLL.GE.VUU ) THEN -.TP 20 -.ti +4 -NZZ -= 0 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -CALL -PSSTEBZ( DESCA( CTXT_ ), RANGE, ORDER, N, -VLL, VUU, IL, IU, ABSTLL, -RWORK( INDD2 ), RWORK( INDE2+ -OFFSET ), NZZ, NSPLIT, W, -IWORK( INDIBL ), IWORK( INDISP ), -RWORK( INDRWORK ), LLRWORK, -IWORK( 1 ), ISIZESTEBZ, IINFO ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -MOD( INFO / IERREBZ, 1 ).EQ.0 ) THEN -.TP 20 -.ti +4 -IF( -NZZ.GT.NZ .OR. 
IINFO.NE.0 ) THEN -.TP 20 -.ti +4 -INFO -= INFO + IERREBZ -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -NZ -= MIN( NZ, NZZ ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PCSTEIN( N, RWORK( INDD2 ), RWORK( INDE2+OFFSET ), NZ, W, -IWORK( INDIBL ), IWORK( INDISP ), ORFAC, Z, IZ, -JZ, DESCZ, RWORK( INDRWORK ), LALLWORK, -IWORK( 1 ), ISIZESTEIN, IFAIL, ICLUSTR, GAP, -IINFO ) -.TP 20 -.ti +4 -IF( -IINFO.GE.NZ+1 ) -INFO = INFO + IERRCLS -.TP 20 -.ti +4 -IF( -MOD( IINFO, NZ+1 ).NE.0 ) -INFO = INFO + IERREIN -.TP 20 -.ti +4 -IF( -NZ.GT.0 ) THEN -.TP 20 -.ti +4 -CALL -PCUNMTR( 'L', UPLO, 'N', N, NZ, A, IA, JA, DESCA, -WORK( INDTAU ), Z, IZ, JZ, DESCZ, -WORK( INDWORK ), LLWORK, IINFO ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -ISCALE.EQ.1 ) THEN -.TP 20 -.ti +4 -CALL -SSCAL( M, ONE / SIGMA, W, 1 ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -WORK( -1 ) = CMPLX( LWMIN ) -.TP 20 -.ti +4 -RWORK( -1 ) = REAL( LRWMIN ) -.TP 20 -.ti +4 -IWORK( -1 ) = LIWMIN -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END + .SH PURPOSE +PCHEEVX computes selected eigenvalues and, optionally, eigenvectors +of a complex hermitian matrix A by calling the recommended sequence +of ScaLAPACK routines. Eigenvalues/vectors can be selected by +specifying a range of values or a range of indices for the desired +eigenvalues. +.SH NOTES +Each global data object is described by an associated description +vector. This vector stores the information required to establish +the mapping between an object element and its corresponding process +and memory location. + +Let A be a generic term for any 2D block cyclicly distributed array. +Such a global array has an associated description vector DESCA. +In the following comments, the character _ should be read as +"of the global array". 
+ +NOTATION STORED IN EXPLANATION +.br +--------------- -------------- -------------------------------------- +.br +DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, + DTYPE_A = 1. +.br +CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating + the BLACS process grid A is distribu- + ted over. The context itself is glo- + bal, but the handle (the integer + value) may vary. +.br +M_A (global) DESCA( M_ ) The number of rows in the global + array A. +.br +N_A (global) DESCA( N_ ) The number of columns in the global + array A. +.br +MB_A (global) DESCA( MB_ ) The blocking factor used to distribute + the rows of the array. +.br +NB_A (global) DESCA( NB_ ) The blocking factor used to distribute + the columns of the array. +.br +RSRC_A (global) DESCA( RSRC_ ) The process row over which the first + row of the array A is distributed. +.br +CSRC_A (global) DESCA( CSRC_ ) The process column over which the + first column of the array A is + distributed. +.br +LLD_A (local) DESCA( LLD_ ) The leading dimension of the local + array. LLD_A >= MAX(1,LOCr(M_A)). + +Let K be the number of rows or columns of a distributed matrix, +and assume that its process grid has dimension p x q. +LOCr( K ) denotes the number of elements of K that a process +would receive if K were distributed over the p processes of its +process column. +.br +Similarly, LOCc( K ) denotes the number of elements of K that a +process would receive if K were distributed over the q processes of +its process row. +.br +The values of LOCr() and LOCc() may be determined via a call to the +ScaLAPACK tool function, NUMROC: +.br + LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), +.br + LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). +.br +An upper bound for these quantities may be computed by: +.br + LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A +.br + LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A + +PCHEEVX assumes IEEE 754 standard compliant arithmetic. 
To port +to a system which does not have IEEE 754 arithmetic, modify +the appropriate SLmake.inc file to include the compiler switch +-DNO_IEEE. This switch only affects the compilation of pslaiect.c. + +.SH ARGUMENTS + NP = the number of rows local to a given process. + NQ = the number of columns local to a given process. + +.tp 8 +JOBZ (global input) CHARACTER*1 + Specifies whether or not to compute the eigenvectors: + = 'N': Compute eigenvalues only. + = 'V': Compute eigenvalues and eigenvectors. + +.tp 8 +RANGE (global input) CHARACTER*1 + = 'A': all eigenvalues will be found. + = 'V': all eigenvalues in the interval [VL,VU] will be found. + = 'I': the IL-th through IU-th eigenvalues will be found. + +.tp 8 +UPLO (global input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + Hermitian matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + +.tp 8 +N (global input) INTEGER + The number of rows and columns of the matrix A. N >= 0. + +.tp 8 +A (local input/workspace) block cyclic COMPLEX array, + global dimension (N, N), + local dimension ( LLD_A, LOCc(JA+N-1) ) + + On entry, the Hermitian matrix A. If UPLO = 'U', only the + upper triangular part of A is used to define the elements of + the Hermitian matrix. If UPLO = 'L', only the lower + triangular part of A is used to define the elements of the + Hermitian matrix. + + On exit, the lower triangle (if UPLO='L') or the upper + triangle (if UPLO='U') of A, including the diagonal, is + destroyed. + +.tp 8 +IA (global input) INTEGER + A's global row index, which points to the beginning of the + submatrix which is to be operated on. + +.tp 8 +JA (global input) INTEGER + A's global column index, which points to the beginning of + the submatrix which is to be operated on. + +.tp 8 +DESCA (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix A. 
+ If DESCA( CTXT_ ) is incorrect, PCHEEVX cannot guarantee + correct error reporting. + +.tp 8 +VL (global input) REAL + If RANGE='V', the lower bound of the interval to be searched + for eigenvalues. Not referenced if RANGE = 'A' or 'I'. + +.tp 8 +VU (global input) REAL + If RANGE='V', the upper bound of the interval to be searched + for eigenvalues. Not referenced if RANGE = 'A' or 'I'. + +.tp 8 +IL (global input) INTEGER + If RANGE='I', the index (from smallest to largest) of the + smallest eigenvalue to be returned. IL >= 1. + Not referenced if RANGE = 'A' or 'V'. + +.tp 8 +IU (global input) INTEGER + If RANGE='I', the index (from smallest to largest) of the + largest eigenvalue to be returned. min(IL,N) <= IU <= N. + Not referenced if RANGE = 'A' or 'V'. + +.tp 8 +ABSTOL (global input) REAL + If JOBZ='V', setting ABSTOL to PSLAMCH( CONTEXT, 'U') yields + the most orthogonal eigenvectors. + + The absolute error tolerance for the eigenvalues. + An approximate eigenvalue is accepted as converged + when it is determined to lie in an interval [a,b] + of width less than or equal to + + ABSTOL + EPS * max( |a|,|b| ) , + + where EPS is the machine precision. If ABSTOL is less than + or equal to zero, then EPS*norm(T) will be used in its place, + where norm(T) is the 1-norm of the tridiagonal matrix + obtained by reducing A to tridiagonal form. + + Eigenvalues will be computed most accurately when ABSTOL is + set to twice the underflow threshold 2*PSLAMCH('S'), not zero. + If this routine returns with ((MOD(INFO,2).NE.0) .OR. + (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or + eigenvectors did not converge, try setting ABSTOL to + 2*PSLAMCH('S'). + + See "Computing Small Singular Values of Bidiagonal Matrices + with Guaranteed High Relative Accuracy," by Demmel and + Kahan, LAPACK Working Note #3. 
+ + See "On the correctness of Parallel Bisection in Floating + Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70 + +.tp 8 +M (global output) INTEGER + Total number of eigenvalues found. 0 <= M <= N. + +.tp 8 +NZ (global output) INTEGER + Total number of eigenvectors computed. 0 <= NZ <= M. + The number of columns of Z that are filled. + If JOBZ .NE. 'V', NZ is not referenced. + If JOBZ .EQ. 'V', NZ = M unless the user supplies + insufficient space and PCHEEVX is not able to detect this + before beginning computation. To get all the eigenvectors + requested, the user must supply both sufficient + space to hold the eigenvectors in Z (M .LE. DESCZ(N_)) + and sufficient workspace to compute them. (See LWORK below.) + PCHEEVX is always able to detect insufficient space without + computation unless RANGE .EQ. 'V'. + +.tp 8 +W (global output) REAL array, dimension (N) + On normal exit, the first M entries contain the selected + eigenvalues in ascending order. + +.tp 8 +ORFAC (global input) REAL + Specifies which eigenvectors should be reorthogonalized. + Eigenvectors that correspond to eigenvalues which are within + tol=ORFAC*norm(A) of each other are to be reorthogonalized. + However, if the workspace is insufficient (see LWORK), + tol may be decreased until all eigenvectors to be + reorthogonalized can be stored in one process. + No reorthogonalization will be done if ORFAC equals zero. + A default value of 10^-3 is used if ORFAC is negative. + ORFAC should be identical on all processes. + +.tp 8 +Z (local output) COMPLEX array, + global dimension (N, N), + local dimension ( LLD_Z, LOCc(JZ+N-1) ) + If JOBZ = 'V', then on normal exit the first M columns of Z + contain the orthonormal eigenvectors of the matrix + corresponding to the selected eigenvalues. If an eigenvector + fails to converge, then that column of Z contains the latest + approximation to the eigenvector, and the index of the + eigenvector is returned in IFAIL. 
+ If JOBZ = 'N', then Z is not referenced. + +.tp 8 +IZ (global input) INTEGER + Z's global row index, which points to the beginning of the + submatrix which is to be operated on. + +.tp 8 +JZ (global input) INTEGER + Z's global column index, which points to the beginning of + the submatrix which is to be operated on. + +.tp 8 +DESCZ (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix Z. + DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) + +.tp 8 +WORK (local workspace/output) COMPLEX array, + dimension (LWORK) + WORK(1) returns adequate workspace to allow + optimal performance. + +.tp 8 +LWORK (local input) INTEGER + Size of WORK array. If only eigenvalues are requested: + LWORK >= N + MAX( NB * ( NP0 + 1 ), 3 ) + If eigenvectors are requested: + LWORK >= N + ( NP0 + NQ0 + NB ) * NB + with NQ0 = NUMROC( NN, NB, 0, 0, NPCOL ). + + For optimal performance, greater workspace is needed, i.e. + LWORK >= MAX( LWORK, NHETRD_LWORK ) + Where LWORK is as defined above, and + NHETRD_LWORK = N + 2*( ANB+1 )*( 4*NPS+2 ) + + ( NPS + 1 ) * NPS + + ICTXT = DESCA( CTXT_ ) + ANB = PJLAENV( ICTXT, 3, 'PCHETTRD', 'L', 0, 0, 0, 0 ) + SQNPC = SQRT( DBLE( NPROW * NPCOL ) ) + NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) + + NUMROC is a ScaLAPACK tool function; + PJLAENV is a ScaLAPACK environmental inquiry function + MYROW, MYCOL, NPROW and NPCOL can be determined by calling + the subroutine BLACS_GRIDINFO. + + If LWORK = -1, then LWORK is global input and a workspace + query is assumed; the routine only calculates the + optimal size for all work arrays. Each of these + values is returned in the first entry of the corresponding + work array, and no error message is issued by PXERBLA. + +.tp 8 +RWORK (local workspace/output) REAL array, + dimension (LRWORK) + On return, RWORK(1) contains the optimal amount of + workspace required for efficient execution. 
+ if JOBZ='N' RWORK(1) = optimal amount of workspace + required to compute eigenvalues efficiently + if JOBZ='V' RWORK(1) = optimal amount of workspace + required to compute eigenvalues and eigenvectors + efficiently with no guarantee on orthogonality. + If RANGE='V', it is assumed that all eigenvectors + may be required. + +.tp 8 +LRWORK (local input) INTEGER + Size of RWORK + See below for definitions of variables used to define LRWORK. + If no eigenvectors are requested (JOBZ = 'N') then + LRWORK >= 5 * NN + 4 * N + If eigenvectors are requested (JOBZ = 'V' ) then + the amount of workspace required to guarantee that all + eigenvectors are computed is: + LRWORK >= 4*N + MAX( 5*NN, NP0 * MQ0 ) + + ICEIL( NEIG, NPROW*NPCOL)*NN + + The computed eigenvectors may not be orthogonal if the + minimal workspace is supplied and ORFAC is too small. + If you want to guarantee orthogonality (at the cost + of potentially poor performance) you should add + the following to LRWORK: + (CLUSTERSIZE-1)*N + where CLUSTERSIZE is the number of eigenvalues in the + largest cluster, where a cluster is defined as a set of + close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) | + W(J+1) <= W(J) + ORFAC*2*norm(A) } + Variable definitions: + NEIG = number of eigenvectors requested + NB = DESCA( MB_ ) = DESCA( NB_ ) = + DESCZ( MB_ ) = DESCZ( NB_ ) + NN = MAX( N, NB, 2 ) + DESCA( RSRC_ ) = DESCA( CSRC_ ) = DESCZ( RSRC_ ) = + DESCZ( CSRC_ ) = 0 + NP0 = NUMROC( NN, NB, 0, 0, NPROW ) + MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) + ICEIL( X, Y ) is a ScaLAPACK function returning + ceiling(X/Y) + + When LRWORK is too small: + If LRWORK is too small to guarantee orthogonality, + PCHEEVX attempts to maintain orthogonality in + the clusters with the smallest + spacing between the eigenvalues. + If LRWORK is too small to compute all the eigenvectors + requested, no computation is performed and INFO=-25 + is returned. 
Note that when RANGE='V', PCHEEVX does + not know how many eigenvectors are requested until + the eigenvalues are computed. Therefore, when RANGE='V' + and as long as LRWORK is large enough to allow PCHEEVX to + compute the eigenvalues, PCHEEVX will compute the + eigenvalues and as many eigenvectors as it can. + + Relationship between workspace, orthogonality & performance: + If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing + enough space to compute all the eigenvectors + orthogonally will cause serious degradation in + performance. In the limit (i.e. CLUSTERSIZE = N-1) + PCSTEIN will perform no better than CSTEIN on 1 + processor. + For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing + all eigenvectors will increase the total execution time + by a factor of 2 or more. + For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will + grow as the square of the cluster size, all other factors + remaining equal and assuming enough workspace. Less + workspace means less reorthogonalization but faster + execution. + + If LRWORK = -1, then LRWORK is global input and a workspace + query is assumed; the routine only calculates the size + required for optimal performance for all work arrays. Each of + these values is returned in the first entry of the + corresponding work arrays, and no error message is issued by + PXERBLA. + +.tp 8 +IWORK (local workspace) INTEGER array + On return, IWORK(1) contains the amount of integer workspace + required. + +.tp 8 +LIWORK (local input) INTEGER + size of IWORK + LIWORK >= 6 * NNP + Where: + NNP = MAX( N, NPROW*NPCOL + 1, 4 ) + If LIWORK = -1, then LIWORK is global input and a workspace + query is assumed; the routine only calculates the minimum + and optimal size for all work arrays. Each of these + values is returned in the first entry of the corresponding + work array, and no error message is issued by PXERBLA. 
+ +.tp 8 +IFAIL (global output) INTEGER array, dimension (N) + If JOBZ = 'V', then on normal exit, the first M elements of + IFAIL are zero. If (MOD(INFO,2).NE.0) on exit, then + IFAIL contains the + indices of the eigenvectors that failed to converge. + If JOBZ = 'N', then IFAIL is not referenced. + +.tp 8 +ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL) + This array contains indices of eigenvectors corresponding to + a cluster of eigenvalues that could not be reorthogonalized + due to insufficient workspace (see LWORK, ORFAC and INFO). + Eigenvectors corresponding to clusters of eigenvalues indexed + ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be + reorthogonalized due to lack of workspace. Hence the + eigenvectors corresponding to these clusters may not be + orthogonal. ICLUSTR() is a zero terminated array. + (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if + K is the number of clusters + ICLUSTR is not referenced if JOBZ = 'N' + +.tp 8 +GAP (global output) REAL array, + dimension (NPROW*NPCOL) + This array contains the gap between eigenvalues whose + eigenvectors could not be reorthogonalized. The output + values in this array correspond to the clusters indicated + by the array ICLUSTR. As a result, the dot product between + eigenvectors corresponding to the I^th cluster may be as high + as ( C * n ) / GAP(I) where C is a small constant. + +.tp 8 +INFO (global output) INTEGER + = 0: successful exit + < 0: If the i-th argument is an array and the j-entry had + an illegal value, then INFO = -(i*100+j), if the i-th + argument is a scalar and had an illegal value, then + INFO = -i. + > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors + failed to converge. Their indices are stored + in IFAIL. Ensure ABSTOL=2.0*PSLAMCH( 'U' ) + Send e-mail to scalapack@cs.utk.edu + if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding + to one or more clusters of eigenvalues could not be + reorthogonalized because of insufficient workspace. 
+ The indices of the clusters are stored in the array + ICLUSTR. + if (MOD(INFO/4,2).NE.0), then space limit prevented + PCHEEVX from computing all of the eigenvectors + between VL and VU. The number of eigenvectors + computed is returned in NZ. + if (MOD(INFO/8,2).NE.0), then PSSTEBZ failed to compute + eigenvalues. Ensure ABSTOL=2.0*PSLAMCH( 'U' ) + Send e-mail to scalapack@cs.utk.edu --- scalapack-doc-1.5.orig/man/manl/pchegvx.l +++ scalapack-doc-1.5/man/manl/pchegvx.l @@ -1,6 +1,7 @@ .TH PCHEGVX l "12 May 1997" "LAPACK version 1.5" "LAPACK routine (version 1.5)" .SH NAME - +PCHEGVX - compute all the eigenvalues, and optionally, +the eigenvectors of a complex generalized Hermitian-definite eigenproblem .SH SYNOPSIS .TP 20 SUBROUTINE PCHEGVX( @@ -119,816 +120,491 @@ .ti +4 INTRINSIC ABS, CMPLX, ICHAR, MAX, MIN, MOD, REAL -.TP 20 -.ti +4 -IF( -BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* -RSRC_.LT.0 )RETURN -.TP 20 -.ti +4 -ICTXT -= DESCA( CTXT_ ) -.TP 20 -.ti +4 -CALL -BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) -.TP 20 -.ti +4 -INFO -= 0 -.TP 20 -.ti +4 -IF( -NPROW.EQ.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -( 900+CTXT_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+CTXT_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2600+CTXT_ ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -EPS -= PSLAMCH( DESCA( CTXT_ ), 'Precision' ) -.TP 20 -.ti +4 -WANTZ -= LSAME( JOBZ, 'V' ) -.TP 20 -.ti +4 -UPPER -= LSAME( UPLO, 'U' ) -.TP 20 -.ti +4 -ALLEIG -= LSAME( RANGE, 'A' ) -.TP 20 -.ti +4 -VALEIG -= LSAME( RANGE, 'V' ) -.TP 20 -.ti +4 -INDEIG -= LSAME( RANGE, 'I' ) -.TP 20 -.ti +4 -CALL -CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, INFO ) -.TP 20 -.ti +4 -CALL -CHK1MAT( N, 4, N, 4, IB, JB, DESCB, 13, INFO ) -.TP 20 -.ti +4 -CALL -CHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 26, INFO ) -.TP 20 -.ti +4 -IF( -INFO.EQ.0 ) THEN -.TP 20 -.ti +4 -IF( -MYROW.EQ.0 .AND. 
MYCOL.EQ.0 ) THEN -.TP 20 -.ti +4 -RWORK( -1 ) = ABSTOL -.TP 20 -.ti +4 -IF( -VALEIG ) THEN -.TP 20 -.ti +4 -RWORK( -2 ) = VL -.TP 20 -.ti +4 -RWORK( -3 ) = VU -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -RWORK( -2 ) = ZERO -.TP 20 -.ti +4 -RWORK( -3 ) = ZERO -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -SGEBS2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, RWORK, -3 ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -CALL -SGEBR2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, RWORK, -3, 0, 0 ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IAROW -= INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), -NPROW ) -.TP 20 -.ti +4 -IBROW -= INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), -NPROW ) -.TP 20 -.ti +4 -IACOL -= INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), -NPCOL ) -.TP 20 -.ti +4 -IBCOL -= INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), -NPCOL ) -.TP 20 -.ti +4 -IROFFA -= MOD( IA-1, DESCA( MB_ ) ) -.TP 20 -.ti +4 -ICOFFA -= MOD( JA-1, DESCA( NB_ ) ) -.TP 20 -.ti +4 -IROFFB -= MOD( IB-1, DESCB( MB_ ) ) -.TP 20 -.ti +4 -ICOFFB -= MOD( JB-1, DESCB( NB_ ) ) -.TP 20 -.ti +4 -LQUERY -= .FALSE. -.TP 20 -.ti +4 -IF( -LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) -LQUERY = .TRUE. -.TP 20 -.ti +4 -LIWMIN -= 6*MAX( N, ( NPROW*NPCOL )+1, 4 ) -.TP 20 -.ti +4 -NB -= DESCA( MB_ ) -.TP 20 -.ti +4 -NN -= MAX( N, NB, 2 ) -.TP 20 -.ti +4 -NP0 -= NUMROC( NN, NB, 0, 0, NPROW ) -.TP 20 -.ti +4 -IF( -( .NOT.WANTZ ) .OR. ( VALEIG .AND. ( .NOT.LQUERY ) ) ) -THEN -.TP 20 -.ti +4 -LWMIN -= N + MAX( NB*( NP0+1 ), 3 ) -.TP 20 -.ti +4 -LRWMIN -= 5*NN + 4*N -.TP 20 -.ti +4 -NEIG -= 0 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IF( -ALLEIG .OR. 
VALEIG ) THEN -.TP 20 -.ti +4 -NEIG -= N -.TP 20 -.ti +4 -ELSE -IF( INDEIG ) THEN -.TP 20 -.ti +4 -NEIG -= IU - IL + 1 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -MQ0 -= NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) -.TP 20 -.ti +4 -LWMIN -= N + ( NP0+MQ0+NB )*NB -.TP 20 -.ti +4 -LRWMIN -= 4*N + MAX( 5*NN, NP0*MQ0 ) + -ICEIL( NEIG, NPROW*NPCOL )*NN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN -.TP 20 -.ti +4 -INFO -= -1 -.TP 20 -.ti +4 -ELSE -IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -2 -.TP 20 -.ti +4 -ELSE -IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN -.TP 20 -.ti +4 -INFO -= -3 -.TP 20 -.ti +4 -ELSE -IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN -.TP 20 -.ti +4 -INFO -= -4 -.TP 20 -.ti +4 -ELSE -IF( N.LT.0 ) THEN -.TP 20 -.ti +4 -INFO -= -5 -.TP 20 -.ti +4 -ELSE -IF( IROFFA.NE.0 ) THEN -.TP 20 -.ti +4 -INFO -= -7 -.TP 20 -.ti +4 -ELSE -IF( ICOFFA.NE.0 ) THEN -.TP 20 -.ti +4 -INFO -= -8 -.TP 20 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 900+NB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( M_ ).NE.DESCB( M_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+M_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( N_ ).NE.DESCB( N_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+N_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+MB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( NB_ ).NE.DESCB( NB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+NB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( RSRC_ ).NE.DESCB( RSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+RSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CSRC_ ).NE.DESCB( CSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+CSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+CTXT_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+M_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 
2200+N_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+MB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+NB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+RSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+CSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+CTXT_ ) -.TP 20 -.ti +4 -ELSE -IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN -.TP 20 -.ti +4 -INFO -= -11 -.TP 20 -.ti +4 -ELSE -IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN -.TP 20 -.ti +4 -INFO -= -12 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN -.TP 20 -.ti +4 -INFO -= -15 -.TP 20 -.ti +4 -ELSE -IF( INDEIG .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) -THEN -.TP 20 -.ti +4 -INFO -= -16 -.TP 20 -.ti +4 -ELSE -IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) -THEN -.TP 20 -.ti +4 -INFO -= -17 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. ( ABS( RWORK( 2 )-VL ).GT.FIVE*EPS* -ABS( VL ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -14 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. ( ABS( RWORK( 3 )-VU ).GT.FIVE*EPS* -ABS( VU ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -15 -.TP 20 -.ti +4 -ELSE -IF( ABS( RWORK( 1 )-ABSTOL ).GT.FIVE*EPS* -ABS( ABSTOL ) ) THEN -.TP 20 -.ti +4 -INFO -= -18 -.TP 20 -.ti +4 -ELSE -IF( LWORK.LT.LWMIN .AND. LWORK.NE.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -28 -.TP 20 -.ti +4 -ELSE -IF( LRWORK.LT.LRWMIN .AND. LRWORK.NE.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -30 -.TP 20 -.ti +4 -ELSE -IF( LIWORK.LT.LIWMIN .AND. 
LIWORK.NE.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -32 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM1( -1 ) = IBTYPE -.TP 20 -.ti +4 -IDUM2( -1 ) = 1 -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -IDUM1( -2 ) = ICHAR( 'V' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -2 ) = ICHAR( 'N' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -2 ) = 2 -.TP 20 -.ti +4 -IF( -UPPER ) THEN -.TP 20 -.ti +4 -IDUM1( -3 ) = ICHAR( 'U' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -3 ) = ICHAR( 'L' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -3 ) = 3 -.TP 20 -.ti +4 -IF( -ALLEIG ) THEN -.TP 20 -.ti +4 -IDUM1( -4 ) = ICHAR( 'A' ) -.TP 20 -.ti +4 -ELSE -IF( INDEIG ) THEN -.TP 20 -.ti +4 -IDUM1( -4 ) = ICHAR( 'I' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -4 ) = ICHAR( 'V' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -4 ) = 4 -.TP 20 -.ti +4 -IF( -LQUERY ) THEN -.TP 20 -.ti +4 -IDUM1( -5 ) = -1 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -5 ) = 1 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -5 ) = 5 -.TP 20 -.ti +4 -CALL -PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 9, N, 4, N, 4, IB, -JB, DESCB, 13, 5, IDUM1, IDUM2, INFO ) -.TP 20 -.ti +4 -CALL -PCHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 26, 0, IDUM1, IDUM2, -INFO ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -WORK( -1 ) = CMPLX( REAL( LWMIN ) ) -.TP 20 -.ti +4 -RWORK( -1 ) = REAL( LRWMIN ) -.TP 20 -.ti +4 -IWORK( -1 ) = LIWMIN -.TP 20 -.ti +4 -IF( -INFO.NE.0 ) THEN -.TP 20 -.ti +4 -CALL -PXERBLA( ICTXT, 'PCHEGVX ', -INFO ) -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -ELSE -IF( LQUERY ) THEN -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PCPOTRF( UPLO, N, B, IB, JB, DESCB, INFO ) -.TP 20 -.ti +4 -IF( -INFO.NE.0 ) THEN -.TP 20 -.ti +4 -IFAIL( -1 ) = INFO -.TP 20 -.ti +4 -INFO -= IERRNPD -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PCHEGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, -DESCB, SCALE, INFO ) -.TP 20 -.ti +4 -CALL -PCHEEVX( JOBZ, RANGE, 
UPLO, N, A, IA, JA, DESCA, VL, VU, IL, -IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, -WORK, LWORK, RWORK, LRWORK, IWORK, LIWORK, IFAIL, -ICLUSTR, GAP, INFO ) -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -NEIG -= M -.TP 20 -.ti +4 -IF( -IBTYPE.EQ.1 .OR. IBTYPE.EQ.2 ) THEN -.TP 20 -.ti +4 -IF( -UPPER ) THEN -.TP 20 -.ti +4 -TRANS -= 'N' -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -TRANS -= 'C' -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PCTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, -B, IB, JB, DESCB, Z, IZ, JZ, DESCZ ) -.TP 20 -.ti +4 -ELSE -IF( IBTYPE.EQ.3 ) THEN -.TP 20 -.ti +4 -IF( -UPPER ) THEN -.TP 20 -.ti +4 -TRANS -= 'C' -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -TRANS -= 'N' -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PCTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, -B, IB, JB, DESCB, Z, IZ, JZ, DESCZ ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -SCALE.NE.ONE ) THEN -.TP 20 -.ti +4 -CALL -SSCAL( N, SCALE, W, 1 ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END + .SH PURPOSE +PCHEGVX computes all the eigenvalues, and optionally, +the eigenvectors +of a complex generalized Hermitian-definite eigenproblem, of the form +sub( A )*x=(lambda)*sub( B )*x, sub( A )*sub( B )x=(lambda)*x, or +sub( B )*sub( A )*x=(lambda)*x. +Here sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ) is assumed to be +Hermitian, and sub( B ) denoting B( IB:IB+N-1, JB:JB+N-1 ) is assumed +to be Hermitian positive definite. + +.SH NOTES +Each global data object is described by an associated description +vector. This vector stores the information required to establish +the mapping between an object element and its corresponding process +and memory location. + +Let A be a generic term for any 2D block cyclicly distributed array. +Such a global array has an associated description vector DESCA. +In the following comments, the character _ should be read as +"of the global array". 
+ +NOTATION STORED IN EXPLANATION +.br +--------------- -------------- -------------------------------------- +.br +DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, + DTYPE_A = 1. +.br +CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating + the BLACS process grid A is distribu- + ted over. The context itself is glo- + bal, but the handle (the integer + value) may vary. +.br +M_A (global) DESCA( M_ ) The number of rows in the global + array A. +.br +N_A (global) DESCA( N_ ) The number of columns in the global + array A. +.br +MB_A (global) DESCA( MB_ ) The blocking factor used to distribute + the rows of the array. +.br +NB_A (global) DESCA( NB_ ) The blocking factor used to distribute + the columns of the array. +.br +RSRC_A (global) DESCA( RSRC_ ) The process row over which the first + row of the array A is distributed. +.br +CSRC_A (global) DESCA( CSRC_ ) The process column over which the + first column of the array A is + distributed. +.br +LLD_A (local) DESCA( LLD_ ) The leading dimension of the local + array. LLD_A >= MAX(1,LOCr(M_A)). + +Let K be the number of rows or columns of a distributed matrix, +and assume that its process grid has dimension p x q. +LOCr( K ) denotes the number of elements of K that a process +would receive if K were distributed over the p processes of its +process column. +.br +Similarly, LOCc( K ) denotes the number of elements of K that a +process would receive if K were distributed over the q processes of +its process row. +.br +The values of LOCr() and LOCc() may be determined via a call to the +ScaLAPACK tool function, NUMROC: +.br + LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), +.br + LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). 
+.br +An upper bound for these quantities may be computed by: +.br + LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A +.br + LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A + +.SH ARGUMENTS +.tp 8 +IBTYPE (global input) INTEGER + Specifies the problem type to be solved: + = 1: sub( A )*x = (lambda)*sub( B )*x + = 2: sub( A )*sub( B )*x = (lambda)*x + = 3: sub( B )*sub( A )*x = (lambda)*x + +.tp 8 +JOBZ (global input) CHARACTER*1 + = 'N': Compute eigenvalues only; + = 'V': Compute eigenvalues and eigenvectors. + +.tp 8 +RANGE (global input) CHARACTER*1 + = 'A': all eigenvalues will be found. + = 'V': all eigenvalues in the interval [VL,VU] will be found. + = 'I': the IL-th through IU-th eigenvalues will be found. + +.tp 8 +UPLO (global input) CHARACTER*1 + = 'U': Upper triangles of sub( A ) and sub( B ) are stored; + = 'L': Lower triangles of sub( A ) and sub( B ) are stored. + +.tp 8 +N (global input) INTEGER + The order of the matrices sub( A ) and sub( B ). N >= 0. + +.tp 8 +A (local input/local output) COMPLEX pointer into the + local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). + On entry, this array contains the local pieces of the + N-by-N Hermitian distributed matrix sub( A ). If UPLO = 'U', + the leading N-by-N upper triangular part of sub( A ) contains + the upper triangular part of the matrix. If UPLO = 'L', the + leading N-by-N lower triangular part of sub( A ) contains + the lower triangular part of the matrix. + + On exit, if JOBZ = 'V', then if INFO = 0, sub( A ) contains + the distributed matrix Z of eigenvectors. The eigenvectors + are normalized as follows: + if IBTYPE = 1 or 2, Z**H*sub( B )*Z = I; + if IBTYPE = 3, Z**H*inv( sub( B ) )*Z = I. + If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') + or the lower triangle (if UPLO='L') of sub( A ), including + the diagonal, is destroyed. + +.tp 8 +IA (global input) INTEGER + The row index in the global array A indicating the first + row of sub( A ). 
+ +.tp 8 +JA (global input) INTEGER + The column index in the global array A indicating the + first column of sub( A ). + +.tp 8 +DESCA (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix A. + If DESCA( CTXT_ ) is incorrect, PCHEGVX cannot guarantee + correct error reporting. + +.tp 8 +B (local input/local output) COMPLEX pointer into the + local memory to an array of dimension (LLD_B, LOCc(JB+N-1)). + On entry, this array contains the local pieces of the + N-by-N Hermitian distributed matrix sub( B ). If UPLO = 'U', + the leading N-by-N upper triangular part of sub( B ) contains + the upper triangular part of the matrix. If UPLO = 'L', the + leading N-by-N lower triangular part of sub( B ) contains + the lower triangular part of the matrix. + + On exit, if INFO <= N, the part of sub( B ) containing the + matrix is overwritten by the triangular factor U or L from + the Cholesky factorization sub( B ) = U**H*U or + sub( B ) = L*L**H. + +.tp 8 +IB (global input) INTEGER + The row index in the global array B indicating the first + row of sub( B ). + +.tp 8 +JB (global input) INTEGER + The column index in the global array B indicating the + first column of sub( B ). + +.tp 8 +DESCB (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix B. + DESCB( CTXT_ ) must equal DESCA( CTXT_ ) + +.tp 8 +VL (global input) REAL + If RANGE='V', the lower bound of the interval to be searched + for eigenvalues. Not referenced if RANGE = 'A' or 'I'. + +.tp 8 +VU (global input) REAL + If RANGE='V', the upper bound of the interval to be searched + for eigenvalues. Not referenced if RANGE = 'A' or 'I'. + +.tp 8 +IL (global input) INTEGER + If RANGE='I', the index (from smallest to largest) of the + smallest eigenvalue to be returned. IL >= 1. + Not referenced if RANGE = 'A' or 'V'. 
+ +.tp 8 +IU (global input) INTEGER + If RANGE='I', the index (from smallest to largest) of the + largest eigenvalue to be returned. min(IL,N) <= IU <= N. + Not referenced if RANGE = 'A' or 'V'. + +.tp 8 +ABSTOL (global input) REAL + If JOBZ='V', setting ABSTOL to PSLAMCH( CONTEXT, 'U') yields + the most orthogonal eigenvectors. + + The absolute error tolerance for the eigenvalues. + An approximate eigenvalue is accepted as converged + when it is determined to lie in an interval [a,b] + of width less than or equal to + + ABSTOL + EPS * max( |a|,|b| ) , + + where EPS is the machine precision. If ABSTOL is less than + or equal to zero, then EPS*norm(T) will be used in its place, + where norm(T) is the 1-norm of the tridiagonal matrix + obtained by reducing A to tridiagonal form. + + Eigenvalues will be computed most accurately when ABSTOL is + set to twice the underflow threshold 2*PSLAMCH('S') not zero. + If this routine returns with ((MOD(INFO,2).NE.0) .OR. + (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or + eigenvectors did not converge, try setting ABSTOL to + 2*PSLAMCH('S'). + + See "Computing Small Singular Values of Bidiagonal Matrices + with Guaranteed High Relative Accuracy," by Demmel and + Kahan, LAPACK Working Note #3. + + See "On the correctness of Parallel Bisection in Floating + Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70 + +.tp 8 +M (global output) INTEGER + Total number of eigenvalues found. 0 <= M <= N. + +.tp 8 +NZ (global output) INTEGER + Total number of eigenvectors computed. 0 <= NZ <= M. + The number of columns of Z that are filled. + If JOBZ .NE. 'V', NZ is not referenced. + If JOBZ .EQ. 'V', NZ = M unless the user supplies + insufficient space and PCHEGVX is not able to detect this + before beginning computation. To get all the eigenvectors + requested, the user must supply both sufficient + space to hold the eigenvectors in Z (M .LE. DESCZ(N_)) + and sufficient workspace to compute them. (See LWORK below.) 
+ PCHEGVX is always able to detect insufficient space without + computation unless RANGE .EQ. 'V'. + +.tp 8 +W (global output) REAL array, dimension (N) + On normal exit, the first M entries contain the selected + eigenvalues in ascending order. + +.tp 8 +ORFAC (global input) REAL + Specifies which eigenvectors should be reorthogonalized. + Eigenvectors that correspond to eigenvalues which are within + tol=ORFAC*norm(A) of each other are to be reorthogonalized. + However, if the workspace is insufficient (see LWORK), + tol may be decreased until all eigenvectors to be + reorthogonalized can be stored in one process. + No reorthogonalization will be done if ORFAC equals zero. + A default value of 10^-3 is used if ORFAC is negative. + ORFAC should be identical on all processes. + +.tp 8 +Z (local output) COMPLEX array, + global dimension (N, N), + local dimension ( LLD_Z, LOCc(JZ+N-1) ) + If JOBZ = 'V', then on normal exit the first M columns of Z + contain the orthonormal eigenvectors of the matrix + corresponding to the selected eigenvalues. If an eigenvector + fails to converge, then that column of Z contains the latest + approximation to the eigenvector, and the index of the + eigenvector is returned in IFAIL. + If JOBZ = 'N', then Z is not referenced. + +.tp 8 +IZ (global input) INTEGER + The row index in the global array Z indicating the first + row of sub( Z ). + +.tp 8 +JZ (global input) INTEGER + The column index in the global array Z indicating the + first column of sub( Z ). + +.tp 8 +DESCZ (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix Z. + DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) + +.tp 8 +WORK (local workspace/output) COMPLEX array, + dimension (LWORK) + WORK(1) returns the optimal workspace. + +.tp 8 +LWORK (local input) INTEGER + Size of WORK array. 
If only eigenvalues are requested: + LWORK >= N + MAX( NB * ( NP0 + 1 ), 3 ) + If eigenvectors are requested: + LWORK >= N + ( NP0 + MQ0 + NB ) * NB + with NQ0 = NUMROC( NN, NB, 0, 0, NPCOL ). + + For optimal performance, greater workspace is needed, i.e. + LWORK >= MAX( LWORK, N + NHETRD_LWOPT, + NHEGST_LWOPT ) + Where LWORK is as defined above, and + NHETRD_LWOPT = 2*( ANB+1 )*( 4*NPS+2 ) + + ( NPS + 1 ) * NPS + NHEGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB + + NB = DESCA( MB_ ) + NP0 = NUMROC( N, NB, 0, 0, NPROW ) + NQ0 = NUMROC( N, NB, 0, 0, NPCOL ) + ICTXT = DESCA( CTXT_ ) + ANB = PJLAENV( ICTXT, 3, 'PCHETTRD', 'L', 0, 0, 0, 0 ) + SQNPC = SQRT( DBLE( NPROW * NPCOL ) ) + NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) + + NUMROC is a ScaLAPACK tool function; + PJLAENV is a ScaLAPACK environmental inquiry function + MYROW, MYCOL, NPROW and NPCOL can be determined by calling + the subroutine BLACS_GRIDINFO. + + If LWORK = -1, then LWORK is global input and a workspace + query is assumed; the routine only calculates the optimal + size for all work arrays. Each of these values is returned + in the first entry of the corresponding work array, and no + error message is issued by PXERBLA. + +.tp 8 +RWORK (local workspace/output) REAL array, + dimension (LRWORK) + On return, RWORK(1) contains the amount of workspace + required for optimal efficiency + if JOBZ='N' RWORK(1) = optimal amount of workspace + required to compute eigenvalues efficiently + if JOBZ='V' RWORK(1) = optimal amount of workspace + required to compute eigenvalues and eigenvectors + efficiently with no guarantee on orthogonality. + If RANGE='V', it is assumed that all eigenvectors + may be required when computing optimal workspace. + +.tp 8 +LRWORK (local input) INTEGER + Size of RWORK + See below for definitions of variables used to define LRWORK.
+ If no eigenvectors are requested (JOBZ = 'N') then + LRWORK >= 5 * NN + 4 * N + If eigenvectors are requested (JOBZ = 'V' ) then + the amount of workspace required to guarantee that all + eigenvectors are computed is: + LRWORK >= 4*N + MAX( 5*NN, NP0 * MQ0 ) + + ICEIL( NEIG, NPROW*NPCOL)*NN + + The computed eigenvectors may not be orthogonal if the + minimal workspace is supplied and ORFAC is too small. + If you want to guarantee orthogonality (at the cost + of potentially poor performance) you should add + the following to LRWORK: + (CLUSTERSIZE-1)*N + where CLUSTERSIZE is the number of eigenvalues in the + largest cluster, where a cluster is defined as a set of + close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) | + W(J+1) <= W(J) + ORFAC*2*norm(A) } + Variable definitions: + NEIG = number of eigenvectors requested + NB = DESCA( MB_ ) = DESCA( NB_ ) = DESCZ( MB_ ) = + DESCZ( NB_ ) + NN = MAX( N, NB, 2 ) + DESCA( RSRC_ ) = DESCA( CSRC_ ) = DESCZ( RSRC_ ) = + DESCZ( CSRC_ ) = 0 + NP0 = NUMROC( NN, NB, 0, 0, NPROW ) + MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) + ICEIL( X, Y ) is a ScaLAPACK function returning + ceiling(X/Y) + + When LRWORK is too small: + If LRWORK is too small to guarantee orthogonality, + PCHEGVX attempts to maintain orthogonality in + the clusters with the smallest + spacing between the eigenvalues. + If LRWORK is too small to compute all the eigenvectors + requested, no computation is performed and INFO=-30 + is returned. Note that when RANGE='V', PCHEGVX does + not know how many eigenvectors are requested until + the eigenvalues are computed. Therefore, when RANGE='V' + and as long as LRWORK is large enough to allow PCHEGVX to + compute the eigenvalues, PCHEGVX will compute the + eigenvalues and as many eigenvectors as it can.
+ + Relationship between workspace, orthogonality & performance: + If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing + enough space to compute all the eigenvectors + orthogonally will cause serious degradation in + performance. In the limit (i.e. CLUSTERSIZE = N-1) + PCSTEIN will perform no better than CSTEIN on 1 processor. + For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing + all eigenvectors will increase the total execution time + by a factor of 2 or more. + For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will + grow as the square of the cluster size, all other factors + remaining equal and assuming enough workspace. Less + workspace means less reorthogonalization but faster + execution. + + If LRWORK = -1, then LRWORK is global input and a workspace + query is assumed; the routine only calculates the minimum + and optimal size for all work arrays. Each of these + values is returned in the first entry of the corresponding + work array, and no error message is issued by PXERBLA. + +.tp 8 +IWORK (local workspace) INTEGER array + On return, IWORK(1) contains the amount of integer workspace + required. + +.tp 8 +LIWORK (local input) INTEGER + size of IWORK + LIWORK >= 6 * NNP + Where: + NNP = MAX( N, NPROW*NPCOL + 1, 4 ) + + If LIWORK = -1, then LIWORK is global input and a workspace + query is assumed; the routine only calculates the minimum + and optimal size for all work arrays. Each of these + values is returned in the first entry of the corresponding + work array, and no error message is issued by PXERBLA. + +.tp 8 +IFAIL (output) INTEGER array, dimension (N) + IFAIL provides additional information when INFO .NE. 0 + If (MOD(INFO/16,2).NE.0) then IFAIL(1) indicates the order of + the smallest minor which is not positive definite. + If (MOD(INFO,2).NE.0) on exit, then IFAIL contains the + indices of the eigenvectors that failed to converge. 
+ + If neither of the above error conditions holds and JOBZ = 'V', + then the first M elements of IFAIL are set to zero. + +.tp 8 +ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL) + This array contains indices of eigenvectors corresponding to + a cluster of eigenvalues that could not be reorthogonalized + due to insufficient workspace (see LWORK, ORFAC and INFO). + Eigenvectors corresponding to clusters of eigenvalues indexed + ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be + reorthogonalized due to lack of workspace. Hence the + eigenvectors corresponding to these clusters may not be + orthogonal. ICLUSTR() is a zero terminated array. + (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if + K is the number of clusters + ICLUSTR is not referenced if JOBZ = 'N' + +.tp 8 +GAP (global output) REAL array, + dimension (NPROW*NPCOL) + This array contains the gap between eigenvalues whose + eigenvectors could not be reorthogonalized. The output + values in this array correspond to the clusters indicated + by the array ICLUSTR. As a result, the dot product between + eigenvectors corresponding to the I^th cluster may be as high + as ( C * n ) / GAP(I) where C is a small constant. + +.tp 8 +INFO (global output) INTEGER + = 0: successful exit + < 0: If the i-th argument is an array and the j-entry had + an illegal value, then INFO = -(i*100+j), if the i-th + argument is a scalar and had an illegal value, then + INFO = -i. + > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors + failed to converge. Their indices are stored + in IFAIL. Send e-mail to scalapack@cs.utk.edu + if (MOD(INFO/2,2).NE.0), then eigenvectors corresponding + to one or more clusters of eigenvalues could not be + reorthogonalized because of insufficient workspace. + The indices of the clusters are stored in the array + ICLUSTR. + if (MOD(INFO/4,2).NE.0), then space limit prevented + PCHEGVX from computing all of the eigenvectors + between VL and VU.
The number of eigenvectors + computed is returned in NZ. + if (MOD(INFO/8,2).NE.0), then PCSTEBZ failed to + compute eigenvalues. + Send e-mail to scalapack@cs.utk.edu + if (MOD(INFO/16,2).NE.0), then B was not positive + definite. IFAIL(1) indicates the order of + the smallest minor which is not positive definite. --- scalapack-doc-1.5.orig/man/manl/pdsyev.l +++ scalapack-doc-1.5/man/manl/pdsyev.l @@ -1,6 +1,8 @@ .TH PDSYEV l "12 May 1997" "LAPACK version 1.3" "LAPACK routine (version 1.3)" .SH NAME - +PDSYEV - compute all eigenvalues and, optionally, eigenvectors +of a real symmetric matrix A by calling the recommended sequence +of ScaLAPACK routines. .SH SYNOPSIS .TP 19 SUBROUTINE PDSYEV( @@ -101,756 +103,222 @@ .ti +4 INTRINSIC DBLE, ICHAR, MAX, MIN, MOD, SQRT -.TP 19 -.ti +4 -IF( -BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* -RSRC_.LT.0 )RETURN -.TP 19 -.ti +4 -IF( -N.EQ.0 ) -RETURN -.TP 19 -.ti +4 -CALL -BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) -.TP 19 -.ti +4 -INFO -= 0 -.TP 19 -.ti +4 -IF( -NPROW.EQ.-1 ) THEN -.TP 19 -.ti +4 -INFO -= -( 700+CTXT_ ) -.TP 19 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN -.TP 19 -.ti +4 -INFO -= -( 1200+CTXT_ ) -.TP 19 -.ti +4 -ELSE -.TP 19 -.ti +4 -CALL -CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) -.TP 19 -.ti +4 -CALL -CHK1MAT( N, 3, N, 3, IZ, JZ, DESCZ, 12, INFO ) -.TP 19 -.ti +4 -IF( -INFO.EQ.0 ) THEN -.TP 19 -.ti +4 -SAFMIN -= PDLAMCH( DESCA( CTXT_ ), 'Safe minimum' ) -.TP 19 -.ti +4 -EPS -= PDLAMCH( DESCA( CTXT_ ), 'Precision' ) -.TP 19 -.ti +4 -SMLNUM -= SAFMIN / EPS -.TP 19 -.ti +4 -BIGNUM -= ONE / SMLNUM -.TP 19 -.ti +4 -RMIN -= SQRT( SMLNUM ) -.TP 19 -.ti +4 -RMAX -= MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) -.TP 19 -.ti +4 -NPROCS -= NPROW*NPCOL -.TP 19 -.ti +4 -NB_A -= DESCA( NB_ ) -.TP 19 -.ti +4 -MB_A -= DESCA( MB_ ) -.TP 19 -.ti +4 -NB_Z -= DESCZ( NB_ ) -.TP 19 -.ti +4 -MB_Z -= DESCZ( MB_ ) -.TP 19 -.ti +4 -NB -= NB_A -.TP 19 -.ti +4 -LOWER -= LSAME( 
UPLO, 'L' ) -.TP 19 -.ti +4 -WANTZ -= LSAME( JOBZ, 'V' ) -.TP 19 -.ti +4 -RSRC_A -= DESCA( RSRC_ ) -.TP 19 -.ti +4 -CSRC_A -= DESCA( CSRC_ ) -.TP 19 -.ti +4 -RSRC_Z -= DESCZ( RSRC_ ) -.TP 19 -.ti +4 -LCM -= ILCM( NPROW, NPCOL ) -.TP 19 -.ti +4 -LCMQ -= LCM / NPCOL -.TP 19 -.ti +4 -IROFFA -= MOD( IA-1, MB_A ) -.TP 19 -.ti +4 -ICOFFA -= MOD( JA-1, NB_A ) -.TP 19 -.ti +4 -IROFFZ -= MOD( IZ-1, MB_A ) -.TP 19 -.ti +4 -IAROW -= INDXG2P( 1, NB_A, MYROW, RSRC_A, NPROW ) -.TP 19 -.ti +4 -IACOL -= INDXG2P( 1, MB_A, MYCOL, CSRC_A, NPCOL ) -.TP 19 -.ti +4 -IZROW -= INDXG2P( 1, NB_A, MYROW, RSRC_Z, NPROW ) -.TP 19 -.ti +4 -NP -= NUMROC( N+IROFFA, NB_Z, MYROW, IAROW, NPROW ) -.TP 19 -.ti +4 -NQ -= NUMROC( N+ICOFFA, NB_Z, MYCOL, IACOL, NPCOL ) -.TP 19 -.ti +4 -SIZEMQRLEFT -= MAX( ( NB_A*( NB_A-1 ) ) / 2, -( NP+NQ )*NB_A ) + NB_A*NB_A -.TP 19 -.ti +4 -SIZEMQRRIGHT -= MAX( ( NB_A*( NB_A-1 ) ) / 2, -( NQ+MAX( NP+NUMROC( NUMROC( N+ICOFFA, NB_A, -0, 0, NPCOL ), NB, 0, 0, LCMQ ), NP ) )* -NB_A ) + NB_A*NB_A -.TP 19 -.ti +4 -LDC -= 0 -.TP 19 -.ti +4 -IF( -WANTZ ) THEN -.TP 19 -.ti +4 -CONTEXTC -= SL_GRIDRESHAPE( DESCA( CTXT_ ), 0, 1, 1, -NPROCS, 1 ) -.TP 19 -.ti +4 -CALL -BLACS_GRIDINFO( CONTEXTC, NPROWC, NPCOLC, MYPROWC, -MYPCOLC ) -.TP 19 -.ti +4 -NRC -= NUMROC( N, NB_A, MYPROWC, 0, NPROCS ) -.TP 19 -.ti +4 -LDC -= MAX( 1, NRC ) -.TP 19 -.ti +4 -CALL -DESCINIT( DESCQR, N, N, NB, NB, 0, 0, CONTEXTC, -LDC, INFO ) -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -INDTAU -= 1 -.TP 19 -.ti +4 -INDE -= INDTAU + N -.TP 19 -.ti +4 -INDD -= INDE + N -.TP 19 -.ti +4 -INDD2 -= INDD + N -.TP 19 -.ti +4 -INDE2 -= INDD2 + N -.TP 19 -.ti +4 -INDWORK -= INDE2 + N -.TP 19 -.ti +4 -INDWORK2 -= INDWORK + N*LDC -.TP 19 -.ti +4 -LLWORK -= LWORK - INDWORK + 1 -.TP 19 -.ti +4 -NN -= MAX( N, NB, 2 ) -.TP 19 -.ti +4 -IF( -WANTZ ) THEN -.TP 19 -.ti +4 -QRMEM -= 5*N + MAX( 2*NP+NQ+NB*NN, 2*NN-2 ) + N*LDC -.TP 19 -.ti +4 -LWMIN -= MAX( SIZEMQRLEFT, SIZEMQRRIGHT, QRMEM ) -.TP 19 -.ti +4 -ELSE -.TP 19 -.ti +4 -LWMIN -= 
5*N + 2*NP + NQ + NB*NN -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -IF( -INFO.EQ.0 ) THEN -.TP 19 -.ti +4 -IF( -.NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN -.TP 19 -.ti +4 -INFO -= -1 -.TP 19 -.ti +4 -ELSE -IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN -.TP 19 -.ti +4 -INFO -= -2 -.TP 19 -.ti +4 -ELSE -IF( LWORK.LT.LWMIN .AND. LWORK.NE.-1 ) THEN -.TP 19 -.ti +4 -INFO -= -14 -.TP 19 -.ti +4 -ELSE -IF( IROFFA.NE.IROFFZ ) THEN -.TP 19 -.ti +4 -INFO -= -10 -.TP 19 -.ti +4 -ELSE -IF( IROFFA.NE.0 ) THEN -.TP 19 -.ti +4 -INFO -= -5 -.TP 19 -.ti +4 -ELSE -IF( IAROW.NE.IZROW ) THEN -.TP 19 -.ti +4 -INFO -= -10 -.TP 19 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN -.TP 19 -.ti +4 -INFO -= -( 700+NB_ ) -.TP 19 -.ti +4 -ELSE -IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN -.TP 19 -.ti +4 -INFO -= -( 1200+M_ ) -.TP 19 -.ti +4 -ELSE -IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN -.TP 19 -.ti +4 -INFO -= -( 1200+N_ ) -.TP 19 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN -.TP 19 -.ti +4 -INFO -= -( 1200+MB_ ) -.TP 19 -.ti +4 -ELSE -IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN -.TP 19 -.ti +4 -INFO -= -( 1200+NB_ ) -.TP 19 -.ti +4 -ELSE -IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN -.TP 19 -.ti +4 -INFO -= -( 1200+RSRC_ ) -.TP 19 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN -.TP 19 -.ti +4 -INFO -= -( 1200+CTXT_ ) -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -IF( -WANTZ ) THEN -.TP 19 -.ti +4 -IDUM1( -1 ) = ICHAR( 'V' ) -.TP 19 -.ti +4 -ELSE -.TP 19 -.ti +4 -IDUM1( -1 ) = ICHAR( 'N' ) -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -IDUM2( -1 ) = 1 -.TP 19 -.ti +4 -IF( -LOWER ) THEN -.TP 19 -.ti +4 -IDUM1( -2 ) = ICHAR( 'L' ) -.TP 19 -.ti +4 -ELSE -.TP 19 -.ti +4 -IDUM1( -2 ) = ICHAR( 'U' ) -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -IDUM2( -2 ) = 2 -.TP 19 -.ti +4 -IF( -LWORK.EQ.-1 ) THEN -.TP 19 -.ti +4 -IDUM1( -3 ) = -1 -.TP 19 -.ti +4 -ELSE -.TP 19 -.ti +4 -IDUM1( -3 ) = 1 -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -IDUM2( -3 ) = 
3 -.TP 19 -.ti +4 -CALL -PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 7, N, 3, N, 3, IZ, -JZ, DESCZ, 12, 3, IDUM1, IDUM2, INFO ) -.TP 19 -.ti +4 -WORK( -1 ) = DBLE( LWMIN ) -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -IF( -INFO.NE.0 ) THEN -.TP 19 -.ti +4 -CALL -PXERBLA( DESCA( CTXT_ ), 'PDSYEV', -INFO ) -.TP 19 -.ti +4 -IF( -WANTZ ) -CALL BLACS_GRIDEXIT( CONTEXTC ) -.TP 19 -.ti +4 -RETURN -.TP 19 -.ti +4 -ELSE -IF( LWORK.EQ.-1 ) THEN -.TP 19 -.ti +4 -IF( -WANTZ ) -CALL BLACS_GRIDEXIT( CONTEXTC ) -.TP 19 -.ti +4 -RETURN -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -ISCALE -= 0 -.TP 19 -.ti +4 -ANRM -= PDLANSY( '1', UPLO, N, A, IA, JA, DESCA, WORK( INDWORK ) ) -.TP 19 -.ti +4 -IF( -ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN -.TP 19 -.ti +4 -ISCALE -= 1 -.TP 19 -.ti +4 -SIGMA -= RMIN / ANRM -.TP 19 -.ti +4 -ELSE -IF( ANRM.GT.RMAX ) THEN -.TP 19 -.ti +4 -ISCALE -= 1 -.TP 19 -.ti +4 -SIGMA -= RMAX / ANRM -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -IF( -ISCALE.EQ.1 ) THEN -.TP 19 -.ti +4 -CALL -PDLASCL( UPLO, ONE, SIGMA, N, N, A, IA, JA, DESCA, -IINFO ) -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -CALL -PDSYTRD( UPLO, N, A, IA, JA, DESCA, WORK( INDD ), -WORK( INDE ), WORK( INDTAU ), WORK( INDWORK ), -LLWORK, IINFO ) -.TP 19 -.ti +4 -DO -10 I = 1, N -.TP 19 -.ti +4 -CALL -PDELGET( 'A', ' ', WORK( INDD2+I-1 ), A, I+IA-1, I+JA-1, -DESCA ) -.TP 19 -.ti +4 -10 -CONTINUE -.TP 19 -.ti +4 -IF( -LSAME( UPLO, 'U' ) ) THEN -.TP 19 -.ti +4 -DO -20 I = 1, N - 1 -.TP 19 -.ti +4 -CALL -PDELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA-1, I+JA, -DESCA ) -.TP 19 -.ti +4 -20 -CONTINUE -.TP 19 -.ti +4 -ELSE -.TP 19 -.ti +4 -DO -30 I = 1, N - 1 -.TP 19 -.ti +4 -CALL -PDELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA, I+JA-1, -DESCA ) -.TP 19 -.ti +4 -30 -CONTINUE -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -IF( -WANTZ ) THEN -.TP 19 -.ti +4 -CALL -PDLASET( 'Full', N, N, ZERO, ONE, WORK( INDWORK ), 1, 1, -DESCQR ) -.TP 19 -.ti +4 -CALL -DSTEQR2( 'I', N, WORK( INDD2 ), WORK( INDE2 ), -WORK( INDWORK ), LDC, NRC, WORK( INDWORK2 
), -INFO ) -.TP 19 -.ti +4 -CALL -PDGEMR2D( N, N, WORK( INDWORK ), 1, 1, DESCQR, Z, 1, 1, -DESCZ, CONTEXTC ) -.TP 19 -.ti +4 -CALL -PDORMTR( 'L', UPLO, 'N', N, N, A, IA, JA, DESCA, -WORK( INDTAU ), Z, IZ, JZ, DESCZ, -WORK( INDWORK ), LLWORK, IINFO ) -.TP 19 -.ti +4 -ELSE -.TP 19 -.ti +4 -CALL -DSTEQR2( 'N', N, WORK( INDD2 ), WORK( INDE2 ), -WORK( INDWORK ), 1, 1, WORK( INDWORK2 ), INFO ) -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -CALL -DCOPY( N, WORK( INDD2 ), 1, W, 1 ) -.TP 19 -.ti +4 -IF( -ISCALE.EQ.1 ) THEN -.TP 19 -.ti +4 -CALL -DSCAL( N, ONE / SIGMA, W, 1 ) -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -WORK( -1 ) = DBLE( LWMIN ) -.TP 19 -.ti +4 -IF( -WANTZ ) THEN -.TP 19 -.ti +4 -CALL -BLACS_GRIDEXIT( CONTEXTC ) -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -IF( -N.LE.ITHVAL ) THEN -.TP 19 -.ti +4 -J -= N -.TP 19 -.ti +4 -K -= 1 -.TP 19 -.ti +4 -ELSE -.TP 19 -.ti +4 -J -= N / ITHVAL -.TP 19 -.ti +4 -K -= ITHVAL -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -DO -40 I = 1, J -.TP 19 -.ti +4 -WORK( -I+INDTAU ) = W( ( I-1 )*K+1 ) -.TP 19 -.ti +4 -WORK( -I+INDE ) = W( ( I-1 )*K+1 ) -.TP 19 -.ti +4 -40 -CONTINUE -.TP 19 -.ti +4 -CALL -DGAMN2D( DESCA( CTXT_ ), 'a', ' ', J, 1, WORK( 1+INDTAU ), -J, 1, 1, -1, -1, 0 ) -.TP 19 -.ti +4 -CALL -DGAMX2D( DESCA( CTXT_ ), 'a', ' ', J, 1, WORK( 1+INDE ), J, -1, 1, -1, -1, 0 ) -.TP 19 -.ti +4 -DO -50 I = 1, J -.TP 19 -.ti +4 -IF( -INFO.EQ.0 .AND. ( WORK( I+INDTAU )-WORK( I+INDE ).NE. -ZERO ) ) THEN -.TP 19 -.ti +4 -INFO -= N + 1 -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -50 -CONTINUE -.TP 19 -.ti +4 -RETURN -.TP 19 -.ti +4 -END .SH PURPOSE +PDSYEV computes all eigenvalues and, optionally, eigenvectors +of a real symmetric matrix A by calling the recommended sequence +of ScaLAPACK routines. + +In its present form, PDSYEV assumes a homogeneous system and makes +no checks for consistency of the eigenvalues or eigenvectors across +the different processes. 
Because of this, it is possible that a +heterogeneous system may return incorrect results without any error +messages. +.SH NOTES +A description vector is associated with each 2D block-cyclicly dis- +tributed matrix. This vector stores the information required to +establish the mapping between a matrix entry and its corresponding +process and memory location. + +In the following comments, the character _ should be read as +"of the distributed matrix". Let A be a generic term for any 2D +block cyclicly distributed matrix. Its description vector is DESCA: + +NOTATION STORED IN EXPLANATION +.br +--------------- -------------- -------------------------------------- +.br +DTYPE_A(global) DESCA( DTYPE_) The descriptor type. +.br +CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating + the BLACS process grid A is distribu- + ted over. The context itself is glo- + bal, but the handle (the integer + value) may vary. +.br +M_A (global) DESCA( M_ ) The number of rows in the distributed + matrix A. +.br +N_A (global) DESCA( N_ ) The number of columns in the distri- + buted matrix A. +.br +MB_A (global) DESCA( MB_ ) The blocking factor used to distribute + the rows of A. +.br +NB_A (global) DESCA( NB_ ) The blocking factor used to distribute + the columns of A. +.br +RSRC_A (global) DESCA( RSRC_ ) The process row over which the first + row of the matrix A is distributed. +.br +CSRC_A (global) DESCA( CSRC_ ) The process column over which the + first column of A is distributed. +.br +LLD_A (local) DESCA( LLD_ ) The leading dimension of the local + array storing the local blocks of the + distributed matrix A. + LLD_A >= MAX(1,LOCr(M_A)). + +Let K be the number of rows or columns of a distributed matrix, +and assume that its process grid has dimension p x q. +LOCr( K ) denotes the number of elements of K that a process +would receive if K were distributed over the p processes of its +process column. 
+.br +Similarly, LOCc( K ) denotes the number of elements of K that a +process would receive if K were distributed over the q processes of +its process row. +.br +The values of LOCr() and LOCc() may be determined via a call to the +ScaLAPACK tool function, NUMROC: +.br + LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), +.br + LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). + +.SH ARGUMENTS + NP = the number of rows local to a given process. + NQ = the number of columns local to a given process. + +.tp 8 +JOBZ (global input) CHARACTER*1 + Specifies whether or not to compute the eigenvectors: + = 'N': Compute eigenvalues only. + = 'V': Compute eigenvalues and eigenvectors. + +.tp 8 +UPLO (global input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + +.tp 8 +N (global input) INTEGER + The number of rows and columns of the matrix A. N >= 0. + +.tp 8 +A (local input/workspace) block cyclic DOUBLE PRECISION array, + global dimension (N, N), local dimension ( LLD_A, + LOCc(JA+N-1) ) + + On entry, the symmetric matrix A. If UPLO = 'U', only the + upper triangular part of A is used to define the elements of + the symmetric matrix. If UPLO = 'L', only the lower + triangular part of A is used to define the elements of the + symmetric matrix. + + On exit, the lower triangle (if UPLO='L') or the upper + triangle (if UPLO='U') of A, including the diagonal, is + destroyed. + +.tp 8 +IA (global input) INTEGER + A's global row index, which points to the beginning of the + submatrix which is to be operated on. + +.tp 8 +JA (global input) INTEGER + A's global column index, which points to the beginning of + the submatrix which is to be operated on. + +.tp 8 +DESCA (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix A. + If DESCA( CTXT_ ) is incorrect, PDSYEV cannot guarantee + correct error reporting. 
+ +.tp 8 +W (global output) DOUBLE PRECISION array, dimension (N) + On normal exit, the first M entries contain the selected + eigenvalues in ascending order. + +.tp 8 +Z (local output) DOUBLE PRECISION array, + global dimension (N, N), + local dimension ( LLD_Z, LOCc(JZ+N-1) ) + If JOBZ = 'V', then on normal exit the first M columns of Z + contain the orthonormal eigenvectors of the matrix + corresponding to the selected eigenvalues. + If JOBZ = 'N', then Z is not referenced. + +.tp 8 +IZ (global input) INTEGER + Z's global row index, which points to the beginning of the + submatrix which is to be operated on. + +.tp 8 +JZ (global input) INTEGER + Z's global column index, which points to the beginning of + the submatrix which is to be operated on. + +.tp 8 +DESCZ (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix Z. + DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) + +.tp 8 +WORK (local workspace/output) DOUBLE PRECISION array, + dimension (LWORK) + Version 1.0: on output, WORK(1) returns the workspace + needed to guarantee completion. + If the input parameters are incorrect, WORK(1) may also be + incorrect. + + If JOBZ='N' WORK(1) = minimal=optimal amount of workspace + If JOBZ='V' WORK(1) = minimal workspace required to + generate all the eigenvectors. + + +.tp 8 +LWORK (local input) INTEGER + See below for definitions of variables used to define LWORK. 
+ If no eigenvectors are requested (JOBZ = 'N') then + LWORK >= 5*N + SIZESYTRD + 1 + where + SIZESYTRD = The workspace requirement for PDSYTRD + and is MAX( NB * ( NP +1 ), 3 * NB ) + If eigenvectors are requested (JOBZ = 'V' ) then + the amount of workspace required to guarantee that all + eigenvectors are computed is: + + QRMEM = 2*N-2 + LWMIN = 5*N + N*LDC + MAX( SIZEMQRLEFT, QRMEM ) + 1 + + Variable definitions: + NB = DESCA( MB_ ) = DESCA( NB_ ) = + DESCZ( MB_ ) = DESCZ( NB_ ) + NN = MAX( N, NB, 2 ) + DESCA( RSRC_ ) = DESCA( CSRC_ ) = DESCZ( RSRC_ ) = + DESCZ( CSRC_ ) = 0 + NP = NUMROC( NN, NB, 0, 0, NPROW ) + NQ = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPCOL ) + NRC = NUMROC( N, NB, MYPROWC, 0, NPROCS) + LDC = MAX( 1, NRC ) + SIZEMQRLEFT = The workspace requirement for PDORMTR + when its SIDE argument is 'L'. + + With MYPROWC defined when a new context is created as: + CALL BLACS_GET( DESCA( CTXT_ ), 0, CONTEXTC ) + CALL BLACS_GRIDINIT( CONTEXTC, 'R', NPROCS, 1 ) + CALL BLACS_GRIDINFO( CONTEXTC, NPROWC, NPCOLC, MYPROWC, + MYPCOLC ) + + If LWORK = -1, then LWORK is global input and a workspace + query is assumed; the routine only calculates the minimum + size for the WORK array. The required workspace is returned + as the first element of WORK and no error message is issued + by PXERBLA. + +.tp 8 +INFO (global output) INTEGER + = 0: successful exit + < 0: If the i-th argument is an array and the j-entry had + an illegal value, then INFO = -(i*100+j), if the i-th + argument is a scalar and had an illegal value, then + INFO = -i. + > 0: If INFO = 1 through N, the i(th) eigenvalue did not + converge in DSTEQR2 after a total of 30*N iterations. + If INFO = N+1, then PDSYEV has detected heterogeneity + by finding that eigenvalues were not identical across + the process grid. In this case, the accuracy of + the results from PDSYEV cannot be guaranteed.
--- scalapack-doc-1.5.orig/man/manl/pdsyevx.l +++ scalapack-doc-1.5/man/manl/pdsyevx.l @@ -1,6 +1,8 @@ .TH PDSYEVX l "12 May 1997" "LAPACK version 1.5" "LAPACK routine (version 1.5)" .SH NAME - +PDSYEVX - compute selected eigenvalues and, optionally, eigenvectors +of a real symmetric matrix A by calling the recommended sequence +of ScaLAPACK routines .SH SYNOPSIS .TP 20 SUBROUTINE PDSYEVX( @@ -116,1198 +118,443 @@ .ti +4 INTRINSIC ABS, DBLE, ICHAR, MAX, MIN, MOD, SQRT -.TP 20 -.ti +4 -IF( -BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* -RSRC_.LT.0 )RETURN -.TP 20 -.ti +4 -QUICKRETURN -= ( N.EQ.0 ) -.TP 20 -.ti +4 -CALL -BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) -.TP 20 -.ti +4 -INFO -= 0 -.TP 20 -.ti +4 -IF( -NPROW.EQ.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -( 800+CTXT_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+CTXT_ ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -CALL -CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, INFO ) -.TP 20 -.ti +4 -CALL -CHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 21, INFO ) -.TP 20 -.ti +4 -IF( -INFO.EQ.0 ) THEN -.TP 20 -.ti +4 -SAFMIN -= PDLAMCH( DESCA( CTXT_ ), 'Safe minimum' ) -.TP 20 -.ti +4 -EPS -= PDLAMCH( DESCA( CTXT_ ), 'Precision' ) -.TP 20 -.ti +4 -SMLNUM -= SAFMIN / EPS -.TP 20 -.ti +4 -BIGNUM -= ONE / SMLNUM -.TP 20 -.ti +4 -RMIN -= SQRT( SMLNUM ) -.TP 20 -.ti +4 -RMAX -= MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) -.TP 20 -.ti +4 -NPROCS -= NPROW*NPCOL -.TP 20 -.ti +4 -LOWER -= LSAME( UPLO, 'L' ) -.TP 20 -.ti +4 -WANTZ -= LSAME( JOBZ, 'V' ) -.TP 20 -.ti +4 -ALLEIG -= LSAME( RANGE, 'A' ) -.TP 20 -.ti +4 -VALEIG -= LSAME( RANGE, 'V' ) -.TP 20 -.ti +4 -INDEIG -= LSAME( RANGE, 'I' ) -.TP 20 -.ti +4 -INDTAU -= 1 -.TP 20 -.ti +4 -INDE -= INDTAU + N -.TP 20 -.ti +4 -INDD -= INDE + N -.TP 20 -.ti +4 -INDD2 -= INDD + N -.TP 20 -.ti +4 -INDE2 -= INDD2 + N -.TP 20 -.ti +4 -INDWORK -= INDE2 + N -.TP 20 -.ti +4 -LLWORK -= LWORK - INDWORK + 1 -.TP 20 -.ti +4 
-ISIZESTEIN -= 3*N + NPROCS + 1 -.TP 20 -.ti +4 -ISIZESTEBZ -= MAX( 4*N, 14, NPROCS ) -.TP 20 -.ti +4 -INDIBL -= ( MAX( ISIZESTEIN, ISIZESTEBZ ) ) + 1 -.TP 20 -.ti +4 -INDISP -= INDIBL + N -.TP 20 -.ti +4 -LQUERY -= .FALSE. -.TP 20 -.ti +4 -IF( -LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) -LQUERY = .TRUE. -.TP 20 -.ti +4 -NNP -= MAX( N, NPROCS+1, 4 ) -.TP 20 -.ti +4 -LIWMIN -= 6*NNP -.TP 20 -.ti +4 -NPROCS -= NPROW*NPCOL -.TP 20 -.ti +4 -NB_A -= DESCA( NB_ ) -.TP 20 -.ti +4 -MB_A -= DESCA( MB_ ) -.TP 20 -.ti +4 -NB_Z -= DESCZ( NB_ ) -.TP 20 -.ti +4 -MB_Z -= DESCZ( MB_ ) -.TP 20 -.ti +4 -NB -= NB_A -.TP 20 -.ti +4 -NN -= MAX( N, NB, 2 ) -.TP 20 -.ti +4 -RSRC_A -= DESCA( RSRC_ ) -.TP 20 -.ti +4 -CSRC_A -= DESCA( CSRC_ ) -.TP 20 -.ti +4 -RSRC_Z -= DESCZ( RSRC_ ) -.TP 20 -.ti +4 -IROFFA -= MOD( IA-1, MB_A ) -.TP 20 -.ti +4 -ICOFFA -= MOD( JA-1, NB_A ) -.TP 20 -.ti +4 -IROFFZ -= MOD( IZ-1, MB_A ) -.TP 20 -.ti +4 -IAROW -= INDXG2P( 1, NB_A, MYROW, RSRC_A, NPROW ) -.TP 20 -.ti +4 -IACOL -= INDXG2P( 1, MB_A, MYCOL, CSRC_A, NPCOL ) -.TP 20 -.ti +4 -IZROW -= INDXG2P( 1, NB_A, MYROW, RSRC_Z, NPROW ) -.TP 20 -.ti +4 -NP0 -= NUMROC( N+IROFFA, NB_Z, MYROW, IAROW, NPROW ) -.TP 20 -.ti +4 -MQ0 -= NUMROC( N+ICOFFA, NB_Z, MYCOL, IACOL, NPCOL ) -.TP 20 -.ti +4 -IF( -( .NOT.WANTZ ) .OR. ( VALEIG .AND. ( .NOT.LQUERY ) ) ) -THEN -.TP 20 -.ti +4 -LWMIN -= 5*N + MAX( 5*NN, NB*( NP0+1 ) ) -.TP 20 -.ti +4 -NEIG -= 0 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IF( -ALLEIG .OR. VALEIG ) THEN -.TP 20 -.ti +4 -NEIG -= N -.TP 20 -.ti +4 -ELSE -IF( INDEIG ) THEN -.TP 20 -.ti +4 -NEIG -= IU - IL + 1 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -MQ0 -= NUMROC( MAX( NEIG, NB, 2 ), NB, MYCOL, IACOL, -NPCOL ) -.TP 20 -.ti +4 -LWMIN -= 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) + -ICEIL( NEIG, NPROW*NPCOL )*NN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -INFO.EQ.0 ) THEN -.TP 20 -.ti +4 -IF( -MYROW.EQ.0 .AND. 
MYCOL.EQ.0 ) THEN -.TP 20 -.ti +4 -WORK( -1 ) = ABSTOL -.TP 20 -.ti +4 -IF( -VALEIG ) THEN -.TP 20 -.ti +4 -WORK( -2 ) = VL -.TP 20 -.ti +4 -WORK( -3 ) = VU -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -WORK( -2 ) = ZERO -.TP 20 -.ti +4 -WORK( -3 ) = ZERO -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -DGEBS2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, WORK, -3 ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -CALL -DGEBR2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, WORK, 3, -0, 0 ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -.NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -1 -.TP 20 -.ti +4 -ELSE -IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN -.TP 20 -.ti +4 -INFO -= -2 -.TP 20 -.ti +4 -ELSE -IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -3 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN -.TP 20 -.ti +4 -INFO -= -10 -.TP 20 -.ti +4 -ELSE -IF( INDEIG .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) -THEN -.TP 20 -.ti +4 -INFO -= -11 -.TP 20 -.ti +4 -ELSE -IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) -THEN -.TP 20 -.ti +4 -INFO -= -12 -.TP 20 -.ti +4 -ELSE -IF( LWORK.LT.LWMIN .AND. LWORK.NE.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -23 -.TP 20 -.ti +4 -ELSE -IF( LIWORK.LT.LIWMIN .AND. LIWORK.NE.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -25 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. ( ABS( WORK( 2 )-VL ).GT.FIVE*EPS* -ABS( VL ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -9 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. 
( ABS( WORK( 3 )-VU ).GT.FIVE*EPS* -ABS( VU ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -10 -.TP 20 -.ti +4 -ELSE -IF( ABS( WORK( 1 )-ABSTOL ).GT.FIVE*EPS*ABS( ABSTOL ) ) -THEN -.TP 20 -.ti +4 -INFO -= -13 -.TP 20 -.ti +4 -ELSE -IF( IROFFA.NE.IROFFZ ) THEN -.TP 20 -.ti +4 -INFO -= -19 -.TP 20 -.ti +4 -ELSE -IF( IROFFA.NE.0 ) THEN -.TP 20 -.ti +4 -INFO -= -6 -.TP 20 -.ti +4 -ELSE -IF( IAROW.NE.IZROW ) THEN -.TP 20 -.ti +4 -INFO -= -19 -.TP 20 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 800+NB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+M_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+N_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+MB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+NB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+RSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+CSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+CTXT_ ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -IDUM1( -1 ) = ICHAR( 'V' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -1 ) = ICHAR( 'N' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -1 ) = 1 -.TP 20 -.ti +4 -IF( -LOWER ) THEN -.TP 20 -.ti +4 -IDUM1( -2 ) = ICHAR( 'L' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -2 ) = ICHAR( 'U' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -2 ) = 2 -.TP 20 -.ti +4 -IF( -ALLEIG ) THEN -.TP 20 -.ti +4 -IDUM1( -3 ) = ICHAR( 'A' ) -.TP 20 -.ti +4 -ELSE -IF( INDEIG ) THEN -.TP 20 -.ti +4 -IDUM1( -3 ) = ICHAR( 'I' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -3 ) = ICHAR( 'V' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -3 ) = 
3 -.TP 20 -.ti +4 -IF( -LQUERY ) THEN -.TP 20 -.ti +4 -IDUM1( -4 ) = -1 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -4 ) = 1 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -4 ) = 4 -.TP 20 -.ti +4 -CALL -PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 8, N, 4, N, 4, IZ, -JZ, DESCZ, 21, 4, IDUM1, IDUM2, INFO ) -.TP 20 -.ti +4 -WORK( -1 ) = DBLE( LWMIN ) -.TP 20 -.ti +4 -IWORK( -1 ) = LIWMIN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -INFO.NE.0 ) THEN -.TP 20 -.ti +4 -CALL -PXERBLA( DESCA( CTXT_ ), 'PDSYEVX', -INFO ) -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -ELSE -IF( LQUERY ) THEN -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -QUICKRETURN ) THEN -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -NZ -= 0 -.TP 20 -.ti +4 -ICLUSTR( -1 ) = 0 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -M -= 0 -.TP 20 -.ti +4 -WORK( -1 ) = DBLE( LWMIN ) -.TP 20 -.ti +4 -IWORK( -1 ) = LIWMIN -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -ABSTLL -= ABSTOL -.TP 20 -.ti +4 -ISCALE -= 0 -.TP 20 -.ti +4 -IF( -VALEIG ) THEN -.TP 20 -.ti +4 -VLL -= VL -.TP 20 -.ti +4 -VUU -= VU -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -VLL -= ZERO -.TP 20 -.ti +4 -VUU -= ZERO -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -ANRM -= PDLANSY( '1', UPLO, N, A, IA, JA, DESCA, WORK( INDWORK ) ) -.TP 20 -.ti +4 -IF( -ANRM.GT.ZERO .AND. 
ANRM.LT.RMIN ) THEN -.TP 20 -.ti +4 -ISCALE -= 1 -.TP 20 -.ti +4 -SIGMA -= RMIN / ANRM -.TP 20 -.ti +4 -ANRM -= ANRM*SIGMA -.TP 20 -.ti +4 -ELSE -IF( ANRM.GT.RMAX ) THEN -.TP 20 -.ti +4 -ISCALE -= 1 -.TP 20 -.ti +4 -SIGMA -= RMAX / ANRM -.TP 20 -.ti +4 -ANRM -= ANRM*SIGMA -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -ISCALE.EQ.1 ) THEN -.TP 20 -.ti +4 -CALL -PDLASCL( UPLO, ONE, SIGMA, N, N, A, IA, JA, DESCA, -IINFO ) -.TP 20 -.ti +4 -IF( -ABSTOL.GT.0 ) -ABSTLL = ABSTOL*SIGMA -.TP 20 -.ti +4 -IF( -VALEIG ) THEN -.TP 20 -.ti +4 -VLL -= VL*SIGMA -.TP 20 -.ti +4 -VUU -= VU*SIGMA -.TP 20 -.ti +4 -IF( -VUU.EQ.VLL ) THEN -.TP 20 -.ti +4 -VUU -= VUU + 2*MAX( ABS( VUU )*EPS, SAFMIN ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -LALLWORK -= LLWORK -.TP 20 -.ti +4 -CALL -PDSYTRD( UPLO, N, A, IA, JA, DESCA, WORK( INDD ), -WORK( INDE ), WORK( INDTAU ), WORK( INDWORK ), -LLWORK, IINFO ) -.TP 20 -.ti +4 -OFFSET -= 0 -.TP 20 -.ti +4 -IF( -IA.EQ.1 .AND. JA.EQ.1 .AND. RSRC_A.EQ.0 .AND. 
CSRC_A.EQ.0 ) -THEN -.TP 20 -.ti +4 -CALL -PDLARED1D( N, IA, JA, DESCA, WORK( INDD ), WORK( INDD2 ), -WORK( INDWORK ), LLWORK ) -.TP 20 -.ti +4 -CALL -PDLARED1D( N, IA, JA, DESCA, WORK( INDE ), WORK( INDE2 ), -WORK( INDWORK ), LLWORK ) -.TP 20 -.ti +4 -IF( -.NOT.LOWER ) -OFFSET = 1 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -DO -10 I = 1, N -.TP 20 -.ti +4 -CALL -PDELGET( 'A', ' ', WORK( INDD2+I-1 ), A, I+IA-1, -I+JA-1, DESCA ) -.TP 20 -.ti +4 -10 -CONTINUE -.TP 20 -.ti +4 -IF( -LSAME( UPLO, 'U' ) ) THEN -.TP 20 -.ti +4 -DO -20 I = 1, N - 1 -.TP 20 -.ti +4 -CALL -PDELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA-1, -I+JA, DESCA ) -.TP 20 -.ti +4 -20 -CONTINUE -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -DO -30 I = 1, N - 1 -.TP 20 -.ti +4 -CALL -PDELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA, -I+JA-1, DESCA ) -.TP 20 -.ti +4 -30 -CONTINUE -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -ORDER -= 'b' -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -ORDER -= 'e' -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PDSTEBZ( DESCA( CTXT_ ), RANGE, ORDER, N, VLL, VUU, IL, IU, -ABSTLL, WORK( INDD2 ), WORK( INDE2+OFFSET ), M, -NSPLIT, W, IWORK( INDIBL ), IWORK( INDISP ), -WORK( INDWORK ), LLWORK, IWORK( 1 ), ISIZESTEBZ, -IINFO ) -.TP 20 -.ti +4 -IF( -IINFO.NE.0 ) THEN -.TP 20 -.ti +4 -INFO -= INFO + IERREBZ -.TP 20 -.ti +4 -DO -40 I = 1, M -.TP 20 -.ti +4 -IWORK( -INDIBL+I-1 ) = ABS( IWORK( INDIBL+I-1 ) ) -.TP 20 -.ti +4 -40 -CONTINUE -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -IF( -VALEIG ) THEN -.TP 20 -.ti +4 -CALL -IGAMN2D( DESCA( CTXT_ ), 'A', ' ', 1, 1, LALLWORK, 1, -1, 1, -1, -1, -1 ) -.TP 20 -.ti +4 -MAXEIGS -= DESCZ( N_ ) -.TP 20 -.ti +4 -DO -50 NZ = MIN( MAXEIGS, M ), 0, -1 -.TP 20 -.ti +4 -MQ0 -= NUMROC( NZ, NB, 0, 0, NPCOL ) -.TP 20 -.ti +4 -SIZESTEIN -= ICEIL( NZ, NPROCS )*N + MAX( 5*N, NP0*MQ0 ) -.TP 20 -.ti +4 -SIZEORMTR -= MAX( ( NB*( NB-1 ) ) / 2, ( MQ0+NP0 )*NB ) + -NB*NB -.TP 20 -.ti +4 -SIZESYEVX 
-= MAX( SIZESTEIN, SIZEORMTR ) -.TP 20 -.ti +4 -IF( -SIZESYEVX.LE.LALLWORK ) -GO TO 60 -.TP 20 -.ti +4 -50 -CONTINUE -.TP 20 -.ti +4 -60 -CONTINUE -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -NZ -= M -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -NZ -= MAX( NZ, 0 ) -.TP 20 -.ti +4 -IF( -NZ.NE.M ) THEN -.TP 20 -.ti +4 -INFO -= INFO + IERRSPC -.TP 20 -.ti +4 -DO -70 I = 1, M -.TP 20 -.ti +4 -IFAIL( -I ) = 0 -.TP 20 -.ti +4 -70 -CONTINUE -.TP 20 -.ti +4 -IF( -NSPLIT.GT.1 ) THEN -.TP 20 -.ti +4 -CALL -DLASRT( 'I', M, W, IINFO ) -.TP 20 -.ti +4 -IF( -NZ.GT.0 ) THEN -.TP 20 -.ti +4 -VUU -= W( NZ ) - TEN*( EPS*ANRM+SAFMIN ) -.TP 20 -.ti +4 -IF( -VLL.GE.VUU ) THEN -.TP 20 -.ti +4 -NZZ -= 0 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -CALL -PDSTEBZ( DESCA( CTXT_ ), RANGE, ORDER, N, -VLL, VUU, IL, IU, ABSTLL, -WORK( INDD2 ), WORK( INDE2+OFFSET ), -NZZ, NSPLIT, W, IWORK( INDIBL ), -IWORK( INDISP ), WORK( INDWORK ), -LLWORK, IWORK( 1 ), ISIZESTEBZ, -IINFO ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -MOD( INFO / IERREBZ, 1 ).EQ.0 ) THEN -.TP 20 -.ti +4 -IF( -NZZ.GT.NZ .OR. 
IINFO.NE.0 ) THEN -.TP 20 -.ti +4 -INFO -= INFO + IERREBZ -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -NZ -= MIN( NZ, NZZ ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PDSTEIN( N, WORK( INDD2 ), WORK( INDE2+OFFSET ), NZ, W, -IWORK( INDIBL ), IWORK( INDISP ), ORFAC, Z, IZ, -JZ, DESCZ, WORK( INDWORK ), LALLWORK, -IWORK( 1 ), ISIZESTEIN, IFAIL, ICLUSTR, GAP, -IINFO ) -.TP 20 -.ti +4 -IF( -IINFO.GE.NZ+1 ) -INFO = INFO + IERRCLS -.TP 20 -.ti +4 -IF( -MOD( IINFO, NZ+1 ).NE.0 ) -INFO = INFO + IERREIN -.TP 20 -.ti +4 -IF( -NZ.GT.0 ) THEN -.TP 20 -.ti +4 -CALL -PDORMTR( 'L', UPLO, 'N', N, NZ, A, IA, JA, DESCA, -WORK( INDTAU ), Z, IZ, JZ, DESCZ, -WORK( INDWORK ), LLWORK, IINFO ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -ISCALE.EQ.1 ) THEN -.TP 20 -.ti +4 -CALL -DSCAL( M, ONE / SIGMA, W, 1 ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -WORK( -1 ) = DBLE( LWMIN ) -.TP 20 -.ti +4 -IWORK( -1 ) = LIWMIN -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END + .SH PURPOSE +PDSYEVX computes selected eigenvalues and, optionally, eigenvectors +of a real symmetric matrix A by calling the recommended sequence +of ScaLAPACK routines. Eigenvalues/vectors can be selected by +specifying a range of values or a range of indices for the desired +eigenvalues. +.SH NOTES +Each global data object is described by an associated description +vector. This vector stores the information required to establish +the mapping between an object element and its corresponding process +and memory location. + +Let A be a generic term for any 2D block cyclicly distributed array. +Such a global array has an associated description vector DESCA. +In the following comments, the character _ should be read as +"of the global array". + +NOTATION STORED IN EXPLANATION +.br +--------------- -------------- -------------------------------------- +.br +DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, + DTYPE_A = 1. 
+.br +CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating + the BLACS process grid A is distribu- + ted over. The context itself is glo- + bal, but the handle (the integer + value) may vary. +.br +M_A (global) DESCA( M_ ) The number of rows in the global + array A. +.br +N_A (global) DESCA( N_ ) The number of columns in the global + array A. +.br +MB_A (global) DESCA( MB_ ) The blocking factor used to distribute + the rows of the array. +.br +NB_A (global) DESCA( NB_ ) The blocking factor used to distribute + the columns of the array. +.br +RSRC_A (global) DESCA( RSRC_ ) The process row over which the first + row of the array A is distributed. +.br +CSRC_A (global) DESCA( CSRC_ ) The process column over which the + first column of the array A is + distributed. +.br +LLD_A (local) DESCA( LLD_ ) The leading dimension of the local + array. LLD_A >= MAX(1,LOCr(M_A)). + +Let K be the number of rows or columns of a distributed matrix, +and assume that its process grid has dimension p x q. +LOCr( K ) denotes the number of elements of K that a process +would receive if K were distributed over the p processes of its +process column. +.br +Similarly, LOCc( K ) denotes the number of elements of K that a +process would receive if K were distributed over the q processes of +its process row. +.br +The values of LOCr() and LOCc() may be determined via a call to the +ScaLAPACK tool function, NUMROC: +.br + LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), +.br + LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). +.br +An upper bound for these quantities may be computed by: +.br + LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A +.br + LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A + +PDSYEVX assumes IEEE 754 standard compliant arithmetic. To port +to a system which does not have IEEE 754 arithmetic, modify +the appropriate SLmake.inc file to include the compiler switch +-DNO_IEEE. This switch only affects the compilation of pdlaiect.c. 
+ +.SH ARGUMENTS + + NP = the number of rows local to a given process. + NQ = the number of columns local to a given process. + +.tp 8 +JOBZ (global input) CHARACTER*1 + Specifies whether or not to compute the eigenvectors: + = 'N': Compute eigenvalues only. + = 'V': Compute eigenvalues and eigenvectors. + +.tp 8 +RANGE (global input) CHARACTER*1 + = 'A': all eigenvalues will be found. + = 'V': all eigenvalues in the interval [VL,VU] will be found. + = 'I': the IL-th through IU-th eigenvalues will be found. + +.tp 8 +UPLO (global input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + +.tp 8 +N (global input) INTEGER + The number of rows and columns of the matrix A. N >= 0. + +.tp 8 +A (local input/workspace) block cyclic DOUBLE PRECISION array, + global dimension (N, N), + local dimension ( LLD_A, LOCc(JA+N-1) ) + + On entry, the symmetric matrix A. If UPLO = 'U', only the + upper triangular part of A is used to define the elements of + the symmetric matrix. If UPLO = 'L', only the lower + triangular part of A is used to define the elements of the + symmetric matrix. + + On exit, the lower triangle (if UPLO='L') or the upper + triangle (if UPLO='U') of A, including the diagonal, is + destroyed. + +.tp 8 +IA (global input) INTEGER + A's global row index, which points to the beginning of the + submatrix which is to be operated on. + +.tp 8 +JA (global input) INTEGER + A's global column index, which points to the beginning of + the submatrix which is to be operated on. + +.tp 8 +DESCA (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix A. + If DESCA( CTXT_ ) is incorrect, PDSYEVX cannot guarantee + correct error reporting. + +.tp 8 +VL (global input) DOUBLE PRECISION + If RANGE='V', the lower bound of the interval to be searched + for eigenvalues. Not referenced if RANGE = 'A' or 'I'. 
+ +.tp 8 +VU (global input) DOUBLE PRECISION + If RANGE='V', the upper bound of the interval to be searched + for eigenvalues. Not referenced if RANGE = 'A' or 'I'. + +.tp 8 +IL (global input) INTEGER + If RANGE='I', the index (from smallest to largest) of the + smallest eigenvalue to be returned. IL >= 1. + Not referenced if RANGE = 'A' or 'V'. + +.tp 8 +IU (global input) INTEGER + If RANGE='I', the index (from smallest to largest) of the + largest eigenvalue to be returned. min(IL,N) <= IU <= N. + Not referenced if RANGE = 'A' or 'V'. + +.tp 8 +ABSTOL (global input) DOUBLE PRECISION + If JOBZ='V', setting ABSTOL to PDLAMCH( CONTEXT, 'U') yields + the most orthogonal eigenvectors. + + The absolute error tolerance for the eigenvalues. + An approximate eigenvalue is accepted as converged + when it is determined to lie in an interval [a,b] + of width less than or equal to + + ABSTOL + EPS * max( |a|,|b| ) , + + where EPS is the machine precision. If ABSTOL is less than + or equal to zero, then EPS*norm(T) will be used in its place, + where norm(T) is the 1-norm of the tridiagonal matrix + obtained by reducing A to tridiagonal form. + + Eigenvalues will be computed most accurately when ABSTOL is + set to twice the underflow threshold 2*PDLAMCH('S') not zero. + If this routine returns with ((MOD(INFO,2).NE.0) .OR. + (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or + eigenvectors did not converge, try setting ABSTOL to + 2*PDLAMCH('S'). + + See "Computing Small Singular Values of Bidiagonal Matrices + with Guaranteed High Relative Accuracy," by Demmel and + Kahan, LAPACK Working Note #3. + + See "On the correctness of Parallel Bisection in Floating + Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70 + +.tp 8 +M (global output) INTEGER + Total number of eigenvalues found. 0 <= M <= N. + +.tp 8 +NZ (global output) INTEGER + Total number of eigenvectors computed. 0 <= NZ <= M. + The number of columns of Z that are filled. + If JOBZ .NE. 
'V', NZ is not referenced. + If JOBZ .EQ. 'V', NZ = M unless the user supplies + insufficient space and PDSYEVX is not able to detect this + before beginning computation. To get all the eigenvectors + requested, the user must supply both sufficient + space to hold the eigenvectors in Z (M .LE. DESCZ(N_)) + and sufficient workspace to compute them. (See LWORK below.) + PDSYEVX is always able to detect insufficient space without + computation unless RANGE .EQ. 'V'. + +.tp 8 +W (global output) DOUBLE PRECISION array, dimension (N) + On normal exit, the first M entries contain the selected + eigenvalues in ascending order. + +.tp 8 +ORFAC (global input) DOUBLE PRECISION + Specifies which eigenvectors should be reorthogonalized. + Eigenvectors that correspond to eigenvalues which are within + tol=ORFAC*norm(A) of each other are to be reorthogonalized. + However, if the workspace is insufficient (see LWORK), + tol may be decreased until all eigenvectors to be + reorthogonalized can be stored in one process. + No reorthogonalization will be done if ORFAC equals zero. + A default value of 10^-3 is used if ORFAC is negative. + ORFAC should be identical on all processes. + +.tp 8 +Z (local output) DOUBLE PRECISION array, + global dimension (N, N), + local dimension ( LLD_Z, LOCc(JZ+N-1) ) + If JOBZ = 'V', then on normal exit the first M columns of Z + contain the orthonormal eigenvectors of the matrix + corresponding to the selected eigenvalues. If an eigenvector + fails to converge, then that column of Z contains the latest + approximation to the eigenvector, and the index of the + eigenvector is returned in IFAIL. + If JOBZ = 'N', then Z is not referenced. + +.tp 8 +IZ (global input) INTEGER + Z's global row index, which points to the beginning of the + submatrix which is to be operated on. + +.tp 8 +JZ (global input) INTEGER + Z's global column index, which points to the beginning of + the submatrix which is to be operated on. 
+ +.tp 8 +DESCZ (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix Z. + DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) + +.tp 8 +WORK (local workspace/output) DOUBLE PRECISION array, + dimension (LWORK) + On return, WORK(1) contains the optimal amount of + workspace required for efficient execution. + if JOBZ='N' WORK(1) = optimal amount of workspace + required to compute eigenvalues efficiently + if JOBZ='V' WORK(1) = optimal amount of workspace + required to compute eigenvalues and eigenvectors + efficiently with no guarantee on orthogonality. + If RANGE='V', it is assumed that all eigenvectors + may be required. + +.tp 8 +LWORK (local input) INTEGER + Size of WORK + See below for definitions of variables used to define LWORK. + If no eigenvectors are requested (JOBZ = 'N') then + LWORK >= 5 * N + MAX( 5 * NN, NB * ( NP0 + 1 ) ) + If eigenvectors are requested (JOBZ = 'V' ) then + the amount of workspace required to guarantee that all + eigenvectors are computed is: + LWORK >= 5*N + MAX( 5*NN, NP0 * MQ0 + 2 * NB * NB ) + + ICEIL( NEIG, NPROW*NPCOL)*NN + + The computed eigenvectors may not be orthogonal if the + minimal workspace is supplied and ORFAC is too small. 
+ If you want to guarantee orthogonality (at the cost + of potentially poor performance) you should add + the following to LWORK: + (CLUSTERSIZE-1)*N + where CLUSTERSIZE is the number of eigenvalues in the + largest cluster, where a cluster is defined as a set of + close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) | + W(J+1) <= W(J) + ORFAC*2*norm(A) } + Variable definitions: + NEIG = number of eigenvectors requested + NB = DESCA( MB_ ) = DESCA( NB_ ) = + DESCZ( MB_ ) = DESCZ( NB_ ) + NN = MAX( N, NB, 2 ) + DESCA( RSRC_ ) = DESCA( CSRC_ ) = DESCZ( RSRC_ ) = + DESCZ( CSRC_ ) = 0 + NP0 = NUMROC( NN, NB, 0, 0, NPROW ) + MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) + ICEIL( X, Y ) is a ScaLAPACK function returning + ceiling(X/Y) + + When LWORK is too small: + If LWORK is too small to guarantee orthogonality, + PDSYEVX attempts to maintain orthogonality in + the clusters with the smallest + spacing between the eigenvalues. + If LWORK is too small to compute all the eigenvectors + requested, no computation is performed and INFO=-23 + is returned. Note that when RANGE='V', PDSYEVX does + not know how many eigenvectors are requested until + the eigenvalues are computed. Therefore, when RANGE='V' + and as long as LWORK is large enough to allow PDSYEVX to + compute the eigenvalues, PDSYEVX will compute the + eigenvalues and as many eigenvectors as it can. + + Relationship between workspace, orthogonality & performance: + Greater performance can be achieved if adequate workspace + is provided. On the other hand, in some situations, + performance can decrease as the workspace provided + increases above the workspace amount shown below: + + For optimal performance, greater workspace may be + needed, i.e. 
+ LWORK >= MAX( LWORK, 5*N + NSYTRD_LWOPT ) + Where: + LWORK, as defined previously, depends upon the number + of eigenvectors requested, and + NSYTRD_LWOPT = N + 2*( ANB+1 )*( 4*NPS+2 ) + + ( NPS + 3 ) * NPS + + ANB = PJLAENV( DESCA( CTXT_), 3, 'PDSYTTRD', 'L', + 0, 0, 0, 0) + SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) ) + NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) + + NUMROC is a ScaLAPACK tool function; + PJLAENV is a ScaLAPACK environmental inquiry function + MYROW, MYCOL, NPROW and NPCOL can be determined by + calling the subroutine BLACS_GRIDINFO. + + For large N, no extra workspace is needed, however the + biggest boost in performance comes for small N, so it + is wise to provide the extra workspace (typically less + than a Megabyte per process). + + If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing + enough space to compute all the eigenvectors + orthogonally will cause serious degradation in + performance. In the limit (i.e. CLUSTERSIZE = N-1) + PDSTEIN will perform no better than DSTEIN on 1 + processor. + For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing + all eigenvectors will increase the total execution time + by a factor of 2 or more. + For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will + grow as the square of the cluster size, all other factors + remaining equal and assuming enough workspace. Less + workspace means less reorthogonalization but faster + execution. + + If LWORK = -1, then LWORK is global input and a workspace + query is assumed; the routine only calculates the size + required for optimal performance for all work arrays. Each of + these values is returned in the first entry of the + corresponding work arrays, and no error message is issued by + PXERBLA. + +.tp 8 +IWORK (local workspace) INTEGER array + On return, IWORK(1) contains the amount of integer workspace + required. 
+ +.tp 8 +LIWORK (local input) INTEGER + size of IWORK + LIWORK >= 6 * NNP + Where: + NNP = MAX( N, NPROW*NPCOL + 1, 4 ) + If LIWORK = -1, then LIWORK is global input and a workspace + query is assumed; the routine only calculates the minimum + and optimal size for all work arrays. Each of these + values is returned in the first entry of the corresponding + work array, and no error message is issued by PXERBLA. + +.tp 8 +IFAIL (global output) INTEGER array, dimension (N) + If JOBZ = 'V', then on normal exit, the first M elements of + IFAIL are zero. If (MOD(INFO,2).NE.0) on exit, then + IFAIL contains the + indices of the eigenvectors that failed to converge. + If JOBZ = 'N', then IFAIL is not referenced. + +.tp 8 +ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL) + This array contains indices of eigenvectors corresponding to + a cluster of eigenvalues that could not be reorthogonalized + due to insufficient workspace (see LWORK, ORFAC and INFO). + Eigenvectors corresponding to clusters of eigenvalues indexed + ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be + reorthogonalized due to lack of workspace. Hence the + eigenvectors corresponding to these clusters may not be + orthogonal. ICLUSTR() is a zero terminated array. + (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if + K is the number of clusters + ICLUSTR is not referenced if JOBZ = 'N' + +.tp 8 +GAP (global output) DOUBLE PRECISION array, + dimension (NPROW*NPCOL) + This array contains the gap between eigenvalues whose + eigenvectors could not be reorthogonalized. The output + values in this array correspond to the clusters indicated + by the array ICLUSTR. As a result, the dot product between + eigenvectors corresponding to the I^th cluster may be as high + as ( C * n ) / GAP(I) where C is a small constant. 
+ +.tp 8 +INFO (global output) INTEGER + = 0: successful exit + < 0: If the i-th argument is an array and the j-entry had + an illegal value, then INFO = -(i*100+j), if the i-th + argument is a scalar and had an illegal value, then + INFO = -i. + > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors + failed to converge. Their indices are stored + in IFAIL. Ensure ABSTOL=2.0*PDLAMCH( 'U' ) + Send e-mail to scalapack@cs.utk.edu + if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding + to one or more clusters of eigenvalues could not be + reorthogonalized because of insufficient workspace. + The indices of the clusters are stored in the array + ICLUSTR. + if (MOD(INFO/4,2).NE.0), then space limit prevented + PDSYEVX from computing all of the eigenvectors + between VL and VU. The number of eigenvectors + computed is returned in NZ. + if (MOD(INFO/8,2).NE.0), then PDSTEBZ failed to compute + eigenvalues. Ensure ABSTOL=2.0*PDLAMCH( 'U' ) + Send e-mail to scalapack@cs.utk.edu --- scalapack-doc-1.5.orig/man/manl/pdsygvx.l +++ scalapack-doc-1.5/man/manl/pdsygvx.l @@ -1,6 +1,8 @@ .TH PDSYGVX l "12 May 1997" "LAPACK version 1.5" "LAPACK routine (version 1.5)" .SH NAME - +PDSYGVX - compute all the eigenvalues, and optionally, +the eigenvectors +of a real generalized SY-definite eigenproblem .SH SYNOPSIS .TP 20 SUBROUTINE PDSYGVX( @@ -115,796 +117,484 @@ .ti +4 INTRINSIC ABS, DBLE, ICHAR, MAX, MIN, MOD -.TP 20 -.ti +4 -IF( -BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* -RSRC_.LT.0 )RETURN -.TP 20 -.ti +4 -ICTXT -= DESCA( CTXT_ ) -.TP 20 -.ti +4 -CALL -BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) -.TP 20 -.ti +4 -INFO -= 0 -.TP 20 -.ti +4 -IF( -NPROW.EQ.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -( 900+CTXT_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+CTXT_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2600+CTXT_ ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti 
+4 -EPS -= PDLAMCH( DESCA( CTXT_ ), 'Precision' ) -.TP 20 -.ti +4 -WANTZ -= LSAME( JOBZ, 'V' ) -.TP 20 -.ti +4 -UPPER -= LSAME( UPLO, 'U' ) -.TP 20 -.ti +4 -ALLEIG -= LSAME( RANGE, 'A' ) -.TP 20 -.ti +4 -VALEIG -= LSAME( RANGE, 'V' ) -.TP 20 -.ti +4 -INDEIG -= LSAME( RANGE, 'I' ) -.TP 20 -.ti +4 -CALL -CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, INFO ) -.TP 20 -.ti +4 -CALL -CHK1MAT( N, 4, N, 4, IB, JB, DESCB, 13, INFO ) -.TP 20 -.ti +4 -CALL -CHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 26, INFO ) -.TP 20 -.ti +4 -IF( -INFO.EQ.0 ) THEN -.TP 20 -.ti +4 -IF( -MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN -.TP 20 -.ti +4 -WORK( -1 ) = ABSTOL -.TP 20 -.ti +4 -IF( -VALEIG ) THEN -.TP 20 -.ti +4 -WORK( -2 ) = VL -.TP 20 -.ti +4 -WORK( -3 ) = VU -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -WORK( -2 ) = ZERO -.TP 20 -.ti +4 -WORK( -3 ) = ZERO -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -DGEBS2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, WORK, -3 ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -CALL -DGEBR2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, WORK, 3, -0, 0 ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IAROW -= INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), -NPROW ) -.TP 20 -.ti +4 -IBROW -= INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), -NPROW ) -.TP 20 -.ti +4 -IACOL -= INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), -NPCOL ) -.TP 20 -.ti +4 -IBCOL -= INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), -NPCOL ) -.TP 20 -.ti +4 -IROFFA -= MOD( IA-1, DESCA( MB_ ) ) -.TP 20 -.ti +4 -ICOFFA -= MOD( JA-1, DESCA( NB_ ) ) -.TP 20 -.ti +4 -IROFFB -= MOD( IB-1, DESCB( MB_ ) ) -.TP 20 -.ti +4 -ICOFFB -= MOD( JB-1, DESCB( NB_ ) ) -.TP 20 -.ti +4 -LQUERY -= .FALSE. -.TP 20 -.ti +4 -IF( -LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) -LQUERY = .TRUE. -.TP 20 -.ti +4 -LIWMIN -= 6*MAX( N, ( NPROW*NPCOL )+1, 4 ) -.TP 20 -.ti +4 -NB -= DESCA( MB_ ) -.TP 20 -.ti +4 -NN -= MAX( N, NB, 2 ) -.TP 20 -.ti +4 -NP0 -= NUMROC( NN, NB, 0, 0, NPROW ) -.TP 20 -.ti +4 -IF( -( .NOT.WANTZ ) .OR. ( VALEIG .AND. 
( .NOT.LQUERY ) ) ) -THEN -.TP 20 -.ti +4 -LWMIN -= 5*N + MAX( 5*NN, NB*( NP0+1 ) ) -.TP 20 -.ti +4 -NEIG -= 0 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IF( -ALLEIG .OR. VALEIG ) THEN -.TP 20 -.ti +4 -NEIG -= N -.TP 20 -.ti +4 -ELSE -IF( INDEIG ) THEN -.TP 20 -.ti +4 -NEIG -= IU - IL + 1 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -MQ0 -= NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) -.TP 20 -.ti +4 -LWMIN -= 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) + -ICEIL( NEIG, NPROW*NPCOL )*NN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN -.TP 20 -.ti +4 -INFO -= -1 -.TP 20 -.ti +4 -ELSE -IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -2 -.TP 20 -.ti +4 -ELSE -IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN -.TP 20 -.ti +4 -INFO -= -3 -.TP 20 -.ti +4 -ELSE -IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN -.TP 20 -.ti +4 -INFO -= -4 -.TP 20 -.ti +4 -ELSE -IF( N.LT.0 ) THEN -.TP 20 -.ti +4 -INFO -= -5 -.TP 20 -.ti +4 -ELSE -IF( IROFFA.NE.0 ) THEN -.TP 20 -.ti +4 -INFO -= -7 -.TP 20 -.ti +4 -ELSE -IF( ICOFFA.NE.0 ) THEN -.TP 20 -.ti +4 -INFO -= -8 -.TP 20 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 900+NB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( M_ ).NE.DESCB( M_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+M_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( N_ ).NE.DESCB( N_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+N_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+MB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( NB_ ).NE.DESCB( NB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+NB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( RSRC_ ).NE.DESCB( RSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+RSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CSRC_ ).NE.DESCB( CSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+CSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+CTXT_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN 
-.TP 20 -.ti +4 -INFO -= -( 2200+M_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+N_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+MB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+NB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+RSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+CSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+CTXT_ ) -.TP 20 -.ti +4 -ELSE -IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN -.TP 20 -.ti +4 -INFO -= -11 -.TP 20 -.ti +4 -ELSE -IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN -.TP 20 -.ti +4 -INFO -= -12 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN -.TP 20 -.ti +4 -INFO -= -15 -.TP 20 -.ti +4 -ELSE -IF( INDEIG .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) -THEN -.TP 20 -.ti +4 -INFO -= -16 -.TP 20 -.ti +4 -ELSE -IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) -THEN -.TP 20 -.ti +4 -INFO -= -17 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. ( ABS( WORK( 2 )-VL ).GT.FIVE*EPS* -ABS( VL ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -14 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. ( ABS( WORK( 3 )-VU ).GT.FIVE*EPS* -ABS( VU ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -15 -.TP 20 -.ti +4 -ELSE -IF( ABS( WORK( 1 )-ABSTOL ).GT.FIVE*EPS*ABS( ABSTOL ) ) -THEN -.TP 20 -.ti +4 -INFO -= -18 -.TP 20 -.ti +4 -ELSE -IF( LWORK.LT.LWMIN .AND. LWORK.NE.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -28 -.TP 20 -.ti +4 -ELSE -IF( LIWORK.LT.LIWMIN .AND. 
LIWORK.NE.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -30 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM1( -1 ) = IBTYPE -.TP 20 -.ti +4 -IDUM2( -1 ) = 1 -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -IDUM1( -2 ) = ICHAR( 'V' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -2 ) = ICHAR( 'N' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -2 ) = 2 -.TP 20 -.ti +4 -IF( -UPPER ) THEN -.TP 20 -.ti +4 -IDUM1( -3 ) = ICHAR( 'U' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -3 ) = ICHAR( 'L' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -3 ) = 3 -.TP 20 -.ti +4 -IF( -ALLEIG ) THEN -.TP 20 -.ti +4 -IDUM1( -4 ) = ICHAR( 'A' ) -.TP 20 -.ti +4 -ELSE -IF( INDEIG ) THEN -.TP 20 -.ti +4 -IDUM1( -4 ) = ICHAR( 'I' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -4 ) = ICHAR( 'V' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -4 ) = 4 -.TP 20 -.ti +4 -IF( -LQUERY ) THEN -.TP 20 -.ti +4 -IDUM1( -5 ) = -1 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -5 ) = 1 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -5 ) = 5 -.TP 20 -.ti +4 -CALL -PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 9, N, 4, N, 4, IB, -JB, DESCB, 13, 5, IDUM1, IDUM2, INFO ) -.TP 20 -.ti +4 -CALL -PCHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 26, 0, IDUM1, IDUM2, -INFO ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -WORK( -1 ) = DBLE( LWMIN ) -.TP 20 -.ti +4 -IWORK( -1 ) = LIWMIN -.TP 20 -.ti +4 -IF( -INFO.NE.0 ) THEN -.TP 20 -.ti +4 -CALL -PXERBLA( ICTXT, 'PDSYGVX ', -INFO ) -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -ELSE -IF( LQUERY ) THEN -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PDPOTRF( UPLO, N, B, IB, JB, DESCB, INFO ) -.TP 20 -.ti +4 -IF( -INFO.NE.0 ) THEN -.TP 20 -.ti +4 -IFAIL( -1 ) = INFO -.TP 20 -.ti +4 -INFO -= IERRNPD -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PDSYGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, -DESCB, SCALE, INFO ) -.TP 20 -.ti +4 -CALL -PDSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, VU, IL, -IU, ABSTOL, M, 
NZ, W, ORFAC, Z, IZ, JZ, DESCZ, -WORK, LWORK, IWORK, LIWORK, IFAIL, ICLUSTR, GAP, -INFO ) -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -NEIG -= M -.TP 20 -.ti +4 -IF( -IBTYPE.EQ.1 .OR. IBTYPE.EQ.2 ) THEN -.TP 20 -.ti +4 -IF( -UPPER ) THEN -.TP 20 -.ti +4 -TRANS -= 'N' -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -TRANS -= 'T' -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PDTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, -B, IB, JB, DESCB, Z, IZ, JZ, DESCZ ) -.TP 20 -.ti +4 -ELSE -IF( IBTYPE.EQ.3 ) THEN -.TP 20 -.ti +4 -IF( -UPPER ) THEN -.TP 20 -.ti +4 -TRANS -= 'T' -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -TRANS -= 'N' -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PDTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, -B, IB, JB, DESCB, Z, IZ, JZ, DESCZ ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -SCALE.NE.ONE ) THEN -.TP 20 -.ti +4 -CALL -DSCAL( N, SCALE, W, 1 ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END + .SH PURPOSE +PDSYGVX computes all the eigenvalues, and optionally, +the eigenvectors +of a real generalized SY-definite eigenproblem, of the form +sub( A )*x=(lambda)*sub( B )*x, sub( A )*sub( B )x=(lambda)*x, or +sub( B )*sub( A )*x=(lambda)*x. +Here sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ) is assumed to be +SY, and sub( B ) denoting B( IB:IB+N-1, JB:JB+N-1 ) is assumed +to be symmetric positive definite. +.SH NOTES +Each global data object is described by an associated description +vector. This vector stores the information required to establish +the mapping between an object element and its corresponding process +and memory location. + +Let A be a generic term for any 2D block cyclicly distributed array. +Such a global array has an associated description vector DESCA. +In the following comments, the character _ should be read as +"of the global array". 
+ +NOTATION STORED IN EXPLANATION +.br +--------------- -------------- -------------------------------------- +.br +DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, + DTYPE_A = 1. +.br +CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating + the BLACS process grid A is distribu- + ted over. The context itself is glo- + bal, but the handle (the integer + value) may vary. +.br +M_A (global) DESCA( M_ ) The number of rows in the global + array A. +.br +N_A (global) DESCA( N_ ) The number of columns in the global + array A. +.br +MB_A (global) DESCA( MB_ ) The blocking factor used to distribute + the rows of the array. +.br +NB_A (global) DESCA( NB_ ) The blocking factor used to distribute + the columns of the array. +.br +RSRC_A (global) DESCA( RSRC_ ) The process row over which the first + row of the array A is distributed. +.br +CSRC_A (global) DESCA( CSRC_ ) The process column over which the + first column of the array A is + distributed. +.br +LLD_A (local) DESCA( LLD_ ) The leading dimension of the local + array. LLD_A >= MAX(1,LOCr(M_A)). + +Let K be the number of rows or columns of a distributed matrix, +and assume that its process grid has dimension p x q. +LOCr( K ) denotes the number of elements of K that a process +would receive if K were distributed over the p processes of its +process column. +.br +Similarly, LOCc( K ) denotes the number of elements of K that a +process would receive if K were distributed over the q processes of +its process row. +.br +The values of LOCr() and LOCc() may be determined via a call to the +ScaLAPACK tool function, NUMROC: +.br + LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), +.br + LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). 
+.br +An upper bound for these quantities may be computed by: +.br + LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A +.br + LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A + +.SH ARGUMENTS + +.tp 8 +IBTYPE (global input) INTEGER + Specifies the problem type to be solved: + = 1: sub( A )*x = (lambda)*sub( B )*x + = 2: sub( A )*sub( B )*x = (lambda)*x + = 3: sub( B )*sub( A )*x = (lambda)*x + +.tp 8 +JOBZ (global input) CHARACTER*1 + = 'N': Compute eigenvalues only; + = 'V': Compute eigenvalues and eigenvectors. + +.tp 8 +RANGE (global input) CHARACTER*1 + = 'A': all eigenvalues will be found. + = 'V': all eigenvalues in the interval [VL,VU] will be found. + = 'I': the IL-th through IU-th eigenvalues will be found. + +.tp 8 +UPLO (global input) CHARACTER*1 + = 'U': Upper triangles of sub( A ) and sub( B ) are stored; + = 'L': Lower triangles of sub( A ) and sub( B ) are stored. + +.tp 8 +N (global input) INTEGER + The order of the matrices sub( A ) and sub( B ). N >= 0. + +.tp 8 +A (local input/local output) DOUBLE PRECISION pointer into the + local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). + On entry, this array contains the local pieces of the + N-by-N symmetric distributed matrix sub( A ). If UPLO = 'U', + the leading N-by-N upper triangular part of sub( A ) contains + the upper triangular part of the matrix. If UPLO = 'L', the + leading N-by-N lower triangular part of sub( A ) contains + the lower triangular part of the matrix. + + On exit, if JOBZ = 'V', then if INFO = 0, sub( A ) contains + the distributed matrix Z of eigenvectors. The eigenvectors + are normalized as follows: + if IBTYPE = 1 or 2, Z**T*sub( B )*Z = I; + if IBTYPE = 3, Z**T*inv( sub( B ) )*Z = I. + If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') + or the lower triangle (if UPLO='L') of sub( A ), including + the diagonal, is destroyed. + +.tp 8 +IA (global input) INTEGER + The row index in the global array A indicating the first + row of sub( A ). 
+ +.tp 8 +JA (global input) INTEGER + The column index in the global array A indicating the + first column of sub( A ). + +.tp 8 +DESCA (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix A. + If DESCA( CTXT_ ) is incorrect, PDSYGVX cannot guarantee + correct error reporting. + +.tp 8 +B (local input/local output) DOUBLE PRECISION pointer into the + local memory to an array of dimension (LLD_B, LOCc(JB+N-1)). + On entry, this array contains the local pieces of the + N-by-N symmetric distributed matrix sub( B ). If UPLO = 'U', + the leading N-by-N upper triangular part of sub( B ) contains + the upper triangular part of the matrix. If UPLO = 'L', the + leading N-by-N lower triangular part of sub( B ) contains + the lower triangular part of the matrix. + + On exit, if INFO <= N, the part of sub( B ) containing the + matrix is overwritten by the triangular factor U or L from + the Cholesky factorization sub( B ) = U**T*U or + sub( B ) = L*L**T. + +.tp 8 +IB (global input) INTEGER + The row index in the global array B indicating the first + row of sub( B ). + +.tp 8 +JB (global input) INTEGER + The column index in the global array B indicating the + first column of sub( B ). + +.tp 8 +DESCB (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix B. + DESCB( CTXT_ ) must equal DESCA( CTXT_ ) + +.tp 8 +VL (global input) DOUBLE PRECISION + If RANGE='V', the lower bound of the interval to be searched + for eigenvalues. Not referenced if RANGE = 'A' or 'I'. + +.tp 8 +VU (global input) DOUBLE PRECISION + If RANGE='V', the upper bound of the interval to be searched + for eigenvalues. Not referenced if RANGE = 'A' or 'I'. + +.tp 8 +IL (global input) INTEGER + If RANGE='I', the index (from smallest to largest) of the + smallest eigenvalue to be returned. IL >= 1. + Not referenced if RANGE = 'A' or 'V'. 
+ +.tp 8 +IU (global input) INTEGER + If RANGE='I', the index (from smallest to largest) of the + largest eigenvalue to be returned. min(IL,N) <= IU <= N. + Not referenced if RANGE = 'A' or 'V'. + +.tp 8 +ABSTOL (global input) DOUBLE PRECISION + If JOBZ='V', setting ABSTOL to PDLAMCH( CONTEXT, 'U') yields + the most orthogonal eigenvectors. + + The absolute error tolerance for the eigenvalues. + An approximate eigenvalue is accepted as converged + when it is determined to lie in an interval [a,b] + of width less than or equal to + + ABSTOL + EPS * max( |a|,|b| ) , + + where EPS is the machine precision. If ABSTOL is less than + or equal to zero, then EPS*norm(T) will be used in its place, + where norm(T) is the 1-norm of the tridiagonal matrix + obtained by reducing A to tridiagonal form. + + Eigenvalues will be computed most accurately when ABSTOL is + set to twice the underflow threshold 2*PDLAMCH('S') not zero. + If this routine returns with ((MOD(INFO,2).NE.0) .OR. + (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or + eigenvectors did not converge, try setting ABSTOL to + 2*PDLAMCH('S'). + + See "Computing Small Singular Values of Bidiagonal Matrices + with Guaranteed High Relative Accuracy," by Demmel and + Kahan, LAPACK Working Note #3. + + See "On the correctness of Parallel Bisection in Floating + Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70 + +.tp 8 +M (global output) INTEGER + Total number of eigenvalues found. 0 <= M <= N. + +.tp 8 +NZ (global output) INTEGER + Total number of eigenvectors computed. 0 <= NZ <= M. + The number of columns of Z that are filled. + If JOBZ .NE. 'V', NZ is not referenced. + If JOBZ .EQ. 'V', NZ = M unless the user supplies + insufficient space and PDSYGVX is not able to detect this + before beginning computation. To get all the eigenvectors + requested, the user must supply both sufficient + space to hold the eigenvectors in Z (M .LE. DESCZ(N_)) + and sufficient workspace to compute them. 
(See LWORK below.) + PDSYGVX is always able to detect insufficient space without + computation unless RANGE .EQ. 'V'. + +.tp 8 +W (global output) DOUBLE PRECISION array, dimension (N) + On normal exit, the first M entries contain the selected + eigenvalues in ascending order. + +.tp 8 +ORFAC (global input) DOUBLE PRECISION + Specifies which eigenvectors should be reorthogonalized. + Eigenvectors that correspond to eigenvalues which are within + tol=ORFAC*norm(A) of each other are to be reorthogonalized. + However, if the workspace is insufficient (see LWORK), + tol may be decreased until all eigenvectors to be + reorthogonalized can be stored in one process. + No reorthogonalization will be done if ORFAC equals zero. + A default value of 10^-3 is used if ORFAC is negative. + ORFAC should be identical on all processes. + +.tp 8 +Z (local output) DOUBLE PRECISION array, + global dimension (N, N), + local dimension ( LLD_Z, LOCc(JZ+N-1) ) + If JOBZ = 'V', then on normal exit the first M columns of Z + contain the orthonormal eigenvectors of the matrix + corresponding to the selected eigenvalues. If an eigenvector + fails to converge, then that column of Z contains the latest + approximation to the eigenvector, and the index of the + eigenvector is returned in IFAIL. + If JOBZ = 'N', then Z is not referenced. + +.tp 8 +IZ (global input) INTEGER + The row index in the global array Z indicating the first + row of sub( Z ). + +.tp 8 +JZ (global input) INTEGER + The column index in the global array Z indicating the + first column of sub( Z ). + +.tp 8 +DESCZ (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix Z. 
+ DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) + +.tp 8 +WORK (local workspace/output) DOUBLE PRECISION array, + dimension (LWORK) + if JOBZ='N' WORK(1) = optimal amount of workspace + required to compute eigenvalues efficiently + if JOBZ='V' WORK(1) = optimal amount of workspace + required to compute eigenvalues and eigenvectors + efficiently with no guarantee on orthogonality. + If RANGE='V', it is assumed that all eigenvectors + may be required. + +.tp 8 +LWORK (local input) INTEGER + See below for definitions of variables used to define LWORK. + If no eigenvectors are requested (JOBZ = 'N') then + LWORK >= 5 * N + MAX( 5 * NN, NB * ( NP0 + 1 ) ) + If eigenvectors are requested (JOBZ = 'V' ) then + the amount of workspace required to guarantee that all + eigenvectors are computed is: + LWORK >= 5 * N + MAX( 5*NN, NP0 * MQ0 + 2 * NB * NB ) + + ICEIL( NEIG, NPROW*NPCOL)*NN + + The computed eigenvectors may not be orthogonal if the + minimal workspace is supplied and ORFAC is too small. + If you want to guarantee orthogonality (at the cost + of potentially poor performance) you should add + the following to LWORK: + (CLUSTERSIZE-1)*N + where CLUSTERSIZE is the number of eigenvalues in the + largest cluster, where a cluster is defined as a set of + close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) | + W(J+1) <= W(J) + ORFAC*2*norm(A) } + Variable definitions: + NEIG = number of eigenvectors requested + NB = DESCA( MB_ ) = DESCA( NB_ ) = DESCZ( MB_ ) = + DESCZ( NB_ ) + NN = MAX( N, NB, 2 ) + DESCA( RSRC_ ) = DESCA( CSRC_ ) = DESCZ( RSRC_ ) = + DESCZ( CSRC_ ) = 0 + NP0 = NUMROC( NN, NB, 0, 0, NPROW ) + MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) + ICEIL( X, Y ) is a ScaLAPACK function returning + ceiling(X/Y) + + When LWORK is too small: + If LWORK is too small to guarantee orthogonality, + PDSYGVX attempts to maintain orthogonality in + the clusters with the smallest + spacing between the eigenvalues. 
+ If LWORK is too small to compute all the eigenvectors + requested, no computation is performed and INFO=-23 + is returned. Note that when RANGE='V', PDSYGVX does + not know how many eigenvectors are requested until + the eigenvalues are computed. Therefore, when RANGE='V' + and as long as LWORK is large enough to allow PDSYGVX to + compute the eigenvalues, PDSYGVX will compute the + eigenvalues and as many eigenvectors as it can. + + Relationship between workspace, orthogonality & performance: + Greater performance can be achieved if adequate workspace + is provided. On the other hand, in some situations, + performance can decrease as the workspace provided + increases above the workspace amount shown below: + + For optimal performance, greater workspace may be + needed, i.e. + LWORK >= MAX( LWORK, 5 * N + NSYTRD_LWOPT, + NSYGST_LWOPT ) + Where: + LWORK, as defined previously, depends upon the number + of eigenvectors requested, and + NSYTRD_LWOPT = N + 2*( ANB+1 )*( 4*NPS+2 ) + + ( NPS + 3 ) * NPS + NSYGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB + + ANB = PJLAENV( DESCA( CTXT_), 3, 'PDSYTTRD', 'L', + 0, 0, 0, 0) + SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) ) + NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) + NB = DESCA( MB_ ) + NP0 = NUMROC( N, NB, 0, 0, NPROW ) + NQ0 = NUMROC( N, NB, 0, 0, NPCOL ) + + NUMROC is a ScaLAPACK tool function; + PJLAENV is a ScaLAPACK environmental inquiry function + MYROW, MYCOL, NPROW and NPCOL can be determined by + calling the subroutine BLACS_GRIDINFO. + + For large N, no extra workspace is needed, however the + biggest boost in performance comes for small N, so it + is wise to provide the extra workspace (typically less + than a Megabyte per process). + + If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing + enough space to compute all the eigenvectors + orthogonally will cause serious degradation in + performance. In the limit (i.e. CLUSTERSIZE = N-1) + PDSTEIN will perform no better than DSTEIN on 1 processor. 
+ For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing + all eigenvectors will increase the total execution time + by a factor of 2 or more. + For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will + grow as the square of the cluster size, all other factors + remaining equal and assuming enough workspace. Less + workspace means less reorthogonalization but faster + execution. + + If LWORK = -1, then LWORK is global input and a workspace + query is assumed; the routine only calculates the size + required for optimal performance on all work arrays. + Each of these values is returned in the first entry of the + corresponding work array, and no error message is issued by + PXERBLA. + + +.tp 8 +IWORK (local workspace) INTEGER array + On return, IWORK(1) contains the amount of integer workspace + required. + +.tp 8 +LIWORK (local input) INTEGER + size of IWORK + LIWORK >= 6 * NNP + Where: + NNP = MAX( N, NPROW*NPCOL + 1, 4 ) + + If LIWORK = -1, then LIWORK is global input and a workspace + query is assumed; the routine only calculates the minimum + and optimal size for all work arrays. Each of these + values is returned in the first entry of the corresponding + work array, and no error message is issued by PXERBLA. + +.tp 8 +IFAIL (output) INTEGER array, dimension (N) + IFAIL provides additional information when INFO .NE. 0 + If (MOD(INFO/16,2).NE.0) then IFAIL(1) indicates the order of + the smallest minor which is not positive definite. + If (MOD(INFO,2).NE.0) on exit, then IFAIL contains the + indices of the eigenvectors that failed to converge. + + If neither of the above error conditions hold and JOBZ = 'V', + then the first M elements of IFAIL are set to zero. + +.tp 8 +ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL) + This array contains indices of eigenvectors corresponding to + a cluster of eigenvalues that could not be reorthogonalized + due to insufficient workspace (see LWORK, ORFAC and INFO). 
+ Eigenvectors corresponding to clusters of eigenvalues indexed + ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be + reorthogonalized due to lack of workspace. Hence the + eigenvectors corresponding to these clusters may not be + orthogonal. ICLUSTR() is a zero terminated array. + (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if + K is the number of clusters + ICLUSTR is not referenced if JOBZ = 'N' + +.tp 8 +GAP (global output) DOUBLE PRECISION array, + dimension (NPROW*NPCOL) + This array contains the gap between eigenvalues whose + eigenvectors could not be reorthogonalized. The output + values in this array correspond to the clusters indicated + by the array ICLUSTR. As a result, the dot product between + eigenvectors corresponding to the I^th cluster may be as high + as ( C * n ) / GAP(I) where C is a small constant. + +.tp 8 +INFO (global output) INTEGER + = 0: successful exit + < 0: If the i-th argument is an array and the j-entry had + an illegal value, then INFO = -(i*100+j), if the i-th + argument is a scalar and had an illegal value, then + INFO = -i. + > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors + failed to converge. Their indices are stored + in IFAIL. Send e-mail to scalapack@cs.utk.edu + if (MOD(INFO/2,2).NE.0), then eigenvectors corresponding + to one or more clusters of eigenvalues could not be + reorthogonalized because of insufficient workspace. + The indices of the clusters are stored in the array + ICLUSTR. + if (MOD(INFO/4,2).NE.0), then space limit prevented + PDSYGVX from computing all of the eigenvectors + between VL and VU. The number of eigenvectors + computed is returned in NZ. + if (MOD(INFO/8,2).NE.0), then PDSTEBZ failed to + compute eigenvalues. + Send e-mail to scalapack@cs.utk.edu + if (MOD(INFO/16,2).NE.0), then B was not positive + definite. IFAIL(1) indicates the order of + the smallest minor which is not positive definite. 
--- scalapack-doc-1.5.orig/man/manl/pssyev.l +++ scalapack-doc-1.5/man/manl/pssyev.l @@ -1,6 +1,8 @@ .TH PSSYEV l "12 May 1997" "LAPACK version 1.3" "LAPACK routine (version 1.3)" .SH NAME - +PSSYEV - compute all eigenvalues and, optionally, eigenvectors +of a real symmetric matrix A by calling the recommended sequence +of ScaLAPACK routines .SH SYNOPSIS .TP 19 SUBROUTINE PSSYEV( @@ -101,756 +103,224 @@ .ti +4 INTRINSIC ICHAR, MAX, MIN, MOD, REAL, SQRT -.TP 19 -.ti +4 -IF( -BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* -RSRC_.LT.0 )RETURN -.TP 19 -.ti +4 -IF( -N.EQ.0 ) -RETURN -.TP 19 -.ti +4 -CALL -BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) -.TP 19 -.ti +4 -INFO -= 0 -.TP 19 -.ti +4 -IF( -NPROW.EQ.-1 ) THEN -.TP 19 -.ti +4 -INFO -= -( 700+CTXT_ ) -.TP 19 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN -.TP 19 -.ti +4 -INFO -= -( 1200+CTXT_ ) -.TP 19 -.ti +4 -ELSE -.TP 19 -.ti +4 -CALL -CHK1MAT( N, 3, N, 3, IA, JA, DESCA, 7, INFO ) -.TP 19 -.ti +4 -CALL -CHK1MAT( N, 3, N, 3, IZ, JZ, DESCZ, 12, INFO ) -.TP 19 -.ti +4 -IF( -INFO.EQ.0 ) THEN -.TP 19 -.ti +4 -SAFMIN -= PSLAMCH( DESCA( CTXT_ ), 'Safe minimum' ) -.TP 19 -.ti +4 -EPS -= PSLAMCH( DESCA( CTXT_ ), 'Precision' ) -.TP 19 -.ti +4 -SMLNUM -= SAFMIN / EPS -.TP 19 -.ti +4 -BIGNUM -= ONE / SMLNUM -.TP 19 -.ti +4 -RMIN -= SQRT( SMLNUM ) -.TP 19 -.ti +4 -RMAX -= MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) -.TP 19 -.ti +4 -NPROCS -= NPROW*NPCOL -.TP 19 -.ti +4 -NB_A -= DESCA( NB_ ) -.TP 19 -.ti +4 -MB_A -= DESCA( MB_ ) -.TP 19 -.ti +4 -NB_Z -= DESCZ( NB_ ) -.TP 19 -.ti +4 -MB_Z -= DESCZ( MB_ ) -.TP 19 -.ti +4 -NB -= NB_A -.TP 19 -.ti +4 -LOWER -= LSAME( UPLO, 'L' ) -.TP 19 -.ti +4 -WANTZ -= LSAME( JOBZ, 'V' ) -.TP 19 -.ti +4 -RSRC_A -= DESCA( RSRC_ ) -.TP 19 -.ti +4 -CSRC_A -= DESCA( CSRC_ ) -.TP 19 -.ti +4 -RSRC_Z -= DESCZ( RSRC_ ) -.TP 19 -.ti +4 -LCM -= ILCM( NPROW, NPCOL ) -.TP 19 -.ti +4 -LCMQ -= LCM / NPCOL -.TP 19 -.ti +4 -IROFFA -= MOD( IA-1, MB_A ) -.TP 19 
-.ti +4 -ICOFFA -= MOD( JA-1, NB_A ) -.TP 19 -.ti +4 -IROFFZ -= MOD( IZ-1, MB_A ) -.TP 19 -.ti +4 -IAROW -= INDXG2P( 1, NB_A, MYROW, RSRC_A, NPROW ) -.TP 19 -.ti +4 -IACOL -= INDXG2P( 1, MB_A, MYCOL, CSRC_A, NPCOL ) -.TP 19 -.ti +4 -IZROW -= INDXG2P( 1, NB_A, MYROW, RSRC_Z, NPROW ) -.TP 19 -.ti +4 -NP -= NUMROC( N+IROFFA, NB_Z, MYROW, IAROW, NPROW ) -.TP 19 -.ti +4 -NQ -= NUMROC( N+ICOFFA, NB_Z, MYCOL, IACOL, NPCOL ) -.TP 19 -.ti +4 -SIZEMQRLEFT -= MAX( ( NB_A*( NB_A-1 ) ) / 2, -( NP+NQ )*NB_A ) + NB_A*NB_A -.TP 19 -.ti +4 -SIZEMQRRIGHT -= MAX( ( NB_A*( NB_A-1 ) ) / 2, -( NQ+MAX( NP+NUMROC( NUMROC( N+ICOFFA, NB_A, -0, 0, NPCOL ), NB, 0, 0, LCMQ ), NP ) )* -NB_A ) + NB_A*NB_A -.TP 19 -.ti +4 -LDC -= 0 -.TP 19 -.ti +4 -IF( -WANTZ ) THEN -.TP 19 -.ti +4 -CONTEXTC -= SL_GRIDRESHAPE( DESCA( CTXT_ ), 0, 1, 1, -NPROCS, 1 ) -.TP 19 -.ti +4 -CALL -BLACS_GRIDINFO( CONTEXTC, NPROWC, NPCOLC, MYPROWC, -MYPCOLC ) -.TP 19 -.ti +4 -NRC -= NUMROC( N, NB_A, MYPROWC, 0, NPROCS ) -.TP 19 -.ti +4 -LDC -= MAX( 1, NRC ) -.TP 19 -.ti +4 -CALL -DESCINIT( DESCQR, N, N, NB, NB, 0, 0, CONTEXTC, -LDC, INFO ) -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -INDTAU -= 1 -.TP 19 -.ti +4 -INDE -= INDTAU + N -.TP 19 -.ti +4 -INDD -= INDE + N -.TP 19 -.ti +4 -INDD2 -= INDD + N -.TP 19 -.ti +4 -INDE2 -= INDD2 + N -.TP 19 -.ti +4 -INDWORK -= INDE2 + N -.TP 19 -.ti +4 -INDWORK2 -= INDWORK + N*LDC -.TP 19 -.ti +4 -LLWORK -= LWORK - INDWORK + 1 -.TP 19 -.ti +4 -NN -= MAX( N, NB, 2 ) -.TP 19 -.ti +4 -IF( -WANTZ ) THEN -.TP 19 -.ti +4 -QRMEM -= 5*N + MAX( 2*NP+NQ+NB*NN, 2*NN-2 ) + N*LDC -.TP 19 -.ti +4 -LWMIN -= MAX( SIZEMQRLEFT, SIZEMQRRIGHT, QRMEM ) -.TP 19 -.ti +4 -ELSE -.TP 19 -.ti +4 -LWMIN -= 5*N + 2*NP + NQ + NB*NN -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -IF( -INFO.EQ.0 ) THEN -.TP 19 -.ti +4 -IF( -.NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN -.TP 19 -.ti +4 -INFO -= -1 -.TP 19 -.ti +4 -ELSE -IF( .NOT.( LOWER .OR. 
LSAME( UPLO, 'U' ) ) ) THEN -.TP 19 -.ti +4 -INFO -= -2 -.TP 19 -.ti +4 -ELSE -IF( LWORK.LT.LWMIN .AND. LWORK.NE.-1 ) THEN -.TP 19 -.ti +4 -INFO -= -14 -.TP 19 -.ti +4 -ELSE -IF( IROFFA.NE.IROFFZ ) THEN -.TP 19 -.ti +4 -INFO -= -10 -.TP 19 -.ti +4 -ELSE -IF( IROFFA.NE.0 ) THEN -.TP 19 -.ti +4 -INFO -= -5 -.TP 19 -.ti +4 -ELSE -IF( IAROW.NE.IZROW ) THEN -.TP 19 -.ti +4 -INFO -= -10 -.TP 19 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN -.TP 19 -.ti +4 -INFO -= -( 700+NB_ ) -.TP 19 -.ti +4 -ELSE -IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN -.TP 19 -.ti +4 -INFO -= -( 1200+M_ ) -.TP 19 -.ti +4 -ELSE -IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN -.TP 19 -.ti +4 -INFO -= -( 1200+N_ ) -.TP 19 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN -.TP 19 -.ti +4 -INFO -= -( 1200+MB_ ) -.TP 19 -.ti +4 -ELSE -IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN -.TP 19 -.ti +4 -INFO -= -( 1200+NB_ ) -.TP 19 -.ti +4 -ELSE -IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN -.TP 19 -.ti +4 -INFO -= -( 1200+RSRC_ ) -.TP 19 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN -.TP 19 -.ti +4 -INFO -= -( 1200+CTXT_ ) -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -IF( -WANTZ ) THEN -.TP 19 -.ti +4 -IDUM1( -1 ) = ICHAR( 'V' ) -.TP 19 -.ti +4 -ELSE -.TP 19 -.ti +4 -IDUM1( -1 ) = ICHAR( 'N' ) -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -IDUM2( -1 ) = 1 -.TP 19 -.ti +4 -IF( -LOWER ) THEN -.TP 19 -.ti +4 -IDUM1( -2 ) = ICHAR( 'L' ) -.TP 19 -.ti +4 -ELSE -.TP 19 -.ti +4 -IDUM1( -2 ) = ICHAR( 'U' ) -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -IDUM2( -2 ) = 2 -.TP 19 -.ti +4 -IF( -LWORK.EQ.-1 ) THEN -.TP 19 -.ti +4 -IDUM1( -3 ) = -1 -.TP 19 -.ti +4 -ELSE -.TP 19 -.ti +4 -IDUM1( -3 ) = 1 -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -IDUM2( -3 ) = 3 -.TP 19 -.ti +4 -CALL -PCHK2MAT( N, 3, N, 3, IA, JA, DESCA, 7, N, 3, N, 3, IZ, -JZ, DESCZ, 12, 3, IDUM1, IDUM2, INFO ) -.TP 19 -.ti +4 -WORK( -1 ) = REAL( LWMIN ) -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -IF( -INFO.NE.0 ) THEN -.TP 19 -.ti +4 -CALL 
-PXERBLA( DESCA( CTXT_ ), 'PSSYEV', -INFO ) -.TP 19 -.ti +4 -IF( -WANTZ ) -CALL BLACS_GRIDEXIT( CONTEXTC ) -.TP 19 -.ti +4 -RETURN -.TP 19 -.ti +4 -ELSE -IF( LWORK.EQ.-1 ) THEN -.TP 19 -.ti +4 -IF( -WANTZ ) -CALL BLACS_GRIDEXIT( CONTEXTC ) -.TP 19 -.ti +4 -RETURN -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -ISCALE -= 0 -.TP 19 -.ti +4 -ANRM -= PSLANSY( '1', UPLO, N, A, IA, JA, DESCA, WORK( INDWORK ) ) -.TP 19 -.ti +4 -IF( -ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN -.TP 19 -.ti +4 -ISCALE -= 1 -.TP 19 -.ti +4 -SIGMA -= RMIN / ANRM -.TP 19 -.ti +4 -ELSE -IF( ANRM.GT.RMAX ) THEN -.TP 19 -.ti +4 -ISCALE -= 1 -.TP 19 -.ti +4 -SIGMA -= RMAX / ANRM -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -IF( -ISCALE.EQ.1 ) THEN -.TP 19 -.ti +4 -CALL -PSLASCL( UPLO, ONE, SIGMA, N, N, A, IA, JA, DESCA, -IINFO ) -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -CALL -PSSYTRD( UPLO, N, A, IA, JA, DESCA, WORK( INDD ), -WORK( INDE ), WORK( INDTAU ), WORK( INDWORK ), -LLWORK, IINFO ) -.TP 19 -.ti +4 -DO -10 I = 1, N -.TP 19 -.ti +4 -CALL -PSELGET( 'A', ' ', WORK( INDD2+I-1 ), A, I+IA-1, I+JA-1, -DESCA ) -.TP 19 -.ti +4 -10 -CONTINUE -.TP 19 -.ti +4 -IF( -LSAME( UPLO, 'U' ) ) THEN -.TP 19 -.ti +4 -DO -20 I = 1, N - 1 -.TP 19 -.ti +4 -CALL -PSELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA-1, I+JA, -DESCA ) -.TP 19 -.ti +4 -20 -CONTINUE -.TP 19 -.ti +4 -ELSE -.TP 19 -.ti +4 -DO -30 I = 1, N - 1 -.TP 19 -.ti +4 -CALL -PSELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA, I+JA-1, -DESCA ) -.TP 19 -.ti +4 -30 -CONTINUE -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -IF( -WANTZ ) THEN -.TP 19 -.ti +4 -CALL -PSLASET( 'Full', N, N, ZERO, ONE, WORK( INDWORK ), 1, 1, -DESCQR ) -.TP 19 -.ti +4 -CALL -SSTEQR2( 'I', N, WORK( INDD2 ), WORK( INDE2 ), -WORK( INDWORK ), LDC, NRC, WORK( INDWORK2 ), -INFO ) -.TP 19 -.ti +4 -CALL -PSGEMR2D( N, N, WORK( INDWORK ), 1, 1, DESCQR, Z, 1, 1, -DESCZ, CONTEXTC ) -.TP 19 -.ti +4 -CALL -PSORMTR( 'L', UPLO, 'N', N, N, A, IA, JA, DESCA, -WORK( INDTAU ), Z, IZ, JZ, DESCZ, -WORK( INDWORK ), LLWORK, IINFO ) 
-.TP 19 -.ti +4 -ELSE -.TP 19 -.ti +4 -CALL -SSTEQR2( 'N', N, WORK( INDD2 ), WORK( INDE2 ), -WORK( INDWORK ), 1, 1, WORK( INDWORK2 ), INFO ) -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -CALL -SCOPY( N, WORK( INDD2 ), 1, W, 1 ) -.TP 19 -.ti +4 -IF( -ISCALE.EQ.1 ) THEN -.TP 19 -.ti +4 -CALL -SSCAL( N, ONE / SIGMA, W, 1 ) -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -WORK( -1 ) = REAL( LWMIN ) -.TP 19 -.ti +4 -IF( -WANTZ ) THEN -.TP 19 -.ti +4 -CALL -BLACS_GRIDEXIT( CONTEXTC ) -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -IF( -N.LE.ITHVAL ) THEN -.TP 19 -.ti +4 -J -= N -.TP 19 -.ti +4 -K -= 1 -.TP 19 -.ti +4 -ELSE -.TP 19 -.ti +4 -J -= N / ITHVAL -.TP 19 -.ti +4 -K -= ITHVAL -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -DO -40 I = 1, J -.TP 19 -.ti +4 -WORK( -I+INDTAU ) = W( ( I-1 )*K+1 ) -.TP 19 -.ti +4 -WORK( -I+INDE ) = W( ( I-1 )*K+1 ) -.TP 19 -.ti +4 -40 -CONTINUE -.TP 19 -.ti +4 -CALL -SGAMN2D( DESCA( CTXT_ ), 'a', ' ', J, 1, WORK( 1+INDTAU ), -J, 1, 1, -1, -1, 0 ) -.TP 19 -.ti +4 -CALL -SGAMX2D( DESCA( CTXT_ ), 'a', ' ', J, 1, WORK( 1+INDE ), J, -1, 1, -1, -1, 0 ) -.TP 19 -.ti +4 -DO -50 I = 1, J -.TP 19 -.ti +4 -IF( -INFO.EQ.0 .AND. ( WORK( I+INDTAU )-WORK( I+INDE ).NE. -ZERO ) ) THEN -.TP 19 -.ti +4 -INFO -= N + 1 -.TP 19 -.ti +4 -END -IF -.TP 19 -.ti +4 -50 -CONTINUE -.TP 19 -.ti +4 -RETURN -.TP 19 -.ti +4 -END + .SH PURPOSE +PSSYEV computes all eigenvalues and, optionally, eigenvectors +of a real symmetric matrix A by calling the recommended sequence +of ScaLAPACK routines. + +In its present form, PSSYEV assumes a homogeneous system and makes +no checks for consistency of the eigenvalues or eigenvectors across +the different processes. Because of this, it is possible that a +heterogeneous system may return incorrect results without any error +messages. +.SH NOTES +A description vector is associated with each 2D block-cyclicly dis- +tributed matrix. 
This vector stores the information required to +establish the mapping between a matrix entry and its corresponding +process and memory location. + +In the following comments, the character _ should be read as +"of the distributed matrix". Let A be a generic term for any 2D +block cyclicly distributed matrix. Its description vector is DESCA: + +NOTATION STORED IN EXPLANATION +.br +--------------- -------------- -------------------------------------- +.br +DTYPE_A(global) DESCA( DTYPE_) The descriptor type. +.br +CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating + the BLACS process grid A is distribu- + ted over. The context itself is glo- + bal, but the handle (the integer + value) may vary. +.br +M_A (global) DESCA( M_ ) The number of rows in the distributed + matrix A. +.br +N_A (global) DESCA( N_ ) The number of columns in the distri- + buted matrix A. +.br +MB_A (global) DESCA( MB_ ) The blocking factor used to distribute + the rows of A. +.br +NB_A (global) DESCA( NB_ ) The blocking factor used to distribute + the columns of A. +.br +RSRC_A (global) DESCA( RSRC_ ) The process row over which the first + row of the matrix A is distributed. +.br +CSRC_A (global) DESCA( CSRC_ ) The process column over which the + first column of A is distributed. +.br +LLD_A (local) DESCA( LLD_ ) The leading dimension of the local + array storing the local blocks of the + distributed matrix A. + LLD_A >= MAX(1,LOCr(M_A)). + +Let K be the number of rows or columns of a distributed matrix, +and assume that its process grid has dimension p x q. +LOCr( K ) denotes the number of elements of K that a process +would receive if K were distributed over the p processes of its +process column. +.br +Similarly, LOCc( K ) denotes the number of elements of K that a +process would receive if K were distributed over the q processes of +its process row. 
+.br +The values of LOCr() and LOCc() may be determined via a call to the +ScaLAPACK tool function, NUMROC: +.br + LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), +.br + LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). + +.SH ARGUMENTS + + NP = the number of rows local to a given process. + NQ = the number of columns local to a given process. + +.tp 8 +JOBZ (global input) CHARACTER*1 + Specifies whether or not to compute the eigenvectors: + = 'N': Compute eigenvalues only. + = 'V': Compute eigenvalues and eigenvectors. + +.tp 8 +UPLO (global input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + +.tp 8 +N (global input) INTEGER + The number of rows and columns of the matrix A. N >= 0. + +.tp 8 +A (local input/workspace) block cyclic REAL array, + global dimension (N, N), local dimension ( LLD_A, + LOCc(JA+N-1) ) + + On entry, the symmetric matrix A. If UPLO = 'U', only the + upper triangular part of A is used to define the elements of + the symmetric matrix. If UPLO = 'L', only the lower + triangular part of A is used to define the elements of the + symmetric matrix. + + On exit, the lower triangle (if UPLO='L') or the upper + triangle (if UPLO='U') of A, including the diagonal, is + destroyed. + +.tp 8 +IA (global input) INTEGER + A's global row index, which points to the beginning of the + submatrix which is to be operated on. + +.tp 8 +JA (global input) INTEGER + A's global column index, which points to the beginning of + the submatrix which is to be operated on. + +.tp 8 +DESCA (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix A. + If DESCA( CTXT_ ) is incorrect, PSSYEV cannot guarantee + correct error reporting. + +.tp 8 +W (global output) REAL array, dimension (N) + On normal exit, the first M entries contain the selected + eigenvalues in ascending order. 
+ +.tp 8 +Z (local output) REAL array, + global dimension (N, N), + local dimension ( LLD_Z, LOCc(JZ+N-1) ) + If JOBZ = 'V', then on normal exit the first M columns of Z + contain the orthonormal eigenvectors of the matrix + corresponding to the selected eigenvalues. + If JOBZ = 'N', then Z is not referenced. + +.tp 8 +IZ (global input) INTEGER + Z's global row index, which points to the beginning of the + submatrix which is to be operated on. + +.tp 8 +JZ (global input) INTEGER + Z's global column index, which points to the beginning of + the submatrix which is to be operated on. + +.tp 8 +DESCZ (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix Z. + DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) + +.tp 8 +WORK (local workspace/output) REAL array, + dimension (LWORK) + Version 1.0: on output, WORK(1) returns the workspace + needed to guarantee completion. + If the input parameters are incorrect, WORK(1) may also be + incorrect. + + If JOBZ='N' WORK(1) = minimal=optimal amount of workspace + If JOBZ='V' WORK(1) = minimal workspace required to + generate all the eigenvectors. + + +.tp 8 +LWORK (local input) INTEGER + See below for definitions of variables used to define LWORK. 
+ If no eigenvectors are requested (JOBZ = 'N') then + LWORK >= 5*N + SIZESYTRD + 1 + where + SIZESYTRD = The workspace requirement for PSSYTRD + and is MAX( NB * ( NP +1 ), 3 * NB ) + If eigenvectors are requested (JOBZ = 'V' ) then + the amount of workspace required to guarantee that all + eigenvectors are computed is: + + QRMEM = 2*N-2 + LWMIN = 5*N + N*LDC + MAX( SIZEMQRLEFT, QRMEM ) + 1 + + Variable definitions: + NB = DESCA( MB_ ) = DESCA( NB_ ) = + DESCZ( MB_ ) = DESCZ( NB_ ) + NN = MAX( N, NB, 2 ) + DESCA( RSRC_ ) = DESCA( CSRC_ ) = DESCZ( RSRC_ ) = + DESCZ( CSRC_ ) = 0 + NP = NUMROC( NN, NB, 0, 0, NPROW ) + NQ = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPCOL ) + NRC = NUMROC( N, NB, MYPROWC, 0, NPROCS) + LDC = MAX( 1, NRC ) + SIZEMQRLEFT = The workspace requirement for PSORMTR + when its SIDE argument is 'L'. + + With MYPROWC defined when a new context is created as: + CALL BLACS_GET( DESCA( CTXT_ ), 0, CONTEXTC ) + CALL BLACS_GRIDINIT( CONTEXTC, 'R', NPROCS, 1 ) + CALL BLACS_GRIDINFO( CONTEXTC, NPROWC, NPCOLC, MYPROWC, + MYPCOLC ) + + If LWORK = -1, then LWORK is global input and a workspace + query is assumed; the routine only calculates the minimum + size for the WORK array. The required workspace is returned + as the first element of WORK and no error message is issued + by PXERBLA. + +.tp 8 +INFO (global output) INTEGER + = 0: successful exit + < 0: If the i-th argument is an array and the j-entry had + an illegal value, then INFO = -(i*100+j), if the i-th + argument is a scalar and had an illegal value, then + INFO = -i. + > 0: If INFO = 1 through N, the i(th) eigenvalue did not + converge in SSTEQR2 after a total of 30*N iterations. + If INFO = N+1, then PSSYEV has detected heterogeneity + by finding that eigenvalues were not identical across + the process grid. In this case, the accuracy of + the results from PSSYEV cannot be guaranteed. 
--- scalapack-doc-1.5.orig/man/manl/pssyevx.l +++ scalapack-doc-1.5/man/manl/pssyevx.l @@ -1,6 +1,8 @@ .TH PSSYEVX l "12 May 1997" "LAPACK version 1.5" "LAPACK routine (version 1.5)" .SH NAME - +PSSYEVX - compute selected eigenvalues and, optionally, eigenvectors +of a real symmetric matrix A by calling the recommended sequence +of ScaLAPACK routines .SH SYNOPSIS .TP 20 SUBROUTINE PSSYEVX( @@ -116,1198 +118,440 @@ .ti +4 INTRINSIC ABS, ICHAR, MAX, MIN, MOD, REAL, SQRT -.TP 20 -.ti +4 -IF( -BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* -RSRC_.LT.0 )RETURN -.TP 20 -.ti +4 -QUICKRETURN -= ( N.EQ.0 ) -.TP 20 -.ti +4 -CALL -BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) -.TP 20 -.ti +4 -INFO -= 0 -.TP 20 -.ti +4 -IF( -NPROW.EQ.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -( 800+CTXT_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+CTXT_ ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -CALL -CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, INFO ) -.TP 20 -.ti +4 -CALL -CHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 21, INFO ) -.TP 20 -.ti +4 -IF( -INFO.EQ.0 ) THEN -.TP 20 -.ti +4 -SAFMIN -= PSLAMCH( DESCA( CTXT_ ), 'Safe minimum' ) -.TP 20 -.ti +4 -EPS -= PSLAMCH( DESCA( CTXT_ ), 'Precision' ) -.TP 20 -.ti +4 -SMLNUM -= SAFMIN / EPS -.TP 20 -.ti +4 -BIGNUM -= ONE / SMLNUM -.TP 20 -.ti +4 -RMIN -= SQRT( SMLNUM ) -.TP 20 -.ti +4 -RMAX -= MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) -.TP 20 -.ti +4 -NPROCS -= NPROW*NPCOL -.TP 20 -.ti +4 -LOWER -= LSAME( UPLO, 'L' ) -.TP 20 -.ti +4 -WANTZ -= LSAME( JOBZ, 'V' ) -.TP 20 -.ti +4 -ALLEIG -= LSAME( RANGE, 'A' ) -.TP 20 -.ti +4 -VALEIG -= LSAME( RANGE, 'V' ) -.TP 20 -.ti +4 -INDEIG -= LSAME( RANGE, 'I' ) -.TP 20 -.ti +4 -INDTAU -= 1 -.TP 20 -.ti +4 -INDE -= INDTAU + N -.TP 20 -.ti +4 -INDD -= INDE + N -.TP 20 -.ti +4 -INDD2 -= INDD + N -.TP 20 -.ti +4 -INDE2 -= INDD2 + N -.TP 20 -.ti +4 -INDWORK -= INDE2 + N -.TP 20 -.ti +4 -LLWORK -= LWORK - INDWORK + 1 -.TP 20 -.ti +4 
-ISIZESTEIN -= 3*N + NPROCS + 1 -.TP 20 -.ti +4 -ISIZESTEBZ -= MAX( 4*N, 14, NPROCS ) -.TP 20 -.ti +4 -INDIBL -= ( MAX( ISIZESTEIN, ISIZESTEBZ ) ) + 1 -.TP 20 -.ti +4 -INDISP -= INDIBL + N -.TP 20 -.ti +4 -LQUERY -= .FALSE. -.TP 20 -.ti +4 -IF( -LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) -LQUERY = .TRUE. -.TP 20 -.ti +4 -NNP -= MAX( N, NPROCS+1, 4 ) -.TP 20 -.ti +4 -LIWMIN -= 6*NNP -.TP 20 -.ti +4 -NPROCS -= NPROW*NPCOL -.TP 20 -.ti +4 -NB_A -= DESCA( NB_ ) -.TP 20 -.ti +4 -MB_A -= DESCA( MB_ ) -.TP 20 -.ti +4 -NB_Z -= DESCZ( NB_ ) -.TP 20 -.ti +4 -MB_Z -= DESCZ( MB_ ) -.TP 20 -.ti +4 -NB -= NB_A -.TP 20 -.ti +4 -NN -= MAX( N, NB, 2 ) -.TP 20 -.ti +4 -RSRC_A -= DESCA( RSRC_ ) -.TP 20 -.ti +4 -CSRC_A -= DESCA( CSRC_ ) -.TP 20 -.ti +4 -RSRC_Z -= DESCZ( RSRC_ ) -.TP 20 -.ti +4 -IROFFA -= MOD( IA-1, MB_A ) -.TP 20 -.ti +4 -ICOFFA -= MOD( JA-1, NB_A ) -.TP 20 -.ti +4 -IROFFZ -= MOD( IZ-1, MB_A ) -.TP 20 -.ti +4 -IAROW -= INDXG2P( 1, NB_A, MYROW, RSRC_A, NPROW ) -.TP 20 -.ti +4 -IACOL -= INDXG2P( 1, MB_A, MYCOL, CSRC_A, NPCOL ) -.TP 20 -.ti +4 -IZROW -= INDXG2P( 1, NB_A, MYROW, RSRC_Z, NPROW ) -.TP 20 -.ti +4 -NP0 -= NUMROC( N+IROFFA, NB_Z, MYROW, IAROW, NPROW ) -.TP 20 -.ti +4 -MQ0 -= NUMROC( N+ICOFFA, NB_Z, MYCOL, IACOL, NPCOL ) -.TP 20 -.ti +4 -IF( -( .NOT.WANTZ ) .OR. ( VALEIG .AND. ( .NOT.LQUERY ) ) ) -THEN -.TP 20 -.ti +4 -LWMIN -= 5*N + MAX( 5*NN, NB*( NP0+1 ) ) -.TP 20 -.ti +4 -NEIG -= 0 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IF( -ALLEIG .OR. VALEIG ) THEN -.TP 20 -.ti +4 -NEIG -= N -.TP 20 -.ti +4 -ELSE -IF( INDEIG ) THEN -.TP 20 -.ti +4 -NEIG -= IU - IL + 1 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -MQ0 -= NUMROC( MAX( NEIG, NB, 2 ), NB, MYCOL, IACOL, -NPCOL ) -.TP 20 -.ti +4 -LWMIN -= 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) + -ICEIL( NEIG, NPROW*NPCOL )*NN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -INFO.EQ.0 ) THEN -.TP 20 -.ti +4 -IF( -MYROW.EQ.0 .AND. 
MYCOL.EQ.0 ) THEN -.TP 20 -.ti +4 -WORK( -1 ) = ABSTOL -.TP 20 -.ti +4 -IF( -VALEIG ) THEN -.TP 20 -.ti +4 -WORK( -2 ) = VL -.TP 20 -.ti +4 -WORK( -3 ) = VU -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -WORK( -2 ) = ZERO -.TP 20 -.ti +4 -WORK( -3 ) = ZERO -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -SGEBS2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, WORK, -3 ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -CALL -SGEBR2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, WORK, 3, -0, 0 ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -.NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -1 -.TP 20 -.ti +4 -ELSE -IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN -.TP 20 -.ti +4 -INFO -= -2 -.TP 20 -.ti +4 -ELSE -IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -3 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN -.TP 20 -.ti +4 -INFO -= -10 -.TP 20 -.ti +4 -ELSE -IF( INDEIG .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) -THEN -.TP 20 -.ti +4 -INFO -= -11 -.TP 20 -.ti +4 -ELSE -IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) -THEN -.TP 20 -.ti +4 -INFO -= -12 -.TP 20 -.ti +4 -ELSE -IF( LWORK.LT.LWMIN .AND. LWORK.NE.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -23 -.TP 20 -.ti +4 -ELSE -IF( LIWORK.LT.LIWMIN .AND. LIWORK.NE.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -25 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. ( ABS( WORK( 2 )-VL ).GT.FIVE*EPS* -ABS( VL ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -9 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. 
( ABS( WORK( 3 )-VU ).GT.FIVE*EPS* -ABS( VU ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -10 -.TP 20 -.ti +4 -ELSE -IF( ABS( WORK( 1 )-ABSTOL ).GT.FIVE*EPS*ABS( ABSTOL ) ) -THEN -.TP 20 -.ti +4 -INFO -= -13 -.TP 20 -.ti +4 -ELSE -IF( IROFFA.NE.IROFFZ ) THEN -.TP 20 -.ti +4 -INFO -= -19 -.TP 20 -.ti +4 -ELSE -IF( IROFFA.NE.0 ) THEN -.TP 20 -.ti +4 -INFO -= -6 -.TP 20 -.ti +4 -ELSE -IF( IAROW.NE.IZROW ) THEN -.TP 20 -.ti +4 -INFO -= -19 -.TP 20 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 800+NB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+M_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+N_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+MB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+NB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+RSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+CSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+CTXT_ ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -IDUM1( -1 ) = ICHAR( 'V' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -1 ) = ICHAR( 'N' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -1 ) = 1 -.TP 20 -.ti +4 -IF( -LOWER ) THEN -.TP 20 -.ti +4 -IDUM1( -2 ) = ICHAR( 'L' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -2 ) = ICHAR( 'U' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -2 ) = 2 -.TP 20 -.ti +4 -IF( -ALLEIG ) THEN -.TP 20 -.ti +4 -IDUM1( -3 ) = ICHAR( 'A' ) -.TP 20 -.ti +4 -ELSE -IF( INDEIG ) THEN -.TP 20 -.ti +4 -IDUM1( -3 ) = ICHAR( 'I' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -3 ) = ICHAR( 'V' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -3 ) = 
3 -.TP 20 -.ti +4 -IF( -LQUERY ) THEN -.TP 20 -.ti +4 -IDUM1( -4 ) = -1 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -4 ) = 1 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -4 ) = 4 -.TP 20 -.ti +4 -CALL -PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 8, N, 4, N, 4, IZ, -JZ, DESCZ, 21, 4, IDUM1, IDUM2, INFO ) -.TP 20 -.ti +4 -WORK( -1 ) = REAL( LWMIN ) -.TP 20 -.ti +4 -IWORK( -1 ) = LIWMIN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -INFO.NE.0 ) THEN -.TP 20 -.ti +4 -CALL -PXERBLA( DESCA( CTXT_ ), 'PSSYEVX', -INFO ) -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -ELSE -IF( LQUERY ) THEN -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -QUICKRETURN ) THEN -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -NZ -= 0 -.TP 20 -.ti +4 -ICLUSTR( -1 ) = 0 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -M -= 0 -.TP 20 -.ti +4 -WORK( -1 ) = REAL( LWMIN ) -.TP 20 -.ti +4 -IWORK( -1 ) = LIWMIN -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -ABSTLL -= ABSTOL -.TP 20 -.ti +4 -ISCALE -= 0 -.TP 20 -.ti +4 -IF( -VALEIG ) THEN -.TP 20 -.ti +4 -VLL -= VL -.TP 20 -.ti +4 -VUU -= VU -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -VLL -= ZERO -.TP 20 -.ti +4 -VUU -= ZERO -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -ANRM -= PSLANSY( '1', UPLO, N, A, IA, JA, DESCA, WORK( INDWORK ) ) -.TP 20 -.ti +4 -IF( -ANRM.GT.ZERO .AND. 
ANRM.LT.RMIN ) THEN -.TP 20 -.ti +4 -ISCALE -= 1 -.TP 20 -.ti +4 -SIGMA -= RMIN / ANRM -.TP 20 -.ti +4 -ANRM -= ANRM*SIGMA -.TP 20 -.ti +4 -ELSE -IF( ANRM.GT.RMAX ) THEN -.TP 20 -.ti +4 -ISCALE -= 1 -.TP 20 -.ti +4 -SIGMA -= RMAX / ANRM -.TP 20 -.ti +4 -ANRM -= ANRM*SIGMA -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -ISCALE.EQ.1 ) THEN -.TP 20 -.ti +4 -CALL -PSLASCL( UPLO, ONE, SIGMA, N, N, A, IA, JA, DESCA, -IINFO ) -.TP 20 -.ti +4 -IF( -ABSTOL.GT.0 ) -ABSTLL = ABSTOL*SIGMA -.TP 20 -.ti +4 -IF( -VALEIG ) THEN -.TP 20 -.ti +4 -VLL -= VL*SIGMA -.TP 20 -.ti +4 -VUU -= VU*SIGMA -.TP 20 -.ti +4 -IF( -VUU.EQ.VLL ) THEN -.TP 20 -.ti +4 -VUU -= VUU + 2*MAX( ABS( VUU )*EPS, SAFMIN ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -LALLWORK -= LLWORK -.TP 20 -.ti +4 -CALL -PSSYTRD( UPLO, N, A, IA, JA, DESCA, WORK( INDD ), -WORK( INDE ), WORK( INDTAU ), WORK( INDWORK ), -LLWORK, IINFO ) -.TP 20 -.ti +4 -OFFSET -= 0 -.TP 20 -.ti +4 -IF( -IA.EQ.1 .AND. JA.EQ.1 .AND. RSRC_A.EQ.0 .AND. 
CSRC_A.EQ.0 ) -THEN -.TP 20 -.ti +4 -CALL -PSLARED1D( N, IA, JA, DESCA, WORK( INDD ), WORK( INDD2 ), -WORK( INDWORK ), LLWORK ) -.TP 20 -.ti +4 -CALL -PSLARED1D( N, IA, JA, DESCA, WORK( INDE ), WORK( INDE2 ), -WORK( INDWORK ), LLWORK ) -.TP 20 -.ti +4 -IF( -.NOT.LOWER ) -OFFSET = 1 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -DO -10 I = 1, N -.TP 20 -.ti +4 -CALL -PSELGET( 'A', ' ', WORK( INDD2+I-1 ), A, I+IA-1, -I+JA-1, DESCA ) -.TP 20 -.ti +4 -10 -CONTINUE -.TP 20 -.ti +4 -IF( -LSAME( UPLO, 'U' ) ) THEN -.TP 20 -.ti +4 -DO -20 I = 1, N - 1 -.TP 20 -.ti +4 -CALL -PSELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA-1, -I+JA, DESCA ) -.TP 20 -.ti +4 -20 -CONTINUE -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -DO -30 I = 1, N - 1 -.TP 20 -.ti +4 -CALL -PSELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA, -I+JA-1, DESCA ) -.TP 20 -.ti +4 -30 -CONTINUE -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -ORDER -= 'b' -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -ORDER -= 'e' -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PSSTEBZ( DESCA( CTXT_ ), RANGE, ORDER, N, VLL, VUU, IL, IU, -ABSTLL, WORK( INDD2 ), WORK( INDE2+OFFSET ), M, -NSPLIT, W, IWORK( INDIBL ), IWORK( INDISP ), -WORK( INDWORK ), LLWORK, IWORK( 1 ), ISIZESTEBZ, -IINFO ) -.TP 20 -.ti +4 -IF( -IINFO.NE.0 ) THEN -.TP 20 -.ti +4 -INFO -= INFO + IERREBZ -.TP 20 -.ti +4 -DO -40 I = 1, M -.TP 20 -.ti +4 -IWORK( -INDIBL+I-1 ) = ABS( IWORK( INDIBL+I-1 ) ) -.TP 20 -.ti +4 -40 -CONTINUE -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -IF( -VALEIG ) THEN -.TP 20 -.ti +4 -CALL -IGAMN2D( DESCA( CTXT_ ), 'A', ' ', 1, 1, LALLWORK, 1, -1, 1, -1, -1, -1 ) -.TP 20 -.ti +4 -MAXEIGS -= DESCZ( N_ ) -.TP 20 -.ti +4 -DO -50 NZ = MIN( MAXEIGS, M ), 0, -1 -.TP 20 -.ti +4 -MQ0 -= NUMROC( NZ, NB, 0, 0, NPCOL ) -.TP 20 -.ti +4 -SIZESTEIN -= ICEIL( NZ, NPROCS )*N + MAX( 5*N, NP0*MQ0 ) -.TP 20 -.ti +4 -SIZEORMTR -= MAX( ( NB*( NB-1 ) ) / 2, ( MQ0+NP0 )*NB ) + -NB*NB -.TP 20 -.ti +4 -SIZESYEVX 
-= MAX( SIZESTEIN, SIZEORMTR ) -.TP 20 -.ti +4 -IF( -SIZESYEVX.LE.LALLWORK ) -GO TO 60 -.TP 20 -.ti +4 -50 -CONTINUE -.TP 20 -.ti +4 -60 -CONTINUE -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -NZ -= M -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -NZ -= MAX( NZ, 0 ) -.TP 20 -.ti +4 -IF( -NZ.NE.M ) THEN -.TP 20 -.ti +4 -INFO -= INFO + IERRSPC -.TP 20 -.ti +4 -DO -70 I = 1, M -.TP 20 -.ti +4 -IFAIL( -I ) = 0 -.TP 20 -.ti +4 -70 -CONTINUE -.TP 20 -.ti +4 -IF( -NSPLIT.GT.1 ) THEN -.TP 20 -.ti +4 -CALL -SLASRT( 'I', M, W, IINFO ) -.TP 20 -.ti +4 -IF( -NZ.GT.0 ) THEN -.TP 20 -.ti +4 -VUU -= W( NZ ) - TEN*( EPS*ANRM+SAFMIN ) -.TP 20 -.ti +4 -IF( -VLL.GE.VUU ) THEN -.TP 20 -.ti +4 -NZZ -= 0 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -CALL -PSSTEBZ( DESCA( CTXT_ ), RANGE, ORDER, N, -VLL, VUU, IL, IU, ABSTLL, -WORK( INDD2 ), WORK( INDE2+OFFSET ), -NZZ, NSPLIT, W, IWORK( INDIBL ), -IWORK( INDISP ), WORK( INDWORK ), -LLWORK, IWORK( 1 ), ISIZESTEBZ, -IINFO ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -MOD( INFO / IERREBZ, 1 ).EQ.0 ) THEN -.TP 20 -.ti +4 -IF( -NZZ.GT.NZ .OR. 
IINFO.NE.0 ) THEN -.TP 20 -.ti +4 -INFO -= INFO + IERREBZ -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -NZ -= MIN( NZ, NZZ ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PSSTEIN( N, WORK( INDD2 ), WORK( INDE2+OFFSET ), NZ, W, -IWORK( INDIBL ), IWORK( INDISP ), ORFAC, Z, IZ, -JZ, DESCZ, WORK( INDWORK ), LALLWORK, -IWORK( 1 ), ISIZESTEIN, IFAIL, ICLUSTR, GAP, -IINFO ) -.TP 20 -.ti +4 -IF( -IINFO.GE.NZ+1 ) -INFO = INFO + IERRCLS -.TP 20 -.ti +4 -IF( -MOD( IINFO, NZ+1 ).NE.0 ) -INFO = INFO + IERREIN -.TP 20 -.ti +4 -IF( -NZ.GT.0 ) THEN -.TP 20 -.ti +4 -CALL -PSORMTR( 'L', UPLO, 'N', N, NZ, A, IA, JA, DESCA, -WORK( INDTAU ), Z, IZ, JZ, DESCZ, -WORK( INDWORK ), LLWORK, IINFO ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -ISCALE.EQ.1 ) THEN -.TP 20 -.ti +4 -CALL -SSCAL( M, ONE / SIGMA, W, 1 ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -WORK( -1 ) = REAL( LWMIN ) -.TP 20 -.ti +4 -IWORK( -1 ) = LIWMIN -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END .SH PURPOSE +PSSYEVX computes selected eigenvalues and, optionally, eigenvectors +of a real symmetric matrix A by calling the recommended sequence +of ScaLAPACK routines. Eigenvalues/vectors can be selected by +specifying a range of values or a range of indices for the desired +eigenvalues. +.SH NOTES +Each global data object is described by an associated description +vector. This vector stores the information required to establish +the mapping between an object element and its corresponding process +and memory location. + +Let A be a generic term for any 2D block cyclicly distributed array. +Such a global array has an associated description vector DESCA. +In the following comments, the character _ should be read as +"of the global array". + +NOTATION STORED IN EXPLANATION +.br +--------------- -------------- -------------------------------------- +.br +DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, + DTYPE_A = 1. 
+.br +CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating + the BLACS process grid A is distribu- + ted over. The context itself is glo- + bal, but the handle (the integer + value) may vary. +.br +M_A (global) DESCA( M_ ) The number of rows in the global + array A. +.br +N_A (global) DESCA( N_ ) The number of columns in the global + array A. +.br +MB_A (global) DESCA( MB_ ) The blocking factor used to distribute + the rows of the array. +.br +NB_A (global) DESCA( NB_ ) The blocking factor used to distribute + the columns of the array. +.br +RSRC_A (global) DESCA( RSRC_ ) The process row over which the first + row of the array A is distributed. +.br +CSRC_A (global) DESCA( CSRC_ ) The process column over which the + first column of the array A is + distributed. +.br +LLD_A (local) DESCA( LLD_ ) The leading dimension of the local + array. LLD_A >= MAX(1,LOCr(M_A)). + +Let K be the number of rows or columns of a distributed matrix, +and assume that its process grid has dimension p x q. +LOCr( K ) denotes the number of elements of K that a process +would receive if K were distributed over the p processes of its +process column. +.br +Similarly, LOCc( K ) denotes the number of elements of K that a +process would receive if K were distributed over the q processes of +its process row. +.br +The values of LOCr() and LOCc() may be determined via a call to the +ScaLAPACK tool function, NUMROC: +.br + LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), +.br + LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). +.br +An upper bound for these quantities may be computed by: +.br + LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A +.br + LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A + +PSSYEVX assumes IEEE 754 standard compliant arithmetic. To port +to a system which does not have IEEE 754 arithmetic, modify +the appropriate SLmake.inc file to include the compiler switch +-DNO_IEEE. This switch only affects the compilation of pslaiect.c. 
+.SH ARGUMENTS + NP = the number of rows local to a given process. + NQ = the number of columns local to a given process. + +.tp 8 +JOBZ (global input) CHARACTER*1 + Specifies whether or not to compute the eigenvectors: + = 'N': Compute eigenvalues only. + = 'V': Compute eigenvalues and eigenvectors. + +.tp 8 +RANGE (global input) CHARACTER*1 + = 'A': all eigenvalues will be found. + = 'V': all eigenvalues in the interval [VL,VU] will be found. + = 'I': the IL-th through IU-th eigenvalues will be found. + +.tp 8 +UPLO (global input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + +.tp 8 +N (global input) INTEGER + The number of rows and columns of the matrix A. N >= 0. + +.tp 8 +A (local input/workspace) block cyclic REAL array, + global dimension (N, N), + local dimension ( LLD_A, LOCc(JA+N-1) ) + + On entry, the symmetric matrix A. If UPLO = 'U', only the + upper triangular part of A is used to define the elements of + the symmetric matrix. If UPLO = 'L', only the lower + triangular part of A is used to define the elements of the + symmetric matrix. + + On exit, the lower triangle (if UPLO='L') or the upper + triangle (if UPLO='U') of A, including the diagonal, is + destroyed. + +.tp 8 +IA (global input) INTEGER + A's global row index, which points to the beginning of the + submatrix which is to be operated on. + +.tp 8 +JA (global input) INTEGER + A's global column index, which points to the beginning of + the submatrix which is to be operated on. + +.tp 8 +DESCA (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix A. + If DESCA( CTXT_ ) is incorrect, PSSYEVX cannot guarantee + correct error reporting. + +.tp 8 +VL (global input) REAL + If RANGE='V', the lower bound of the interval to be searched + for eigenvalues. Not referenced if RANGE = 'A' or 'I'. 
+ +.tp 8 +VU (global input) REAL + If RANGE='V', the upper bound of the interval to be searched + for eigenvalues. Not referenced if RANGE = 'A' or 'I'. + +.tp 8 +IL (global input) INTEGER + If RANGE='I', the index (from smallest to largest) of the + smallest eigenvalue to be returned. IL >= 1. + Not referenced if RANGE = 'A' or 'V'. + +.tp 8 +IU (global input) INTEGER + If RANGE='I', the index (from smallest to largest) of the + largest eigenvalue to be returned. min(IL,N) <= IU <= N. + Not referenced if RANGE = 'A' or 'V'. + +.tp 8 +ABSTOL (global input) REAL + If JOBZ='V', setting ABSTOL to PSLAMCH( CONTEXT, 'U') yields + the most orthogonal eigenvectors. + + The absolute error tolerance for the eigenvalues. + An approximate eigenvalue is accepted as converged + when it is determined to lie in an interval [a,b] + of width less than or equal to + + ABSTOL + EPS * max( |a|,|b| ) , + + where EPS is the machine precision. If ABSTOL is less than + or equal to zero, then EPS*norm(T) will be used in its place, + where norm(T) is the 1-norm of the tridiagonal matrix + obtained by reducing A to tridiagonal form. + + Eigenvalues will be computed most accurately when ABSTOL is + set to twice the underflow threshold 2*PSLAMCH('S') not zero. + If this routine returns with ((MOD(INFO,2).NE.0) .OR. + (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or + eigenvectors did not converge, try setting ABSTOL to + 2*PSLAMCH('S'). + + See "Computing Small Singular Values of Bidiagonal Matrices + with Guaranteed High Relative Accuracy," by Demmel and + Kahan, LAPACK Working Note #3. + + See "On the correctness of Parallel Bisection in Floating + Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70 + +.tp 8 +M (global output) INTEGER + Total number of eigenvalues found. 0 <= M <= N. + +.tp 8 +NZ (global output) INTEGER + Total number of eigenvectors computed. 0 <= NZ <= M. + The number of columns of Z that are filled. + If JOBZ .NE. 'V', NZ is not referenced. + If JOBZ .EQ. 
'V', NZ = M unless the user supplies + insufficient space and PSSYEVX is not able to detect this + before beginning computation. To get all the eigenvectors + requested, the user must supply both sufficient + space to hold the eigenvectors in Z (M .LE. DESCZ(N_)) + and sufficient workspace to compute them. (See LWORK below.) + PSSYEVX is always able to detect insufficient space without + computation unless RANGE .EQ. 'V'. + +.tp 8 +W (global output) REAL array, dimension (N) + On normal exit, the first M entries contain the selected + eigenvalues in ascending order. + +.tp 8 +ORFAC (global input) REAL + Specifies which eigenvectors should be reorthogonalized. + Eigenvectors that correspond to eigenvalues which are within + tol=ORFAC*norm(A) of each other are to be reorthogonalized. + However, if the workspace is insufficient (see LWORK), + tol may be decreased until all eigenvectors to be + reorthogonalized can be stored in one process. + No reorthogonalization will be done if ORFAC equals zero. + A default value of 10^-3 is used if ORFAC is negative. + ORFAC should be identical on all processes. + +.tp 8 +Z (local output) REAL array, + global dimension (N, N), + local dimension ( LLD_Z, LOCc(JZ+N-1) ) + If JOBZ = 'V', then on normal exit the first M columns of Z + contain the orthonormal eigenvectors of the matrix + corresponding to the selected eigenvalues. If an eigenvector + fails to converge, then that column of Z contains the latest + approximation to the eigenvector, and the index of the + eigenvector is returned in IFAIL. + If JOBZ = 'N', then Z is not referenced. + +.tp 8 +IZ (global input) INTEGER + Z's global row index, which points to the beginning of the + submatrix which is to be operated on. + +.tp 8 +JZ (global input) INTEGER + Z's global column index, which points to the beginning of + the submatrix which is to be operated on. + +.tp 8 +DESCZ (global and local input) INTEGER array of dimension DLEN_. 
+ The array descriptor for the distributed matrix Z. + DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) + +.tp 8 +WORK (local workspace/output) REAL array, + dimension (LWORK) + On return, WORK(1) contains the optimal amount of + workspace required for efficient execution. + if JOBZ='N' WORK(1) = optimal amount of workspace + required to compute eigenvalues efficiently + if JOBZ='V' WORK(1) = optimal amount of workspace + required to compute eigenvalues and eigenvectors + efficiently with no guarantee on orthogonality. + If RANGE='V', it is assumed that all eigenvectors + may be required. + +.tp 8 +LWORK (local input) INTEGER + Size of WORK + See below for definitions of variables used to define LWORK. + If no eigenvectors are requested (JOBZ = 'N') then + LWORK >= 5 * N + MAX( 5 * NN, NB * ( NP0 + 1 ) ) + If eigenvectors are requested (JOBZ = 'V' ) then + the amount of workspace required to guarantee that all + eigenvectors are computed is: + LWORK >= 5*N + MAX( 5*NN, NP0 * MQ0 + 2 * NB * NB ) + + ICEIL( NEIG, NPROW*NPCOL)*NN + + The computed eigenvectors may not be orthogonal if the + minimal workspace is supplied and ORFAC is too small. 
+ If you want to guarantee orthogonality (at the cost + of potentially poor performance) you should add + the following to LWORK: + (CLUSTERSIZE-1)*N + where CLUSTERSIZE is the number of eigenvalues in the + largest cluster, where a cluster is defined as a set of + close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) | + W(J+1) <= W(J) + ORFAC*2*norm(A) } + Variable definitions: + NEIG = number of eigenvectors requested + NB = DESCA( MB_ ) = DESCA( NB_ ) = + DESCZ( MB_ ) = DESCZ( NB_ ) + NN = MAX( N, NB, 2 ) + DESCA( RSRC_ ) = DESCA( CSRC_ ) = DESCZ( RSRC_ ) = + DESCZ( CSRC_ ) = 0 + NP0 = NUMROC( NN, NB, 0, 0, NPROW ) + MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) + ICEIL( X, Y ) is a ScaLAPACK function returning + ceiling(X/Y) + + When LWORK is too small: + If LWORK is too small to guarantee orthogonality, + PSSYEVX attempts to maintain orthogonality in + the clusters with the smallest + spacing between the eigenvalues. + If LWORK is too small to compute all the eigenvectors + requested, no computation is performed and INFO=-23 + is returned. Note that when RANGE='V', PSSYEVX does + not know how many eigenvectors are requested until + the eigenvalues are computed. Therefore, when RANGE='V' + and as long as LWORK is large enough to allow PSSYEVX to + compute the eigenvalues, PSSYEVX will compute the + eigenvalues and as many eigenvectors as it can. + + Relationship between workspace, orthogonality & performance: + Greater performance can be achieved if adequate workspace + is provided. On the other hand, in some situations, + performance can decrease as the workspace provided + increases above the workspace amount shown below: + + For optimal performance, greater workspace may be + needed, i.e. 
+ LWORK >= MAX( LWORK, 5*N + NSYTRD_LWOPT ) + Where: + LWORK, as defined previously, depends upon the number + of eigenvectors requested, and + NSYTRD_LWOPT = N + 2*( ANB+1 )*( 4*NPS+2 ) + + ( NPS + 3 ) * NPS + + ANB = PJLAENV( DESCA( CTXT_), 3, 'PSSYTTRD', 'L', + 0, 0, 0, 0) + SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) ) + NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) + + NUMROC is a ScaLAPACK tool function; + PJLAENV is a ScaLAPACK environmental inquiry function + MYROW, MYCOL, NPROW and NPCOL can be determined by + calling the subroutine BLACS_GRIDINFO. + + For large N, no extra workspace is needed, however the + biggest boost in performance comes for small N, so it + is wise to provide the extra workspace (typically less + than a Megabyte per process). + + If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing + enough space to compute all the eigenvectors + orthogonally will cause serious degradation in + performance. In the limit (i.e. CLUSTERSIZE = N-1) + PSSTEIN will perform no better than SSTEIN on 1 + processor. + For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing + all eigenvectors will increase the total execution time + by a factor of 2 or more. + For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will + grow as the square of the cluster size, all other factors + remaining equal and assuming enough workspace. Less + workspace means less reorthogonalization but faster + execution. + + If LWORK = -1, then LWORK is global input and a workspace + query is assumed; the routine only calculates the size + required for optimal performance for all work arrays. Each of + these values is returned in the first entry of the + corresponding work arrays, and no error message is issued by + PXERBLA. + +.tp 8 +IWORK (local workspace) INTEGER array + On return, IWORK(1) contains the amount of integer workspace + required. 
+ +.tp 8 +LIWORK (local input) INTEGER + size of IWORK + LIWORK >= 6 * NNP + Where: + NNP = MAX( N, NPROW*NPCOL + 1, 4 ) + If LIWORK = -1, then LIWORK is global input and a workspace + query is assumed; the routine only calculates the minimum + and optimal size for all work arrays. Each of these + values is returned in the first entry of the corresponding + work array, and no error message is issued by PXERBLA. + +.tp 8 +IFAIL (global output) INTEGER array, dimension (N) + If JOBZ = 'V', then on normal exit, the first M elements of + IFAIL are zero. If (MOD(INFO,2).NE.0) on exit, then + IFAIL contains the + indices of the eigenvectors that failed to converge. + If JOBZ = 'N', then IFAIL is not referenced. + +.tp 8 +ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL) + This array contains indices of eigenvectors corresponding to + a cluster of eigenvalues that could not be reorthogonalized + due to insufficient workspace (see LWORK, ORFAC and INFO). + Eigenvectors corresponding to clusters of eigenvalues indexed + ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be + reorthogonalized due to lack of workspace. Hence the + eigenvectors corresponding to these clusters may not be + orthogonal. ICLUSTR() is a zero terminated array. + (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if + K is the number of clusters + ICLUSTR is not referenced if JOBZ = 'N' + +.tp 8 +GAP (global output) REAL array, + dimension (NPROW*NPCOL) + This array contains the gap between eigenvalues whose + eigenvectors could not be reorthogonalized. The output + values in this array correspond to the clusters indicated + by the array ICLUSTR. As a result, the dot product between + eigenvectors corresponding to the I^th cluster may be as high + as ( C * n ) / GAP(I) where C is a small constant. 
+ +.tp 8 +INFO (global output) INTEGER + = 0: successful exit + < 0: If the i-th argument is an array and the j-entry had + an illegal value, then INFO = -(i*100+j), if the i-th + argument is a scalar and had an illegal value, then + INFO = -i. + > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors + failed to converge. Their indices are stored + in IFAIL. Ensure ABSTOL=2.0*PSLAMCH( 'U' ) + Send e-mail to scalapack@cs.utk.edu + if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding + to one or more clusters of eigenvalues could not be + reorthogonalized because of insufficient workspace. + The indices of the clusters are stored in the array + ICLUSTR. + if (MOD(INFO/4,2).NE.0), then space limit prevented + PSSYEVX from computing all of the eigenvectors + between VL and VU. The number of eigenvectors + computed is returned in NZ. + if (MOD(INFO/8,2).NE.0), then PSSTEBZ failed to compute + eigenvalues. Ensure ABSTOL=2.0*PSLAMCH( 'U' ) + Send e-mail to scalapack@cs.utk.edu --- scalapack-doc-1.5.orig/man/manl/pssygvx.l +++ scalapack-doc-1.5/man/manl/pssygvx.l @@ -1,6 +1,7 @@ .TH PSSYGVX l "12 May 1997" "LAPACK version 1.5" "LAPACK routine (version 1.5)" .SH NAME - +PSSYGVX - compute all the eigenvalues, and optionally, +the eigenvectors of a real generalized SY-definite eigenproblem .SH SYNOPSIS .TP 20 SUBROUTINE PSSYGVX( @@ -115,796 +116,484 @@ .ti +4 INTRINSIC ABS, ICHAR, MAX, MIN, MOD, REAL -.TP 20 -.ti +4 -IF( -BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* -RSRC_.LT.0 )RETURN -.TP 20 -.ti +4 -ICTXT -= DESCA( CTXT_ ) -.TP 20 -.ti +4 -CALL -BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) -.TP 20 -.ti +4 -INFO -= 0 -.TP 20 -.ti +4 -IF( -NPROW.EQ.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -( 900+CTXT_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+CTXT_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2600+CTXT_ ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 
-EPS -= PSLAMCH( DESCA( CTXT_ ), 'Precision' ) -.TP 20 -.ti +4 -WANTZ -= LSAME( JOBZ, 'V' ) -.TP 20 -.ti +4 -UPPER -= LSAME( UPLO, 'U' ) -.TP 20 -.ti +4 -ALLEIG -= LSAME( RANGE, 'A' ) -.TP 20 -.ti +4 -VALEIG -= LSAME( RANGE, 'V' ) -.TP 20 -.ti +4 -INDEIG -= LSAME( RANGE, 'I' ) -.TP 20 -.ti +4 -CALL -CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, INFO ) -.TP 20 -.ti +4 -CALL -CHK1MAT( N, 4, N, 4, IB, JB, DESCB, 13, INFO ) -.TP 20 -.ti +4 -CALL -CHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 26, INFO ) -.TP 20 -.ti +4 -IF( -INFO.EQ.0 ) THEN -.TP 20 -.ti +4 -IF( -MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN -.TP 20 -.ti +4 -WORK( -1 ) = ABSTOL -.TP 20 -.ti +4 -IF( -VALEIG ) THEN -.TP 20 -.ti +4 -WORK( -2 ) = VL -.TP 20 -.ti +4 -WORK( -3 ) = VU -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -WORK( -2 ) = ZERO -.TP 20 -.ti +4 -WORK( -3 ) = ZERO -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -SGEBS2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, WORK, -3 ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -CALL -SGEBR2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, WORK, 3, -0, 0 ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IAROW -= INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), -NPROW ) -.TP 20 -.ti +4 -IBROW -= INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), -NPROW ) -.TP 20 -.ti +4 -IACOL -= INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), -NPCOL ) -.TP 20 -.ti +4 -IBCOL -= INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), -NPCOL ) -.TP 20 -.ti +4 -IROFFA -= MOD( IA-1, DESCA( MB_ ) ) -.TP 20 -.ti +4 -ICOFFA -= MOD( JA-1, DESCA( NB_ ) ) -.TP 20 -.ti +4 -IROFFB -= MOD( IB-1, DESCB( MB_ ) ) -.TP 20 -.ti +4 -ICOFFB -= MOD( JB-1, DESCB( NB_ ) ) -.TP 20 -.ti +4 -LQUERY -= .FALSE. -.TP 20 -.ti +4 -IF( -LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) -LQUERY = .TRUE. -.TP 20 -.ti +4 -LIWMIN -= 6*MAX( N, ( NPROW*NPCOL )+1, 4 ) -.TP 20 -.ti +4 -NB -= DESCA( MB_ ) -.TP 20 -.ti +4 -NN -= MAX( N, NB, 2 ) -.TP 20 -.ti +4 -NP0 -= NUMROC( NN, NB, 0, 0, NPROW ) -.TP 20 -.ti +4 -IF( -( .NOT.WANTZ ) .OR. ( VALEIG .AND. 
( .NOT.LQUERY ) ) ) -THEN -.TP 20 -.ti +4 -LWMIN -= 5*N + MAX( 5*NN, NB*( NP0+1 ) ) -.TP 20 -.ti +4 -NEIG -= 0 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IF( -ALLEIG .OR. VALEIG ) THEN -.TP 20 -.ti +4 -NEIG -= N -.TP 20 -.ti +4 -ELSE -IF( INDEIG ) THEN -.TP 20 -.ti +4 -NEIG -= IU - IL + 1 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -MQ0 -= NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) -.TP 20 -.ti +4 -LWMIN -= 5*N + MAX( 5*NN, NP0*MQ0+2*NB*NB ) + -ICEIL( NEIG, NPROW*NPCOL )*NN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN -.TP 20 -.ti +4 -INFO -= -1 -.TP 20 -.ti +4 -ELSE -IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -2 -.TP 20 -.ti +4 -ELSE -IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN -.TP 20 -.ti +4 -INFO -= -3 -.TP 20 -.ti +4 -ELSE -IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN -.TP 20 -.ti +4 -INFO -= -4 -.TP 20 -.ti +4 -ELSE -IF( N.LT.0 ) THEN -.TP 20 -.ti +4 -INFO -= -5 -.TP 20 -.ti +4 -ELSE -IF( IROFFA.NE.0 ) THEN -.TP 20 -.ti +4 -INFO -= -7 -.TP 20 -.ti +4 -ELSE -IF( ICOFFA.NE.0 ) THEN -.TP 20 -.ti +4 -INFO -= -8 -.TP 20 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 900+NB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( M_ ).NE.DESCB( M_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+M_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( N_ ).NE.DESCB( N_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+N_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+MB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( NB_ ).NE.DESCB( NB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+NB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( RSRC_ ).NE.DESCB( RSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+RSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CSRC_ ).NE.DESCB( CSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+CSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+CTXT_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN 
-.TP 20 -.ti +4 -INFO -= -( 2200+M_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+N_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+MB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+NB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+RSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+CSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+CTXT_ ) -.TP 20 -.ti +4 -ELSE -IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN -.TP 20 -.ti +4 -INFO -= -11 -.TP 20 -.ti +4 -ELSE -IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN -.TP 20 -.ti +4 -INFO -= -12 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN -.TP 20 -.ti +4 -INFO -= -15 -.TP 20 -.ti +4 -ELSE -IF( INDEIG .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) -THEN -.TP 20 -.ti +4 -INFO -= -16 -.TP 20 -.ti +4 -ELSE -IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) -THEN -.TP 20 -.ti +4 -INFO -= -17 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. ( ABS( WORK( 2 )-VL ).GT.FIVE*EPS* -ABS( VL ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -14 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. ( ABS( WORK( 3 )-VU ).GT.FIVE*EPS* -ABS( VU ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -15 -.TP 20 -.ti +4 -ELSE -IF( ABS( WORK( 1 )-ABSTOL ).GT.FIVE*EPS*ABS( ABSTOL ) ) -THEN -.TP 20 -.ti +4 -INFO -= -18 -.TP 20 -.ti +4 -ELSE -IF( LWORK.LT.LWMIN .AND. LWORK.NE.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -28 -.TP 20 -.ti +4 -ELSE -IF( LIWORK.LT.LIWMIN .AND. 
LIWORK.NE.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -30 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM1( -1 ) = IBTYPE -.TP 20 -.ti +4 -IDUM2( -1 ) = 1 -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -IDUM1( -2 ) = ICHAR( 'V' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -2 ) = ICHAR( 'N' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -2 ) = 2 -.TP 20 -.ti +4 -IF( -UPPER ) THEN -.TP 20 -.ti +4 -IDUM1( -3 ) = ICHAR( 'U' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -3 ) = ICHAR( 'L' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -3 ) = 3 -.TP 20 -.ti +4 -IF( -ALLEIG ) THEN -.TP 20 -.ti +4 -IDUM1( -4 ) = ICHAR( 'A' ) -.TP 20 -.ti +4 -ELSE -IF( INDEIG ) THEN -.TP 20 -.ti +4 -IDUM1( -4 ) = ICHAR( 'I' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -4 ) = ICHAR( 'V' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -4 ) = 4 -.TP 20 -.ti +4 -IF( -LQUERY ) THEN -.TP 20 -.ti +4 -IDUM1( -5 ) = -1 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -5 ) = 1 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -5 ) = 5 -.TP 20 -.ti +4 -CALL -PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 9, N, 4, N, 4, IB, -JB, DESCB, 13, 5, IDUM1, IDUM2, INFO ) -.TP 20 -.ti +4 -CALL -PCHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 26, 0, IDUM1, IDUM2, -INFO ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -WORK( -1 ) = REAL( LWMIN ) -.TP 20 -.ti +4 -IWORK( -1 ) = LIWMIN -.TP 20 -.ti +4 -IF( -INFO.NE.0 ) THEN -.TP 20 -.ti +4 -CALL -PXERBLA( ICTXT, 'PSSYGVX ', -INFO ) -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -ELSE -IF( LQUERY ) THEN -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PSPOTRF( UPLO, N, B, IB, JB, DESCB, INFO ) -.TP 20 -.ti +4 -IF( -INFO.NE.0 ) THEN -.TP 20 -.ti +4 -IFAIL( -1 ) = INFO -.TP 20 -.ti +4 -INFO -= IERRNPD -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PSSYGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, -DESCB, SCALE, INFO ) -.TP 20 -.ti +4 -CALL -PSSYEVX( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, VU, IL, -IU, ABSTOL, M, 
NZ, W, ORFAC, Z, IZ, JZ, DESCZ, -WORK, LWORK, IWORK, LIWORK, IFAIL, ICLUSTR, GAP, -INFO ) -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -NEIG -= M -.TP 20 -.ti +4 -IF( -IBTYPE.EQ.1 .OR. IBTYPE.EQ.2 ) THEN -.TP 20 -.ti +4 -IF( -UPPER ) THEN -.TP 20 -.ti +4 -TRANS -= 'N' -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -TRANS -= 'T' -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PSTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, -B, IB, JB, DESCB, Z, IZ, JZ, DESCZ ) -.TP 20 -.ti +4 -ELSE -IF( IBTYPE.EQ.3 ) THEN -.TP 20 -.ti +4 -IF( -UPPER ) THEN -.TP 20 -.ti +4 -TRANS -= 'T' -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -TRANS -= 'N' -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PSTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, -B, IB, JB, DESCB, Z, IZ, JZ, DESCZ ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -SCALE.NE.ONE ) THEN -.TP 20 -.ti +4 -CALL -SSCAL( N, SCALE, W, 1 ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END .SH PURPOSE +PSSYGVX computes all the eigenvalues, and optionally, +the eigenvectors +of a real generalized SY-definite eigenproblem, of the form +sub( A )*x=(lambda)*sub( B )*x, sub( A )*sub( B )x=(lambda)*x, or +sub( B )*sub( A )*x=(lambda)*x. +Here sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ) is assumed to be +SY, and sub( B ) denoting B( IB:IB+N-1, JB:JB+N-1 ) is assumed +to be symmetric positive definite. +.SH NOTES +Each global data object is described by an associated description +vector. This vector stores the information required to establish +the mapping between an object element and its corresponding process +and memory location. + +Let A be a generic term for any 2D block cyclicly distributed array. +Such a global array has an associated description vector DESCA. +In the following comments, the character _ should be read as +"of the global array". 
+ +NOTATION STORED IN EXPLANATION +.br +--------------- -------------- -------------------------------------- +.br +DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, + DTYPE_A = 1. +.br +CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating + the BLACS process grid A is distribu- + ted over. The context itself is glo- + bal, but the handle (the integer + value) may vary. +.br +M_A (global) DESCA( M_ ) The number of rows in the global + array A. +.br +N_A (global) DESCA( N_ ) The number of columns in the global + array A. +.br +MB_A (global) DESCA( MB_ ) The blocking factor used to distribute + the rows of the array. +.br +NB_A (global) DESCA( NB_ ) The blocking factor used to distribute + the columns of the array. +.br +RSRC_A (global) DESCA( RSRC_ ) The process row over which the first + row of the array A is distributed. +.br +CSRC_A (global) DESCA( CSRC_ ) The process column over which the + first column of the array A is + distributed. +.br +LLD_A (local) DESCA( LLD_ ) The leading dimension of the local + array. LLD_A >= MAX(1,LOCr(M_A)). + +Let K be the number of rows or columns of a distributed matrix, +and assume that its process grid has dimension p x q. +LOCr( K ) denotes the number of elements of K that a process +would receive if K were distributed over the p processes of its +process column. +.br +Similarly, LOCc( K ) denotes the number of elements of K that a +process would receive if K were distributed over the q processes of +its process row. +.br +The values of LOCr() and LOCc() may be determined via a call to the +ScaLAPACK tool function, NUMROC: +.br + LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), +.br + LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). 
+.br +An upper bound for these quantities may be computed by: +.br + LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A +.br + LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A + +.SH ARGUMENTS + +.tp 8 +IBTYPE (global input) INTEGER + Specifies the problem type to be solved: + = 1: sub( A )*x = (lambda)*sub( B )*x + = 2: sub( A )*sub( B )*x = (lambda)*x + = 3: sub( B )*sub( A )*x = (lambda)*x + +.tp 8 +JOBZ (global input) CHARACTER*1 + = 'N': Compute eigenvalues only; + = 'V': Compute eigenvalues and eigenvectors. + +.tp 8 +RANGE (global input) CHARACTER*1 + = 'A': all eigenvalues will be found. + = 'V': all eigenvalues in the interval [VL,VU] will be found. + = 'I': the IL-th through IU-th eigenvalues will be found. + +.tp 8 +UPLO (global input) CHARACTER*1 + = 'U': Upper triangles of sub( A ) and sub( B ) are stored; + = 'L': Lower triangles of sub( A ) and sub( B ) are stored. + +.tp 8 +N (global input) INTEGER + The order of the matrices sub( A ) and sub( B ). N >= 0. + +.tp 8 +A (local input/local output) REAL pointer into the + local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). + On entry, this array contains the local pieces of the + N-by-N symmetric distributed matrix sub( A ). If UPLO = 'U', + the leading N-by-N upper triangular part of sub( A ) contains + the upper triangular part of the matrix. If UPLO = 'L', the + leading N-by-N lower triangular part of sub( A ) contains + the lower triangular part of the matrix. + + On exit, if JOBZ = 'V', then if INFO = 0, sub( A ) contains + the distributed matrix Z of eigenvectors. The eigenvectors + are normalized as follows: + if IBTYPE = 1 or 2, Z**T*sub( B )*Z = I; + if IBTYPE = 3, Z**T*inv( sub( B ) )*Z = I. + If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') + or the lower triangle (if UPLO='L') of sub( A ), including + the diagonal, is destroyed. + +.tp 8 +IA (global input) INTEGER + The row index in the global array A indicating the first + row of sub( A ). 
+ +.tp 8 +JA (global input) INTEGER + The column index in the global array A indicating the + first column of sub( A ). + +.tp 8 +DESCA (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix A. + If DESCA( CTXT_ ) is incorrect, PSSYGVX cannot guarantee + correct error reporting. + +.tp 8 +B (local input/local output) REAL pointer into the + local memory to an array of dimension (LLD_B, LOCc(JB+N-1)). + On entry, this array contains the local pieces of the + N-by-N symmetric distributed matrix sub( B ). If UPLO = 'U', + the leading N-by-N upper triangular part of sub( B ) contains + the upper triangular part of the matrix. If UPLO = 'L', the + leading N-by-N lower triangular part of sub( B ) contains + the lower triangular part of the matrix. + +.tp 8 + On exit, if INFO <= N, the part of sub( B ) containing the + matrix is overwritten by the triangular factor U or L from + the Cholesky factorization sub( B ) = U**T*U or + sub( B ) = L*L**T. + +.tp 8 +IB (global input) INTEGER + The row index in the global array B indicating the first + row of sub( B ). + +.tp 8 +JB (global input) INTEGER + The column index in the global array B indicating the + first column of sub( B ). + +.tp 8 +DESCB (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix B. + DESCB( CTXT_ ) must equal DESCA( CTXT_ ) + +.tp 8 +VL (global input) REAL + If RANGE='V', the lower bound of the interval to be searched + for eigenvalues. Not referenced if RANGE = 'A' or 'I'. + +.tp 8 +VU (global input) REAL + If RANGE='V', the upper bound of the interval to be searched + for eigenvalues. Not referenced if RANGE = 'A' or 'I'. + +.tp 8 +IL (global input) INTEGER + If RANGE='I', the index (from smallest to largest) of the + smallest eigenvalue to be returned. IL >= 1. + Not referenced if RANGE = 'A' or 'V'. 
+ +.tp 8 +IU (global input) INTEGER + If RANGE='I', the index (from smallest to largest) of the + largest eigenvalue to be returned. min(IL,N) <= IU <= N. + Not referenced if RANGE = 'A' or 'V'. + +.tp 8 +ABSTOL (global input) REAL + If JOBZ='V', setting ABSTOL to PSLAMCH( CONTEXT, 'U') yields + the most orthogonal eigenvectors. + + The absolute error tolerance for the eigenvalues. + An approximate eigenvalue is accepted as converged + when it is determined to lie in an interval [a,b] + of width less than or equal to + + ABSTOL + EPS * max( |a|,|b| ) , + + where EPS is the machine precision. If ABSTOL is less than + or equal to zero, then EPS*norm(T) will be used in its place, + where norm(T) is the 1-norm of the tridiagonal matrix + obtained by reducing A to tridiagonal form. + + Eigenvalues will be computed most accurately when ABSTOL is + set to twice the underflow threshold 2*PSLAMCH('S') not zero. + If this routine returns with ((MOD(INFO,2).NE.0) .OR. + (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or + eigenvectors did not converge, try setting ABSTOL to + 2*PSLAMCH('S'). + + See "Computing Small Singular Values of Bidiagonal Matrices + with Guaranteed High Relative Accuracy," by Demmel and + Kahan, LAPACK Working Note #3. + + See "On the correctness of Parallel Bisection in Floating + Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70 + +.tp 8 +M (global output) INTEGER + Total number of eigenvalues found. 0 <= M <= N. + +.tp 8 +NZ (global output) INTEGER + Total number of eigenvectors computed. 0 <= NZ <= M. + The number of columns of Z that are filled. + If JOBZ .NE. 'V', NZ is not referenced. + If JOBZ .EQ. 'V', NZ = M unless the user supplies + insufficient space and PSSYGVX is not able to detect this + before beginning computation. To get all the eigenvectors + requested, the user must supply both sufficient + space to hold the eigenvectors in Z (M .LE. DESCZ(N_)) + and sufficient workspace to compute them. (See LWORK below.) 
+ PSSYGVX is always able to detect insufficient space without + computation unless RANGE .EQ. 'V'. + +.tp 8 +W (global output) REAL array, dimension (N) + On normal exit, the first M entries contain the selected + eigenvalues in ascending order. + +.tp 8 +ORFAC (global input) REAL + Specifies which eigenvectors should be reorthogonalized. + Eigenvectors that correspond to eigenvalues which are within + tol=ORFAC*norm(A) of each other are to be reorthogonalized. + However, if the workspace is insufficient (see LWORK), + tol may be decreased until all eigenvectors to be + reorthogonalized can be stored in one process. + No reorthogonalization will be done if ORFAC equals zero. + A default value of 10^-3 is used if ORFAC is negative. + ORFAC should be identical on all processes. + +.tp 8 +Z (local output) REAL array, + global dimension (N, N), + local dimension ( LLD_Z, LOCc(JZ+N-1) ) + If JOBZ = 'V', then on normal exit the first M columns of Z + contain the orthonormal eigenvectors of the matrix + corresponding to the selected eigenvalues. If an eigenvector + fails to converge, then that column of Z contains the latest + approximation to the eigenvector, and the index of the + eigenvector is returned in IFAIL. + If JOBZ = 'N', then Z is not referenced. + +.tp 8 +IZ (global input) INTEGER + The row index in the global array Z indicating the first + row of sub( Z ). + +.tp 8 +JZ (global input) INTEGER + The column index in the global array Z indicating the + first column of sub( Z ). + +.tp 8 +DESCZ (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix Z. + DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) + +.tp 8 +WORK (local workspace/output) REAL array, + dimension (LWORK) + if JOBZ='N' WORK(1) = optimal amount of workspace + required to compute eigenvalues efficiently + if JOBZ='V' WORK(1) = optimal amount of workspace + required to compute eigenvalues and eigenvectors + efficiently with no guarantee on orthogonality. 
+ If RANGE='V', it is assumed that all eigenvectors + may be required. + +.tp 8 +LWORK (local input) INTEGER + See below for definitions of variables used to define LWORK. + If no eigenvectors are requested (JOBZ = 'N') then + LWORK >= 5 * N + MAX( 5 * NN, NB * ( NP0 + 1 ) ) + If eigenvectors are requested (JOBZ = 'V' ) then + the amount of workspace required to guarantee that all + eigenvectors are computed is: + LWORK >= 5 * N + MAX( 5*NN, NP0 * MQ0 + 2 * NB * NB ) + + ICEIL( NEIG, NPROW*NPCOL)*NN + + The computed eigenvectors may not be orthogonal if the + minimal workspace is supplied and ORFAC is too small. + If you want to guarantee orthogonality (at the cost + of potentially poor performance) you should add + the following to LWORK: + (CLUSTERSIZE-1)*N + where CLUSTERSIZE is the number of eigenvalues in the + largest cluster, where a cluster is defined as a set of + close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) | + W(J+1) <= W(J) + ORFAC*2*norm(A) } + Variable definitions: + NEIG = number of eigenvectors requested + NB = DESCA( MB_ ) = DESCA( NB_ ) = DESCZ( MB_ ) = + DESCZ( NB_ ) + NN = MAX( N, NB, 2 ) + DESCA( RSRC_ ) = DESCA( CSRC_ ) = DESCZ( RSRC_ ) = + DESCZ( CSRC_ ) = 0 + NP0 = NUMROC( NN, NB, 0, 0, NPROW ) + MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) + ICEIL( X, Y ) is a ScaLAPACK function returning + ceiling(X/Y) + + When LWORK is too small: + If LWORK is too small to guarantee orthogonality, + PSSYGVX attempts to maintain orthogonality in + the clusters with the smallest + spacing between the eigenvalues. + If LWORK is too small to compute all the eigenvectors + requested, no computation is performed and INFO=-23 + is returned. Note that when RANGE='V', PSSYGVX does + not know how many eigenvectors are requested until + the eigenvalues are computed. Therefore, when RANGE='V' + and as long as LWORK is large enough to allow PSSYGVX to + compute the eigenvalues, PSSYGVX will compute the + eigenvalues and as many eigenvectors as it can.
+ + Relationship between workspace, orthogonality & performance: + Greater performance can be achieved if adequate workspace + is provided. On the other hand, in some situations, + performance can decrease as the workspace provided + increases above the workspace amount shown below: + + For optimal performance, greater workspace may be + needed, i.e. + LWORK >= MAX( LWORK, 5 * N + NSYTRD_LWOPT, + NSYGST_LWOPT ) + Where: + LWORK, as defined previously, depends upon the number + of eigenvectors requested, and + NSYTRD_LWOPT = N + 2*( ANB+1 )*( 4*NPS+2 ) + + ( NPS + 3 ) * NPS + NSYGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB + + ANB = PJLAENV( DESCA( CTXT_), 3, 'PSSYTTRD', 'L', + 0, 0, 0, 0) + SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) ) + NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) + NB = DESCA( MB_ ) + NP0 = NUMROC( N, NB, 0, 0, NPROW ) + NQ0 = NUMROC( N, NB, 0, 0, NPCOL ) + + NUMROC is a ScaLAPACK tool function; + PJLAENV is a ScaLAPACK environmental inquiry function + MYROW, MYCOL, NPROW and NPCOL can be determined by + calling the subroutine BLACS_GRIDINFO. + + For large N, no extra workspace is needed, however the + biggest boost in performance comes for small N, so it + is wise to provide the extra workspace (typically less + than a Megabyte per process). + + If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing + enough space to compute all the eigenvectors + orthogonally will cause serious degradation in + performance. In the limit (i.e. CLUSTERSIZE = N-1) + PSSTEIN will perform no better than SSTEIN on 1 processor. + For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing + all eigenvectors will increase the total execution time + by a factor of 2 or more. + For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will + grow as the square of the cluster size, all other factors + remaining equal and assuming enough workspace. Less + workspace means less reorthogonalization but faster + execution.
+ + If LWORK = -1, then LWORK is global input and a workspace + query is assumed; the routine only calculates the size + required for optimal performance on all work arrays. + Each of these values is returned in the first entry of the + corresponding work array, and no error message is issued by + PXERBLA. + + +.tp 8 +IWORK (local workspace) INTEGER array + On return, IWORK(1) contains the amount of integer workspace + required. + +.tp 8 +LIWORK (local input) INTEGER + size of IWORK + LIWORK >= 6 * NNP + Where: + NNP = MAX( N, NPROW*NPCOL + 1, 4 ) + + If LIWORK = -1, then LIWORK is global input and a workspace + query is assumed; the routine only calculates the minimum + and optimal size for all work arrays. Each of these + values is returned in the first entry of the corresponding + work array, and no error message is issued by PXERBLA. + +.tp 8 +IFAIL (output) INTEGER array, dimension (N) + IFAIL provides additional information when INFO .NE. 0 + If (MOD(INFO/16,2).NE.0) then IFAIL(1) indicates the order of + the smallest minor which is not positive definite. + If (MOD(INFO,2).NE.0) on exit, then IFAIL contains the + indices of the eigenvectors that failed to converge. + + If neither of the above error conditions hold and JOBZ = 'V', + then the first M elements of IFAIL are set to zero. + +.tp 8 +ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL) + This array contains indices of eigenvectors corresponding to + a cluster of eigenvalues that could not be reorthogonalized + due to insufficient workspace (see LWORK, ORFAC and INFO). + Eigenvectors corresponding to clusters of eigenvalues indexed + ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be + reorthogonalized due to lack of workspace. Hence the + eigenvectors corresponding to these clusters may not be + orthogonal. ICLUSTR() is a zero terminated array. + (ICLUSTR(2*K).NE.0 .AND. 
ICLUSTR(2*K+1).EQ.0) if and only if + K is the number of clusters + ICLUSTR is not referenced if JOBZ = 'N' + +.tp 8 +GAP (global output) REAL array, + dimension (NPROW*NPCOL) + This array contains the gap between eigenvalues whose + eigenvectors could not be reorthogonalized. The output + values in this array correspond to the clusters indicated + by the array ICLUSTR. As a result, the dot product between + eigenvectors corresponding to the I^th cluster may be as high + as ( C * n ) / GAP(I) where C is a small constant. + +.tp 8 +INFO (global output) INTEGER + = 0: successful exit + < 0: If the i-th argument is an array and the j-entry had + an illegal value, then INFO = -(i*100+j), if the i-th + argument is a scalar and had an illegal value, then + INFO = -i. + > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors + failed to converge. Their indices are stored + in IFAIL. Send e-mail to scalapack@cs.utk.edu + if (MOD(INFO/2,2).NE.0), then eigenvectors corresponding + to one or more clusters of eigenvalues could not be + reorthogonalized because of insufficient workspace. + The indices of the clusters are stored in the array + ICLUSTR. + if (MOD(INFO/4,2).NE.0), then space limit prevented + PSSYGVX from computing all of the eigenvectors + between VL and VU. The number of eigenvectors + computed is returned in NZ. + if (MOD(INFO/8,2).NE.0), then PSSTEBZ failed to + compute eigenvalues. + Send e-mail to scalapack@cs.utk.edu + if (MOD(INFO/16,2).NE.0), then B was not positive + definite. IFAIL(1) indicates the order of + the smallest minor which is not positive definite.
--- scalapack-doc-1.5.orig/man/manl/pzheevx.l +++ scalapack-doc-1.5/man/manl/pzheevx.l @@ -1,6 +1,8 @@ .TH PZHEEVX l "12 May 1997" "LAPACK version 1.5" "LAPACK routine (version 1.5)" .SH NAME - +PZHEEVX - compute selected eigenvalues and, optionally, eigenvectors +of a complex hermitian matrix A by calling the recommended sequence +of ScaLAPACK routines .SH SYNOPSIS .TP 20 SUBROUTINE PZHEEVX( @@ -121,1251 +123,420 @@ .ti +4 INTRINSIC ABS, DBLE, DCMPLX, ICHAR, MAX, MIN, MOD, SQRT -.TP 20 -.ti +4 -IF( -BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* -RSRC_.LT.0 )RETURN -.TP 20 -.ti +4 -QUICKRETURN -= ( N.EQ.0 ) -.TP 20 -.ti +4 -CALL -BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL ) -.TP 20 -.ti +4 -INFO -= 0 -.TP 20 -.ti +4 -IF( -NPROW.EQ.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -( 800+CTXT_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+CTXT_ ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -CALL -CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 8, INFO ) -.TP 20 -.ti +4 -CALL -CHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 21, INFO ) -.TP 20 -.ti +4 -IF( -INFO.EQ.0 ) THEN -.TP 20 -.ti +4 -SAFMIN -= PDLAMCH( DESCA( CTXT_ ), 'Safe minimum' ) -.TP 20 -.ti +4 -EPS -= PDLAMCH( DESCA( CTXT_ ), 'Precision' ) -.TP 20 -.ti +4 -SMLNUM -= SAFMIN / EPS -.TP 20 -.ti +4 -BIGNUM -= ONE / SMLNUM -.TP 20 -.ti +4 -RMIN -= SQRT( SMLNUM ) -.TP 20 -.ti +4 -RMAX -= MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) -.TP 20 -.ti +4 -NPROCS -= NPROW*NPCOL -.TP 20 -.ti +4 -LOWER -= LSAME( UPLO, 'L' ) -.TP 20 -.ti +4 -WANTZ -= LSAME( JOBZ, 'V' ) -.TP 20 -.ti +4 -ALLEIG -= LSAME( RANGE, 'A' ) -.TP 20 -.ti +4 -VALEIG -= LSAME( RANGE, 'V' ) -.TP 20 -.ti +4 -INDEIG -= LSAME( RANGE, 'I' ) -.TP 20 -.ti +4 -INDTAU -= 1 -.TP 20 -.ti +4 -INDWORK -= INDTAU + N -.TP 20 -.ti +4 -LLWORK -= LWORK - INDWORK + 1 -.TP 20 -.ti +4 -INDE -= 1 -.TP 20 -.ti +4 -INDD -= INDE + N -.TP 20 -.ti +4 -INDD2 -= INDD + N -.TP 20 -.ti +4 -INDE2 -= INDD2 + N -.TP 20 -.ti +4 
-INDRWORK -= INDE2 + N -.TP 20 -.ti +4 -LLRWORK -= LRWORK - INDRWORK + 1 -.TP 20 -.ti +4 -ISIZESTEIN -= 3*N + NPROCS + 1 -.TP 20 -.ti +4 -ISIZESTEBZ -= MAX( 4*N, 14, NPROCS ) -.TP 20 -.ti +4 -INDIBL -= ( MAX( ISIZESTEIN, ISIZESTEBZ ) ) + 1 -.TP 20 -.ti +4 -INDISP -= INDIBL + N -.TP 20 -.ti +4 -LQUERY -= .FALSE. -.TP 20 -.ti +4 -IF( -LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) -LQUERY = .TRUE. -.TP 20 -.ti +4 -NNP -= MAX( N, NPROCS+1, 4 ) -.TP 20 -.ti +4 -LIWMIN -= 6*NNP -.TP 20 -.ti +4 -NPROCS -= NPROW*NPCOL -.TP 20 -.ti +4 -NB_A -= DESCA( NB_ ) -.TP 20 -.ti +4 -MB_A -= DESCA( MB_ ) -.TP 20 -.ti +4 -NB_Z -= DESCZ( NB_ ) -.TP 20 -.ti +4 -MB_Z -= DESCZ( MB_ ) -.TP 20 -.ti +4 -NB -= NB_A -.TP 20 -.ti +4 -NN -= MAX( N, NB, 2 ) -.TP 20 -.ti +4 -RSRC_A -= DESCA( RSRC_ ) -.TP 20 -.ti +4 -CSRC_A -= DESCA( CSRC_ ) -.TP 20 -.ti +4 -RSRC_Z -= DESCZ( RSRC_ ) -.TP 20 -.ti +4 -IROFFA -= MOD( IA-1, MB_A ) -.TP 20 -.ti +4 -ICOFFA -= MOD( JA-1, NB_A ) -.TP 20 -.ti +4 -IROFFZ -= MOD( IZ-1, MB_A ) -.TP 20 -.ti +4 -IAROW -= INDXG2P( 1, NB_A, MYROW, RSRC_A, NPROW ) -.TP 20 -.ti +4 -IACOL -= INDXG2P( 1, MB_A, MYCOL, CSRC_A, NPCOL ) -.TP 20 -.ti +4 -IZROW -= INDXG2P( 1, NB_A, MYROW, RSRC_Z, NPROW ) -.TP 20 -.ti +4 -NP0 -= NUMROC( N+IROFFA, NB_Z, MYROW, IAROW, NPROW ) -.TP 20 -.ti +4 -MQ0 -= NUMROC( N+ICOFFA, NB_Z, MYCOL, IACOL, NPCOL ) -.TP 20 -.ti +4 -IF( -( .NOT.WANTZ ) .OR. ( VALEIG .AND. ( .NOT.LQUERY ) ) ) -THEN -.TP 20 -.ti +4 -LWMIN -= N + MAX( NB*( NP0+1 ), 3 ) -.TP 20 -.ti +4 -LRWMIN -= 5*NN + 4*N -.TP 20 -.ti +4 -NEIG -= 0 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IF( -ALLEIG .OR. 
VALEIG ) THEN -.TP 20 -.ti +4 -NEIG -= N -.TP 20 -.ti +4 -ELSE -IF( INDEIG ) THEN -.TP 20 -.ti +4 -NEIG -= IU - IL + 1 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -MQ0 -= NUMROC( MAX( NEIG, NB, 2 ), NB, MYCOL, IACOL, -NPCOL ) -.TP 20 -.ti +4 -NQ0 -= NUMROC( NN, NB, 0, 0, NPCOL ) -.TP 20 -.ti +4 -LWMIN -= N + ( NP0+NQ0+NB )*NB -.TP 20 -.ti +4 -LRWMIN -= 4*N + MAX( 5*NN, NP0*MQ0 ) + -ICEIL( NEIG, NPROW*NPCOL )*NN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -INFO.EQ.0 ) THEN -.TP 20 -.ti +4 -IF( -MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN -.TP 20 -.ti +4 -RWORK( -1 ) = ABSTOL -.TP 20 -.ti +4 -IF( -VALEIG ) THEN -.TP 20 -.ti +4 -RWORK( -2 ) = VL -.TP 20 -.ti +4 -RWORK( -3 ) = VU -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -RWORK( -2 ) = ZERO -.TP 20 -.ti +4 -RWORK( -3 ) = ZERO -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -DGEBS2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, RWORK, -3 ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -CALL -DGEBR2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, RWORK, -3, 0, 0 ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -.NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -1 -.TP 20 -.ti +4 -ELSE -IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN -.TP 20 -.ti +4 -INFO -= -2 -.TP 20 -.ti +4 -ELSE -IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -3 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN -.TP 20 -.ti +4 -INFO -= -10 -.TP 20 -.ti +4 -ELSE -IF( INDEIG .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) -THEN -.TP 20 -.ti +4 -INFO -= -11 -.TP 20 -.ti +4 -ELSE -IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) -THEN -.TP 20 -.ti +4 -INFO -= -12 -.TP 20 -.ti +4 -ELSE -IF( LWORK.LT.LWMIN .AND. LWORK.NE.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -23 -.TP 20 -.ti +4 -ELSE -IF( LRWORK.LT.LRWMIN .AND. LRWORK.NE.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -25 -.TP 20 -.ti +4 -ELSE -IF( LIWORK.LT.LIWMIN .AND. LIWORK.NE.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -27 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. 
( ABS( RWORK( 2 )-VL ).GT.FIVE*EPS* -ABS( VL ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -9 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. ( ABS( RWORK( 3 )-VU ).GT.FIVE*EPS* -ABS( VU ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -10 -.TP 20 -.ti +4 -ELSE -IF( ABS( RWORK( 1 )-ABSTOL ).GT.FIVE*EPS* -ABS( ABSTOL ) ) THEN -.TP 20 -.ti +4 -INFO -= -13 -.TP 20 -.ti +4 -ELSE -IF( IROFFA.NE.IROFFZ ) THEN -.TP 20 -.ti +4 -INFO -= -19 -.TP 20 -.ti +4 -ELSE -IF( IROFFA.NE.0 ) THEN -.TP 20 -.ti +4 -INFO -= -6 -.TP 20 -.ti +4 -ELSE -IF( IAROW.NE.IZROW ) THEN -.TP 20 -.ti +4 -INFO -= -19 -.TP 20 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 800+NB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+M_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+N_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+MB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+NB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+RSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+CSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2100+CTXT_ ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -IDUM1( -1 ) = ICHAR( 'V' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -1 ) = ICHAR( 'N' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -1 ) = 1 -.TP 20 -.ti +4 -IF( -LOWER ) THEN -.TP 20 -.ti +4 -IDUM1( -2 ) = ICHAR( 'L' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -2 ) = ICHAR( 'U' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -2 ) = 2 -.TP 20 -.ti +4 -IF( -ALLEIG ) THEN -.TP 20 -.ti +4 -IDUM1( -3 ) = ICHAR( 'A' ) -.TP 20 -.ti +4 -ELSE -IF( INDEIG ) THEN -.TP 20 -.ti +4 -IDUM1( -3 ) = ICHAR( 
'I' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -3 ) = ICHAR( 'V' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -3 ) = 3 -.TP 20 -.ti +4 -IF( -LQUERY ) THEN -.TP 20 -.ti +4 -IDUM1( -4 ) = -1 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -4 ) = 1 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -4 ) = 4 -.TP 20 -.ti +4 -CALL -PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 8, N, 4, N, 4, IZ, -JZ, DESCZ, 21, 4, IDUM1, IDUM2, INFO ) -.TP 20 -.ti +4 -WORK( -1 ) = DCMPLX( LWMIN ) -.TP 20 -.ti +4 -RWORK( -1 ) = DBLE( LRWMIN ) -.TP 20 -.ti +4 -IWORK( -1 ) = LIWMIN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -INFO.NE.0 ) THEN -.TP 20 -.ti +4 -CALL -PXERBLA( DESCA( CTXT_ ), 'PZHEEVX', -INFO ) -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -ELSE -IF( LQUERY ) THEN -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -QUICKRETURN ) THEN -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -NZ -= 0 -.TP 20 -.ti +4 -ICLUSTR( -1 ) = 0 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -M -= 0 -.TP 20 -.ti +4 -WORK( -1 ) = DCMPLX( LWMIN ) -.TP 20 -.ti +4 -RWORK( -1 ) = DBLE( LRWMIN ) -.TP 20 -.ti +4 -IWORK( -1 ) = LIWMIN -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -ABSTLL -= ABSTOL -.TP 20 -.ti +4 -ISCALE -= 0 -.TP 20 -.ti +4 -IF( -VALEIG ) THEN -.TP 20 -.ti +4 -VLL -= VL -.TP 20 -.ti +4 -VUU -= VU -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -VLL -= ZERO -.TP 20 -.ti +4 -VUU -= ZERO -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -ANRM -= PZLANHE( '1', UPLO, N, A, IA, JA, DESCA, -RWORK( INDRWORK ) ) -.TP 20 -.ti +4 -IF( -ANRM.GT.ZERO .AND. 
ANRM.LT.RMIN ) THEN -.TP 20 -.ti +4 -ISCALE -= 1 -.TP 20 -.ti +4 -SIGMA -= RMIN / ANRM -.TP 20 -.ti +4 -ANRM -= ANRM*SIGMA -.TP 20 -.ti +4 -ELSE -IF( ANRM.GT.RMAX ) THEN -.TP 20 -.ti +4 -ISCALE -= 1 -.TP 20 -.ti +4 -SIGMA -= RMAX / ANRM -.TP 20 -.ti +4 -ANRM -= ANRM*SIGMA -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -ISCALE.EQ.1 ) THEN -.TP 20 -.ti +4 -CALL -PZLASCL( UPLO, ONE, SIGMA, N, N, A, IA, JA, DESCA, -IINFO ) -.TP 20 -.ti +4 -IF( -ABSTOL.GT.0 ) -ABSTLL = ABSTOL*SIGMA -.TP 20 -.ti +4 -IF( -VALEIG ) THEN -.TP 20 -.ti +4 -VLL -= VL*SIGMA -.TP 20 -.ti +4 -VUU -= VU*SIGMA -.TP 20 -.ti +4 -IF( -VUU.EQ.VLL ) THEN -.TP 20 -.ti +4 -VUU -= VUU + 2*MAX( ABS( VUU )*EPS, SAFMIN ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -LALLWORK -= LLRWORK -.TP 20 -.ti +4 -CALL -PZHETRD( UPLO, N, A, IA, JA, DESCA, RWORK( INDD ), -RWORK( INDE ), WORK( INDTAU ), WORK( INDWORK ), -LLWORK, IINFO ) -.TP 20 -.ti +4 -OFFSET -= 0 -.TP 20 -.ti +4 -IF( -IA.EQ.1 .AND. JA.EQ.1 .AND. RSRC_A.EQ.0 .AND. 
CSRC_A.EQ.0 ) -THEN -.TP 20 -.ti +4 -CALL -PDLARED1D( N, IA, JA, DESCA, RWORK( INDD ), -RWORK( INDD2 ), RWORK( INDRWORK ), LLRWORK ) -.TP 20 -.ti +4 -CALL -PDLARED1D( N, IA, JA, DESCA, RWORK( INDE ), -RWORK( INDE2 ), RWORK( INDRWORK ), LLRWORK ) -.TP 20 -.ti +4 -IF( -.NOT.LOWER ) -OFFSET = 1 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -DO -10 I = 1, N -.TP 20 -.ti +4 -CALL -PZELGET( 'A', ' ', WORK( INDD2+I-1 ), A, I+IA-1, -I+JA-1, DESCA ) -.TP 20 -.ti +4 -RWORK( -INDD2+I-1 ) = DBLE( WORK( INDD2+I-1 ) ) -.TP 20 -.ti +4 -10 -CONTINUE -.TP 20 -.ti +4 -IF( -LSAME( UPLO, 'U' ) ) THEN -.TP 20 -.ti +4 -DO -20 I = 1, N - 1 -.TP 20 -.ti +4 -CALL -PZELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA-1, -I+JA, DESCA ) -.TP 20 -.ti +4 -RWORK( -INDE2+I-1 ) = DBLE( WORK( INDE2+I-1 ) ) -.TP 20 -.ti +4 -20 -CONTINUE -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -DO -30 I = 1, N - 1 -.TP 20 -.ti +4 -CALL -PZELGET( 'A', ' ', WORK( INDE2+I-1 ), A, I+IA, -I+JA-1, DESCA ) -.TP 20 -.ti +4 -RWORK( -INDE2+I-1 ) = DBLE( WORK( INDE2+I-1 ) ) -.TP 20 -.ti +4 -30 -CONTINUE -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -ORDER -= 'b' -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -ORDER -= 'e' -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PDSTEBZ( DESCA( CTXT_ ), RANGE, ORDER, N, VLL, VUU, IL, IU, -ABSTLL, RWORK( INDD2 ), RWORK( INDE2+OFFSET ), M, -NSPLIT, W, IWORK( INDIBL ), IWORK( INDISP ), -RWORK( INDRWORK ), LLRWORK, IWORK( 1 ), ISIZESTEBZ, -IINFO ) -.TP 20 -.ti +4 -IF( -IINFO.NE.0 ) THEN -.TP 20 -.ti +4 -INFO -= INFO + IERREBZ -.TP 20 -.ti +4 -DO -40 I = 1, M -.TP 20 -.ti +4 -IWORK( -INDIBL+I-1 ) = ABS( IWORK( INDIBL+I-1 ) ) -.TP 20 -.ti +4 -40 -CONTINUE -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -IF( -VALEIG ) THEN -.TP 20 -.ti +4 -CALL -IGAMN2D( DESCA( CTXT_ ), 'A', ' ', 1, 1, LALLWORK, 1, -1, 1, -1, -1, -1 ) -.TP 20 -.ti +4 -MAXEIGS -= DESCZ( N_ ) -.TP 20 -.ti +4 -DO -50 NZ = MIN( MAXEIGS, M ), 0, -1 -.TP 20 -.ti +4 -MQ0 -= 
NUMROC( NZ, NB, 0, 0, NPCOL ) -.TP 20 -.ti +4 -SIZESTEIN -= ICEIL( NZ, NPROCS )*N + MAX( 5*N, NP0*MQ0 ) -.TP 20 -.ti +4 -SIZEORMTR -= MAX( ( NB*( NB-1 ) ) / 2, ( MQ0+NP0 )*NB ) + -NB*NB -.TP 20 -.ti +4 -SIZEHEEVX -= MAX( SIZESTEIN, SIZEORMTR ) -.TP 20 -.ti +4 -IF( -SIZEHEEVX.LE.LALLWORK ) -GO TO 60 -.TP 20 -.ti +4 -50 -CONTINUE -.TP 20 -.ti +4 -60 -CONTINUE -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -NZ -= M -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -NZ -= MAX( NZ, 0 ) -.TP 20 -.ti +4 -IF( -NZ.NE.M ) THEN -.TP 20 -.ti +4 -INFO -= INFO + IERRSPC -.TP 20 -.ti +4 -DO -70 I = 1, M -.TP 20 -.ti +4 -IFAIL( -I ) = 0 -.TP 20 -.ti +4 -70 -CONTINUE -.TP 20 -.ti +4 -IF( -NSPLIT.GT.1 ) THEN -.TP 20 -.ti +4 -CALL -DLASRT( 'I', M, W, IINFO ) -.TP 20 -.ti +4 -IF( -NZ.GT.0 ) THEN -.TP 20 -.ti +4 -VUU -= W( NZ ) - TEN*( EPS*ANRM+SAFMIN ) -.TP 20 -.ti +4 -IF( -VLL.GE.VUU ) THEN -.TP 20 -.ti +4 -NZZ -= 0 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -CALL -PDSTEBZ( DESCA( CTXT_ ), RANGE, ORDER, N, -VLL, VUU, IL, IU, ABSTLL, -RWORK( INDD2 ), RWORK( INDE2+ -OFFSET ), NZZ, NSPLIT, W, -IWORK( INDIBL ), IWORK( INDISP ), -RWORK( INDRWORK ), LLRWORK, -IWORK( 1 ), ISIZESTEBZ, IINFO ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -MOD( INFO / IERREBZ, 1 ).EQ.0 ) THEN -.TP 20 -.ti +4 -IF( -NZZ.GT.NZ .OR. 
IINFO.NE.0 ) THEN -.TP 20 -.ti +4 -INFO -= INFO + IERREBZ -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -NZ -= MIN( NZ, NZZ ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PZSTEIN( N, RWORK( INDD2 ), RWORK( INDE2+OFFSET ), NZ, W, -IWORK( INDIBL ), IWORK( INDISP ), ORFAC, Z, IZ, -JZ, DESCZ, RWORK( INDRWORK ), LALLWORK, -IWORK( 1 ), ISIZESTEIN, IFAIL, ICLUSTR, GAP, -IINFO ) -.TP 20 -.ti +4 -IF( -IINFO.GE.NZ+1 ) -INFO = INFO + IERRCLS -.TP 20 -.ti +4 -IF( -MOD( IINFO, NZ+1 ).NE.0 ) -INFO = INFO + IERREIN -.TP 20 -.ti +4 -IF( -NZ.GT.0 ) THEN -.TP 20 -.ti +4 -CALL -PZUNMTR( 'L', UPLO, 'N', N, NZ, A, IA, JA, DESCA, -WORK( INDTAU ), Z, IZ, JZ, DESCZ, -WORK( INDWORK ), LLWORK, IINFO ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -ISCALE.EQ.1 ) THEN -.TP 20 -.ti +4 -CALL -DSCAL( M, ONE / SIGMA, W, 1 ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -WORK( -1 ) = DCMPLX( LWMIN ) -.TP 20 -.ti +4 -RWORK( -1 ) = DBLE( LRWMIN ) -.TP 20 -.ti +4 -IWORK( -1 ) = LIWMIN -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END .SH PURPOSE +PZHEEVX computes selected eigenvalues and, optionally, eigenvectors +of a complex Hermitian matrix A by calling the recommended sequence +of ScaLAPACK routines. Eigenvalues/vectors can be selected by +specifying a range of values or a range of indices for the desired +eigenvalues. + +.SH NOTES + +Each global data object is described by an associated description +vector. This vector stores the information required to establish +the mapping between an object element and its corresponding process +and memory location. + +Let A be a generic term for any 2D block cyclically distributed array. +Such a global array has an associated description vector DESCA. +In the following comments, the character _ should be read as +"of the global array".
+ +NOTATION STORED IN EXPLANATION +.br +--------------- -------------- -------------------------------------- +.br +DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, + DTYPE_A = 1. +.br +CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating + the BLACS process grid A is distribu- + ted over. The context itself is glo- + bal, but the handle (the integer + value) may vary. +.br +M_A (global) DESCA( M_ ) The number of rows in the global + array A. +.br +N_A (global) DESCA( N_ ) The number of columns in the global + array A. +.br +MB_A (global) DESCA( MB_ ) The blocking factor used to distribute + the rows of the array. +.br +NB_A (global) DESCA( NB_ ) The blocking factor used to distribute + the columns of the array. +.br +RSRC_A (global) DESCA( RSRC_ ) The process row over which the first + row of the array A is distributed. +.br +CSRC_A (global) DESCA( CSRC_ ) The process column over which the + first column of the array A is + distributed. +.br +LLD_A (local) DESCA( LLD_ ) The leading dimension of the local + array. LLD_A >= MAX(1,LOCr(M_A)). + +.SH ARGUMENTS + + NP = the number of rows local to a given process. + NQ = the number of columns local to a given process. + +JOBZ (global input) CHARACTER*1 + Specifies whether or not to compute the eigenvectors: + = 'N': Compute eigenvalues only. + = 'V': Compute eigenvalues and eigenvectors. + +.tp 8 +RANGE (global input) CHARACTER*1 + = 'A': all eigenvalues will be found. + = 'V': all eigenvalues in the interval [VL,VU] will be found. + = 'I': the IL-th through IU-th eigenvalues will be found. + +.tp 8 +UPLO (global input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + Hermitian matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + +.tp 8 +N (global input) INTEGER + The number of rows and columns of the matrix A. N >= 0. 
+ +.tp 8 +A (local input/workspace) block cyclic COMPLEX*16 array, + global dimension (N, N), + local dimension ( LLD_A, LOCc(JA+N-1) ) + + On entry, the Hermitian matrix A. If UPLO = 'U', only the + upper triangular part of A is used to define the elements of + the Hermitian matrix. If UPLO = 'L', only the lower + triangular part of A is used to define the elements of the + Hermitian matrix. + + On exit, the lower triangle (if UPLO='L') or the upper + triangle (if UPLO='U') of A, including the diagonal, is + destroyed. + +.tp 8 +IA (global input) INTEGER + A's global row index, which points to the beginning of the + submatrix which is to be operated on. + +.tp 8 +JA (global input) INTEGER + A's global column index, which points to the beginning of + the submatrix which is to be operated on. + +.tp 8 +DESCA (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix A. + If DESCA( CTXT_ ) is incorrect, PZHEEVX cannot guarantee + correct error reporting. + +.tp 8 +VL (global input) DOUBLE PRECISION + If RANGE='V', the lower bound of the interval to be searched + for eigenvalues. Not referenced if RANGE = 'A' or 'I'. + +.tp 8 +VU (global input) DOUBLE PRECISION + If RANGE='V', the upper bound of the interval to be searched + for eigenvalues. Not referenced if RANGE = 'A' or 'I'. + +.tp 8 +IL (global input) INTEGER + If RANGE='I', the index (from smallest to largest) of the + smallest eigenvalue to be returned. IL >= 1. + Not referenced if RANGE = 'A' or 'V'. + +.tp 8 +IU (global input) INTEGER + If RANGE='I', the index (from smallest to largest) of the + largest eigenvalue to be returned. min(IL,N) <= IU <= N. + Not referenced if RANGE = 'A' or 'V'. + +.tp 8 +ABSTOL (global input) DOUBLE PRECISION + If JOBZ='V', setting ABSTOL to PDLAMCH( CONTEXT, 'U') yields + the most orthogonal eigenvectors. + + The absolute error tolerance for the eigenvalues. 
+ An approximate eigenvalue is accepted as converged + when it is determined to lie in an interval [a,b] + of width less than or equal to + ABSTOL + EPS * max( |a|,|b| ) , + where EPS is the machine precision. If ABSTOL is less than + or equal to zero, then EPS*norm(T) will be used in its place, + where norm(T) is the 1-norm of the tridiagonal matrix + obtained by reducing A to tridiagonal form. + + Eigenvalues will be computed most accurately when ABSTOL is + set to twice the underflow threshold 2*PDLAMCH('S') not zero. + If this routine returns with ((MOD(INFO,2).NE.0) .OR. + (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or + eigenvectors did not converge, try setting ABSTOL to + 2*PDLAMCH('S'). + + See "Computing Small Singular Values of Bidiagonal Matrices + with Guaranteed High Relative Accuracy," by Demmel and + Kahan, LAPACK Working Note #3. + + See "On the correctness of Parallel Bisection in Floating + Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70 + +.tp 8 +M (global output) INTEGER + Total number of eigenvalues found. 0 <= M <= N. + +.tp 8 +NZ (global output) INTEGER + Total number of eigenvectors computed. 0 <= NZ <= M. + The number of columns of Z that are filled. + If JOBZ .NE. 'V', NZ is not referenced. + If JOBZ .EQ. 'V', NZ = M unless the user supplies + insufficient space and PZHEEVX is not able to detect this + before beginning computation. To get all the eigenvectors + requested, the user must supply both sufficient + space to hold the eigenvectors in Z (M .LE. DESCZ(N_)) + and sufficient workspace to compute them. (See LWORK below.) + PZHEEVX is always able to detect insufficient space without + computation unless RANGE .EQ. 'V'. + +.tp 8 +W (global output) DOUBLE PRECISION array, dimension (N) + On normal exit, the first M entries contain the selected + eigenvalues in ascending order. + +.tp 8 +ORFAC (global input) DOUBLE PRECISION + Specifies which eigenvectors should be reorthogonalized. 
+ Eigenvectors that correspond to eigenvalues which are within + tol=ORFAC*norm(A) of each other are to be reorthogonalized. + However, if the workspace is insufficient (see LWORK), + tol may be decreased until all eigenvectors to be + reorthogonalized can be stored in one process. + No reorthogonalization will be done if ORFAC equals zero. + A default value of 10^-3 is used if ORFAC is negative. + ORFAC should be identical on all processes. + +.tp 8 +Z (local output) COMPLEX*16 array, + global dimension (N, N), + local dimension ( LLD_Z, LOCc(JZ+N-1) ) + If JOBZ = 'V', then on normal exit the first M columns of Z + contain the orthonormal eigenvectors of the matrix + corresponding to the selected eigenvalues. If an eigenvector + fails to converge, then that column of Z contains the latest + approximation to the eigenvector, and the index of the + eigenvector is returned in IFAIL. + If JOBZ = 'N', then Z is not referenced. + +.tp 8 +IZ (global input) INTEGER + Z's global row index, which points to the beginning of the + submatrix which is to be operated on. + +.tp 8 +JZ (global input) INTEGER + Z's global column index, which points to the beginning of + the submatrix which is to be operated on. + +.tp 8 +DESCZ (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix Z. + DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) + +.tp 8 +WORK (local workspace/output) COMPLEX*16 array, + dimension (LWORK) + WORK(1) returns adequate workspace to allow + optimal performance. + +.tp 8 +LWORK (local input) INTEGER + Size of WORK array. If only eigenvalues are requested: + LWORK >= N + MAX( NB * ( NP0 + 1 ), 3 ) + If eigenvectors are requested: + LWORK >= N + ( NP0 + NQ0 + NB ) * NB + with NQ0 = NUMROC( NN, NB, 0, 0, NPCOL ). + + For optimal performance, greater workspace is needed, i.e.
+ LWORK >= MAX( LWORK, NHETRD_LWORK ) + Where LWORK is as defined above, and + NHETRD_LWORK = N + 2*( ANB+1 )*( 4*NPS+2 ) + + ( NPS + 1 ) * NPS + + ICTXT = DESCA( CTXT_ ) + ANB = PJLAENV( ICTXT, 3, 'PZHETTRD', 'L', 0, 0, 0, 0 ) + SQNPC = SQRT( DBLE( NPROW * NPCOL ) ) + NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) + + NUMROC is a ScaLAPACK tool function; + PJLAENV is a ScaLAPACK environmental inquiry function + MYROW, MYCOL, NPROW and NPCOL can be determined by calling + the subroutine BLACS_GRIDINFO. + + If LWORK = -1, then LWORK is global input and a workspace + query is assumed; the routine only calculates the + optimal size for all work arrays. Each of these + values is returned in the first entry of the corresponding + work array, and no error message is issued by PXERBLA. + +.tp 8 +RWORK (local workspace/output) DOUBLE PRECISION array, + dimension (LRWORK) + On return, RWORK(1) contains the optimal amount of + workspace required for efficient execution. + if JOBZ='N' RWORK(1) = optimal amount of workspace + required to compute eigenvalues efficiently + if JOBZ='V' RWORK(1) = optimal amount of workspace + required to compute eigenvalues and eigenvectors + efficiently with no guarantee on orthogonality. + If RANGE='V', it is assumed that all eigenvectors + may be required. + +.tp 8 +LRWORK (local input) INTEGER + Size of RWORK + See below for definitions of variables used to define LRWORK. + If no eigenvectors are requested (JOBZ = 'N') then + LRWORK >= 5 * NN + 4 * N + If eigenvectors are requested (JOBZ = 'V' ) then + the amount of workspace required to guarantee that all + eigenvectors are computed is: + LRWORK >= 4*N + MAX( 5*NN, NP0 * MQ0 ) + + ICEIL( NEIG, NPROW*NPCOL)*NN + + The computed eigenvectors may not be orthogonal if the + minimal workspace is supplied and ORFAC is too small.
+ If you want to guarantee orthogonality (at the cost + of potentially poor performance) you should add + the following to LRWORK: + (CLUSTERSIZE-1)*N + where CLUSTERSIZE is the number of eigenvalues in the + largest cluster, where a cluster is defined as a set of + close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) | + W(J+1) <= W(J) + ORFAC*2*norm(A) } + Variable definitions: + NEIG = number of eigenvectors requested + NB = DESCA( MB_ ) = DESCA( NB_ ) = + DESCZ( MB_ ) = DESCZ( NB_ ) + NN = MAX( N, NB, 2 ) + DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) = + DESCZ( CSRC_ ) = 0 + NP0 = NUMROC( NN, NB, 0, 0, NPROW ) + MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) + ICEIL( X, Y ) is a ScaLAPACK function returning + ceiling(X/Y) + + When LRWORK is too small: + If LRWORK is too small to guarantee orthogonality, + PZHEEVX attempts to maintain orthogonality in + the clusters with the smallest + spacing between the eigenvalues. + If LRWORK is too small to compute all the eigenvectors + requested, no computation is performed and INFO=-25 + is returned. Note that when RANGE='V', PZHEEVX does + not know how many eigenvectors are requested until + the eigenvalues are computed. Therefore, when RANGE='V' + and as long as LRWORK is large enough to allow PZHEEVX to + compute the eigenvalues, PZHEEVX will compute the + eigenvalues and as many eigenvectors as it can. + + Relationship between workspace, orthogonality & performance: + If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing + enough space to compute all the eigenvectors + orthogonally will cause serious degradation in + performance. In the limit (i.e. CLUSTERSIZE = N-1) + PZSTEIN will perform no better than ZSTEIN on 1 + processor. + For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing + all eigenvectors will increase the total execution time + by a factor of 2 or more. 
+ For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will + grow as the square of the cluster size, all other factors + remaining equal and assuming enough workspace. Less + workspace means less reorthogonalization but faster + execution. + + If LRWORK = -1, then LRWORK is global input and a workspace + query is assumed; the routine only calculates the size + required for optimal performance for all work arrays. Each of + these values is returned in the first entry of the + corresponding work arrays, and no error message is issued by + PXERBLA. + +.tp 8 +IWORK (local workspace) INTEGER array + On return, IWORK(1) contains the amount of integer workspace + required. + +.tp 8 +LIWORK (local input) INTEGER + size of IWORK + LIWORK >= 6 * NNP + Where: + NNP = MAX( N, NPROW*NPCOL + 1, 4 ) + If LIWORK = -1, then LIWORK is global input and a workspace + query is assumed; the routine only calculates the minimum + and optimal size for all work arrays. Each of these + values is returned in the first entry of the corresponding + work array, and no error message is issued by PXERBLA. + +.tp 8 +IFAIL (global output) INTEGER array, dimension (N) + If JOBZ = 'V', then on normal exit, the first M elements of + IFAIL are zero. If (MOD(INFO,2).NE.0) on exit, then + IFAIL contains the + indices of the eigenvectors that failed to converge. + If JOBZ = 'N', then IFAIL is not referenced. + +.tp 8 +ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL) + This array contains indices of eigenvectors corresponding to + a cluster of eigenvalues that could not be reorthogonalized + due to insufficient workspace (see LWORK, ORFAC and INFO). + Eigenvectors corresponding to clusters of eigenvalues indexed + ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be + reorthogonalized due to lack of workspace. Hence the + eigenvectors corresponding to these clusters may not be + orthogonal. ICLUSTR() is a zero terminated array. + (ICLUSTR(2*K).NE.0 .AND. 
ICLUSTR(2*K+1).EQ.0) if and only if + K is the number of clusters + ICLUSTR is not referenced if JOBZ = 'N' + +.tp 8 +GAP (global output) DOUBLE PRECISION array, + dimension (NPROW*NPCOL) + This array contains the gap between eigenvalues whose + eigenvectors could not be reorthogonalized. The output + values in this array correspond to the clusters indicated + by the array ICLUSTR. As a result, the dot product between + eigenvectors corresponding to the I^th cluster may be as high + as ( C * n ) / GAP(I) where C is a small constant. + +.tp 8 +INFO (global output) INTEGER + = 0: successful exit + < 0: If the i-th argument is an array and the j-entry had + an illegal value, then INFO = -(i*100+j), if the i-th + argument is a scalar and had an illegal value, then + INFO = -i. + > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors + failed to converge. Their indices are stored + in IFAIL. Ensure ABSTOL=2.0*PDLAMCH( 'U' ) + Send e-mail to scalapack@cs.utk.edu + if (MOD(INFO/2,2).NE.0), then eigenvectors corresponding + to one or more clusters of eigenvalues could not be + reorthogonalized because of insufficient workspace. + The indices of the clusters are stored in the array + ICLUSTR. + if (MOD(INFO/4,2).NE.0), then space limit prevented + PZHEEVX from computing all of the eigenvectors + between VL and VU. The number of eigenvectors + computed is returned in NZ. + if (MOD(INFO/8,2).NE.0), then PZSTEBZ failed to compute + eigenvalues.
Ensure ABSTOL=2.0*PDLAMCH( 'U' ) + Send e-mail to scalapack@cs.utk.edu --- scalapack-doc-1.5.orig/man/manl/pzhegvx.l +++ scalapack-doc-1.5/man/manl/pzhegvx.l @@ -1,6 +1,7 @@ .TH PZHEGVX l "12 May 1997" "LAPACK version 1.5" "LAPACK routine (version 1.5)" .SH NAME - +PZHEGVX - compute all the eigenvalues, and optionally, +the eigenvectors of a complex generalized Hermitian-definite eigenproblem .SH SYNOPSIS .TP 20 SUBROUTINE PZHEGVX( @@ -119,816 +120,501 @@ .ti +4 INTRINSIC ABS, DBLE, DCMPLX, ICHAR, MAX, MIN, MOD -.TP 20 -.ti +4 -IF( -BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_* -RSRC_.LT.0 )RETURN -.TP 20 -.ti +4 -ICTXT -= DESCA( CTXT_ ) -.TP 20 -.ti +4 -CALL -BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) -.TP 20 -.ti +4 -INFO -= 0 -.TP 20 -.ti +4 -IF( -NPROW.EQ.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -( 900+CTXT_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+CTXT_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2600+CTXT_ ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -EPS -= PDLAMCH( DESCA( CTXT_ ), 'Precision' ) -.TP 20 -.ti +4 -WANTZ -= LSAME( JOBZ, 'V' ) -.TP 20 -.ti +4 -UPPER -= LSAME( UPLO, 'U' ) -.TP 20 -.ti +4 -ALLEIG -= LSAME( RANGE, 'A' ) -.TP 20 -.ti +4 -VALEIG -= LSAME( RANGE, 'V' ) -.TP 20 -.ti +4 -INDEIG -= LSAME( RANGE, 'I' ) -.TP 20 -.ti +4 -CALL -CHK1MAT( N, 4, N, 4, IA, JA, DESCA, 9, INFO ) -.TP 20 -.ti +4 -CALL -CHK1MAT( N, 4, N, 4, IB, JB, DESCB, 13, INFO ) -.TP 20 -.ti +4 -CALL -CHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 26, INFO ) -.TP 20 -.ti +4 -IF( -INFO.EQ.0 ) THEN -.TP 20 -.ti +4 -IF( -MYROW.EQ.0 .AND. 
MYCOL.EQ.0 ) THEN -.TP 20 -.ti +4 -RWORK( -1 ) = ABSTOL -.TP 20 -.ti +4 -IF( -VALEIG ) THEN -.TP 20 -.ti +4 -RWORK( -2 ) = VL -.TP 20 -.ti +4 -RWORK( -3 ) = VU -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -RWORK( -2 ) = ZERO -.TP 20 -.ti +4 -RWORK( -3 ) = ZERO -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -DGEBS2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, RWORK, -3 ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -CALL -DGEBR2D( DESCA( CTXT_ ), 'ALL', ' ', 3, 1, RWORK, -3, 0, 0 ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IAROW -= INDXG2P( IA, DESCA( MB_ ), MYROW, DESCA( RSRC_ ), -NPROW ) -.TP 20 -.ti +4 -IBROW -= INDXG2P( IB, DESCB( MB_ ), MYROW, DESCB( RSRC_ ), -NPROW ) -.TP 20 -.ti +4 -IACOL -= INDXG2P( JA, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), -NPCOL ) -.TP 20 -.ti +4 -IBCOL -= INDXG2P( JB, DESCB( NB_ ), MYCOL, DESCB( CSRC_ ), -NPCOL ) -.TP 20 -.ti +4 -IROFFA -= MOD( IA-1, DESCA( MB_ ) ) -.TP 20 -.ti +4 -ICOFFA -= MOD( JA-1, DESCA( NB_ ) ) -.TP 20 -.ti +4 -IROFFB -= MOD( IB-1, DESCB( MB_ ) ) -.TP 20 -.ti +4 -ICOFFB -= MOD( JB-1, DESCB( NB_ ) ) -.TP 20 -.ti +4 -LQUERY -= .FALSE. -.TP 20 -.ti +4 -IF( -LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) -LQUERY = .TRUE. -.TP 20 -.ti +4 -LIWMIN -= 6*MAX( N, ( NPROW*NPCOL )+1, 4 ) -.TP 20 -.ti +4 -NB -= DESCA( MB_ ) -.TP 20 -.ti +4 -NN -= MAX( N, NB, 2 ) -.TP 20 -.ti +4 -NP0 -= NUMROC( NN, NB, 0, 0, NPROW ) -.TP 20 -.ti +4 -IF( -( .NOT.WANTZ ) .OR. ( VALEIG .AND. ( .NOT.LQUERY ) ) ) -THEN -.TP 20 -.ti +4 -LWMIN -= N + MAX( NB*( NP0+1 ), 3 ) -.TP 20 -.ti +4 -LRWMIN -= 5*NN + 4*N -.TP 20 -.ti +4 -NEIG -= 0 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IF( -ALLEIG .OR. 
VALEIG ) THEN -.TP 20 -.ti +4 -NEIG -= N -.TP 20 -.ti +4 -ELSE -IF( INDEIG ) THEN -.TP 20 -.ti +4 -NEIG -= IU - IL + 1 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -MQ0 -= NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) -.TP 20 -.ti +4 -LWMIN -= N + ( NP0+MQ0+NB )*NB -.TP 20 -.ti +4 -LRWMIN -= 4*N + MAX( 5*NN, NP0*MQ0 ) + -ICEIL( NEIG, NPROW*NPCOL )*NN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -IBTYPE.LT.1 .OR. IBTYPE.GT.3 ) THEN -.TP 20 -.ti +4 -INFO -= -1 -.TP 20 -.ti +4 -ELSE -IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -2 -.TP 20 -.ti +4 -ELSE -IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN -.TP 20 -.ti +4 -INFO -= -3 -.TP 20 -.ti +4 -ELSE -IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN -.TP 20 -.ti +4 -INFO -= -4 -.TP 20 -.ti +4 -ELSE -IF( N.LT.0 ) THEN -.TP 20 -.ti +4 -INFO -= -5 -.TP 20 -.ti +4 -ELSE -IF( IROFFA.NE.0 ) THEN -.TP 20 -.ti +4 -INFO -= -7 -.TP 20 -.ti +4 -ELSE -IF( ICOFFA.NE.0 ) THEN -.TP 20 -.ti +4 -INFO -= -8 -.TP 20 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCA( NB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 900+NB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( M_ ).NE.DESCB( M_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+M_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( N_ ).NE.DESCB( N_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+N_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCB( MB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+MB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( NB_ ).NE.DESCB( NB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+NB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( RSRC_ ).NE.DESCB( RSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+RSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CSRC_ ).NE.DESCB( CSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+CSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCB( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 1300+CTXT_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( M_ ).NE.DESCZ( M_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+M_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( N_ ).NE.DESCZ( N_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 
2200+N_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( MB_ ).NE.DESCZ( MB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+MB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( NB_ ).NE.DESCZ( NB_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+NB_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( RSRC_ ).NE.DESCZ( RSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+RSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CSRC_ ).NE.DESCZ( CSRC_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+CSRC_ ) -.TP 20 -.ti +4 -ELSE -IF( DESCA( CTXT_ ).NE.DESCZ( CTXT_ ) ) THEN -.TP 20 -.ti +4 -INFO -= -( 2200+CTXT_ ) -.TP 20 -.ti +4 -ELSE -IF( IROFFB.NE.0 .OR. IBROW.NE.IAROW ) THEN -.TP 20 -.ti +4 -INFO -= -11 -.TP 20 -.ti +4 -ELSE -IF( ICOFFB.NE.0 .OR. IBCOL.NE.IACOL ) THEN -.TP 20 -.ti +4 -INFO -= -12 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN -.TP 20 -.ti +4 -INFO -= -15 -.TP 20 -.ti +4 -ELSE -IF( INDEIG .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) -THEN -.TP 20 -.ti +4 -INFO -= -16 -.TP 20 -.ti +4 -ELSE -IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) -THEN -.TP 20 -.ti +4 -INFO -= -17 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. ( ABS( RWORK( 2 )-VL ).GT.FIVE*EPS* -ABS( VL ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -14 -.TP 20 -.ti +4 -ELSE -IF( VALEIG .AND. ( ABS( RWORK( 3 )-VU ).GT.FIVE*EPS* -ABS( VU ) ) ) THEN -.TP 20 -.ti +4 -INFO -= -15 -.TP 20 -.ti +4 -ELSE -IF( ABS( RWORK( 1 )-ABSTOL ).GT.FIVE*EPS* -ABS( ABSTOL ) ) THEN -.TP 20 -.ti +4 -INFO -= -18 -.TP 20 -.ti +4 -ELSE -IF( LWORK.LT.LWMIN .AND. LWORK.NE.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -28 -.TP 20 -.ti +4 -ELSE -IF( LRWORK.LT.LRWMIN .AND. LRWORK.NE.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -30 -.TP 20 -.ti +4 -ELSE -IF( LIWORK.LT.LIWMIN .AND. 
LIWORK.NE.-1 ) THEN -.TP 20 -.ti +4 -INFO -= -32 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM1( -1 ) = IBTYPE -.TP 20 -.ti +4 -IDUM2( -1 ) = 1 -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -IDUM1( -2 ) = ICHAR( 'V' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -2 ) = ICHAR( 'N' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -2 ) = 2 -.TP 20 -.ti +4 -IF( -UPPER ) THEN -.TP 20 -.ti +4 -IDUM1( -3 ) = ICHAR( 'U' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -3 ) = ICHAR( 'L' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -3 ) = 3 -.TP 20 -.ti +4 -IF( -ALLEIG ) THEN -.TP 20 -.ti +4 -IDUM1( -4 ) = ICHAR( 'A' ) -.TP 20 -.ti +4 -ELSE -IF( INDEIG ) THEN -.TP 20 -.ti +4 -IDUM1( -4 ) = ICHAR( 'I' ) -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -4 ) = ICHAR( 'V' ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -4 ) = 4 -.TP 20 -.ti +4 -IF( -LQUERY ) THEN -.TP 20 -.ti +4 -IDUM1( -5 ) = -1 -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -IDUM1( -5 ) = 1 -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IDUM2( -5 ) = 5 -.TP 20 -.ti +4 -CALL -PCHK2MAT( N, 4, N, 4, IA, JA, DESCA, 9, N, 4, N, 4, IB, -JB, DESCB, 13, 5, IDUM1, IDUM2, INFO ) -.TP 20 -.ti +4 -CALL -PCHK1MAT( N, 4, N, 4, IZ, JZ, DESCZ, 26, 0, IDUM1, IDUM2, -INFO ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -WORK( -1 ) = DCMPLX( DBLE( LWMIN ) ) -.TP 20 -.ti +4 -RWORK( -1 ) = DBLE( LRWMIN ) -.TP 20 -.ti +4 -IWORK( -1 ) = LIWMIN -.TP 20 -.ti +4 -IF( -INFO.NE.0 ) THEN -.TP 20 -.ti +4 -CALL -PXERBLA( ICTXT, 'PZHEGVX ', -INFO ) -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -ELSE -IF( LQUERY ) THEN -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PZPOTRF( UPLO, N, B, IB, JB, DESCB, INFO ) -.TP 20 -.ti +4 -IF( -INFO.NE.0 ) THEN -.TP 20 -.ti +4 -IFAIL( -1 ) = INFO -.TP 20 -.ti +4 -INFO -= IERRNPD -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PZHEGST( IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, -DESCB, SCALE, INFO ) -.TP 20 -.ti +4 -CALL -PZHEEVX( JOBZ, RANGE, 
UPLO, N, A, IA, JA, DESCA, VL, VU, IL, -IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, -WORK, LWORK, RWORK, LRWORK, IWORK, LIWORK, IFAIL, -ICLUSTR, GAP, INFO ) -.TP 20 -.ti +4 -IF( -WANTZ ) THEN -.TP 20 -.ti +4 -NEIG -= M -.TP 20 -.ti +4 -IF( -IBTYPE.EQ.1 .OR. IBTYPE.EQ.2 ) THEN -.TP 20 -.ti +4 -IF( -UPPER ) THEN -.TP 20 -.ti +4 -TRANS -= 'N' -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -TRANS -= 'C' -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PZTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, -B, IB, JB, DESCB, Z, IZ, JZ, DESCZ ) -.TP 20 -.ti +4 -ELSE -IF( IBTYPE.EQ.3 ) THEN -.TP 20 -.ti +4 -IF( -UPPER ) THEN -.TP 20 -.ti +4 -TRANS -= 'C' -.TP 20 -.ti +4 -ELSE -.TP 20 -.ti +4 -TRANS -= 'N' -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -CALL -PZTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, -B, IB, JB, DESCB, Z, IZ, JZ, DESCZ ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -IF( -SCALE.NE.ONE ) THEN -.TP 20 -.ti +4 -CALL -DSCAL( N, SCALE, W, 1 ) -.TP 20 -.ti +4 -END -IF -.TP 20 -.ti +4 -RETURN -.TP 20 -.ti +4 -END .SH PURPOSE +PZHEGVX computes all the eigenvalues, and optionally, +the eigenvectors +of a complex generalized Hermitian-definite eigenproblem, of the form +sub( A )*x=(lambda)*sub( B )*x, sub( A )*sub( B )x=(lambda)*x, or +sub( B )*sub( A )*x=(lambda)*x. +Here sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ) is assumed to be +Hermitian, and sub( B ) denoting B( IB:IB+N-1, JB:JB+N-1 ) is assumed +to be Hermitian positive definite. +.SH NOTES +Each global data object is described by an associated description +vector. This vector stores the information required to establish +the mapping between an object element and its corresponding process +and memory location. +.br +Let A be a generic term for any 2D block cyclicly distributed array. +Such a global array has an associated description vector DESCA. +In the following comments, the character _ should be read as +"of the global array". 
+.br + +NOTATION STORED IN EXPLANATION +.br +--------------- -------------- -------------------------------------- +.br +DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case, + DTYPE_A = 1. +.br +CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating + the BLACS process grid A is distribu- + ted over. The context itself is glo- + bal, but the handle (the integer + value) may vary. +.br +M_A (global) DESCA( M_ ) The number of rows in the global + array A. +.br +N_A (global) DESCA( N_ ) The number of columns in the global + array A. +.br +MB_A (global) DESCA( MB_ ) The blocking factor used to distribute + the rows of the array. +.br +NB_A (global) DESCA( NB_ ) The blocking factor used to distribute + the columns of the array. +.br +RSRC_A (global) DESCA( RSRC_ ) The process row over which the first + row of the array A is distributed. +.br +CSRC_A (global) DESCA( CSRC_ ) The process column over which the + first column of the array A is + distributed. +.br +LLD_A (local) DESCA( LLD_ ) The leading dimension of the local + array. LLD_A >= MAX(1,LOCr(M_A)). +.br +Let K be the number of rows or columns of a distributed matrix, +and assume that its process grid has dimension p x q. +LOCr( K ) denotes the number of elements of K that a process +would receive if K were distributed over the p processes of its +process column. +Similarly, LOCc( K ) denotes the number of elements of K that a +process would receive if K were distributed over the q processes of +its process row. +The values of LOCr() and LOCc() may be determined via a call to the +ScaLAPACK tool function, NUMROC: +.br + LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), +.br + LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). 
+.br +An upper bound for these quantities may be computed by: +.br + LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A +.br + LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A + +.SH ARGUMENTS + +.tp 8 +IBTYPE (global input) INTEGER + Specifies the problem type to be solved: + = 1: sub( A )*x = (lambda)*sub( B )*x + = 2: sub( A )*sub( B )*x = (lambda)*x + = 3: sub( B )*sub( A )*x = (lambda)*x + +.tp 8 +JOBZ (global input) CHARACTER*1 + = 'N': Compute eigenvalues only; + = 'V': Compute eigenvalues and eigenvectors. + +.tp 8 +RANGE (global input) CHARACTER*1 + = 'A': all eigenvalues will be found. + = 'V': all eigenvalues in the interval [VL,VU] will be found. + = 'I': the IL-th through IU-th eigenvalues will be found. + +.tp 8 +UPLO (global input) CHARACTER*1 + = 'U': Upper triangles of sub( A ) and sub( B ) are stored; + = 'L': Lower triangles of sub( A ) and sub( B ) are stored. + +.tp 8 +N (global input) INTEGER + The order of the matrices sub( A ) and sub( B ). N >= 0. + +.tp 8 +A (local input/local output) COMPLEX*16 pointer into the + local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). + On entry, this array contains the local pieces of the + N-by-N Hermitian distributed matrix sub( A ). If UPLO = 'U', + the leading N-by-N upper triangular part of sub( A ) contains + the upper triangular part of the matrix. If UPLO = 'L', the + leading N-by-N lower triangular part of sub( A ) contains + the lower triangular part of the matrix. + +.br + On exit, if JOBZ = 'V', then if INFO = 0, sub( A ) contains + the distributed matrix Z of eigenvectors. The eigenvectors + are normalized as follows: + if IBTYPE = 1 or 2, Z**H*sub( B )*Z = I; + if IBTYPE = 3, Z**H*inv( sub( B ) )*Z = I. + If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') + or the lower triangle (if UPLO='L') of sub( A ), including + the diagonal, is destroyed. + +.tp 8 +IA (global input) INTEGER + The row index in the global array A indicating the first + row of sub( A ). 
+ +.tp 8 +JA (global input) INTEGER + The column index in the global array A indicating the + first column of sub( A ). + +.tp 8 +DESCA (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix A. + If DESCA( CTXT_ ) is incorrect, PZHEGVX cannot guarantee + correct error reporting. + +.tp 8 +B (local input/local output) COMPLEX*16 pointer into the + local memory to an array of dimension (LLD_B, LOCc(JB+N-1)). + On entry, this array contains the local pieces of the + N-by-N Hermitian distributed matrix sub( B ). If UPLO = 'U', + the leading N-by-N upper triangular part of sub( B ) contains + the upper triangular part of the matrix. If UPLO = 'L', the + leading N-by-N lower triangular part of sub( B ) contains + the lower triangular part of the matrix. + +.br + On exit, if INFO <= N, the part of sub( B ) containing the + matrix is overwritten by the triangular factor U or L from + the Cholesky factorization sub( B ) = U**H*U or + sub( B ) = L*L**H. + +.tp 8 +IB (global input) INTEGER + The row index in the global array B indicating the first + row of sub( B ). + +.tp 8 +JB (global input) INTEGER + The column index in the global array B indicating the + first column of sub( B ). + +.tp 8 +DESCB (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix B. + DESCB( CTXT_ ) must equal DESCA( CTXT_ ) + +.tp 8 +VL (global input) DOUBLE PRECISION + If RANGE='V', the lower bound of the interval to be searched + for eigenvalues. Not referenced if RANGE = 'A' or 'I'. + +.tp 8 +VU (global input) DOUBLE PRECISION + If RANGE='V', the upper bound of the interval to be searched + for eigenvalues. Not referenced if RANGE = 'A' or 'I'. + +.tp 8 +IL (global input) INTEGER + If RANGE='I', the index (from smallest to largest) of the + smallest eigenvalue to be returned. IL >= 1. + Not referenced if RANGE = 'A' or 'V'. 
+ +.tp 8 +IU (global input) INTEGER + If RANGE='I', the index (from smallest to largest) of the + largest eigenvalue to be returned. min(IL,N) <= IU <= N. + Not referenced if RANGE = 'A' or 'V'. + +.tp 8 +ABSTOL (global input) DOUBLE PRECISION + If JOBZ='V', setting ABSTOL to PDLAMCH( CONTEXT, 'U') yields + the most orthogonal eigenvectors. + +.br + The absolute error tolerance for the eigenvalues. + An approximate eigenvalue is accepted as converged + when it is determined to lie in an interval [a,b] + of width less than or equal to + ABSTOL + EPS * max( |a|,|b| ) , + where EPS is the machine precision. If ABSTOL is less than + or equal to zero, then EPS*norm(T) will be used in its place, + where norm(T) is the 1-norm of the tridiagonal matrix + obtained by reducing A to tridiagonal form. + +.br + Eigenvalues will be computed most accurately when ABSTOL is + set to twice the underflow threshold 2*PDLAMCH('S') not zero. + If this routine returns with ((MOD(INFO,2).NE.0) .OR. + (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or + eigenvectors did not converge, try setting ABSTOL to + 2*PDLAMCH('S'). + +.br + See "Computing Small Singular Values of Bidiagonal Matrices + with Guaranteed High Relative Accuracy," by Demmel and + Kahan, LAPACK Working Note #3. + +.br + See "On the correctness of Parallel Bisection in Floating + Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70 + +.tp 8 +M (global output) INTEGER + Total number of eigenvalues found. 0 <= M <= N. + +.tp 8 +NZ (global output) INTEGER + Total number of eigenvectors computed. 0 <= NZ <= M. + The number of columns of Z that are filled. + If JOBZ .NE. 'V', NZ is not referenced. + If JOBZ .EQ. 'V', NZ = M unless the user supplies + insufficient space and PZHEGVX is not able to detect this + before beginning computation. To get all the eigenvectors + requested, the user must supply both sufficient + space to hold the eigenvectors in Z (M .LE. DESCZ(N_)) + and sufficient workspace to compute them. 
(See LWORK below.) + PZHEGVX is always able to detect insufficient space without + computation unless RANGE .EQ. 'V'. + +.tp 8 +W (global output) DOUBLE PRECISION array, dimension (N) + On normal exit, the first M entries contain the selected + eigenvalues in ascending order. + +.tp 8 +ORFAC (global input) DOUBLE PRECISION + Specifies which eigenvectors should be reorthogonalized. + Eigenvectors that correspond to eigenvalues which are within + tol=ORFAC*norm(A) of each other are to be reorthogonalized. + However, if the workspace is insufficient (see LWORK), + tol may be decreased until all eigenvectors to be + reorthogonalized can be stored in one process. + No reorthogonalization will be done if ORFAC equals zero. + A default value of 10^-3 is used if ORFAC is negative. + ORFAC should be identical on all processes. + +.tp 8 +Z (local output) COMPLEX*16 array, + global dimension (N, N), + local dimension ( LLD_Z, LOCc(JZ+N-1) ) + If JOBZ = 'V', then on normal exit the first M columns of Z + contain the orthonormal eigenvectors of the matrix + corresponding to the selected eigenvalues. If an eigenvector + fails to converge, then that column of Z contains the latest + approximation to the eigenvector, and the index of the + eigenvector is returned in IFAIL. + If JOBZ = 'N', then Z is not referenced. + +.tp 8 +IZ (global input) INTEGER + The row index in the global array Z indicating the first + row of sub( Z ). + +.tp 8 +JZ (global input) INTEGER + The column index in the global array Z indicating the + first column of sub( Z ). + +.tp 8 +DESCZ (global and local input) INTEGER array of dimension DLEN_. + The array descriptor for the distributed matrix Z. + DESCZ( CTXT_ ) must equal DESCA( CTXT_ ) + +.tp 8 +WORK (local workspace/output) COMPLEX*16 array, + dimension (LWORK) + WORK(1) returns the optimal workspace. + +.tp 8 +LWORK (local input) INTEGER + Size of WORK array. 
If only eigenvalues are requested: + LWORK >= N + MAX( NB * ( NP0 + 1 ), 3 ) + If eigenvectors are requested: + LWORK >= N + ( NP0 + MQ0 + NB ) * NB + with NQ0 = NUMROC( NN, NB, 0, 0, NPCOL ). + +.br + For optimal performance, greater workspace is needed, i.e. + LWORK >= MAX( LWORK, N + NHETRD_LWOPT, + NHEGST_LWOPT ) + Where LWORK is as defined above, and + NHETRD_LWOPT = 2*( ANB+1 )*( 4*NPS+2 ) + + ( NPS + 1 ) * NPS + NHEGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB + +.br + NB = DESCA( MB_ ) + NP0 = NUMROC( N, NB, 0, 0, NPROW ) + NQ0 = NUMROC( N, NB, 0, 0, NPCOL ) + ICTXT = DESCA( CTXT_ ) + ANB = PJLAENV( ICTXT, 3, 'PZHETTRD', 'L', 0, 0, 0, 0 ) + SQNPC = SQRT( DBLE( NPROW * NPCOL ) ) + NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB ) + +.br + NUMROC is a ScaLAPACK tool function; + PJLAENV is a ScaLAPACK environmental inquiry function + MYROW, MYCOL, NPROW and NPCOL can be determined by calling + the subroutine BLACS_GRIDINFO. + +.br + If LWORK = -1, then LWORK is global input and a workspace + query is assumed; the routine only calculates the optimal + size for all work arrays. Each of these values is returned + in the first entry of the corresponding work array, and no + error message is issued by PXERBLA. + +.tp 8 +RWORK (local workspace/output) DOUBLE PRECISION array, + dimension (LRWORK) + On return, RWORK(1) contains the amount of workspace + required for optimal efficiency + if JOBZ='N' RWORK(1) = optimal amount of workspace + required to compute eigenvalues efficiently + if JOBZ='V' RWORK(1) = optimal amount of workspace + required to compute eigenvalues and eigenvectors + efficiently with no guarantee on orthogonality. + If RANGE='V', it is assumed that all eigenvectors + may be required when computing optimal workspace. + +.tp 8 +LRWORK (local input) INTEGER + Size of RWORK + See below for definitions of variables used to define LRWORK.
+ If no eigenvectors are requested (JOBZ = 'N') then + LRWORK >= 5 * NN + 4 * N + If eigenvectors are requested (JOBZ = 'V' ) then + the amount of workspace required to guarantee that all + eigenvectors are computed is: + LRWORK >= 4*N + MAX( 5*NN, NP0 * MQ0 ) + + ICEIL( NEIG, NPROW*NPCOL)*NN + +.br + The computed eigenvectors may not be orthogonal if the + minimal workspace is supplied and ORFAC is too small. + If you want to guarantee orthogonality (at the cost + of potentially poor performance) you should add + the following to LRWORK: + (CLUSTERSIZE-1)*N + where CLUSTERSIZE is the number of eigenvalues in the + largest cluster, where a cluster is defined as a set of + close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) | + W(J+1) <= W(J) + ORFAC*2*norm(A) } + Variable definitions: + NEIG = number of eigenvectors requested + NB = DESCA( MB_ ) = DESCA( NB_ ) = DESCZ( MB_ ) = + DESCZ( NB_ ) + NN = MAX( N, NB, 2 ) + DESCA( RSRC_ ) = DESCA( CSRC_ ) = DESCZ( RSRC_ ) = + DESCZ( CSRC_ ) = 0 + NP0 = NUMROC( NN, NB, 0, 0, NPROW ) + MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL ) + ICEIL( X, Y ) is a ScaLAPACK function returning + ceiling(X/Y) + +.br + When LRWORK is too small: + If LRWORK is too small to guarantee orthogonality, + PZHEGVX attempts to maintain orthogonality in + the clusters with the smallest + spacing between the eigenvalues. + If LRWORK is too small to compute all the eigenvectors + requested, no computation is performed and INFO=-30 + is returned. Note that when RANGE='V', PZHEGVX does + not know how many eigenvectors are requested until + the eigenvalues are computed. Therefore, when RANGE='V' + and as long as LRWORK is large enough to allow PZHEGVX to + compute the eigenvalues, PZHEGVX will compute the + eigenvalues and as many eigenvectors as it can.
+ +.br + Relationship between workspace, orthogonality & performance: + If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing + enough space to compute all the eigenvectors + orthogonally will cause serious degradation in + performance. In the limit (i.e. CLUSTERSIZE = N-1) + PZSTEIN will perform no better than ZSTEIN on 1 processor. + For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing + all eigenvectors will increase the total execution time + by a factor of 2 or more. + For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will + grow as the square of the cluster size, all other factors + remaining equal and assuming enough workspace. Less + workspace means less reorthogonalization but faster + execution. + +.br + If LRWORK = -1, then LRWORK is global input and a workspace + query is assumed; the routine only calculates the minimum + and optimal size for all work arrays. Each of these + values is returned in the first entry of the corresponding + work array, and no error message is issued by PXERBLA. + +.tp 8 +IWORK (local workspace) INTEGER array + On return, IWORK(1) contains the amount of integer workspace + required. + +.tp 8 +LIWORK (local input) INTEGER + size of IWORK + LIWORK >= 6 * NNP + Where: + NNP = MAX( N, NPROW*NPCOL + 1, 4 ) + If LIWORK = -1, then LIWORK is global input and a workspace + query is assumed; the routine only calculates the minimum + and optimal size for all work arrays. Each of these + values is returned in the first entry of the corresponding + work array, and no error message is issued by PXERBLA. + +.tp 8 +IFAIL (output) INTEGER array, dimension (N) + IFAIL provides additional information when INFO .NE. 0 + If (MOD(INFO/16,2).NE.0) then IFAIL(1) indicates the order of + the smallest minor which is not positive definite. + If (MOD(INFO,2).NE.0) on exit, then IFAIL contains the + indices of the eigenvectors that failed to converge. 
+ +.br + If neither of the above error conditions holds and JOBZ = 'V', + then the first M elements of IFAIL are set to zero. + +.tp 8 +ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL) + This array contains indices of eigenvectors corresponding to + a cluster of eigenvalues that could not be reorthogonalized + due to insufficient workspace (see LWORK, ORFAC and INFO). + Eigenvectors corresponding to clusters of eigenvalues indexed + ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be + reorthogonalized due to lack of workspace. Hence the + eigenvectors corresponding to these clusters may not be + orthogonal. ICLUSTR() is a zero terminated array. + (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if + K is the number of clusters + ICLUSTR is not referenced if JOBZ = 'N' + +.tp 8 +GAP (global output) DOUBLE PRECISION array, + dimension (NPROW*NPCOL) + This array contains the gap between eigenvalues whose + eigenvectors could not be reorthogonalized. The output + values in this array correspond to the clusters indicated + by the array ICLUSTR. As a result, the dot product between + eigenvectors corresponding to the I^th cluster may be as high + as ( C * n ) / GAP(I) where C is a small constant. + +.tp 8 +INFO (global output) INTEGER + = 0: successful exit + < 0: If the i-th argument is an array and the j-entry had + an illegal value, then INFO = -(i*100+j), if the i-th + argument is a scalar and had an illegal value, then + INFO = -i. + > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors + failed to converge. Their indices are stored + in IFAIL. Send e-mail to scalapack@cs.utk.edu + if (MOD(INFO/2,2).NE.0), then eigenvectors corresponding + to one or more clusters of eigenvalues could not be + reorthogonalized because of insufficient workspace. + The indices of the clusters are stored in the array + ICLUSTR. + if (MOD(INFO/4,2).NE.0), then space limit prevented + PZHEGVX from computing all of the eigenvectors + between VL and VU. 
The number of eigenvectors + computed is returned in NZ. + if (MOD(INFO/8,2).NE.0), then PZSTEBZ failed to + compute eigenvalues. + Send e-mail to scalapack@cs.utk.edu + if (MOD(INFO/16,2).NE.0), then B was not positive + definite. IFAIL(1) indicates the order of + the smallest minor which is not positive definite. --- scalapack-doc-1.5.orig/README +++ scalapack-doc-1.5/README @@ -16,7 +16,7 @@ If it says that MANPATH is undefined, type: - % setenv MANPATH /usr/man:/usr/local/man:/user_path/man_dir + % setenv MANPATH /usr/man:/usr/share/man:/usr/local/man:/user_path/man_dir Otherwise, if MANPATH is defined, type: --- scalapack-doc-1.5.orig/debian/changelog +++ scalapack-doc-1.5/debian/changelog @@ -0,0 +1,87 @@ +scalapack-doc (1.5-10) unstable; urgency=low + + * New maintainer (Closes: #335007) + * Moved to debhelper v5. + * Bumped standards version to 3.7.2 + * dh_installman instead of deprecated dh_installmanpages. + * Build-Depends instead of Build-Depends-Indep into control file. + * Changed to doc section. Linda doesn't complaint. + + -- Muammar El Khatib Mon, 18 Dec 2006 22:30:01 -0400 + +scalapack-doc (1.5-9) unstable; urgency=low + + * Rewrote man pages + pcheevx.l pchegvx.l pdsyev.l pdsyevx.l pdsygvx.l pssyev.l pssyevx.l + pssygvx.l pzheevx.l pzhegvx.l + which were completely useless (just crap inside). + Closes: #215370 + * Bumped standards version. + * Adapted package description. + + -- Philipp Frauenfelder Sat, 25 Oct 2003 16:35:41 +0200 + +scalapack-doc (1.5-8) unstable; urgency=low + + * Changed section to devel and priority to optional + * Changed README to reflect FHS. Closes: #127107 + + -- Philipp Frauenfelder Mon, 31 Dec 2001 12:51:33 +0100 + +scalapack-doc (1.5-7) unstable; urgency=low + + * Typo in debian/control. Closes: #125336 + * New standards version + * Changed paths in doc-base files. 
+ + -- Philipp Frauenfelder Sun, 23 Dec 2001 16:45:49 +0100 + +scalapack-doc (1.5-6) unstable; urgency=low + + * Added the Scalapack User' Guide (SLUG) and the FAQ, both + in html. Closes: #69814 + * Support doc-base for all documentation. + * Standards-Version is now 3.2.0 + + -- Philipp Frauenfelder Fri, 25 Aug 2000 09:36:29 +0200 + +scalapack-doc (1.5-5) frozen unstable; urgency=low + + * Upload to frozen too. + + -- Philipp Frauenfelder Sun, 26 Mar 2000 13:37:53 +0200 + +scalapack-doc (1.5-4) unstable; urgency=low + + * Removed recommendation of obsolete packages. + + -- Philipp Frauenfelder Tue, 21 Dec 1999 18:41:19 +0100 + +scalapack-doc (1.5-3) unstable; urgency=low + + * Rebuilt with new debhelper to support FSH. + Standards-Version is now 3.0.1. + + -- Philipp Frauenfelder Tue, 26 Oct 1999 08:53:46 +0200 + +scalapack-doc (1.5-2) unstable; urgency=low + + * Moved package to the doc section as requested bz David Rocher. + Closes: #38886 + * Changed priority from optional to extra to stay in sync with scalapack. + * Moved to debhelper v2. + * Changed location of man pages to /usr/share/man/man3, + package documentation goes to ...? + * Bumped standards version to 3.0.1. + + -- Philipp Frauenfelder Thu, 19 Aug 1999 11:14:04 +0200 + +scalapack-doc (1.5-1) unstable; urgency=low + + * Initial release. + + -- Philipp Frauenfelder Sat, 24 Apr 1999 09:42:50 +0200 + +Local variables: + mode: debian-changelog +End: --- scalapack-doc-1.5.orig/debian/control +++ scalapack-doc-1.5/debian/control @@ -0,0 +1,19 @@ +Source: scalapack-doc +Section: doc +Priority: optional +Maintainer: Muammar El Khatib +Standards-Version: 3.7.2 +Build-Depends: debhelper (>= 5) + +Package: scalapack-doc +Architecture: all +Description: Scalable Linear Algebra Package Documentation + ScaLAPACK is the parallel version of LAPACK. It depends on PVM or MPI. + . 
+ This package provides the man pages for the routines in the + ScaLAPACK library (see package scalapack1-pvm, scalapack1-mpich or + scalapack1-lam) and a quick reference for PBLAS and ScaLAPACK. PBLAS + is the library for Parallel Basic Linear Algebra Subprograms included in + ScaLAPACK. + . + Also included: ScaLAPACK Users' Guide (SLUG) and the FAQ on ScaLAPACK. --- scalapack-doc-1.5.orig/debian/dirs +++ scalapack-doc-1.5/debian/dirs @@ -0,0 +1,3 @@ +usr/share/man/man3 +usr/share/doc/scalapack-doc/html/slug +usr/share/doc-base --- scalapack-doc-1.5.orig/debian/rules +++ scalapack-doc-1.5/debian/rules @@ -0,0 +1,67 @@ +#! /usr/bin/make -f +# Made with the aid of debmake, by Christoph Lameter, +# based on the sample debian/rules file for GNU hello by Ian Jackson. +# Handmodified by P. Frauenfelder for debhelper support, 5 Sept 1998 + +d=debian/scalapack-doc + +# export DH_VERBOSE=1 +export DH_COMPAT=5 + +build: build-stamp + +build-stamp: + dh_testdir + touch build-stamp + +clean: + dh_testdir + dh_testroot + rm -f build-stamp + dh_clean + +binary-indep: build + dh_testdir + dh_testroot + dh_clean -k + dh_installdirs + + install -m 644 pblasqref.ps scalapackqref.ps \ + `pwd`/$(d)/usr/share/doc/scalapack-doc + install -m 644 html/faq.html \ + `pwd`/$(d)/usr/share/doc/scalapack-doc/html + install -m 644 html/slug/* \ + `pwd`/$(d)/usr/share/doc/scalapack-doc/html/slug + install -m 644 debian/scalapack-faq `pwd`/$(d)/usr/share/doc-base/ + install -m 644 debian/scalapack-pblasqref \ + `pwd`/$(d)/usr/share/doc-base/ + install -m 644 debian/scalapack-scalapackqref \ + `pwd`/$(d)/usr/share/doc-base/ + install -m 644 debian/scalapack-slug `pwd`/$(d)/usr/share/doc-base/ + + dh_installdocs + +# rename man pages and fix apropos description +# copy and paste from lapack-doc_2.0.1-3.2's rules file + (cd man/manl; ls -1 *) | ( \ + echo 's=man/manl; d=$(d)/usr/share/man/man3'; \ + sed -e 's,\(.*\)\.l$$,sed -f debian/sed $$s/\1.l > $$d/\1.3,' \ + ) | sh + + dh_installman + 
dh_installchangelogs + dh_compress + dh_fixperms + dh_installdeb + dh_gencontrol + dh_md5sums + dh_builddeb + +binary-arch: build + +source diff: + @echo >&2 'source and diff are obsolete - use dpkg-source -b'; false + +binary: binary-indep binary-arch + +.PHONY: binary binary-arch binary-indep clean build --- scalapack-doc-1.5.orig/debian/copyright +++ scalapack-doc-1.5/debian/copyright @@ -0,0 +1,37 @@ +This package was debianized by Philipp Frauenfelder on +Sat, 24 Apr 1999 09:42:50 +0200 + +Quoting from http://www.netlib.org/scalapack/ + + The ScaLAPACK project is a collaborative effort involving several + institutions: + + Oak Ridge National Laboratory + Rice University + University of California, Berkeley + University of California, Los Angeles + University of Illinois + University of Tennessee, Knoxville + +It was downloaded from +http://www.netlib.org/scalapack/ + +Copyright notice, quoting from http://www.netlib.org/scalapack/faq.html + +1.4) Are there legal restrictions on the use of ScaLAPACK software? + + ScaLAPACK (like LINPACK, EISPACK, LAPACK, etc) is a freely-available + software package. It is available from netlib via anonymous ftp and + the World Wide Web. It can, and is, being included in commercial + packages (e.g., IBM's Parallel ESSL, NAG Numerical PVM and MPI + Library). We only ask that proper credit be given to the authors. + + Like all software, it is copyrighted. It is not trademarked, but we do + ask the following: + + If you modify the source for these routines we ask that you change the + name of the routine and comment the changes made to the original. + + We will gladly answer any questions regarding the software. If a + modification is done, however, it is the responsibility of the person + who modified the routine to provide support. 
--- scalapack-doc-1.5.orig/debian/postinst +++ scalapack-doc-1.5/debian/postinst @@ -0,0 +1,14 @@ +#!/bin/sh + +set -e + +if [ "$1" = configure ]; then + if command -v install-docs >/dev/null 2>&1; then + install-docs -i /usr/share/doc-base/scalapack-faq + install-docs -i /usr/share/doc-base/scalapack-pblasqref + install-docs -i /usr/share/doc-base/scalapack-scalapackqref + install-docs -i /usr/share/doc-base/scalapack-slug + fi +fi + +#DEBHELPER# --- scalapack-doc-1.5.orig/debian/sed +++ scalapack-doc-1.5/debian/sed @@ -0,0 +1,4 @@ +/^.SH NAME/{ +n +s/ - / \\- / +} --- scalapack-doc-1.5.orig/debian/prerm +++ scalapack-doc-1.5/debian/prerm @@ -0,0 +1,14 @@ +#!/bin/sh + +set -e + +if [ "$1" = remove -o "$1" = upgrade ]; then + if command -v install-docs >/dev/null 2>&1; then + install-docs -r scalapack-faq + install-docs -r scalapack-slug + install-docs -r scalapack-pblasqref + install-docs -r scalapack-scalapackqref + fi +fi + +#DEBHELPER# --- scalapack-doc-1.5.orig/debian/scalapack-scalapackqref +++ scalapack-doc-1.5/debian/scalapack-scalapackqref @@ -0,0 +1,7 @@ +Document: scalapack-scalapackqref +Title: ScaLAPACK Quick Reference Guide to the Driver Routines +Abstract: Quick reference on two pages for ScaLAPACK routines. + Only in Fortran. +Section: Apps/Programming +Format: PS +Files: /usr/share/doc/scalapack-doc/scalapackqref.ps.gz --- scalapack-doc-1.5.orig/debian/scalapack-pblasqref +++ scalapack-doc-1.5/debian/scalapack-pblasqref @@ -0,0 +1,7 @@ +Document: scalapack-pblasqref +Title: Parallel Basic Linear Algebra Subprograms, A Quick Reference Guide +Abstract: Quick reference on two pages for PBLAS routines. + Only in Fortran. 
+Section: Apps/Programming +Format: PS +Files: /usr/share/doc/scalapack-doc/pblasqref.ps.gz --- scalapack-doc-1.5.orig/debian/scalapack-slug +++ scalapack-doc-1.5/debian/scalapack-slug @@ -0,0 +1,7 @@ +Document: scalapack-slug +Title: ScaLAPACK Users' Guide +Abstract: Detailed description of ScaLAPACK for users of the library +Section: Apps/Programming +Format: HTML +Index: /usr/share/doc/scalapack-doc/html/slug/index.html +Files: /usr/share/doc/scalapack-doc/html/slug/* --- scalapack-doc-1.5.orig/debian/scalapack-faq +++ scalapack-doc-1.5/debian/scalapack-faq @@ -0,0 +1,7 @@ +Document: scalapack-faq +Title: ScaLAPACK Frequently Asked Questions +Abstract: Questions and answers on ScaLAPACK covering ScaLAPACK, BLACS and BLAS +Section: Apps/Programming +Format: HTML +Index: /usr/share/doc/scalapack-doc/html/faq.html +Files: /usr/share/doc/scalapack-doc/html/faq.html