diff -Nru theseus-2.0.6/Cds.h theseus-3.0.0/Cds.h --- theseus-2.0.6/Cds.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/Cds.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,7 +31,18 @@ #include "PDBCds.h" -typedef struct Algorithm_ +typedef struct Algorithm Algorithm; +typedef struct Statistics Statistics; +typedef struct StCds StCds; +typedef struct StCdsArray StCdsArray; +typedef struct CdsParams CdsParams; +typedef struct Params Params; +typedef struct Priors Priors; +typedef struct Cds Cds; +typedef struct CdsArray CdsArray; + + +struct Algorithm { char cmdline[1024]; /* copy of the command line */ int argc; @@ -39,7 +50,6 @@ char **infiles; /* an array of the input files listed on the command line */ int filenum; /* number of input files */ char rootname[FILENAME_MAX]; - int method; /* Kabsch, Kearsley, Horn, or SVD derivative superposition algorithms */ int weight; /* weighting method */ int verbose; /* lots of output */ double precision; /* requested relative precision to converge to */ @@ -47,14 +57,11 @@ int rounds; /* running counter of rounds of the outer loop of MultiPose() */ int innerrounds; /* running counter of rounds of the inner loop of MultiPose() */ double milliseconds; /* how long the calculation took */ - int print_weight; /* flag to print weights */ - int print_trans; /* flag to print translations */ int write_file; /* flag to write output pdb file, default = 1 = yes */ int atoms; /* flag for atom types to include in superposition, CA, CB, backbone, P, etc. 
*/ - int reflection; char *selection; /* character array holding user input for residues/alignment columns to include */ char *atomslxn; /* character array holding user input for atom types to include */ - int revsel; /* reverse the sense of the residues to select in ->selection above (i.e. exclude them) */ + int revsel; /* reverse the sense of the residues to select in selection above (i.e. exclude them) */ int embedave; /* flag to initialize the algorithm with an embedded average (distance geometry) */ int landmarks; /* # of landmarks (Dryden files) */ int writestats; /* flag to write out stat files */ @@ -71,21 +78,20 @@ int princaxes; /* flag to align final superposition with principle axes of mean structure */ int nullrun; int binary; /* flag to read and write binary structure files */ - int modelpca; - double raxes[3]; int mbias; /* flag to calculate bias-corrected mean */ - int notrans; - int norot; + int domp; /* don't initialize Bayes w/MultiPose ML */ + int doave; /* don't calculate an average structure */ + int dotrans; + int dorot; + int dohierarch; + int docovars; int alignment; /* flag for superimposing based on a sequence alignment */ - int dimweight; /* flag to do dimensionally (axially) weighting */ int covweight; /* flag to do atomic, row-wise covariance matrix weighting */ int varweight; /* flag to do variance weighing (i.e., a diagonal covariance matrix) */ int leastsquares; /* flag to do classical least squares, all variances equal, no covars */ int hierarch; /* flag to use hierarchical variances, e.g. 
inverse gamma distributed vars */ int fmodel; /* read only first or all models in a pdb file */ - int noave; /* don't calculate an average structure */ int noinnerloop; /* don't iterate the inner loop */ - int htrans; /* flag to calculate hierarchical translations */ int fasta; /* flag to write out FASTA sequence files for each PDB model read in */ int olve; /* Olve Peersen's pet requests */ int abort; @@ -99,10 +105,8 @@ double param[2]; /* Random generation of structures, params for inverse gamma */ double radii[3]; /* Random generation of structures, radii of gyration for generating mean forms */ int ssm; - int lele5; int bayes; int ipmat; - int commandeur; /* Commandeur algorithm for missing atom translations */ int missing; int scale; /* calculate scale factors for each structure */ int instfile; /* print out PDB files in each internal round of the MultiPose algorithm */ @@ -110,68 +114,77 @@ int amber; /* switch for special treatment of AMBER8 PDB formatted files */ int atom_names; double scalefactor; /* value to scale all structures by */ -} Algorithm; + int morphfile; /* flag to read and write morphometric .tps files */ + int scaleanchor; /* model index for relative scaling -- scale of this model = 1 */ + int randgibbs; /* Randomly initialize GibbsMet */ + double covnu; +}; -typedef struct Statistics_ +struct Statistics { double stddev; /* combined standard deviation of all atomic positions */ double var; double phi; double alpha; - double starting_paRMSD, starting_pawRMSD; /* some stats for initial superposition */ - double starting_mlRMSD, starting_ave_wRMSD_from_mean; + double starting_paRMSD, starting_pawRMSD; /* stats for initial superposition */ + double starting_mlRMSD; double starting_stddev, starting_logL; double ave_paRMSD; /* average pairwise RMSD */ double ave_pawRMSD; /* average weighted pairwise RMSD */ double RMSD_from_mean; /* average RMSD from the mean structure */ - double wRMSD_from_mean; /* average weighted RMSD from the mean structure 
*/ double mlRMSD; /* max lik RMSD, actually a sigma */ - double anova_RMSD, anova_AIC, anova_logL; - double ave_ref_wRMSD_from_mean, refl_RMSD; - double KSp, Fp, signp, wilcoxonp, dw; /* some frequentist stats */ - double logL, AIC, BIC, nparams, ndata, chi2; /* likelihood statistics */ + double logL, mlogL, AIC, BIC, nparams, ndata, chi2; /* likelihood stats */ double skewness[4]; /* for x, y, z residuals and total */ double kurtosis[4]; double SES, SEK; - double condition_num; /* condition number of the covariance matrix */ int median; /* index of structure closest to mean */ - double trace_inv_sigma; double wtnorm; /* normalization factor for atomic row-wise weight matrix */ - double hierarch_p1, hierarch_p2; /* parameters of the PDF for hierarchical variances */ - double htrans_ave, htrans_var; /* parameters of Gaussian for hierarhcial translations */ + double hierarch_p1, hierarch_p2; /* parameters of the hierarchical variance PDF */ double hierarch_chi2;/* chi^2 value for fit of hierarchical variances */ double hierarch_chi2_P; /* P-value */ - double htrans_chi2; /* chi^2 value for fit of hierarchical translations to Gaussian */ - double htrans_chi2_P; /* P-value */ - double omnibus_chi2; /* overall chi^2, including hierarchical stuff and overall fit */ + double omnibus_chi2; /* overall chi^2, including hierarchical and overall fit */ double omnibus_chi2_P; /* P-value */ double precision; /* actual precision to which the algorithm converged */ - double fperr; /* empirically determined floating point error of superposition */ - double minvar; /* empirically determined theoretical minimum variance */ - double lsvar; /* least-squares variances, homoscedastic */ - double mglogl; -} Statistics; +}; -/* Cds is for holding working sets of coordinates */ -typedef struct Cds_ +/* StCds is for holding static working sets of coordinates */ +struct StCds { - char filename[FILENAME_MAX]; int model; /* model number, not really used */ int vlen; /* number of coordinates */ 
int aalen; /* number of real residues, no gaps, used for CA alignments */ - char **resName; /* residue name */ - char *chainID; /* chain ID */ - int *resSeq; /* residue number */ - double *x, *y, *z; /* x,y,z atomic coordinates */ double *o; /* occupancy */ double *b; /* B-factor */ + /* not to be accessed - for space only */ + char *resName_space; +}; + + +struct StCdsArray +{ + int vlen; /* number of coordinates */ + int cnum; /* number of Cds in array */ + + Cds **cds; /* pointer to an array of pointers to Cds */ + Cds *avecds; /* average Cds of all in CdsArray */ + + double *evals; + double *samplevar3N; /* atomic sample variances */ + double **CovMat; /* the atomic, row-wise covariance matrix */ +}; + + +struct CdsParams +{ + int vlen; /* number of coordinates */ + double *prvar; /* prior variances */ double *residual_x, *residual_y, *residual_z; @@ -179,6 +192,7 @@ double **matrix; /* 3x3 rotation matrix */ double **last_matrix; /* temp 3x3 rotation matrix */ + double **last_outer_matrix; /* temp 3x3 rotation matrix */ double radgyr; /* radius of gyration */ double **innerprod; /* vlen x vlen inner product matrix */ @@ -187,56 +201,143 @@ double center[3]; /* weighted centroid of coordinates */ double last_center[3]; /* temp centroid of coordinates */ double translation[3]; /* translation vector, based on weighted center */ - double transsum[3]; - double jktranslation[3]; double RMSD_from_mean; /* rmsd from the mean structure */ double wRMSD_from_mean; /* weighted rmsd from mean structure */ double ref_wRMSD_from_mean; double evals[4]; /* quaternion evals (residual sums) */ double **evecs; /* 4x4 quaternion evecs (rotation vectors) */ +}; - double **tmpmat1, **tmpmat2; /* a bunch of scratch matrices and vectors to be passed around */ - double **tmpmatKK1; /* must be careful that these aren't doubly accessed by subroutines */ - double **tmpmatKK2; - double *tmpvecK; - double **tmpmat3K, **tmpmatK3a, **tmpmatK3b; - double **tmpmat3a, **tmpmat3b, **tmpmat3c, 
**tmpmat3d; /* 3x3 scratch matrices */ - double tmpvec3a[3]; + +struct Params +{ + int vlen; /* number of coordinates */ + int cnum; /* number of Cds in array */ + + CdsParams **cdsp; /* array of coords parameters */ + + Cds *avecds; /* average Cds of all in CdsArray */ + Cds *tcds; /* target Cds */ + + double *w; /* diagonal atomic weights */ + double *var; /* atomic variance estimates */ + double *evals; + double *samplevar3N; /* atomic sample variances */ + int *df; /* degrees of freedom for variances, for incomplete data alignments */ + double *S2; /* theoretical NMR order parameters */ + + double *residuals; /* 3 x vlen x cnum vector of normalized residuals */ + + double **Var_matrix; /* the variances of the distances in distmat */ + double **Dij_matrix; /* average distance distance matrix for the CdsArray */ + Matrix3D *distmat; + double **CovMat; /* the atomic, row-wise covariance matrix */ + double **WtMat; /* inverse of the CovMat */ + double **FullCovMat; + + double **pcamat; /* vlen x vlen sized matrix for PC eigenvectors */ + double *pcavals; /* PCA eigenvalues */ +}; + + +struct Priors +{ + int vlen; /* number of coordinates */ + int cnum; /* number of Cds in array */ + + double alpha; + + Cds *meancds; /* mean Cds of all in CdsArray */ + + double *prvar; + double *prevals; + double **PrCovMat; /* the atomic, row-wise covariance matrix */ + double **PrInvCovMat; /* inverse of the PrCovMat */ +}; + + +/* Cds is for holding working sets of coordinates */ +struct Cds +{ + char filename[FILENAME_MAX]; + int model; /* model number, not really used */ + int vlen; /* number of coordinates */ + int aalen; /* number of real residues, no gaps, used for CA alignments */ + + char **resName; /* residue name */ + char *chainID; /* chain ID */ + int *resSeq; /* residue number */ + + double **wc; /* 3 x K matrix matrix of working coordinates, aliased to x,y,z below */ + double *x, *y, *z; /* x,y,z atomic coordinates */ + double *o; /* occupancy */ + double *b; /* 
B-factor */ + + int *nu, *mu; /* binary flag vectors for present and missing data, respectively */ + + double **sc; /* 3 x K matrix matrix of static coordinates, aliased to sx,sy,sz below */ + double *sx, *sy, *sz; /* x,y,z atomic coordinates */ + double *so; /* occupancy */ + double *sb; /* B-factor */ + + double **cc; /* inv covariance weighted coordinates */ + double *covx, *covy, *covz; /* inv covariance matrix weighted x,y,z cds */ + + double *prvar; /* prior variances */ + + double *residual_x, *residual_y, *residual_z; + + double **matrix; /* 3x3 rotation matrix */ + double **last_matrix; /* temp 3x3 rotation matrix */ + double **last_outer_matrix; /* temp 3x3 rotation matrix */ + + double radgyr; /* radius of gyration */ + double **outerprod; /* vlen x vlen outer product matrix */ + double **innerprod; /* 3 x 3 inner product matrix */ + + double center[3]; /* weighted centroid of coordinates */ + double last_center[3]; /* temp centroid of coordinates */ + double translation[3]; /* translation vector, based on weighted center */ + double RMSD_from_mean; /* rmsd from the mean structure */ + double wRMSD_from_mean; /* weighted rmsd from mean structure */ + double evals[4]; /* quaternion evals (residual sums) */ + double **evecs; /* 4x4 quaternion evecs (rotation vectors) */ double bfact_c; double scale; /* not to be accessed - for space only */ char *resName_space; -} Cds; +}; /* CdsArray is an array of Cds, plus a bunch of stuff necessary to do the ML superposition for this family of Cds. 
*/ -typedef struct Cds_Array +struct CdsArray { - char outfile_name[FILENAME_MAX]; - int vlen; /* number of coordinates */ - int cnum; /* number of Cds in array */ - char *anchorf_name; - char *mapfile_name; - char *msafile_name; - struct PDB_Cds_Array *pdbA; /* associated PDBCdsArray */ - struct Cds_Array *scratchA; /* associated scratch array of Cds */ - - Cds **cds; /* pointer to an array of pointers to Cds */ - Cds *avecds; /* average Cds of all in CdsArray */ - Cds *tcds; /* target Cds */ - Cds *jkcds; /* average bootstrapped Cds for SuperJack() */ - - double *w; /* diagonal atomic weights */ - double *var; /* atomic variances */ - int *df; /* degrees of freedom for variances, used for incomplete data alignments */ - double *S2; /* theoretical NMR order parameters */ + struct PDBCdsArray *pdbA; /* associated PDBCdsArray */ + struct CdsArray *scratchA; /* associated scratch array of Cds */ - Algorithm *algo; - Statistics *stats; + char outfile_name[FILENAME_MAX]; + int vlen; /* number of coordinates */ + int cnum; /* number of Cds in array */ + char *anchorf_name; + char *mapfile_name; + char *msafile_name; + + Cds **cds; /* pointer to an array of pointers to Cds */ + Cds *avecds; /* average Cds of all in CdsArray */ + double **ac; /* average coords matrix */ + Cds *tcds; /* target Cds */ + double **tc; + + double *w; /* diagonal atomic weights */ + double *var; /* atomic variance estimates */ + double *evals; + double *samplevar3N; /* atomic sample variances */ + int *df; /* degrees of freedom for variances, used for incomplete data alignments */ + double *S2; /* theoretical NMR order parameters */ double *residuals; /* 3 x vlen x cnum vector of normalized residuals */ @@ -246,21 +347,24 @@ double **CovMat; /* the atomic, row-wise covariance matrix */ double **WtMat; /* normalized inverse of the CovMat */ double **FullCovMat; - double **MVCovMat; /* a 3x3 matrix */ - double **SCovMat; /* a cnum x cnum matrix */ double **pcamat; /* vlen x vlen sized matrix for 
principle component eigenvectors */ double *pcavals; /* PCA eigenvalues */ - double **modpcamat; /* cnum x cnum sized matrix for model principle component eigenvectors */ - double *modpcavals; /* model PCA eigenvalues */ - double **tmpmat1, **tmpmat2; /* a bunch of scratch matrices and vectors to be passed around */ double **tmpmatKK1; /* must be careful that these aren't accesses by subroutines */ double **tmpmatKK2; - double *tmpvecK; - double **tmpmat3K, **tmpmatK3a, **tmpmatK3b; double **tmpmat3a, **tmpmat3b, **tmpmat3c, **tmpmat3d; /* 3x3 scratch matrices */ - double tmpvec3a[3]; -} CdsArray; + double *tmpvecK; + double *tmpvec3a; +}; + + +/* global declarations (necessary for leave(), I think) */ +extern CdsArray *baseA; /* main array of selected pdb cds, never modified */ +extern PDBCdsArray *pdbA; /* array holding all of the pdb file coordinate info, + much of it unused in the actual calculations */ +extern Algorithm *algo; +extern Statistics *stats; #endif + Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/._CovMat.c and /tmp/g2bOMTRwaC/theseus-3.0.0/._CovMat.c differ diff -Nru theseus-2.0.6/CovMat.c theseus-3.0.0/CovMat.c --- theseus-2.0.6/CovMat.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/CovMat.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -80,15 +80,6 @@ if (cdsA->tmpmatKK2 == NULL) cdsA->tmpmatKK2 = MatAlloc(vlen, vlen); - - if (cdsA->tmpmatK3a == NULL) - cdsA->tmpmatK3a = MatAlloc(vlen, 3); - - if (cdsA->tmpmatK3b == NULL) - cdsA->tmpmatK3b = MatAlloc(vlen, 3); - - if (cdsA->tmpmat3K == NULL) - cdsA->tmpmat3K = MatAlloc(3, vlen); } @@ -96,33 +87,36 @@ int CheckZeroVariances(CdsArray *cdsA) { - Algorithm *algo = cdsA->algo; int i, zeroflag = 1; - if (algo->varweight != 0) + if (algo->varweight) { for (i = 0; i < cdsA->vlen; ++i) if (cdsA->var[i] > DBL_EPSILON) zeroflag = 0; } - else if (algo->covweight != 0) + else if (algo->covweight) { for (i = 0; i < cdsA->vlen; ++i) if (cdsA->CovMat[i][i] > DBL_EPSILON) zeroflag = 0; } + else if (algo->leastsquares) + { + zeroflag = 0; + } return(zeroflag); -/* if (zeroflag == 1) */ +/* if (zeroflag) */ /* { */ -/* double var = cdsA->stats->wRMSD_from_mean * cdsA->stats->wRMSD_from_mean; */ +/* double var = stats->wRMSD_from_mean * stats->wRMSD_from_mean; */ /* */ -/* if (algo->varweight != 0) */ +/* if (algo->varweight) */ /* { */ /* memsetd(cdsA->var, var, cdsA->vlen); */ /* } */ -/* else if (algo->covweight != 0) */ +/* else if (algo->covweight) */ /* { */ /* for (i = 0; i < cdsA->vlen; ++i) */ /* cdsA->CovMat[i][i] = var; */ @@ -135,88 +129,42 @@ CalcBfactC(CdsArray *cdsA) { int i, j; - double trBS, occsum; + double trBS, nusum; for (i = 0; i < cdsA->cnum; ++i) { - trBS = occsum = 0.0; + trBS = nusum = 0.0; for (j = 0; j < cdsA->vlen; ++j) { - if (cdsA->cds[i]->o[j] > 0) + if (cdsA->cds[i]->nu[j]) { - occsum += 1.0; + nusum += 1; trBS += cdsA->cds[i]->prvar[j] / cdsA->var[j]; /*printf("trBS[%d] = % f\n", j, cdsA->cds[i]->prvar[j] / cdsA->var[j]);*/ } } - cdsA->cds[i]->bfact_c = occsum / trBS; + cdsA->cds[i]->bfact_c = nusum / trBS; /*printf("bfact_c[%d] = % f\n", i, cdsA->cds[i]->bfact_c);*/ } } -/* Weighting 
by dimensional, axial Xi covariance matrix, here diagonal. */ /* Holding the superposition constant, calculates the covariance - matrices and corresponding weight matrices, looping till + matrices and corresponding weight matrices, looping till convergence. */ void CalcCovariances(CdsArray *cdsA) { - Algorithm *algo = cdsA->algo; - - if (algo->varweight != 0 || algo->leastsquares != 0) - { - if (algo->alignment == 1) - VarianceCdsOcc(cdsA); - else - VarianceCds(cdsA); - } - else if (algo->covweight != 0) - CalcCovMat(cdsA); -} - - -void -MVCovMat(CdsArray *cdsA) -{ - int i, j; - const int cnum = cdsA->cnum, vlen = cdsA->vlen; - const double idf = 1.0 / (double)(cnum * vlen); - const Cds **cds = (const Cds **) cdsA->cds; - const Cds *cdsi; - double **MVCovMat = NULL; - - if (cdsA->MVCovMat == NULL) - MVCovMat = cdsA->MVCovMat = MatAlloc(3, 3); - else - MVCovMat = cdsA->MVCovMat; - - for (i = 0; i < 3; ++i) - for (j = 0; j < 3; ++j) - MVCovMat[i][j] = 0.0; - - for (i = 0; i < cnum; ++i) + if (algo->varweight || algo->leastsquares) { - for (j = 0; j < vlen; ++j) - { - cdsi = (Cds *) cds[i]; - MVCovMat[0][0] += mysquare(cdsi->residual_x[j]); - MVCovMat[1][1] += mysquare(cdsi->residual_y[j]); - MVCovMat[2][2] += mysquare(cdsi->residual_z[j]); - MVCovMat[0][1] += cdsi->residual_x[j] * cdsi->residual_y[j]; - MVCovMat[0][2] += cdsi->residual_x[j] * cdsi->residual_z[j]; - MVCovMat[1][2] += cdsi->residual_y[j] * cdsi->residual_z[j]; - } + if (algo->alignment) + VarianceCdsNu(cdsA); + else + VarianceCds(cdsA); } - - MVCovMat[1][0] = MVCovMat[0][1]; - MVCovMat[2][0] = MVCovMat[0][2]; - MVCovMat[2][1] = MVCovMat[1][2]; - - for (i = 0; i < 3; ++i) - for (j = 0; j < 3; ++j) - MVCovMat[i][j] *= idf; + else if (algo->covweight) + CalcCovMat(cdsA); } @@ -224,13 +172,14 @@ CalcCovMat(CdsArray *cdsA) { double newx1, newy1, newz1, newx2, newy2, newz2; + double avexi, aveyi, avezi, avexj, aveyj, avezj; double covsum; - double *cdskx, *cdsky, *cdskz; + double *cdskx = NULL, *cdsky = 
NULL, *cdskz = NULL; int i, j, k; const int cnum = cdsA->cnum, vlen = cdsA->vlen; const double normalize = 1.0 / (3.0 * cnum); - const Cds **cds = (const Cds **) cdsA->cds; - const Cds *cdsk; + const Cds **cds = (const Cds **) cdsA->cds; + const Cds *cdsk = NULL; double **CovMat = cdsA->CovMat; const double *avex = (const double *) cdsA->avecds->x, *avey = (const double *) cdsA->avecds->y, @@ -246,28 +195,36 @@ based upon current superposition, put in CovMat */ for (i = 0; i < vlen; ++i) { + avexi = avex[i]; + aveyi = avey[i]; + avezi = avez[i]; + for (j = 0; j <= i; ++j) { + avexj = avex[j]; + aveyj = avey[j]; + avezj = avez[j]; + covsum = 0.0; for (k = 0; k < cnum; ++k) { - cdsk = cds[k]; + cdsk = cds[k]; cdskx = cdsk->x; cdsky = cdsk->y; cdskz = cdsk->z; - newx1 = cdskx[i] - avex[i]; - newy1 = cdsky[i] - avey[i]; - newz1 = cdskz[i] - avez[i]; - - newx2 = cdskx[j] - avex[j]; - newy2 = cdsky[j] - avey[j]; - newz2 = cdskz[j] - avez[j]; + newx1 = cdskx[i] - avexi; + newy1 = cdsky[i] - aveyi; + newz1 = cdskz[i] - avezi; + + newx2 = cdskx[j] - avexj; + newy2 = cdsky[j] - aveyj; + newz2 = cdskz[j] - avezj; #ifdef FP_FAST_FMA covsum += fma(newx1, newx2, fma(newy1, newy2, newz1 * newz2)); #else - covsum += (newx1 * newx2 + newy1 * newy2 + newz1 * newz2); + covsum += newx1 * newx2 + newy1 * newy2 + newz1 * newz2; #endif } @@ -280,22 +237,23 @@ } -/* Same as CalcCovMat() but weights by the occupancies */ +/* Same as CalcCovMat() but weights by the nu missing flag */ void -CalcCovMatOcc(CdsArray *cdsA) +CalcCovMatNu(CdsArray *cdsA) { double newx1, newy1, newz1, newx2, newy2, newz2; double covsum; - double *cdskx, *cdsky, *cdskz; + double *cdskx = NULL, *cdsky = NULL, *cdskz = NULL; int i, j, k; const int cnum = cdsA->cnum, vlen = cdsA->vlen; - const Cds **cds = (const Cds **) cdsA->cds; - const Cds *cdsk; + const Cds **cds = (const Cds **) cdsA->cds; + const Cds *cdsk = NULL; double **CovMat = cdsA->CovMat; const double *avex = (const double *) cdsA->avecds->x, *avey = 
(const double *) cdsA->avecds->y, *avez = (const double *) cdsA->avecds->z; - double *occ, osum; + int *nu = NULL; + double nusum; if (cdsA->CovMat == NULL) { @@ -309,11 +267,11 @@ { for (j = 0; j <= i; ++j) { - covsum = osum = 0.0; + covsum = nusum = 0.0; for (k = 0; k < cnum; ++k) { cdsk = cds[k]; - occ = cdsk->o; + nu = cdsk->nu; cdskx = cdsk->x; cdsky = cdsk->y; cdskz = cdsk->z; @@ -326,14 +284,14 @@ newy2 = cdsky[j] - avey[j]; newz2 = cdskz[j] - avez[j]; - covsum += occ[i] * occ[j] * + covsum += nu[i] * nu[j] * (newx1 * newx2 + newy1 * newy2 + newz1 * newz2); - osum += occ[i] * occ[j]; + nusum += nu[i] * nu[j]; } - if (osum > 0.0) - CovMat[i][j] = CovMat[j][i] = covsum / osum; /* sample variance, ML biased not n-1 definition */ + if (nusum > 0) + CovMat[i][j] = CovMat[j][i] = covsum / nusum; /* sample variance, ML biased not n-1 definition */ else CovMat[i][j] = CovMat[j][i] = 0.0; } @@ -342,46 +300,6 @@ void -CalcStructCovMat(CdsArray *cdsA) -{ - double invdf, cov_sum; - int i, j, k; - const int cnum = cdsA->cnum, vlen = cdsA->vlen; - const Cds **cds = (const Cds **) cdsA->cds; - const Cds *cdsi = NULL, *cdsj = NULL; - double **SCovMat = NULL; - - invdf = 1.0 / (double) vlen; /* ML, biased */ - - if (cdsA->SCovMat == NULL) - cdsA->SCovMat = MatAlloc(cnum, cnum); - else - memset(&cdsA->SCovMat[0][0], 0, cnum * cnum * sizeof(double)); - - SCovMat = cdsA->SCovMat; - - /* calculate covariance matrix of structures across atoms, put in SCovMat */ - for (i = 0; i < cnum; ++i) - { - for (j = 0; j < cnum; ++j) - { - cov_sum = 0.0; - for (k = 0; k < vlen; ++k) - { - cdsi = cds[i]; - cdsj = cds[j]; - cov_sum += cdsi->x[k] * cdsj->x[k]; - cov_sum += cdsi->y[k] * cdsj->y[k]; - cov_sum += cdsi->z[k] * cdsj->z[k]; - } - - SCovMat[i][j] = cov_sum * invdf; /* sample variance, ML biased not n-1 definition */ - } - } -} - - -void CalcFullCovMat(CdsArray *cdsA) { double newx1, newy1, newz1, newx2, newy2, newz2; @@ -392,12 +310,13 @@ *avez = (const double *) cdsA->avecds->z; 
const int vlen = cdsA->vlen, cnum = cdsA->cnum; int i, j, k, m, n, p, q0, q1, q2; - const Cds **cds = (const Cds **) cdsA->cds; - Cds *cdsk; + const Cds **cds = (const Cds **) cdsA->cds; + Cds *cdsk = NULL; invdf = 1.0 / (double) cnum; /* ML, biased */ - AveCds(cdsA); + if (algo->doave) + AveCds(cdsA); if (FullCovMat == NULL) FullCovMat = cdsA->FullCovMat = MatAlloc(3 * cdsA->vlen, 3 * cdsA->vlen); @@ -454,37 +373,7 @@ } -/* calculate covariance matrix weighted cds - \Sigma^-1 * \CdsMat */ -void -CalcCovCds(Cds *cds, const double **covmat) -{ - int i, k; - double *covx = cds->covx, - *covy = cds->covy, - *covz = cds->covz; - const double *x = (const double *) cds->x, - *y = (const double *) cds->y, - *z = (const double *) cds->z; - double covmatik; - - for (i = 0; i < cds->vlen; ++i) - { - covx[i] = covy[i] = covz[i] = 0.0; - - for (k = 0; k < cds->vlen; ++k) - { - covmatik = covmat[i][k]; - - covx[i] += (covmatik * x[k]); - covy[i] += (covmatik * y[k]); - covz[i] += (covmatik * z[k]); - } - } -} - - -/* Normalize the covariance matrix to form the correlation matrix +/* Normalize the covariance matrix to form the correlation matrix by dividing each element by the square root of the product of the corresponding diagonal elements. This makes a pearson correlation matrix. 
@@ -519,37 +408,6 @@ } -/* Normalizes a covariance matrix by dividing every cell by the - average variance */ -double -NormalizeCovMat(double **mat, const int size) -{ - int i, j; - double normalize; - - normalize = 0.0; - for (i = 0; i < size; ++i) - normalize += mat[i][i]; - - normalize = size / normalize; - -/* normalize = 0.0; */ -/* for (i = 0; i < size; ++i) */ -/* for (j = 0; j < size; ++j) */ -/* normalize += mat[i][j]; */ -/* normalize = (double) size / normalize; */ - - for (i = 0; i < size; ++i) - for (j = 0; j < size; ++j) - mat[i][j] *= normalize; - - /* fprintf(stderr, "\n Mat[%3d][%3d] = %12.5f", size/2, size/2, mat[size/2][size/2]); */ -/* fprintf(stderr, "\n norm = %12.5f", normalize); */ -/* fflush(NULL); */ - return(normalize); -} - - void PrintCovMat(CdsArray *cdsA) { diff -Nru theseus-2.0.6/CovMat.h theseus-3.0.0/CovMat.h --- theseus-2.0.6/CovMat.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/CovMat.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -41,80 +41,24 @@ CalcBfactC(CdsArray *cdsA); void -CalcCovariances(CdsArray *scratchA); - -void -MVCovMat(CdsArray *cdsA); +CalcCovariances(CdsArray *cdsA); void CalcCovMat(CdsArray *cdsA); void -CalcCovMatOcc(CdsArray *cdsA); - -void -CalcStructCovMat(CdsArray *cdsA); +CalcCovMatNu(CdsArray *cdsA); void CalcFullCovMat(CdsArray *cdsA); void -CalcCovCds(Cds *cds, const double **covmat); - -void CovMat2CorMat(double **CovMat, const int size); void CorMat2CovMat(double **CovMat, const double *vars, const int size); -double -NormalizeCovMat(double **mat, const int size); - void PrintCovMat(CdsArray *cdsA); -void -InvgaussFitVars(CdsArray *cdsA, double *mean, double *lambda); - -void -InvgaussAdjustVars(CdsArray *cdsA, - const double mean, const double lambda); - -void -LognormalFitVars(CdsArray *cdsA, double *zeta, double *sigma); - -void -LognormalAdjustVars(CdsArray *cdsA, double zeta, double sigma); - -void -InvGammaFitVars(CdsArray *cdsA, int iterate); - -void -InvGammaFitVars_c1(CdsArray *cdsA, double *b, double *c); - -void -InvGammaFitVars_GaussVarVar(CdsArray *cdsA, double *b, double *c); - -void -InvGammaFitVars_Mode(CdsArray *cdsA, double *b, double *c, const double mode); - -void -InvGammaStacyFitVars(CdsArray *cdsA, double *b, double *c); - -void -InvGammaMMFitVars(CdsArray *cdsA, double *b, double *c); - -void -InvGammaAdjustVars(CdsArray *cdsA, const double b, const double c); - -void -InvGammaAdjustCov(CdsArray *cdsA, const double b, const double c); - -void -RecipInvGaussFitVars(CdsArray *cdsA, double *mu, double *lambda); - -void -RecipInvGaussAdjustVars(CdsArray *cdsA, - const double mu, const double lambda); - #endif diff -Nru theseus-2.0.6/CovMat_local.h theseus-3.0.0/CovMat_local.h --- theseus-2.0.6/CovMat_local.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/CovMat_local.h 2014-05-13 
16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/debian/changelog theseus-3.0.0/debian/changelog --- theseus-2.0.6/debian/changelog 2013-06-26 15:14:55.000000000 +0000 +++ theseus-3.0.0/debian/changelog 2014-05-20 13:36:28.000000000 +0000 @@ -1,3 +1,12 @@ +theseus (3.0.0-1) unstable; urgency=medium + + * New upstream version + * Moved debian/upstream to debian/upstream/metadata + * cme fix dpkg-control + * debian/README.source: removed since redundant + + -- Andreas Tille Tue, 20 May 2014 15:08:38 +0200 + theseus (2.0.6-1) unstable; urgency=low * New upstream version diff -Nru theseus-2.0.6/debian/control theseus-3.0.0/debian/control --- theseus-2.0.6/debian/control 2013-06-26 15:10:12.000000000 +0000 +++ theseus-3.0.0/debian/control 2014-05-20 13:11:38.000000000 +0000 @@ -6,9 +6,9 @@ Priority: optional Build-Depends: debhelper (>= 9), libgsl0-dev -Standards-Version: 3.9.4 -Vcs-Browser: http://svn.debian.org/wsvn/debian-med/trunk/packages/theseus/trunk -Vcs-Svn: svn://svn.debian.org/debian-med/trunk/packages/theseus/trunk/ +Standards-Version: 3.9.5 +Vcs-Browser: http://anonscm.debian.org/viewvc/debian-med/trunk/packages/theseus/trunk +Vcs-Svn: svn://anonscm.debian.org/debian-med/trunk/packages/theseus/trunk/ Homepage: http://www.theseus3d.org Package: theseus @@ -33,4 +33,3 @@ other programs and algorithms discard residues that are aligned with gaps. Theseus, however, uses a novel superimposition algorithm that includes all of the data. 
- diff -Nru theseus-2.0.6/debian/patches/20_hardening.patch theseus-3.0.0/debian/patches/20_hardening.patch --- theseus-2.0.6/debian/patches/20_hardening.patch 2013-06-26 14:35:19.000000000 +0000 +++ theseus-3.0.0/debian/patches/20_hardening.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,324 +0,0 @@ -Author: Andreas Tille -Date: Sat, 24 Mar 2012 07:48:57 +0100 -Description: Build with hardening flags - When compiling with hardening flags variables that are set but not used - [-Werror=unused-but-set-variable] all warnings being treated as errors - ---- a/libdistfit/gamma_dist.c -+++ b/libdistfit/gamma_dist.c -@@ -547,7 +547,7 @@ gamma_fit(const double *data, const int - void - gamma_fit_no_stats(const double *data, const int num, double *b, double *c) - { -- double ave, /* var, */logterm, logdata, fx, dfx, fxdfx, guess_b, guess_c; -+ double ave, /* var, */logterm, logdata, fx, dfx, fxdfx/*, guess_b, guess_c*/; - int i, maxiter = 500; - double tol = FLT_EPSILON; - -@@ -604,8 +604,8 @@ gamma_fit_no_stats(const double *data, c - if (*c > FLT_MAX) - *c = FLT_MAX; - -- guess_b = *b; -- guess_c = *c; -+ /* guess_b = *b; */ -+ /* guess_c = *c; */ - /* Maximum likelihood fit. */ - /* Use Newton-Raphson to find ML estimate of c - Based on _Statistical Distributions_ 3rd ed. Evans, Hastings, and Peacock, p 41. 
---- a/libdistfit/lognormal_dist.c -+++ b/libdistfit/lognormal_dist.c -@@ -128,7 +128,7 @@ lognormal_logL(const double zeta, const - double - lognormal_fit(const double *data, const int num, double *zeta, double *sigma, double *prob) - { -- double ave, avesqr, var, /* m, */ x, theta; -+ double ave, avesqr, var, /* m, */ x/*, theta*/; - int i; - - ave = avesqr = 0.0; -@@ -165,7 +165,7 @@ lognormal_fit(const double *data, const - - *zeta = ave; - *sigma = sqrt(var); -- theta = 0.5*(sqrt(1.0 + 4.0*avesqr) - 1.0); -+ /* theta = 0.5*(sqrt(1.0 + 4.0*avesqr) - 1.0); */ - /* printf(" LogNormal theta: %f %e\n", theta, theta*theta + theta - avesqr); */ - /* printf("\n LogNormal logL: %f", lognormal_logL(*zeta, *sigma)); */ - ---- a/libdssplite/dssplite.c -+++ b/libdssplite/dssplite.c -@@ -167,7 +167,7 @@ char - int vlen) /* used to be that len = # of CA atoms, not full x vector length */ - { - DSSP *dssp = NULL; -- int Hbonds, len; -+ int /*Hbonds, */len; - char *summary; - - /* Setup DSSP structure */ -@@ -184,7 +184,7 @@ char - - /* do the DSSP algorithm, Doug-style */ - GetCONHCA(dssp); -- Hbonds = FlagHBonds(dssp); -+ /*Hbonds =*/ FlagHBonds(dssp); - - FlagBends(dssp); - FlagTurns(dssp); ---- a/Embed.c -+++ b/Embed.c -@@ -856,7 +856,7 @@ int FastCalcRMSDAndRotation(double *rot, - double oldg = 0.0; - double b, a, delta, ms; - double q1, q2, q3, q4, normq; -- double d11, d12, d13, d14, d21, d22, d23, d24; -+ double /*d11, d12, */d13, d14, d21, d22, d23, d24; - double d31, d32, d33, d34, d41, d42, d43, d44; - double a2, x2, y2, z2; - double xy, az, zx, ay, yz, ax; -@@ -932,7 +932,7 @@ int FastCalcRMSDAndRotation(double *rot, - } - } - -- d11 = SxxpSyy + Szz-mxEigenV; d12 = SyzmSzy; d13 = - SxzmSzx; d14 = SxymSyx; -+ /*d11 = SxxpSyy + Szz-mxEigenV; d12 = SyzmSzy; */ d13 = - SxzmSzx; d14 = SxymSyx; - d21 = SyzmSzy; d22 = SxxmSyy - Szz-mxEigenV; d23 = SxypSyx; d24= SxzpSzx; - d31 = d13; d32 = d23; d33 = Syy-Sxx-Szz - mxEigenV; d34 = SyzpSzy; - d41 = d14; d42 = d24; d43 = d34; 
d44 = Szz - SxxpSyy - mxEigenV; ---- a/HierarchVars.c -+++ b/HierarchVars.c -@@ -650,7 +650,7 @@ InvGammaFitEvals(CdsArray *cdsA, int ite - double **evecs = NULL; - double precision = cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; -- double nd, oldb, oldc, b, c, chi2 = DBL_MAX, logL, harmave, mode; -+ double nd, oldb, oldc, b, c, chi2 = DBL_MAX, logL, harmave/*, mode*/; - int i, count, newlen; - - newvar = malloc(vlen * sizeof(double)); -@@ -754,7 +754,7 @@ InvGammaFitEvals(CdsArray *cdsA, int ite - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - /* the mode of an inv gamma dist */ -- mode = b / (c+1.0); -+ /* mode = b / (c+1.0); */ - - /* for (i = 0; i < vlen - newlen; ++i) */ - /* newvar[i] = mode; */ -@@ -820,7 +820,7 @@ InvGammaFitEvalsNoN(CdsArray *cdsA, int - double **evecs = NULL; - double precision = cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; -- double nd, oldb, oldc, b, c, chi2 = DBL_MAX, logL, harmave, mode; -+ double nd, oldb, oldc, b, c, chi2 = DBL_MAX, logL, harmave/*, mode*/; - int i, count, newlen; - - newvar = malloc(vlen * sizeof(double)); -@@ -924,7 +924,7 @@ InvGammaFitEvalsNoN(CdsArray *cdsA, int - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - /* the mode of an inv gamma dist */ -- mode = b / (c+1.0); -+ /* mode = b / (c+1.0); */ - - /* for (i = 0; i < vlen - newlen; ++i) */ - /* newvar[i] = mode; */ -@@ -1178,7 +1178,7 @@ InvGammaBayesFitEvals(CdsArray *cdsA, in - double **evecs = NULL; - double precision = cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; -- double nd, oldb, oldc, b, c, chi2 = DBL_MAX, logL, harmave, mode; -+ double nd, oldb, oldc, b, c, chi2 = DBL_MAX, logL, harmave/*, mode*/; - int i, count, newlen; - - newvar = malloc(vlen * sizeof(double)); -@@ -1282,7 +1282,7 @@ InvGammaBayesFitEvals(CdsArray *cdsA, in - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - /* the mode of an inv gamma dist */ -- 
mode = b / (c+1.0); -+ /* mode = b / (c+1.0); */ - - /* for (i = 0; i < vlen - newlen; ++i) */ - /* newvar[i] = mode; */ -@@ -3002,11 +3002,11 @@ InvGammaEMFixedC(CdsArray *cdsA, const d - double *variance = NULL; - double precision = FLT_EPSILON; // cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; -- double nd, oldb, oldc, b = 0.0, chi2 = 0.0, logL; -+ double /* nd, */oldb, oldc, b = 0.0, chi2 = 0.0, logL; - int count; - - newvar = malloc(vlen * sizeof(double)); -- nd = 3.0 * cnum; -+ /* nd = 3.0 * cnum; */ - oldb = oldc = DBL_MAX; - - if (cdsA->algo->varweight > 0) ---- a/MultiPose.c -+++ b/MultiPose.c -@@ -1795,7 +1795,7 @@ MultiPose(CdsArray *baseA) - /* FILE *fp; */ - int i, round, innerround; - int slxn; /* index of random coord to select as first */ -- double frobnorm, sumdev, percent, lastpercent, logL, lastlogL, lastscale; -+ double frobnorm, sumdev, /* percent, lastpercent, */ logL, lastlogL, lastscale; - double deviation_sum = 0.0; - const int cnum = baseA->cnum; - const int vlen = baseA->vlen; -@@ -1944,7 +1944,7 @@ MultiPose(CdsArray *baseA) - matrices and corresponding weight matrices, looping till - convergence when using a dimensional/axial covariance matrix */ - round = 0; -- percent = lastpercent = 0.0; -+ /* percent = lastpercent = 0.0; */ - logL = lastlogL = lastscale = -DBL_MAX; - while(1) - { -@@ -2066,10 +2066,10 @@ MultiPose(CdsArray *baseA) - CheckConvergenceOuter(scratchA, round, algo->precision) == 1) - goto outsidetheloops; - -- if (stats->precision > 0.0) -+/* if (stats->precision > 0.0) - percent = 100.0 * log(fabs(stats->precision))/log(algo->precision); - else -- percent = 0.0; -+ percent = 0.0; */ - - // if (percent > lastpercent) - // { ---- a/MultiPoseMix.c -+++ b/MultiPoseMix.c -@@ -1388,7 +1388,7 @@ MultiPoseMix(CdsArray *baseA, const doub - Statistics *stats = NULL; - Cds **cds = NULL; - Cds *avecds = NULL; -- Cds *tcds = NULL; -+ /* Cds *tcds = NULL; */ - CdsArray *scratchA = NULL; - - gsl_rng *r2 = 
NULL; -@@ -1411,7 +1411,7 @@ MultiPoseMix(CdsArray *baseA, const doub - stats = scratchA->stats; - cds = scratchA->cds; - avecds = scratchA->avecds; -- tcds = scratchA->tcds; -+ /* tcds = scratchA->tcds; */ - - memcpy(scratchA->w, probs, vlen * sizeof(double)); - memcpy(baseA->w, probs, vlen * sizeof(double)); ---- a/PCAstats.c -+++ b/PCAstats.c -@@ -58,7 +58,7 @@ CalcPCA(CdsArray *cdsA) - { - int i, j; - int vlen = (int) cdsA->vlen; -- int upper, lower, pcanum; -+ int /*upper, lower, */ pcanum; - double **CovMat = cdsA->CovMat; - double sum, runsum; - PDBCds *pdbave; -@@ -101,8 +101,8 @@ CalcPCA(CdsArray *cdsA) - for (i = 0; i < vlen; ++i) - sum += CovMat[i][i]; - -- lower = vlen - pcanum + 1; /* careful -- inclusive indices */ -- upper = vlen - 0; -+ /* lower = vlen - pcanum + 1; / * careful -- inclusive indices */ -+ /* upper = vlen - 0; */ - //cdsA->pcamat = MatAlloc(pcanum, vlen); - cdsA->pcamat = MatAlloc(vlen, vlen); - cdsA->pcavals = malloc(vlen * sizeof(double)); -@@ -237,7 +237,7 @@ Calc3NPCA(CdsArray *cdsA) - int i, j; - int vlen = (int) 3 * cdsA->vlen; - double **mat = NULL; -- int upper, lower, pcanum; -+ int /* upper, lower, */ pcanum; - double **evecs = NULL, *evals = NULL; - double sum, runsum; - PDBCds *pdbave = NULL; -@@ -273,8 +273,8 @@ Calc3NPCA(CdsArray *cdsA) - for (i = 0; i < vlen; ++i) - sum += mat[i][i]; - -- lower = vlen - pcanum + 1; /* careful -- inclusive indices */ -- upper = vlen - 0; -+ /* lower = vlen - pcanum + 1; / * careful -- inclusive indices */ -+ /* upper = vlen - 0; */ - evecs = MatAlloc(vlen, vlen); - evals = malloc(vlen * sizeof(double)); - ---- a/pdbStats.c -+++ b/pdbStats.c -@@ -1596,7 +1596,7 @@ CalcHierarchLogL(CdsArray *cdsA) - if (algo->varweight != 0) - { - double *newvar = malloc(vlen * sizeof(double)); -- double b, c, xn1; -+ double b, c/*, xn1*/; - - b = stats->hierarch_p1; - c = stats->hierarch_p2; -@@ -1604,7 +1604,7 @@ CalcHierarchLogL(CdsArray *cdsA) - memcpy(newvar, cdsA->var, vlen * sizeof(double)); - 
qsort(newvar, vlen, sizeof(double), dblcmp_rev); - /* qsort-dblcmp_rev sorts big to small */ -- xn1 = newvar[vlen-4]; -+ /* xn1 = newvar[vlen-4]; */ - - logL = invgamma_logL(newvar, vlen-3, b, c); - //- b * ExpInvXn(xn1, b, c) - (1+c)*ExpLogXn(xn1, b, c) -@@ -1778,13 +1778,13 @@ CalcLogL(CdsArray *cdsA) - if (algo->hierarch != 0) - { - double *newvar = malloc(vlen * sizeof(double)); -- double xn1; -+ /* double xn1; */ - - memcpy(newvar, var, vlen * sizeof(double)); - qsort(newvar, vlen, sizeof(double), dblcmp_rev); - /* qsort-dblcmp_rev sorts big to small */ - -- xn1 = newvar[vlen - 4]; -+ /*xn1 = newvar[vlen - 4];*/ - - lndetrow = 0.0; - for (i = 0; i < vlen-3; ++i) ---- a/QuarticHornFrag.c -+++ b/QuarticHornFrag.c -@@ -318,11 +318,11 @@ CalcQuarticCoeffsPu2(const FragCds *frag - lambdamax = QCProot(coeff, 0.5 * innerprod, precision); - - /* Now calculate the optimal rotation from one row of the cofactor matrix */ -- double a11, a12, a13, a14, a21, a22, a23, a24; -+ double /* a11, a12, */ a13, a14, a21, a22, a23, a24; - double a31, a32, a33, a34, a41, a42, a43, a44; - -- a11 = SxxpSyy + Szz - lambdamax; -- a12 = SyzmSzy; -+ /* a11 = SxxpSyy + Szz - lambdamax; */ -+ /* a12 = SyzmSzy; */ - a13 = -SxzmSzx; - a14 = SxymSyx; - a21 = SyzmSzy; -@@ -652,7 +652,7 @@ FragDistPu(CdsArray *cdsA, int fraglen, - double *coeff = NULL; - double var; - FILE *distfile = NULL, *distfile2 = NULL; -- double biggest; -+ /* double biggest; */ - double *array = NULL; - - double **Rmat = MatAlloc(3, 3); -@@ -686,7 +686,7 @@ FragDistPu(CdsArray *cdsA, int fraglen, - - start_time = clock(); - -- biggest = 0.0; -+ /* biggest = 0.0; */ - count = 0; - for (coord1 = 0; coord1 < cdsA->cnum; ++coord1) - { diff -Nru theseus-2.0.6/debian/patches/30_fix_gcc_options.patch theseus-3.0.0/debian/patches/30_fix_gcc_options.patch --- theseus-2.0.6/debian/patches/30_fix_gcc_options.patch 2013-06-26 15:08:13.000000000 +0000 +++ theseus-3.0.0/debian/patches/30_fix_gcc_options.patch 2014-05-20 
13:29:00.000000000 +0000 @@ -1,34 +1,53 @@ Author: Andreas Tille +Last-Update: Tue, 20 May 2014 15:08:38 +0200 Descriptions: Set options for Linux Default options seem to be addressing MacOS by default - just enable clean on Linux --- a/make.inc +++ b/make.inc -@@ -49,7 +49,7 @@ RANLIB = ranlib +@@ -6,12 +6,12 @@ + RANLIB = ranlib + + # for universal OSX binary +-ARCH = libtool +-ARCHFLAGS = -static -o ++# ARCH = libtool ++# ARCHFLAGS = -static -o + + # for normal arch-native tuned OSX binary +-# ARCH = ar +-# ARCHFLAGS = -rvs ++ARCH = ar ++ARCHFLAGS = -rvs + + #MACOSX_DEPLOYMENT_TARGET=10.4 + #export MACOSX_DEPLOYMENT_TARGET +@@ -38,7 +38,7 @@ ARCHFLAGS = -static -o # Mac OSX Universal # LOCALLIBDIR = /usr/local/lib -LIBS = -lgsl -lgslcblas -ldistfit -lmsa -ldssplite -ldltmath -lDLTutils -ltheseus +LIBS = -lgsl -ldistfit -lmsa -ldssplite -ldltmath -lDLTutils -ltheseus - #SYSLIBS = -framework CoreServices SYSLIBS = -lpthread -lgsl -lgslcblas -lm -lc LIBDIR = -L./lib -@@ -59,7 +59,7 @@ INSTALLDIR = /usr/local/bin - OPT = -O3 -ffast-math -fstrict-aliasing -funroll-loops -fomit-frame-pointer + INSTALLDIR = /usr/local/bin +@@ -46,7 +46,7 @@ INSTALLDIR = /usr/local/bin + OPT = -O3 -ffast-math #-fstrict-aliasing -funroll-loops -fomit-frame-pointer #WARN = -Werror -Wno-error=unused-result -Wall -pedantic -std=c99 WARN = -Werror -Wall -pedantic -std=c99 # for APPLE MACOSX -CFLAGS += $(WARN) -force_cpusubtype_ALL -mmacosx-version-min=10.4 -arch x86_64 -arch i386 #-DNDEBUG +#CFLAGS += $(WARN) -force_cpusubtype_ALL -mmacosx-version-min=10.4 -arch x86_64 -arch i386 #-DNDEBUG - #CFLAGS += $(WARN) # CPPFLAGS are ignored by the rest of the build system but necessary for hardening flags CFLAGS += $(CPPFLAGS) -@@ -79,10 +79,10 @@ CC = /usr/bin/gcc + # Filter out -O2 which overwrites the default -O3 because OPT is used before CFLAGS +@@ -65,11 +65,11 @@ CC = gcc # LIBDIR = -L./lib/ -L/usr/lib/ # INCDIR = -I/usr/local/include/ # INSTALLDIR = /usr/local/bin -# OPT = -O3 -ffast-math 
-fstrict-aliasing -funroll-loops -fomit-frame-pointer +OPT = -O3 -ffast-math -fstrict-aliasing -funroll-loops -fomit-frame-pointer + # # OPT = -m64 -O3 -march=native -funroll-loops -ffast-math -mfpmath=sse # #OPT = -O0 -ggdb -# WARN = -Wall -pedantic -std=c99 -Wno-unused-result -# CFLAGS = $(WARN) -pthread diff -Nru theseus-2.0.6/debian/patches/series theseus-3.0.0/debian/patches/series --- theseus-2.0.6/debian/patches/series 2013-06-26 14:58:23.000000000 +0000 +++ theseus-3.0.0/debian/patches/series 2014-05-20 13:32:26.000000000 +0000 @@ -1,2 +1 @@ -20_hardening.patch 30_fix_gcc_options.patch diff -Nru theseus-2.0.6/debian/README.source theseus-3.0.0/debian/README.source --- theseus-2.0.6/debian/README.source 2009-08-31 12:50:55.000000000 +0000 +++ theseus-3.0.0/debian/README.source 1970-01-01 00:00:00.000000000 +0000 @@ -1,4 +0,0 @@ -This packages uses the quilt patch system. Please read -/usr/share/doc/quilt/README.source for more information. - - -- Andreas Tille Mon, 31 Aug 2009 14:50:19 +0200 File /tmp/V7iqsWxQ7o/theseus-2.0.6/debian/upstream is a regular file while file /tmp/g2bOMTRwaC/theseus-3.0.0/debian/upstream is a directory diff -Nru theseus-2.0.6/distfit.c theseus-3.0.0/distfit.c --- theseus-2.0.6/distfit.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/distfit.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -42,157 +42,161 @@ #include #include #include +#include #include "distfit_local.h" #define VERSION "0.90" #define DISTNUM 24 +static double +**ReadValsMulti(const char *listfile_name, int *col, int *len); + /* array of function names */ char -dist_name[DISTNUM][32] = +dist_name[DISTNUM][32] = { - "Normal", "Laplace", "EVD", "Logistic", - "Cauchy", "Uniform", "Weibull", - "Gamma", "ChisqrGen", "Lognormal", - "Invgamma", "Invgauss", "Recinvgauss", "Pareto", - "Exponential", "Invchisqr", "Chisqr", "Chi", - "Rayleigh", "Rice*", - "Maxwell", "Beta", "BetaSym", "BetaPrime" //"Binomial" + "Normal", "Laplace", "EVD", "Logistic", + "Cauchy", "Uniform", "Weibull", + "Gamma", "ChisqrGen", "Lognormal", + "Invgamma", "Invgauss", "Recinvgauss", "Pareto", + "Exponential", "Invchisqr", "Chisqr", "Chi", + "Rayleigh", "Rice*", + "Maxwell", "Beta", "BetaSym", "BetaPrime" //"Binomial" }; /* array of function pointers */ DistFit -distfit_func[DISTNUM] = +distfit_func[DISTNUM] = { - &normal_fit, &laplace_fit, &EVD_fit, &logistic_fit, - &cauchy_fit, &uniform_fit, &weibull_fit, - &gamma_fit, &chisqrgen_fit, &lognormal_fit, - &invgamma_fit, &invgauss_fit, &recinvgauss_fit, &pareto_fit, - &exp_fit, &invchisqr_fit, &chisqr_fit, &chi_fit, - &rayleigh_fit, &rice_fit, - &maxwell_fit, &beta_fit, &betasym_fit, &betaprime_fit //&binomial_fit + &normal_fit, &laplace_fit, &EVD_fit, &logistic_fit, + &cauchy_fit, &uniform_fit, &weibull_fit, + &gamma_fit, &chisqrgen_fit, &lognormal_fit, + &invgamma_fit, &invgauss_fit, &recinvgauss_fit, &pareto_fit, + &exp_fit, &invchisqr_fit, &chisqr_fit, &chi_fit, + &rayleigh_fit, &rice_fit, + &maxwell_fit, &beta_fit, &betasym_fit, &betaprime_fit //&binomial_fit }; /* array of function pointers */ DistDev -distdev_func[DISTNUM] = +distdev_func[DISTNUM] = { - &normal_dev, &laplace_dev, &EVD_dev, &logistic_dev, - 
&cauchy_dev, &uniform_dev, &weibull_dev, - &gamma_dev, &chisqrgen_dev, &lognormal_dev, - &invgamma_dev, &invgauss_dev, &recinvgauss_dev, &pareto_dev, - &exp_dev, &invchisqr_dev, &chisqr_dev, &chi_dev, - &rayleigh_dev, &rice_dev, - &maxwell_dev, &beta_dev, &betasym_dev, &betaprime_dev //&binomial_dev + &normal_dev, &laplace_dev, &EVD_dev, &logistic_dev, + &cauchy_dev, &uniform_dev, &weibull_dev, + &gamma_dev, &chisqrgen_dev, &lognormal_dev, + &invgamma_dev, &invgauss_dev, &recinvgauss_dev, &pareto_dev, + &exp_dev, &invchisqr_dev, &chisqr_dev, &chi_dev, + &rayleigh_dev, &rice_dev, + &maxwell_dev, &beta_dev, &betasym_dev, &betaprime_dev //&binomial_dev }; DistPDF -distpdf_func[DISTNUM] = +distpdf_func[DISTNUM] = { - &normal_pdf, &laplace_pdf, &EVD_pdf, &logistic_pdf, - &cauchy_pdf, &uniform_pdf, &weibull_pdf, - &gamma_pdf, &chisqrgen_pdf, &lognormal_pdf, - &invgamma_pdf, &invgauss_pdf, &recinvgauss_pdf, &pareto_pdf, - &exp_pdf, &invchisqr_pdf, &chisqr_pdf, &chi_pdf, - &rayleigh_pdf, &rice_pdf, - &maxwell_pdf, &beta_pdf, &betasym_pdf, &betaprime_pdf //&binomial_pdf + &normal_pdf, &laplace_pdf, &EVD_pdf, &logistic_pdf, + &cauchy_pdf, &uniform_pdf, &weibull_pdf, + &gamma_pdf, &chisqrgen_pdf, &lognormal_pdf, + &invgamma_pdf, &invgauss_pdf, &recinvgauss_pdf, &pareto_pdf, + &exp_pdf, &invchisqr_pdf, &chisqr_pdf, &chi_pdf, + &rayleigh_pdf, &rice_pdf, + &maxwell_pdf, &beta_pdf, &betasym_pdf, &betaprime_pdf //&binomial_pdf }; DistCDF -distcdf_func[DISTNUM] = +distcdf_func[DISTNUM] = { - &normal_cdf, &laplace_cdf, &EVD_cdf, &logistic_cdf, - &cauchy_cdf, &uniform_cdf, &weibull_cdf, - &gamma_cdf, &chisqrgen_cdf, &lognormal_cdf, - &invgamma_cdf, &invgauss_cdf, &recinvgauss_cdf, &pareto_cdf, - &exp_cdf, &invchisqr_cdf, &chisqr_cdf, &chi_cdf, - &rayleigh_cdf, &rice_cdf, - &maxwell_cdf, &beta_cdf, &betasym_cdf, &betaprime_cdf //&binomial_cdf + &normal_cdf, &laplace_cdf, &EVD_cdf, &logistic_cdf, + &cauchy_cdf, &uniform_cdf, &weibull_cdf, + &gamma_cdf, &chisqrgen_cdf, &lognormal_cdf, + 
&invgamma_cdf, &invgauss_cdf, &recinvgauss_cdf, &pareto_cdf, + &exp_cdf, &invchisqr_cdf, &chisqr_cdf, &chi_cdf, + &rayleigh_cdf, &rice_cdf, + &maxwell_cdf, &beta_cdf, &betasym_cdf, &betaprime_cdf //&binomial_cdf }; DistSDF -distsdf_func[DISTNUM] = +distsdf_func[DISTNUM] = { - &normal_sdf, &laplace_sdf, &EVD_sdf, &logistic_sdf, - &cauchy_sdf, &uniform_sdf, &weibull_sdf, - &gamma_sdf, &chisqrgen_sdf, &lognormal_sdf, - &invgamma_sdf, &invgauss_sdf, &recinvgauss_sdf, &pareto_sdf, - &exp_sdf, &invchisqr_sdf, &chisqr_sdf, &chi_sdf, - &rayleigh_sdf, &rice_sdf, - &maxwell_sdf, &beta_sdf, &betasym_sdf, &betaprime_sdf //&binomial_sdf + &normal_sdf, &laplace_sdf, &EVD_sdf, &logistic_sdf, + &cauchy_sdf, &uniform_sdf, &weibull_sdf, + &gamma_sdf, &chisqrgen_sdf, &lognormal_sdf, + &invgamma_sdf, &invgauss_sdf, &recinvgauss_sdf, &pareto_sdf, + &exp_sdf, &invchisqr_sdf, &chisqr_sdf, &chi_sdf, + &rayleigh_sdf, &rice_sdf, + &maxwell_sdf, &beta_sdf, &betasym_sdf, &betaprime_sdf //&binomial_sdf }; DistInt -distint_func[DISTNUM] = +distint_func[DISTNUM] = { - &normal_int, &laplace_int, &EVD_int, &logistic_int, - &cauchy_int, &uniform_int, &weibull_int, - &gamma_int, &chisqrgen_int, &lognormal_int, - &invgamma_int, &invgauss_int, &recinvgauss_int, &pareto_int, - &exp_int, &invchisqr_int, &chisqr_int, &chi_int, - &rayleigh_int, &rice_int, - &maxwell_int, &beta_int, &betasym_int, &betaprime_int //&binomial_int + &normal_int, &laplace_int, &EVD_int, &logistic_int, + &cauchy_int, &uniform_int, &weibull_int, + &gamma_int, &chisqrgen_int, &lognormal_int, + &invgamma_int, &invgauss_int, &recinvgauss_int, &pareto_int, + &exp_int, &invchisqr_int, &chisqr_int, &chi_int, + &rayleigh_int, &rice_int, + &maxwell_int, &beta_int, &betasym_int, &betaprime_int //&binomial_int }; int -dist_pnum[DISTNUM] = +dist_pnum[DISTNUM] = { - 2, 2, 2, 2, - 2, 2, 2, - 2, 2, 2, - 2, 2, 2, 2, - 1, 1, 1, 1, - 1, 2, - 1, 2, 1, 2 //2 + 2, 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, 2, + 1, 1, 1, 1, + 1, 2, + 1, 2, 1, 2 //2 }; 
int -dist_nonneg[DISTNUM] = +dist_nonneg[DISTNUM] = { - 0, 0, 0, 0, - 0, 0, 1, - 1, 1, 0, - 0, 0, 0, 0, - 1, 0, 1, 1, - 1, 1, - 1, 0, 0, 1 //1 + 0, 0, 0, 0, + 0, 0, 1, + 1, 1, 0, + 0, 0, 0, 0, + 1, 0, 1, 1, + 1, 1, + 1, 0, 0, 1 //1 }; int -dist_posdef[DISTNUM] = +dist_posdef[DISTNUM] = { - 0, 0, 0, 0, - 0, 0, 0, - 0, 0, 1, - 1, 1, 1, 1, - 0, 1, 0, 0, - 0, 0, - 0, 0, 0, 0 //0 + 0, 0, 0, 0, + 0, 0, 0, + 0, 0, 1, + 1, 1, 1, 1, + 0, 1, 0, 0, + 0, 0, + 0, 0, 0, 0 //0 }; int -dist_beta[DISTNUM] = +dist_beta[DISTNUM] = { - 0, 0, 0, 0, - 0, 0, 0, - 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, - 0, 1, 1, 0 //0 + 0, 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, + 0, 1, 1, 0 //0 }; DFParams DfParams; DFParams *dfparams = &DfParams; DistStats **diststats = NULL; -int distsort[DISTNUM]; -double *pdfparams = NULL; -double *cdfparams = NULL; -double *sdfparams = NULL; -double *intparams = NULL; +int distsort[DISTNUM]; +double *pdfparams = NULL; +double *cdfparams = NULL; +double *sdfparams = NULL; +double *intparams = NULL; /* @@ -276,7 +280,7 @@ var = 0.0; for (j = 0; j < len; ++j) var += dif[i][j] * dif[i][j]; - + std[i] = sqrt(var * invlen); } @@ -310,11 +314,11 @@ // printf ("\n\nEdgeworth correlation matrix:"); // MatPrintLowerDiag(cor, dim); -// +// // for (i = 0; i < dim; ++i) // for (j = 0; j < dim; ++j) // cov[i][j] = cor[i][j] / (std[i] * std[j]); -// +// // printf ("\n\nEdgeworth covariance matrix:"); // MatPrintLowerDiag(cov, dim); @@ -399,7 +403,7 @@ } } - /* There are d \kappa_{i,i,i} terms, 2 {d \choose 2} \kappa_{i,i,j} terms, + /* There are d \kappa_{i,i,i} terms, 2 {d \choose 2} \kappa_{i,i,j} terms, and {d \choose 3} \kappa_{i,j,k} terms. 
gsl_sf_choose (unsigned int n, unsigned int m) */ @@ -419,7 +423,7 @@ printf("\nln(det): %14.3f", lndet); entropy = 0.5 * dim * log(2.0 * M_PI * M_E) + 0.5 * lndet; - + printf("\nwhite entropy: %14.3f", entropy); printf("\nbias: %14.3f", bias); printf("\nln(scale): %14.3f", lnscale); @@ -427,7 +431,7 @@ printf("\nNaive N-entropy: %14.3f", entropy + lnscale); //entropy = entropy - bias + lnscale; - + printf("\nEdgeworth entropy: %14.3f", entropy - term1/12.0 + lnscale); printf("\nEdgeworth entropy (4th order corrections): %14.3f", entropy - bias + lnscale); printf("\n\n"); @@ -446,11 +450,11 @@ // kappa_iii += t3; /* skewness */ // kappa_iiii += t3 * dif[i][j]; /* kurtosis */ // } -// +// // kappa_iii *= invlen; // kappa_iiii *= invlen; // kappa_iiii -= 3.0; -// +// // t3 = kappa_iii * kappa_iii; // t4 = kappa_iiii * kappa_iiii; // term1 += t3; @@ -458,28 +462,28 @@ // term5 += t4 * kappa_iiii; // k_4^3; // term6 += t3 * kappa_iiii; // k_3^2 k_4 // } -// +// // bias = (term1 + 3.0 * term2 + term3 / 6.0) / 12.0 + term4/48.0 - term5/16.0 - 5.0*term6/8.0; -// +// // printf("\nEdgeworth term1: %g", term1/ 12.0); // printf("\nEdgeworth term2: %g", 3.0*term2/ 12.0); // printf("\nEdgeworth term3: %g", term3/(6.0*12.0)); // printf("\nEdgeworth term4: %g", +term4/48.0); // printf("\nEdgeworth term5: %g", -term5/16.0); // printf("\nEdgeworth term6: %g\n", - 5.0*term6/8.0); -// +// // printf("\nln(det): %14.3f", lndet); -// +// // entropy = 0.5 * dim * log(2.0 * M_PI * M_E) + 0.5 * lndet; -// +// // printf("\nwhite entropy: %14.3f", entropy); // printf("\nbias: %14.3f", bias); // printf("\nln(scale): %14.3f", lnscale); -// +// // printf("\nNaive N-entropy: %14.3f", entropy + lnscale); -// +// // //entropy = entropy - bias + lnscale; -// +// // printf("\nEdgeworth entropy: %14.3f", entropy - term1/12.0 + lnscale); // printf("\nEdgeworth entropy: %14.3f", entropy - bias + lnscale); // printf("\n\n"); @@ -560,13 +564,13 @@ pstats[7] = gsl_stats_quantile_from_sorted_data(x, 1, 
len, 0.05); pstats[8] = gsl_stats_quantile_from_sorted_data(x, 1, len, 0.95); pstats[9] = FindMode(x, len); - + printf("\nmode approximation: %g\n", pstats[9]); printf("\n ave (+/- SD) skew kurt median ( 95%% credible interval ) ( 90%% credible interval )"); printf("\n--------------------------------------------------------------------------------------------------------"); - printf("\n% 10.2f (+/- %8.2f) % 7.2f % 7.2f % 10.2f (% 10.2f - % 10.2f) (% 10.2f - % 10.2f)\n\n", + printf("\n% 10.2f (+/- %8.2f) % 7.2f % 7.2f % 10.2f (% 10.2f - % 10.2f) (% 10.2f - % 10.2f)\n\n", pstats[0], pstats[1], pstats[2], pstats[3], pstats[4], pstats[5], pstats[6], pstats[7], pstats[8]); free(x); @@ -579,10 +583,14 @@ int option; /* get the options */ - while ((option = getopt (*argc, *argv, "b:c:C:d:D:hI:m:n:op:P:s:S:t:vz")) != -1) + while ((option = getopt (*argc, *argv, "1b:c:C:d:D:g:G:hI:m:n:op:P:s:S:t:T:vz")) != -1) { switch (option) { + case '1': + dfparams->neg = 1; + break; + case 'b': dfparams->bootstrap = (int) strtol(optarg, NULL, 10); break; @@ -676,6 +684,14 @@ dfparams->distdev = (int) strtol(optarg, NULL, 10); break; + case 'g': + dfparams->digamma = (double) strtod(optarg, NULL); + break; + + case 'G': + dfparams->invdigamma = (double) strtod(optarg, NULL); + break; + case 'h': dfparams->histo = 1; break; @@ -719,6 +735,10 @@ break; + case 'T': + dfparams->trigamma = (double) strtod(optarg, NULL); + break; + case 'v': Version(); exit(EXIT_SUCCESS); @@ -772,6 +792,10 @@ dfparams->mix = 0; dfparams->fixzeros = 0; dfparams->fixones = 0; + dfparams->digamma = 0.0; + dfparams->trigamma = 0.0; + dfparams->invdigamma = 0.0; + dfparams->neg = 0; //printf("\n###### %e\n", normal_sdf(5.0, 0.0, 1.0)); @@ -784,7 +808,55 @@ narguments = ParseCmdLine(&argc, &argv); if (dfparams->seed == -1) - dfparams->seed = (unsigned long) time(NULL); + dfparams->seed = (unsigned long) time(NULL) + getpid(); + + if (dfparams->digamma > 0.0) + { + printf("\n Digamma(%g): %g \n\n", 
dfparams->digamma, gsl_sf_psi(dfparams->digamma)); + + exit(EXIT_SUCCESS); + } + + if (dfparams->invdigamma > 0.0) + { + double y; + double x = dfparams->invdigamma; + double lambda; + + if (dfparams->neg) + x = -x; + + // Inverse digamma (psi) function. The digamma function is the + // derivative of the log gamma function. This calculates the value + // Y > 0 for a value X such that digamma(Y) = X. + // + // This algorithm is from Paul Fackler: + // http://www4.ncsu.edu/~pfackler/ + + lambda = 1.0; + y = exp(x); + while (lambda > DBL_EPSILON) + { + y += lambda*copysign(1.0, x - gsl_sf_psi(y)); + lambda *= 0.5; + } + + printf("\n Inverse Digamma(%g): %g \n\n", dfparams->invdigamma, y); + + exit(EXIT_SUCCESS); + } + + if (dfparams->trigamma > 0.0) + { + double x = dfparams->trigamma; + + x = gsl_sf_psi_1(x); + //x = 1.0/sqrt(x); + + printf("\n Trigamma(%g): %g \n\n", dfparams->trigamma, x); + + exit(EXIT_SUCCESS); + } if (pdfparams != NULL) { @@ -849,7 +921,13 @@ if (dfparams->histo == 0) { /* get data values from file */ - array = getvals(argv[0], &length, dfparams->col); + //array = getvals(argv[0], &length, dfparams->col); + double **matrix = NULL; + int colnum; + matrix = ReadValsMulti(argv[0], &colnum, &length); + array = malloc(length * sizeof(double)); + for (i = 0; i < length; ++i) + array[i] = matrix[dfparams->col][i]; dfparams->length = length; if (length < 4) @@ -875,6 +953,8 @@ for (i = 0; i < length; ++i) warray[i] = array[i]; + + MatDestroy(&matrix); } else { @@ -895,10 +975,10 @@ while(1) { ch = getc(listfile); - + if (ch == EOF || ch == '\n') ++length; - + if (ch == EOF) break; } @@ -989,11 +1069,11 @@ } /* do the bootstrap if so desired */ - if (dfparams->distnum == 1 && dfparams->bootstrap > 1) + if (dfparams->distnum && dfparams->bootstrap > 1) { BootFit(diststats, dfparams->distindex, array, length, dfparams); } - else if(dfparams->distnum == 1) + else if(dfparams->distnum) { SingleDistFit(distfit_func, dfparams->distindex, 
dist_name[dfparams->distindex], diststats[0], array, length, dfparams); } @@ -1008,7 +1088,7 @@ TestData(array, length); - if (dfparams->histo == 1) + if (dfparams->histo) { MultiHistFit(array, freq, length); } @@ -1031,7 +1111,7 @@ free(array); free(warray); - if (dfparams->histo == 1) + if (dfparams->histo) free(freq); for(i = 0; i < dfparams->distnum; ++i) free(diststats[i]); @@ -1046,52 +1126,52 @@ { int i; - dfparams->isbeta = 1; - dfparams->isneg = 0; - dfparams->iszero = 0; - for (i = 0; i < length; ++i) - { - if (data[i] > 1.0) - { - dfparams->isbeta = 0; - break; - } - } - - for (i = 0; i < length; ++i) - { - if (data[i] == 0.0) - { - dfparams->isbeta = 0; - dfparams->iszero = 1; - break; - } - } - - for (i = 0; i < length; ++i) - { - if (data[i] < 0.0) - { - dfparams->isbeta = 0; - dfparams->isneg = 1; - break; - } - } + dfparams->isbeta = 1; + dfparams->isneg = 0; + dfparams->iszero = 0; + for (i = 0; i < length; ++i) + { + if (data[i] > 1.0) + { + dfparams->isbeta = 0; + break; + } + } + + for (i = 0; i < length; ++i) + { + if (data[i] == 0.0) + { + dfparams->isbeta = 0; + dfparams->iszero = 1; + break; + } + } + + for (i = 0; i < length; ++i) + { + if (data[i] < 0.0) + { + dfparams->isbeta = 0; + dfparams->isneg = 1; + break; + } + } } int TestDistVsData(const int ndist) { - if (dist_beta[ndist] == 1 && dfparams->isbeta == 0) + if (dist_beta[ndist] && dfparams->isbeta == 0) return(0); - else if (dist_posdef[ndist] == 1 && dfparams->isneg == 1) + else if (dist_posdef[ndist] && dfparams->isneg) return(0); - else if (dist_nonneg[ndist] == 1 && dfparams->isneg == 1) + else if (dist_nonneg[ndist] && dfparams->isneg) return(0); - else if (dist_posdef[ndist] == 1 && dfparams->iszero == 1) + else if (dist_posdef[ndist] && dfparams->iszero) return(0); - else if (dist_nonneg[ndist] == 1 && dfparams->iszero == 1) /* should return 1, but there are problems fitting data with zeros */ + else if (dist_nonneg[ndist] && dfparams->iszero) /* should return 1, but there 
are problems fitting data with zeros */ return(0); else return(1); @@ -1105,7 +1185,7 @@ int i; double *array = NULL; char argstring[64]; - FILE *tmpfile; + FILE *tmpfile = NULL; const gsl_rng_type *T = NULL; gsl_rng *r2 = NULL; @@ -1329,51 +1409,51 @@ { double param1, param2, chisq, logLper, logL, AIC, BIC; - param1 = param2 = chisq = logLper = logL = AIC = BIC = 0.0; - strcpy(diststats->dist_name, dist_name); + param1 = param2 = chisq = logLper = logL = AIC = BIC = 0.0; + strcpy(diststats->dist_name, dist_name); + + if (TestDistVsData(distindex) == 0) + return; + + chisq = distfit_func[distindex](array, length, ¶m1, ¶m2, &logL); + logLper = logL / (double) length; + + if (chisq == -1.0) + diststats->AIC = -DBL_MAX; - if (TestDistVsData(distindex) == 0) - return; + if (dist_pnum[distindex] == 2) + { + AIC = logL - ((2.0 * length) / (length - 3.0)); + BIC = logL - 2.0 * log(length); + } + else if (dist_pnum[distindex] == 1) + { + AIC = logL - (length / (length - 2.0)); + BIC = logL - log(length); + } + + if (fabs(chisq) > 1e6) + chisq = DBL_MAX; + + if (logLper < -1e7) + logLper = -DBL_MAX; - chisq = distfit_func[distindex](array, length, ¶m1, ¶m2, &logL); - logLper = logL / (double) length; + if (logL < -1e11) + logL = -DBL_MAX; - if (chisq == -1.0) - diststats->AIC = -DBL_MAX; - - if (dist_pnum[distindex] == 2) - { - AIC = logL - ((2.0 * length) / (length - 3.0)); - BIC = logL - 2.0 * log(length); - } - else if (dist_pnum[distindex] == 1) - { - AIC = logL - (length / (length - 2.0)); - BIC = logL - log(length); - } - - if (fabs(chisq) > 1e6) - chisq = DBL_MAX; - - if (logLper < -1e7) - logLper = -DBL_MAX; - - if (logL < -1e11) - logL = -DBL_MAX; - - if (AIC < -1e11) - AIC = -DBL_MAX; - - if (BIC < -1e11) - BIC = -DBL_MAX; - - diststats->param1 = param1; - diststats->param2 = param2; - diststats->chisq = chisq; - diststats->logL = logL; - diststats->logLper = logLper; - diststats->AIC = AIC; - diststats->BIC = BIC; + if (AIC < -1e11) + AIC = -DBL_MAX; + + if (BIC 
< -1e11) + BIC = -DBL_MAX; + + diststats->param1 = param1; + diststats->param2 = param2; + diststats->chisq = chisq; + diststats->logL = logL; + diststats->logLper = logLper; + diststats->AIC = AIC; + diststats->BIC = BIC; } @@ -1472,25 +1552,25 @@ if (TestDistVsData(i) == 0) continue; -/* +/* if(diststats[i]->AIC == -DBL_MAX || diststats[i]->chisq < 0.0) continue; */ diststats[i]->akaikewt = exp(diststats[i]->AIC - largest); - + //printf("\nweight[%d]:%e %e %e", i, diststats[i]->akaikewt, largest, diststats[i]->AIC); - + if (!isfinite(diststats[i]->akaikewt)) diststats[i]->akaikewt = 0.0; - + wtsum += diststats[i]->akaikewt; } for(i = 0; i < distnum; ++i) { - + if (TestDistVsData(i) == 0) continue; /* @@ -1583,8 +1663,8 @@ void InsortDiststats_old(DistStats **diststats, int distnum) { - int i, j; - DistStats *temp; + int i, j; + DistStats *temp = NULL; for (i = 0; i < distnum; ++i) distsort[i] = i; @@ -1663,9 +1743,11 @@ fprintf(stderr, " (4) Cauchy, (5) Uniform, (6) Weibull, (7) Gamma, \n"); fprintf(stderr, " (8) ChiSqrGen, (9) Lognormal, (10) InvGamma, (11) InvGauss, \n"); fprintf(stderr, " (12) RecInvGauss, (13) Pareto, (14) Exp, (15) InvChiSqr, \n"); - fprintf(stderr, " (16) ChiSqr, (17) Chi, (18) Rayleigh (19) Rice, \n"); + fprintf(stderr, " (16) ChiSqr, (17) Chi, (18) Rayleigh (19) Rice, \n"); fprintf(stderr, " (20) Maxwell, (21) Beta, (22) BetaSymm, (23) BetaPrime \n"); fprintf(stderr, " -D {distribution} -- distribution to simulate, use w/ -np \n"); + fprintf(stderr, " -g {x} -- calculate digamma(x) \n"); + fprintf(stderr, " -G {x} -- calculate inverse digamma(x) (i.e., find y for x = digamma(y)) \n"); fprintf(stderr, " -m {# of mixtures} -- fit a normal mixture distribution \n"); fprintf(stderr, " -n {# replicates} -- number of variates to simulate \n"); fprintf(stderr, " -p {p1:p2} -- two parameters for a specified distribution (ran # gen) \n"); @@ -1676,6 +1758,7 @@ fprintf(stderr, " -S {x:p1:p2} -- Calculate SDF for x and params p1 and p2 \n"); 
fprintf(stderr, " -s {seed} -- specify a random seed, must be an integer \n"); fprintf(stderr, " -t -- do a series of simple data transformations and refit \n"); + fprintf(stderr, " -T {x} -- calculate trigamma(x) \n"); fprintf(stderr, " -z -- \"fix\" zeros by setting to DBL_EPSILON (%e) \n", DBL_EPSILON); fprintf(stderr, " -o -- \"fix\" ones by setting to 1.0 - DBL_EPSILON \n"); fprintf(stderr, " -v -- version \n"); @@ -1719,6 +1802,207 @@ } +static int +GetFileLen(FILE *fp) +{ + int ch, filelen = 0; + + rewind(fp); + + while(!feof(fp)) + { + ch = getc(fp); + + if (ch == EOF || ch == '\n') + ++filelen; + + if (ch == EOF) + break; + } + + return(filelen); +} + + +static int +GetFileColLen(FILE *fp) +{ + int linelen = 4096; + char line[linelen]; + char element[linelen]; + int i, j, ch, elemlen, nvals, nscan; + double tmp; + + rewind(fp); + + /* keep getting lines as long as the first data element is not a number */ + while(fgets(line, linelen, fp) != NULL) + { + nscan = sscanf(line, "%le", &tmp); + if (nscan > 0) + break; + } + + /* null-terminate the line, instead of newline */ + for (i = 0; i < strlen(line); ++i) + if (line[i] == '\r' || line[i] == '\n' || line[i] == '#') + line[i] = '\0'; + + i = 0; /* column counter */ + j = 0; /* line position counter */ + ch = line[0]; + while(ch != EOF && ch != '\0' && j < linelen) + { + /* move through whitespace */ + while (isspace(ch) && ch != EOF && ch != '\0' && j < linelen) + { + ++j; + ch = line[j]; + } + + nscan = sscanf(&line[j], "%s", element); + + if (nscan > 0) + { + nvals = sscanf(element, "%le", &tmp); + //printf("\ni:%d j:%d nvals:%d elemlen:%d \n%s %g\n", i, j, nvals, elemlen, element, tmp); + + if(nvals > 0 && isfinite(tmp) && (fabs(tmp) < DBL_MAX)) + ++i; + } + + elemlen = strlen(element); + + //printf("\ni:%d j:%d nvals:%d elemlen:%d \n%s\n", i, j, nvals, elemlen, element); + + j += elemlen; + ch = line[j]; + } + + return(i); +} + + +/* +Reads an entire multi-record, multi-column file of floating point 
numbers. +Columns are free-form, separated by white space. +Allows comments (midline OK) using '#'. +Also allows blank internal lines. +Actually, it ignores any lines beginning with a word (i.e., a non-number). +Longest line allowed is 4096 chars. +In principle, the number of records and columns is unlimited. +Returns a (col x recordlen) array of numbers. +Each column in the file ends up being a contiguous vector of floats. +*/ +static double +**ReadValsMulti(const char *listfile_name, int *col, int *len) +{ + int i, j, k, m; + int ch, elemlen, breakout, nscan, nvals, cnt, filelen; + int linelen = 4096; + FILE *listfile = NULL; + double **array = NULL; + char line[linelen]; + char element[linelen]; + double tmp; + + + /* Open the file */ + listfile = fopen(listfile_name, "r"); + if (listfile == NULL) + { + fprintf(stderr, + "\n ERROR_069: cannot open first file \"%s\" \n", + listfile_name); + exit(EXIT_FAILURE); + } + + /* Count the number of lines in the file; serves as initial (max) guess at number of records */ + filelen = GetFileLen(listfile); + + /* Now get the number of columns in the first real data record */ + *col = GetFileColLen(listfile); + + array = MatAlloc(*col, filelen); + + /* Start over --- read the data into the array */ + rewind(listfile); + + breakout = 0; + cnt = 0; /* counter for actual number of data records (not including blank lines, comments, other crap) */ + for(k = 0; k < filelen; ++k) + { + nscan = -1; + while(nscan <= 0) + { + if (fgets(line, linelen, listfile) == NULL) + { + breakout = 1; + break; + } + else + { + nscan = sscanf(line, "%le", &tmp); + } + } + + for (m = 0; m < strlen(line); ++m) + if (line[m] == '\r' || line[m] == '\n' || line[m] == '#') + line[m] = '\0'; + + i = 0; /* column counter */ + j = 0; /* line position counter */ + ch = line[0]; + while(breakout == 0 && ch != EOF && ch != '\0' && j < linelen) + { + while (isspace(ch) && ch != EOF && ch != '\0' && j < linelen) + { + ++j; + ch = line[j]; + } + + nscan = 
sscanf(&line[j], "%s", element); + + if (nscan > 0) + { + nvals = sscanf(element, "%le", &tmp); + //printf("\ni:%d j:%d nvals:%d elemlen:%d \n%s %g\n", i, j, nvals, elemlen, element, tmp); + + if(nvals <= 0 || !isfinite(tmp) || fabs(tmp) > DBL_MAX) + { + fprintf(stderr, "\n ERROR_169: value line %d column %d has a problem: %g\n", k, i, tmp); + } + else + { + if (i == 0) + cnt++; + + array[i][k] = tmp; + ++i; + } + } + + elemlen = strlen(element); + j += elemlen; + ch = line[j]; + } +// printf("\n"); +// for (i = 0; i < *col; ++i) +// printf("%g ", array[i][k]); + } + + *len = cnt; + + fclose(listfile); + + printf("\nFile length: %d", filelen); + printf("\nNumber of data records: %d", *len); + printf("\nNumber of data columns: %d\n", *col); + + return(array); +} + + double *getvals(char *listfile_name, int *length, int column) { @@ -1841,9 +2125,9 @@ for (j = 0; j < mixn; ++j) { mixp[j] = 0.0; - for (i = 0; i < n; ++i) - mixp[j] += probs[j][i]; - + for (i = 0; i < n; ++i) + mixp[j] += probs[j][i]; + mixp[j] /= n; } } @@ -1916,13 +2200,13 @@ for (j = 0; j < mixn; ++j) printf("%5d:%-2d % f % f % f\n", i, j, param1[j], param2[j], mixp[j]); - CalcProbs(distpdf, x, probs, n, mixp, mixn, param1, param2); - CalcMixProbs(probs, n, mixp, mixn); + CalcProbs(distpdf, x, probs, n, mixp, mixn, param1, param2); + CalcMixProbs(probs, n, mixp, mixn); for (j = 0; j < mixn; ++j) distfitw(x, n, probs[j], ¶m1[j], ¶m2[j], &logL); - if (VecEq(oldprobs, probs[0], n, tol) == 1 && i > 4) + if (VecEq(oldprobs, probs[0], n, tol) && i > 4) break; } diff -Nru theseus-2.0.6/distfit.h theseus-3.0.0/distfit.h --- theseus-2.0.6/distfit.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/distfit.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2012 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -88,108 +88,7 @@ EVD_fit(const double *x, const int n, double *rmu, double *rlambda, double *prob); #endif -#ifndef MATUTILS_SEEN -#define MATUTILS_SEEN -typedef struct -{ - int rows; - int cols; - int depth; - double ***matrix; - double **matrixc; - double *matrixd; -} Matrix3D; - -void -MatPrint(double **matrix, const int size); - -void -MatPrintRec(double **matrix, const int n, const int m); - -void -MatDestroy(double ***matrix_ptr); - -double -**MatAlloc(const int rows, const int cols); - -void -MatIntDestroy(int ***matrix_ptr); - -int -**MatIntInit(const int rows, const int cols); - -Matrix3D -*Mat3DInit(const int rows, const int cols, const int depth); - -void -Mat3DDestroy(Matrix3D **matrix3d_ptr); - -double -MatFrobNorm(const double **mat1, const double **mat2, const int row, const int col); - -double -MatDiff(const double **mat1, const double **mat2, const int row, const int col); - -void -MatCpySym(double **matrix2, const double **matrix1, const int dim); - -void -MatCpySymgen(double **matrix2, const double **matrix1, const int rows, const int cols); - -void -MatMultGenUSVOp(double **c, const double **u, double *s, const double **v, - const int udim, const int sdim, const int vdim); - -void -MatMultGen(double **C, const double **A, const int ni, const int nk, const double **B, const int nj); - -void -MatMultGenIp(double **A, const int nk, const int ni, const double **B, const int nj); - -void -MatTransMultGen(double **C, const double **A, const int ni, const int nk, const double **B, const int nj); - -void -MatTransMultGenIp(double **A, const int nk, const int ni, const double **B, const int nj); - -void -MatMultSym(double **C, const double **A, const double **B, const int len); - -void -MatMultSymDiag(double **C, const double **A, const double **B, const int len); - -void -MatTransIp(double **mat, const int 
dim); - -void -MatTransOp(double **outmat, const double **inmat, const int dim); - -void -cholesky(double **mat, const int dim, double *p); - -double -MatDet(const double **mat, const int dim); - -double -MatGenLnDet(const double **mat, const int dim); - -double -MatSymLnDet(const double **mat, const int dim); - -double -MatTrace(const double **mat, const int dim); - -int -TestZeroOffDiag(const double **mat, const int dim, const double precision); - -int -TestIdentMat(const double **mat, const int dim, const double precision); - -double -FrobDiffNormIdentMat(const double **mat, const int dim); - -#endif /* !MATRIXUTILS_SEEN */ #ifndef REGGAMMA_SEEN #define REGGAMMA_SEEN @@ -1322,10 +1221,6 @@ #ifndef STATISTICS_SEEN #define STATISTICS_SEEN - -double -RoundInt(const double x); - double average(const double *data, const int dim); diff -Nru theseus-2.0.6/distfit_local.h theseus-3.0.0/distfit_local.h --- theseus-2.0.6/distfit_local.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/distfit_local.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -129,6 +129,10 @@ int mix; int fixzeros; int fixones; + double digamma; + double trigamma; + double invdigamma; + int neg; } DFParams; diff -Nru theseus-2.0.6/DistMat.c theseus-3.0.0/DistMat.c --- theseus-2.0.6/DistMat.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/DistMat.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/DistMat.h theseus-3.0.0/DistMat.h --- theseus-2.0.6/DistMat.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/DistMat.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/DLTmath.h theseus-3.0.0/DLTmath.h --- theseus-2.0.6/DLTmath.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/DLTmath.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -59,6 +59,8 @@ #define POW4(a) ((a)*(a)*(a)*(a)) #define SIGN(a,b) ((b) >= 0.0 ? 
fabs(a) : -fabs(a)) +#endif + #ifndef MAT3UTILS_SEEN #define MAT3UTILS_SEEN @@ -71,6 +73,9 @@ int Mat3Eq(const double **matrix1, const double **matrix2, const double precision); +double +Mat3FrobDiff(const double **matrix1, const double **matrix2); + int Mat3FrobEq(const double **matrix1, const double **matrix2, const double precision); @@ -134,16 +139,41 @@ int VerifyRotMat(double **rotmat, double tol); -double -**ClosestRotMat(double **inmat); - void ClosestRotMatIp(double **inmat); double RotMat2AxisAngle(double **rot, double *v); -#endif /* !MATRIXUTILS_SEEN */ +double +RotMat2AxisAngleQuat(double **rot, double *v); + +#endif +/* + Theseus - maximum likelihood superpositioning of macromolecular structures + + Copyright (C) 2004-2014 Douglas L. Theobald + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: + + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA + + -/_|:|_|_\- +*/ + #ifndef MAT4UTILS_SEEN #define MAT4UTILS_SEEN @@ -159,7 +189,32 @@ void Mat4TransposeOp(double **matrix2, const double **matrix1); -#endif /* !MATRIXUTILS_SEEN */ +#endif /* !MATRIXUTILS_SEEN */ +/* + Theseus - maximum likelihood superpositioning of macromolecular structures + + Copyright (C) 2004-2014 Douglas L. 
Theobald + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: + + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA + + -/_|:|_|_\- +*/ + #ifndef MATUTILS_SEEN #define MATUTILS_SEEN @@ -186,7 +241,7 @@ **MatAlloc(const int rows, const int cols); void -MatIntDestroy(int ***matrix_ptr); +MatIntDestroy(int ***matrix); int **MatIntInit(const int rows, const int cols); @@ -207,7 +262,7 @@ MatCpySym(double **matrix2, const double **matrix1, const int dim); void -MatCpySymgen(double **matrix2, const double **matrix1, const int rows, const int cols); +MatCpyGen(double **matrix2, const double **matrix1, const int rows, const int cols); void MatMultGenUSVOp(double **c, const double **u, double *s, const double **v, @@ -237,7 +292,7 @@ void MatTransOp(double **outmat, const double **inmat, const int dim); -void +void cholesky(double **mat, const int dim, double *p); double @@ -261,7 +316,64 @@ double FrobDiffNormIdentMat(const double **mat, const int dim); -#endif /* !MATRIXUTILS_SEEN */ +#endif /* !MATRIXUTILS_SEEN */ +/* + Theseus - maximum likelihood superpositioning of macromolecular structures + + Copyright (C) 2004-2014 Douglas L. 
Theobald + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: + + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA + + -/_|:|_|_\- +*/ + +#ifndef MULTIVARGAMMA_SEEN +#define MULTIVARGAMMA_SEEN + +double +MultivarLnGamma(const int k, const double a); + +#endif +/* + Theseus - maximum likelihood superpositioning of macromolecular structures + + Copyright (C) 2004-2014 Douglas L. Theobald + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: + + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA + + -/_|:|_|_\- +*/ + #ifndef REGGAMMA_SEEN #define REGGAMMA_SEEN @@ -297,918 +409,94 @@ InGammaP(double a, double x); #endif -#ifndef VECUTILS_SEEN -#define VECUTILS_SEEN - -void -VecPrint(double *vec, const int size); - -void -InvRotVec(double *newvec, double *vec, double **rotmat); - -void -RotVec(double *newvec, double *vec, double **rotmat); - -int -VecEq(const double *vec1, const double *vec2, const int len, const double tol); - -void -RevVecIp(double *vec, const int len); - -double -VecSmallest(double *vec, const int len); - -double -VecBiggest(double *vec, const int len); - -#endif /* !MATRIXUTILS_SEEN */ -#ifndef EIGEN_SEEN -#define EIGEN_SEEN - -double -*NormalizeVec(double *vect); - -void -EigenSort(double **eigenvectors, double *eigenvalues); - -void -EigenSort3(double **eigenvectors, double *eigenvalues, double *tmpevec); - -void -EigenSort3b(double **eigenvectors, double *eigenvalues); - -void -EigenSort4(double **eigenvectors, double *eigenvalues); +/* + Theseus - maximum likelihood superpositioning of macromolecular structures -void -EvalSort4(double *eigenvalues); + Copyright (C) 2004-2014 Douglas L. Theobald -void -CopyEvec(double *evec1, double *evec2, int length); + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. -void -SwapEvec(double *evec1, double *evec2, int length); + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
-void -Swap3Evec(double *evec1, double *evec2, double *tmpevec); + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: -void -eigen3(double **z, double *eigenval); + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA -void -Eigen4Min(double **eigenvectors, double *eigenvalues); + -/_|:|_|_\- +*/ -void -eigen4(double **Q, double *eigenval); +#ifndef VECUTILS_SEEN +#define VECUTILS_SEEN void -eigenval4(double **Q, double *eigenval); - -double -pythag(double a, double b); +VecPrint(double *vec, const int size); void -tred24(double **a, double *d, double *e); +InvRotVec(double *newvec, double *vec, double **rotmat); void -tred24vals(double **a, double *d, double *e); +RotVec(double *newvec, double *vec, double **rotmat); void -tqli4(double *d, double *e, double **z); +InvRotVecAdd(double *newvec, double *vec, double **rotmat); void -tqli4vals(double *d, double *e, double **z); +RotVecAdd(double *newvec, double *vec, double **rotmat); int -jacobi3(double **a, double *d, double **v, double tol); - -int -jacobi3_cyc(double **a, double *d, double **v, double tol); - -void -jacobi4(double **a, double *d, double **v); - -void -rotate(double **a, double s, double tau, - int i, int j, int k, int l); - -double -InvSymEigenOp(double **invmat, const double **mat, int n, - double *evals, double **evecs, const double tol); - -void -eigensym(const double **mat, double *evals, double **evecs, int n); - -void -eigensym2(const double **mat, double *evals, double **evecs, int n, double *work); - -void -eigenvalsym(const double **mat, double *evals, double **evecs, int n); - -void -eigenvalsym2(const double **mat, double *evals, double **evecs, int n, double *work); - -void -eigengen(const double **mat, double *evals, double **evecs, int n); - -void -transevecs(double **mat, int len); - -void -eigen_quicksort(double *evals, double **evecs, int len); +VecEq(const double *vec1, const 
double *vec2, const int len, const double tol); void -EigenReconSym(double **mat, const double **evecs, const double *evals, const int n); - -int -SymbolicEigen4 (double **mat, double *evals); - -#endif - -#ifndef INTEGRATE_SEEN -#define INTEGRATE_SEEN - -double -trapzd(double (*func)(double, double, double), - double param1, double param2, double a, double b, int n); +RevVecIp(double *vec, const int len); double -integrate_qsimp(double (*func)(double, double, double), - double param1, double param2, double a, double b); +VecSmallest(double *vec, const int len); double -integrate_romberg(double (*f)(double a, double p1, double p2), - double p1, double p2, double a, double b); - -#endif - - -void -matinv(double **a, double **outmat, int N, int *indx); - -void -lubksb(double **a, int n, int *indx, double b[]); - -void -ludcmp(double **a, int n, int *indx, double *d); -#ifndef MYRANDOM_SEEN -#define MYRANDOM_SEEN - -void -init_genrand(unsigned long s); +VecBiggest(double *vec, const int len); -void -init_by_array(unsigned long init_key[], unsigned long key_length); - -unsigned long -genrand_int32(void); - -long -genrand_int31(void); - -double -genrand_real1(void); - -double -genrand_real2(void); - -double -genrand_real3(void); - -double -genrand_res53(void); - -double -expondev(void); - -double -gaussdev(void); - -double -Normal(void); - -void -shuffle(int *a, int n); - -void -shufflef(double *a, int n); - -#endif -#ifndef ALGO_BLAST_CORE__NCBIMATH -#define ALGO_BLAST_CORE__NCBIMATH - -/* $Id: ncbi_math.h,v 1.11 2005/03/10 16:12:59 papadopo Exp $ - * =========================================================================== - * - * PUBLIC DOMAIN NOTICE - * National Center for Biotechnology Information - * - * This software/database is a "United States Government Work" under the - * terms of the United States Copyright Act. It was written as part of - * the author's official duties as a United States Government employee and - * thus cannot be copyrighted. 
This software/database is freely available - * to the public for use. The National Library of Medicine and the U.S. - * Government have not placed any restriction on its use or reproduction. - * - * Although all reasonable efforts have been taken to ensure the accuracy - * and reliability of the software and data, the NLM and the U.S. - * Government do not and cannot warrant the performance or results that - * may be obtained by using this software or data. The NLM and the U.S. - * Government disclaim all warranties, express or implied, including - * warranties of performance, merchantability or fitness for any particular - * purpose. - * - * Please cite the author in any work or product based on this material. - * - * =========================================================================== - * - * Authors: Gish, Kans, Ostell, Schuler - * - * Version Creation Date: 10/23/91 - * - * ========================================================================== - */ - -/** @file ncbi_math.h - * Prototypes for portable math library (ported from C Toolkit) - */ - -/*#include -#include */ - -double -s_PolyGamma(double x, int order); - -/** Natural logarithm with shifted input - * @param x input operand (x > -1) - * @return log(x+1) - */ - -double BLAST_Log1p (double x); - -/** Exponentional with base e - * @param x input operand - * @return exp(x) - 1 - */ - -double BLAST_Expm1 (double x); - -/** Factorial function - * @param n input operand - * @return (double)(1 * 2 * 3 * ... * n) - */ - -double BLAST_Factorial(int n); - -/** Logarithm of the factorial - * @param x input operand - * @return log(1 * 2 * 3 * ... * x) - */ - -double BLAST_LnFactorial (double x); - -/** log(gamma(n)), integral n - * @param n input operand - * @return log(1 * 2 * 3 * ... 
(n-1)) - */ - -double BLAST_LnGammaInt (int n); - -/** Romberg numerical integrator - * @param f Pointer to the function to integrate; the first argument - * is the variable to integrate over, the second is a pointer - * to a list of additional arguments that f may need - * @param fargs Pointer to an array of extra arguments or parameters - * needed to compute the function to be integrated. None - * of the items in this list may vary over the region - * of integration - * @param p Left-hand endpoint of the integration interval - * @param q Right-hand endpoint of the integration interval - * (q is assumed > p) - * @param eps The relative error tolerance that indicates convergence - * @param epsit The number of consecutive diagonal entries in the - * Romberg array whose relative difference must be less than - * eps before convergence is assumed. This is presently - * limited to 1, 2, or 3 - * @param itmin The minimum number of diagnonal Romberg entries that - * will be computed - * @return The computed integral of f() between p and q - */ - -double BLAST_RombergIntegrate (double (*f) (double, void*), - void* fargs, double p, double q, - double eps, int epsit, int itmin); - -/** Greatest common divisor - * @param a First operand (any integer) - * @param b Second operand (any integer) - * @return The largest integer that evenly divides a and b - */ - -int BLAST_Gcd (int a, int b); - -/** Divide 3 numbers by their greatest common divisor - * @param a First integer [in] [out] - * @param b Second integer [in] [out] - * @param c Third integer [in] [out] - * @return The greatest common divisor - */ - -int BLAST_Gdb3(int* a, int* b, int* c); - -/** Nearest integer - * @param x Input to round (rounded value must be representable - * as a 32-bit signed integer) - * @return floor(x + 0.5); - */ - -long BLAST_Nint (double x); - -/** Integral power of x - * @param x floating-point base of the exponential - * @param n (integer) exponent - * @return x multiplied by itself n times - 
*/ - -double BLAST_Powi (double x, int n); - -/** Number of derivatives of log(x) to carry in gamma-related - computations */ -#define LOGDERIV_ORDER_MAX 4 -/** Number of derivatives of polygamma(x) to carry in gamma-related - computations for non-integral values of x */ -#define POLYGAMMA_ORDER_MAX LOGDERIV_ORDER_MAX - -/** value of pi is only used in gamma-related computations */ -#define NCBIMATH_PI 3.1415926535897932384626433832795 - -/** Natural log(2) */ -#define NCBIMATH_LN2 0.69314718055994530941723212145818 -/** Natural log(PI) */ -#define NCBIMATH_LNPI 1.1447298858494001741434273513531 - -#ifdef __cplusplus -} -#endif - -/* - * =========================================================================== - * - * $Log: ncbi_math.h,v $ - * Revision 1.11 2005/03/10 16:12:59 papadopo - * doxygen fixes - * - * Revision 1.10 2004/11/18 21:22:10 dondosha - * Added BLAST_Gdb3, used in greedy alignment; removed extern and added to all prototypes - * - * Revision 1.9 2004/11/02 13:54:33 papadopo - * small doxygen fixes - * - * Revision 1.8 2004/11/01 16:37:57 papadopo - * Add doxygen tags, remove unused constants - * - * Revision 1.7 2004/05/19 14:52:01 camacho - * 1. Added doxygen tags to enable doxygen processing of algo/blast/core - * 2. Standardized copyright, CVS $Id string, $Log and rcsid formatting and i - * location - * 3. 
Added use of @todo doxygen keyword - * - * Revision 1.6 2003/09/26 20:38:12 dondosha - * Returned prototype for the factorial function (BLAST_Factorial) - * - * Revision 1.5 2003/09/26 19:02:31 madden - * Prefix ncbimath functions with BLAST_ - * - * Revision 1.4 2003/09/10 21:35:20 dondosha - * Removed Nlm_ prefix from math functions - * - * Revision 1.3 2003/08/25 22:30:24 dondosha - * Added LnGammaInt definition and Factorial prototype - * - * Revision 1.2 2003/08/11 14:57:16 dondosha - * Added algo/blast/core path to all #included headers - * - * Revision 1.1 2003/08/02 16:32:11 camacho - * Moved ncbimath.h -> ncbi_math.h - * - * Revision 1.2 2003/08/01 21:18:48 dondosha - * Correction of a #include - * - * Revision 1.1 2003/08/01 21:03:40 madden - * Cleaned up version of file for C++ toolkit - * - * =========================================================================== - */ - - -#endif /* !ALGO_BLAST_CORE__NCBIMATH */ - -#ifndef QUICKSORT_SEEN -#define QUICKSORT_SEEN - -/*--------------- quicksort.h --------------*/ -/* - * The key TYPE. - * COARRAY_T is the type of the companion array - * The keys are the array items moved with the SWAP macro - * around using the SWAP macro. 
- * the comparison macros can compare either the key or things - * referenced by the key (if its a pointer) - */ -typedef double KEY_T; -typedef char *COARRAY_T; -/* - * The comparison macros: - * - * GT(x, y) as (strcmp((x),(y)) > 0) - * LT(x, y) as (strcmp((x),(y)) < 0) - * GE(x, y) as (strcmp((x),(y)) >= 0) - * LE(x, y) as (strcmp((x),(y)) <= 0) - * EQ(x, y) as (strcmp((x),(y)) == 0) - * NE(x, y) as (strcmp((x),(y)) != 0) - */ -#define GT(x, y) ((x) > (y)) -#define LT(x, y) ((x) < (y)) -#define GE(x, y) ((x) >= (y)) -#define LE(x, y) ((x) <= (y)) -#define EQ(x, y) ((x) == (y)) -#define NE(x, y) ((x) != (y)) - -/* - * Swap macro: - */ - -/* double tempd; */ -/* char *tempc; */ -/* */ -/* #define SWAPD(x, y) tempd = (x); (x) = (y); (y) = tempd */ -/* #define SWAPC(x, y) tempc = (x); (x) = (y); (y) = tempc */ - -extern void -swapd(double *x, double *y); - -extern void -swapc(char **x, char **y); - -extern void -insort2 (KEY_T *array1, COARRAY_T *array2, int len); - -extern void -insort2d (KEY_T *array1, KEY_T *array2, int len); - -extern void -insort (KEY_T *array1, int len); - -extern void -partial_quicksort2 (KEY_T *array1, COARRAY_T *array2, int lower, int upper); - -extern void -partial_quicksort2d (KEY_T *array1, KEY_T *array2, int lower, int upper); - -extern void -partial_quicksort (KEY_T *array, int lower, int upper); - -extern void -quicksort2 (KEY_T *array1, COARRAY_T *array2, int len); - -extern void -quicksort2d (KEY_T *array1, KEY_T *array2, int len); - -extern void -quicksort (KEY_T *array, int len); - -#endif -/* ------------------------------------------------------------- - * Name : rvms.h (header file for the library rvms.c) - * Author : Steve Park & Dave Geyer - * Language : ANSI C - * Latest Revision : 11-02-96 - * -------------------------------------------------------------- - */ - -#if !defined( _RVMS_ ) -#define _RVMS_ - -double LogFactorial(long n); -double LogChoose(long n, long m); - -double pdfBernoulli(double p, long x); -double 
cdfBernoulli(double p, long x); -long idfBernoulli(double p, double u); - -double pdfEquilikely(long a, long b, long x); -double cdfEquilikely(long a, long b, long x); -long idfEquilikely(long a, long b, double u); - -double pdfBinomial(long n, double p, long x); -double cdfBinomial(long n, double p, long x); -long idfBinomial(long n, double p, double u); - -double pdfGeometric(double p, long x); -double cdfGeometric(double p, long x); -long idfGeometric(double p, double u); - -double pdfPascal(long n, double p, long x); -double cdfPascal(long n, double p, long x); -long idfPascal(long n, double p, double u); - -double pdfPoisson(double m, long x); -double cdfPoisson(double m, long x); -long idfPoisson(double m, double u); - -double pdfUniform(double a, double b, double x); -double cdfUniform(double a, double b, double x); -double idfUniform(double a, double b, double u); - -double pdfExponential(double m, double x); -double cdfExponential(double m, double x); -double idfExponential(double m, double u); - -double pdfErlang(long n, double b, double x); -double cdfErlang(long n, double b, double x); -double idfErlang(long n, double b, double u); - -double pdfNormal(double m, double s, double x); -double cdfNormal(double m, double s, double x); -double idfNormal(double m, double s, double u); - -double pdfLognormal(double a, double b, double x); -double cdfLognormal(double a, double b, double x); -double idfLognormal(double a, double b, double u); - -double pdfChisquare(long n, double x); -double cdfChisquare(long n, double x); -double idfChisquare(long n, double u); - -double pdfStudent(long n, double x); -double cdfStudent(long n, double x); -double idfStudent(long n, double u); - -#endif -#ifndef SPECFUNC_SEEN -#define SPECFUNC_SEEN - -double -BesselI(const double nu, const double z); - -double -BesselI0(const double z); - -double -BesselI1(const double z); - -double -bessi(const int n, const double x); - -double -bessi0(const double x); - -double -bessi1(const 
double x); - -double -UpperIncompleteGamma(const double a, const double x); - -double -gammp(const double a, const double x); - -double -gammq(const double a, const double x); - -double -gcf(double a, double x); - -double -gser(double a, double x); - -double -IncompleteGamma(const double x, const double alpha); - -double -lngamma(const double xx); - -double -mygamma(const double xx); - -double -harmonic(int x); - -double -polygamma(int k, double x); - -double -betai(double a, double b, double x); - -double -betacf(double a, double b, double x); - -double -beta(double z, double w); - -double -mysquare(const double val); - -double -mycube(const double val); - -double -mypow4(double val); - -#endif -#ifndef MAT3UTILS_SEEN -#define MAT3UTILS_SEEN - -void -Mat3Print(double **matrix); - -double -**Mat3Ident(double **matrix); - -int -Mat3Eq(const double **matrix1, const double **matrix2, const double precision); - -int -Mat3FrobEq(const double **matrix1, const double **matrix2, const double precision); - -void -Mat3Cpy(double **matrix2, const double **matrix1); - -void -Mat3MultOp(double **C, const double **A, const double **B); - -void -Mat3MultIp(double **A, const double **B); - -void -Mat3MultUSVOp(double **C, const double **U, double *S, const double **V); - -void -Mat3PreMultIp(const double **A, double **B); - -void -Mat3Sqr(double **C, const double **A); - -void -Mat3SqrTrans2(double **C, const double **A); - -void -Mat3SqrTrans1(double **C, const double **A); - -void -Mat3TransSqr(double **C, const double **A); - -void -Mat3MultTransA(double **C, const double **A, const double **B); - -void -Mat3MultTransB(double **C, const double **A, const double **B); - -void -Mat3Add(double **C, const double **A, const double **B); - -void -Mat3Sub(double **A, double **B, double **C); - -void -Mat3TransposeIp(double **matrix); - -void -Mat3TransposeOp(double **matrix2, const double **matrix1); - -double -Mat3Det(const double **matrix); - -void -Mat3Invert(double **outmat, const 
double **inmat); - -void -Mat3SymInvert(double **outmat, const double **inmat); - -void -Mat3MultVec(double *outv, const double **inmat, const double *vec); - -int -VerifyRotMat(double **rotmat, double tol); - -double -**ClosestRotMat(double **inmat); - -void -ClosestRotMatIp(double **inmat); - -#endif /* !MATRIXUTILS_SEEN */ -#ifndef MAT4UTILS_SEEN -#define MAT4UTILS_SEEN - -void -Mat4Print(double **matrix); - -void -Mat4Copy(double **matrix2, const double **matrix1); - -void -Mat4TransposeIp(double **matrix); - -void -Mat4TransposeOp(double **matrix2, const double **matrix1); - -#endif /* !MATRIXUTILS_SEEN */ -#ifndef MATUTILS_SEEN -#define MATUTILS_SEEN - -typedef struct -{ - int rows; - int cols; - int depth; - double ***matrix; - double **matrixc; - double *matrixd; -} Matrix3D; - -void -MatPrint(double **matrix, const int size); - -void -MatPrintRec(double **matrix, const int n, const int m); - -void -MatDestroy(double ***matrix_ptr); - -double -**MatAlloc(const int rows, const int cols); - -void -MatIntDestroy(int ***matrix_ptr); - -int -**MatIntInit(const int rows, const int cols); - -Matrix3D -*Mat3DInit(const int rows, const int cols, const int depth); - -void -Mat3DDestroy(Matrix3D **matrix3d_ptr); - -double -MatFrobNorm(const double **mat1, const double **mat2, const int row, const int col); - -double -MatDiff(const double **mat1, const double **mat2, const int row, const int col); - -void -MatCpySym(double **matrix2, const double **matrix1, const int dim); - -void -MatCpySymgen(double **matrix2, const double **matrix1, const int rows, const int cols); - -void -MatMultGenUSVOp(double **c, const double **u, double *s, const double **v, - const int udim, const int sdim, const int vdim); - -void -MatMultGen(double **C, const double **A, const int ni, const int nk, const double **B, const int nj); - -void -MatMultGenIp(double **A, const int nk, const int ni, const double **B, const int nj); - -void -MatTransMultGen(double **C, const double **A, const int 
ni, const int nk, const double **B, const int nj); - -void -MatTransMultGenIp(double **A, const int nk, const int ni, const double **B, const int nj); - -void -MatMultSym(double **C, const double **A, const double **B, const int len); - -void -MatMultSymDiag(double **C, const double **A, const double **B, const int len); - -void -MatTransIp(double **mat, const int dim); - -void -MatTransOp(double **outmat, const double **inmat, const int dim); - -void -cholesky(double **mat, const int dim, double *p); - -double -MatDet(const double **mat, const int dim); - -double -MatGenLnDet(const double **mat, const int dim); - -double -MatSymLnDet(const double **mat, const int dim); - -double -MatTrace(const double **mat, const int dim); - -int -TestZeroOffDiag(const double **mat, const int dim, const double precision); - -int -TestIdentMat(const double **mat, const int dim, const double precision); - -double -FrobDiffNormIdentMat(const double **mat, const int dim); - -#endif /* !MATRIXUTILS_SEEN */ -#ifndef REGGAMMA_SEEN -#define REGGAMMA_SEEN - -/* double */ -/* IncompleteGamma (double theA, double theX); */ -/* */ -/* double */ -/* regularizedGammaP(double a, double x, double epsilon, int maxIterations); */ -/* */ -/* double */ -/* regularizedGammaQ(double a, double x, double epsilon, int maxIterations); */ -/* */ -/* double */ -/* gamain( double x, double p, double g ); */ -/* */ -/* double */ -/* gamln( double x ); */ -/* */ -/* void */ -/* grat1(double a, double x, double r, double *p, double *q, */ -/* double eps); */ - -double -InBeta(double a, double b, double x); - -double -InGamma(double a, double x); - -double -InGammaQ(double a, double x); - -double -InGammaP(double a, double x); - -#endif -#ifndef VECUTILS_SEEN -#define VECUTILS_SEEN - -void -VecPrint(double *vec, const int size); - -void -InvRotVec(double *newvec, double *vec, double **rotmat); - -void -RotVec(double *newvec, double *vec, double **rotmat); - -int -VecEq(const double *vec1, const double *vec2, 
const int len, const double tol); - -void -RevVecIp(double *vec, const int len); - -double -VecSmallest(double *vec, const int len); - -double -VecBiggest(double *vec, const int len); - -#endif /* !MATRIXUTILS_SEEN */ -#ifndef EIGEN_SEEN -#define EIGEN_SEEN - -double -*NormalizeVec(double *vect); +#endif /* !MATRIXUTILS_SEEN */ +/* + Theseus - maximum likelihood superpositioning of macromolecular structures -void -EigenSort(double **eigenvectors, double *eigenvalues); + Copyright (C) 2004-2014 Douglas L. Theobald -void -EigenSort3(double **eigenvectors, double *eigenvalues, double *tmpevec); + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. -void -EigenSort4(double **eigenvectors, double *eigenvalues); + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
-void -EvalSort4(double *eigenvalues); + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: -void -CopyEvec(double *evec1, double *evec2, int length); + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA -void -SwapEvec(double *evec1, double *evec2, int length); + -/_|:|_|_\- +*/ -void -Swap3Evec(double *evec1, double *evec2, double *tmpevec); +#ifndef EIGEN_SEEN +#define EIGEN_SEEN void eigen3(double **z, double *eigenval); void -Eigen4Min(double **eigenvectors, double *eigenvalues); - -void eigen4(double **Q, double *eigenval); void @@ -1247,33 +535,89 @@ double *evals, double **evecs, const double tol); void -eigensym(const double **mat, double *evals, double **evecs, int n); +EigenReconSym(double **mat, const double **evecs, const double *evals, const int n); + +#endif + +/* + Theseus - maximum likelihood superpositioning of macromolecular structures + + Copyright (C) 2004-2010 Douglas L. Theobald + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: + + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA + + -/_|:|_|_\- +*/ +#ifndef EIGEN_GSL_SEEN +#define EIGEN_GSL_SEEN void -eigensym2(const double **mat, double *evals, double **evecs, int n, double *work); +EigenvalsGSL(const double **mat, const int dim, double *eval); void -eigenvalsym(const double **mat, double *evals, double **evecs, int n); +EigenvalsGSLDest(double **mat, const int dim, double *eval); void -eigenvalsym2(const double **mat, double *evals, double **evecs, int n, double *work); +EigenGSL(const double **mat, const int dim, double *eval, double **evec, int order); void -eigengen(const double **mat, double *evals, double **evecs, int n); +EigenGSLDest(double **mat, const int dim, double *eval, double **evec, int order); void -transevecs(double **mat, int len); +CalcGSLSVD3(double **a, double **u, double *s, double **vt); void -eigen_quicksort(double *evals, double **evecs, int len); +svdGSLDest(double **A, const int dim, double *singval, double **V); void -EigenReconSym(double **mat, const double **evecs, const double *evals, const int n); +svdGSLJacobiDest(double **A, const int dim, double *singval, double **V); -int -SymbolicEigen4 (double **mat, double *evals); +void +CholeskyGSLDest(double **A, const int dim); + +void +PseudoinvSymGSL(const double **inmat, double **outmat, int n, double tol); #endif +/* + Theseus - maximum likelihood superpositioning of macromolecular structures + + Copyright (C) 2004-2014 Douglas L. Theobald + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: + + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA + + -/_|:|_|_\- +*/ #ifndef INTEGRATE_SEEN #define INTEGRATE_SEEN @@ -1290,64 +634,32 @@ integrate_romberg(double (*f)(double a, double p1, double p2), double p1, double p2, double a, double b); +#endif +/* + Theseus - maximum likelihood superpositioning of macromolecular structures -int -Dgesvd(char jobu, char jobvt, int m, int n, - double **a, int lda, double *s, - double **u, int ldu, - double **vt, int ldvt, - double *work, int lwork); - -int -dgesvd_opt_dest(double **a, int m, int n, - double **u, double *s, double **vt); - -int -dgesvd_opt_save(double **a, int m, int n, - double **u, double *s, double **vt); - -int -Dsyev(char jobz_v, char uplo_u, - int n, double **amat, double *w, - double *work, int lwork); - -int -dsyev_opt_dest(double **amat, int n, double *w); - -int -dsyev_opt_save(double **amat, int n, double **evecs, double *evals); - -int -Dsyevr(char jobz, char range, char uplo, int n, - double **a, int lda, - double vl, double vu, - int il, int iu, - double abstol, int m, double *w, - double **z__, int ldz, int *isuppz, - double *work, int lwork, - int *iwork, int liwork); + Copyright (C) 2004-2014 Douglas L. Theobald -int -dsyevr_opt_dest(double **mat, int n, - int lower, int upper, - double *evals, double **evecs, - double abstol); + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
-int -dsyevr_opt_save(const double **amat, int n, - int lower, int upper, - double *evals, double **evecs); + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. -void -dpotr_invert(double **mat, int idim); + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: -int -dpotrf_opt_dest(double **amat, int dim); + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA -int -pseudoinv_sym(double **inmat, double **outmat, int n, const double tol); + -/_|:|_|_\- +*/ -#endif void matinv(double **a, double **outmat, int N, int *indx); @@ -1356,6 +668,31 @@ void ludcmp(double **a, int n, int *indx, double *d); +/* + Theseus - maximum likelihood superpositioning of macromolecular structures + + Copyright (C) 2004-2014 Douglas L. Theobald + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: + + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA + + -/_|:|_|_\- +*/ + #ifndef MYRANDOM_SEEN #define MYRANDOM_SEEN @@ -1399,6 +736,31 @@ shufflef(double *a, int n); #endif +/* + Theseus - maximum likelihood superpositioning of macromolecular structures + + Copyright (C) 2004-2014 Douglas L. 
Theobald + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: + + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA + + -/_|:|_|_\- +*/ + #ifndef ALGO_BLAST_CORE__NCBIMATH #define ALGO_BLAST_CORE__NCBIMATH @@ -1438,48 +800,48 @@ * Prototypes for portable math library (ported from C Toolkit) */ -/*#include +/*#include #include */ -double +double s_PolyGamma(double x, int order); /** Natural logarithm with shifted input * @param x input operand (x > -1) * @return log(x+1) */ - + double BLAST_Log1p (double x); -/** Exponentional with base e +/** Exponentional with base e * @param x input operand * @return exp(x) - 1 */ - + double BLAST_Expm1 (double x); /** Factorial function * @param n input operand * @return (double)(1 * 2 * 3 * ... * n) */ - + double BLAST_Factorial(int n); -/** Logarithm of the factorial +/** Logarithm of the factorial * @param x input operand * @return log(1 * 2 * 3 * ... * x) */ - + double BLAST_LnFactorial (double x); -/** log(gamma(n)), integral n +/** log(gamma(n)), integral n * @param n input operand * @return log(1 * 2 * 3 * ... 
(n-1)) */ - + double BLAST_LnGammaInt (int n); -/** Romberg numerical integrator +/** Romberg numerical integrator * @param f Pointer to the function to integrate; the first argument * is the variable to integrate over, the second is a pointer * to a list of additional arguments that f may need @@ -1491,25 +853,25 @@ * @param q Right-hand endpoint of the integration interval * (q is assumed > p) * @param eps The relative error tolerance that indicates convergence - * @param epsit The number of consecutive diagonal entries in the + * @param epsit The number of consecutive diagonal entries in the * Romberg array whose relative difference must be less than - * eps before convergence is assumed. This is presently + * eps before convergence is assumed. This is presently * limited to 1, 2, or 3 * @param itmin The minimum number of diagnonal Romberg entries that * will be computed * @return The computed integral of f() between p and q */ - -double BLAST_RombergIntegrate (double (*f) (double, void*), - void* fargs, double p, double q, + +double BLAST_RombergIntegrate (double (*f) (double, void*), + void* fargs, double p, double q, double eps, int epsit, int itmin); -/** Greatest common divisor +/** Greatest common divisor * @param a First operand (any integer) * @param b Second operand (any integer) * @return The largest integer that evenly divides a and b */ - + int BLAST_Gcd (int a, int b); /** Divide 3 numbers by their greatest common divisor @@ -1518,29 +880,29 @@ * @param c Third integer [in] [out] * @return The greatest common divisor */ - + int BLAST_Gdb3(int* a, int* b, int* c); -/** Nearest integer +/** Nearest integer * @param x Input to round (rounded value must be representable * as a 32-bit signed integer) * @return floor(x + 0.5); */ - + long BLAST_Nint (double x); -/** Integral power of x +/** Integral power of x * @param x floating-point base of the exponential * @param n (integer) exponent * @return x multiplied by itself n times */ - + double BLAST_Powi 
(double x, int n); -/** Number of derivatives of log(x) to carry in gamma-related +/** Number of derivatives of log(x) to carry in gamma-related computations */ -#define LOGDERIV_ORDER_MAX 4 -/** Number of derivatives of polygamma(x) to carry in gamma-related +#define LOGDERIV_ORDER_MAX 4 +/** Number of derivatives of polygamma(x) to carry in gamma-related computations for non-integral values of x */ #define POLYGAMMA_ORDER_MAX LOGDERIV_ORDER_MAX @@ -1608,187 +970,39 @@ #endif /* !ALGO_BLAST_CORE__NCBIMATH */ -#ifndef QUICKSORT_SEEN -#define QUICKSORT_SEEN - -/*--------------- quicksort.h --------------*/ -/* - * The key TYPE. - * COARRAY_T is the type of the companion array - * The keys are the array items moved with the SWAP macro - * around using the SWAP macro. - * the comparison macros can compare either the key or things - * referenced by the key (if its a pointer) - */ -typedef double KEY_T; -typedef char *COARRAY_T; -/* - * The comparison macros: - * - * GT(x, y) as (strcmp((x),(y)) > 0) - * LT(x, y) as (strcmp((x),(y)) < 0) - * GE(x, y) as (strcmp((x),(y)) >= 0) - * LE(x, y) as (strcmp((x),(y)) <= 0) - * EQ(x, y) as (strcmp((x),(y)) == 0) - * NE(x, y) as (strcmp((x),(y)) != 0) - */ -#define GT(x, y) ((x) > (y)) -#define LT(x, y) ((x) < (y)) -#define GE(x, y) ((x) >= (y)) -#define LE(x, y) ((x) <= (y)) -#define EQ(x, y) ((x) == (y)) -#define NE(x, y) ((x) != (y)) /* - * Swap macro: - */ - -/* double tempd; */ -/* char *tempc; */ -/* */ -/* #define SWAPD(x, y) tempd = (x); (x) = (y); (y) = tempd */ -/* #define SWAPC(x, y) tempc = (x); (x) = (y); (y) = tempc */ - -extern void -swapd(double *x, double *y); - -extern void -swapc(char **x, char **y); - -extern void -insort2 (KEY_T *array1, COARRAY_T *array2, int len); - -extern void -insort2d (KEY_T *array1, KEY_T *array2, int len); - -extern void -insort (KEY_T *array1, int len); - -extern void -partial_quicksort2 (KEY_T *array1, COARRAY_T *array2, int lower, int upper); - -extern void -partial_quicksort2d 
(KEY_T *array1, KEY_T *array2, int lower, int upper); - -extern void -partial_quicksort (KEY_T *array, int lower, int upper); - -extern void -quicksort2 (KEY_T *array1, COARRAY_T *array2, int len); - -extern void -quicksort2d (KEY_T *array1, KEY_T *array2, int len); - -extern void -quicksort (KEY_T *array, int len); - -#endif -/* ------------------------------------------------------------- - * Name : rvms.h (header file for the library rvms.c) - * Author : Steve Park & Dave Geyer - * Language : ANSI C - * Latest Revision : 11-02-96 - * -------------------------------------------------------------- - */ - -#if !defined( _RVMS_ ) -#define _RVMS_ - -double LogFactorial(long n); -double LogChoose(long n, long m); - -double pdfBernoulli(double p, long x); -double cdfBernoulli(double p, long x); -long idfBernoulli(double p, double u); - -double pdfEquilikely(long a, long b, long x); -double cdfEquilikely(long a, long b, long x); -long idfEquilikely(long a, long b, double u); - -double pdfBinomial(long n, double p, long x); -double cdfBinomial(long n, double p, long x); -long idfBinomial(long n, double p, double u); - -double pdfGeometric(double p, long x); -double cdfGeometric(double p, long x); -long idfGeometric(double p, double u); - -double pdfPascal(long n, double p, long x); -double cdfPascal(long n, double p, long x); -long idfPascal(long n, double p, double u); - -double pdfPoisson(double m, long x); -double cdfPoisson(double m, long x); -long idfPoisson(double m, double u); - -double pdfUniform(double a, double b, double x); -double cdfUniform(double a, double b, double x); -double idfUniform(double a, double b, double u); - -double pdfExponential(double m, double x); -double cdfExponential(double m, double x); -double idfExponential(double m, double u); - -double pdfErlang(long n, double b, double x); -double cdfErlang(long n, double b, double x); -double idfErlang(long n, double b, double u); - -double pdfNormal(double m, double s, double x); -double 
cdfNormal(double m, double s, double x); -double idfNormal(double m, double s, double u); - -double pdfLognormal(double a, double b, double x); -double cdfLognormal(double a, double b, double x); -double idfLognormal(double a, double b, double u); - -double pdfChisquare(long n, double x); -double cdfChisquare(long n, double x); -double idfChisquare(long n, double u); - -double pdfStudent(long n, double x); -double cdfStudent(long n, double x); -double idfStudent(long n, double u); - -#endif - -#ifndef EIGEN_GSL_SEEN -#define EIGEN_GSL_SEEN - -void -EigenvalsGSL(double **mat, const int dim, double *eval); - -void -EigenvalsGSLDest(double **mat, const int dim, double *eval); - -void -EigenGSL(double **mat, const int dim, double *eval, double **evec, int order); - -void -EigenGSLDest(double **mat, const int dim, double *eval, double **evec, int order); - -void -svdGSLDest(double **A, const int dim, double *singval, double **V); + Theseus - maximum likelihood superpositioning of macromolecular structures -void -CholeskyGSLDest(double **A, const int dim); + Copyright (C) 2004-2014 Douglas L. Theobald -void -PseudoinvSymGSL(double **inmat, double **outmat, int n, double tol); + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. -#endif + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
-#ifndef MULTIVARGAMMA_SEEN -#define MULTIVARGAMMA_SEEN + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: -double -MultivarLnGamma(const int k, const double a); + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA -#endif + -/_|:|_|_\- +*/ #ifndef SPECFUNC_SEEN #define SPECFUNC_SEEN double +Hermite(const int n, const double x); + +double BesselI(const double nu, const double z); double @@ -1845,8 +1059,6 @@ double beta(double z, double w); -#endif - int findmin(const double *vec, const int len); diff -Nru theseus-2.0.6/DLTutils.h theseus-3.0.0/DLTutils.h --- theseus-2.0.6/DLTutils.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/DLTutils.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,24 +37,6 @@ #define BUFFLEN FILENAME_MAX -#if defined(__APPLE__) - #if !defined(inline) - #define inline __inline__ - #endif - #include - #include - #include - #include - #include - #include - - UInt64 - getTime(void); - - double - seconds(void); -#endif - double *memsetd(double *dest, const double val, size_t len); diff -Nru theseus-2.0.6/Embed.c theseus-3.0.0/Embed.c --- theseus-2.0.6/Embed.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/Embed.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,6 +25,7 @@ /* -/_|:|_|_\- */ #include "Embed_local.h" +#include "Embed.h" void @@ -34,9 +35,9 @@ /* set up matrices and initialize for Lele distmat covariance calculations */ for (i = 0; i < cdsA->cnum; ++i) - cdsA->cds[i]->innerprod = MatAlloc(cdsA->vlen, cdsA->vlen); + cdsA->cds[i]->outerprod = MatAlloc(cdsA->vlen, cdsA->vlen); - cdsA->avecds->innerprod = MatAlloc(cdsA->vlen, cdsA->vlen); + cdsA->avecds->outerprod = MatAlloc(cdsA->vlen, cdsA->vlen); DistMatsAlloc(cdsA); } @@ -44,12 +45,9 @@ void CalcLeleCovMat(CdsArray *cdsA) { - double **MMT = NULL; double idf, cov_sum; int i, j, k; - MMT = MatAlloc(cdsA->vlen, cdsA->vlen); - idf = 1.0 / (3.0 * (double)(cdsA->cnum)); /* ML, biased, maybe should be n-1 to be unbiased?? */ for (i = 0; i < cdsA->cnum; ++i) @@ -63,7 +61,7 @@ { cov_sum = 0.0; for (k = 0; k < cdsA->cnum; ++k) - cov_sum += (cdsA->cds[k]->innerprod[i][j] - cdsA->avecds->innerprod[i][j]); + cov_sum += (cdsA->cds[k]->outerprod[i][j] - cdsA->avecds->outerprod[i][j]); cdsA->CovMat[i][j] = cov_sum * idf; } @@ -82,21 +80,6 @@ /* for (i = 0; i < cdsA->vlen; ++i) */ /* printf("\n -->> LeleCovVar = %7.3e ", cdsA->CovMat[i][i]); */ - - /* - for (i = 0; i < cdsA->vlen; ++i) - { - for (j = 0; j < cdsA->vlen; ++j) - { - if (i == j) - continue; - else - cdsA->CovMat[i][j] = 0.0; - } - } - */ - - MatDestroy(&MMT); } @@ -106,7 +89,7 @@ { double idf; double *var = cdsA->var; - const Cds **cds = (const Cds **) cdsA->cds; + const Cds **cds = (const Cds **) cdsA->cds; const double *avex = (const double *) cdsA->avecds->x, *avey = (const double *) cdsA->avecds->y, *avez = (const double *) cdsA->avecds->z; @@ -137,115 +120,22 @@ void -NNxCds(double **mat, Cds *cds) -{ - int i, k; - - /* (i x k)(k x j) = (i x j) */ - /* (N x N)(N x 3) = (N x 3) */ - for (i = 0; i < cds->vlen; ++i) - { - cds->covx[i] = cds->covy[i] = 
cds->covz[i] = 0.0; - for (k = 0; k < cds->vlen; ++k) - { - cds->covx[i] += (mat[i][k] * cds->x[k]); - cds->covy[i] += (mat[i][k] * cds->y[k]); - cds->covz[i] += (mat[i][k] * cds->z[k]); - } - } -} - - -void -CdsxNN(Cds *cds, const double **mat) -{ - int j, k; - - /* (i x k)(k x j) = (i x j) */ - /* (3 x N)(N x N) = (3 x N) */ - for (j = 0; j < cds->vlen; ++j) - { - cds->covx[j] = cds->covy[j] = cds->covz[j] = 0.0; - for (k = 0; k < cds->vlen; ++k) - { - cds->covx[j] += (cds->x[k] * mat[k][j]); - cds->covy[j] += (cds->y[k] * mat[k][j]); - cds->covz[j] += (cds->z[k] * mat[k][j]); - } - } -} - - -/* double - RadGyrSqr(Cds *cds) - - computes the unweighted square of the unaveraged radius of gyration - of a molecule basically the sum of the square of the distances of - each atom from the centroid (this function assumes the molecule has - been centered) -*/ -double -RadGyrSqr(const Cds *cds) -{ - int i; - double sum = 0.0; - - for (i = 0; i < cds->vlen; ++i) - sum += (mysquare(cds->x[i]) + mysquare(cds->y[i]) + mysquare(cds->z[i])); - - return(sum); -} - - -double -RadGyrSqrW(const Cds *cds, const double *weights) -{ - int i; - double sum = 0.0; - - for (i = 0; i < cds->vlen; ++i) - sum += weights[i] * (mysquare(cds->x[i]) + mysquare(cds->y[i]) + mysquare(cds->z[i])); - - return(sum); -} - - -double -RadGyrSqrCov(Cds *cds, const double **weightmat) -{ - int i; - double sum = 0.0; - - CdsxNN(cds, weightmat); - - for (i = 0; i < cds->vlen; ++i) - { - sum += (cds->covx[i] * cds->x[i]) + - (cds->covy[i] * cds->y[i]) + - (cds->covz[i] * cds->z[i]); - } - - return(sum); -} - - -void CdsInnerProd(Cds *cds) { /* (i x k)(k x j) = (i x j) */ /* (N x 3)(3 x N) = (N x N) */ int i, j; - double **innerprod = cds->innerprod; + double **outerprod = cds->outerprod; for (i = 0; i < cds->vlen; ++i) for (j = 0; j < cds->vlen; ++j) - cds->innerprod[i][j] = 0.0; + cds->outerprod[i][j] = 0.0; for (i = 0; i < cds->vlen; ++i) { for (j = 0; j <= i; ++j) { - innerprod[i][j] += (cds->x[i] * 
cds->x[j]) + outerprod[i][j] += (cds->x[i] * cds->x[j]) + (cds->y[i] * cds->y[j]) + (cds->z[i] * cds->z[j]); } @@ -253,7 +143,7 @@ for (i = 0; i < cds->vlen; ++i) for (j = 0; j < i; ++j) - cds->innerprod[j][i] = cds->innerprod[i][j]; + cds->outerprod[j][i] = cds->outerprod[i][j]; } @@ -263,21 +153,14 @@ /* (i x k)(k x j) = (i x j) */ /* (3 x N)(N x 3) = (3 x 3) */ int k; - double **innerprod2 = NULL; + double **innerprod = NULL; const double *x = (const double *) cds->x, *y = (const double *) cds->y, *z = (const double *) cds->z; double xk, yk, zk; - if (cds->innerprod2 == NULL) - { - innerprod2 = cds->innerprod2 = MatAlloc(3, 3); - } - else - { - innerprod2 = cds->innerprod2; - memset(&innerprod2[0][0], 0, 9 * sizeof(double)); - } + innerprod = cds->innerprod; + memset(&innerprod[0][0], 0, 9 * sizeof(double)); for (k = 0; k < cds->vlen; ++k) { @@ -285,73 +168,25 @@ yk = y[k]; zk = z[k]; - innerprod2[0][0] += (xk * xk); - innerprod2[1][1] += (yk * yk); - innerprod2[2][2] += (zk * zk); - innerprod2[0][1] += (xk * yk); - innerprod2[0][2] += (xk * zk); - innerprod2[1][2] += (yk * zk); + innerprod[0][0] += (xk * xk); + innerprod[1][1] += (yk * yk); + innerprod[2][2] += (zk * zk); + innerprod[0][1] += (xk * yk); + innerprod[0][2] += (xk * zk); + innerprod[1][2] += (yk * zk); } - innerprod2[1][0] = innerprod2[0][1]; - innerprod2[2][0] = innerprod2[0][2]; - innerprod2[2][1] = innerprod2[1][2]; + innerprod[1][0] = innerprod[0][1]; + innerprod[2][0] = innerprod[0][2]; + innerprod[2][1] = innerprod[1][2]; - /* Mat3Print(cds->innerprod2); */ -} - - -/* compute the inner product of a symmetrical matrix, in place */ -void -MatInnerProdSymIp(double **mat, const int N) -{ - int i, j, k; - double **workmat = NULL; - - workmat = MatAlloc(N, N); - - /* InnerProd = M^T M */ - /* (i x k)(k x j) = (i x j) */ - for (i = 0; i < N; ++i) - { - for (j = 0; j < N; ++j) - { - workmat[i][j] = 0.0; - for (k = 0; k < N; ++k) - workmat[i][j] += (mat[i][k] * mat[j][k]); - } - } - - for (i = 0; i < 
N; ++i) - for (j = 0; j < N; ++j) - mat[i][j] = workmat[i][j]; - - MatDestroy(&workmat); -} - - -void -MatInnerProdSymOp(double **mato, const double **mati, const int N) -{ - int i, j, k; - - /* InnerProd = M^T M */ - /* (i x k)(k x j) = (i x j) */ - for (i = 0; i < N; ++i) - { - for (j = 0; j < N; ++j) - { - mato[i][j] = 0.0; - for (k = 0; k < N; ++k) - mato[i][j] += (mati[i][k] * mati[j][k]); - } - } + /* Mat3Print(cds->innerprod); */ } /* Calculates EDMA average of distance cds, with chi^2 variance correction for - bias. This is an unbiased estimate of the average cds, *assuming that the - cds are distributed normally*. + bias. This is an unbiased estimate of the average cds, *assuming that the + cds are distributed normally*. */ void CalcEDMADistMat(CdsArray *cdsA) @@ -359,7 +194,7 @@ int i, j, k; const int len = cdsA->vlen; double normalize, off_diagonal, on_diagonal, varsqr; - double **H, **distmat; + double **H = NULL, **distmat = NULL; normalize = 1.0 / (double) cdsA->cnum; @@ -448,13 +283,13 @@ void -CalcEDMADistMatOcc(CdsArray *cdsA) +CalcEDMADistMatNu(CdsArray *cdsA) { int i, j, k; const int len = cdsA->vlen, cnum = cdsA->cnum; double off_diagonal, on_diagonal, varsqr; - double **H, **distmat; - double occ, occsum; + double **H = NULL, **distmat = NULL; + double nu, nusum; /* set up H, the centering/normalizing matrix */ off_diagonal = -1.0 / (double) len; @@ -486,18 +321,18 @@ for (k = 0; k < j; ++k) { cdsA->Dij_matrix[j][k] = 0.0; - occsum = 0.0; + nusum = 0.0; for (i = 0; i < cnum; ++i) { - occ = cdsA->cds[i]->o[j] * cdsA->cds[i]->o[k]; - occsum += occ; - cdsA->Dij_matrix[j][k] += occ * cdsA->distmat->matrix[i][j][k]; + nu = cdsA->cds[i]->nu[j] * cdsA->cds[i]->nu[k]; + nusum += nu; + cdsA->Dij_matrix[j][k] += nu * cdsA->distmat->matrix[i][j][k]; } - if (occsum == 0.0) + if (nusum == 0.0) cdsA->Dij_matrix[j][k] = 0.0; else - cdsA->Dij_matrix[j][k] /= occsum; + cdsA->Dij_matrix[j][k] /= nusum; } } @@ -507,23 +342,23 @@ for (i = 0; i < len; ++i) for (j = 
0; j < i; ++j) cdsA->Var_matrix[i][j] = 0.0; - + for (j = 0; j < len; ++j) { for (k = 0; k < j; ++k) { - occsum = 0.0; + nusum = 0.0; for (i = 0; i < cnum; ++i) { - occ = cdsA->cds[i]->o[j] * cdsA->cds[i]->o[k]; - occsum += occ; - cdsA->Var_matrix[j][k] += occ * mysquare(cdsA->distmat->matrix[i][j][k] - distmat[j][k]); + nu = cdsA->cds[i]->nu[j] * cdsA->cds[i]->nu[k]; + nusum += nu; + cdsA->Var_matrix[j][k] += nu * mysquare(cdsA->distmat->matrix[i][j][k] - distmat[j][k]); } - if (occsum == 0.0) + if (nusum == 0.0) cdsA->Var_matrix[j][k] = 0.0; else - cdsA->Var_matrix[j][k] /= occsum; + cdsA->Var_matrix[j][k] /= nusum; } } @@ -575,33 +410,33 @@ memcpy(tmpmat[0], mat[0], len*len*sizeof(double)); - for (i = 0; i < len; ++i) - { - tmp = 0.0; - for (k = 0; k < len; ++k) - tmp += mat[k][i]; + for (i = 0; i < len; ++i) + { + tmp = 0.0; + for (k = 0; k < len; ++k) + tmp += mat[k][i]; tmp *= off_diagonal; - for (j = 0; j < len; ++j) - tmpmat[j][i] += tmp; - } + for (j = 0; j < len; ++j) + tmpmat[j][i] += tmp; + } /* MatPrint(tmpmat, len); */ memcpy(mat[0], tmpmat[0], len*len*sizeof(double)); - for (i = 0; i < len; ++i) - { - tmp = 0.0; - for (k = 0; k < len; ++k) - tmp += tmpmat[i][k]; + for (i = 0; i < len; ++i) + { + tmp = 0.0; + for (k = 0; k < len; ++k) + tmp += tmpmat[i][k]; tmp *= off_diagonal; - for (j = 0; j < len; ++j) - mat[i][j] += tmp; - } + for (j = 0; j < len; ++j) + mat[i][j] += tmp; + } for (i = 0; i < len; ++i) for (j = 0; j < len; ++j) @@ -617,29 +452,29 @@ int i, j; double cen; - for (j = 0; j < len; ++j) - { - cen = 0.0; - for (i = 0; i < len; ++i) - cen += mat[i][j]; + for (j = 0; j < len; ++j) + { + cen = 0.0; + for (i = 0; i < len; ++i) + cen += mat[i][j]; cen /= len; - for (i = 0; i < len; ++i) - mat[i][j] -= cen; - } - - for (j = 0; j < len; ++j) - { - cen = 0.0; - for (i = 0; i < len; ++i) - cen += mat[j][i]; + for (i = 0; i < len; ++i) + mat[i][j] -= cen; + } + + for (j = 0; j < len; ++j) + { + cen = 0.0; + for (i = 0; i < len; ++i) + cen += 
mat[j][i]; cen /= len; - for (i = 0; i < len; ++i) - mat[j][i] -= cen; - } + for (i = 0; i < len; ++i) + mat[j][i] -= cen; + } for (i = 0; i < len; ++i) for (j = 0; j < len; ++j) @@ -660,8 +495,8 @@ const int len = cdsA->vlen; double normalize, tmpx, tmpy, tmpz; double **Dij_matrix = cdsA->Dij_matrix; - const Cds **cds = (const Cds **) cdsA->cds; - const Cds *cdsi; + const Cds **cds = (const Cds **) cdsA->cds; + const Cds *cdsi = NULL; normalize = 1.0 / cdsA->cnum; @@ -699,17 +534,17 @@ } -/* Same as CalcMLDistMat(), but weight by occupancy */ +/* Same as CalcMLDistMat(), but weight by nu */ void -CalcMLDistMatOcc(CdsArray *cdsA) +CalcMLDistMatNu(CdsArray *cdsA) { int i, j, k, m; const int len = cdsA->vlen; const int cnum = cdsA->cnum; - double occsum, occ, tmpx, tmpy, tmpz; + double nusum, nu, tmpx, tmpy, tmpz; double **Dij_matrix = cdsA->Dij_matrix; - const Cds **cds = (const Cds **) cdsA->cds; - const Cds *cdsi; + const Cds **cds = (const Cds **) cdsA->cds; + const Cds *cdsi = NULL; /* (1) calculate the symmetric j x k atom squared distance e^i(l,m) matrix for all structure Cds i */ /* (2) calculate the average squared distance matrix ave{e(l,m)} for the CdsArray */ @@ -717,30 +552,30 @@ { for (k = 0; k < j; ++k) { - cdsA->Dij_matrix[j][k] = 0.0; - occsum = 0.0; + Dij_matrix[j][k] = 0.0; + nusum = 0.0; for (i = 0; i < cnum; ++i) { cdsi = cds[i]; - occ = cdsi->o[j] * cdsi->o[k]; - occsum += occ; + nu = cdsi->nu[j] * cdsi->nu[k]; + nusum += nu; tmpx = cdsi->x[j] - cdsi->x[k]; tmpy = cdsi->y[j] - cdsi->y[k]; tmpz = cdsi->z[j] - cdsi->z[k]; - Dij_matrix[j][k] += occ * (tmpx*tmpx + tmpy*tmpy + tmpz*tmpz); + Dij_matrix[j][k] += nu * (tmpx*tmpx + tmpy*tmpy + tmpz*tmpz); } /* printf("\n%f", cdsA->Dij_matrix[j][k]); */ - if (occsum == 0.0) + if (nusum == 0.0) { Dij_matrix[j][k] = 0.0; } else - Dij_matrix[j][k] /= occsum; + Dij_matrix[j][k] /= nusum; - /* printf(" %f %f", occsum, cdsA->Dij_matrix[j][k]); */ + /* printf(" %f %f", nusum, cdsA->Dij_matrix[j][k]); */ } } @@ 
-779,230 +614,74 @@ const double *fx1 = cds1->x, *fy1 = cds1->y, *fz1 = cds1->z; const double *fx2 = cds2->x, *fy2 = cds2->y, *fz2 = cds2->z; double G1 = 0.0, G2 = 0.0; + A[0] = A[1] = A[2] = A[3] = A[4] = A[5] = A[6] = A[7] = A[8] = 0.0; + if (weight != NULL) - { - for (i = 0; i < len; ++i) - { - x1 = weight[i] * fx1[i]; - y1 = weight[i] * fy1[i]; - z1 = weight[i] * fz1[i]; - - G1 += x1 * fx1[i] + y1 * fy1[i] + z1 * fz1[i]; - - x2 = fx2[i]; - y2 = fy2[i]; - z2 = fz2[i]; - - G2 += weight[i] * (x2 * x2 + y2 * y2 + z2 * z2); - - A[0] += (x1 * x2); - A[1] += (x1 * y2); - A[2] += (x1 * z2); - - A[3] += (y1 * x2); - A[4] += (y1 * y2); - A[5] += (y1 * z2); - - A[6] += (z1 * x2); - A[7] += (z1 * y2); - A[8] += (z1 * z2); - } - } - else - { - for (i = 0; i < len; ++i) - { - x1 = fx1[i]; - y1 = fy1[i]; - z1 = fz1[i]; - - G1 += x1 * x1 + y1 * y1 + z1 * z1; - - x2 = fx2[i]; - y2 = fy2[i]; - z2 = fz2[i]; - - G2 += (x2 * x2 + y2 * y2 + z2 * z2); - - A[0] += (x1 * x2); - A[1] += (x1 * y2); - A[2] += (x1 * z2); - - A[3] += (y1 * x2); - A[4] += (y1 * y2); - A[5] += (y1 * z2); - - A[6] += (z1 * x2); - A[7] += (z1 * y2); - A[8] += (z1 * z2); - } - } - return (G1 + G2) * 0.5; -} + { + for (i = 0; i < len; ++i) + { + x1 = weight[i] * fx1[i]; + y1 = weight[i] * fy1[i]; + z1 = weight[i] * fz1[i]; + G1 += x1 * fx1[i] + y1 * fy1[i] + z1 * fz1[i]; -static -int FastCalcRMSDAndRotation(double *rot, double *A, double *msd, double E0, int len, double minScore) -{ - double Sxx, Sxy, Sxz, Syx, Syy, Syz, Szx, Szy, Szz; - double Szz2, Syy2, Sxx2, Sxy2, Syz2, Sxz2, Syx2, Szy2, Szx2, - SyzSzymSyySzz2, Sxx2Syy2Szz2Syz2Szy2, Sxy2Sxz2Syx2Szx2, - SxzpSzx, SyzpSzy, SxypSyx, SyzmSzy, - SxzmSzx, SxymSyx, SxxpSyy, SxxmSyy; - double C[4]; - int i; - double mxEigenV; - double oldg = 0.0; - double b, a, delta, ms; - double q1, q2, q3, q4, normq; - double d11, d12, d13, d14, d21, d22, d23, d24; - double d31, d32, d33, d34, d41, d42, d43, d44; - double a2, x2, y2, z2; - double xy, az, zx, ay, yz, ax; - double 
d3344_4334, d3244_4234, d3243_4233, d3143_4133,d3144_4134, d3142_4132; - - Sxx = A[0]; Sxy = A[1]; Sxz = A[2]; - Syx = A[3]; Syy = A[4]; Syz = A[5]; - Szx = A[6]; Szy = A[7]; Szz = A[8]; - - Sxx2 = Sxx * Sxx; - Syy2 = Syy * Syy; - Szz2 = Szz * Szz; - - Sxy2 = Sxy * Sxy; - Syz2 = Syz * Syz; - Sxz2 = Sxz * Sxz; - - Syx2 = Syx * Syx; - Szy2 = Szy * Szy; - Szx2 = Szx * Szx; - - SyzSzymSyySzz2 = 2.0*(Syz*Szy - Syy*Szz); - Sxx2Syy2Szz2Syz2Szy2 = Syy2 + Szz2 - Sxx2 + Syz2 + Szy2; - - C[2] = -2.0 * (Sxx2 + Syy2 + Szz2 + Sxy2 + Syx2 + Sxz2 + Szx2 + Syz2 + Szy2); - C[1] = 8.0 * (Sxx*Syz*Szy + Syy*Szx*Sxz + Szz*Sxy*Syx - Sxx*Syy*Szz - Syz*Szx*Sxy - Szy*Syx*Sxz); - - SxzpSzx = Sxz + Szx; - SyzpSzy = Syz + Szy; - SxypSyx = Sxy + Syx; - SyzmSzy = Syz - Szy; - SxzmSzx = Sxz - Szx; - SxymSyx = Sxy - Syx; - SxxpSyy = Sxx + Syy; - SxxmSyy = Sxx - Syy; - Sxy2Sxz2Syx2Szx2 = Sxy2 + Sxz2 - Syx2 - Szx2; - - C[0] = Sxy2Sxz2Syx2Szx2 * Sxy2Sxz2Syx2Szx2 - + (Sxx2Syy2Szz2Syz2Szy2 + SyzSzymSyySzz2) * (Sxx2Syy2Szz2Syz2Szy2 - SyzSzymSyySzz2) - + (-(SxzpSzx)*(SyzmSzy)+(SxymSyx)*(SxxmSyy-Szz)) * (-(SxzmSzx)*(SyzpSzy)+(SxymSyx)*(SxxmSyy+Szz)) - + (-(SxzpSzx)*(SyzpSzy)-(SxypSyx)*(SxxpSyy-Szz)) * (-(SxzmSzx)*(SyzmSzy)-(SxypSyx)*(SxxpSyy+Szz)) - + (+(SxypSyx)*(SyzpSzy)+(SxzpSzx)*(SxxmSyy+Szz)) * (-(SxymSyx)*(SyzmSzy)+(SxzpSzx)*(SxxpSyy+Szz)) - + (+(SxypSyx)*(SyzmSzy)+(SxzmSzx)*(SxxmSyy-Szz)) * (-(SxymSyx)*(SyzpSzy)+(SxzmSzx)*(SxxpSyy-Szz)); - - - mxEigenV = E0; - for (i = 0; i < 50; ++i) - { - oldg = mxEigenV; - x2 = mxEigenV*mxEigenV; - b = (x2 + C[2])*mxEigenV; - a = b + C[1]; - delta = ((a*mxEigenV + C[0])/(2.0*x2*mxEigenV + b + a)); - mxEigenV -= delta; - if (fabs(mxEigenV - oldg) < fabs((1e-6)*mxEigenV)) { - break; - } - } - if (i == 50) - { - fprintf(stderr," more than %d iterations needed, something wrong!\b", i); - } - - ms = 2.0 * (E0 - mxEigenV) / len; - (*msd) = ms; - - if (minScore > 0) - { - if (ms < minScore) + x2 = fx2[i]; + y2 = fy2[i]; + z2 = fz2[i]; + + G2 += weight[i] * (x2 * x2 + 
y2 * y2 + z2 * z2); + + A[0] += (x1 * x2); + A[1] += (x1 * y2); + A[2] += (x1 * z2); + + A[3] += (y1 * x2); + A[4] += (y1 * y2); + A[5] += (y1 * z2); + + A[6] += (z1 * x2); + A[7] += (z1 * y2); + A[8] += (z1 * z2); + } + } + else + { + for (i = 0; i < len; ++i) { - // Don't bother with rotation. - return -1; - } - } - - d11 = SxxpSyy + Szz-mxEigenV; d12 = SyzmSzy; d13 = - SxzmSzx; d14 = SxymSyx; - d21 = SyzmSzy; d22 = SxxmSyy - Szz-mxEigenV; d23 = SxypSyx; d24= SxzpSzx; - d31 = d13; d32 = d23; d33 = Syy-Sxx-Szz - mxEigenV; d34 = SyzpSzy; - d41 = d14; d42 = d24; d43 = d34; d44 = Szz - SxxpSyy - mxEigenV; - d3344_4334 = d33 * d44 - d43 * d34; d3244_4234 = d32 * d44-d42*d34; - d3243_4233 = d32 * d43 - d42 * d33; d3143_4133 = d31 * d43-d41*d33; - d3144_4134 = d31 * d44 - d41 * d34; d3142_4132 = d31 * d42-d41*d32; - q1 = d22*d3344_4334-d23*d3244_4234+d24*d3243_4233; - q2 = -d21*d3344_4334+d23*d3144_4134-d24*d3143_4133; - q3 = d21*d3244_4234-d22*d3144_4134+d24*d3142_4132; - q4 = -d21*d3243_4233+d22*d3143_4133-d23*d3142_4132; - - normq = sqrt(q1 * q1 + q2 * q2 + q3 * q3 + q4 * q4); - q1 /= normq; q2 /= normq; q3 /= normq; q4 /= normq; - - a2 = q1 * q1; - x2 = q2 * q2; - y2 = q3 * q3; - z2 = q4 * q4; - - xy = q2 * q3; - az = q1 * q4; - zx = q4 * q2; - ay = q1 * q3; - yz = q3 * q4; - ax = q1 * q2; - - rot[0] = a2 + x2 - y2 - z2; - rot[1] = 2 * (xy + az); - rot[2] = 2 * (zx - ay); - rot[3] = 2 * (xy - az); - rot[4] = a2 - x2 + y2 - z2; - rot[5] = 2 * (yz + ax); - rot[6] = 2 * (zx + ay); - rot[7] = 2 * (yz - ax); - rot[8] = a2 - x2 - y2 + z2; + x1 = fx1[i]; + y1 = fy1[i]; + z1 = fz1[i]; - return 0; -} + G1 += x1 * x1 + y1 * y1 + z1 * z1; + x2 = fx2[i]; + y2 = fy2[i]; + z2 = fz2[i]; -/* static void */ -/* CenterCds(Cds *cds, const int len) */ -/* { */ -/* int i; */ -/* double xsum, ysum, zsum; */ -/* double *x = cds->x, *y = cds->y, *z = cds->z; */ -/* */ -/* xsum = ysum = zsum = 0.0; */ -/* for (i = 0; i < len; ++i) */ -/* { */ -/* xsum += x[i]; */ -/* ysum += y[i]; */ -/* 
zsum += z[i]; */ -/* } */ -/* */ -/* xsum /= len; */ -/* ysum /= len; */ -/* zsum /= len; */ -/* */ -/* for (i = 0; i < len; ++i) */ -/* { */ -/* x[i] -= xsum; */ -/* y[i] -= ysum; */ -/* z[i] -= zsum; */ -/* } */ -/* } */ + G2 += (x2 * x2 + y2 * y2 + z2 * z2); + + A[0] += (x1 * x2); + A[1] += (x1 * y2); + A[2] += (x1 * z2); + + A[3] += (y1 * x2); + A[4] += (y1 * y2); + A[5] += (y1 * z2); + + A[6] += (z1 * x2); + A[7] += (z1 * y2); + A[8] += (z1 * z2); + } + } + + return (G1 + G2) * 0.5; +} static double -CalcRMSDRotationalMatrix(Cds *cds1, Cds *cds2, const int len, double *rot, const double *weight) +CalcRMSDRotMat(Cds *cds1, Cds *cds2, const int len, double *rot, const double *weight) { double A[9]; double rmsd; @@ -1024,39 +703,38 @@ /* Calculate the average coordinates from the average distance matrix as calculated in CalcEDMADistMat(CdsArray *cdsA) and CalcMLDistMat. This is a distance geometry embedding algorithm. - + See: Crippen and Havel (1978) Acta Cryst A34:282 "Stable calculation of coordinates from distance data." - + Gower, J.C (1966) Biometrika 53:3-4:325-338. "Some distance properties of latent root and vector methods used in multivariate analysis." - + Both the above refs give equivalent methods. Most mol biologists - know only the first, statisticians the second. - + know only the first, statisticians the second. + First, find the eigenvalues and eigenvectors of the NxN distance - matrix. Second, order them largest first. The first three + matrix. Second, order them largest first. The first three eigenvectors, multiplied by the sqrt of the corresponding eigenvalue, - are the x, y, and z coordinate vectors for the structure, respectively. + are the x, y, and z coordinate vectors for the structure, respectively. 
*/ void EmbedAveCds(CdsArray *cdsA) -{ +{ int i; int vlen = cdsA->vlen; double w0, w1, w2; double **z = NULL; double *w = NULL; double deviation1, deviation2; - Cds *avecds = cdsA->avecds; - Cds *cds = cdsA->cds[0]; + Cds *avecds = cdsA->avecds; + Cds *cds = cdsA->cds[0]; /* Center/normalize with H, */ /* B(M) = -0.5 * H{Eu(M)}H */ /* based on Lele's three-step PCA algorithm given on page 581 Lele 1993 */ - DoubleCenterMat(cdsA->Dij_matrix, vlen); w = (double *) calloc(vlen, sizeof(double)); @@ -1073,36 +751,20 @@ avecds->x[i] = w2 * z[i][2]; avecds->y[i] = w1 * z[i][1]; avecds->z[i] = w0 * z[i][0]; - avecds->o[i] = 1.0; + avecds->nu[i] = 1; + avecds->mu[i] = 0; } /* WriteAveCdsFile(cdsA, "test.pdb"); */ /* check to see if the average structure has the wrong chirality, since embedding basically randomly reflects the structure */ - -/* deviation1 = ProcLAPACKSVDvan(cdsA->cds[0], */ -/* avecds, */ -/* cdsA->cds[0]->matrix, */ -/* cdsA->cds[0]->tmpmat3a, */ -/* cdsA->cds[0]->tmpmat3b, */ -/* cdsA->cds[0]->tmpmat3c, */ -/* cdsA->cds[0]->tmpvec3a); */ - - deviation1 = CalcRMSDRotationalMatrix(cds, avecds, cds->vlen, &cds->matrix[0][0], NULL); + deviation1 = CalcRMSDRotMat(cds, avecds, cds->vlen, &cds->matrix[0][0], NULL); for (i = 0; i < cdsA->vlen; ++i) avecds->x[i] = -avecds->x[i]; -/* deviation2 = ProcLAPACKSVDvan(cdsA->cds[0], */ -/* avecds, */ -/* cdsA->cds[0]->matrix, */ -/* cdsA->cds[0]->tmpmat3a, */ -/* cdsA->cds[0]->tmpmat3b, */ -/* cdsA->cds[0]->tmpmat3c, */ -/* cdsA->cds[0]->tmpvec3a); */ - - deviation2 = CalcRMSDRotationalMatrix(cds, avecds, cds->vlen, &cds->matrix[0][0], NULL); + deviation2 = CalcRMSDRotMat(cds, avecds, cds->vlen, &cds->matrix[0][0], NULL); if (deviation1 < deviation2) for (i = 0; i < cdsA->vlen; ++i) diff -Nru theseus-2.0.6/Embed.h theseus-3.0.0/Embed.h --- theseus-2.0.6/Embed.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/Embed.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning 
of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -39,37 +39,16 @@ CalcLeleVariances(CdsArray *cdsA); void -NNxCds(double **mat, Cds *cds); - -void -CdsxNN(Cds *cds, const double **mat); - -double -RadGyrSqr(const Cds *cds); - -double -RadGyrSqrW(const Cds *cds, const double *weights); - -double -RadGyrSqrCov(Cds *cds, const double **weightmat); - -void CdsInnerProd(Cds *cds); void CdsInnerProd2(Cds *cds); void -MatInnerProdSymIp(double **mat, const int N); - -void -MatInnerProdSymOp(double **mato, const double **mati, const int N); - -void CalcEDMADistMat(CdsArray *cdsA); void -CalcEDMADistMatOcc(CdsArray *cdsA); +CalcEDMADistMatNu(CdsArray *cdsA); void LeleCenterMat(double **mat, const int len); @@ -81,7 +60,7 @@ CalcMLDistMat(CdsArray *cdsA); void -CalcMLDistMatOcc(CdsArray *cdsA); +CalcMLDistMatNu(CdsArray *cdsA); void EmbedAveCds(CdsArray *cdsA); diff -Nru theseus-2.0.6/Embed_local.h theseus-3.0.0/Embed_local.h --- theseus-2.0.6/Embed_local.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/Embed_local.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,4 +35,5 @@ #include "CovMat.h" #include "ProcGSLSVD.h" #include "DLTmath.h" -#include "Embed.h" +#include "qcprot.h" + diff -Nru theseus-2.0.6/Error.c theseus-3.0.0/Error.c --- theseus-2.0.6/Error.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/Error.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -39,27 +39,22 @@ { printf("\n"); printf(" Default usage is equivalent to: \n"); - printf(" theseus %s-a0 -e2 -g1 -i200 -k-1 -L0 -p1e-7 -v -P0%s your.pdb \n", tc_GREEN, tc_NC); + printf(" theseus %s-a0 -e2 -g1 -i200 -p1e-7 -v -P0%s your.pdb \n", tc_GREEN, tc_NC); printf("\n"); printf(" Expert options:\n"); - printf(" -b Bayesian reference prior on inverse gamma scale param \n"); printf(" -e embedding algorithm for initializing the average structure \n"); printf(" 0 = none; use randomly chosen model \n"); printf(" {%s2%s} = Maximum Likelihood \n", tc_GREEN, tc_NC); printf(" -g hierarchical model for variances \n"); printf(" 0 = none (may not converge) \n"); printf(" {%s1%s} = inverse gamma distribution \n", tc_GREEN, tc_NC); - printf(" -k constant minimum variance {%s-1%s} \n", tc_GREEN, tc_NC); - printf(" {if set to negative value, min var is determined empirically} \n"); printf(" -J print very expert options \n"); printf(" -n don't write transformed pdb file \n"); printf(" -o reference file to superimpose on, \n"); printf(" all rotations are relative to the first model in this file \n"); - printf(" -O Olve's segID file \n"); printf(" -p requested relative precision for convergence {%s1e-7%s} \n", 
tc_GREEN, tc_NC); printf(" -Z don't orient final superposition along principal axes \n"); printf(" -z n fixed shape parameter for inverse gamma distribution \n"); - printf(" -8 preserve atom names; don't try to fix them for PDB3 standards \n"); } @@ -73,25 +68,21 @@ printf(" -2 convert Lele file to PDB format \n"); printf(" -3 n:n scale and shape params for inverse gamma for random variances \n"); printf(" -4 n:n:n radius of gyration for Gaussian generated atoms \n"); + printf(" -5 TenBerge algorithm for mean \n"); printf(" -6 write Bob Edgar's SSM (structure similarity matrix) \n"); - printf(" -7 very specific fix for Lele's 5x5 structured covariance matrix, test data\n"); - printf(" -8 hierarchical translations (broken) \n"); printf(" -B read and write binary coordinate files \n"); printf(" 1 = read a PDB file, write a binary of it and quit \n"); printf(" 2 = read a binary, superimpose, and write a PDB \n"); printf(" 3 = read a PDB file, superimpose, and write a binary \n"); printf(" 4 = read a binary, superimpose, and write a binary \n"); - printf(" -d use ML dimensional/axes covariance weighting \n"); printf(" -G full 3D coordinate PCA (vector PCA is default) \n"); printf(" -H write 3D coordinate PCA morph files (for use with CNS/XPLOR) \n"); - printf(" -j # of times to bootstrap \n"); + printf(" -j # of landmarks for random coords generation \n"); printf(" -K n number of mixtures for mixture Gaussian \n"); - printf(" -m Procrustes rotation method \n"); printf(" -N do a \"null run\" -- no superposition \n"); - printf(" -Q do PCA across models (find families of models) \n"); - printf(" -q axes variances for random structure generation (e.g. 
-q1:2:3) \n"); + printf(" -O Olve's segID file \n"); + printf(" -Q n scale all coordinates by this factor \n"); printf(" -R0 randomly translate and rotate before superpositioning \n"); - printf(" -t weight by B-factor as a Bayesian prior \n"); printf(" -T n number of threads \n"); printf(" -U print logL for each iteration \n"); printf(" -u calculate bias-corrected average structure \n"); @@ -99,7 +90,7 @@ printf(" -X seed the algorithm with the superposition in the file \n"); printf(" -x no iterations in inner loop (superimposing to average) \n"); printf(" -y don't calculate the average structure \n"); - printf(" -Y print extra multivariate statistics, MV normality tests, etc. \n"); + printf(" -Y print extra stats, gyration radii, Durbin-Watson autocorrelation \n"); } @@ -107,7 +98,7 @@ Usage(int expert) { // int vers_major, vers_minor, vers_patch; - + // ilaver_(&vers_major, &vers_minor, &vers_patch); PrintTheseusPre(); @@ -122,10 +113,11 @@ printf(" 3 = alpha and beta carbons \n"); printf(" 4 = all heavy atoms (all but hydrogens) \n"); printf(" or \n"); - printf(" a colon-delimited string explicitly specifying the atom-types PDB-style\n"); + printf(" a colon-delimited string specifying the atom-types PDB-style \n"); printf(" e.g., -a' CA : N ' \n"); printf(" selects the alpha carbons and backone nitrogens \n"); printf(" -c use ML atomic covariance weighting (fit correlations, slower) \n"); + printf(" -d calculate scale factors (for morphometrics) \n"); printf(" -f only read the first model of a multi-model PDB file \n"); printf(" -i maximum iterations {%s200%s} \n", tc_GREEN, tc_NC); printf(" -l superimpose with conventional least squares method \n"); @@ -141,13 +133,14 @@ printf(" -h help/usage \n"); printf(" -I just calculate statistics for input file (don't superposition) \n"); printf(" -M file that maps sequences in the alignment file to PDB files \n"); + printf(" -q read and write Rohlf TPS morphometric landmark files \n"); printf(" -r root name for output 
files {%stheseus%s} \n", tc_GREEN, tc_NC); printf(" -V version \n"); printf("\n Principal components analysis: \n"); printf(" -C use covariance matrix for PCA (correlation matrix is default) \n"); printf(" -P # of principal components to calculate {%s0%s} \n", tc_GREEN, tc_NC); - if (expert == 1 || expert == 2) + if (expert) Expert1(); if (expert == 2) @@ -211,6 +204,6 @@ { printf("\n THESEUS version %s compiled on %s %s\n by user %s with machine \"%s\" \n\n", VERSION, __DATE__, __TIME__, getenv("USER"), getenv("HOST")); - printf(" Compiled with GSL version %s\n", GSL_VERSION); + printf(" Compiled with GSL version %s\n\n", GSL_VERSION); fflush(NULL); } diff -Nru theseus-2.0.6/Error.h theseus-3.0.0/Error.h --- theseus-2.0.6/Error.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/Error.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,7 +26,7 @@ #ifndef ERROR_SEEN #define ERROR_SEEN -#define VERSION "2.0.6" +#define VERSION "3.0.0" void Version(void); diff -Nru theseus-2.0.6/examples/d1cih__.pdb.fst theseus-3.0.0/examples/d1cih__.pdb.fst --- theseus-2.0.6/examples/d1cih__.pdb.fst 1970-01-01 00:00:00.000000000 +0000 +++ theseus-3.0.0/examples/d1cih__.pdb.fst 2014-05-13 16:48:52.000000000 +0000 @@ -0,0 +1,3 @@ +>d1cih__.pdb +TEFKAGSAKKGATLFKTRCLQCHTVEKGGPHKVGPNLHGIFGAHSGQAEGYSYTDAIIKKNVLWDENNMSEY +LTNPKKYIPGTKMASGGLKKEKDRNDLITYLKKAAE diff -Nru theseus-2.0.6/examples/d1crj__.pdb.fst theseus-3.0.0/examples/d1crj__.pdb.fst --- theseus-2.0.6/examples/d1crj__.pdb.fst 1970-01-01 00:00:00.000000000 +0000 +++ theseus-3.0.0/examples/d1crj__.pdb.fst 2014-05-13 16:48:52.000000000 +0000 @@ -0,0 +1,3 @@ +>d1crj__.pdb +TEFKAGSAKKGATLFKTRCLQCHTVEKGGPHKVGPNLHGIFGRHSGQAEGYSYTDAIIKKNVLWDENNMSEF +LTNPKKYIPGTKMAFGGLKKEKDRNDLITYLKKATE diff -Nru theseus-2.0.6/FragCds.c theseus-3.0.0/FragCds.c --- theseus-2.0.6/FragCds.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/FragCds.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -23,18 +23,12 @@ -/_|:|_|_\- */ -/* -/_|:|_|_\- */ -#if __STDC__ != 1 - #error NOT a Standard C environment -#endif #include #include #include #include #include #include -//#include "Error.h" -//#include "DLTutils.h" #include "DLTmath.h" #include "FragCds.h" @@ -43,7 +37,7 @@ *FragCdsAlloc(int fraglen) { int i; - FragCds *frag; + FragCds *frag = NULL; frag = (FragCds *) malloc(sizeof(FragCds)); diff -Nru theseus-2.0.6/FragCds.h theseus-3.0.0/FragCds.h --- theseus-2.0.6/FragCds.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/FragCds.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/FragDist.c theseus-3.0.0/FragDist.c --- theseus-2.0.6/FragDist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/FragDist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -40,7 +40,7 @@ { int coord1, coord2; int i, k, offset, tmp, num, count; - FragCds *frag1 = NULL, *frag2 = NULL; + FragCds *frag1 = NULL, *frag2 = NULL; double *coeff = NULL; double var; FILE *distfile = NULL, *distfile2 = NULL; diff -Nru theseus-2.0.6/FragDist.h theseus-3.0.0/FragDist.h --- theseus-2.0.6/FragDist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/FragDist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/gaussian_sim_marginal.c theseus-3.0.0/gaussian_sim_marginal.c --- theseus-2.0.6/gaussian_sim_marginal.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/gaussian_sim_marginal.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,2626 +0,0 @@ -/* - gaussian_sim_marginal - - Copyright (C) 2013 Douglas L. Theobald - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
- - -/_|:|_|_\- -*/ -/****************************************************************************** - * - * File: gaussian_sim_marginal.c - * - * Function: - * - * Author(s): Douglas L. Theobald - * - * Copyright: Copyright (c) 2013 Douglas L. Theobald - * All Rights Reserved. - * - * Source: Started anew. - * - * Notes: - * - * Change History: - * 2011_04_15_nnn Started source - * - *****************************************************************************/ -// gcc -O3 -ffast-math -Wall -Werror -std=c99 -pedantic -o gaussian_sim_marginal -lgsl -lgslcblas gaussian_sim_marginal.c - - -#include -#include -#include -#include -#ifdef __linux__ - #include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define LN2PI (M_LN2 + M_LNPI) - -double burnin = 0.0; -int iters = 1000000; -double nu = 1.0; -int dim = 1; /* number of params */ -int hdim = 1; /* # hierarchical params */ -int ndata = 100; -int nd; -double lambda_0 = 1.0; -unsigned long int seed = 0; -int expo_model = 0; -int gauss_model = 0; -int write_files = 0; -int thrdnum = 1; -int parallel = 0; -int entropy_calc = 0; - -double *pave = NULL; /* for use with CalcPCov and CalcPAve */ -double *y = NULL; /* posterior param average */ -double *h = NULL; /* hyperparameter */ -double yt, yt2, x2t; -double *musim = NULL; -double *lnpost = NULL; -double *lnlike = NULL; -double *lnprior = NULL; -double **x = NULL; /* posterior sample */ -double *x2 = NULL; -double **data = NULL; /* data */ -double **cov = NULL; - -double avelnlike = 0.0, avelnprior = 0.0, varlnpost = 0.0; -double avelnprlk2 = 0.0, avelnpost = 0.0, avelnprlk = 0.0; - - -void -Usage(void); - - -void -VecPrint(double *vec, const int size) -{ - int i; - - for (i = 0; i < size; ++i) - printf(" %4d [ % 14.8e ]\n", i, vec[i]); - - printf("\n"); - - fflush(NULL); -} - - -void -MatPrintLowerDiag(double **matrix, const int size) -{ - int i, j; - - 
printf("\n\n"); - for (i = 0; i < size; ++i) - { - printf("%-2d: [", i); - for (j = 0; j <= i; ++j) - printf(" % 14.6f", matrix[i][j]); - printf(" ]\n"); - } - - printf(" "); - for (i = 0; i < size; ++i) - printf(" % 14d", i); - printf("\n"); - - fflush(NULL); -} - - -void -MatPrint(double **matrix, const int size) -{ - int i, j; - - printf("\n\n"); - for (i = 0; i < size; ++i) - { - printf("%-2d: [", i); - for (j = 0; j < size; ++j) - printf(" % 14.6f", matrix[i][j]); - printf(" ]\n"); - } - - printf(" "); - for (i = 0; i < size; ++i) - printf(" % 14d", i); - printf("\n"); - - fflush(NULL); -} - - -void -MatDestroy(double ***matrix_ptr) -{ - double **matrix = *matrix_ptr; - - if (matrix != NULL) - { - if (matrix[0] != NULL) - { - free(matrix[0]); - matrix[0] = NULL; - } - - free(matrix); - *matrix_ptr = NULL; - } -} - - -double -**MatAlloc(const int rows, const int cols) -{ - int i; - double **matrix = NULL; - double *matspace = NULL; - - matspace = (double *) calloc((rows * cols), sizeof(double)); - if (matspace == NULL) - { - perror("\n ERROR"); - printf("\n ERROR: Failure to allocate matrix space in MatAlloc(): (%d x %d)\n", rows, cols); - exit(EXIT_FAILURE); - } - - /* allocate room for the pointers to the rows */ - matrix = (double **) malloc(rows * sizeof(double *)); - if (matrix == NULL) - { - perror("\n ERROR"); - printf("\n ERROR: Failure to allocate room for row pointers in MatAlloc(): (%d)\n", rows); - exit(EXIT_FAILURE); - } - - /* now 'point' the pointers */ - for (i = 0; i < rows; i++) - matrix[i] = matspace + (i * cols); - - return(matrix); -} - - -/* -Calculate eigenvalues of a square, symmetric, real matrix, using GSL. -Eigenvalues are returned in descending order, largest first. -Pointer *eval must be allocated. -Input matrix **cov is NOT perturbed. 
-*/ -void -EigenvalsGSL(double **cov, const int dim, double *eval) -{ - double *cov_cpy = NULL; - - cov_cpy = malloc(dim * dim * sizeof(double)); - memcpy(cov_cpy, cov[0], dim * dim * sizeof(double)); - gsl_matrix_view m = gsl_matrix_view_array(cov_cpy, dim, dim); - gsl_vector_view evalv = gsl_vector_view_array(eval, dim); - gsl_eigen_symm_workspace *w = gsl_eigen_symm_alloc(dim); - - gsl_eigen_symm(&m.matrix, &evalv.vector, w); - - gsl_eigen_symm_free(w); - free(cov_cpy); -} - - -/* This one destroys half of the input matrix **cov */ -void -EigenvalsGSLDest(double **cov, const int dim, double *eval) -{ - gsl_matrix_view m = gsl_matrix_view_array(cov[0], dim, dim); - gsl_vector_view evalv = gsl_vector_view_array(eval, dim); - gsl_eigen_symm_workspace *w = gsl_eigen_symm_alloc(dim); - gsl_eigen_symm(&m.matrix, &evalv.vector, w); - gsl_eigen_symm_free(w); -} - - -void -CholeskyGSLDest(double **mat, const int dim) -{ - gsl_matrix_view m = gsl_matrix_view_array(mat[0], dim, dim); - gsl_linalg_cholesky_decomp(&m.matrix); -} - - -static void -RandFillVec(double *vec, int len, int randmeth, const gsl_rng *r2) -{ - int j; - - for (j = 0; j < len; ++j) - { - switch (randmeth) - { - case 1: - case 'n': /* normal */ - //vec[j] = normal_dev(0.0, 1.0); - vec[j] = gsl_ran_gaussian(r2, 1.0); - /* printf("\n%f", vec[j]); */ - break; - case 2: - case 'l': /* logistic */ - vec[j] = gsl_ran_logistic(r2, 1.0); - break; - case 3: - case 'L': /* Laplacian */ - vec[j] = gsl_ran_laplace(r2, 1.0); - break; - case 4: - case 'C': /* Cauchy */ - vec[j] = gsl_ran_cauchy(r2, 1.0); - break; - case 5: - case 'g': /* gamma */ - vec[j] = gsl_ran_gamma(r2, 1.0, 1.0); - break; -// case 6: -// case 'W': /* Wald = inverse gaussian w/ 1 */ -// invgauss_dev(1.0, 1.0, r2); -// break; -// case 7: -// case 'p': /* thirdOpoly */ -// vec[j] = thirdOpoly_dev(b, c, d, r2); -// printf("%f\n", vec[j]); -// break; -// case 8: -// case 'i': /* inverse gaussian w/ 1 */ -// vec[j] = invgauss_dev(3.0, 1.0, r2); -// 
break; -// case 9: -// case 'E': /* EVD */ -// /* a = -0.57722 * b; */ -// vec[j] = EVD_dev(0.0, 1.0, r2); -// break; -// case 10: -// case 'c': /* chi-squared */ -// vec[j] = chisqr_dev(1.0, 0.0, r2); -// break; -// case 11: -// case 'R': /* Rayleigh - same as Weibull w/2 */ -// vec[j] = weibull_dev(1.0, 2.0, r2); -// break; - case 12: - case 'e': /* exponential */ - vec[j] = gsl_ran_exponential(r2, 1.0); - break; - default: - printf("\n ERROR888: Bad random param -R '%c' \n", - (char) randmeth); - Usage(); - exit(EXIT_FAILURE); - } - } -} - - -double -RandScale(double variance, int randmeth, double b) -{ - double scale; - - switch(randmeth) - { - case 1: - case 'n': /* normal */ - scale = sqrt(variance); - break; - case 2: - case 'l': /* logistic */ - scale = sqrt(3.0 * variance) / M_PI; - break; - case 3: - case 'L': /* Laplacian */ - scale = sqrt(variance / 2.0); - break; - case 4: - case 'C': /* Cauchy */ - scale = 1; - break; - case 5: - case 'g': /* gamma */ - scale = sqrt(variance / b); - break; -// case 6: -// case 'W': /* Wald = inverse gaussian w/ 1 */ -// scale = 1.0 / variance; -// break; -// case 7: -// case 'p': /* thirdOpoly */ -// scale = sqrt(variance); -// break; -// case 8: -// case 'i': /* inverse gaussian w/ 1 */ -// a = 3.0; -// scale = a*a*a / variance; -// break; -// case 9: -// case 'E': /* EVD */ -// scale = sqrt(6.0 * variance) / M_PI; -// break; -// case 10: -// case 'c': /* chi-squared */ -// scale = variance / 2.0; -// break; -// case 11: -// case 'R': /* Rayleigh - same as Weibull w/2 */ -// scale = sqrt(variance/(2.0 - (M_PI / 2.0))); -// break; - case 12: - case 'e': /* exponential */ - scale = sqrt(variance); - break; - default: - scale = sqrt(variance); - } - - return(scale); -} - - -void -RandVec(double **vec, const int len, const int iters, const gsl_rng *r2) -{ - int i, j, k; - double **covmat = MatAlloc(len, len); - double **cormat = MatAlloc(len, len); - double **tmpmat = MatAlloc(len, len); - double *diag = malloc(len * 
sizeof(double)); - double *eval = malloc(len * sizeof(double)); - double **tmpvec = MatAlloc(len, iters); - double lndet; - - for (i = 0; i < len; ++i) - for (j = 0; j < i; ++j) - tmpmat[i][j] = gsl_ran_flat(r2, -1.0, 1.0); - - for (i = 0; i < len; ++i) - tmpmat[i][i] = gsl_ran_flat(r2, 0.0, 1.0); - - MatPrintLowerDiag(tmpmat, len); - - for (i = 0; i < len; ++i) - for (j = 0; j < len; ++j) - for (k = 0; k < len; ++k) - cormat[i][k] += tmpmat[i][j] * tmpmat[k][j]; - - printf("\n\"correlation matrix\":"); - MatPrintLowerDiag(cormat, len); - -// PrintCovMatGnuPlot((const double **) covmat, len, mystrcat(cdsA->algo->rootname, "_cor.mat")); - - for (i = 0; i < len; ++i) - diag[i] = gsl_ran_gamma(r2, 2.0, 10.0); - - for (i = 0; i < len; ++i) - for (j = 0; j < len; ++j) - covmat[i][j] = cormat[i][j] * sqrt(diag[i] * diag[j]); - - for (i = 0; i < len; ++i) - covmat[i][i] += 1.0; - - printf("\ncovariance matrix:"); - MatPrintLowerDiag(covmat, len); - - for (i = 0; i < len; ++i) - diag[i] = covmat[i][i]; - - printf("\nvariances:\n"); - - for (i = 0; i < len; ++i) - printf("%-3d %f\n", i, diag[i]); - - for (i = 0; i < len; ++i) - for (j = 0; j < len; ++j) - cormat[i][j] = covmat[i][j] / sqrt(diag[i] * diag[j]); - - printf("\ntrue correlation matrix:"); - MatPrintLowerDiag(cormat, len); - -// EigenvalsGSL(cormat, len, eval); -// -// printf("\neigenvalues:\n"); -// -// for (i = 0; i < len; ++i) -// printf("%-3d %f\n", i, eval[i]); -// -// lndet = 0.0; -// for(i = 0; i < len; ++i) -// lndet += log(eval[i]); -// -// printf("logdet: %f\n", lndet); - - EigenvalsGSL(covmat, len, eval); - - printf("\neigenvalues:\n"); - - for (i = 0; i < len; ++i) - printf("%-3d %f\n", i, eval[i]); - - lndet = 0.0; - for(i = 0; i < len; ++i) - lndet += log(eval[i]); - - printf("logdet: %f\n", lndet); - - double entropy = 0.5 * len * log(2.0 * M_PI * M_E) + 0.5 * lndet; - printf("\nentropy: %14.3f", entropy); - - CholeskyGSLDest(covmat, len); - printf("\nCholesky lower diagonal matrix:"); - 
MatPrintLowerDiag(covmat, len); - - fflush(NULL); - - for (i = 0; i < len; ++i) - RandFillVec(tmpvec[i], iters, 1, r2); - - for (i = 0; i < iters; ++i) - for (j = 0; j < len; ++j) - for (k = 0; k <= j; ++k) /* because covmat is lower diagonal, uppper should be all zeros */ - vec[j][i] += covmat[j][k] * tmpvec[k][i]; - -// for (i = 0; i < iters; ++i) -// { -// printf("UNIFORM %4d", i); -// for (j = 0; j < len; ++j) -// printf(" %14.10f", erf(vec[j][i]/sqrt(2.0))); -// printf("\n"); -// } -// fflush(NULL); - - MatDestroy(&tmpvec); - MatDestroy(&tmpmat); - MatDestroy(&covmat); - MatDestroy(&cormat); - free(diag); - free(eval); -} - - -/* -Calculate harmonic mean estimator, which should never be used, but we determine it for fun -and to see how bad it actually is. -As boni, we get the log arithmetic mean likelihood and log geometric mean likelihood. -*/ -double -CalcHarmonicMean(const double *ll, const int len) -{ - double blik, mlik, hmlik, amlik, diff, ediff, liksi, harm_mean, var, tmp; - int i; - - /* first center the log-likelihoods, as the likelihoods are probably too small to represent. 
*/ - blik = 0.0; - for (i = 0; i < len; ++i) - blik += ll[i]; - - blik /= len; - - mlik = hmlik = amlik = 0.0; - for (i = 0; i < len; ++i) - { - liksi = ll[i]; - diff = liksi - blik; - ediff = exp(diff); - - if (isfinite(ediff)) - { - mlik += ediff; - hmlik += 1.0 / ediff; - } - - amlik += liksi; - } - - amlik /= len; - - var = 0.0; - for (i = 0; i < len; ++i) - { - tmp = ll[i] - amlik; - var += tmp*tmp; - } - - var /= len; - -/* - if (badsamp > 0) - printf("\nWARNING: %d samples excluded, not finite\n", badsamp); -*/ - harm_mean = blik - log(hmlik) + log(len); - printf("\n%-22s% 14d", "samples:", len); - - printf("\n%-22s% 16.4f", "log arithmetic mean:", log(mlik / len) + blik); - printf("\n%-22s% 16.4f", "log geometric mean:", amlik); - printf("\n%-22s% 16.4f", "log harmonic mean:", harm_mean); - printf("\n%-22s% 16.4f", "variance of log like:", var); - printf("\n%-22s% 16.4f", "log normal estimate:", amlik - 0.5 * var); - printf("\n%-22s% 16.4f", "DIC:", amlik - var); - //printf("\n%-22s% 16.4f", "BICM_19 estimate:", amlik + var - 0.5 * dim * log(ndata)); - printf("\n%-22s% 16.4f", "BICM_20 estimate:", amlik - var * (log(ndata) - 1.0)); - printf("\n%-22s% 16.4f", "BICM_DLT estimate:", amlik - var * log(ndata) + var * (ndata-1.0)/ndata); - //printf("\n%-22s% 16.4f", "BICM_DLT2 estimate:", amlik - var * (log(ndata)-1.0) + var * (ndata-1.0)/ndata); - printf("\n"); - fflush(NULL); - - return(harm_mean); -} - - -double -average(const double *data, const int dim) -{ - double m = 0.0; - int i = dim; - - while(i-- > 0) - m += *data++; - - return(m / (double) dim); -} - - -double -variance(const double *data, const int dim, const double mean) -{ - double v = 0.0, tmpv; - int i = dim; - - while(i-- > 0) - { - tmpv = *data++ - mean; - v += (tmpv * tmpv); - } - - return(v / dim); -} - - -/* -Calculate the bias in the entropy estimate due to deviation from Normality. -Based on on Edgeworth expansion of a PDF in terms of its cumulants (moments). 
-The bias term is substracted from the usual multivariate Gaussian -entropy: - -0.5 * d * log(2.0 * M_PI * M_E) + 0.5 * lndet - -where lndet is the log of the determinant of the d*d covariance matrix. - -Multivariate third order corrections (using the skewness) come from Van Hulle 2005: - -See: - -Marc M. Van Hulle (2005) -"Multivariate Edgeworth-based entropy estimation." -2005 IEEE Workshop on Machine Learning for Signal Processing, -Conference Proceedings -28-28 Sept. 2005 -pp 311 - 316 - -or - -Marc M. Van Hulle (2005) -"Edgeworth Approximation of Multivariate Differential Entropy" -Neural Computation 17, 1903–1910 - -See equation 2.2. - -The fourth order corrections (kurtosis terms) are univariate only; -they don't account for cross-kurtosis between dimensions. -Fourth order corrections are from Comon 1994: - -Comon, P. (1994) -"Independent component analysis, a new concept?" -Signal processing 36, 287–314. - -Amari 1996 also has similar 4th order corrections, but they seem to be -wrong: - -Amari, S.-I., Cichocki, A. and Yang, H. H. (1996) -"A new learning algorithm for blind signal separation." -Advances in neural information processing systems 8 -Eds. D. Touretzky, M. Mozer, and M. Hasselmo. -MIT Press, Cambridge. -757–763 (1996). 
-*/ -double -CalcEdgeworthVanHulleEntropy(double **vec, int dim, int len) -{ - int i, j, k, m; - double *ave = NULL; - double *std = NULL; - double *eval = NULL; - double **dif = MatAlloc(dim,len); - double term1, term2, term3; - double term4, term5, term6; - double t3, t4; - double kappa_iii, kappa_iij, kappa_ijk; - double kappa_iiii; - double entropy, bias, lnscale, lndet, sum, var; - double **cor = MatAlloc(dim,dim); - double **cov = MatAlloc(dim,dim); - double invlen = 1.0/(len-1); - - - ave = malloc(dim * sizeof(double)); - std = malloc(dim * sizeof(double)); - eval = malloc(dim * sizeof(double)); - - /* First, normalize data vector to 0 mean, unit 1 variance */ - for (i = 0; i < dim; ++i) - ave[i] = average(vec[i], len); - - //VecPrint(ave, dim); - - for (i = 0; i < dim; ++i) - for (j = 0; j < len; ++j) - dif[i][j] = vec[i][j] - ave[i]; - - for (i = 0; i < dim; ++i) - { - var = 0.0; - for (j = 0; j < len; ++j) - var += dif[i][j] * dif[i][j]; - - std[i] = sqrt(var * invlen); - } - - //VecPrint(std, dim); - - /* Save the determinant of the scale transformation */ - lnscale = 0.0; - for (i = 0; i < dim; ++i) - lnscale += log(std[i]); - - /* rescale centered data */ - for (i = 0; i < dim; ++i) - std[i] = 1.0 / std[i]; - - for (i = 0; i < dim; ++i) - for (j = 0; j < len; ++j) - dif[i][j] *= std[i]; - - /* Calculate the covariance matrix of transformed data (= correlation matrix) */ - for (i = 0; i < dim; ++i) - { - for (j = 0; j <= i; ++j) - { - sum = 0.0; - for (k = 0; k < len; ++k) - sum += dif[i][k] * dif[j][k]; - - cor[i][j] = cor[j][i] = sum * invlen; - } - } - -// printf ("\n\nEdgeworth correlation matrix:"); -// MatPrintLowerDiag(cor, dim); -// -// for (i = 0; i < dim; ++i) -// for (j = 0; j < dim; ++j) -// cov[i][j] = cor[i][j] / (std[i] * std[j]); -// -// printf ("\n\nEdgeworth covariance matrix:"); -// MatPrintLowerDiag(cov, dim); - - EigenvalsGSL(cor, dim, eval); - - VecPrint(eval, dim); - - lndet = 0.0; - for (i = 0; i < dim; i++) - { - if 
(isgreater(eval[i], DBL_EPSILON)) - { - lndet += log(eval[i]); - } - else - { - printf("\n WARNING: excluding eigenvalue %d from determinant calculation", i); - printf("\n WARNING: eigenvalue[%d] = %g < %g", i, eval[i], FLT_EPSILON); - } - } - - term1 = 0.0; - term4 = 0.0; - term5 = 0.0; - term6 = 0.0; - for (i = 0; i < dim; ++i) - { - kappa_iii = 0.0; - kappa_iiii = 0.0; - for (j = 0; j < len; ++j) - { - t3 = dif[i][j] * dif[i][j] * dif[i][j]; - kappa_iii += t3; /* skewness */ - kappa_iiii += t3 * dif[i][j]; /* kurtosis */ - } - - kappa_iii *= invlen; - kappa_iiii *= invlen; - kappa_iiii -= 3.0; - - t3 = kappa_iii * kappa_iii; - t4 = kappa_iiii * kappa_iiii; - term1 += t3; - term4 += t4; - term5 += t3*t3; - term6 += t3 * kappa_iiii; - } - - term2 = 0.0; - for (i = 0; i < dim; ++i) - { - for (j = 0; j < dim; ++j) - { - if (i != j) - { - kappa_iij = 0.0; - for (k = 0; k < len; ++k) - kappa_iij += dif[i][k] * dif[i][k] * dif[j][k]; - - kappa_iij *= invlen; - - term2 += kappa_iij * kappa_iij; - } - } - } - - term3 = 0.0; - for (i = 0; i < dim; ++i) - { - for (j = 0; j < i; ++j) - { - for (k = 0; k < j; ++k) - { - kappa_ijk = 0.0; - for (m = 0; m < len; ++m) - kappa_ijk += dif[i][m] * dif[j][m] * dif[k][m]; - - kappa_ijk *= invlen; - - term3 += kappa_ijk * kappa_ijk; - } - } - } - - /* There are d \kappa_{i,i,i} terms, 2 {d \choose 2} \kappa_{i,i,j} terms, - and {d \choose 3} \kappa_{i,j,k} terms. - gsl_sf_choose (unsigned int n, unsigned int m) */ - - /* The following is based on Comon, P. (1994) Signal processing 36, 287–314. - See eqn 3.4 (Theorem 14). - The similar equations (7 & 8) in Amari, Cichocki, and Yang (1996) seem to be wrong. 
*/ - - bias = (term1 + 3.0 * term2 + term3 / 6.0) / 12.0 + term4/48.0 + 7.0*term5/48.0 - term6/8.0; - - printf("\nEdgeworth term1: %g", term1/ 12.0); - printf("\nEdgeworth term2: %g", 3.0*term2/ 12.0); - printf("\nEdgeworth term3: %g", term3/(6.0*12.0)); - printf("\nEdgeworth term4: %g", +term4/48.0); - printf("\nEdgeworth term5: %g", +7.0*term5/48.0); - printf("\nEdgeworth term6: %g\n", - term6/8.0); - - printf("\nln(det): %14.3f", lndet); - - entropy = 0.5 * dim * log(2.0 * M_PI * M_E) + 0.5 * lndet; - - printf("\nwhite entropy: %14.3f", entropy); - printf("\nbias: %14.3f", bias); - printf("\nln(scale): %14.3f", lnscale); - - printf("\nNaive N-entropy: %14.3f", entropy + lnscale); - - //entropy = entropy - bias + lnscale; - - printf("\nEdgeworth entropy: %14.3f", entropy - term1/12.0 + lnscale); - printf("\nEdgeworth entropy (4th order): %14.3f", entropy - bias + lnscale); - printf("\n\n"); - - entropy = entropy - bias + lnscale; - -// /* From eqns (7 & 8) in Amari, Cichocki, and Yang (1996). -// Seems to be wrong. 
*/ -// term1 = 0.0; -// term4 = 0.0; -// term5 = 0.0; -// term6 = 0.0; -// for (i = 0; i < dim; ++i) -// { -// kappa_iii = 0.0; -// kappa_iiii = 0.0; -// for (j = 0; j < len; ++j) -// { -// t3 = dif[i][j] * dif[i][j] * dif[i][j]; -// kappa_iii += t3; /* skewness */ -// kappa_iiii += t3 * dif[i][j]; /* kurtosis */ -// } -// -// kappa_iii *= invlen; -// kappa_iiii *= invlen; -// kappa_iiii -= 3.0; -// -// t3 = kappa_iii * kappa_iii; -// t4 = kappa_iiii * kappa_iiii; -// term1 += t3; -// term4 += t4; -// term5 += t4 * kappa_iiii; // k_4^3; -// term6 += t3 * kappa_iiii; // k_3^2 k_4 -// } -// -// bias = (term1 + 3.0 * term2 + term3 / 6.0) / 12.0 + term4/48.0 - term5/16.0 - 5.0*term6/8.0; -// -// printf("\nEdgeworth term1: %g", term1/ 12.0); -// printf("\nEdgeworth term2: %g", 3.0*term2/ 12.0); -// printf("\nEdgeworth term3: %g", term3/(6.0*12.0)); -// printf("\nEdgeworth term4: %g", +term4/48.0); -// printf("\nEdgeworth term5: %g", -term5/16.0); -// printf("\nEdgeworth term6: %g\n", - 5.0*term6/8.0); -// -// printf("\nln(det): %14.3f", lndet); -// -// entropy = 0.5 * dim * log(2.0 * M_PI * M_E) + 0.5 * lndet; -// -// printf("\nwhite entropy: %14.3f", entropy); -// printf("\nbias: %14.3f", bias); -// printf("\nln(scale): %14.3f", lnscale); -// -// printf("\nNaive N-entropy: %14.3f", entropy + lnscale); -// -// //entropy = entropy - bias + lnscale; -// -// printf("\nEdgeworth entropy: %14.3f", entropy - term1/12.0 + lnscale); -// printf("\nEdgeworth entropy: %14.3f", entropy - bias + lnscale); -// printf("\n\n"); - - MatDestroy(&dif); - MatDestroy(&cor); - MatDestroy(&cov); - free(eval); - free(std); - free(ave); - - return(entropy); -} - - -void -CalcPAve(void) -{ - int i, j; - - for (i = 0; i < dim; ++i) - pave[i] = 0.0; - - for (i = 0; i < iters; ++i) - for (j = 0; j < dim; ++j) - pave[j] += x[j][i]; - - for (i = 0; i < dim; ++i) - pave[i] /= iters; -} - - -void -CalcPCov(void) -{ - int i, j, k; - double tmpi, tmpj, sum; - double inviters = 1.0/(iters-1.0); - - 
//printf("\n%4d %g", iters, inviters); - - for (i = 0; i < dim; ++i) - { - for (j = 0; j <= i; ++j) - { - sum = 0.0; - for (k = 0; k < iters; ++k) - { - tmpi = x[i][k] - pave[i]; - tmpj = x[j][k] - pave[j]; - sum += tmpi * tmpj; - //printf("\n%4d %4d %4d: %16.8f %16.8f", i, j, k, tmpi, tmpj); - } - - cov[i][j] = cov[j][i] = sum * inviters; - //printf("\n%4d %4d: %16.8f %16.8f %16.8f", i, j, cov[i][j], sum*inviters, sum); - } - } -} - - -/* -The Laplace-Metropolis estimator for calculating the marginal likelihood -from metropolis samples from the posterior distribution. - -Steven M. Lewis, Adrian E. Raftery (1997) -"Estimating Bayes Factors via Posterior Stimulation with the Laplace-Metropolis Estimator." -Journal of the American Statistical Association, 92(438):648-655 - -Using equation 4, esp. see pp 649-650, first method to estimate \theta*. - -IME, this is extremely accurate (using Gold Standard as a reference). - -NB: REQUIRES CalcPAve() and CalcPCov() to have already calculated ave and cov. 
-*/ -double -CalcLaplaceMet(void) -{ - int i, j; - int maxind, runind; - double maxpost, lndet, lapmet, expmet, lnh, entropy, lnfish, edge_entropy; - double *eval = NULL; - - printf("Calculating Laplace approximation ...\n"); - fflush(NULL); - - CalcPAve(); - CalcPCov(); - - for (i = 0; i < dim; ++i) - printf("\nave p[%3d]:% 16.4f (+/- %16.4f)", i, pave[i], sqrt(cov[i][i])); - - printf ("\n\nParameter covariance matrix (estimate of minus inverse Hessian):"); - MatPrint(cov, dim); - - //////////////////////////////////////////////////////////////////////////////////////////////// - eval = calloc(dim, sizeof(double)); - - EigenvalsGSL(cov, dim, eval); - - lndet = lnfish = 0.0; - for (i = 0; i < dim; i++) - { - if (isgreater(eval[i], DBL_EPSILON)) - { - lndet += log(eval[i]); - lnfish -= log(ndata * eval[i]); - } - else - { - printf("\n WARNING: excluding eigenvalue %d from determinant calculation", i); - printf("\n WARNING: eigenvalue[%d] = %g < %g", i, eval[i], FLT_EPSILON); - } - } - - printf("\nln(FI): %14.3f", lnfish); - printf("\nln(det): %14.3f", lndet); - printf("\n-d ln(n): %14.3f", -dim * log(ndata)); - printf("\ndet: %g\n", exp(lndet)); - for (i = 0; i < dim; i++) - printf ("\neigenvalue[%d] = %g", i, eval[i]); - printf ("\n"); - fflush(NULL); - - free(eval); - - //////////////////////////////////////////////////////////////////////////////////////////////// - for (i = 0; i < dim; ++i) - { - for (j = 0; j <= i; ++j) - { - if (cov[i][j] == 0.0) - cov[i][j] = cov[j][i] = 0.0; - else - cov[i][j] = cov[j][i] = cov[i][j] / sqrt(cov[i][i] * cov[j][j]); - } - } - -// for (i = 0; i < dim; ++i) -// cov[i][i] = 1.0; - - printf ("\nParameter correlation matrix:"); - MatPrintLowerDiag(cov, dim); - - //////////////////////////////////////////////////////////////////////////////////////////////// - /* Find the parameters with the maximum posterior prob */ - maxpost = -DBL_MAX; - maxind = 0; - runind = 0; - for (i = 0; i < iters; ++i) - { - //printf("\nlnpost[%6d]: %g 
%g", i, lnpost[i], maxpost); - lnh = lnprior[i] + lnlike[i]; - if (maxpost < lnh) - { - maxpost = lnh; - maxind = i; - } - } - - printf("\n%-25s% 16.4f", "Max log posterior - p(D):", maxpost); - printf("\n%-25s% 16.4f", "Max log prior:", lnprior[maxind]); - printf("\n%-25s% 16.4f", "Max log likelihood:", lnlike[maxind]); - -// for (i = 0; i < dim; ++i) -// printf("\nmax logPost p[%d]: % 16.4f", i, x[i][maxind]); -// -// printf("\n%.4f", x[0][maxind]); -// for (i = 1; i < dim; ++i) -// printf(":%.4f", x[i][maxind]); -// printf("\n"); - - lapmet = maxpost + 0.5 * lndet + 0.5 * dim * log(2.0 * M_PI); - - printf("\n\nLog marginal likelihood ln p(x):"); - printf("\n%-30s% 16.4f\n\n", "Laplace-Metropolis:", lapmet); - - printf("Laplace approximation done ...\n"); - fflush(NULL); - - entropy = 0.5 * dim * log(2.0 * M_PI * M_E) + 0.5 * lndet; - - printf("Calculating Edgeworth entropy approximation ...\n"); - fflush(NULL); - - edge_entropy = CalcEdgeworthVanHulleEntropy(x, dim, iters); - - expmet = avelnprior + avelnlike + edge_entropy; - - printf("\n%-30s% 16.4f", "Posterior N-entropy:", entropy); - printf("\n%-30s% 16.4f", "Edgeworth entropy:", edge_entropy); - printf("\n%-30s% 16.4f", "Expected-Metropolis:", expmet); - printf("\n%-30s% 16.4f", "varlnpost:", varlnpost); - printf("\n%-30s% 16.4f", "DLT-Metropolis:", avelnprior + avelnlike - varlnpost * (log(ndata) - log(2.0 * M_PI * M_E))); - printf("\n%-30s% 16.4f\n", "DLT-Metropolis, no prior:", avelnlike - varlnpost * (log(ndata) - log(2.0 * M_PI * M_E))); - printf("\n%-30s% 16.4f", "Ave lnPrior:", avelnprior); - printf("\n%-30s% 16.4f\n", "Ave lnLike:", avelnlike); - fflush(NULL); - - return(lapmet); -} - - -double -CalcLaplaceMetUni(void) -{ - int i; - int maxind, runind; - double maxpost, lndet, lapmet, expmet, lnh, entropy, lnfish, edge_entropy; - double ave, var; - - printf("Calculating Laplace approximation ...\n"); - fflush(NULL); - - ave = average(x[0], iters); - var = variance(x[0], iters, ave); - - lndet = 
log(var); - lnfish = -log(nd * var); - - //////////////////////////////////////////////////////////////////////////////////////////////// - printf("\nln(FI): %14.3f", lnfish); - printf("\nln(det): %14.3f", lndet); - printf("\n-d ln(n): %14.3f", -log(nd)); - printf("\ndet: %g\n", exp(lndet)); - fflush(NULL); - - //////////////////////////////////////////////////////////////////////////////////////////////// - /* Find the parameters with the maximum posterior prob */ - maxpost = -DBL_MAX; - maxind = 0; - runind = 0; - for (i = 0; i < iters; ++i) - { - //printf("\nlnpost[%6d]: %g %g", i, lnpost[i], maxpost); - lnh = lnprior[i] + lnlike[i]; - if (maxpost < lnh) - { - maxpost = lnh; - maxind = i; - } - } - - printf("\n%-25s% 16.4f", "Max log posterior - p(D):", maxpost); - printf("\n%-25s% 16.4f", "Max log prior:", lnprior[maxind]); - printf("\n%-25s% 16.4f", "Max log likelihood:", lnlike[maxind]); - -// for (i = 0; i < dim; ++i) -// printf("\nmax logPost p[%d]: % 16.4f", i, x[i][maxind]); -// -// printf("\n%.4f", x[0][maxind]); -// for (i = 1; i < dim; ++i) -// printf(":%.4f", x[i][maxind]); -// printf("\n"); - - lapmet = maxpost + 0.5 * lndet + 0.5 * log(2.0 * M_PI); - - printf("\n\nLog marginal likelihood ln p(x):"); - printf("\n%-30s% 16.4f\n\n", "Laplace-Metropolis:", lapmet); - - printf("Laplace approximation done ...\n"); - fflush(NULL); - - entropy = 0.5 * log(2.0 * M_PI * M_E) + 0.5 * lndet; - - expmet = avelnprior + avelnlike + entropy; - - printf("Calculating Edgeworth entropy approximation ...\n"); - fflush(NULL); - - edge_entropy = CalcEdgeworthVanHulleEntropy(x, 1, iters); - - printf("\n%-30s% 16.4f", "Posterior N-entropy:", entropy); - printf("\n%-30s% 16.4f", "Edgeworth entropy:", edge_entropy); - printf("\n%-30s% 16.4f", "Expected-Metropolis:", expmet); - printf("\n%-30s% 16.4f", "varlnpost:", varlnpost); - printf("\n%-30s% 16.4f", "DLT-Metropolis:", avelnprior + avelnlike - varlnpost * (log(nd) - log(2.0 * M_PI * M_E))); - printf("\n%-30s% 16.4f\n", 
"DLT-Metropolis, no prior:", avelnlike - varlnpost * (log(nd) - log(2.0 * M_PI * M_E))); - printf("\n%-30s% 16.4f", "Ave lnPrior:", avelnprior); - printf("\n%-30s% 16.4f\n", "Ave lnLike:", avelnlike); - fflush(NULL); - - return(lapmet); -} - - -/* -The simple Gaussian model described on page 203 of - -Lartillot N, Philippe H. (2006) -"Computing Bayes factors using thermodynamic integration." -Syst Biol. 55(2):195-207. - -The real data is all zeros (a "data-free" likelihood). - -It appears that they have forgotten the normalization constants. -*/ -void -SimGaussLP(const double nu, const gsl_rng *r2) -{ - int i, j; - double sigma = sqrt(nu / (1.0 + nu)); - double sqrtnu = sqrt(nu), xij; - double tmp; - - avelnprior = avelnlike = avelnprlk2 = 0.0; - - for (i = 0; i < iters; ++i) - { - lnprior[i] = lnpost[i] = lnlike[i] = 0.0; - for (j = 0; j < dim; ++j) - { - xij = gsl_ran_gaussian(r2, sigma); - lnprior[i] += log(gsl_ran_gaussian_pdf(xij, sqrtnu)); - lnlike[i] += log(gsl_ran_gaussian_pdf(xij, 1.0)); - lnpost[i] += log(gsl_ran_gaussian_pdf(xij, sigma)); - x[j][i] = xij; - } - //printf("\nlnlike[%6d]: %g", i, lnlike[i]); - avelnprior += lnprior[i]; - avelnlike += lnlike[i]; - avelnprlk2 += (lnprior[i] + lnlike[i]) * (lnprior[i] + lnlike[i]); - } - - avelnprior /= iters; - avelnlike /= iters; - avelnpost = avelnprior + avelnlike; - avelnprlk2 /= iters; - - varlnpost = 0.0; - for (i = 0; i < iters; ++i) - { - tmp = lnprior[i] + lnlike[i] - avelnpost; - varlnpost += tmp * tmp; - } - - varlnpost /= iters; - - printf("\n%-22s% 16.4f", "varlnpost:", varlnpost); - printf("\n%-22s% 16.4f", "avelnlike:", avelnlike); - printf("\n%-22s% 16.4f", "avelnlike/n:", avelnlike/ndata); - printf("\n%-22s% 16.4f", "avelnprior:", avelnprior); - printf("\n%-22s% 16.4f", "avelnpost:", avelnpost); - printf("\n%-22s% 16.4f", "avelnprlk2:", avelnprlk2); - printf("\n%-22s% 16.4f", "entropy ln post:", 0.5 * log(2.0 * M_PI * varlnpost * M_E)); - printf("\n\n"); -} - - -double -normal_lnpdf(const 
double x, const double mean, const double var) -{ - double p; - - p = (-0.5 * log(2.0 * M_PI * var)) - ((x - mean)*(x - mean) / (2.0 * var)); - - return (p); -} - - -typedef struct -{ - double **x; - int idim, len, start, end; -} GibbsData; - - -static void -*sim_gauss_pth(void *gibbsdata_ptr) -{ - GibbsData *gibbsdata = (GibbsData *) gibbsdata_ptr; - int i; - const int idim = (const int) gibbsdata->idim; - double **x = gibbsdata->x; - double tmpmu; - - const gsl_rng_type *T = NULL; - gsl_rng *r2 = NULL; - unsigned long int seed; - - /* Every thread gets its own rng generator -- otherwise, we get data race junk in valgrind */ - T = gsl_rng_ranlxs2; - r2 = gsl_rng_alloc(T); - //seed = time(NULL) + chain; - seed = time(NULL) + (unsigned long int) pthread_self() % gsl_rng_max(r2); - //printf("\nseed[%d]:%ld %ld\n", pthread_self(), seed, time(NULL)); - gsl_rng_set(r2, seed); - //par->r2 = r2; - - //tmpmu = gsl_ran_gaussian(r2, 10.0); - //printf("\nmu[%d]: %g", i, tmpmu); - tmpmu = 1.0; - for (i = 0; i < gibbsdata->len; ++i) - { - x[idim][i] = gsl_ran_gaussian_ziggurat(r2, tmpmu); - //x[idim][i] = gsl_ran_exponential(r2, tmpmu); - //printf("\n%5d %5d % 16.6f", idim, i, x[idim][i]); - //data[i][j] = 0.0; - } - - printf("SimGauss thread %3d DONE\n", idim); - fflush(NULL); - - gsl_rng_free(r2); - r2 = NULL; - - pthread_exit((void *) 0); -} - - -void -SimGaussPth(double **data, GibbsData **gibbsdata, pthread_t *callThd, - pthread_attr_t *attr, const int thrdnum) -{ - const int len = ndata; - int i, rc = 0; - - - for (i = 0; i < thrdnum ; ++i) - { - gibbsdata[i]->x = data; - gibbsdata[i]->idim = i; - gibbsdata[i]->len = len; - - rc = pthread_create(&callThd[i], attr, sim_gauss_pth, (void *) gibbsdata[i]); - - if (rc) - { - printf("ERROR811: return code from pthread_create() %d is %d\n", i, rc); - exit(EXIT_FAILURE); - } - } - - for (i = 0; i < thrdnum; ++i) - { - rc = pthread_join(callThd[i], (void **) NULL); - - if (rc) - { - printf("ERROR812: return code from 
pthread_join() %d is %d\n", i, rc); - exit(EXIT_FAILURE); - } - } - - return; -} - - -static void -*sim_expo_pth(void *gibbsdata_ptr) -{ - GibbsData *gibbsdata = (GibbsData *) gibbsdata_ptr; - int i; - const int idim = (const int) gibbsdata->idim; - double **x = gibbsdata->x; - double tmpmu; - - const gsl_rng_type *T = NULL; - gsl_rng *r2 = NULL; - unsigned long int seed; - - /* Every thread gets its own rng generator -- otherwise, we get data race junk in valgrind */ - T = gsl_rng_ranlxs2; - r2 = gsl_rng_alloc(T); - //seed = time(NULL) + chain; - seed = time(NULL) + (unsigned long int) pthread_self() % gsl_rng_max(r2); - //printf("\nseed[%d]:%ld %ld\n", pthread_self(), seed, time(NULL)); - gsl_rng_set(r2, seed); - //par->r2 = r2; - - //tmpmu = gsl_ran_gaussian(r2, 10.0); - //printf("\nmu[%d]: %g", i, tmpmu); - tmpmu = 1.0; - for (i = 0; i < gibbsdata->len; ++i) - { - //x[idim][i] = gsl_ran_gaussian_ziggurat(r2, tmpmu); - x[idim][i] = gsl_ran_exponential(r2, tmpmu); - //printf("\n%5d %5d % 16.6f", idim, i, x[idim][i]); - //data[i][j] = 0.0; - } - - printf("SimGauss thread %3d DONE\n", idim); - fflush(NULL); - - gsl_rng_free(r2); - r2 = NULL; - - pthread_exit((void *) 0); -} - - -void -SimExpoPth(double **data, GibbsData **gibbsdata, pthread_t *callThd, - pthread_attr_t *attr, const int thrdnum) -{ - const int len = ndata; - int i, rc = 0; - - - for (i = 0; i < thrdnum ; ++i) - { - gibbsdata[i]->x = data; - gibbsdata[i]->idim = i; - gibbsdata[i]->len = len; - - rc = pthread_create(&callThd[i], attr, sim_expo_pth, (void *) gibbsdata[i]); - - if (rc) - { - printf("ERROR811: return code from pthread_create() %d is %d\n", i, rc); - exit(EXIT_FAILURE); - } - } - - for (i = 0; i < thrdnum; ++i) - { - rc = pthread_join(callThd[i], (void **) NULL); - - if (rc) - { - printf("ERROR812: return code from pthread_join() %d is %d\n", i, rc); - exit(EXIT_FAILURE); - } - } - - return; -} - - -void -SimGauss(const gsl_rng *r2) -{ - int i, j; - double tmpmu; - - /* First, generate 
artificial data */ - /* precision/sigma/variance of likelihood is 1.0 */ - /* real mu = 0 */ - printf("Simulating Gaussian data ...\n"); - fflush(NULL); - for (i = 0; i < dim; ++i) - { - //tmpmu = gsl_ran_gaussian(r2, 10.0); - //printf("\nmu[%d]: %g", i, tmpmu); - tmpmu = 1.0; - for (j = 0; j < ndata; ++j) - { - data[i][j] = gsl_ran_gaussian_ziggurat(r2, tmpmu); - //data[i][j] = 0.0; - } - } -} - - -void -SimExpo(const gsl_rng *r2) -{ - int i, j; - double tmpmu; - - /* First, generate artificial data */ - /* scale of likelihood is 1.0 */ - printf("Simulating exponential data ...\n"); - fflush(NULL); - for (i = 0; i < dim; ++i) - { - tmpmu = 1; - //tmpmu = (1.0 / gsl_ran_exponential(r2, 10000)); - printf("\nmu[%d]: %g", i, tmpmu); - for (j = 0; j < ndata; ++j) - { - data[i][j] = gsl_ran_exponential(r2, tmpmu); - //printf("\n%g", data[i][j]); - //data[i][j] = 0.0; - } - } - - printf("\n"); -} - - -void -CalcCumulants(void) -{ - int i, j; - - printf("Calculate vector first and second cumulants ...\n"); - fflush(NULL); - /* for each dim, find the mean of the data */ - for (j = 0; j < dim; ++j) - { - y[j] = 0.0; - for (i = 0; i < ndata; ++i) - y[j] += data[j][i]; - } - -// for (i = 0; i < dim; ++i) -// y[i] /= ndata; - - for (j = 0; j < dim; ++j) - { - x2[j] = 0.0; - for (i = 0; i < ndata; ++i) - x2[j] += data[j][i]*data[j][i]; - } - - yt = 0.0; - for (j = 0; j < dim; ++j) - yt += y[j]; - - yt2 = 0.0; - for (j = 0; j < dim; ++j) - yt2 += y[j]*y[j]; - - x2t = 0.0; - for (j = 0; j < dim; ++j) - x2t += x2[j]; -} - - -void -WriteChain(char *fname, double **chain, const int n, const int d) -{ - FILE *fp = fopen(fname ,"w"); - int i, j; - - for (i = 0; i < n; ++i) - { - for (j = 0; j < d; ++j) - fprintf(fp, "%-18.8f ", chain[j][i]); - - fprintf(fp, "\n"); - } - - fprintf(fp, "\n\n"); - fclose(fp); - fflush(NULL); -} - - -/* mu = 0 */ -/* lambda = precision of prior mu */ -void -GibbsGauss(const double lambda_0, const gsl_rng *r2) -{ - int i, j; - double postvar = 1.0 / (ndata 
+ lambda_0); - double postsigma = sqrt(postvar); - double tmp, factor, musimj; - //double priorvar = 1.0 / lambda_0; - const double ln2pi2 = 0.5*log(2.0*M_PI); - const double ln2pi = log(2.0*M_PI); - double musim2, diffsum; - - /* Now sample posterior of mu with Gibbs */ - printf("Gibbs sampling ...\n"); - fflush(NULL); - avelnprior = avelnlike = avelnpost = avelnprlk2 = 0.0; - factor = 1.0 / (lambda_0 + ndata); - - for (i = 0; i < iters; ++i) - { - lnprior[i] = lnpost[i] = lnlike[i] = 0.0; - - for (j = 0; j < dim; ++j) - { - x[j][i] = musimj = gsl_ran_gaussian_ziggurat(r2, postsigma) + factor * y[j]; - //x[j][i] = gsl_ran_exponential(r2, 1.0); - //x[j][i] = gsl_ran_gamma(r2, 5.0, 1.0); - //x[j][i] = gsl_ran_weibull(r2, 1.0, 2.0); - lnlike[i] += -ndata * ln2pi2 - -0.5 *(x2[j] - 2.0*musimj*y[j] + ndata*musimj*musimj); - } - - musim2 = 0.0; - for (j = 0; j < dim; ++j) - musim2 += x[j][i]*x[j][i]; - - diffsum = 0.0; - for (j = 0; j < dim; ++j) - { - tmp = y[j] * factor - x[j][i]; - diffsum += tmp*tmp; - } - - lnprior[i] = 0.5*(-dim*ln2pi + dim*log(lambda_0) - lambda_0 * musim2); - lnpost[i] = 0.5*(-dim*ln2pi + dim*log(lambda_0 + ndata) - (lambda_0 + ndata)*diffsum); - -// for (j = 0; j < dim; ++j) -// { -// tmpmu = y[j]*factor; -// musim[j] = gsl_ran_gaussian(r2, postsigma) + tmpmu; -// x[j][i] = musim[j]; -// lnprior[i] += normal_lnpdf(musim[j], 0.0, priorvar); -// //for (k = 0; k < ndata; ++k) -// // lnlike[i] += normal_lnpdf(data[j][k], musim[j], 1.0); -// lnlike[i] += -0.5 * ndata * log(2.0*M_PI) -// -0.5 *(x2[j] - 2.0*musim[j]*y[j] + ndata*musim[j]*musim[j]); -// lnpost[i] += normal_lnpdf(musim[j], tmpmu, postvar); -// } - - //printf("\n%-d% 16.4f % 16.4f % 16.4f", i, lnprior[i], lnlike[i], lnpost[i]); - - avelnprior += lnprior[i]; - avelnlike += lnlike[i]; - avelnpost += lnpost[i]; - avelnprlk2 += (lnprior[i] + lnlike[i]) * (lnprior[i] + lnlike[i]); - } - - avelnprior /= iters; - avelnlike /= iters; - avelnpost /= iters; - avelnprlk = avelnprior + avelnlike; - 
avelnprlk2 /= iters; - - varlnpost = 0.0; - for (i = 0; i < iters; ++i) - { - tmp = lnprior[i] + lnlike[i] - avelnprlk; - varlnpost += tmp * tmp; - } - - varlnpost /= iters; - - printf("Gibbs done ...\n"); - fflush(NULL); - - //printf("\n%-22s% 16.4f", "log c:", -0.5 * log(2.0 * M_PI * varlnpost * M_E) + avelnlike + avelnprior); // this one is probably meaningless - printf("\n%-22s% 16.4f", "varlnpost:", varlnpost); - printf("\n%-22s% 16.4f", "avelnlike:", avelnlike); - printf("\n%-22s% 16.4f", "explnlike:", -0.5 * (dim * ndata * log(2.0 * M_PI) + x2t - yt2/ndata + dim)); // reference prior - printf("\n%-22s% 16.4f", "avelnprior:", avelnprior); - double explnprior; - explnprior = -0.5*dim*log(2.0 * M_PI) - +0.5*dim*log(lambda_0) - -0.5*dim*lambda_0/(lambda_0+ndata) - -0.5*lambda_0*yt2/((lambda_0+ndata)*(lambda_0+ndata)); - printf("\n%-22s% 16.4f", "explnprior:", explnprior); - printf("\n%-22s% 16.4f", "avelnprlk:", avelnprlk); - printf("\n%-22s% 16.4f", "avelnpost:", avelnpost); - printf("\n%-22s% 16.4f", "explnpost:", -0.5 * dim * log(2.0*M_PI*M_E/ndata)); // reference prior - printf("\n%-22s% 16.4f", "sqrt avelnprlk2:", sqrt(avelnprlk2)); - printf("\n%-22s% 16.4f", "entropy ln post:", 0.5 * log(2.0 * M_PI * varlnpost * M_E)); - printf("\n%-22s% 16.4f", "posterior entropy:", - avelnpost); - printf("\n%-22s% 16.4f", "exp lnlik + entropy:", avelnlike - avelnpost); - printf("\n%-22s% 16.4f", "DIC:", avelnlike - varlnpost); - printf("\n%-22s% 16.4f", "exact exp ml:", avelnlike + avelnprior - avelnpost); - printf("\n\n"); - fflush(NULL); - - if (write_files == 1) - WriteChain("gibbs_gauss.txt", x, iters, dim); - - /* Gibbs with reference priors, calculate exp(lnpost) and exp(lnlike) */ - printf("Reference posterior Gibbs sampling ...\n"); - fflush(NULL); - - double pi_avelnlike = 0.0; - double pi_avelnpost = 0.0; - double pi_varlnpost = 0.0; - double lnposti, lnlikei; - double inv_ndata = 1.0/ndata; - double pi_sigma = sqrt(inv_ndata); - double yj_ndata; - double 
delta; - - for (i = 0; i < iters; ++i) - { - lnposti = lnlikei = 0.0; - for (j = 0; j < dim; ++j) - { - yj_ndata = y[j]*inv_ndata; - musimj = gsl_ran_gaussian(r2, pi_sigma) + yj_ndata; - lnlikei += -0.5 * ndata * ln2pi - -0.5 *(x2[j] - 2.0*musimj*y[j] + ndata*musimj*musimj); - lnposti += normal_lnpdf(musimj, yj_ndata, inv_ndata); - } - - /* running mean and variance */ - delta = lnposti - pi_avelnpost; - pi_avelnpost += delta/(i+1); - pi_varlnpost += delta*(lnposti - pi_avelnpost); - - pi_avelnlike += lnlikei; - } - - printf("\n%-22s% 16.4f", "pi_avelnlike:", pi_avelnlike/iters); - printf("\n%-22s% 16.4f", "pi_avelnpost:", pi_avelnpost); - printf("\n%-22s% 16.4f", "posterior pi-entropy:", -pi_avelnpost); - printf("\n%-22s% 16.4f", "pi lnlik + entropy:", (pi_avelnlike/iters - pi_avelnpost)); - printf("\n%-22s% 16.4f", "pi_varlnpost:", pi_varlnpost/iters); - printf("\n%-22s% 16.4f", "pi_DIC:", pi_avelnlike/iters - pi_varlnpost/iters); - //printf("\n%-22s% 16.4f", "mean:", mean); - printf("\n\n"); - fflush(NULL); -} - - -void -GibbsGaussHierarch(const double phi_0, const gsl_rng *r2) -{ - int i, j; - double postvar = 1.0 / (ndata + 1.0); - double postsigma = sqrt(postvar); - double tmp, musimj; - const double ln2pi2 = 0.5*log(2.0*M_PI); - const double ln2pi = log(2.0*M_PI); - double mu0sim, musum; - double postphi = phi_0/(dim*phi_0 + 1); - double phisigma = sqrt(postphi); - - - /* Now sample posterior of mu with Gibbs */ - printf("Gibbs sampling ...\n"); - fflush(NULL); - avelnprior = avelnlike = avelnpost = avelnprlk2 = 0.0; - - musum = 0.0; - for (i = 0; i < iters; ++i) - { - lnprior[i] = lnpost[i] = lnlike[i] = 0.0; - - h[i] = mu0sim = gsl_ran_gaussian_ziggurat(r2, phisigma) + postphi*musum; - - for (j = 0; j < dim; ++j) - { - x[j][i] = musimj = gsl_ran_gaussian_ziggurat(r2, postsigma) + postvar * (y[j] + mu0sim); - - lnlike[i] += - ndata * ln2pi2 - - 0.5 *(x2[j] - 2.0*musimj*y[j] + ndata*musimj*musimj); - lnprior[i] += - 0.5*ln2pi - 0.5*(musimj - mu0sim)*(musimj 
- mu0sim); - lnpost[i] += - 0.5 * (ndata+1.0) * musimj * musimj - + mu0sim*musimj - + musimj*y[j] - - 0.5* y[j]*y[j]/(ndata+1.0); - } - - lnprior[i] += - 0.5*ln2pi - 0.5*log(phi_0) - 0.5 * mu0sim * mu0sim / phi_0; - lnpost[i] += - 0.5*(dim+1)*ln2pi - - 0.5*log(phi_0) - + 0.5*log(ndata*dim*phi_0+ndata+1.0) - + 0.5*(dim-1.0)*log(ndata+1.0) - - 0.5*mu0sim*mu0sim*(dim*phi_0+1.0)/phi_0 - - 0.5*phi_0 * yt * yt / ((ndata+1.0)*(ndata*dim*phi_0+ndata+1.0)); - - //printf("\n%-d% 16.4f % 16.4f % 16.4f", i, lnprior[i], lnlike[i], lnpost[i]); - - avelnprior += lnprior[i]; - avelnlike += lnlike[i]; - avelnpost += lnpost[i]; - avelnprlk2 += (lnprior[i] + lnlike[i]) * (lnprior[i] + lnlike[i]); - - musum = 0.0; - for (j = 0; j < dim; ++j) - musum += x[j][i]; - } - - avelnprior /= iters; - avelnlike /= iters; - avelnpost /= iters; - avelnprlk = avelnprior + avelnlike; - avelnprlk2 /= iters; - - varlnpost = 0.0; - for (i = 0; i < iters; ++i) - { - tmp = lnprior[i] + lnlike[i] - avelnprlk; - varlnpost += tmp * tmp; - } - - varlnpost /= iters; - - printf("Gibbs done ...\n"); - fflush(NULL); - - printf("\n%-22s% 16.4f", "varlnpost:", varlnpost); - printf("\n%-22s% 16.4f", "avelnlike:", avelnlike); - printf("\n%-22s% 16.4f", "avelnprior:", avelnprior); - printf("\n%-22s% 16.4f", "avelnprlk:", avelnprlk); - printf("\n%-22s% 16.4f", "avelnpost:", avelnpost); - printf("\n%-22s% 16.4f", "sqrt avelnprlk2:", sqrt(avelnprlk2)); - printf("\n%-22s% 16.4f", "entropy ln post:", 0.5 * log(2.0 * M_PI * varlnpost * M_E)); - printf("\n%-22s% 16.4f", "posterior entropy:", - avelnpost); - printf("\n%-22s% 16.4f", "exp lnlik + entropy:", avelnlike - avelnpost); - printf("\n%-22s% 16.4f", "DIC:", avelnlike - varlnpost); - printf("\n%-22s% 16.4f", "exact exp ml:", avelnlike + avelnprior - avelnpost); - printf("\n\n"); - fflush(NULL); - - if (write_files == 1) - { - WriteChain("gibbs_gauss_hierarch.txt", x, iters, dim); - WriteChain("gibbs_gauss_hierarch_mu0.txt", &h, iters, 1); - fflush(NULL); - } -} - - 
-void -GibbsGaussUni(const double lambda_0, const gsl_rng *r2) -{ - int i; - double postvar = 1.0 / (nd + lambda_0); - double postsigma = sqrt(postvar); - double tmpmu, tmp, factor, musimj; - double priorvar = 1.0 / lambda_0; -// const double ln2pi2 = 0.5*log(2.0*M_PI); - const double ln2pi = log(2.0*M_PI); - - - /* Now simulate posterior of mu with Gibbs */ - printf("Gibbs sampling Gaussian uniparameter ...\n"); - fflush(NULL); - avelnprior = avelnlike = avelnpost = avelnprlk2 = 0.0; - factor = 1.0 / (lambda_0 + nd); - tmpmu = yt/(lambda_0 + nd); - for (i = 0; i < iters; ++i) - { - musimj = gsl_ran_gaussian(r2, postsigma) + tmpmu; - x[0][i] = musimj; - lnprior[i] = normal_lnpdf(musimj, 0.0, priorvar); - lnlike[i] = -0.5 * nd * ln2pi - -0.5 *(x2t - 2.0*musimj*yt + nd*musimj*musimj); - lnpost[i] = normal_lnpdf(musimj, tmpmu, postvar); - - //printf("\n%-d% 16.4f % 16.4f % 16.4f", i, lnprior[i], lnlike[i], lnpost[i]); - - avelnprior += lnprior[i]; - avelnlike += lnlike[i]; - avelnpost += lnpost[i]; - avelnprlk2 += (lnprior[i] + lnlike[i]) * (lnprior[i] + lnlike[i]); - } - - avelnprior /= iters; - avelnlike /= iters; - avelnpost /= iters; - avelnprlk = avelnprior + avelnlike; - avelnprlk2 /= iters; - - varlnpost = 0.0; - for (i = 0; i < iters; ++i) - { - tmp = lnprior[i] + lnlike[i] - avelnprlk; - varlnpost += tmp * tmp; - } - - varlnpost /= iters; - - printf("Gibbs done ...\n"); - fflush(NULL); - - printf("\n%-22s% 16.4f", "varlnpost:", varlnpost); - printf("\n%-22s% 16.4f", "avelnlike:", avelnlike); - printf("\n%-22s% 16.4f", "explnlike:", -0.5 * (nd * log(2.0 * M_PI) + x2t - (yt*yt/nd) + 1.0)); // analytical exact, posterior expected ln like, reference prior - printf("\n%-22s% 16.4f", "avelnprior:", avelnprior); - printf("\n%-22s% 16.4f", "explnprior:", -0.5*(log(2.0 * M_PI) - log(lambda_0) + lambda_0 * (lambda_0 + nd + yt*yt)/((lambda_0+nd)*(lambda_0+nd)) )); // analytical exact - printf("\n%-22s% 16.4f", "avelnprlk:", avelnprlk); - printf("\n%-22s% 16.4f", 
"avelnpost:", avelnpost); - printf("\n%-22s% 16.4f", "explnpost:", -0.5* (log(2.0*M_PI) - log(nd) + 1.0)); // analytical exact, reference prior - printf("\n%-22s% 16.4f", "sqrt avelnprlk2:", sqrt(avelnprlk2)); - printf("\n%-22s% 16.4f", "entropy ln post:", 0.5 * log(2.0 * M_PI * varlnpost * M_E)); - printf("\n%-22s% 16.4f", "posterior entropy:", - avelnpost); - printf("\n%-22s% 16.4f", "exp lnlik + entropy:", avelnlike - avelnpost); - printf("\n%-22s% 16.4f", "DIC:", avelnlike - varlnpost); - printf("\n%-22s% 16.4f", "exact exp ml:", avelnlike + avelnprior - avelnpost); - printf("\n\n"); - fflush(NULL); - - if (write_files == 1) - WriteChain("gibbs_gauss_uni.txt", x, iters, 1); - - /* Gibbs with reference priors, calculate exp(lnpost) and exp(lnlike) */ - printf("Reference posterior Gibbs sampling Gaussian uniparameter ...\n"); - fflush(NULL); - - double pi_avelnlike = 0.0; - double pi_avelnpost = 0.0; - double pi_varlnpost = 0.0; - double lnposti, lnlikei; - double inv_nd = 1.0/nd; - double yt_nd; - double delta; - - for (i = 0; i < iters; ++i) - { - yt_nd = yt*inv_nd; - musimj = gsl_ran_gaussian(r2, sqrt(inv_nd)) + yt_nd; - lnlikei = -0.5 * nd * ln2pi - -0.5 *(x2t - 2.0*musimj*yt + nd*musimj*musimj); - lnposti = normal_lnpdf(musimj, yt_nd, inv_nd); - - /* running mean and variance */ - delta = lnposti - pi_avelnpost; - pi_avelnpost += delta/(i+1); - pi_varlnpost += delta*(lnposti - pi_avelnpost); - - pi_avelnlike += lnlikei; - } - - printf("\n%-22s% 16.4f", "pi_avelnlike:", pi_avelnlike/iters); - printf("\n%-22s% 16.4f", "pi_avelnpost:", pi_avelnpost); - printf("\n%-22s% 16.4f", "posterior pi-entropy:", -pi_avelnpost); - printf("\n%-22s% 16.4f", "pi lnlik + entropy:", (pi_avelnlike/iters - pi_avelnpost)); - printf("\n%-22s% 16.4f", "pi_varlnpost:", pi_varlnpost/iters); - printf("\n%-22s% 16.4f", "pi_DIC:", pi_avelnlike/iters - pi_varlnpost/iters); - printf("\n\n"); - fflush(NULL); -} - - -void -GibbsGaussPrecUni(const double lambda_0, const gsl_rng *r2) -{ - int 
i; - double tmp, atmp, btmp, binvtmp, musimj; - double ln2pi = log(2.0 * M_PI); - - /* prior is gamma; we assume alpha=1, so that prior is exponential with beta=lambda_0 */ - /* Now simulate posterior of mu with Gibbs */ - printf("Gibbs sampling Gaussian precision uniparameter ...\n"); - fflush(NULL); - avelnprior = avelnlike = avelnpost = avelnprlk2 = 0.0; - atmp = 1.0+0.5*nd; - btmp = lambda_0 + 0.5*x2t; - binvtmp = 1.0/btmp; - for (i = 0; i < iters; ++i) - { - musimj = gsl_ran_gamma(r2, atmp, binvtmp); - x[0][i] = musimj; - lnprior[i] = log(lambda_0) - musimj * lambda_0; - lnlike[i] = -0.5*nd*ln2pi + 0.5*nd * log(musimj) - 0.5*musimj*x2t; - lnpost[i] = atmp * log(btmp) + (atmp-1.0)*log(musimj)-btmp*musimj - lgamma(atmp); - - //printf("\n%-d% 16.4f % 16.4f % 16.4f", i, lnprior[i], lnlike[i], lnpost[i]); - - avelnprior += lnprior[i]; - avelnlike += lnlike[i]; - avelnpost += lnpost[i]; - avelnprlk2 += (lnprior[i] + lnlike[i]) * (lnprior[i] + lnlike[i]); - } - - avelnprior /= iters; - avelnlike /= iters; - avelnpost /= iters; - avelnprlk = avelnprior + avelnlike; - avelnprlk2 /= iters; - - varlnpost = 0.0; - for (i = 0; i < iters; ++i) - { - tmp = lnprior[i] + lnlike[i] - avelnprlk; - varlnpost += tmp * tmp; - } - - varlnpost /= iters; - - printf("Gibbs done ...\n"); - fflush(NULL); - - - double Eloglam = gsl_sf_psi((nd+2.0)/2.0) - log(lambda_0+0.5*x2t); - double Elam = (nd+2.0)/(2.0 * lambda_0+x2t); - double explnlike = -0.5*nd * log(2.0*M_PI) + 0.5*nd*Eloglam - 0.5*x2t * Elam; - - printf("\n%-22s% 16.4f", "varlnpost:", varlnpost); - printf("\n%-22s% 16.4f", "avelnlike:", avelnlike); - printf("\n%-22s% 16.4f", "explnlike:", explnlike); // analytical exact, posterior expected ln like - Eloglam = gsl_sf_psi((nd+2.0)/2.0) - log(0.5*x2t); - explnlike = 0.5*nd * (Eloglam - log(2.0*M_PI) - 1.0) - 1.0; - printf("\n%-22s% 16.4f", "explnlike(ref):", explnlike); // analytical exact, posterior expected ln like, reference prior (beta=0) - printf("\n%-22s% 16.4f", 
"avelnprior:", avelnprior); - //printf("\n%-22s% 16.4f", "explnprior:", -0.5*(log(2.0 * M_PI) - log(lambda_0) + lambda_0 * (lambda_0 + nd + yt*yt)/((lambda_0+nd)*(lambda_0+nd)) )); // analytical exact - printf("\n%-22s% 16.4f", "avelnprlk:", avelnprlk); - printf("\n%-22s% 16.4f", "avelnpost:", avelnpost); - //printf("\n%-22s% 16.4f", "explnpost:", -0.5* (log(2.0*M_PI) - log(nd) + 1.0)); // analytical exact, reference prior - printf("\n%-22s% 16.4f", "sqrt avelnprlk2:", sqrt(avelnprlk2)); - printf("\n%-22s% 16.4f", "entropy ln post:", 0.5 * log(2.0 * M_PI * varlnpost * M_E)); - printf("\n%-22s% 16.4f", "posterior entropy:", - avelnpost); - printf("\n%-22s% 16.4f", "exp lnlik + entropy:", avelnlike - avelnpost); - printf("\n%-22s% 16.4f", "DIC:", avelnlike - varlnpost); - printf("\n%-22s% 16.4f", "exact exp ml:", avelnlike + avelnprior - avelnpost); - printf("\n\n"); - fflush(NULL); - - if (write_files == 1) - WriteChain("gibbs_gauss_prec_uni.txt", x, iters, 1); - - /* Gibbs with reference priors, calculate exp(lnpost) and exp(lnlike) */ - printf("Reference posterior Gibbs sampling Gaussian uniparameter ...\n"); - fflush(NULL); - - double pi_avelnlike = 0.0; - double pi_avelnpost = 0.0; - double pi_varlnpost = 0.0; - double lnposti, lnlikei; - double delta; - - atmp = 0.5*nd; - btmp = 0.5*x2t; - binvtmp = 1.0/btmp; - - for (i = 0; i < iters; ++i) - { - musimj = gsl_ran_gamma(r2, atmp, binvtmp); - lnlikei = -0.5*nd*ln2pi + 0.5*nd * log(musimj) - 0.5*musimj*x2t; - lnposti = atmp * log(btmp) + (atmp-1.0)*log(musimj)-btmp*musimj - lgamma(atmp); - - /* running mean and variance */ - delta = lnposti - pi_avelnpost; - pi_avelnpost += delta/(i+1); - pi_varlnpost += delta*(lnposti - pi_avelnpost); - - pi_avelnlike += lnlikei; - } - - printf("\n%-22s% 16.4f", "pi_avelnlike:", pi_avelnlike/iters); - printf("\n%-22s% 16.4f", "pi_avelnpost:", pi_avelnpost); - printf("\n%-22s% 16.4f", "posterior pi-entropy:", -pi_avelnpost); - printf("\n%-22s% 16.4f", "pi lnlik + entropy:", 
(pi_avelnlike/iters - pi_avelnpost)); - printf("\n%-22s% 16.4f", "pi_varlnpost:", pi_varlnpost/iters); - printf("\n%-22s% 16.4f", "pi_DIC:", pi_avelnlike/iters - pi_varlnpost/iters); - printf("\n\n"); - fflush(NULL); -} - - -void -GibbsExpo(const double alpha_0, const gsl_rng *r2) -{ - int i, j; - double tmp, musimj; - const double beta_0 = alpha_0; - - - /* Now simulate posterior of mu with Gibbs */ - printf("Gibbs sampling ...\n"); - fflush(NULL); - - avelnprior = avelnlike = avelnpost = avelnprlk2 = 0.0; - for (i = 0; i < iters; ++i) - { - lnprior[i] = lnlike[i] = lnpost[i] = 0.0; - - for (j = 0; j < dim; ++j) - { - musim[j] = gsl_ran_gamma(r2, alpha_0 + ndata, 1.0/(beta_0 + y[j])); - x[j][i] = musim[j]; - lnprior[i] += alpha_0 * log(beta_0) - lgamma(alpha_0) - + (alpha_0-1.0)*log(musim[j]) - beta_0*musim[j]; - lnlike[i] += ndata*log(musim[j]) - musim[j]*y[j]; - lnpost[i] += (alpha_0 + ndata)*log(beta_0+y[j]) - lgamma(alpha_0+ndata) - +(alpha_0+ndata-1.0)*log(musim[j]) - - musim[j]*(beta_0+y[j]); - } - - //printf("\n%-d% 16.4f % 16.4f % 16.4f", i, lnprior[i], lnlike[i], lnpost[i]); - - avelnprior += lnprior[i]; - avelnlike += lnlike[i]; - avelnpost += lnpost[i]; - avelnprlk2 += (lnprior[i] + lnlike[i]) * (lnprior[i] + lnlike[i]); - } - - avelnprior /= iters; - avelnlike /= iters; - avelnpost /= iters; - avelnprlk = avelnprior + avelnlike; - avelnprlk2 /= iters; - - varlnpost = 0.0; - for (i = 0; i < iters; ++i) - { - tmp = lnprior[i] + lnlike[i] - avelnprlk; - varlnpost += tmp * tmp; - } - - varlnpost /= iters; - - printf("Gibbs done ...\n"); - fflush(NULL); - - //printf("\n%-22s% 16.4f", "log c:", -0.5 * log(2.0 * M_PI * varlnpost * M_E) + avelnlike + avelnprior); // this one is probably meaningless - printf("\n%-22s% 16.4f", "varlnpost:", varlnpost); - printf("\n%-22s% 16.4f", "avelnlike:", avelnlike); - printf("\n%-22s% 16.4f", "avelnprior:", avelnprior); - printf("\n%-22s% 16.4f", "avelnprlk:", avelnprlk); - printf("\n%-22s% 16.4f", "avelnpost:", avelnpost); 
- printf("\n%-22s% 16.4f", "sqrt avelnprlk2:", sqrt(avelnprlk2)); - printf("\n%-22s% 16.4f", "entropy ln post:", 0.5 * log(2.0 * M_PI * varlnpost * M_E)); - printf("\n%-22s% 16.4f", "posterior entropy:", - avelnpost); - printf("\n%-22s% 16.4f", "exp lnlik + entropy:", avelnlike - avelnpost); - printf("\n%-22s% 16.4f", "DIC:", avelnlike - varlnpost); - printf("\n%-22s% 16.4f", "exact exp ml:", avelnlike + avelnprior - avelnpost); - printf("\n\n"); - fflush(NULL); - - if (write_files == 1) - WriteChain("gibbs_expo.txt", x, iters, dim); - - /* Gibbs with reference priors, calculate exp(lnpost) and exp(lnlike) */ - printf("Reference posterior Gibbs sampling ...\n"); - fflush(NULL); - - double pi_avelnlike = 0.0; - double pi_avelnpost = 0.0; - double lnposti, lnlikei; - - for (i = 0; i < iters; ++i) - { - lnposti = lnlikei = 0.0; - for (j = 0; j < dim; ++j) - { - musimj = gsl_ran_gamma(r2, ndata, 1.0/y[j]); - lnlikei += ndata*log(musimj) - musimj*y[j]; - lnposti += ndata*log(y[j]) - lgamma(ndata) - +(ndata-1.0)*log(musimj) - - musimj*y[j]; - } - - pi_avelnlike += lnlikei; - pi_avelnpost += lnposti; - } - - printf("\n%-22s% 16.4f", "pi_avelnlike:", pi_avelnlike/iters); - printf("\n%-22s% 16.4f", "pi_avelnpost:", pi_avelnpost/iters); - printf("\n%-22s% 16.4f", "posterior pi-entropy:", -pi_avelnpost/iters); - printf("\n%-22s% 16.4f", "pi lnlik + entropy:", (pi_avelnlike - pi_avelnpost)/iters); - printf("\n\n"); - fflush(NULL); -} - - -void -GibbsExpoUni(const double alpha_0, const gsl_rng *r2) -{ - int i; - double tmp, musimj; - const double beta_0 = alpha_0; - - - /* Now simulate posterior of mu with Gibbs */ - printf("Gibbs sampling ...\n"); - fflush(NULL); - - avelnprior = avelnlike = avelnpost = avelnprlk2 = 0.0; - for (i = 0; i < iters; ++i) - { - musimj = gsl_ran_gamma(r2, alpha_0 + nd, 1.0/(beta_0 + yt)); - x[0][i] = musimj; - lnprior[i] = alpha_0 * log(beta_0) - lgamma(alpha_0) - + (alpha_0-1.0)*log(musimj) - beta_0*musimj; - lnlike[i] = nd*log(musimj) - musimj*yt; 
- lnpost[i] = (alpha_0 + nd)*log(beta_0+yt) - lgamma(alpha_0+nd) - +(alpha_0+nd-1.0)*log(musimj) - - musimj*(beta_0+yt); - - //printf("\n%-d% 16.4f % 16.4f % 16.4f", i, lnprior[i], lnlike[i], lnpost[i]); - - avelnprior += lnprior[i]; - avelnlike += lnlike[i]; - avelnpost += lnpost[i]; - avelnprlk2 += (lnprior[i] + lnlike[i]) * (lnprior[i] + lnlike[i]); - } - - avelnprior /= iters; - avelnlike /= iters; - avelnpost /= iters; - avelnprlk = avelnprior + avelnlike; - avelnprlk2 /= iters; - - varlnpost = 0.0; - for (i = 0; i < iters; ++i) - { - tmp = lnprior[i] + lnlike[i] - avelnprlk; - varlnpost += tmp * tmp; - } - - varlnpost /= iters; - - printf("Gibbs done ...\n"); - fflush(NULL); - - printf("\n%-22s% 16.4f", "varlnpost:", varlnpost); - printf("\n%-22s% 16.4f", "avelnlike:", avelnlike); - printf("\n%-22s% 16.4f", "explnlike:", nd * (gsl_sf_psi(nd) - log(yt) - 1.0)); - printf("\n%-22s% 16.4f", "avelnprior:", avelnprior); - printf("\n%-22s% 16.4f", "explnprior:", alpha_0 * log(beta_0) - lgamma(alpha_0) - + (alpha_0-1.0) * (gsl_sf_psi(alpha_0 + nd) - log(beta_0+yt)) - - beta_0 * (alpha_0+nd)/(beta_0+yt) ); // analytical exact - printf("\n%-22s% 16.4f", "avelnprlk:", avelnprlk); - printf("\n%-22s% 16.4f", "avelnpost:", avelnpost); - printf("\n%-22s% 16.4f", "explnpost:", log(yt) - lgamma(nd) + gsl_sf_psi(nd)*(nd-1.0) - nd); - printf("\n%-22s% 16.4f", "sqrt avelnprlk2:", sqrt(avelnprlk2)); - printf("\n%-22s% 16.4f", "entropy ln post:", 0.5 * log(2.0 * M_PI * varlnpost * M_E)); - printf("\n%-22s% 16.4f", "posterior entropy:", - avelnpost); - printf("\n%-22s% 16.4f", "exp lnlik + entropy:", avelnlike - avelnpost); - printf("\n%-22s% 16.4f", "DIC:", avelnlike - varlnpost); - printf("\n%-22s% 16.4f", "exact exp ml:", avelnlike + avelnprior - avelnpost); - printf("\n\n"); - fflush(NULL); - - if (write_files == 1) - WriteChain("gibbs_expo_uni.txt", x, iters, 1); - - /* Gibbs with reference priors, calculate exp(lnpost) and exp(lnlike) */ - printf("Reference posterior Gibbs 
sampling ...\n"); - fflush(NULL); - - double pi_avelnlike = 0.0; - double pi_avelnpost = 0.0; - double lnposti, lnlikei; - - for (i = 0; i < iters; ++i) - { - musimj = gsl_ran_gamma(r2, nd, 1.0/yt); - lnlikei = nd*log(musimj) - musimj*yt; - lnposti = nd*log(yt) - lgamma(nd) - +(nd-1.0)*log(musimj) - - musimj*yt; - - pi_avelnlike += lnlikei; - pi_avelnpost += lnposti; - } - - printf("\n%-22s% 16.4f", "pi_avelnlike:", pi_avelnlike/iters); - printf("\n%-22s% 16.4f", "pi_avelnpost:", pi_avelnpost/iters); - printf("\n%-22s% 16.4f", "posterior pi-entropy:", -pi_avelnpost/iters); - printf("\n%-22s% 16.4f", "pi lnlik + entropy:", (pi_avelnlike - pi_avelnpost)/iters); - printf("\n\n"); - fflush(NULL); -} - - -void -Usage(void) -{ - printf("\n < BEGIN MARG > \n"); - printf("I===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-==I\n"); - printf(" Usage: \n"); - printf(" marg [options] \n\n"); - printf(" -b burnin (as a fraction) \n"); - printf(" -d # of dimensions in models \n"); - printf(" -e exponential models \n"); - printf(" -f write samples to file \n"); - printf(" -g Gaussian models \n"); - printf(" -H calculate entropy of data in file \n"); - printf(" -i # of samples or sampling iterations \n"); - printf(" -l lambda, prior precision \n"); - printf(" -n # of data points per dimension \n"); - printf(" -p parallel simulation \n"); - printf(" -s seed for random number generators \n"); - printf("I===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-==I\n"); - printf(" < END MARG > \n\n\n"); -// printf(" %s< END THESEUS %s >%s \n\n\n", -// tc_RED, VERSION, tc_NC); - fflush(NULL); -} - - -void -GetOpts(int argc, char *argv[]) -{ - int option; - - /* get the options */ - while ((option = getopt(argc, argv, "b:d:efgHi:l:m:n:ps:t:")) != -1) - { - switch (option) - { -/* - case 'P': - sscanf(optarg, "%lf:%lf:%lf:%lf:%lf:%lf:%lf:%lf:%lf:%lf:%lf:%lf", - &prior[0], &prior[1], &prior[2], &prior[3], - &prior[4], &prior[5], &prior[6], 
&prior[7], - &prior[8], &prior[9], &prior[10], &prior[11]); - for (i = 0; i < dim; ++i) - prior[i] *= 0.5; - break; -*/ - case 'b': - burnin = (double) strtod(optarg, NULL); - - if (burnin > 0.0 && burnin < 1.0) - burnin = 1.0 - burnin; - else - burnin = 0.5; - break; - - case 'd': - dim = (int) strtol(optarg, NULL, 10); - break; - - case 'e': - expo_model = 1; - break; - - case 'f': - write_files = 1; - break; - - case 'g': - gauss_model = 1; - break; - - case 'H': - entropy_calc = 1; - break; - - case 'i': - iters = (int) strtol(optarg, NULL, 10); - break; - - case 'l': - lambda_0 = (double) strtod(optarg, NULL); - break; - - case 'n': - ndata = (double) strtod(optarg, NULL); - break; - - case 'p': - parallel = 1; - break; - - case 's': - seed = (int) strtol(optarg, NULL, 10); - break; - - case 't': - thrdnum = (int) strtol(optarg, NULL, 10); - break; - - default: - perror("\n\n ERROR"); - fprintf(stderr, "\nBad option '-%c' \n", optopt); - Usage(); - exit(EXIT_FAILURE); - break; - } - } -} - - -int -main(int argc, char *argv[]) -{ - int i, narguments; - double hme, marglik; -// double ln2pi = log(2.0 * M_PI); - - const gsl_rng_type *T = NULL; - gsl_rng *r2 = NULL; - - if (argc == 1) - { - Usage(); - exit(EXIT_FAILURE); - } - - GetOpts(argc, argv); - - narguments = argc - optind; /* number of nonoption args */ - argv += optind; /* now argv is set with first arg = argv[0] */ - - thrdnum = dim; - - GibbsData **gibbsdata = malloc(thrdnum * sizeof(GibbsData *)); - pthread_t *callThd = malloc(thrdnum * sizeof(pthread_t)); - pthread_attr_t attr; - - pthread_attr_init(&attr); -/* pthread_mutexattr_t mattr; */ -/* pthread_mutexattr_init(&mattr); */ -/* pthread_mutexattr_settype(&mattr, PTHREAD_MUTEX_ERRORCHECK); */ -/* pthread_mutexattr_settype(&mattr, PTHREAD_MUTEX_NORMAL); */ -/* pthread_attr_getstacksize (&attr, &stacksize); */ -/* printf("\nDefault stack size = %d", (int) stacksize); */ - pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); - 
pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); - - for (i = 0; i < thrdnum; ++i) - gibbsdata[i] = malloc(sizeof(GibbsData)); - - nd = ndata * dim; - - gsl_rng_env_setup(); - if (seed == 0) - gsl_rng_default_seed = time(NULL); - else - gsl_rng_default_seed = seed; - T = gsl_rng_ranlxd2; - r2 = gsl_rng_alloc(T); - //gsl_rng_set (r2, 1); - - cov = MatAlloc(dim, dim); - lnpost = calloc(iters, sizeof(double)); - lnlike = calloc(iters, sizeof(double)); - lnprior = calloc(iters, sizeof(double)); - h = calloc(iters, sizeof(double)); - pave = calloc(dim, sizeof(double)); - x = MatAlloc(dim, iters); - data = calloc(dim, sizeof(double *)); - for (i = 0; i < dim; ++i) - data[i] = calloc(ndata, sizeof(double)); - y = calloc(dim, sizeof(double)); - musim = calloc(dim, sizeof(double)); - x2 = calloc(dim, sizeof(double)); - - /************************************************************************************/ - if (entropy_calc == 1) - { - double entropy; - - RandVec(x, dim, iters, r2); - - entropy = CalcEdgeworthVanHulleEntropy(x, dim, iters); - - printf("\n-d ln(n): %14.3f", -dim * log(ndata)); - printf("\nentropy: %14.3f", entropy); - printf ("\n\n"); - fflush(NULL); - - exit(EXIT_SUCCESS); - } - - /************************************************************************************/ - if (parallel == 1) - { - //SimGaussPth(data, gibbsdata, callThd, &attr, thrdnum); - SimExpoPth(data, gibbsdata, callThd, &attr, thrdnum); - } - else - { - //SimGauss(r2); - SimExpo(r2); - } - - CalcCumulants(); - - /************************************************************************************/ - if (gauss_model == 1) - { - printf("\n************************************************************************************"); - printf("\nHierarchical gaussian model:\n"); - double phi_0 = 1.0 / lambda_0; - GibbsGaussHierarch(phi_0, r2); - - CalcLaplaceMet(); - - printf("\n%-22s% 14d", "dim:", dim); - - hme = CalcHarmonicMean(lnlike, iters); - printf("\n%-22s% 16.4f\n", "hme:", hme); - 
fflush(NULL); - - /* gaussian model, hyperprior mu=0 */ - marglik = -0.5*(ndata*dim)* log(2.0*M_PI) - -0.5*log(ndata*dim*phi_0 + ndata + 1.0) - -0.5*(dim-1.0)*log(ndata + 1.0) - -0.5*x2t - +0.5*yt2/(ndata+1.0) - +0.5*phi_0 * yt * yt / ((ndata+1.0)*(ndata*dim*phi_0+ndata+1.0)); - - printf("\nanalytical marginal likelihood: %16.4f\n", marglik); - } - - /************************************************************************************/ - /* gaussian model */ - if (gauss_model == 1) - { - printf("\n************************************************************************************"); - printf("\nGaussian model:\n"); - GibbsGauss(lambda_0, r2); - CalcLaplaceMet(); - - printf("\n%-22s% 14d", "dim:", dim); - - hme = CalcHarmonicMean(lnlike, iters); - printf("\n%-22s% 16.4f\n", "hme:", hme); - fflush(NULL); - - // marglik = - 0.5 * dim * log(2.0 * M_PI) - 0.5 * dim * log(1.0 + nu); // SimGaussLP - - marglik = -0.5*(ndata*dim)* log(2.0*M_PI) - +0.5*dim*log(lambda_0/(lambda_0+ndata)) - -0.5*x2t - +0.5*yt2/(lambda_0+ndata); - - printf("\nanalytical marginal likelihood: %16.4f\n", marglik); - } - - /************************************************************************************/ - /* gaussian one-param model, unknown mu location parameter */ - if (gauss_model == 1) - { - printf("\n************************************************************************************"); - printf("\nGaussian one-parameter model:\n"); - GibbsGaussUni(lambda_0, r2); - CalcLaplaceMetUni(); - - hme = CalcHarmonicMean(lnlike, iters); - printf("\n%-22s% 16.4f\n", "hme:", hme); - fflush(NULL); - - marglik = 0.5*(- x2t + yt*yt/(lambda_0+nd) - - nd*log(2.0*M_PI) - + log(lambda_0/(lambda_0 + nd))); - - printf("\nanalytical marginal likelihood: %16.4f\n", marglik); - } - - /************************************************************************************/ - /* gaussian one-param model, known mu=0, unknown lambda precision parameter */ - if (gauss_model == 1) - { - 
printf("\n************************************************************************************"); - printf("\nGaussian one-parameter precision model:\n"); - GibbsGaussPrecUni(lambda_0, r2); - CalcLaplaceMetUni(); - - hme = CalcHarmonicMean(lnlike, iters); - printf("\n%-22s% 16.4f\n", "hme:", hme); - fflush(NULL); - - /* gaussian model */ - double atmp, btmp; - - atmp = 1.0+0.5*nd; - btmp = lambda_0 + 0.5*x2t; - - marglik = - 0.5*nd*log(2.0*M_PI) +log(lambda_0) - atmp*log(btmp) + lgamma(atmp); - printf("\nanalytical marginal likelihood (normal mu=0): %16.4f\n", marglik); - } - - /************************************************************************************/ - /* Exponential model */ - if (expo_model == 1) - { - printf("\n************************************************************************************"); - printf("\nExponential model:\n"); - GibbsExpo(lambda_0, r2); - CalcLaplaceMet(); - - printf("\n%-22s% 14d", "dim:", dim); - - hme = CalcHarmonicMean(lnlike, iters); - printf("\n%-22s% 16.4f\n", "hme:", hme); - fflush(NULL); - - marglik = 0.0; - for (i = 0; i < dim; ++i) - marglik += log(lambda_0+y[i]); - - marglik = dim *(lgamma(lambda_0+ndata)-lgamma(lambda_0)+lambda_0*log(lambda_0)) - - (lambda_0+ndata)*marglik; - - printf("\nanalytical marginal likelihood: %16.4f\n", marglik); - } - - /************************************************************************************/ - /* Exponential one-param model */ - if (expo_model == 1) - { - printf("\n************************************************************************************"); - printf("\nExponential one-parameter model:\n"); - GibbsExpoUni(lambda_0, r2); - CalcLaplaceMetUni(); - - hme = CalcHarmonicMean(lnlike, iters); - printf("\n%-22s% 16.4f\n", "hme:", hme); - fflush(NULL); - - marglik = 0.0; - for (i = 0; i < dim; ++i) - marglik += log(lambda_0+y[i]); - - marglik = lgamma(lambda_0+nd) - lgamma(lambda_0) + lambda_0*log(lambda_0) - - (lambda_0+nd)*log(lambda_0+yt); - - printf("\nanalytical 
marginal likelihood: %16.4f\n", marglik); - } - - /************************************************************************************/ -// printf("\nwriting files ...\n\n"); - printf("\n"); - fflush(NULL); - -// fp = fopen("lnL.txt" ,"w"); -// -// for (i = 0; i < iters; ++i) -// fprintf(fp, "%-12.3f\n", lnlike[i]); -// -// fprintf(fp, "\n\n"); -// fclose(fp); - - /************************************************************************************/ - for (i = 0; i < dim; ++i) - free(data[i]); - free(data); - MatDestroy(&x); - free(lnpost); - free(lnprior); - free(h); - free(pave); - free(y); - free(musim); - free(x2); - MatDestroy(&cov); - - pthread_attr_destroy(&attr); - for (i = 0; i < thrdnum; ++i) - free(gibbsdata[i]); - free(gibbsdata); - free(callThd); - - gsl_rng_free(r2); - r2 = NULL; - - exit(EXIT_SUCCESS); -} - - Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/._GibbsMet.c and /tmp/g2bOMTRwaC/theseus-3.0.0/._GibbsMet.c differ diff -Nru theseus-2.0.6/GibbsMet.c theseus-3.0.0/GibbsMet.c --- theseus-2.0.6/GibbsMet.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/GibbsMet.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,6 +30,8 @@ #include #include #include +#include +#include #include "Error.h" #include "pdbMalloc.h" #include "pdbStats.h" @@ -44,6 +46,7 @@ #include "ProcGSLSVD.h" #include "DLTmath.h" #include "libdistfit/vonmises_dist.h" +#include "myassert.h" #include #include #include @@ -51,13 +54,6 @@ static double -CalcLogLGibbs(CdsArray *cdsA); - -double -scale_met3(const double n, const double gamma, const double phi, const double x, const gsl_rng *r2, double loc, double width); - - -double ScaleMax(const double n, const double gamma, const double phi) { return((gamma + sqrt(gamma*gamma + 4.0*phi*(n-1.0)))/(2.0*phi)); @@ -67,7 +63,7 @@ /* Parabolic cylinder function D_n(z) See Abramowitz and Stegun p 510, 13.6.36 also see Ch 19, 19.3, 19.5.1, etc. - The parabolic cylinder function a type of conßuent hypergeometric function, + The parabolic cylinder function a type of conßuent hypergeometric function, deÞned in Gradshteyn and Ryzhik p 1028, section 9.24-9.25. http://mathworld.wolfram.com/ParabolicCylinderFunction.html @@ -76,92 +72,24 @@ http://mathworld.wolfram.com/ConfluentHypergeometricFunctionoftheSecondKind.html */ -/* NB: THIS IS BROKEN. gsl_sf_hyperg_U doesn't work for some large arguments -- I'm unsure exactly which, - but it sucks and makes the fxn useless for me. +/* NB: THIS IS BROKEN. gsl_sf_hyperg_U doesn't work for some large arguments -- + I'm unsure exactly which, but it sucks and makes the fxn useless for me. It does, however, successfully reproduce ALL the tables in A&S (pp 702-710). */ -double +static double CalcDnz(const double n, const double z) { return(pow(2.0, 0.5 * n) * exp(-0.25 * z*z ) * gsl_sf_hyperg_U(-0.5 * n, 0.5, 0.5 * z*z)); } -/* NB: This is broken for large arguments, because CalcDnz is broken */ -double +/* NB: This is broken for large arguments, because CalcDnz is broken. 
*/ +static double CalcUax(const double a, const double x) { return(CalcDnz(-a-0.5, x)); } -double -CalcHalfNormChiLik(const double x, const double n, const double gamma, const double phi) -{ - if (x < DBL_MIN) - { - return(0.0); - } - else - { - double logp = (n-1.0) * log(x) - (0.5 * phi * x * x) + (gamma * x); - return(exp(logp)); -// return(pow(x, n-1.0) * exp((-0.5 * phi * x * x) + (gamma * x))); - } -} - - -/* Calculates the normalizing constant for the scale factor PDF: - - P(x) \propto x^(n-1) e^-(phi/2 x^2 - gamma x) - - The integral for this can be found in Gradshteyn and Ryzhik, - p. 365, formula 3.462(1). -*/ -double -CalcNormConst(const double n, const double gamma, const double phi) -{ - double tmpx; - - tmpx = (pow(phi, -0.5 * n) * exp(gamma*gamma / (4.0 * phi))) * - (tgamma(n) * CalcDnz(-n, -gamma / sqrt(phi))); - -// tmpx = (pow(phi, -0.5 * n) * exp(gamma*gamma / (4.0 * phi))) * -// (tgamma(n) * CalcUab_large_a(n-0.5, -gamma / sqrt(phi))); - - return(1.0/tmpx); -} - - -double -CalcNormConstMm(const double n, const double gamma, const double phi) -{ - double tmpx; - - tmpx = pow(2.0, 0.5*(n-3.0)) * pow(phi,-0.5*(n+1.0)) - * ( - sqrt(2.0*phi) * tgamma(0.5*n) * gsl_sf_hyperg_1F1(0.5*n, 0.5, 0.5*gamma*gamma/phi) - + 2.0 * gamma * tgamma(0.5*(n+1.0)) * gsl_sf_hyperg_1F1 (0.5*(n+1.0), 1.5, 0.5*gamma*gamma/phi) - ); - - return(1.0/tmpx); -} - - -double -CalcHalfNormChi(const double x, const double n, const double gamma, const double phi) -{ - return(CalcHalfNormChiLik(x, n, gamma, phi) * CalcNormConstMm(n, gamma, phi)); -} - - -double -ExpectScale(const double n, const double gamma, const double phi) -{ - return((n+1.0) * CalcUax(n+0.5, -gamma/sqrt(phi))/(sqrt(phi)*CalcUax(n-0.5,-gamma/sqrt(phi)))); - //return((n+1.0) * CalcDnz(-n-1.0, -gamma/sqrt(phi))/(sqrt(phi)*CalcDnz(-n,-gamma/sqrt(phi)))); -} - - static void AveCdsGibbs(CdsArray *cdsA) { @@ -170,8 +98,8 @@ *avey = cdsA->avecds->y, *avez = cdsA->avecds->z; const int cnum = cdsA->cnum, vlen = 
cdsA->vlen; - const Cds **cds = (const Cds **) cdsA->cds; - Cds *cdsj = NULL; + const Cds **cds = (const Cds **) cdsA->cds; + Cds *cdsj = NULL; double invcnum = 1.0 / (double) cnum; memset(avex, 0, vlen * sizeof(double)); @@ -205,7 +133,7 @@ double trace, s, w, x, y, z; /* convert to quaternion */ - trace = rot[0][0] + rot[1][1] + rot[2][2] + 1.0; + trace = rot[0][0] + rot[1][1] + rot[2][2] + 1.0; if( trace > FLT_EPSILON ) { @@ -250,92 +178,6 @@ } -static void -CdsInnProd2(Cds *cds, const double *wts) -{ - /* (i x k)(k x j) = (i x j) */ - /* (3 x N)(N x 3) = (3 x 3) */ - int k; - double **innerprod2 = NULL; - const double *x = (const double *) cds->x, - *y = (const double *) cds->y, - *z = (const double *) cds->z; - double xk, yk, zk, wtsi; - - if (cds->innerprod2 == NULL) - innerprod2 = cds->innerprod2 = MatAlloc(3, 3); - else - innerprod2 = cds->innerprod2; - - memset(&innerprod2[0][0], 0, 9 * sizeof(double)); - - for (k = 0; k < cds->vlen; ++k) - { - wtsi = wts[k]; - - xk = x[k]; - yk = y[k]; - zk = z[k]; - - innerprod2[0][0] += (xk * xk) * wtsi; - innerprod2[1][1] += (yk * yk) * wtsi; - innerprod2[2][2] += (zk * zk) * wtsi; - innerprod2[0][1] += (xk * yk) * wtsi; - innerprod2[0][2] += (xk * zk) * wtsi; - innerprod2[1][2] += (yk * zk) * wtsi; - } - - innerprod2[1][0] = innerprod2[0][1]; - innerprod2[2][0] = innerprod2[0][2]; - innerprod2[2][1] = innerprod2[1][2]; - - printf("tr(X'X) = % e\n", innerprod2[0][0] + innerprod2[1][1] + innerprod2[2][2]); - - /* Mat3Print(innerprod2); */ -} - - -static void -CalcCdsPrincAxesGibbs(Cds *cds, double **rotmat, const double *wts) -{ - double *evals = (double *) malloc(3 * sizeof(double)); - double det; - int i, j; - - CdsInnProd2(cds, wts); - jacobi3_cyc(cds->innerprod2, evals, rotmat, 1e-8); -// eigensym((const double **) cds->innerprod2, evals, rotmat, 3); -// Mat3TransposeIp(rotmat); - EigenSort3b(rotmat, evals); -// printf("\nevals:\n"); - //Mat3TransposeIp(rotmat); -// VecPrint(evals, 3); -// printf("\nCalcCdsPrincAxes 
A:"); -// Mat3Print(rotmat); - - det = Mat3Det((const double **) rotmat); - - if (det < 0) - { -// printf("\nNEGATIVE DETERMINANT\n"); - for (i = 0; i < 3; ++i) - { - if (rotmat[i][i] < 0) - { - for (j = 0; j < 3; ++j) - rotmat[i][j] *= -1.0; - - break; - } - } - -// Mat3Print(rotmat); - } - - free(evals); -} - - static double wrap_nPI_pPI(double x) { @@ -349,14 +191,14 @@ } -static double +static double mardia_gadsden_target_ratio(const double a, const double b, const double x, const double y) { return(exp(a * (cos(y) - cos(x)) + b * (sin(y) - sin(x))) * cos(y) / cos(x)); } -double +static double mardia_gadsden_met3(const double a, const double b, const double x, const gsl_rng *r2, const double width) { double r, y, u; @@ -364,7 +206,7 @@ //y = x + (2.0 * width * gsl_rng_uniform(r2) - width); //y = normal_dev3(x, width, r2); - y = x + gsl_ran_gaussian(r2, width); + y = x + gsl_ran_gaussian_ziggurat(r2, width); y = wrap_nPI_pPI(y); r = mardia_gadsden_target_ratio(a, b, x, y); u = gsl_rng_uniform(r2); @@ -385,56 +227,37 @@ } -static double -scale_target_ratio(const double n, const double gamma, const double phi, const double x, const double y) -{ - return(exp(-0.5*phi*(y*y - x*x) + gamma*(y-x)) * pow(y/x, n-1.0)); -} - - -double -scale_met3(const double n, const double gamma, const double phi, const double x, const gsl_rng *r2, double loc, double width) +static double +scale_log_target_ratio(const double n, const double gamma, const double phi, const double x, const double y) { - double r, y, u; - - //y = x + (2.0 * width * gsl_rng_uniform(r2) - width); - //y = loc + gsl_ran_gaussian(r2, 3.0 * width); - y = x + gsl_ran_gaussian(r2, width); - //y = x + normal_dev3(0.0, width, r2); - if (y < 0.0) - y = -y; - r = scale_target_ratio(n, gamma, phi, x, y); - u = gsl_rng_uniform(r2); - - //printf("\nMETROPOLIS: % e % e -- % f % f", x, y, r, u); - - if (u < r) + if (y <= 0.0) { - //printf("\nACCEPT: 1"); - return(y); + return(-INFINITY); } else { - //printf("\nACCEPT: 0"); 
- return(x); - } -} + double prior; + double hngamma = -0.5*phi*(y*y - x*x) + gamma*(y-x) + (n-1.0) * log(y/x); -static double -scale_log_target_ratio(const double n, const double gamma, const double phi, const double x, const double y) -{ - double k = 2.0; - double theta = 1.0/2.0; - double hngamma = -0.5*phi*(y*y - x*x) + gamma*(y-x) + (n-1.0) * log(y/x); - //hngamma = 0.0; - double prior = (k-1.0) * log(y/x) - (y-x)/theta; // gamma prior - - return(hngamma + prior); + // gamma prior + //double k = 2.0; + //double theta = 1.0 / 2.0; + //prior = (k-1.0) * log(y/x) - (y-x)/theta; // gamma prior + + // lognormal prior on beta + //double sigma = 0.1; + //prior = -log(y/x) - (log(y)*log(y)-log(x)*log(x))/(2.0*sigma); + + // Jeffrey's prior + prior = -log(y/x); + + return(hngamma + prior); + } } -double +static double scale_log_met3(const double n, const double gamma, const double phi, double x, const gsl_rng *r2, const double loc, const double width, const int iters) { double r, y, u; @@ -444,15 +267,13 @@ for (i = 0; i < iters; ++i) { //y = x + (2.0 * width * gsl_rng_uniform(r2) - width); - //y = loc + gsl_ran_gaussian(r2, 3.0 * width); - jit = gsl_ran_gaussian(r2, 3.0 * width); - //y = x + gsl_ran_gaussian(r2, width); + //y = loc + gsl_ran_gaussian_ziggurat(r2, 3.0 * width); + jit = gsl_ran_gaussian_ziggurat(r2, 3.0 * width); + //y = x + gsl_ran_gaussian_ziggurat(r2, width); //jit = gsl_rng_uniform(r2) * 0.6 - 0.3; - //jit = gsl_ran_gaussian(r2, 0.7); + //jit = gsl_ran_gaussian_ziggurat(r2, 0.7); y = x + jit; - if (y < 0.0) - y = -y; r = scale_log_target_ratio(n, gamma, phi, x, y); u = log(gsl_rng_uniform(r2)); @@ -475,7 +296,7 @@ /* KVM -For simplicity of presentation take phi =1 +For simplicity of presentation take phi = 1 */ double scale_rejection(const double r, const double gamma, const double phi, const gsl_rng *r2) @@ -501,37 +322,84 @@ } -/* D. J. Best and N. I. Fisher (1979) - "Efficient simulation of the von Mises distribution." 
- Applied Statistics 28:152Ð157. */ +// Sample from a von Mises distribution +// +// http://phaistos.sourceforge.net/doxygen/vonmises_8cpp_source.html +// This is Hamelryck's version from PHAISTOS, which should be golden ... +// I've checked this against Fisher 1993 myself. +// +// Return a von Mises distribution pseudo-random variate on [-pi, +pi]. +// The implementation is similar to the algorithm by Best and Fisher, +// 1979; +// see N.I. Fisher, _Statistical Analysis of Circular Data_, +// Cambridge University Press, 1993, p. 49. +// Also reproduced in Mardia and Jupp, _Directional Statistics_, 2000, p 43. +// +// I added checks for when k is very small or very large. +// +// mu = mean, k = scale double -vonmises_dev3(const double a, const double b, const gsl_rng *r2) +vonmises_dev4(double mu, double k, const gsl_rng *r2) { - double z, f, c, t, p, r; + double U1, z, c, U2, U3; - t = 1.0 + sqrt(1.0 + 4.0*b*b); - p = 0.5*(t - sqrt(2.0*t))/b; - r = 0.5*(1.0 + p*p)/p; +// if (mu < -M_PI || mu > M_PI) +// { +// fprintf(stderr, "VonMises Error: mu must be in the interval [-pi,pi]. 
mu=%f\n", mu); +// } - do + // von Mises converges to constant mu as k -> inf + if (k >= (DBL_MAX-1)/2) + return(fmod(mu, 2.0*M_PI)); + + // Set sampling internals + double a = 1.0 + sqrt(1.0 + 4.0*k*k); + double b = (a - sqrt(2*a)) / (2*k); + double r = (1.0 + b*b)/(2*b); + + // von Mises converges to uniform [-pi, pi] as k -> 0 + if (k <= 0.0 || !isfinite(b) || !isfinite(r)) + return((2.0*gsl_rng_uniform(r2)-1.0) * M_PI); + + double f, res; + + while(1) { - z = cos(MY_PI * gsl_rng_uniform(r2)); + U1 = gsl_rng_uniform(r2); + z = cos(M_PI * U1); f = (1.0 + r*z)/(r + z); - c = b*(r-f); + c = k * (r - f); + U2 = gsl_rng_uniform(r2); + + if (((c*(2.0-c) - U2) > 0.0) || ((log(c/U2) + 1.0 - c >= 0.0))) + { + break; // accept + } } - while(log(c/gsl_rng_uniform(r2)) + 1.0 < c); - if (gsl_rng_uniform(r2) > 0.5) - return(wrap_nPI_pPI(a + acos(f))); + U3 = gsl_rng_uniform(r2); + + if (U3 > 0.5) + { + res = fmod(acos(f)+mu, 2.0*M_PI); + } else - return(wrap_nPI_pPI(a - acos(f))); + { + res = fmod(-acos(f)+mu, 2.0*M_PI); + } + + return(res); } +/* http://www.gnu.org/software/gsl/manual/html_node/The-Gamma-Distribution.html + double gsl_ran_gamma (const gsl_rng * r, double a, double b) + p(x) dx = {1 \over \Gamma(a) b^a} x^{a-1} e^{-x/b} dx +*/ static double -invgamma_dev4(const double b, const double c, const gsl_rng *r2) +invgamma_dev4(const double b, const double a, const gsl_rng *r2) { - return(1.0 / gsl_ran_gamma(r2, c, 1.0/b)); + return(1.0 / gsl_ran_gamma(r2, a, 1.0/b)); } @@ -544,8 +412,8 @@ double variance; const int cnum = cdsA->cnum, vlen = cdsA->vlen; double *var = cdsA->var; - const Cds **cds = (const Cds **) cdsA->cds; - Cds *cdsj = NULL; + const Cds **cds = (const Cds **) cdsA->cds; + Cds *cdsj = NULL; const double *avex = (const double *) cdsA->avecds->x, *avey = (const double *) cdsA->avecds->y, *avez = (const double *) cdsA->avecds->z; @@ -568,480 +436,180 @@ } variance /= (double) vlen; - cdsA->stats->stddev = sqrt(variance); - cdsA->stats->var = variance; + 
stats->stddev = sqrt(variance); + stats->var = variance; } static void -MetScale(CdsArray *cdsA, const gsl_rng *r2) +GibbsTrans(CdsArray *cdsA, const gsl_rng *r2) { - int i; + int i, j; const int cnum = cdsA->cnum, vlen = cdsA->vlen; - const int n = 3.0 * vlen + 1.0; - double phi, gamma; + double var = stats->var; + double tvar, tvarbeta; + double newcen[3]; Cds *cdsi = NULL; - Cds *avecds = cdsA->avecds; - double sm, width, oldscale; - double var = cdsA->stats->var; - int skip = 7; - double priorg = 0.0; + Cds **cds = cdsA->cds; - double sum = 0.0; - for (i = 0; i < cnum; ++i) - sum += cdsA->cds[i]->scale; + tvar = sqrt(var / vlen); - sum /= cnum; - printf("\nave: % f", sum); +// printf("\ntvar: % e", tvar); +// fflush(NULL); for (i = 0; i < cnum; ++i) { - cdsi = cdsA->cds[i]; - oldscale = cdsi->scale; + cdsi = cds[i]; + CenMass2((const double **) cdsi->sc, + vlen, cdsi->center); + + // For an in-place algorithm where X has already been scaled and rotated, + // we don't need to scale and rotate the mean center. + // For out-of-place, we have to apply the inverse scale and inverse + // rotation to M. 
- /* here we have to undo the effects of the previous scaling step, for both phi and gamma - phi is the self inner prod, so the scale is squared */ - phi = TrCdsInnerProd(cdsi, vlen) / (var * oldscale * oldscale); - gamma = TrCdsInnerProd2(cdsi, avecds, vlen) / (var * oldscale) - priorg; - sm = ScaleMax(n, gamma, phi); - width = sqrt(1.0 / (phi + (n-1.0)/(sm*sm))); + InvRotVec(&newcen[0], cdsA->avecds->center, cdsi->matrix); -/* printf("\n trans[%d]", i+1); */ -/* printf("\nB trans[%d]: % f % f % f", i+1, */ -/* cdsi->center[0], cdsi->center[1], cdsi->center[2]); */ + for (j = 0; j < 3; ++j) + cdsA->avecds->center[j] = newcen[j] / cdsi->scale; +// +// printf("\n trans[%d]", i+1); +// printf("\nB trans[%d]: % f % f % f", i+1, +// cdsi->center[0], cdsi->center[1], cdsi->center[2]); +// fflush(NULL); - cdsi->scale = scale_log_met3(n, gamma, phi, oldscale, r2, sm, width, skip); - ScaleCds(cdsi, cdsi->scale / oldscale); + tvarbeta = tvar/cdsi->scale; + //tvarbeta = tvar; - //printf("\nscale[%3d]: % f % f % f % f % f", i+1, cdsi->scale, phi, gamma, sm, width); - //fflush(NULL); + cdsi->center[0] -= cdsA->avecds->center[0]; + cdsi->center[1] -= cdsA->avecds->center[1]; + cdsi->center[2] -= cdsA->avecds->center[2]; + + cdsi->center[0] += gsl_ran_gaussian_ziggurat(r2, tvarbeta); + cdsi->center[1] += gsl_ran_gaussian_ziggurat(r2, tvarbeta); + cdsi->center[2] += gsl_ran_gaussian_ziggurat(r2, tvarbeta); + +// printf("\nA trans[%d]: % f % f % f\n", i+1, +// cdsA->cds[i]->center[0], cdsA->cds[i]->center[1], cdsA->cds[i]->center[2]); +// fflush(NULL); + + //ApplyCenterIp(cdsi); + //NegTransCdsIp(cdsi, cdsi->center); + + TranslateCdsOp2(cdsi->wc, + (const double **) cdsi->sc, + vlen, + (const double *) cdsi->center); } } static void -MetScaleDiag(CdsArray *cdsA, const gsl_rng *r2) +GibbsTransDiag(CdsArray *cdsA, const gsl_rng *r2) { - int i; + int i, j; const int cnum = cdsA->cnum, vlen = cdsA->vlen; - const int n = 3.0 * vlen + 1.0; - double phi, gamma; + double tvar, invtr, 
tvarbeta; + double newcen[3]; Cds *cdsi = NULL; - Cds *avecds = cdsA->avecds; - double *wts = cdsA->w; - double sm, width, oldscale; - int skip = 7; - double priorg = 0.0; // set to 1.0 for exponential prior mean = 1 + Cds **cds = cdsA->cds; - double sum = 0.0; - for (i = 0; i < cnum; ++i) - sum += cdsA->cds[i]->scale; + invtr = 0.0; + for (i = 0; i < vlen; ++i) + { + //cdsA->w[i] = 1.0 / cdsA->var[i]; + invtr += cdsA->w[i]; + } - sum /= cnum; - printf("\nave: % f", sum); + tvar = sqrt(1.0 / invtr); - sum = 0.0; - for (i = 0; i < cnum; ++i) - sum += log(cdsA->cds[i]->scale); +// printf("\ntvar: %e", tvar); - sum /= cnum; - printf("\nave log: % f", sum); +/* for (i = 0; i < vlen; ++i) */ +/* printf("\n vartrans[%3d]: % 11.5f % 11.5f", i+1, cdsA->var[i], cdsA->w[i]); */ for (i = 0; i < cnum; ++i) { - cdsi = cdsA->cds[i]; - oldscale = cdsi->scale; - - /* here we have to undo the effects of the previous scaling step, for both phi and gamma - phi is the self inner prod, so the scale is squared */ - phi = TrCdsInnerProdWt(cdsi, vlen, wts) / (oldscale * oldscale); - gamma = TrCdsInnerProdWt2(cdsi, avecds, vlen, wts) / oldscale - priorg; - sm = ScaleMax(n, gamma, phi); - width = sqrt(1.0 / (phi + (n-1.0)/(sm*sm))); - -/* printf("\n trans[%d]", i+1); */ -/* printf("\nB trans[%d]: % f % f % f", i+1, */ -/* cdsi->center[0], cdsi->center[1], cdsi->center[2]); */ - - cdsi->scale = scale_log_met3(n, gamma, phi, oldscale, r2, sm, width, skip); - //skip = 1; - //cdsi->scale = scale_rejection(n, gamma, phi, r2); - ScaleCds(cdsi, cdsi->scale / oldscale); + cdsi = cds[i]; + CenMassWt2((const double **) cdsi->sc, + (const double *) cdsA->w, + vlen, + cdsi->center); + + // For an in-place algorithm where X has already been scaled and rotated, + // we don't need to scale and rotate the mean center. + // For out-of-place, we have to apply the inverse scale and inverse + // rotation to M. 
- //printf("\n(diag) scale[%3d]: % f % f % f % f % f", i+1, cdsi->scale, phi, gamma, sm, width); - //fflush(NULL); - } -} + InvRotVec(&newcen[0], cdsA->avecds->center, cdsi->matrix); + for (j = 0; j < 3; ++j) + cdsA->avecds->center[j] = newcen[j] / cdsi->scale; +// +// printf("\n trans[%d]", i+1); +// printf("\nB trans[%d]: % f % f % f", i+1, +// cdsi->center[0], cdsi->center[1], cdsi->center[2]); +// fflush(NULL); -static void -GibbsVar(CdsArray *cdsA, const gsl_rng *r2) -{ - double var; - const int cnum = cdsA->cnum, vlen = cdsA->vlen; + tvarbeta = tvar/cdsi->scale; + //tvarbeta = tvar; - VarCds(cdsA); - var = cdsA->stats->var; - //printf("\nB var: % e", var); + cdsi->center[0] -= cdsA->avecds->center[0]; + cdsi->center[1] -= cdsA->avecds->center[1]; + cdsi->center[2] -= cdsA->avecds->center[2]; + + cdsi->center[0] += gsl_ran_gaussian_ziggurat(r2, tvarbeta); + cdsi->center[1] += gsl_ran_gaussian_ziggurat(r2, tvarbeta); + cdsi->center[2] += gsl_ran_gaussian_ziggurat(r2, tvarbeta); - cdsA->stats->var = invgamma_dev4(1.5 * cnum * vlen * var, 1.5 * cnum * vlen, r2); +/* printf("\nA trans[%d]: % f % f % f\n", i+1, */ +/* cdsi->center[0], cdsi->center[1], cdsi->center[2]); */ +/* fflush(NULL); */ - //printf("\nA var: % e % e", cdsA->stats->var, cdsA->stats->stddev); - //fflush(NULL); + TranslateCdsOp2(cdsi->wc, + (const double **) cdsi->sc, + vlen, + (const double *) cdsi->center); + //NegTransCdsIp(cdsi, cdsi->center); + } } +/* A = (cds1' * cds2)' */ +/* Actually retruns the transpose of A */ static void -GibbsPhi(CdsArray *cdsA, const gsl_rng *r2) +CdsInnerProduct(double *A, Cds *cds1, Cds *cds2, const int vlen, const double *weight) { - const int vlen = cdsA->vlen; - double invtr, a, b; + double x1, x2, y1, y2, z1, z2; int i; + const double *fx1 = cds1->x, *fy1 = cds1->y, *fz1 = cds1->z; + const double *fx2 = cds2->x, *fy2 = cds2->y, *fz2 = cds2->z; - invtr = 0.0; - for (i = 0; i < vlen; ++i) - invtr += 1.0 / cdsA->var[i]; + memset(A, 0, 9 * sizeof(double)); - a = 
0.5 * (vlen + 2.0); - b = 2.0 / (invtr + 2.0 / cdsA->stats->alpha); + if (weight != NULL) + { + for (i = 0; i < vlen; ++i) + { + x1 = weight[i] * fx1[i]; + y1 = weight[i] * fy1[i]; + z1 = weight[i] * fz1[i]; - cdsA->stats->phi = gsl_ran_gamma(r2, a, b); + x2 = fx2[i]; + y2 = fy2[i]; + z2 = fz2[i]; -// printf("\nA phi: %e %e %e %e", cdsA->stats->phi, b, vlen / invtr, sqrt(vlen / invtr)); -// fflush(NULL); -} + A[0] += (x1 * x2); + A[1] += (y1 * x2); + A[2] += (z1 * x2); - -static void -GibbsVarDiag(CdsArray *cdsA, const gsl_rng *r2) -{ - double phi = cdsA->stats->phi; - const int cnum = cdsA->cnum, vlen = cdsA->vlen; - int i; - - VarCds(cdsA); - - for (i = 0; i < vlen; ++i) - { - //printf("\nvar[%d]: %e %e %e", i, cdsA->var[i], 3.0 * cnum * cdsA->var[i], (3.0 * cnum * cdsA->var[i] + phi)); - cdsA->var[i] = invgamma_dev4(0.5 * (3.0 * cnum * cdsA->var[i] + phi), 1.5 * cnum + 0.5, r2); - //cdsA->var[i] = invgamma_dev4(0.5 * (3.0 * cnum * cdsA->var[i]), 1.5 * cnum + 0.5, r2); -// printf(" %e", cdsA->var[i]); - } - - for (i = 0; i < vlen; ++i) - cdsA->w[i] = 1.0 / cdsA->var[i]; -} - - -static void -GibbsMean(CdsArray *cdsA, const gsl_rng *r2) -{ - int i; - double *avex = cdsA->avecds->x, - *avey = cdsA->avecds->y, - *avez = cdsA->avecds->z; - const int cnum = cdsA->cnum, vlen = cdsA->vlen; - double mvar = sqrt(cdsA->stats->var / cnum); - -/* printf("\nmvar: % e", mvar); */ - - AveCdsGibbs(cdsA); - - for (i = 0; i < vlen; ++i) - { - avex[i] += gsl_ran_gaussian(r2, mvar); - avey[i] += gsl_ran_gaussian(r2, mvar); - avez[i] += gsl_ran_gaussian(r2, mvar); - } - - - CenMass(cdsA->avecds); - -/* - ApplyCenterIp(cdsA->avecds); - - for (i = 0; i < cnum; ++i) - NegTransCdsIp(cdsA->cds[i], cdsA->avecds->center); - */ - -} - - -static void -GibbsMeanDiag(CdsArray *cdsA, const gsl_rng *r2) -{ - int i; - Cds *avecds = cdsA->avecds; - double *avex = avecds->x, - *avey = avecds->y, - *avez = avecds->z; - const int cnum = cdsA->cnum, vlen = cdsA->vlen; - double mvar; - - 
AveCdsGibbs(cdsA); - - for (i = 0; i < vlen; ++i) - { - mvar = sqrt(cdsA->var[i] / (cnum)); - - avex[i] += gsl_ran_gaussian(r2, mvar); - avey[i] += gsl_ran_gaussian(r2, mvar); - avez[i] += gsl_ran_gaussian(r2, mvar); - //avez[i] = 0.0; - } - - CenMassWtIp(avecds, cdsA->w); - - printf("\ntrans: % f % f % f", - cdsA->avecds->center[0], cdsA->avecds->center[1], cdsA->avecds->center[2]); - fflush(NULL); - -/* - ApplyCenterIp(avecds); - - for (i = 0; i < cnum; ++i) - NegTransCdsIp(cdsA->cds[i], cdsA->avecds->center); - */ - - -// printf("\nmean trans[%d]: % f % f % f", i+1, -// avecds->center[0], avecds->center[1], avecds->center[2]); -// fflush(NULL); -} - - -static void -GibbsMeanWrite(CdsArray *cdsA, const gsl_rng *r2, const int iter) -{ - int i; -// Cds *avecds = cdsA->avecds; -// const int vlen = cdsA->vlen; - const int cnum = cdsA->cnum; - char filename[256], avename[256]; - -/* - for (i = 0; i < vlen; ++i) - cdsA->w[i] = 1.0 / cdsA->stats->var; - */ - -// AveCdsGibbs(cdsA); -// GibbsMean(cdsA, r2); -// -// CenMass(avecds); -// ApplyCenterIp(avecds); - -/* - CalcCdsPrincAxesGibbs(avecds, avecds->matrix, cdsA->w); - Mat3TransposeIp(avecds->matrix); - RotateCdsIp(avecds, (const double **) avecds->matrix); - - for (i = 0; i < cnum; ++i) - RotateCdsIp(cdsA->cds[i], (const double **) avecds->matrix); - */ - - - double sum = 0.0; - - for (i = 0; i < cnum; ++i) - sum += log(cdsA->cds[i]->scale); - - sum = exp(sum/cnum); - - for (i = 0; i < cnum; ++i) - ScaleCds(cdsA->cds[i], 1.0/sum); - - sprintf(filename, "%s%05d.pdb", "gibbs_", iter); -/* printf("\nWriting %s", filename); */ -/* fflush(NULL); */ - WriteTheseusCdsModelFile(cdsA, filename); - sprintf(avename, "%s%05d.pdb", "gibbs_ave_", iter); - WriteAveCds(cdsA, avename); - - for (i = 0; i < cnum; ++i) - ScaleCds(cdsA->cds[i], sum); - - GibbsMean(cdsA, r2); -} - - -static void -GibbsMeanDiagWrite(CdsArray *cdsA, const gsl_rng *r2, const int iter) -{ - //int i; - //Cds *avecds = cdsA->avecds; - //const int vlen = 
cdsA->vlen; - //const int cnum = cdsA->cnum; - char filename[256], avename[256]; - - //AveCdsGibbs(cdsA); - GibbsMeanDiag(cdsA, r2); - -// for (i = 0; i < vlen; ++i) -// cdsA->w[i] = 1.0; -// cdsA->w[i] = 1.0 / cdsA->var[i]; -// -// CenMassWtIp(avecds, cdsA->w); -// ApplyCenterIp(avecds); - -/* - CalcCdsPrincAxesGibbs(avecds, avecds->matrix, cdsA->w); - Mat3TransposeIp(avecds->matrix); - RotateCdsIp(avecds, (const double **) avecds->matrix); - - for (i = 0; i < cnum; ++i) - RotateCdsIp(cdsA->cds[i], (const double **) avecds->matrix); - */ - - -// for (i = 0; i < vlen; ++i) -// cdsA->w[i] = 1.0 / cdsA->var[i]; - - sprintf(filename, "%s%05d.pdb", "gibbs_", iter); -/* printf("\nWriting %s", filename); */ -/* fflush(NULL); */ - WriteTheseusCdsModelFile(cdsA, filename); - sprintf(avename, "%s%05d.pdb", "gibbs_ave_", iter); - WriteAveCds(cdsA, avename); - - GibbsMeanDiag(cdsA, r2); -} - - -void -ModCenMass(Cds *cdsi, double *avecen) -{ - cdsi->center[0] -= avecen[0]; - cdsi->center[1] -= avecen[1]; - cdsi->center[2] -= avecen[2]; -} - - -static void -GibbsTrans(CdsArray *cdsA, const gsl_rng *r2) -{ - int i; - const int cnum = cdsA->cnum, vlen = cdsA->vlen; - double var = cdsA->stats->var; - double tvar, tvarbeta; - //double newcen[3]; - Cds *cdsi = NULL; - - tvar = sqrt(var / vlen); - -/* printf("\ntvar: % e", tvar); */ -/* fflush(NULL); */ - - for (i = 0; i < cnum; ++i) - { - cdsi = cdsA->cds[i]; - CenMass(cdsi); - //InvRotVec(&newcen[0], cdsA->avecds->center, cdsi->matrix); - //ModCenMass(cdsi, &newcen[0]); - -/* printf("\n trans[%d]", i+1); */ -/* printf("\nB trans[%d]: % f % f % f", i+1, */ -/* cdsi->center[0], cdsi->center[1], cdsi->center[2]); */ - - tvarbeta = tvar/cdsi->scale; - - cdsi->center[0] += gsl_ran_gaussian(r2, tvarbeta); - cdsi->center[1] += gsl_ran_gaussian(r2, tvarbeta); - cdsi->center[2] += gsl_ran_gaussian(r2, tvarbeta); - -/* printf("\nA trans[%d]: % f % f % f\n", i+1, */ -/* cdsA->cds[i]->center[0], cdsA->cds[i]->center[1], 
cdsA->cds[i]->center[2]); */ -/* fflush(NULL); */ - - ApplyCenterIp(cdsi); - } -} - - -static void -GibbsTransDiag(CdsArray *cdsA, const gsl_rng *r2) -{ - int i; - const int cnum = cdsA->cnum, vlen = cdsA->vlen; - double tvar, invtr, tvarbeta; - //double newcen[3]; - Cds *cdsi = NULL; - - invtr = 0.0; - for (i = 0; i < vlen; ++i) - invtr += (1.0 / cdsA->var[i]); - - tvar = sqrt(1.0 / invtr); - -// printf("\ntvar: %e", tvar); - -/* for (i = 0; i < vlen; ++i) */ -/* printf("\n vartrans[%3d]: % 11.5f % 11.5f", i+1, cdsA->var[i], cdsA->w[i]); */ - - for (i = 0; i < cnum; ++i) - { - cdsi = cdsA->cds[i]; - CenMassWtIp(cdsi, cdsA->w); - //InvRotVec(&newcen[0], cdsA->avecds->center, cdsi->matrix); - //ModCenMass(cdsi, &newcen[0]); - -/* printf("\n trans[%d]", i+1); */ -/* printf("\nB trans[%d]: % f % f % f", i+1, */ -/* cdsi->center[0], cdsi->center[1], cdsi->center[2]); */ - - //tvarbeta = tvar/cdsi->scale; - tvarbeta = tvar; - - cdsi->center[0] += gsl_ran_gaussian(r2, tvarbeta); - cdsi->center[1] += gsl_ran_gaussian(r2, tvarbeta); - cdsi->center[2] += gsl_ran_gaussian(r2, tvarbeta); - - //cdsi->center[2] = 0.0; - -/* printf("\nA trans[%d]: % f % f % f\n", i+1, */ -/* cdsi->center[0], cdsi->center[1], cdsi->center[2]); */ -/* fflush(NULL); */ - - ApplyCenterIp(cdsi); - //NegTransCdsIp(cdsi, cdsi->center); - } -} - - -static void -CdsInnerProduct(double *A, Cds *cds1, Cds *cds2, const int vlen, const double *weight) -{ - double x1, x2, y1, y2, z1, z2; - int i; - const double *fx1 = cds1->x, *fy1 = cds1->y, *fz1 = cds1->z; - const double *fx2 = cds2->x, *fy2 = cds2->y, *fz2 = cds2->z; - - //A[0] = A[1] = A[2] = A[3] = A[4] = A[5] = A[6] = A[7] = A[8] = 0.0; - memset(A, 0.0, 9 * sizeof(double)); - - if (weight != NULL) - { - for (i = 0; i < vlen; ++i) - { - x1 = weight[i] * fx1[i]; - y1 = weight[i] * fy1[i]; - z1 = weight[i] * fz1[i]; - - x2 = fx2[i]; - y2 = fy2[i]; - z2 = fz2[i]; - - A[0] += (x1 * x2); - A[1] += (y1 * x2); - A[2] += (z1 * x2); - - A[3] += (x1 * y2); - A[4] 
+= (y1 * y2); - A[5] += (z1 * y2); + A[3] += (x1 * y2); + A[4] += (y1 * y2); + A[5] += (z1 * y2); A[6] += (x1 * z2); A[7] += (y1 * z2); - A[8] += (z1 * z2); + A[8] += (z1 * z2); } } else @@ -1059,11 +627,11 @@ // A[0] += (x1 * x2); // A[1] += (x1 * y2); // A[2] += (x1 * z2); -// +// // A[3] += (y1 * x2); // A[4] += (y1 * y2); // A[5] += (y1 * z2); -// +// // A[6] += (z1 * x2); // A[7] += (z1 * y2); // A[8] += (z1 * z2); @@ -1084,6 +652,43 @@ } +/* A = (cds1' * cds2)' */ +/* Actually retruns the transpose of A */ +static void +CdsInnerProduct2(double *A, const double **cds1, const double **cds2, const int vlen) +{ + double x1, x2, y1, y2, z1, z2; + int i; + const double *fx1 = cds1[0], *fy1 = cds1[1], *fz1 = cds1[2]; + const double *fx2 = cds2[0], *fy2 = cds2[1], *fz2 = cds2[2]; + + memset(A, 0, 9 * sizeof(double)); + + for (i = 0; i < vlen; ++i) + { + x1 = fx1[i]; + y1 = fy1[i]; + z1 = fz1[i]; + + x2 = fx2[i]; + y2 = fy2[i]; + z2 = fz2[i]; + + A[0] += (x1 * x2); + A[1] += (y1 * x2); + A[2] += (z1 * x2); + + A[3] += (x1 * y2); + A[4] += (y1 * y2); + A[5] += (z1 * y2); + + A[6] += (x1 * z2); + A[7] += (y1 * z2); + A[8] += (z1 * z2); + } +} + + static void MardiaRot3(double *R, const double *t) { @@ -1107,43 +712,31 @@ R[6] = s1*s3 - c1*c3*s2; R[7] = -c3*s1*s2 - c1*s3; R[8] = c2*c3; - -// R[0] = c1*c2; -// R[1] = -c2*s1; -// R[2] = -s2; -// -// R[3] = c3*s1 - c1*s2*s3; -// R[4] = c1*c3 - s1*s2*s3; -// R[5] = -c2*s3; -// -// R[6] = s1*s3 + c1*c3*s2; -// R[7] = -c3*s1*s2 + c1*s3; -// R[8] = c2*c3; } /* See: Green and Mardia (2006) "Bayesian alignment using hierarchical models, with applications in protein bioinformatics" - Biometrika 93(2):235Ð254 + Biometrika 93(2):235Ð254 Esp. pp 241-242. 
*/ void -GibbsMetRot(CdsArray *cdsA, double **theta, const gsl_rng *r2) +GibbsRotMardia(CdsArray *cdsA, double **theta, const gsl_rng *r2) { int i, j, k; const int cnum = cdsA->cnum, vlen = cdsA->vlen; - double var = cdsA->stats->var; + double var = stats->var; double **F = cdsA->tmpmat3a; - Cds *ave = cdsA->avecds; + Cds *ave = cdsA->avecds; double a12, b12, a13, b13, a23, b23; double kap12, kap23, m, t12, t13, t23, width; double **rotmat = NULL; for (i = 0; i < cnum; ++i) { - //t12 = theta[i][0], t13 = theta[i][1], t23 = theta[i][2]; - t12 = t13 = t23 = 0.0; /* I'm farily convinced that this is valid, since when I rotate the structures below, */ + t12 = theta[i][0], t13 = theta[i][1], t23 = theta[i][2]; + //t12 = t13 = t23 = 0.0; /* I'm farily convinced that this is valid, since when I rotate the structures below, */ /* I simply offet the structures by a "location parameter". */ /* I do the same for the translations. */ /* We can't do this if the chains are run out of place */ @@ -1152,7 +745,7 @@ /* printf("\nF:"); */ /* Mat3Print(F); */ - if (cdsA->algo->varweight > 0) + if (algo->varweight > 0) { CdsInnerProduct(&F[0][0], ave, cdsA->cds[i], vlen, cdsA->w); @@ -1169,26 +762,32 @@ F[j][k] /= (2.0 * var); } - a12 = ( F[1][1] - sin(t13) * F[0][2]) * cos(t23) + (-F[1][2] - sin(t13) * F[0][1]) * sin(t23) + cos(t13) * F[0][0]; - b12 = (-F[0][1] - sin(t13) * F[1][2]) * cos(t23) + ( F[0][2] - sin(t13) * F[1][1]) * sin(t23) + cos(t13) * F[1][0]; + a12 = ( F[1][1] - sin(t13) * F[0][2]) * cos(t23) + + (-F[1][2] - sin(t13) * F[0][1]) * sin(t23) + cos(t13) * F[0][0]; + b12 = (-F[0][1] - sin(t13) * F[1][2]) * cos(t23) + + ( F[0][2] - sin(t13) * F[1][1]) * sin(t23) + cos(t13) * F[1][0]; kap12 = sqrt(a12*a12 + b12*b12); m = atan2(b12, a12); //printf("\nkap12: % e m:% e", kap12, m); - t12 = theta[i][0] = vonmises_dev3(m, kap12, r2); + t12 = theta[i][0] = vonmises_dev4(m, kap12, r2); //t12 = theta[i][0] = 0.0; - a23 = ( F[1][1] - sin(t13) * F[0][2]) * cos(t12) + (-F[0][1] - 
sin(t13) * F[1][2]) * sin(t12) + cos(t13) * F[2][2]; - b23 = (-F[1][2] - sin(t13) * F[0][1]) * cos(t12) + ( F[0][2] - sin(t13) * F[1][1]) * sin(t12) + cos(t13) * F[2][1]; + a23 = ( F[1][1] - sin(t13) * F[0][2]) * cos(t12) + + (-F[0][1] - sin(t13) * F[1][2]) * sin(t12) + cos(t13) * F[2][2]; + b23 = (-F[1][2] - sin(t13) * F[0][1]) * cos(t12) + + ( F[0][2] - sin(t13) * F[1][1]) * sin(t12) + cos(t13) * F[2][1]; kap23 = sqrt(a23*a23 + b23*b23); m = atan2(b23, a23); //printf("\nkap23: % e m:% e", kap23, m); - t23 = theta[i][2] = vonmises_dev3(m, kap23, r2); + t23 = theta[i][2] = vonmises_dev4(m, kap23, r2); //t23 = theta[i][2] = 0.0; - a13 = sin(t12) * F[1][0] + cos(t12) * F[0][0] + sin(t23) * F[2][1] + cos(t23) * F[2][2]; - b13 = (-sin(t23) * F[0][1] - cos(t23) * F[0][2]) * cos(t12) + (-sin(t23) * F[1][1] - cos(t23) * F[1][2]) * sin(t12) + F[2][0]; + a13 = sin(t12) * F[1][0] + cos(t12) * F[0][0] + + sin(t23) * F[2][1] + cos(t23) * F[2][2]; + b13 = (-sin(t23) * F[0][1] - cos(t23) * F[0][2]) * cos(t12) + + (-sin(t23) * F[1][1] - cos(t23) * F[1][2]) * sin(t12) + F[2][0]; width = sqrt(2.0 *(1.0/kap12 + 1.0/kap23)); @@ -1215,159 +814,888 @@ Mat3TransposeIp(rotmat); - RotateCdsIp(cdsA->cds[i], (const double **) rotmat); - } -} + RotateCdsIp(cdsA->cds[i], (const double **) rotmat); + } +} + + +static void +HabeckRot3(double **R, const double *t) +{ + double c1, c2, c3, s1, s2, s3; + + c1 = cos(t[0]); + c2 = cos(t[1]); + c3 = cos(t[2]); + s1 = sin(t[0]); + s2 = sin(t[1]); + s3 = sin(t[2]); + + R[0][0] = c1*c2*c3 - s1*s3; + R[0][1] = s1*c2*c3 + c1*s3; + R[0][2] = -s2*c3; + + R[1][0] = -c1*c2*s3 - s1*c3; + R[1][1] = -s1*c2*s3 + c1*c3; + R[1][2] = s2*s3; + + R[2][0] = c1*s2; + R[2][1] = s1*s2; + R[2][2] = c2; +} + + +void +HabeckMF_dev(double **r, const double **f, double *theta, + double **a, double **u, double **vt, double *lambda, + const gsl_rng *r2) +{ + int j; + double alpha, beta, gamma, phi, psi, w, q, x; + double kappab, kappaphi, kappapsi, tmpa, tmpb, det; + + Mat3Cpy(a, 
f); + CalcGSLSVD3(a, u, lambda, vt); + +// printf("lambda% 10.7f % 10.7f % 10.7f\n", lambda[0], lambda[1], lambda[2]); + +// printf("\nU:"); +// Mat3Print(u); +// printf("U end\n"); +// fflush(NULL); +// +// printf("\nVt:"); +// Mat3Print(vt); +// printf("Vt end\n"); +// fflush(NULL); + +// det = Mat3Det((const double **) u); +// printf("\n * determinant of SVD U matrix = %f\n", det); +// det = Mat3Det((const double **) vt); +// printf(" * determinant of SVD V matrix = %f\n", det); + + det = Mat3Det((const double **) u) * Mat3Det((const double **) vt); +// printf("\n * determinant of SVD UVt matrix = %f\n", det); + + if (det < 0) + { +// printf("\nlambda: % f % f % f\n", lambda[0], lambda[1], lambda[2]); + lambda[2] = -lambda[2]; + + for (j = 0; j < 3; ++j) + u[j][2] = -u[j][2]; + +// det = Mat3Det((const double **)U) * Mat3Det((const double **)Vt); +// printf("\n * ##determinant of SVD UVt matrix = %f\n", det); + } + + beta = theta[1]; + + tmpa = cos(0.5 * beta); + tmpb = sin(0.5 * beta); + + kappaphi = tmpa*tmpa * (lambda[0] + lambda[1]); + kappapsi = tmpb*tmpb * (lambda[0] - lambda[1]); + +// printf("\nkappaphi, kappaspi: %e %e\n", kappaphi, kappapsi); +// fflush(NULL); + + phi = vonmises_dev4(0.0, kappaphi, r2); + myassert(isfinite(phi)); + +// printf("\nphi: %f\n", phi); +// fflush(NULL); + + psi = vonmises_dev4(M_PI, kappapsi, r2); + myassert(isfinite(psi)); + +// printf("psi: %f\n", psi); +// fflush(NULL); + + w = gsl_ran_bernoulli(r2, 0.5); + + alpha = 0.5 * (phi + psi) + M_PI * w; + gamma = 0.5 * (phi - psi) + M_PI * w; + + kappab = (lambda[0] + lambda[1]) * cos(phi) + + (lambda[0] - lambda[1]) * cos(psi) + + 2.0 * lambda[2]; + + q = gsl_rng_uniform(r2); + + x = 1.0 + (2.0 * log(q + (1.0 - q) * exp(-kappab)) / kappab); + beta = acos(x); + +// printf("\nalpha, beta, gamma: %f %f %f\n", alpha, beta, gamma); +// fflush(NULL); + + theta[0] = alpha; + theta[1] = beta; + theta[2] = gamma; + +// printf("\ntheta: % f % f % f\n", theta[i][0], theta[i][1], 
theta[i][2]); + + HabeckRot3(a, theta); + + Mat3MultIp(u, (const double **) a); + Mat3MultIp(u, (const double **) vt); + Mat3Cpy(r, (const double **) u); + + if (VerifyRotMat(r, 1e-8) == 0) + { + printf("\nBAD ROTATION MATRIX U\n\n"); + exit(EXIT_FAILURE); + } +} + + +/* See: + Habeck (2009) + "Generation of three-dimensional random rotations in fitting and matching problems." + Comput Stat 24:719-731 + + p 726, 3 Algorithm + + My implementation has been verified against Habeck 2009 Figures 1 & 3 usin + his A matrix (sxn 4.1), which has singular values 16.17, 4.80, and 0.57, + max tr(A'R) = 20.4. Conitional dist of beta should have max about 0.34, + and average about 0.5. +*/ +void +GibbsRotHabeck(CdsArray *cdsA, double **theta, const gsl_rng *r2) +{ + int i, j, k; + const int cnum = cdsA->cnum, vlen = cdsA->vlen; + double var = stats->var; + double **f = cdsA->tmpmat3a; + double **a = cdsA->tmpmat3b; + double **u = cdsA->tmpmat3c; + double **vt = cdsA->tmpmat3d; + double *lambda = cdsA->tmpvec3a; + Cds *avecds = cdsA->avecds; + Cds *tcds = NULL; + double **rotmat = NULL; + + + if (algo->covweight) + { + tcds = cdsA->tcds; + MatMultCdsMultMatDiag(tcds, (const double **) cdsA->WtMat, avecds); + } + else if (algo->varweight) + { + tcds = cdsA->tcds; + MatDiagMultCdsMultMatDiag(tcds, cdsA->w, avecds); + } + else if (algo->leastsquares) + { + tcds = cdsA->avecds; + } + + for (i = 0; i < cnum; ++i) + { + CdsInnerProduct2(&f[0][0], + (const double **) tcds->wc, + (const double **) cdsA->cds[i]->wc, + vlen); + + if (algo->varweight > 0) + { + for (j = 0; j < 3; ++j) + for (k = 0; k < 3; ++k) + f[j][k] *= 0.5; + } + else + { + for (j = 0; j < 3; ++j) + for (k = 0; k < 3; ++k) + f[j][k] /= (2.0 * var); + } + + rotmat = cdsA->cds[i]->matrix; + + HabeckMF_dev(rotmat, (const double **) f, theta[i], a, u, vt, lambda, r2); + + RotateCdsIp(cdsA->cds[i], (const double **) rotmat); + } +} + + +double +Calc_b(const double *lambda) +{ + int i, j; + double b, fx, dfx, fxdfx, term; + 
const int maxiter = 100; + const double tol = FLT_EPSILON; + + b = 1.0; + for (i = 0; i < maxiter; ++i) + { + fx = dfx = 0.0; + for (j = 0; j < 4; ++j) + { + term = 1.0 / (b + 2.0*lambda[j]); + fx += term; + dfx -= term*term; + } + + fx -= 1.0; + + fxdfx = fx / dfx; + b -= fxdfx; /* Newton-Raphson correction */ + + if (b < 0.0) + b = 0.0; + +// printf("%3d % 10.6e % 10.6e % 10.6e % 10.6e\n", +// i, b, fx, dfx, fx/dfx); +// fflush(NULL); + + if (fabs(fxdfx) < tol * b || fabs(fx) < tol) + break; /* success */ + } + +// printf("\ndone: %3d % 10.6e % 10.6e % 10.6e % 10.6e\n", +// i, b, fx, dfx, fx/dfx); + + if (i == maxiter) + { + printf("\n WARNING02: Newton-Raphson failed to converge in gamma_fit()\n"); + + printf(" b fx dfx fx/dfx\n"); + printf("% 10.6e % 10.6e % 10.6e % 10.6e\n", + b, fx, dfx, fx/dfx); + + fflush(NULL); + } + + return(b); +} + + +/* ACG - angular central gaussian distribution variate */ +/* omega matrix is assumed to be diagonal */ +void +acg_dev(double *x, const int q, const double *omega, const gsl_rng *r2) +{ + int i; + double norm; + + norm = 0.0; + for (i = 0; i < q; ++i) + { + x[i] = gsl_ran_gaussian_ziggurat(r2, sqrt(1.0/omega[i])); + norm += x[i]*x[i]; + } + + norm = sqrt(norm); + + for (i = 0; i < q; ++i) + x[i] /= norm; +} + + +// Equation 5.1 +void +KentRot3(double **m, const double *x) +{ + const double x1 = x[0]; + const double x2 = x[1]; + const double x3 = x[2]; + const double x4 = x[3]; + + m[0][0] = x1*x1 + x2*x2 - x3*x3 - x4*x4; + m[0][1] = -2.0*(x1*x4 - x2*x3); + m[0][2] = 2.0*(x1*x3 + x2*x4); + m[1][0] = 2.0*(x1*x4 + x2*x3); + m[1][1] = x1*x1 + x3*x3 - x2*x2 - x4*x4; + m[1][2] = -2.0*(x1*x2 - x3*x4); + m[2][0] = -2.0*(x1*x3 - x2*x4); + m[2][1] = 2.0*(x1*x2 + x3*x4); + m[2][2] = x1*x1 + x4*x4 - x2*x2 - x3*x3; +} + + +// Equation 5.2 +void +KentDelta2Lambda(double *lambda, const double *delta) +{ + lambda[0] = 0.0; + lambda[1] = 2.0*(delta[1] + delta[2]); + lambda[2] = 2.0*(delta[0] + delta[2]); + lambda[3] = 2.0*(delta[0] + 
delta[1]); +} + + +double +CalcEnvelopeBACG(const double *x, const double *lambda, const double *omega, const double b) +{ + double envf, term; + int i; + + envf = 2.0 - 0.5*b; + + for (i = 0; i < 4; ++i) + envf -= x[i]*x[i]*lambda[i]; + + term = 0.0; + for (i = 0; i < 4; ++i) + term += x[i]*x[i]*omega[i]; + + envf += 2.0 * log(b * term / 4.0); + + return(envf); +} + + +/* f is actually f', the transpose */ +void +KentMF_dev(double **r, const double **f, + double **a, double **u, double **vt, double *delta, + double *v1, double *v2, double *v3, + const gsl_rng *r2) +{ + int i, j; + double w, b, envelope; + double *x = v1; + double *lambda = v2; + double *omega = v3; + + + Mat3Cpy(a, f); + CalcGSLSVD3(a, u, delta, vt); + + double det = Mat3Det((const double **) u) * Mat3Det((const double **) vt); +// printf("\n * determinant of SVD UVt matrix = %f\n", det); + + if (det < 0) + { +// printf("\nlambda: % f % f % f\n", lambda[0], lambda[1], lambda[2]); + delta[2] = -delta[2]; + + for (j = 0; j < 3; ++j) + u[j][2] = -u[j][2]; + +// det = Mat3Det((const double **)U) * Mat3Det((const double **)Vt); +// printf("\n * ##determinant of SVD UVt matrix = %f\n", det); + } + +// printf("delta: %g %g %g\n", delta[0], delta[1], delta[2]); + KentDelta2Lambda(lambda, delta); +// printf("lambda: %g %g %g %g\n", lambda[0], lambda[1], lambda[2], lambda[3]); +// fflush(NULL); + + b = Calc_b(lambda); + + for (i = 0; i < 4; ++i) + omega[i] = 1.0 + 2.0*lambda[i]/b; + + while(1) + { + acg_dev(x, 4, omega, r2); + w = gsl_rng_uniform(r2); + + envelope = CalcEnvelopeBACG(x, lambda, omega, b); +// printf("envelope = % 16.8f % 16.8f %d\n", log(w), envelope, log(w) < envelope); +// fflush(NULL); + + if (log(w) < envelope) + break; + } + + KentRot3(a, x); + + Mat3MultIp(u, (const double **) a); + Mat3MultIp(u, (const double **) vt); + Mat3Cpy(r, (const double **) u); + + if (VerifyRotMat(r, 1e-8) == 0) + { + printf("\nBAD ROTATION MATRIX U\n\n"); + exit(EXIT_FAILURE); + } +} + + +void 
+GibbsRotKent(CdsArray *cdsA, const gsl_rng *r2) +{ + int i, j, k; + const int cnum = cdsA->cnum, vlen = cdsA->vlen; + double var = stats->var; + double **f = cdsA->tmpmat3a; + double **a = cdsA->tmpmat3b; + double **u = cdsA->tmpmat3c; + double **vt = cdsA->tmpmat3d; + double *lambda = cdsA->tmpvec3a; + double *v1 = malloc(4 * sizeof(double)); + double *v2 = malloc(4 * sizeof(double)); + double *v3 = malloc(4 * sizeof(double)); + Cds *avecds = cdsA->avecds; + Cds *tcds = NULL; + double **rotmat = NULL; + + + if (algo->covweight) + { + tcds = cdsA->tcds; + MatMultCdsMultMatDiag(tcds, (const double **) cdsA->WtMat, avecds); + } + else if (algo->varweight) + { + tcds = cdsA->tcds; + MatDiagMultCdsMultMatDiag(tcds, cdsA->w, avecds); + } + else if (algo->leastsquares) + { + tcds = cdsA->avecds; + } + + for (i = 0; i < cnum; ++i) + { + /* f' = tcds' * cdsi */ + /* f is the transpose */ + CdsInnerProduct2(&f[0][0], + (const double **) tcds->wc, + (const double **) cdsA->cds[i]->wc, + vlen); + + if (algo->varweight > 0) + { + for (j = 0; j < 3; ++j) + for (k = 0; k < 3; ++k) + f[j][k] *= 0.5; + } + else + { + for (j = 0; j < 3; ++j) + for (k = 0; k < 3; ++k) + f[j][k] /= (2.0 * var); + } + + rotmat = cdsA->cds[i]->matrix; + + KentMF_dev(rotmat, (const double **) f, a, u, vt, lambda, v1, v2, v3, r2); + + RotateCdsIp(cdsA->cds[i], (const double **) rotmat); + } + + free(v1); + free(v2); + free(v3); +} + + +static void +MetScale(CdsArray *cdsA, const gsl_rng *r2) +{ + int i; + const int cnum = cdsA->cnum, vlen = cdsA->vlen; + const int n = 3.0 * vlen + 1.0; + double phi, gamma; + Cds *cdsi = NULL; + Cds *avecds = cdsA->avecds; + double sm, width, oldscale; + double var = stats->var; + int skip = 7; + + double sum = 0.0; + for (i = 0; i < cnum; ++i) + sum += cdsA->cds[i]->scale; + + sum /= cnum; + //printf("\nave: % f", sum); + + for (i = 0; i < cnum; ++i) + { + if (algo->scaleanchor == i) + continue; + + cdsi = cdsA->cds[i]; + oldscale = cdsi->scale; + + phi = 
TrCdsInnerProd(cdsi, vlen) / var; + gamma = TrCdsInnerProd2(cdsi, avecds, vlen) / var; + sm = ScaleMax(n, gamma, phi); + width = sqrt(1.0 / (phi + (n-1.0)/(sm*sm))); + + cdsi->scale = scale_log_met3(n, gamma, phi, oldscale, r2, sm, width, skip); + ScaleCds(cdsi, cdsi->scale); + + //printf("\nscale[%3d]: % f % f % f % f % f", i+1, cdsi->scale, phi, gamma, sm, width); + //fflush(NULL); + } +} + + +static void +MetScaleDiag(CdsArray *cdsA, const gsl_rng *r2) +{ + int i; + const int cnum = cdsA->cnum, vlen = cdsA->vlen; + const int n = 3.0 * vlen + 1.0; + double phi, gamma; + Cds *cdsi = NULL; + Cds *avecds = cdsA->avecds; + double *wts = cdsA->w; + double sm, width, oldscale; + int skip = 7; + double priorg = 0.0; // set to 1.0 for exponential prior mean = 1 + +// double sum = 0.0; +// for (i = 0; i < cnum; ++i) +// sum += cdsA->cds[i]->scale; +// +// sum /= cnum; +// printf("\nave: % f", sum); +// +// sum = 0.0; +// for (i = 0; i < cnum; ++i) +// sum += log(cdsA->cds[i]->scale); +// +// sum /= cnum; +// printf("\nave log: % f", sum); + + for (i = 0; i < cnum; ++i) + { + if (algo->scaleanchor == i) + continue; + + cdsi = cdsA->cds[i]; + oldscale = cdsi->scale; + + phi = TrCdsInnerProdWt(cdsi, vlen, wts); + gamma = TrCdsInnerProdWt2(cdsi, avecds, vlen, wts) - priorg; + sm = ScaleMax(n, gamma, phi); + width = sqrt(1.0 / (phi + (n-1.0)/(sm*sm))); + //printf("width:% g\n", width); + + cdsi->scale = scale_log_met3(n, gamma, phi, oldscale, r2, sm, width, skip); + //cdsi->scale = scale_rejection(n, gamma, phi, r2); + ScaleCds(cdsi, cdsi->scale); + + //printf("\n(diag) scale[%3d]: % f % f % f % f % f", i+1, cdsi->scale, phi, gamma, sm, width); + //fflush(NULL); + } +} + + +static void +CdsInnProd(Cds *cds) +{ + /* (i x k)(k x j) = (i x j) */ + /* (3 x N)(N x 3) = (3 x 3) */ + int k; + double **ip = NULL; + const double *x = (const double *) cds->x, + *y = (const double *) cds->y, + *z = (const double *) cds->z; + double xk, yk, zk; + + ip = cds->innerprod; + + memset(ip[0], 
0, 9 * sizeof(double)); + + for (k = 0; k < cds->vlen; ++k) + { + xk = x[k]; + yk = y[k]; + zk = z[k]; + + ip[0][0] += (xk * xk); + ip[1][1] += (yk * yk); + ip[2][2] += (zk * zk); + ip[0][1] += (xk * yk); + ip[0][2] += (xk * zk); + ip[1][2] += (yk * zk); + } + + ip[1][0] = ip[0][1]; + ip[2][0] = ip[0][2]; + ip[2][1] = ip[1][2]; + + //printf("tr(X'X) = % e\n", ip[0][0] + ip[1][1] + ip[2][2]); + + /* Mat3Print(ip2); */ +} + + +static void +CdsInnProdWt(Cds *cds, const double *wts) +{ + /* (i x k)(k x j) = (i x j) */ + /* (3 x N)(N x 3) = (3 x 3) */ + int k; + double **ip = NULL; + const double *x = (const double *) cds->x, + *y = (const double *) cds->y, + *z = (const double *) cds->z; + double xk, yk, zk, wtsi; + + ip = cds->innerprod; + + memset(ip[0], 0, 9 * sizeof(double)); + + for (k = 0; k < cds->vlen; ++k) + { + wtsi = wts[k]; + + xk = x[k]; + yk = y[k]; + zk = z[k]; + + ip[0][0] += (xk * xk) * wtsi; + ip[1][1] += (yk * yk) * wtsi; + ip[2][2] += (zk * zk) * wtsi; + ip[0][1] += (xk * yk) * wtsi; + ip[0][2] += (xk * zk) * wtsi; + ip[1][2] += (yk * zk) * wtsi; + } + + ip[1][0] = ip[0][1]; + ip[2][0] = ip[0][2]; + ip[2][1] = ip[1][2]; + + //printf("tr(X'X) = % e\n", ip[0][0] + ip[1][1] + ip[2][2]); + + /* Mat3Print(ip2); */ +} + + +void +CalcCdsPrincAxesGibbs(Cds *cds, double **r, double **u, double **vt, double *lambda, const double *wts) +{ + int j; + double det; + + if (algo->leastsquares) + CdsInnProd(cds); + else + CdsInnProdWt(cds, wts); + + CalcGSLSVD3(cds->innerprod, u, lambda, vt); + + det = Mat3Det((const double **) u); +// printf("\n * determinant of SVD UVt matrix = %f\n", det); + + if (det < 0) + { +// printf("\nlambda: % f % f % f\n", lambda[0], lambda[1], lambda[2]); + printf("\nNEGATIVE DETERMINANT\n"); + lambda[2] = -lambda[2]; + + for (j = 0; j < 3; ++j) + u[j][2] = -u[j][2]; + } + + Mat3Cpy(r, (const double **) u); + + if (VerifyRotMat(r, 1e-8) == 0) + { + printf("\nBAD ROTATION MATRIX U\n\n"); + exit(EXIT_FAILURE); + } +} + + +void 
+IdentifyMean(CdsArray *cdsA) +{ + int i; + const int cnum = cdsA->cnum; + Cds *avecds = cdsA->avecds; + + ApplyCenterIp(avecds); + +// printf("\na trans: % f % f % f", +// cdsA->avecds->center[0], cdsA->avecds->center[1], cdsA->avecds->center[2]); +// fflush(NULL); + + for (i = 0; i < cnum; ++i) + NegTransCdsIp(cdsA->cds[i]->wc, cdsA->avecds->center, cdsA->vlen); + + CalcCdsPrincAxesGibbs(avecds, avecds->matrix, cdsA->tmpmat3a, cdsA->tmpmat3b, cdsA->tmpvec3a, cdsA->w); +// printf("\nBEF\n"); +// Mat3Print(avecds->innerprod); +// double tr = avecds->innerprod[0][0]+avecds->innerprod[1][1]+avecds->innerprod[2][2]; +// printf("tr:%20.8f\n", +// tr/(3*cdsA->vlen)); + RotateCdsIp(avecds, (const double **) avecds->matrix); +// CdsInnProdWt(avecds, cdsA->w); +// printf("tra:%20.10f\n\n", avecds->innerprod[0][0]+avecds->innerprod[1][1]+avecds->innerprod[2][2]); +// printf("\nAFT\n"); +// Mat3Print(avecds->innerprod); + + for (i = 0; i < cnum; ++i) + RotateCdsIp(cdsA->cds[i], (const double **) avecds->matrix); +} + + +static void +GibbsMean(CdsArray *cdsA, const gsl_rng *r2) +{ + int i; + double *avex = cdsA->avecds->x, + *avey = cdsA->avecds->y, + *avez = cdsA->avecds->z; + const int cnum = cdsA->cnum, vlen = cdsA->vlen; + double mvar = sqrt(stats->var / cnum); + +/* printf("\nmvar: % e", mvar); */ + + AveCdsGibbs(cdsA); + + for (i = 0; i < vlen; ++i) + { + avex[i] += gsl_ran_gaussian_ziggurat(r2, mvar); + avey[i] += gsl_ran_gaussian_ziggurat(r2, mvar); + avez[i] += gsl_ran_gaussian_ziggurat(r2, mvar); + } + + CenMass(cdsA->avecds); + //IdentifyMean(cdsA); +} + + +static void +GibbsMeanDiag(CdsArray *cdsA, const gsl_rng *r2) +{ + int i; + double *avex = cdsA->avecds->x, + *avey = cdsA->avecds->y, + *avez = cdsA->avecds->z; + const int cnum = cdsA->cnum, vlen = cdsA->vlen; + double mvar; + + AveCdsGibbs(cdsA); + + for (i = 0; i < vlen; ++i) + { + mvar = sqrt(cdsA->var[i] / cnum); + + avex[i] += gsl_ran_gaussian_ziggurat(r2, mvar); + avey[i] += gsl_ran_gaussian_ziggurat(r2, 
mvar); + avez[i] += gsl_ran_gaussian_ziggurat(r2, mvar); + } + + CenMassWtIp(cdsA->avecds, cdsA->w); + //IdentifyMean(cdsA); + +// printf("\ntrans: % f % f % f", +// cdsA->avecds->center[0], cdsA->avecds->center[1], cdsA->avecds->center[2]); +// fflush(NULL); + +// ApplyCenterIp(avecds); +// +// for (i = 0; i < cnum; ++i) +// NegTransCdsIp(cdsA->cds[i], cdsA->avecds->center); + +// printf("\nmean trans[%d]: % f % f % f", i+1, +// avecds->center[0], avecds->center[1], avecds->center[2]); +// fflush(NULL); +} + + +static void +GibbsVar(CdsArray *cdsA, const gsl_rng *r2) +{ + double var; + const int cnum = cdsA->cnum, vlen = cdsA->vlen; + + VarCds(cdsA); + var = stats->var; + //printf("\nB var: % e % e", var, sqrt(var)); + stats->var = invgamma_dev4(1.5 * cnum * vlen * var, 1.5 * cnum * vlen, r2); -static int -CalcRotMat(double **rotmat, double **Umat, double *s, double **Vtmat) -{ - int i, j, k; - double det; + //printf("\nA var: % e % e", stats->var, sqrt(stats->var)); + //fflush(NULL); +} - memset(&rotmat[0][0], 0, 9 * sizeof(double)); - det = Mat3Det((const double **)Umat) * Mat3Det((const double **)Vtmat); +static void +GibbsVarDiag(CdsArray *cdsA, const gsl_rng *r2) +{ + double phi = stats->phi; + const int cnum = cdsA->cnum, vlen = cdsA->vlen; + int i; - if (det > 0) - { - for (i = 0; i < 3; ++i) - for (j = 0; j < 3; ++j) - for (k = 0; k < 3; ++k) - rotmat[i][j] += (Vtmat[k][i] * s[k] * Umat[j][k]); + VarCds(cdsA); - return(1); - } - else + for (i = 0; i < vlen; ++i) { - /* printf("\n * determinant of SVD U or V matrix = %f", det); */ - - for (i = 0; i < 3; ++i) - { - for (j = 0; j < 3; ++j) - { - for (k = 0; k < 2; ++k) - rotmat[i][j] += (Vtmat[k][i] * s[k] * Umat[j][k]); - - rotmat[i][j] -= (Vtmat[2][i] * Umat[j][2]); - } - } - - return(-1); + //printf("\nvar[%d]: %e %e %e", i, cdsA->var[i], 3.0 * cnum * cdsA->var[i], (3.0 * cnum * cdsA->var[i] + phi)); + cdsA->var[i] = invgamma_dev4(0.5 * (3.0 * cnum * cdsA->var[i] + phi), (3.0*cnum + 1.0)/2.0, r2); + 
//cdsA->var[i] = invgamma_dev4(0.5 * (3.0 * cnum * cdsA->var[i]), 1.5 * cnum + 0.5, r2); +// printf(" %e", cdsA->var[i]); } -} - -static int -CalcGSLSVD(double **Rmat, double **Umat, double *sigma, double **VTmat) -{ - svdGSLDest(Rmat, 3, sigma, VTmat); - Mat3TransposeIp(VTmat); - Mat3Cpy(Umat, (const double **) Rmat); - return(1); + for (i = 0; i < vlen; ++i) + cdsA->w[i] = 1.0 / cdsA->var[i]; } -/* See: - Habeck (2009) - "Generation of three-dimensional random rotations in fitting and matching problems." - Comput Stat 24:719-731 - - NB: broken, for now -- probably a transpose issue somewhere, either rotmat or V (should be Vt?) +/* http://www.gnu.org/software/gsl/manual/html_node/The-Gamma-Distribution.html + double gsl_ran_gamma (const gsl_rng * r, double a, double b) + p(x) dx = {1 \over \Gamma(a) b^a} x^{a-1} e^{-x/b} dx */ -void -GibbsRot(CdsArray *cdsA, double **theta, const gsl_rng *r2) +static void +GibbsPhi(CdsArray *cdsA, const gsl_rng *r2) { - int i, j, k; - const int cnum = cdsA->cnum, vlen = cdsA->vlen; - double var = cdsA->stats->var; - double **A = cdsA->tmpmat3a; - Cds *ave = cdsA->avecds; - double alpha, beta, gamma, phi, psi, u, r, x, kappab, kappaphi, kappapsi, tmpa, tmpb; - double **rotmat = NULL; - double **V = cdsA->tmpmat3b; - double *lambda = malloc(3 * sizeof(double)); - double a = 0.0; + const int vlen = cdsA->vlen; + double invtr, a, b; + int i; - for (i = 0; i < cnum; ++i) - { - if (cdsA->algo->varweight > 0) - CdsInnerProduct(&A[0][0], ave, cdsA->cds[i], vlen, cdsA->w); - else - CdsInnerProduct(&A[0][0], ave, cdsA->cds[i], vlen, NULL); -/* printf("\n\n****************\nstructure: %d", i+1); */ -/* printf("\nF:"); */ -/* Mat3Print(F); */ + invtr = 0.0; + for (i = 0; i < vlen; ++i) + invtr += 1.0 / cdsA->var[i]; - if (cdsA->algo->varweight > 0) - { - for (j = 0; j < 3; ++j) - for (k = 0; k < 3; ++k) - A[j][k] *= 0.5; - } - else - { - for (j = 0; j < 3; ++j) - for (k = 0; k < 3; ++k) - A[j][k] /= (2.0 * var); - } + a = 0.5 * (vlen + 2.0); 
+ b = 2.0 / (invtr + 2.0 / stats->alpha); - rotmat = cdsA->cds[i]->matrix; +// reference prior = 1/phi +// a = 0.5*vlen; +// b = 2.0/invtr; - CalcGSLSVD(rotmat, A, lambda, V); - //svdGSLDest(A, 3, lambda, V); + stats->phi = gsl_ran_gamma(r2, a, b); - beta = theta[i][1]; +// printf("\nA phi: %e %e %e %e", stats->phi, b, vlen / invtr, sqrt(vlen / invtr)); +// fflush(NULL); +} - for (i = 0; i < 7; ++i) - { - tmpa = cos(0.5 * beta); - tmpb = sin(0.5 * beta); - - kappaphi = tmpa*tmpa * (lambda[0] + lambda[1]); - kappapsi = tmpb*tmpb * (lambda[0] - lambda[1]); - - phi = vonmises_dev(a, kappaphi, r2); - psi = vonmises_dev(a, kappapsi, r2); - u = gsl_ran_bernoulli (r2, 0.5); - - alpha = 0.5 * (phi + psi) + M_PI * u; - gamma = 0.5 * (phi - psi) + M_PI * u; - - kappab = (lambda[0] + lambda[1]) * cos(phi) + (lambda[0] - lambda[1]) * cos(psi) + 2.0 * lambda[2]; - r = gsl_rng_uniform(r2); - x = 1.0 + 2.0 * log(r + (1.0 -r) * exp(-kappab))/kappab; - beta = acos(x); - } - CalcRotMat(rotmat, A, lambda, V); +static void +GibbsWrite(CdsArray *cdsA, const gsl_rng *r2, const int iter) +{ + char filename[256], avename[256]; -/* printf("\ntheta: % f % f % f\n", theta[i][0], theta[i][1], theta[i][2]); */ + sprintf(filename, "%s%05d.pdb", "gibbs_", iter); + OverWriteTheseusCdsModelFile(cdsA, filename); + sprintf(avename, "%s%05d.pdb", "gibbs_ave_", iter); + WriteAveCds(cdsA, avename); +} - theta[i][0] = alpha; - theta[i][1] = beta; - theta[i][2] = gamma; - MardiaRot3(&rotmat[0][0], theta[i]); -/* printf("\nrotmat:"); */ -/* Mat3Print(rotmat); */ -/* fflush(NULL); */ -/* */ -/* if (VerifyRotMat(rotmat, 1e-6) == 0) */ -/* { */ -/* printf("\nBAD ROTATION MATRIX\n\n"); */ -/* exit(EXIT_FAILURE); */ -/* } */ +static void +GibbsMaxWrite(CdsArray *cdsA, const gsl_rng *r2, const int iter) +{ + char filename[256]; - theta[i][1] = beta; + sprintf(filename, "%s%05d.pdb", "gibbs_max_", iter); + OverWriteTheseusCdsModelFile(cdsA, filename); +} - Mat3TransposeIp(rotmat); - RotateCdsIp(cdsA->cds[i], 
(const double **) rotmat); - } - - free(lambda); +static void +GibbsDiagWrite(CdsArray *cdsA, const gsl_rng *r2, const int iter) +{ + char filename[256], avename[256]; + + sprintf(filename, "%s%05d.pdb", "gibbs_", iter); + OverWriteTheseusCdsModelFile(cdsA, filename); + sprintf(avename, "%s%05d.pdb", "gibbs_ave_", iter); + WriteAveCds(cdsA, avename); } @@ -1375,31 +1703,37 @@ WriteSample(FILE *paramfile, CdsArray *cdsA, double **theta, const int iter) { int j, k; + double angle; + double *v = malloc(3 * sizeof(double)); double *quat = malloc(4 * sizeof(double)); const int cnum = cdsA->cnum, vlen = cdsA->vlen; fprintf(paramfile, "%-12s %12d\n", "BEG SAMPLE", iter); - fprintf(paramfile, "%-12s %18.9e\n", "LOGL", cdsA->stats->logL); - - if (cdsA->algo->varweight > 0) - { - fprintf(paramfile, "%-12s %18.9e\n", "ALPHA", cdsA->stats->alpha); - fprintf(paramfile, "%-12s %18.9e\n", "PHI", cdsA->stats->phi); + fprintf(paramfile, "%-12s %26.2f\n", "LOGL", stats->logL); + fprintf(paramfile, "%-12s %26.2f\n", "MLOGL", stats->mlogL); + fprintf(paramfile, "%-12s %26.6f\n", "RMSD", stats->ave_paRMSD); + + if (algo->varweight > 0) + { + fprintf(paramfile, "%-12s %18.9e\n", "ALPHA", stats->alpha); + fprintf(paramfile, "%-12s %18.9e\n", "PHI", stats->phi); + fprintf(paramfile, "%-12s %18.9e\n", "SIGMA", sqrt(stats->var)); } else { - fprintf(paramfile, "%-12s %18.9e\n", "PHI", cdsA->stats->var); + fprintf(paramfile, "%-12s %18.9e\n", "PHI", stats->var); + fprintf(paramfile, "%-12s %18.9e\n", "SIGMA", sqrt(stats->var)); } for (j = 0; j < vlen; ++j) fprintf(paramfile, "%-12s %6d %18.9e\n", "VAR", j+1, cdsA->var[j]); for (j = 0; j < vlen; ++j) - fprintf(paramfile, "%-12s %6d % 14.9f % 14.9f % 14.9f\n", "MEAN", + fprintf(paramfile, "%-12s %6d % 14.9f % 14.9f % 14.9f\n", "MEAN", j+1, cdsA->avecds->x[j], cdsA->avecds->y[j], cdsA->avecds->z[j]); for (k = 0; k < cnum; ++k) - fprintf(paramfile, "%-12s %6d % 18.9e % 18.9e % 18.9e\n", "TRANS", + fprintf(paramfile, "%-12s %6d % 18.9e % 18.9e % 
18.9e\n", "TRANS", k+1, cdsA->cds[k]->center[0], cdsA->cds[k]->center[1], cdsA->cds[k]->center[2]); for (k = 0; k < cnum; ++k) @@ -1419,23 +1753,35 @@ fputc('\n', paramfile); } - for (k = 0; k < cnum; ++k) - fprintf(paramfile, "%-12s %6d % 18.9e % 18.9e % 18.9e\n", "THETA", - k+1, theta[k][0], theta[k][1], theta[k][2]); +// for (k = 0; k < cnum; ++k) +// fprintf(paramfile, "%-12s %6d % 18.9e % 18.9e % 18.9e\n", "THETA", +// k+1, theta[k][0], theta[k][1], theta[k][2]); for (k = 0; k < cnum; ++k) { RotMatToQuaternion((const double **) cdsA->cds[k]->matrix, quat); - fprintf(paramfile, "%-12s %6d % 18.9e % 18.9e % 18.9e % 18.9e\n", "QUAT", + fprintf(paramfile, "%-12s %6d % 18.9e % 18.9e % 18.9e % 18.9e\n", "QUAT", k+1, quat[0], quat[1], quat[2], quat[3]); } - if (cdsA->algo->scale > 0) + for (k = 0; k < cnum; ++k) + { + angle = RotMat2AxisAngle(cdsA->cds[k]->matrix, v); + fprintf(paramfile, + "%-12s %6d % 10.7f % 12.9f % 12.9f % 12.9f\n", "ANGLE-AXIS", + k+1, angle, v[0], v[1], v[2]); + } + + if (algo->scale > 0) for (j = 0; j < cnum; ++j) - fprintf(paramfile, "%-12s %6d %18.9e\n", "SCALE", j+1, cdsA->cds[j]->scale); + fprintf(paramfile, "%-12s %6d % 26.16f\n", "SCALE", j+1, cdsA->cds[j]->scale); fprintf(paramfile, "%-12s %12d\n", "END SAMPLE", iter); + + fflush(NULL); + free(quat); + free(v); } @@ -1447,13 +1793,13 @@ slxn = (int) (gsl_rng_uniform(r2) * cnum); - cdsA->stats->phi = 0.0; - for (i = 0; i < vlen; ++i) - cdsA->w[i] = cdsA->var[i] = 1.0; - RandRotCdsArray(cdsA, r2); - RandTransCdsArray(cdsA, 1000.0, r2); + stats->phi = 0.0; + for (i = 0; i < vlen; ++i) + cdsA->w[i] = cdsA->var[i] = 1.0; + RandRotCdsArray(cdsA, r2); + RandTransCdsArray(cdsA, 1000.0, r2); - CdsCopyAll(cdsA->avecds, cdsA->cds[slxn]); + CdsCopyAll(cdsA->avecds, cdsA->cds[slxn]); } @@ -1463,16 +1809,18 @@ int k, m;; const int cnum = cdsA->cnum, vlen = cdsA->vlen; const double *var = (const double *) cdsA->var; - const Cds **cds = (const Cds **) cdsA->cds; - const Cds *cdsm = NULL; + const Cds 
**cds = (const Cds **) cdsA->cds; + const Cds *cdsm = NULL; const double *avex = (const double *) cdsA->avecds->x, *avey = (const double *) cdsA->avecds->y, *avez = (const double *) cdsA->avecds->z; - double fterm, tmpx, tmpy, tmpz; + double fterm, tmpx, tmpy, tmpz, invvark; fterm = 0.0; for (k = 0; k < vlen; ++k) { + invvark = 1.0 / var[k]; + for (m = 0; m < cnum; ++m) { cdsm = (Cds *) cds[m]; @@ -1481,7 +1829,7 @@ tmpy = cdsm->y[k] - avey[k]; tmpz = cdsm->z[k] - avez[k]; - fterm += (tmpx*tmpx + tmpy*tmpy + tmpz*tmpz) / var[k]; + fterm += (tmpx*tmpx + tmpy*tmpy + tmpz*tmpz) * invvark; } } @@ -1494,8 +1842,8 @@ { int k, m;; const int cnum = cdsA->cnum, vlen = cdsA->vlen; - const Cds **cds = (const Cds **) cdsA->cds; - const Cds *cdsm = NULL; + const Cds **cds = (const Cds **) cdsA->cds; + const Cds *cdsm = NULL; const double *avex = (const double *) cdsA->avecds->x, *avey = (const double *) cdsA->avecds->y, *avez = (const double *) cdsA->avecds->z; @@ -1516,17 +1864,17 @@ } } - return(fterm); + return(fterm / stats->var); } -static double +double CalcHierarchPrLogL(CdsArray *cdsA) { const double *var = (const double *) cdsA->var; const int vlen = cdsA->vlen; - return(invgamma_logL(var, vlen, cdsA->stats->phi, 0.5)); + return(invgamma_logL(var, vlen, stats->phi, algo->covnu / 2)); } @@ -1543,6 +1891,20 @@ return(3.0 * cdsA->vlen * scales); } + +/* Calculates the likelihood for a specified Gaussian model, given a + structural superposition. + + NOTA BENE: This function assumes that the variances, covariance matrices, + hierarchical model parameters, average coordinates, rotations, and + translations have all been pre-calculated. + + The first term of the likelihood equation (the Mahalonobius Frobenius + matrix norm term) is normally equal to NKD/2 at the maximum. 
However, + when using shrinkage or hierarchical estimates of the covariance + matrices, this convenient simplification no longer holds, and the + double matrix-weighted Frobenius norm must be calculated explicitly. +*/ static double CalcLogLGibbs(CdsArray *cdsA) { @@ -1554,19 +1916,17 @@ const double ndk2 = 0.5 * ndk; const double *var = (const double *) cdsA->var; double lndet, frobterm, igL, scales; - Algorithm *algo = cdsA->algo; - Statistics *stats = cdsA->stats; int i; lndet = frobterm = igL = 0.0; - if (algo->leastsquares == 1) + if (algo->leastsquares) { - lndet = vlen * log(cdsA->stats->var); - frobterm = CalcFrobTermIso(cdsA) / cdsA->stats->var; + lndet = vlen * log(stats->var); + frobterm = CalcFrobTermIso(cdsA); igL = 0.0; } - else if (algo->varweight == 1) + else if (algo->varweight) { lndet = 0.0; for (i = 0; i < vlen; ++i) @@ -1577,32 +1937,171 @@ } scales = CalcLogScaleJacob(cdsA); - stats->logL = scales - - (0.5 * frobterm) - - (ndk2 * log(2.0 * MY_PI)) - - (0.5 * nd * lndet) + stats->logL = scales + - (0.5 * frobterm) + - (ndk2 * log(2.0 * M_PI)) + - (0.5 * nd * lndet) + igL; -/* printf("\n! scales frobterm -ndk2 igL lndet\n"); */ -/* printf("! % 12.4f % 12.4f % 12.4f % 12.4f % 12.4f\n", */ -/* scales, (-0.5 * frobterm), -ndk2, igL, (- 0.5 * nd * lndet)); */ +// printf("\n! scales -frobterm/2 -ndk2 igL lndet\n"); +// printf("! 
% 12.4f % 12.4f % 12.4f % 12.4f % 12.4f\n", +// scales, (-0.5 * frobterm), -ndk2, igL, (- 0.5 * nd * lndet)); return(stats->logL); } -double -f(double x, void *params) +/* tr(A'B) */ +static double +Mat3MultTr2(const double **A, const double **B) { - double *p = (double *) params; - double n = p[0]; - double gamma = p[1]; - double phi = p[2]; - double r = p[3]; - double f = pow(x,r) * CalcHalfNormChiLik(x, n, gamma, phi); - //printf("x: %e n: %e gamma: %e phi: %e prob: %e\n", x, n, gamma, phi, f); - //fflush(NULL); - return f; + double tr; + tr = A[0][0]*B[0][0] + A[1][0]*B[1][0] + A[2][0]*B[2][0] + + A[0][1]*B[0][1] + A[1][1]*B[1][1] + A[2][1]*B[2][1] + + A[0][2]*B[0][2] + A[1][2]*B[1][2] + A[2][2]*B[2][2]; + + return(tr); +} + + +static double +H_Kent(const double **g, const double **x) +{ + double tr = Mat3MultTr2(g,x); + + return(exp(tr)); +} + + +static void +TestMF(const int burn, gsl_rng *r2) +{ + printf("\n starting rot mat devs\n"); + fflush(NULL); + + double **r = MatAlloc(3,3); + int i, j, k; + + double **f = MatAlloc(3,3); + double **a = MatAlloc(3,3); + double **vt = MatAlloc(3,3); + double **u = MatAlloc(3,3);; + double *lambda = malloc(4 * sizeof(double)); + double *v1 = malloc(4 * sizeof(double)); + double *v2 = malloc(4 * sizeof(double)); + double *v3 = malloc(4 * sizeof(double)); + +// f[0][0] = 85; +// f[0][1] = 78; +// f[0][2] = 43; +// f[1][0] = 11; +// f[1][1] = 39; +// f[1][2] = 64; +// f[2][0] = 41; +// f[2][1] = 60; +// f[2][2] = 48; + + f[0][0] = 85; + f[0][1] = 0; + f[0][2] = 0; + f[1][0] = 0; + f[1][1] = 48; + f[1][2] = 0; + f[2][0] = 0; + f[2][1] = 0; + f[2][2] = 39; + + for (j = 0; j < 3; ++j) + for (k = 0; k < 3; ++k) + f[j][k] *= 0.1; + + double **g = MatAlloc(3,3); +// for (j = 0; j < 3; ++j) +// for (k = 0; k < 3; ++k) +// g[j][k] = 0.1*ceil(gsl_rng_uniform(r2) * 100); + + for (j = 0; j < 3; ++j) + //for (k = 0; k < 3; ++k) + g[j][j] = 0.1*ceil(gsl_rng_uniform(r2) * 100); + + double fv, nfv, nfngv, gv; + double *theta1 = 
calloc(3,sizeof(double)); + double *theta2 = calloc(3,sizeof(double)); + + double **nf = MatAlloc(3,3); + double **nfng = MatAlloc(3,3); + double **gf = MatAlloc(3,3); + + for (j = 0; j < 3; ++j) + for (k = 0; k < 3; ++k) + nf[j][k] = - f[j][k]; + + for (j = 0; j < 3; ++j) + for (k = 0; k < 3; ++k) + nfng[j][k] = - f[j][k] - g[j][k]; + + for (j = 0; j < 3; ++j) + for (k = 0; k < 3; ++k) + gf[j][k] = f[j][k] + g[j][k]; + + double delta; + double nfave = 0.0, nfngave = 0.0, gave = 0.0, nfvar = 0.0; + for (i = 0; i < burn; ++i) + { + KentMF_dev(r, (const double **) f, a, u, vt, lambda, v1, v2, v3, r2); + //HabeckMF_dev(r, (const double **) f, theta1, a, u, vt, lambda, r2); + + fv = H_Kent((const double **) f, (const double **) r); + + gv = H_Kent((const double **) g, (const double **) r); + nfv = H_Kent((const double **) nf, (const double **) r); + + KentMF_dev(r, (const double **) gf, a, u, vt, lambda, v1, v2, v3, r2); + //HabeckMF_dev(r, (const double **) gf, theta2, a, u, vt, lambda, r2); + + nfngv = H_Kent((const double **) nfng,(const double **) r); + + printf("MF: % 10.8e % 10.8e % 10.8e % 10.8e\n", nfv, log(fv), nfngv, gv); + + delta = nfv - nfave; + nfave += delta / (i+1); + nfvar += delta * (nfv - nfave); + + //nfave += nfv; + nfngave += nfngv; + gave += gv; + + if (algo->abort) + break; + } + + //nfave /= burn; + nfvar /= burn; + nfngave /= burn; + double ratio = nfave/nfngave; + gave /= burn; + + printf("\n % 10.8e(% 10.8e) % 10.8e % 10.8e % 10.8e: % 10.8e\n", + nfave, sqrt(nfvar/burn), nfngave, ratio, gave, ratio/gave); + Mat3Print(f); + Mat3Print(g); + + fflush(NULL); + free(v1); + free(v2); + free(v3); + free(theta1); + free(theta2); + +//int m; double angle; double vc[3]; +//double tr; +// Mat3Print(rotmat); +// angle = RotMat2AxisAngle(r, vc); +// +// printf("Angle: % 10.7f Axis: % 10.7f % 10.7f % 10.7f\n", +// angle, vc[0], vc[1], vc[2]); + + exit(EXIT_SUCCESS); } @@ -1611,29 +2110,33 @@ { int i; const int cnum = cdsA->cnum, vlen = cdsA->vlen; - int 
burn = cdsA->algo->bayes; - char paramfname[256]; + int burn = algo->bayes; + char paramfname[256], maxfname[256]; //char tmpname[256]; double **theta = MatAlloc(cnum, 3); - const gsl_rng_type *T = NULL; - gsl_rng *r2 = NULL; double invtr, mlik, hmlik, blik, amlik, diff, ediff, liksi; + double maxlogL, maxmlogL; int badsamp, nsamp, burnin = 100; - Cds *avecds = cdsA->avecds; +// Cds *avecds = cdsA->avecds; FILE *paramfile = NULL; - - double *liks = malloc(burn * sizeof(double)); - - cdsA->stats->phi = cdsA->stats->hierarch_p1; - //printf("\nLogL: %e", CalcLogLGibbs(cdsA)); - blik = CalcLogLGibbs(cdsA); + FILE *maxfile = NULL; + double *liks = malloc(burn * sizeof(double)); + double *mliks = malloc(burn * sizeof(double)); + const gsl_rng_type *T = NULL; + gsl_rng *r2 = NULL; gsl_rng_env_setup(); - gsl_rng_default_seed = time(NULL); + gsl_rng_default_seed = time(NULL) + getpid() + clock(); T = gsl_rng_ranlxs2; r2 = gsl_rng_alloc(T); - sprintf(paramfname, "%s_%s.p", cdsA->algo->rootname, "gibbs"); + + if(0) + { + TestMF(burn, r2); + } + + sprintf(paramfname, "%s_%s.p", algo->rootname, "gibbs"); paramfile = fopen(paramfname, "w"); if (paramfile == NULL) @@ -1645,106 +2148,181 @@ exit(EXIT_FAILURE); } + sprintf(maxfname, "%s_%s.p", algo->rootname, "gibbs_max"); + + maxfile = fopen(maxfname, "w"); + if (paramfile == NULL) + { + perror("\n ERROR"); + fprintf(stderr, + "\n ERROR99: could not open file '%s' for writing. 
\n", maxfname); + PrintTheseusTag(); + exit(EXIT_FAILURE); + } + fprintf(paramfile, "%-12s %12d\n", "NUM", cnum); fprintf(paramfile, "%-12s %12d\n", "LEN", vlen); - cdsA->stats->alpha = DBL_MAX; + fprintf(maxfile, "%-12s %12d\n", "NUM", cnum); + fprintf(maxfile, "%-12s %12d\n", "LEN", vlen); + + stats->alpha = DBL_MAX; - if (cdsA->algo->varweight > 0) + if (algo->varweight > 0) { invtr = 0.0; for (i = 0; i < vlen; ++i) { - invtr += 1.0 / cdsA->var[i]; cdsA->w[i] = 1.0 / cdsA->var[i]; + invtr += cdsA->w[i]; } - cdsA->stats->alpha = vlen / invtr; - cdsA->stats->phi = vlen / invtr; + stats->alpha = vlen / invtr; + stats->phi = 2.0 * vlen / invtr; + stats->hierarch_p1 = 0.5 * stats->phi; } else { for (i = 0; i < vlen; ++i) - cdsA->w[i] = 1.0 / cdsA->stats->var; + cdsA->w[i] = 1.0 / stats->var; + + stats->phi = stats->var/vlen; + stats->hierarch_p1 = 0.5 * stats->phi; } ////////////////////////////////////////// - //RandInitGibbs(cdsA, r2); - - CalcCdsPrincAxesGibbs(avecds, avecds->matrix, cdsA->w); - Mat3TransposeIp(avecds->matrix); - RotateCdsIp(avecds, (const double **) avecds->matrix); + if (algo->randgibbs > 0) + { + printf("\n Random Gibbs Init\n"); + RandInitGibbs(cdsA, r2); + } +// +// double fac, facsum = 0.0; +// for (i = 1; i < cnum; ++i) +// { +// fac = gsl_rng_uniform(r2); +// facsum += fac; +// printf("\nfac[%3d]: % 12.6f", i+1, 1.0/fac); +// //ScaleCds(scratchA->cds[i], 1.0 / (i+1.0)); +// ScaleCds(cdsA->cds[i], fac); +// //scratchA->cds[i]->scale = 1.0 / (i+1.0); +// } +// printf("\nfacsum: %12.6f", facsum+1.0); - for (i = 0; i < cnum; ++i) - RotateCdsIp(cdsA->cds[i], (const double **) avecds->matrix); + if (algo->varweight > 0) + GibbsDiagWrite(cdsA, r2, 0); + else + GibbsWrite(cdsA, r2, 0); -// if (cdsA->algo->varweight > 0) -// GibbsMeanDiagWrite(cdsA, r2, 0); -// else -// GibbsMeanWrite(cdsA, r2, 0); + //stats->phi = stats->hierarch_p1; + CalcLogLGibbs(cdsA); + CalcMgLogL(cdsA); - //printf("\nLogL: %e", CalcLogLGibbs(cdsA)); + printf("\n Initial 
LogL: %f", stats->logL); + printf("\n Initial mLogL: %f", stats->mlogL); + fflush(NULL); for (i = 0; i < cnum; ++i) { //ScaleCds(cdsA->cds[i], i+1.0); //cdsA->cds[i]->scale = i+1.0; - //cdsA->cds[i]->scale = 1.0; + cdsA->cds[i]->scale = 1.0; } VarCds(cdsA); - printf("\nphi initial: % f", cdsA->stats->var); + printf("\n var initial: % f", stats->var); + printf("\n phi initial: % f", stats->phi); + fflush(NULL); + maxlogL = maxmlogL = -DBL_MAX; for (i = 1; i <= burn; ++i) { - if (cdsA->algo->varweight > 0) + if (algo->varweight > 0) { - GibbsPhi(cdsA, r2); - GibbsVarDiag(cdsA, r2); - GibbsMeanDiag(cdsA, r2); - GibbsTransDiag(cdsA, r2); - //GibbsRot(cdsA, theta, r2); - GibbsMetRot(cdsA, theta, r2); - if (cdsA->algo->scale > 0) + if (algo->dotrans) + GibbsTransDiag(cdsA, r2); + if (algo->dorot) + // GibbsRotMardia(cdsA, theta, r2); + // GibbsRotHabeck(cdsA, theta, r2); + GibbsRotKent(cdsA, r2); + if (algo->scale > 0) MetScaleDiag(cdsA, r2); + if (algo->doave) + GibbsMeanDiag(cdsA, r2); + GibbsVarDiag(cdsA, r2); + GibbsPhi(cdsA, r2); } else { - GibbsVar(cdsA, r2); - GibbsMean(cdsA, r2); - GibbsTrans(cdsA, r2); - //GibbsRot(cdsA, theta, r2); - GibbsMetRot(cdsA, theta, r2); - if (cdsA->algo->scale > 0) + if (algo->dotrans) + GibbsTrans(cdsA, r2); + if (algo->dorot) + // GibbsRotMardia(cdsA, theta, r2); + // GibbsRotHabeck(cdsA, theta, r2); + GibbsRotKent(cdsA, r2); + if (algo->scale > 0) MetScale(cdsA, r2); + if (algo->doave) + GibbsMean(cdsA, r2); + GibbsVar(cdsA, r2); } - //printf("\nLogL: %e", CalcLogLGibbs(cdsA)); liks[i-1] = CalcLogLGibbs(cdsA); + stats->hierarch_p1 = 0.5 * stats->phi; // DLT this is ungraceful + mliks[i-1] = CalcMgLogL(cdsA); - if (1) - if (i%100 == 0) + if (i%100 == 0 || i == 1) { - if (i%1000 == 0) + if (i%1000 == 0 || i == 1) { - if (cdsA->algo->varweight > 0) - GibbsMeanDiagWrite(cdsA, r2, i); - else - GibbsMeanWrite(cdsA, r2, i); + GibbsWrite(cdsA, r2, i); } + CalcPRMSD(cdsA); WriteSample(paramfile, cdsA, theta, i); -/* sprintf(tmpname, 
"%s_%s_%05d.pdb", cdsA->algo->rootname, "gibbs_ave", i); */ -/* WriteAveCds(cdsA, tmpname); */ + printf("\n%5d LogL: % 20.3f MargLogL: % 20.3f", i, liks[i-1], mliks[i-1]); + fflush(NULL); + } + + if (maxlogL < liks[i-1]) + { + maxlogL = liks[i-1]; + + GibbsMaxWrite(cdsA, r2, -1); + WriteSample(maxfile, cdsA, theta, i); + } + + if (maxmlogL < mliks[i-1]) + { + maxmlogL = mliks[i-1]; + + GibbsMaxWrite(cdsA, r2, -2); + WriteSample(maxfile, cdsA, theta, i); + } + + if (algo->abort) + { + burn = i-1; + break; } } - printf(" Done with Gibbs-Metropolis ...\n"); + printf("\n Done with Gibbs-Metropolis ...\n"); fflush(NULL); -//////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////// + blik = 0.0; + for (i = burnin; i < burn; ++i) + blik += mliks[i]; + + nsamp = burn - burnin; + blik /= nsamp; + + printf("\n Expected marginal likelihood (covmat): % 26.4f\n", blik); + fflush(NULL); + blik = 0.0; for (i = burnin; i < burn; ++i) blik += liks[i]; @@ -1752,6 +2330,9 @@ nsamp = burn - burnin; blik /= nsamp; + printf(" Expected log likelihood: % 26.4f\n", blik); + fflush(NULL); + mlik = hmlik = amlik = 0.0; badsamp = 0; for (i = burnin; i < burn; ++i) @@ -1770,219 +2351,17 @@ { ++badsamp; } - -/* printf("\nnormalize: % e % e % e % e % e % e", */ -/* liksi, */ -/* blik, */ -/* liksi - blik, */ -/* exp(liksi - blik), */ -/* exp(diff), */ -/* exp(-diff)); */ } nsamp -= badsamp; - printf("\n Marginal likelihood: % 14.2f % 14.2f % 14.2f\n", + printf("\n Marginal likelihood: % 14.2f % 14.2f % 14.2f\n", log(mlik / nsamp) + blik, blik - log(hmlik) + log(nsamp), amlik / nsamp); - /* Newton and Raftery 1994 - Approximate Bayesian inference with the weighted likelihood bootstrap (with discussion). - Journal of the Royal Statistical Society, Series B, 56:3-48. - Equation 16, p 22 - - They suggest delta = 0.01, something "small". 
- Curiously, delta = 0.5 results in the average posterior log likelihood - (which is different from the average posterior likelihood) - I guess that can be thought of as the entropy of the posterior distribution */ - int cnt = 0; - double term, fac, delta, num, denom, oldbf, bf = 0.0; - delta = 0.5; - term = delta * (burn-burnin) / (1.0 - delta); - do - { - ++cnt; - oldbf = bf; - num = denom = 0.0; - for (i = burnin; i < burn; ++i) - { - liksi = liks[i]; - diff = liksi - blik; - ediff = exp(diff); - fac = delta*bf + (1.0-delta)*ediff; - num += ediff / fac; - denom += 1.0 / fac; - } - - bf = (term + num) / (term * bf + denom); - //printf("Marginal likelihood2[%3d]: % 14.2f \n", cnt, log(bf) + blik); - } - while (fabs(oldbf - bf) > bf * 1e-7 && cnt < 1000); - printf(" Marginal likelihood2[%3d]: % 14.6f \n\n", cnt, log(bf) + blik); - -//////////////////////////////////////////////////////////////////////////////////////////////////// - - double Cm, lik; - double x,n,phi,gamma; - FILE *metfp = fopen("metropolis.txt", "w"); - FILE *rejfp = fopen("rejection.txt", "w"); - - int samples = 100000; - - n=100; - phi=100; - gamma=100; - - double scalemax = ScaleMax(n,gamma,phi); - printf("Mx: % e\n", scalemax); - printf("FI: % e\n", 1.0 / (phi + (n-1.0)/(scalemax*scalemax))); - printf("~Ex: % e\n", 1.0+(gamma/sqrt(phi))); - printf("~Ex_dlt: % e\n", sqrt(n/phi)); - -#if 0 //defined(__APPLE__) - double starttime, endtime; - double exitloop; - - starttime = seconds(); -#endif - - double sm = ScaleMax(n,gamma,phi); - double width = sqrt(1.0 / (phi + (n-1.0)/(sm*sm))); - double xv = sm; - int skip = 3; - - for (i = 0; i < samples; ++i) - { - xv = scale_log_met3(n, gamma, phi, xv, r2, sm, width, skip); - fprintf(metfp, "%e\n", xv); - } - - -#if 0 //defined(__APPLE__) - endtime = seconds(); - exitloop = (double) (endtime - starttime) / 0.001; - - printf("\nmet time: %7.2f (ms) \n", exitloop); - fflush(NULL); -#endif - - -#if 0 //defined(__APPLE__) - starttime = seconds(); -#endif - 
-if (0) - for (i = 0; i < samples; ++i) - { - fprintf(rejfp, "%e\n", scale_rejection(n, gamma, phi, r2)); - } - -#if 0 //defined(__APPLE__) - endtime = seconds(); - exitloop = (double) (endtime - starttime) / 0.001; - - printf("\nrej time: %7.2f (ms) \n", exitloop); - fflush(NULL); -#endif - - fclose(metfp); - fclose(rejfp); - -// printf("1F1:\n"); -// fflush(NULL); -// printf("1F1: %e\n", gsl_sf_hyperg_1F1(0.5*(n+1.0), 1.5, 0.5*gamma*gamma/phi)); -// fflush(NULL); - - //C = CalcNormConst(n, gamma, phi); - - if (0) - { - Cm = CalcNormConstMm(n, gamma, phi); - for (i=0;i<100;++i) - { - x=i*0.1; - lik = CalcHalfNormChi(x, n, gamma, phi); - - //printf("L-[%3d]: %f % e\n", i, x, lik); - //printf("CL[%3d]: %f % e\n", i, x, C*lik); - printf("CLm[%3d]: %f % e\n", i, x, lik); - } - } - - //double integral1 = integrate_romberg_f3(CalcHalfNormChi, n, gamma, phi, 0.0, 100.0); - //printf("integral: %e\n", integral1); - - //printf("Ex: %e\n", ExpectScale(n,gamma,phi)); - - //printf("Dnz[]: % 7.4e\n", CalcDnz(-n, -gamma / sqrt(phi))); - //printf("Dnz[]: % 7.4e\n", CalcDnz(2, 2)); - //printf("C-: % .18f\n", C); - Cm = CalcNormConstMm(n, gamma, phi); - printf("Cm: % .18f\n", Cm); - -/* for (i=0;i<50;++i) */ -/* printf("Uax[-1.5, %f]: % 7.4e\n", i*0.1, CalcUax(5, i*0.1)); */ - - //printf("U(): %e\n", gsl_sf_hyperg_U(-2.0, 0.5, 1.13)); - -//////////////////////////////////////////////////////////////////////////////////////////////////// - - #include - gsl_integration_workspace *w = gsl_integration_workspace_alloc(1000); - double *params = malloc(4 * sizeof(double)); - double result, error; - double p, m1, m2, m3, m4, v, s, k, sd; - - params[0] = n; - params[1] = gamma; - params[2] = phi; - - gsl_function F; - F.function = &f; - F.params = ¶ms[0]; - - //gsl_integration_qag(&F, 0.0, 10.0, 0.0, 1e-7, 1000, GSL_INTEG_GAUSS61, w, &result, &error); - params[3] = 0.0; - gsl_integration_qagiu(&F, 0.0, 0.0, 1e-6, 1000, w, &result, &error); - printf ("result (C) = % .18e +/- % .18e\n", 
result, error); - printf ("result (1/C) = % .18e +/- % .18e\n", 1.0/result, error/(result*result)); - p = 1.0/result; - - params[3] = 1.0; - gsl_integration_qagiu(&F, 0.0, 0.0, 1e-6, 1000, w, &result, &error); - printf ("result (m1) = % .18f +/- % .18f\n", p*result, p*error); - m1 = p*result; - - params[3] = 2.0; - gsl_integration_qagiu(&F, 0.0, 0.0, 1e-6, 1000, w, &result, &error); - printf ("result (m2) = % .18f +/- % .18f\n", p*result, p*error); - m2 = p*result; - - params[3] = 3.0; - gsl_integration_qagiu(&F, 0.0, 0.0, 1e-6, 1000, w, &result, &error); - printf ("result (m1) = % .18f +/- % .18f\n", p*result, p*error); - m3 = p*result; - - params[3] = 4.0; - gsl_integration_qagiu(&F, 0.0, 0.0, 1e-6, 1000, w, &result, &error); - printf ("result (m2) = % .18f +/- % .18f\n", p*result, p*error); - m4 = p*result; - - v = m2 - m1*m1; - sd = sqrt(v); - s = (2.0*m1*m1*m1 - 3.0*m1*m2 + m3)/(sd*sd*sd); - k = 3.0-(-3.0*m1*m1*m1*m1 + 6.0*m1*m1*m2 - 4.0*m1*m3 + m4)/(sd*sd*sd*sd); - printf ("emp exp = % e\n", m1); - printf ("emp var = % e\n", v); - printf ("emp skw = % e\n", s); - printf ("emp kur = % e\n", k); - -//////////////////////////////////////////////////////////////////////////////////////////////////// - MatDestroy(&theta); fclose(paramfile); + fclose(maxfile); free(liks); - free(params); - gsl_integration_workspace_free(w); gsl_rng_free(r2); } diff -Nru theseus-2.0.6/GibbsMetExtras.c theseus-3.0.0/GibbsMetExtras.c --- theseus-2.0.6/GibbsMetExtras.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/GibbsMetExtras.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,3 +1,257 @@ +double +CalcHalfNormChiLik(const double x, const double n, const double gamma, const double phi) +{ + if (x < DBL_MIN) + { + return(0.0); + } + else + { + double logp = (n-1.0) * log(x) - (0.5 * phi * x * x) + (gamma * x); + return(exp(logp)); +// return(pow(x, n-1.0) * exp((-0.5 * phi * x * x) + (gamma * x))); + } +} + + +/* Calculates the normalizing constant for the scale factor PDF: + + P(x) 
\propto x^(n-1) e^-(phi/2 x^2 - gamma x) + + The integral for this can be found in Gradshteyn and Ryzhik, + p. 365, formula 3.462(1). +*/ +double +CalcNormConst(const double n, const double gamma, const double phi) +{ + double tmpx; + + tmpx = (pow(phi, -0.5 * n) * exp(gamma*gamma / (4.0 * phi))) * + (tgamma(n) * CalcDnz(-n, -gamma / sqrt(phi))); + +// tmpx = (pow(phi, -0.5 * n) * exp(gamma*gamma / (4.0 * phi))) * +// (tgamma(n) * CalcUab_large_a(n-0.5, -gamma / sqrt(phi))); + + return(1.0/tmpx); +} + + +static double +CalcNormConstMm(const double n, const double gamma, const double phi) +{ + double tmpx; + + tmpx = pow(2.0, 0.5*(n-3.0)) * pow(phi,-0.5*(n+1.0)) + * ( + sqrt(2.0*phi) * tgamma(0.5*n) * gsl_sf_hyperg_1F1(0.5*n, 0.5, 0.5*gamma*gamma/phi) + + 2.0 * gamma * tgamma(0.5*(n+1.0)) * gsl_sf_hyperg_1F1 (0.5*(n+1.0), 1.5, 0.5*gamma*gamma/phi) + ); + + return(1.0/tmpx); +} + + +double +CalcHalfNormChi(const double x, const double n, const double gamma, const double phi) +{ + return(CalcHalfNormChiLik(x, n, gamma, phi) * CalcNormConstMm(n, gamma, phi)); +} + + +double +ExpectScale(const double n, const double gamma, const double phi) +{ + return((n+1.0) * CalcUax(n+0.5, -gamma/sqrt(phi))/(sqrt(phi)*CalcUax(n-0.5,-gamma/sqrt(phi)))); + //return((n+1.0) * CalcDnz(-n-1.0, -gamma/sqrt(phi))/(sqrt(phi)*CalcDnz(-n,-gamma/sqrt(phi)))); +} + + + /* Newton and Raftery 1994 + Approximate Bayesian inference with the weighted likelihood bootstrap (with discussion). + Journal of the Royal Statistical Society, Series B, 56:3-48. + Equation 16, p 22 + + They suggest delta = 0.01, something "small". 
+ Curiously, delta = 0.5 results in the average posterior log likelihood + (which is different from the average posterior likelihood) + */ + int cnt = 0; + double term, fac, delta, num, denom, oldbf, bf = 0.0; + + delta = 0.5; + term = delta * (burn-burnin) / (1.0 - delta); + do + { + ++cnt; + oldbf = bf; + num = denom = 0.0; + for (i = burnin; i < burn; ++i) + { + liksi = liks[i]; + diff = liksi - blik; + ediff = exp(diff); + fac = delta*bf + (1.0-delta)*ediff; + num += ediff / fac; + denom += 1.0 / fac; + } + + bf = (term + num) / (term * bf + denom); + //printf("Marginal likelihood2[%3d]: % 14.2f \n", cnt, log(bf) + blik); + } + while (fabs(oldbf - bf) > bf * 1e-7 && cnt < 1000); + printf(" Marginal likelihood2[%3d]: % 14.6f \n\n", cnt, log(bf) + blik); + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +double +f(double x, void *params) +{ + double *p = (double *) params; + double n = p[0]; + double gamma = p[1]; + double phi = p[2]; + double r = p[3]; + double f = pow(x,r) * CalcHalfNormChiLik(x, n, gamma, phi); + //printf("x: %e n: %e gamma: %e phi: %e prob: %e\n", x, n, gamma, phi, f); + //fflush(NULL); + return f; +} + + + + double Cm, lik; + double x,n,phi,gamma; + FILE *metfp = fopen("metropolis.txt", "w"); + FILE *rejfp = fopen("rejection.txt", "w"); + + int samples = 100000; + + n=100; + phi=100; + gamma=100; + + double scalemax = ScaleMax(n,gamma,phi); + printf("Mx: % e\n", scalemax); + printf("FI: % e\n", 1.0 / (phi + (n-1.0)/(scalemax*scalemax))); + printf("~Ex: % e\n", 1.0+(gamma/sqrt(phi))); + printf("~Ex_dlt: % e\n", sqrt(n/phi)); + + + double sm = ScaleMax(n,gamma,phi); + double width = sqrt(1.0 / (phi + (n-1.0)/(sm*sm))); + double xv = sm; + int skip = 3; + + for (i = 0; i < samples; ++i) + { + xv = scale_log_met3(n, gamma, phi, xv, r2, sm, width, skip); + fprintf(metfp, "%e\n", xv); + } + +if (0) + for (i = 0; i < samples; ++i) + { + fprintf(rejfp, "%e\n", scale_rejection(n, gamma, phi, 
r2)); + } + + fclose(metfp); + fclose(rejfp); + +// printf("1F1:\n"); +// fflush(NULL); +// printf("1F1: %e\n", gsl_sf_hyperg_1F1(0.5*(n+1.0), 1.5, 0.5*gamma*gamma/phi)); +// fflush(NULL); + + //C = CalcNormConst(n, gamma, phi); + + if (0) + { + Cm = CalcNormConstMm(n, gamma, phi); + for (i=0;i<100;++i) + { + x=i*0.1; + lik = CalcHalfNormChi(x, n, gamma, phi); + + //printf("L-[%3d]: %f % e\n", i, x, lik); + //printf("CL[%3d]: %f % e\n", i, x, C*lik); + printf("CLm[%3d]: %f % e\n", i, x, lik); + } + } + + //double integral1 = integrate_romberg_f3(CalcHalfNormChi, n, gamma, phi, 0.0, 100.0); + //printf("integral: %e\n", integral1); + + //printf("Ex: %e\n", ExpectScale(n,gamma,phi)); + + //printf("Dnz[]: % 7.4e\n", CalcDnz(-n, -gamma / sqrt(phi))); + //printf("Dnz[]: % 7.4e\n", CalcDnz(2, 2)); + //printf("C-: % .18f\n", C); + Cm = CalcNormConstMm(n, gamma, phi); + printf("Cm: % .18f\n", Cm); + +/* for (i=0;i<50;++i) */ +/* printf("Uax[-1.5, %f]: % 7.4e\n", i*0.1, CalcUax(5, i*0.1)); */ + + //printf("U(): %e\n", gsl_sf_hyperg_U(-2.0, 0.5, 1.13)); + +//////////////////////////////////////////////////////////////////////////////////////////////////// + + #include + gsl_integration_workspace *w = gsl_integration_workspace_alloc(1000); + double *params = malloc(4 * sizeof(double)); + double result, error; + double p, m1, m2, m3, m4, v, s, k, sd; + + params[0] = n; + params[1] = gamma; + params[2] = phi; + + gsl_function F; + F.function = &f; + F.params = ¶ms[0]; + + //gsl_integration_qag(&F, 0.0, 10.0, 0.0, 1e-7, 1000, GSL_INTEG_GAUSS61, w, &result, &error); + params[3] = 0.0; + gsl_integration_qagiu(&F, 0.0, 0.0, 1e-6, 1000, w, &result, &error); + printf ("result (C) = % .18e +/- % .18e\n", result, error); + printf ("result (1/C) = % .18e +/- % .18e\n", 1.0/result, error/(result*result)); + p = 1.0/result; + + params[3] = 1.0; + gsl_integration_qagiu(&F, 0.0, 0.0, 1e-6, 1000, w, &result, &error); + printf ("result (m1) = % .18f +/- % .18f\n", p*result, p*error); + m1 = 
p*result; + + params[3] = 2.0; + gsl_integration_qagiu(&F, 0.0, 0.0, 1e-6, 1000, w, &result, &error); + printf ("result (m2) = % .18f +/- % .18f\n", p*result, p*error); + m2 = p*result; + + params[3] = 3.0; + gsl_integration_qagiu(&F, 0.0, 0.0, 1e-6, 1000, w, &result, &error); + printf ("result (m1) = % .18f +/- % .18f\n", p*result, p*error); + m3 = p*result; + + params[3] = 4.0; + gsl_integration_qagiu(&F, 0.0, 0.0, 1e-6, 1000, w, &result, &error); + printf ("result (m2) = % .18f +/- % .18f\n", p*result, p*error); + m4 = p*result; + + v = m2 - m1*m1; + sd = sqrt(v); + s = (2.0*m1*m1*m1 - 3.0*m1*m2 + m3)/(sd*sd*sd); + k = 3.0-(-3.0*m1*m1*m1*m1 + 6.0*m1*m1*m2 - 4.0*m1*m3 + m4)/(sd*sd*sd*sd); + printf ("emp exp = % e\n", m1); + printf ("emp var = % e\n", v); + printf ("emp skw = % e\n", s); + printf ("emp kur = % e\n", k); + + free(params); + gsl_integration_workspace_free(w); + +//////////////////////////////////////////////////////////////////////////////////////////////////// + + static double gamma_large_dev3(const double a, gsl_rng *r2) { @@ -104,4 +358,4 @@ invgamma_dev3(const double b, const double c, gsl_rng *r2) { return(1.0 / gamma_dev3(1.0/b, c, r2)); -} \ No newline at end of file +} diff -Nru theseus-2.0.6/GibbsMet.h theseus-3.0.0/GibbsMet.h --- theseus-2.0.6/GibbsMet.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/GibbsMet.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/HierarchVars.c theseus-3.0.0/HierarchVars.c --- theseus-2.0.6/HierarchVars.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/HierarchVars.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,21 +31,121 @@ #include #include #include +#include +#include +#include #include "CovMat.h" -#include "DLTutils.h" #include "Error.h" -#include "pdbIO.h" -#include "pdbMalloc.h" #include "Cds.h" #include "PDBCds.h" #include "pdbStats.h" -#include "pdbUtils.h" -#include "Embed.h" -#include "DLTmath.h" #include "distfit.h" #include "HierarchVars.h" +#define SAFE_FUNC_CALL(f, x, yp) \ +do { \ + *yp = GSL_FN_EVAL(f,x); \ + if (!gsl_finite(*yp)) \ + GSL_ERROR("computed function value is infinite or NaN", GSL_EBADFUNC); \ +} while(0) + + +static int +compute_f_values (gsl_function *f, double x_minimum, double *f_minimum, + double x_lower, double *f_lower, + double x_upper, double *f_upper) +{ + SAFE_FUNC_CALL(f, x_lower, f_lower); + SAFE_FUNC_CALL(f, x_upper, f_upper); + SAFE_FUNC_CALL(f, x_minimum, f_minimum); + + return GSL_SUCCESS; +} + + +static int +dlt_min_fminimizer_set_with_values (gsl_min_fminimizer *s, gsl_function *f, + double x_minimum, double f_minimum, + double x_lower, double f_lower, + double x_upper, double f_upper) +{ + s->function = f; + s->x_minimum = x_minimum; + s->x_lower = x_lower; + s->x_upper = x_upper; + + if (x_lower > x_upper) + { + GSL_ERROR("invalid interval (lower > upper)", GSL_EINVAL); + } + + if (x_minimum >= x_upper || x_minimum <= x_lower) + { + 
GSL_ERROR("x_minimum must lie inside interval (lower < x < upper)", GSL_EINVAL); + } + + s->f_lower = f_lower; + s->f_upper = f_upper; + s->f_minimum = f_minimum; + + if (f_minimum >= f_lower && f_minimum >= f_upper) // DLT + { + GSL_ERROR("guess is worse than both endpoints", GSL_EINVAL); // DLT + } // DLT + + return(s->type->set)(s->state, s->function, + x_minimum, f_minimum, + x_lower, f_lower, + x_upper, f_upper); +} + + +static int +dlt_min_fminimizer_set(gsl_min_fminimizer *s, + gsl_function *f, + double x_minimum, double x_lower, double x_upper) +{ + int status ; + + double f_minimum, f_lower, f_upper; + + status = compute_f_values(f, x_minimum, &f_minimum, + x_lower, &f_lower, + x_upper, &f_upper); + + if (status != GSL_SUCCESS) + { + return status ; + } + + status = dlt_min_fminimizer_set_with_values(s, f, x_minimum, f_minimum, + x_lower, f_lower, + x_upper, f_upper); + return status; +} + + +static int +findlargest(double *vec, const int len) +{ + int i, bgi = 0; + double bg; + + bg = DBL_MIN; + for (i = 0; i < len; ++i) + { + if (vec[i] > bg) + { + bg = vec[i]; + bgi = i; + } + } + + return(bgi); +} + + static int findsmallest(double *vec, const int len) { @@ -99,3452 +199,521 @@ } -double -HarmonicAveBayes(const double *data, const int len, const double pr) +static void +InvGammaAdjustCov(CdsArray *cdsA, const double b, const double c) { - int i; - double invdata; - - invdata = 0.0; - for (i = 0; i < len; ++i) - invdata += 1.0 / data[i]; + int i, j; + const int vlen = cdsA->vlen; + const double nd = 3.0 * cdsA->cnum; + const double fact = nd / (nd + 2.0 * (1.0 + c)); - invdata += 1.0 / pr; + for (i = 0; i < vlen; ++i) + cdsA->CovMat[i][i] += 2.0 * b / nd; - return((len +2.0 ) / invdata); + for (i = 0; i < vlen; ++i) + for (j = 0; j < vlen; ++j) + cdsA->CovMat[i][j] *= fact; } -double -LogarithmicAve(const double *data, const int len) +static void +InvGammaAdjustVar(double *newvar, const int vlen, const int cnum, + double *var, const double b, const 
double c) { - int i; - double logdata; - - logdata = 0.0; - for (i = 0; i < len; ++i) - logdata += log(data[i]); + int i; - return(logdata / len); + for (i = 0; i < vlen; ++i) + newvar[i] = (3.0*cnum*var[i] + 2.0*b) / (3.0*cnum + 2.0 + 2.0*c); + // this is the conditional maximization algorithm + //newvar[i] = (3.0*cnum*var[i] + 2.0*b) / (3.0*cnum + 2.0*c); + // this is required for an EM algorithm of the variances + //printf("%3d %26.6f\n", i, var[i]); } -void -InvgaussFitVars(CdsArray *cdsA, double *mean, double *lambda) +static void +InvGammaAdjustVar3N(double *newvar, const int vlen, const int cnum, + double *var, const double phi, const double nu) { - double sum_mean, sum_lambda, invmean, var; - const double *data = (const double *) cdsA->var; - const int num = cdsA->vlen; - const double numd = (double) num; - int i; + int i; + + for (i = 0; i < vlen; ++i) + newvar[i] = (var[i] + phi) / (3.0*cnum + nu); + // this is required for an EM algorithm of the variances + //printf("%3d %26.6f\n", i, var[i]); +} - sum_mean = 0.0; - for (i = 0; i < num; ++i) - sum_mean += data[i]; - - *mean = sum_mean / numd; /* MLE and MME */ - /* invmean = 1.0 / *mean; */ - invmean = numd / *mean; - sum_lambda = var = 0.0; - for (i = 0; i < num; ++i) - { - sum_lambda += (1.0 / data[i] - invmean); - var += mysquare(data[i] - *mean); - } - var /= numd; - *lambda = numd / sum_lambda; /* MLE */ +void +InvGammaAdjustEvals(double *newevals, const int vlen, const int cnum, + double *evals, const double phi, const double nu) +{ + int i; -/* sum_lambda = var = 0.0; */ -/* for (i = 0; i < num; ++i) */ -/* { */ -/* sum_lambda += (mysquare(data[i] - *mean) / data[i]); */ -/* var += mysquare(data[i] - *mean); */ -/* } */ -/* var /= numd; */ -/* *lambda = numd * *mean * *mean / sum_lambda; */ - - printf("MLE: %10.5f\n", *lambda); - *lambda = *mean * *mean * *mean / var; /* MME */ - printf(" MME: %10.5f", *lambda); + for (i = 0; i < vlen; ++i) + newevals[i] = (3.0*cnum*evals[i] + phi) / (3.0*cnum + 
nu); + // this is for expected inverse + // newevals[i] = (3.0*cnum*evals[i] + phi) / (3.0*cnum + nu - vlen - 1.0); + // this is for expected covariance matrix (not inverse) - cdsA->stats->hierarch_p1 = *mean; - cdsA->stats->hierarch_p2 = *lambda; + //printf("%3d %26.6f\n", i, var[i]); } -void -InvgaussAdjustVars(CdsArray *cdsA, - const double mean, const double lambda) +static void +InvGammaAdjustVarNu(Cds **cds, double *newvar, const int vlen, const int cnum, + double *var, const double b, const double c) { - int i; - double *variance = cdsA->var; - const int vlen = cdsA->vlen; + int i, j; + double df; for (i = 0; i < vlen; ++i) { - /* printf("\n %10.5f", variance[i]); */ - variance[i] = - (0.5 * mean / lambda) * - ( - sqrt( - ((9.0 * mean * mean) * mysquare(vlen + 1) + - (4.0 * lambda * lambda)) + - (12.0 * vlen * lambda) * variance[i] - ) - - 3.0 * mean * (1 + vlen) - ); - /* printf(" %10.5f", variance[i]); */ + df = 0; + for (j = 0; j < cnum; ++j) + df += cds[j]->nu[i]; + + df *= 3; + + newvar[i] = (df*var[i] + 2.0*b) / (df + 2.0*(1.0 + c)); } } -/* fit a lognormal distribution by maximum likelihood */ -void -LognormalFitVars(CdsArray *cdsA, double *zeta, double *sigma) +double +LogPrPhi(double phi, void *params) { - const double *data = (const double *) cdsA->var; - const int num = cdsA->vlen; - double ave, var; + CdsArray *cdsA = (CdsArray *) params; + const int vlen = cdsA->vlen, cnum = cdsA->cnum; + double nu = 1.0; + double *var3N = cdsA->samplevar3N; + double fact = 0.5*(3.0*cnum + nu); + double term; int i; - ave = 0.0; - for (i = 0; i < num; ++i) - { - if (data[i] < DBL_EPSILON) - continue; - ave += log(data[i]); - } - ave /= (double) num; + if (phi <= 0.0) + return(DBL_MAX); - var = 0.0; - for (i = 0; i < num; ++i) - { - if (data[i] < DBL_EPSILON) - var += mysquare(ave); - else - var += mysquare(log(data[i]) - ave); - } - var /= (double) num - 1; + term = 0.0; + for (i = 0; i < vlen; ++i) + term += log(phi + var3N[i]); - *zeta = ave; - *sigma = 
sqrt(var); + //term = 0.5*vlen*log(phi) - fact*term; + term = 0.5*vlen*nu*log(phi) - fact*term; + //term += log(phi) - phi/1; // prior on phi - cdsA->stats->hierarch_p1 = *zeta; - cdsA->stats->hierarch_p2 = *sigma; + if (!isfinite(term)) + return(DBL_MAX); + else + return(-term); } -static void -evallognormal(double var, const double zeta, const double sigma, - const double varML, const int num, double *fx, double *dfx) +static double +LogPrPhiCov(double phi, void *params) { - *fx = (1.0 + zeta/sigma + 1.5*num + log(var)/sigma) * var - (1.5 * num * varML); - *dfx = 1.0 + (zeta + 1.0)/sigma + 1.5 * num + log(var)/sigma; -} + CdsArray *cdsA = (CdsArray *) params; + const int vlen = cdsA->vlen, cnum = cdsA->cnum; + double nu = algo->covnu; + double *evals = cdsA->samplevar3N; + double fact = 0.5*(3.0*cnum + nu); + double term; + int i; + if (phi <= 0.0) + return(DBL_MAX); -void -LognormalAdjustVars(CdsArray *cdsA, double zeta, double sigma) -{ - double var, varML, fx, dfx; - int i, j; - double tol = cdsA->algo->precision; - double *variance = cdsA->var; - const int num = cdsA->vlen; + term = 0.0; + for (i = 0; i < vlen; ++i) + term += log(phi + 3.0 * cnum * evals[i]); - for (i = 0; i < num; ++i) - { - /* Use Newton-Raphson to find ML estimate of lognormally distributed - variance. 
+ //term = 0.5*vlen*log(phi) - fact*term; + term = 0.5*vlen*nu*log(phi) - fact*term; + //term += log(phi); // - phi/1; // prior on phi + + if (!isfinite(term)) + return(DBL_MAX); + else + return(-term); +} - must find root of: - F1 = (1 + zeta/sigma + 1.5 num + ln(x)/sigma)x -1.5 num variance_ML = 0 +static double +LogPrPhi_f(double phi, void *params) +{ + CdsArray *cdsA = (CdsArray *) params; + const int vlen = cdsA->vlen, cnum = cdsA->cnum; + double nu = 1.0; + double *var3N = cdsA->samplevar3N; + double fact = (3.0*cnum + nu); + double term; + int i; - where the first derivative with repect to the lognormal variance - estimate x (dF1/dx) is: + if (phi <= 0.0) + phi = DBL_EPSILON; - F1' = 1 + (zeta + 1)/sigma + 1.5 num + ln(x)/sigma = 0 - */ - var = varML = variance[i]; /* initial guess */ - printf(" %10.5f\n", variance[i]); + term = 0.0; + for (i = 0; i < vlen; ++i) + { + term += 1.0 / (phi + var3N[i]); + } - for (j = 0; j < 200; ++j) - { - evallognormal(var, zeta, sigma, varML, num, &fx, &dfx); + return(vlen/phi - fact*term); +} - if (fabs(fx) < tol) - break; /* success */ - var -= (fx / dfx); /* Newton-Raphson correction */ - } +static double +LogPrPhi_df(double phi, void *params) +{ + CdsArray *cdsA = (CdsArray *) params; + const int vlen = cdsA->vlen, cnum = cdsA->cnum; + double nu = 1.0; + double *var3N = cdsA->samplevar3N; + double fact = (3.0*cnum + nu); + double term2, tmp; + int i; - if (j != 200) - variance[i] = var; + if (phi <= 0.0) + phi = DBL_EPSILON; - printf(" %10.5f", variance[i]); + term2 = 0.0; + for (i = 0; i < vlen; ++i) + { + tmp = 1.0 / (phi + var3N[i]); + term2 += tmp*tmp; } + + return(-vlen/(phi*phi) + fact*term2); } -void -InvGammaFitVars(CdsArray *cdsA, int iterate) +static void +LogPrPhi_fdf(double phi, void *params, double *y, double *dy) { - double *newvar = NULL; - double *variance = cdsA->var; - double **evecs = NULL; - double precision = cdsA->algo->precision; + CdsArray *cdsA = (CdsArray *) params; const int vlen = cdsA->vlen, 
cnum = cdsA->cnum; - double nd, oldb, oldc, b, c, chi2, logL; - int i, j, count, newlen, df; + double nu = 1.0; + double *var3N = cdsA->samplevar3N; + double fact = (3.0*cnum + nu); + double term, term2, tmp; + int i; - newvar = malloc(vlen * sizeof(double)); - nd = 3.0 * cnum; - oldb = oldc = DBL_MAX; + if (phi <= 0.0) + phi = DBL_EPSILON; - if (cdsA->algo->covweight != 0) + term = term2 = 0.0; + for (i = 0; i < vlen; ++i) { - evecs = cdsA->tmpmatKK2; + tmp = 1.0 / (phi + var3N[i]); + term += tmp; + term2 += tmp*tmp; + } - if (vlen - 1 < nd - 3) - newlen = vlen - 1; - else - newlen = nd - 3; + *y = vlen/phi - fact*term; + *dy = -vlen/(phi*phi) + fact*term2; +} - eigenvalsym((const double **) cdsA->CovMat, variance, evecs, vlen); - RevVecIp(variance, vlen); +/* Fit of the variances/eigenvalues to an inverse gamma distribution with + a fixed c shape parameter. + c > 1 -> guarantees that the distribution has a finite mean + c > 2 -> finite variance + c > 3 -> finite skewness + c > 4 -> finite kurtosis +*/ +void +InvGammaFitMarginalGSLBrent(CdsArray *cdsA) +{ + const int vlen = cdsA->vlen, cnum = cdsA->cnum; + int status, i; + int iter, max_iter = 50; + const gsl_min_fminimizer_type *T = NULL; + gsl_min_fminimizer *s = NULL; + double oldphi = 2.0*stats->hierarch_p1; + double a, b, phi; + double chi2 = 0.0, logL; + double nu = 1.0; + gsl_function F; + double absprec = algo->precision; + double relprec = FLT_EPSILON/10; + + T = gsl_min_fminimizer_brent; +// T = gsl_min_fminimizer_goldensection; +// T = gsl_min_fminimizer_quad_golden; + s = gsl_min_fminimizer_alloc(T); + + phi = oldphi; + +// printf ("using %s method\n", gsl_min_fminimizer_name(s)); +// printf ("%5s [%9s, %9s] %9s %10s %9s\n", +// "iter", "lower", "upper", "phi", "err", "err(est)"); +// printf ("%5d [%.7f, %.7f] %.7f %.7f\n", iter, a, b, phi, b - a); + + if (algo->varweight > 0) + { + double *var3N = cdsA->samplevar3N; + + nu = 1.0; + + CalcVar(cdsA); + +// phi = 0.0; +// for (i = 0; i < vlen; ++i) +// 
phi += 1.0 / (var3N[i]/(3.0*cnum) + oldphi); +// phi *= vlen; - for (i = newlen; i < vlen; ++i) - variance[i] = 0.0; + phi = 0.0; + for (i = 0; i < vlen; ++i) + phi += var3N[i]; + phi /= 3.0*cnum*vlen; - b = cdsA->stats->hierarch_p1; - c = cdsA->stats->hierarch_p2; + //phi = oldphi; - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); + a = DBL_MIN; + //b = DBL_MAX; + // phi should never, ever be larger than the largest variance + // in fact, it probably shouldn't be larger than the average variance (its approx the harm ave) + b = var3N[findlargest(var3N, vlen)]/(3.0*cnum) + 1.0; - count = 0; - do - { - ++count; - oldb = b; - oldc = c; +// printf("\na: %g phi: %g b: %g \n\n", +// LogPrPhi(a, (void *) cdsA), +// LogPrPhi(phi, (void *) cdsA), +// LogPrPhi(b, (void *) cdsA)); +// fflush(NULL); - chi2 = invgamma_fit(newvar, newlen, &b, &c, &logL); + F.function = &LogPrPhi; + F.params = (void *) cdsA; + dlt_min_fminimizer_set(s, &F, phi, a, b); // DLTGSL - for (i = 0; i < newlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); + iter = 0; + do + { + iter++; + status = gsl_min_fminimizer_iterate(s); - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f\n", - count, b, c, b / (c+1.0), logL, chi2); - fflush(NULL); - } + phi = gsl_min_fminimizer_x_minimum(s); + a = gsl_min_fminimizer_x_lower(s); + b = gsl_min_fminimizer_x_upper(s); - if (count > 100) - { - printf("\n WARNING01: Failed to converge in InvGammaFitVars(), round %d\n ", - cdsA->algo->rounds); - fflush(NULL); - break; - } + status = gsl_min_test_interval(a, b, absprec, relprec); - if (iterate == 0 || cdsA->algo->abort == 1) - break; +// if (status == GSL_SUCCESS) +// printf ("Converged:\n"); +// // +// printf ("%5d [%.7f, %.7f] %.7f %.7f\n", iter, a, b, phi, b - a); } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); + while(status == GSL_CONTINUE && iter < 
max_iter); - InvGammaAdjustCov(cdsA, b, c); + InvGammaAdjustVar3N(cdsA->var, vlen, cnum, var3N, phi, nu); - for (i = 0; i < vlen; ++i) - variance[i] = cdsA->CovMat[i][i]; + chi2 = chi_sqr_adapt(cdsA->var, vlen, 0, &logL, 0.5*phi, 0.5*nu, + invgamma_pdf, invgamma_lnpdf, invgamma_int); } - else + else if (algo->covweight > 0) { - b = cdsA->stats->hierarch_p1; - c = cdsA->stats->hierarch_p2; + double *evals = cdsA->evals; + double *invevals = cdsA->tmpvecK; + double **evecs = cdsA->tmpmatKK2; + double *tmpevals = cdsA->samplevar3N; + int i, j; + nu = algo->covnu; + /* must calc evals before minimizer is set */ + /* EigenGSL 0 evals are small to large */ + EigenGSL((const double **) cdsA->CovMat, vlen, tmpevals, evecs, 0); + //eigensym((const double **) cdsA->CovMat, tmpevals, evecs, vlen); + + phi = 0.0; for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); + phi += tmpevals[i]; + phi /= 3.0*cnum*vlen; + + a = DBL_MIN; + //b = DBL_MAX; + b = tmpevals[vlen-1]+1.0; // should never be larger than the largest eval + +// printf("\na: %g phi: %g b: %g \n\n", +// LogPrPhiCov(a, (void *) cdsA), +// LogPrPhiCov(phi, (void *) cdsA), +// LogPrPhiCov(b, (void *) cdsA)); +// fflush(NULL); + + iter = 0; + +// printf ("%5d [%.7f, %.7f] %.7f %.7f\n", iter, a, b, phi, b - a); +// fflush(NULL); + + F.function = &LogPrPhiCov; + F.params = (void *) cdsA; + dlt_min_fminimizer_set(s, &F, phi, a, b); // DLTGSL - count = 0; do { - oldb = b; - oldc = c; + iter++; + status = gsl_min_fminimizer_iterate(s); - chi2 = invgamma_fit(newvar, vlen, &b, &c, &logL); + phi = gsl_min_fminimizer_x_minimum(s); + a = gsl_min_fminimizer_x_lower(s); + b = gsl_min_fminimizer_x_upper(s); - if (cdsA->algo->alignment == 1) - { - for (i = 0; i < vlen; ++i) - { - df = 0; - for (j = 0; j < cnum; ++j) - df += cdsA->cds[j]->o[i]; + status = gsl_min_test_interval(a, b, absprec, relprec); - df *= 3; +// if (status == GSL_SUCCESS) +// printf ("Converged:\n"); +// +// printf ("%5d [%.7f, 
%.7f] %.7f %.7f\n", iter, a, b, phi, b - a); +// printf("a: %g phi: %g b: %g\n", +// LogPrPhiCov(a, (void *) cdsA), +// LogPrPhiCov(phi, (void *) cdsA), +// LogPrPhiCov(b, (void *) cdsA)); + fflush(NULL); + } + while(status == GSL_CONTINUE && iter < max_iter); - newvar[i] = (df*variance[i] + 2.0*b) / (df + 2.0*(1.0 + c)); - } - } - else - { - for (i = 0; i < vlen; ++i) - { - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - } +// printf ("%5d [%.7f, %.7f] %.7f %.7f\n", iter, a, b, phi, b - a); +// fflush(NULL); - } + //phi += FLT_MIN; - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f\n", - count++, b, c, b / (c+1.0), logL, chi2); - fflush(NULL); - } + InvGammaAdjustEvals(evals, vlen, cnum, tmpevals, phi, nu); + EigenReconSym(cdsA->CovMat, (const double **) evecs, evals, vlen); - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); + for (i = 0; i < vlen; ++i) + invevals[i] = 1.0 / evals[i]; + + EigenReconSym(cdsA->WtMat, (const double **) evecs, invevals, vlen); - if (cdsA->algo->verbose != 0) + if (algo->rounds < 3) { - printf(">>>>> Final: %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f\n", - count++, b, c, b / (c+1.0), logL, chi2); - fflush(NULL); + for (i = 0; i < vlen; ++i) + for (j = 0; j < i; ++j) + cdsA->WtMat[i][j] = cdsA->WtMat[j][i] = 0.0; } - memcpy(variance, newvar, vlen * sizeof(double)); + for (i = 0; i < vlen; ++i) + cdsA->var[i] = cdsA->CovMat[i][i]; + + chi2 = chi_sqr_adapt(evals, vlen, 0, &logL, 0.5*phi, 0.5*nu, + invgamma_pdf, invgamma_lnpdf, invgamma_int); } - cdsA->stats->hierarch_p1 = b; - cdsA->stats->hierarch_p2 = c; - cdsA->stats->hierarch_chi2 = chi2; + gsl_min_fminimizer_free(s); - free(newvar); + stats->hierarch_p1 = 0.5*phi; + stats->hierarch_p2 = 0.5*nu; + stats->hierarch_chi2 = chi2; } -static void -InvGammaAdjustVar(double *newvar, const int vlen, 
const int cnum, - double *var, const double b, const double c) +/* Finging the root doesn't seem to work */ +#include +void +InvGammaFitMarginalGSLRoot(CdsArray *cdsA) { - int i; + const int vlen = cdsA->vlen, cnum = cdsA->cnum; + int status, i; + int iter, max_iter = 100; + const gsl_root_fdfsolver_type *T = NULL; + gsl_root_fdfsolver *s = NULL; + double oldphi = 2.0*stats->hierarch_p1; + double phi, phi0; + double chi2 = 0.0, logL; + double nu = 1.0; + gsl_function_fdf FDF; + double prec = FLT_MIN; - for (i = 0; i < vlen; ++i) - //newvar[i] = (3.0*cnum*var[i] + 2.0*b) / (3.0*cnum + 2.0 + 2.0*c); // this is the conditional maximization algorithm - newvar[i] = (3.0*cnum*var[i] + 2.0*b) / (3.0*cnum + 2.0*c); // this is required for an EM algorithm of the variances -} + phi = oldphi; + if (algo->varweight > 0) + { + double *var3N = cdsA->samplevar3N; -static void -InvGammaAdjustVarNoN(double *newvar, const int vlen, const int cnum, - double *var, const double b, const double c) -{ - int i; + nu = 1.0; - for (i = 0; i < vlen; ++i) - newvar[i] = (3.0*var[i] + 2.0*b) / (3.0 + 2.0*(1.0 + c)); -} + CalcVar(cdsA); + phi = 0.0; + for (i = 0; i < vlen; ++i) + phi += var3N[i]; -static void -InvGammaAdjustVarOcc(Cds **cds, double *newvar, const int vlen, const int cnum, - double *var, const double b, const double c) -{ - int i, j; - double df; + phi /= 3.0*cnum*vlen; + phi0 = phi; - for (i = 0; i < vlen; ++i) - { - df = 0; - for (j = 0; j < cnum; ++j) - df += cds[j]->o[i]; + FDF.f = &LogPrPhi_f; + FDF.df = &LogPrPhi_df; + FDF.fdf = &LogPrPhi_fdf; + FDF.params = (void *) cdsA; - df *= 3; + T = gsl_root_fdfsolver_newton; + s = gsl_root_fdfsolver_alloc(T); - newvar[i] = (df*var[i] + 2.0*b) / (df + 2.0*(1.0 + c)); + gsl_root_fdfsolver_set(s, &FDF, phi); + + iter = 0; +// printf ("%5d [%.7f, %.7f] %.7f\n", iter, phi, phi0, phi - phi0); +// fflush(NULL); + do + { + iter++; + status = gsl_root_fdfsolver_iterate(s); + phi0 = phi; + phi = gsl_root_fdfsolver_root (s); + status = 
gsl_root_test_delta (phi, phi0, 0, prec); + +// if (status == GSL_SUCCESS) +// printf ("Converged:\n"); +// +// printf ("%5d [%.7f, %.7f] %.7f\n", iter, phi, phi0, phi - phi0); +// fflush(NULL); + } + while(status == GSL_CONTINUE && iter < max_iter); + + InvGammaAdjustVar3N(cdsA->var, vlen, cnum, var3N, phi, nu); + + chi2 = chi_sqr_adapt(cdsA->var, vlen, 0, &logL, 0.5*phi, 0.5*nu, + invgamma_pdf, invgamma_lnpdf, invgamma_int); } + + gsl_root_fdfsolver_free(s); + + stats->hierarch_p1 = 0.5*phi; + stats->hierarch_p2 = 0.5*nu; + stats->hierarch_chi2 = chi2; } +/* DLT 2008-03-28 new */ +/* Assumes a known shape param c, real ML-EM fit. + Estimates smallest 4 eigenvalues as expected (inverse) values given other larger evals. + Uses expectation of inverse variances. +*/ void -ConjBayesAdjustVar(double *newvar, const double *var, const int vlen, const int cnum, const double phi) -{ - int i; - - for (i = 0; i < vlen; ++i) - { - printf("\n%d -- bvar:%f ", i, var[i]); - newvar[i] = (cnum + 1.0) / (1.0 / phi + cnum / var[i]); - //newvar[i] = 2.0 / (1.0 / phi + 1.0 / var[i]); - printf("avar:%f", newvar[i]); - printf("\nphi:%f\n", phi); - } - fflush(NULL); -} - - -/* Adjustment assuming an inverse wishart prior, empirical bayes for the variance - phi * I, a diagonal homoskedastic covariance matrix. Three degrees of freedom, - one for each dimension. -*/ -void -WishartAdjustCov(double *newvar, const double *var, const int vlen, const int cnum, const double phi) -{ - int i; - double nd = 3.0 * cnum; - double df = 3.0; - - for (i = 0; i < vlen; ++i) - { - printf("\n%d -- bvar:%f ", i, var[i]); - newvar[i] = (df * phi + nd * var[i]) / (df + nd + vlen + 1); - printf("avar:%f", newvar[i]); - printf("\nphi:%f\n", phi); - } - fflush(NULL); -} - - -/* When assuming a diagonal inverse wishart matrix, the pdf reduces to the - product of scale inverse chi-squares (no need for vlen in denom). 
*/ -void -WishartAdjustVar(double *newvar, const double *var, const int vlen, const int cnum, const double phi) -{ - int i; - double nd = 3.0 * cnum; -// double df = 3.0; - - for (i = 0; i < vlen; ++i) - { - printf("\n%d -- bvar:%f ", i, var[i]); - //newvar[i] = (df * phi + nd * var[i]) / (df + nd + 1); - newvar[i] = (phi + nd * var[i]) / (nd - 1.0); - //newvar[i] = (3.0*phi + nd*var[i]) / (nd - 3.0); - printf("avar:%f", newvar[i]); - printf("\nphi:%f\n", phi); - } - fflush(NULL); -} - - -/* Assumes that each structure has its own Wishart prior, proportional to the identity, - and that all the priors are equal */ -void -WishartAdjustVar2(double *newvar, const double *var, const int vlen, const int cnum, const double phi) -{ - int i; -// double nd = 3.0 * cnum; -// double df = 3.0; - - for (i = 0; i < vlen; ++i) - { - printf("\n%d -- bvar:%f ", i, var[i]); - //newvar[i] = (df * phi + nd * var[i]) / (df + nd + 1); - newvar[i] = (3.0*phi + var[i]) / 2.0; - printf("avar:%f", newvar[i]); - printf("\nphi:%f\n", phi); - } - fflush(NULL); -} - - -void -WishartFitVar(CdsArray *cdsA, int iterate) -{ - double *newvar = NULL; - double *var = cdsA->var; - double precision = cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; - double phi = 0.0, oldphi; - int i, count; - - newvar = malloc(vlen * sizeof(double)); - - memcpy(newvar, var, vlen * sizeof(double)); - - count = 0; - do - { - oldphi = phi; - - phi = 0.0; - for (i = 0; i < vlen; ++i) - phi += 1.0 / newvar[i]; - - //phi = vlen / phi; - phi = (vlen - 2.0) / phi; - //phi = 1.0; - - WishartAdjustVar(newvar, var, vlen, cnum, phi); - printf("\n count:%d oldphi:%f phi:%f", count, oldphi, phi); - - count++; - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldphi - phi) > fabs(phi*precision)); - - memcpy(var, newvar, vlen * sizeof(double)); - - free(newvar); -} - - -void -WishartFitVar2(CdsArray *cdsA, int iterate) -{ - double *newvar = NULL; - double *var = cdsA->var; - double 
precision = cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; - double phi = 0.0, oldphi; - int i, count; - - newvar = malloc(vlen * sizeof(double)); - - memcpy(newvar, var, vlen * sizeof(double)); - - count = 0; - do - { - oldphi = phi; - - phi = 0.0; - for (i = 0; i < vlen; ++i) - phi += 1.0 / newvar[i]; - - phi = (3.0*vlen +6.0*vlen*cnum)/ phi; - //phi = 1.0; - - WishartAdjustVar2(newvar, var, vlen, cnum, phi); - printf("\n count:%d oldphi:%f phi:%f", count, oldphi, phi); - - count++; - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldphi - phi) > fabs(phi*precision)); - - memcpy(var, newvar, vlen * sizeof(double)); - - free(newvar); -} - - -void -InvGammaFitEvals(CdsArray *cdsA, int iterate) -{ - double *newvar = NULL; - double *variance = cdsA->var; - double **evecs = NULL; - double precision = cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; - double nd, oldb, oldc, b, c, chi2 = DBL_MAX, logL, harmave, mode; - int i, count, newlen; - - newvar = malloc(vlen * sizeof(double)); - nd = 3.0 * cnum; - oldb = oldc = DBL_MAX; - - if (cdsA->algo->varweight != 0) - { - b = cdsA->stats->hierarch_p1; - c = cdsA->stats->hierarch_p2; - newlen = vlen - 3; - - memcpy(newvar, variance, vlen * sizeof(double)); - - count = 0; - do - { - oldb = b; - oldc = c; - - qsort(newvar, vlen, sizeof(double), dblcmp_rev); - chi2 = invgamma_fit(newvar, newlen, &b, &c, &logL); - - if (2*c + 2 > 1000 * nd) - { - harmave = HarmonicAve(newvar + vlen - newlen, newlen); - - for (i = 0; i < vlen; ++i) - variance[i] = harmave; - - return; - } - - if (cdsA->algo->alignment == 1) - InvGammaAdjustVarOcc(cdsA->cds, newvar, vlen, cnum, variance, b, c); - else - InvGammaAdjustVar(newvar, vlen, cnum, variance, b, c); - - /* newvar[findmin(variance, vlen)] = 2.0*b / (nd + 2.0*(1.0 + c)); */ - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, 
b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - count++; - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> Final: %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - if (cdsA->algo->alignment == 1) - InvGammaAdjustVarOcc(cdsA->cds, variance, vlen, cnum, variance, b, c); - else - InvGammaAdjustVar(variance, vlen, cnum, variance, b, c); - } - else if (cdsA->algo->covweight != 0) - { - if (vlen - 3 < nd - 6) - newlen = vlen - 3; - else - newlen = nd - 6; - - evecs = cdsA->tmpmatKK2; - - eigenvalsym((const double **) cdsA->CovMat, variance, evecs, vlen); - /* eigensym((const double **) cdsA->CovMat, variance, evecs, vlen); */ - - /* RevVecIp(variance, vlen); */ - - for (i = 0; i < vlen - newlen; ++i) - variance[i] = 0.0; - - b = cdsA->stats->hierarch_p1; - c = cdsA->stats->hierarch_p2; -/* b = c = 0.0; */ - - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - count = 0; - do - { - oldb = b; - oldc = c; - - chi2 = invgamma_fit(newvar + vlen - newlen, newlen, &b, &c, &logL); - - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - /* the mode of an inv gamma dist */ - mode = b / (c+1.0); - -/* for (i = 0; i < vlen - newlen; ++i) */ -/* newvar[i] = mode; */ -/* for (i = 0; i < newlen; ++i) */ -/* printf("\n%3d %e", i, newvar[i]); */ - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - if (count > 100) - { - printf("\n WARNING01: Failed to converge in InvGammaFitVars(), round %d\n", - cdsA->algo->rounds); - 
fflush(NULL); - break; - } - - count++; - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> Final: %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - InvGammaAdjustCov(cdsA, b, c); -/* printf("\n\n count: %d", count); */ -/* for (i = 0; i < vlen; ++i) */ -/* printf("\n%3d %8.3e %8.3e", i, newvar[i], mode); */ -/* EigenReconSym(cdsA->CovMat, (const double **) evecs, (const double *) newvar, vlen); */ - - for (i = 0; i < vlen; ++i) - variance[i] = cdsA->CovMat[i][i]; - -/* printf("\n\n count: %d harm ave: %8.3e log ave: %8.3e", */ -/* count, HarmonicAve(variance, vlen), exp(LogarithmicAve(variance, vlen))); */ - } - - cdsA->stats->hierarch_p1 = b; - cdsA->stats->hierarch_p2 = c; - cdsA->stats->hierarch_chi2 = chi2; - - free(newvar); -} - - -void -InvGammaFitEvalsNoN(CdsArray *cdsA, int iterate) -{ - double *newvar = NULL; - double *variance = cdsA->var; - double **evecs = NULL; - double precision = cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; - double nd, oldb, oldc, b, c, chi2 = DBL_MAX, logL, harmave, mode; - int i, count, newlen; - - newvar = malloc(vlen * sizeof(double)); - nd = 3.0 * cnum; - oldb = oldc = DBL_MAX; - - if (cdsA->algo->varweight != 0) - { - b = cdsA->stats->hierarch_p1; - c = cdsA->stats->hierarch_p2; - newlen = vlen - 3; - - memcpy(newvar, variance, vlen * sizeof(double)); - - count = 0; - do - { - oldb = b; - oldc = c; - - qsort(newvar, vlen, sizeof(double), dblcmp_rev); - chi2 = invgamma_fit(newvar, newlen, &b, &c, &logL); - - if (2*c + 2 > 1000 * nd) - { - harmave = HarmonicAve(newvar + vlen - newlen, newlen); - - for (i = 0; i < vlen; ++i) - variance[i] = harmave; - - return; - } - - if (cdsA->algo->alignment == 1) - 
InvGammaAdjustVarOcc(cdsA->cds, newvar, vlen, cnum, variance, b, c); - else - InvGammaAdjustVarNoN(newvar, vlen, cnum, variance, b, c); - - /* newvar[findmin(variance, vlen)] = 2.0*b / (nd + 2.0*(1.0 + c)); */ - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - count++; - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> Final: %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - if (cdsA->algo->alignment == 1) - InvGammaAdjustVarOcc(cdsA->cds, variance, vlen, cnum, variance, b, c); - else - InvGammaAdjustVar(variance, vlen, cnum, variance, b, c); - } - else if (cdsA->algo->covweight != 0) - { - if (vlen - 3 < nd - 6) - newlen = vlen - 3; - else - newlen = nd - 6; - - evecs = cdsA->tmpmatKK2; - - eigenvalsym((const double **) cdsA->CovMat, variance, evecs, vlen); - /* eigensym((const double **) cdsA->CovMat, variance, evecs, vlen); */ - - /* RevVecIp(variance, vlen); */ - - for (i = 0; i < vlen - newlen; ++i) - variance[i] = 0.0; - - b = cdsA->stats->hierarch_p1; - c = cdsA->stats->hierarch_p2; -/* b = c = 0.0; */ - - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - count = 0; - do - { - oldb = b; - oldc = c; - - chi2 = invgamma_fit(newvar + vlen - newlen, newlen, &b, &c, &logL); - - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - /* the mode of an inv gamma dist */ - mode = b / (c+1.0); - -/* for (i = 0; i < vlen - newlen; ++i) */ -/* newvar[i] = mode; */ -/* for (i = 0; i < newlen; ++i) */ -/* printf("\n%3d %e", i, newvar[i]); */ 
- - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - if (count > 100) - { - printf("\n WARNING01: Failed to converge in InvGammaFitVars(), round %d\n", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - count++; - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> Final: %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - InvGammaAdjustCov(cdsA, b, c); -/* printf("\n\n count: %d", count); */ -/* for (i = 0; i < vlen; ++i) */ -/* printf("\n%3d %8.3e %8.3e", i, newvar[i], mode); */ -/* EigenReconSym(cdsA->CovMat, (const double **) evecs, (const double *) newvar, vlen); */ - - for (i = 0; i < vlen; ++i) - variance[i] = cdsA->CovMat[i][i]; - -/* printf("\n\n count: %d harm ave: %8.3e log ave: %8.3e", */ -/* count, HarmonicAve(variance, vlen), exp(LogarithmicAve(variance, vlen))); */ - } - - cdsA->stats->hierarch_p1 = b; - cdsA->stats->hierarch_p2 = c; - cdsA->stats->hierarch_chi2 = chi2; - - free(newvar); -} - - -/* set missing evals to the mode of the inv gamma */ -void -InvGammaFitModeEvals(CdsArray *cdsA, int iterate) -{ - double *newvar = NULL; - double *variance = cdsA->var; - double **evecs = NULL; - double precision = cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; - double nd, oldb, oldc, b, c, chi2 = DBL_MAX, logL, harmave, mode; - int i, count, newlen; - - newvar = malloc(vlen * sizeof(double)); - nd = 3.0 * cnum; - oldb = oldc = DBL_MAX; - - if (cdsA->algo->varweight != 0) - { - b = cdsA->stats->hierarch_p1; - c = cdsA->stats->hierarch_p2; - newlen = vlen - 3; - - memcpy(newvar, 
variance, vlen * sizeof(double)); - - count = 0; - do - { - oldb = b; - oldc = c; - - qsort(newvar, vlen, sizeof(double), dblcmp_rev); - chi2 = invgamma_fit(newvar, newlen, &b, &c, &logL); - - if (2*c + 2 > 100 * nd) - { - harmave = HarmonicAve(newvar + vlen - newlen, newlen); - - for (i = 0; i < vlen; ++i) - variance[i] = harmave; - - return; - } - - if (cdsA->algo->alignment == 1) - InvGammaAdjustVarOcc(cdsA->cds, newvar, vlen, cnum, variance, b, c); - else - InvGammaAdjustVar(newvar, vlen, cnum, variance, b, c); - - /* newvar[findmin(variance, vlen)] = 2.0*b / (nd + 2.0*(1.0 + c)); */ - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - count++; - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> Final: %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - if (cdsA->algo->alignment == 1) - InvGammaAdjustVarOcc(cdsA->cds, variance, vlen, cnum, variance, b, c); - else - InvGammaAdjustVar(variance, vlen, cnum, variance, b, c); - - variance[findmin(variance, vlen)] = b/(1.0 + c); - } - else if (cdsA->algo->covweight != 0) - { - int negevals = 3; - if (vlen - negevals < nd - 3 - negevals) - newlen = vlen - negevals; - else - newlen = nd - 3 - negevals; - - evecs = cdsA->tmpmatKK2; - printf("\n\n count:"); - /* eigenvalsym((const double **) cdsA->CovMat, variance, evecs, vlen); */ - -/* MatPrint(cdsA->CovMat, cdsA->vlen); */ -/* eigensym((const double **) cdsA->CovMat, variance, evecs, vlen); */ -/* EigenReconSym(cdsA->CovMat, (const double **) evecs, (const double *) variance, vlen); */ -/* MatPrint(cdsA->CovMat, 
cdsA->vlen); */ - - eigensym((const double **) cdsA->CovMat, variance, evecs, vlen); - - for (i = 0; i < vlen; ++i) - printf("\nEVALS %3d %e", i, variance[i]); - - /* RevVecIp(variance, vlen); */ - - for (i = 0; i < vlen - newlen; ++i) - variance[i] = 0.0; - - b = cdsA->stats->hierarch_p1; - c = cdsA->stats->hierarch_p2; -/* b = c = 0.0; */ - - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - count = 0; - do - { - oldb = b; - oldc = c; - - /* chi2 = invgamma_bayes_fit(newvar + vlen - newlen, newlen, &b, &c, &logL); */ - chi2 = invgamma_fit(newvar + vlen - newlen, newlen, &b, &c, &logL); - - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - /* the mode of an inv gamma dist */ - mode = b / (c+1.0); - //mode = b/(c-1.0); /* mean */ - - for (i = 0; i < vlen - newlen; ++i) - newvar[i] = mode; - - for (i = 0; i < vlen; ++i) - printf("\n%3d %e", i, newvar[i]); - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - if (count > 100) - { - printf("\n WARNING01: Failed to converge in InvGammaFitVars(), round %d\n", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - count++; - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> Final: %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - -/* InvGammaAdjustCovMode(cdsA, b, c); */ -/* printf("\n\n count: %d", count); */ - for (i = 0; i < vlen; ++i) - printf("\n%3d %8.3e %8.3e", i, newvar[i], mode); - /* RevVecIp(variance, vlen); */ - EigenReconSym(cdsA->CovMat, (const double **) evecs, (const double 
*) newvar, vlen); - /* MatPrint(cdsA->CovMat, cdsA->vlen); */ - - for (i = 0; i < vlen; ++i) - variance[i] = cdsA->CovMat[i][i]; - -/* printf("\n\n count: %d harm ave: %8.3e log ave: %8.3e", */ -/* count, HarmonicAve(variance, vlen), exp(LogarithmicAve(variance, vlen))); */ - } - - cdsA->stats->hierarch_p1 = b; - cdsA->stats->hierarch_p2 = c; - cdsA->stats->hierarch_chi2 = chi2; - - free(newvar); -} - - -void -InvGammaBayesFitEvals(CdsArray *cdsA, int iterate) -{ - double *newvar = NULL; - double *variance = cdsA->var; - double **evecs = NULL; - double precision = cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; - double nd, oldb, oldc, b, c, chi2 = DBL_MAX, logL, harmave, mode; - int i, count, newlen; - - newvar = malloc(vlen * sizeof(double)); - nd = 3.0 * cnum; - oldb = oldc = DBL_MAX; - - if (cdsA->algo->varweight != 0) - { - b = cdsA->stats->hierarch_p1; - c = cdsA->stats->hierarch_p2; - newlen = vlen - 3; - - memcpy(newvar, variance, vlen * sizeof(double)); - - count = 0; - do - { - oldb = b; - oldc = c; - - qsort(newvar, vlen, sizeof(double), dblcmp_rev); - chi2 = invgamma_bayes_fit(newvar, newlen, &b, &c, &logL); - - if (2*c + 2 > 100 * nd) - { - harmave = HarmonicAve(newvar + vlen - newlen, newlen); - - for (i = 0; i < vlen; ++i) - variance[i] = harmave; - - return; - } - - if (cdsA->algo->alignment == 1) - InvGammaAdjustVarOcc(cdsA->cds, newvar, vlen, cnum, variance, b, c); - else - InvGammaAdjustVar(newvar, vlen, cnum, variance, b, c); - - /* newvar[findmin(variance, vlen)] = 2.0*b / (nd + 2.0*(1.0 + c)); */ - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - count++; - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); - - if (cdsA->algo->verbose != 0) - { - 
printf(">>>>> Final: %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - if (cdsA->algo->alignment == 1) - InvGammaAdjustVarOcc(cdsA->cds, variance, vlen, cnum, variance, b, c); - else - InvGammaAdjustVar(variance, vlen, cnum, variance, b, c); - } - else if (cdsA->algo->covweight != 0) - { - if (vlen - 3 < nd - 6) - newlen = vlen - 3; - else - newlen = nd - 6; - - evecs = cdsA->tmpmatKK2; - - eigenvalsym((const double **) cdsA->CovMat, variance, evecs, vlen); - /* eigensym((const double **) cdsA->CovMat, variance, evecs, vlen); */ - - /* RevVecIp(variance, vlen); */ - - for (i = 0; i < vlen - newlen; ++i) - variance[i] = 0.0; - - b = cdsA->stats->hierarch_p1; - c = cdsA->stats->hierarch_p2; -/* b = c = 0.0; */ - - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - count = 0; - do - { - oldb = b; - oldc = c; - - chi2 = invgamma_bayes_fit(newvar + vlen - newlen, newlen, &b, &c, &logL); - - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - /* the mode of an inv gamma dist */ - mode = b / (c+1.0); - -/* for (i = 0; i < vlen - newlen; ++i) */ -/* newvar[i] = mode; */ -/* for (i = 0; i < newlen; ++i) */ -/* printf("\n%3d %e", i, newvar[i]); */ - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - if (count > 100) - { - printf("\n WARNING01: Failed to converge in InvGammaFitVars(), round %d\n", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - count++; - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> Final: %3d b:% -3.2e c:% -3.2e 
mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - InvGammaAdjustCov(cdsA, b, c); -/* printf("\n\n count: %d", count); */ -/* for (i = 0; i < vlen; ++i) */ -/* printf("\n%3d %8.3e %8.3e", i, newvar[i], mode); */ -/* EigenReconSym(cdsA->CovMat, (const double **) evecs, (const double *) newvar, vlen); */ - - for (i = 0; i < vlen; ++i) - variance[i] = cdsA->CovMat[i][i]; - -/* printf("\n\n count: %d harm ave: %8.3e log ave: %8.3e", */ -/* count, HarmonicAve(variance, vlen), exp(LogarithmicAve(variance, vlen))); */ - } - - cdsA->stats->hierarch_p1 = b; - cdsA->stats->hierarch_p2 = c; - cdsA->stats->hierarch_chi2 = chi2; - - free(newvar); -} - - -void -InvGammaAdjustVarBfact_old(Cds **cds, double *newvar, const int vlen, const int cnum, - const double *var, const double b, const double c) -{ - int i, j; - const double nd = 3.0 * cnum; - const double fact = 1.0 / (8.0 * cnum + 2.0 + 2.0 * c); - - for (i = 0; i < vlen; ++i) - newvar[i] = nd * var[i] + 2.0 * b; - - for (j = 0; j < cnum; ++j) - for (i = 0; i < vlen; ++i) - newvar[i] += cds[j]->bfact_c * 3.0 * cds[j]->prvar[i]; - - for (i = 0; i < vlen; ++i) - newvar[i] *= fact; -} - - -void -InvGammaAdjustVarBfactOcc_old(CdsArray *cdsA, double *newvar, const int vlen, const int cnum, - const double *var, const double b, const double c) -{ - int i, j; - Cds **cds = cdsA->cds; - - for (i = 0; i < vlen; ++i) - newvar[i] = 3.0 * cdsA->df[i] * var[i] + 2.0 * b; - - for (j = 0; j < cnum; ++j) - for (i = 0; i < vlen; ++i) - newvar[i] += cds[j]->bfact_c * 3.0 * cds[j]->prvar[i]; - - for (i = 0; i < vlen; ++i) - newvar[i] /= (8.0 * cdsA->df[i] + 2.0 + 2.0 * c); -} - - -static void -InvGammaAdjustVarBfact(Cds **cds, double *newvar, const int vlen, const int cnum, - const double *var, const double b, const double c) -{ - int i, j; - const double nd = 3.0 * cnum; - //const double fact = 1.0 / (8.0 * cnum + 2.0 * c + 2.0); - const 
double fact = 1.0 / (nd + cnum + 2.0 * (cnum + 1.0 + c)); - - for (i = 0; i < vlen; ++i) - newvar[i] = nd * var[i] + 2.0 * b; - - for (j = 0; j < cnum; ++j) - for (i = 0; i < vlen; ++i) - newvar[i] += 3.0 * cds[j]->bfact_c * cds[j]->prvar[i]; - - for (i = 0; i < vlen; ++i) - newvar[i] *= fact; -} - - -static void -InvGammaAdjustVarBfactOcc(CdsArray *cdsA, double *newvar, const int vlen, const int cnum, - const double *var, const double b, const double c) -{ - int i, j; - Cds **cds = cdsA->cds; - const double nd = 3.0 * cnum; - //const double fact = 1.0 / (8.0 * cnum + 2.0 * c + 2.0); - const double fact = 1.0 / (nd + cnum + 2.0 * (cnum + 1.0 + c)); - - for (i = 0; i < vlen; ++i) - newvar[i] = nd * var[i] + 2.0 * b; - - for (j = 0; j < cnum; ++j) - for (i = 0; i < vlen; ++i) - newvar[i] += cds[j]->bfact_c * cds[j]->prvar[i]; - - for (i = 0; i < vlen; ++i) - newvar[i] *= fact; -} - - -void -InvGammaFitEvalsBfact(CdsArray *cdsA, int iterate) -{ - double *newvar = NULL; - double *variance = cdsA->var; - double precision = cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; - double nd, oldb, oldc, b, c, chi2 = DBL_MAX, logL; - int count, newlen; - - newvar = malloc(vlen * sizeof(double)); - nd = 3.0 * cnum; - oldb = oldc = DBL_MAX; - - if (cdsA->algo->varweight != 0) - { - b = cdsA->stats->hierarch_p1; - c = cdsA->stats->hierarch_p2; - newlen = vlen - 3; - - memcpy(newvar, variance, vlen * sizeof(double)); - - count = 0; - do - { - oldb = b; - oldc = c; - - qsort(newvar, vlen, sizeof(double), dblcmp_rev); - chi2 = invgamma_fit(newvar, newlen, &b, &c, &logL); - - if (cdsA->algo->alignment == 1) - InvGammaAdjustVarBfactOcc(cdsA, newvar, vlen, cnum, variance, b, c); - else - InvGammaAdjustVarBfact(cdsA->cds, newvar, vlen, cnum, variance, b, c); - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - 
fflush(NULL); - } - - if (cdsA->algo->bfact == 2) - CalcBfactC(cdsA); - - count++; - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> Final: %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - if (cdsA->algo->alignment == 1) - InvGammaAdjustVarBfactOcc(cdsA, variance, vlen, cnum, variance, b, c); - else - InvGammaAdjustVarBfact(cdsA->cds, variance, vlen, cnum, variance, b, c); - } - else if (cdsA->algo->covweight != 0) - { - printf("\n ERROR: B-factor weighting cannot be used yet with covariance matrix weighting -c \n"); - Usage(0); - exit(EXIT_FAILURE); - } - - cdsA->stats->hierarch_p1 = b; - cdsA->stats->hierarch_p2 = c; - cdsA->stats->hierarch_chi2 = chi2; - - free(newvar); -} - - -/* This one includes the smallest distinct eigenvalue (though omitting it from the - first round of fitting) */ -void -InvGammaFitEvals3(CdsArray *cdsA, int iterate) -{ - double *newvar = NULL; - double *variance = cdsA->var; - double **evecs = NULL; - double precision = cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; - double nd, oldb, oldc, b, c, chi2 = DBL_MAX, logL, harmave, mode; - int i, j, count, newlen, df; - - newvar = malloc(vlen * sizeof(double)); - nd = 3.0 * cnum; - oldb = oldc = DBL_MAX; - - if (cdsA->algo->varweight != 0) - { -/* b = cdsA->stats->hierarch_p1; */ -/* c = cdsA->stats->hierarch_p2; */ - b = 0.01; - c = 1.0; -/* newlen = vlen - 1; */ - - memcpy(newvar, variance, vlen * sizeof(double)); - - count = 0; - do - { - oldb = b; - oldc = c; - -/* if (count > 2) */ - newlen = vlen; - - qsort(newvar, vlen, sizeof(double), dblcmp); - chi2 = invgamma_fit(newvar + vlen - newlen, newlen, &b, &c, &logL); - - if (2*c + 2 > 100 * 3 * cnum) - { - harmave = HarmonicAve(newvar + 
vlen - newlen, newlen); - - for (i = 0; i < vlen; ++i) - variance[i] = harmave; - - return; - } - - if (cdsA->algo->alignment == 1) - { - for (i = 0; i < vlen; ++i) - { - df = 0; - for (j = 0; j < cnum; ++j) - df += cdsA->cds[j]->o[i]; - - df *= 3; - - newvar[i] = (df*variance[i] + 2.0*b) / (df + 2.0*(1.0 + c)); - } - } - else - { - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - } - - /* newvar[findmin(variance, vlen)] = 2.0*b / (nd + 2.0*(1.0 + c)); */ - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - count++; - - if (count > 100) - { - printf("\n WARNING01: Failed to converge in InvGammaFitVars(), round %d\n ", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> Final: %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count++, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - } - else if (cdsA->algo->covweight != 0) - { - if (vlen - 3 < nd - 6) - newlen = vlen - 3; - else - newlen = nd - 6; - - evecs = cdsA->tmpmatKK2; - - eigenvalsym((const double **) cdsA->CovMat, variance, evecs, vlen); - - /* RevVecIp(variance, vlen); */ - - for (i = 0; i < vlen - newlen; ++i) - variance[i] = 0.0; - - b = cdsA->stats->hierarch_p1; - c = cdsA->stats->hierarch_p2; - b = c = 0.0; - -/* for (i = 0; i < vlen; ++i) */ -/* newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); */ - - count = 0; - do - { - oldb = b; - oldc = c; - -/* if (count > 1) */ -/* { */ -/* newlen = vlen; */ -/* */ -/* mode = b / (c+1.0); */ -/* */ -/* for (i = 0; i < vlen - newlen; ++i) */ -/* variance[i] = mode; */ 
-/* } */ - - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - chi2 = invgamma_fit(newvar + vlen - newlen, newlen, &b, &c, &logL); - - for (i = vlen - newlen; i < newlen; ++i) - printf("%3d %e\n", i, newvar[i]); - - if (cdsA->algo->verbose != 0) - { - mode = b / (c+1.0); - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, mode, logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - if (count > 100) - { - printf("\n WARNING01: Failed to converge in InvGammaFitVars(), round %d\n ", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - ++count; - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); - - InvGammaAdjustCov(cdsA, b, c); - - for (i = 0; i < vlen; ++i) - variance[i] = cdsA->CovMat[i][i]; - } - - if (cdsA->algo->alignment == 1) - { - for (i = 0; i < vlen; ++i) - { - df = 0; - for (j = 0; j < cnum; ++j) - df += cdsA->cds[j]->o[i]; - - df *= 3; - - variance[i] = (df*variance[i] + 2.0*b) / (df + 2.0*(1.0 + c)); - } - } - else - { - for (i = 0; i < vlen; ++i) - variance[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - } - - cdsA->stats->hierarch_p1 = b; - cdsA->stats->hierarch_p2 = c; - cdsA->stats->hierarch_chi2 = chi2; - - free(newvar); -} - - -void -InvGammaBayesFitEvals3(CdsArray *cdsA, int iterate) -{ - double *newvar = NULL; - double *variance = cdsA->var; - double **evecs = NULL; - double precision = cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; - double nd, oldb, oldc, b, c, chi2 = DBL_MAX, logL, harmave, mode; - int i, j, count, newlen, df; - - newvar = malloc(vlen * sizeof(double)); - nd = 3.0 * cnum; - oldb = oldc = DBL_MAX; - - if (cdsA->algo->varweight != 0) - { - b = cdsA->stats->hierarch_p1; - c = cdsA->stats->hierarch_p2; - - newlen = vlen - 1; - - memcpy(newvar, variance, vlen * sizeof(double)); - - count 
= 0; - do - { - oldb = b; - oldc = c; - - if (count > 2) - newlen = vlen; - - qsort(newvar, vlen, sizeof(double), dblcmp); - chi2 = invgamma_bayes_fit(newvar + vlen - newlen, newlen, &b, &c, &logL); - - if (2*c + 2 > 100 * 3 * cnum) - { - harmave = HarmonicAve(newvar + vlen - newlen, newlen); - - for (i = 0; i < vlen; ++i) - variance[i] = harmave; - - return; - } - - if (cdsA->algo->alignment == 1) - { - for (i = 0; i < vlen; ++i) - { - df = 0; - for (j = 0; j < cnum; ++j) - df += cdsA->cds[j]->o[i]; - - df *= 3; - - newvar[i] = (df*variance[i] + 2.0*b) / (df + 2.0*(1.0 + c)); - } - } - else - { - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - } - - /* newvar[findmin(variance, vlen)] = 2.0*b / (nd + 2.0*(1.0 + c)); */ - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - count++; - - if (count > 100) - { - printf("\n WARNING01: Failed to converge in InvGammaFitVars(), round %d\n ", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> Final: %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count++, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - } - else if (cdsA->algo->covweight != 0) - { - if (vlen - 3 < nd - 6) - newlen = vlen - 3; - else - newlen = nd - 6; - - evecs = cdsA->tmpmatKK2; - - eigenvalsym((const double **) cdsA->CovMat, variance, evecs, vlen); - - /* RevVecIp(variance, vlen); */ - - for (i = 0; i < vlen - newlen; ++i) - variance[i] = 0.0; - - b = cdsA->stats->hierarch_p1; - c = cdsA->stats->hierarch_p2; - b = c = 0.0; - -/* for (i = 0; i < vlen; ++i) */ -/* 
newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); */ - - count = 0; - do - { - oldb = b; - oldc = c; - -/* if (count > 1) */ -/* { */ -/* newlen = vlen; */ -/* */ -/* mode = b / (c+1.0); */ -/* */ -/* for (i = 0; i < vlen - newlen; ++i) */ -/* variance[i] = mode; */ -/* } */ - - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - chi2 = invgamma_bayes_fit(newvar + vlen - newlen, newlen, &b, &c, &logL); - - for (i = vlen - newlen; i < newlen; ++i) - printf("%3d %e\n", i, newvar[i]); - - if (cdsA->algo->verbose != 0) - { - mode = b / (c+1.0); - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, mode, logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - if (count > 100) - { - printf("\n WARNING01: Failed to converge in InvGammaFitVars(), round %d\n ", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - ++count; - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); - - InvGammaAdjustCov(cdsA, b, c); - - for (i = 0; i < vlen; ++i) - variance[i] = cdsA->CovMat[i][i]; - } - - if (cdsA->algo->alignment == 1) - { - for (i = 0; i < vlen; ++i) - { - df = 0; - for (j = 0; j < cnum; ++j) - df += cdsA->cds[j]->o[i]; - - df *= 3; - - variance[i] = (df*variance[i] + 2.0*b) / (df + 2.0*(1.0 + c)); - } - } - else - { - for (i = 0; i < vlen; ++i) - variance[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - } - - cdsA->stats->hierarch_p1 = b; - cdsA->stats->hierarch_p2 = c; - cdsA->stats->hierarch_chi2 = chi2; - - free(newvar); -} - - - - - -/* b = c in inverse gamma fit */ -void -InvGammaFitEvalsEq(CdsArray *cdsA, int iterate) -{ - double *newvar = NULL; - double *variance = cdsA->var; - double **evecs = NULL; - double precision = cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; - double nd, oldb, oldc, b, c, chi2 = DBL_MAX, logL, 
harmave, mode; - int i, j, count, newlen, df; - - newvar = malloc(vlen * sizeof(double)); - nd = 3.0 * cnum; - oldb = oldc = DBL_MAX; - - if (cdsA->algo->varweight != 0) - { - b = cdsA->stats->hierarch_p1; - c = cdsA->stats->hierarch_p2; - - if (!isfinite(b) || !isfinite(c)) - { - printf("\n ERROR01: b(%e) or c(%e) parameter in InvGammaFitVarsEq() not finite\n", - b, c); - fflush(NULL); - exit(EXIT_FAILURE); - } - - newlen = vlen - 1; - - memcpy(newvar, variance, vlen * sizeof(double)); - - count = 0; - do - { - oldb = b; - oldc = c; - - if (count > 2) - newlen = vlen; - - qsort(newvar, vlen, sizeof(double), dblcmp); - - if (count == 0 && cdsA->algo->rounds < 8) - chi2 = invgamma_eq_bc_fit(newvar + vlen - newlen, newlen, &b, &c, &logL, 0); - else - chi2 = invgamma_eq_bc_fit(newvar + vlen - newlen, newlen, &b, &c, &logL, 1); - - cdsA->stats->hierarch_p1 = b; - cdsA->stats->hierarch_p2 = c; - - if (2*c + 2 > 100 * 3 * cnum) - { - harmave = HarmonicAve(newvar + vlen - newlen, newlen); - - for (i = 0; i < vlen; ++i) - variance[i] = harmave; - - return; - } - - if (cdsA->algo->alignment == 1) - { - for (i = 0; i < vlen; ++i) - { - df = 0; - for (j = 0; j < cnum; ++j) - df += cdsA->cds[j]->o[i]; - - df *= 3; - - newvar[i] = (df*variance[i] + 2.0*b) / (df + 2.0*(1.0 + c)); - } - } - else - { - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - } - - /* newvar[findmin(variance, vlen)] = 2.0*b / (nd + 2.0*(1.0 + c)); */ - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - count++; - - if (count > 100) - { - printf("\n WARNING01: Failed to converge in InvGammaFitVars(), round %d\n ", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > 
fabs(c*precision)); - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> Final: %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count++, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - } - else if (cdsA->algo->covweight != 0) - { - if (vlen - 3 < nd - 6) - newlen = vlen - 3; - else - newlen = nd - 6; - - evecs = cdsA->tmpmatKK2; - - eigenvalsym((const double **) cdsA->CovMat, variance, evecs, vlen); - - /* RevVecIp(variance, vlen); */ - - for (i = 0; i < vlen - newlen; ++i) - variance[i] = 0.0; - - b = cdsA->stats->hierarch_p1; - c = cdsA->stats->hierarch_p2; - b = c = 0.0; - -/* for (i = 0; i < vlen; ++i) */ -/* newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); */ - - count = 0; - do - { - oldb = b; - oldc = c; - - if (count > 2) - newlen = vlen; - - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - chi2 = invgamma_eq_bc_fit(newvar + vlen - newlen, newlen, &b, &c, &logL, 0); - - for (i = vlen - newlen; i < newlen; ++i) - printf("%3d %e\n", i, newvar[i]); - - if (cdsA->algo->verbose != 0) - { - mode = b / (c+1.0); - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count, b, c, mode, logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - if (count > 100) - { - printf("\n WARNING01: Failed to converge in InvGammaFitVars(), round %d\n ", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - ++count; - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); - - InvGammaAdjustCov(cdsA, b, c); - - for (i = 0; i < vlen; ++i) - variance[i] = cdsA->CovMat[i][i]; - } - - if (cdsA->algo->alignment == 1) - { - for (i = 0; i < vlen; ++i) - { - df = 0; - for (j = 0; j < cnum; ++j) - df += cdsA->cds[j]->o[i]; - - df *= 3; - - variance[i] = (df*variance[i] + 2.0*b) / (df + 2.0*(1.0 + c)); - } - } - else - 
{ - for (i = 0; i < vlen; ++i) - variance[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - } - - cdsA->stats->hierarch_p1 = b; - cdsA->stats->hierarch_p2 = c; - cdsA->stats->hierarch_chi2 = chi2; - - free(newvar); -} - - -void -InvGammaFitEvals2(CdsArray *cdsA, int iterate) -{ - double *newvar = NULL; - double *variance = cdsA->var; - double **evecs = NULL; - double precision = cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; - double nd, oldb, oldc, b, c, chi2 = DBL_MAX, logL, harmave; - int i, j, count, newlen, df; - - newvar = malloc(vlen * sizeof(double)); - nd = 3.0 * cnum; - oldb = oldc = DBL_MAX; - - if (cdsA->algo->varweight != 0) - { - b = cdsA->stats->hierarch_p1; - c = cdsA->stats->hierarch_p2; - newlen = vlen - 3; - - memcpy(newvar, variance, vlen * sizeof(double)); - qsort(newvar, vlen, sizeof(double), dblcmp); - chi2 = invgamma_fit(newvar + vlen - newlen, newlen, &b, &c, &logL); - - if (2*c + 2 > 100 * 3 * cnum) - { - harmave = HarmonicAve(newvar + vlen - newlen, newlen); - - for (i = 0; i < vlen; ++i) - variance[i] = harmave; - - return; - } - - if (cdsA->algo->alignment == 1) - { - for (i = 0; i < vlen; ++i) - { - df = 0; - for (j = 0; j < cnum; ++j) - df += cdsA->cds[j]->o[i]; - - df *= 3; - - newvar[i] = (df*variance[i] + 2.0*b) / (df + 2.0*(1.0 + c)); - } - } - else - { - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - } - - count = 0; - do - { - oldb = b; - oldc = c; - - qsort(newvar, vlen, sizeof(double), dblcmp); - chi2 = invgamma_fit(newvar, vlen, &b, &c, &logL); - - if (2*c + 2 > 100 * 3 * cnum) - { - harmave = HarmonicAve(newvar, vlen); - - for (i = 0; i < vlen; ++i) - variance[i] = harmave; - - return; - } - - if (cdsA->algo->alignment == 1) - { - for (i = 0; i < vlen; ++i) - { - df = 0; - for (j = 0; j < cnum; ++j) - df += cdsA->cds[j]->o[i]; - - df *= 3; - - newvar[i] = (df*variance[i] + 2.0*b) / (df + 2.0*(1.0 + c)); - } - } - else - { - for (i = 0; i < vlen; 
++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - } - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count++, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> Final: %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", - count++, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - } - else if (cdsA->algo->covweight != 0) - { - if (vlen - 3 < nd - 6) - newlen = vlen - 3; - else - newlen = nd - 6; - - evecs = cdsA->tmpmatKK2; - - eigenvalsym((const double **) cdsA->CovMat, variance, evecs, vlen); - - /* RevVecIp(variance, vlen); */ - - for (i = 0; i < vlen - newlen; ++i) - variance[i] = 0.0; - - b = cdsA->stats->hierarch_p1; - c = cdsA->stats->hierarch_p2; -/* b = c = 0.0; */ - - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - count = 0; - do - { - ++count; - oldb = b; - oldc = c; - - chi2 = invgamma_fit(newvar + vlen - newlen, newlen, &b, &c, &logL); - - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - -/* for (i = 0; i < newlen; ++i) */ -/* printf("\n%3d %e", i, newvar[i]); */ - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f\n", - count, b, c, b / (c+1.0), logL, chi2); - fflush(NULL); - } - - if (count > 100) - { - printf("\n WARNING01: Failed to converge in InvGammaFitVars(), round %d\n ", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); - - InvGammaAdjustCov(cdsA, 
b, c); - - for (i = 0; i < vlen; ++i) - variance[i] = cdsA->CovMat[i][i]; - } - - if (cdsA->algo->alignment == 1) - { - for (i = 0; i < vlen; ++i) - { - df = 0; - for (j = 0; j < cnum; ++j) - df += cdsA->cds[j]->o[i]; - - df *= 3; - - variance[i] = (df*variance[i] + 2.0*b) / (df + 2.0*(1.0 + c)); - } - } - else - { - for (i = 0; i < vlen; ++i) - variance[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - } - - cdsA->stats->hierarch_p1 = b; - cdsA->stats->hierarch_p2 = c; - cdsA->stats->hierarch_chi2 = chi2; - - free(newvar); -} - - -void -InvGamma1FitEvals(CdsArray *cdsA, int iterate) -{ - double *newvar = NULL; - double *variance = cdsA->var; - double **evecs = NULL; - double precision = cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; - double nd, oldb, oldc, b, chi2 = DBL_MAX, logL, nullp = 1.0; - int i, j, count, newlen, df; - - newvar = malloc(vlen * sizeof(double)); - nd = 3.0 * cnum; - oldb = oldc = DBL_MAX; - - if (vlen - 3 < nd - 6) - newlen = vlen - 3; - else - newlen = nd - 6; - - if (cdsA->algo->varweight != 0) - { - b = cdsA->stats->hierarch_p1; - /* newlen = vlen - 3; */ - newlen = vlen; - - memcpy(newvar, variance, vlen * sizeof(double)); - - count = 0; - do - { - oldb = b; - - qsort(newvar, vlen, sizeof(double), dblcmp); - - chi2 = invgamma1_fit(newvar + vlen - newlen, newlen, &b, &nullp, &logL); - - if (cdsA->algo->alignment == 1) - { - for (i = 0; i < vlen; ++i) - { - df = 0; - for (j = 0; j < cnum; ++j) - df += cdsA->cds[j]->o[i]; - - df *= 3; - - newvar[i] = (df*variance[i] + 2.0*b) / (df + 4.0); - } - } - else - { - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 4.0); - } - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f\n", - count++, b, b / 2.0, logL, chi2); - fflush(NULL); - } - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision)); - - if (cdsA->algo->verbose != 0) - { - 
printf(">>>>> Final: %3d b:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f\n", - count++, b, b / 2.0, logL, chi2); - fflush(NULL); - } - } - else if (cdsA->algo->covweight != 0) - { - evecs = cdsA->tmpmatKK2; - - eigenvalsym((const double **) cdsA->CovMat, variance, evecs, vlen); - - /* RevVecIp(variance, vlen); */ - - for (i = 0; i < vlen - newlen; ++i) - variance[i] = 0.0; - - b = cdsA->stats->hierarch_p1; - - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 4.0); - - count = 0; - do - { - ++count; - oldb = b; - - chi2 = invgamma1_fit(newvar + vlen - newlen, newlen, &b, &nullp, &logL); - - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 4.0); - - if (count > 100) - { - printf("\n WARNING01: Failed to converge in InvGammaFitVars(), round %d\n ", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision)); - - InvGammaAdjustCov(cdsA, b, 1.0); - - for (i = 0; i < vlen; ++i) - variance[i] = cdsA->CovMat[i][i]; - } - - if (cdsA->algo->alignment == 1) - { - for (i = 0; i < vlen; ++i) - { - df = 0; - for (j = 0; j < cnum; ++j) - df += cdsA->cds[j]->o[i]; - - df *= 3; - - variance[i] = (df*variance[i] + 2.0*b) / (df + 4.0); - } - } - else - { - for (i = 0; i < vlen; ++i) - variance[i] = (nd*variance[i] + 2.0*b) / (nd + 4.0); - } - - cdsA->stats->hierarch_p1 = b; - cdsA->stats->hierarch_p2 = 1.0; - cdsA->stats->hierarch_chi2 = chi2; - - free(newvar); -} - - -void -InvGammaFitVarsND(CdsArray *cdsA, int iterate) -{ - double *newvar = NULL; - double *variance = cdsA->var; - double precision = cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; - double nd, oldb, oldc, b, c, chi2, logL; - int i, count, newlen; - - newvar = malloc(vlen * sizeof(double)); - nd = 3.0 * cnum; - oldb = oldc = DBL_MAX; - - if (cdsA->algo->covweight != 0) - { - double **evecs = MatAlloc(vlen, vlen); - - if (vlen - 1 < nd - 3) - 
newlen = vlen - 1; - else - newlen = nd - 3; - - eigensym((const double **) cdsA->CovMat, variance, evecs, vlen); - RevVecIp(variance, vlen); - - for (i = newlen; i < vlen; ++i) - variance[i] = 0.0; - - memcpy(newvar, variance, vlen * sizeof(double)); - count = 0; - do - { - ++count; - oldb = b; - oldc = c; - - chi2 = invgamma_fit(newvar, newlen, &b, &c, &logL); - - for (i = 0; i < newlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f\n", - count++, b, c, b / (c+1.0), logL, chi2); - fflush(NULL); - } - - if (count > 100) - { - printf("\n WARNING01: Failed to converge in InvGammaFitVars(), round %d\n ", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - if (iterate == 0) - break; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); - - InvGammaAdjustCovND(cdsA, b, c); - - for (i = 0; i < vlen; ++i) - variance[i] = cdsA->CovMat[i][i]; - - MatDestroy(&evecs); - } - else - { - memcpy(newvar, variance, vlen * sizeof(double)); - - count = 0; - do - { - oldb = b; - oldc = c; - - chi2 = invgamma_fit(newvar, vlen, &b, &c, &logL); - - for (i = 0; i < vlen; ++i) - newvar[i] = (variance[i] + 2.0*b) / (3.0 + 2.0*c); - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f\n", - count++, b, c, b / (c+1.0), logL, chi2); - fflush(NULL); - } - - if (iterate == 0) - break; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> Final: %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f\n", - count++, b, c, b / (c+1.0), logL, chi2); - fflush(NULL); - } - - memcpy(variance, newvar, vlen * sizeof(double)); - } - - cdsA->stats->hierarch_p1 = b; - cdsA->stats->hierarch_p2 = c; - cdsA->stats->hierarch_chi2 = chi2; - - free(newvar); -} - - -/* Fit a gamma 
distribution by maximum likelihood. - Uses Newton-Raphson. */ -/* void */ -/* InvGammaFitVars(CdsArray *cdsA, int iterate) */ -/* { */ -/* double *newvar = NULL; */ -/* double *variance = cdsA->var; */ -/* double precision = cdsA->algo->precision; */ -/* const int vlen = cdsA->vlen, cnum = cdsA->cnum; */ -/* double nd, oldb, oldc, harmave, b, c, chi2, logL; */ -/* int i, count; */ -/* */ -/* newvar = malloc(vlen * sizeof(double)); */ -/* nd = 3.0 * cnum; */ -/* oldb = oldc = DBL_MAX; */ -/* */ -/* if (cdsA->algo->covweight != 0) */ -/* { */ -/* for (i = 0; i < vlen; ++i) */ -/* newvar[i] = variance[i] = cdsA->CovMat[i][i]; */ -/* } */ -/* else */ -/* memcpy(newvar, variance, vlen * sizeof(double)); */ -/* */ -/* count = 0; */ -/* do */ -/* { */ -/* oldb = b; */ -/* oldc = c; */ -/* */ -/* chi2 = invgamma_fit(newvar, vlen, &b, &c, &logL); */ -/* */ -/* printf("\n>>>>> %3d b:%-10.5f c:%-10.5f mode:%-10.5f logL:% -12.6f chi2:%-10.5f", */ -/* ++count, b, c, b / (c+1.0), logL, chi2); */ -/* fflush(NULL); */ -/* */ -/* if (c > FLT_MAX) */ -/* { */ -/* harmave = 0.0; */ -/* for (i = 0; i < vlen; ++i) */ -/* { */ -/* if(variance[i] == 0.0) */ -/* continue; */ -/* else */ -/* harmave += (1.0 / variance[i]); */ -/* } */ -/* harmave = vlen / harmave; */ -/* memsetd(newvar, harmave, vlen); */ -/* break; */ -/* } */ -/* else if (b < DBL_EPSILON) */ -/* { */ -/* for (i = 0; i < vlen; ++i) */ -/* newvar[i] = (nd*variance[i]) / (nd + 2.0*(1.0 + c)); */ -/* break; */ -/* } */ -/* else if (2.0 + 2.0 * c > 20.0 * nd) */ -/* { */ -/* for (i = 0; i < vlen; ++i) */ -/* newvar[i] = b/c; */ -/* break; */ -/* } */ -/* else */ -/* { */ -/* for (i = 0; i < vlen; ++i) */ -/* newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); */ -/* } */ -/* */ -/* if (iterate == 0) */ -/* break; */ -/* } */ -/* while(fabs(oldb - b) > fabs(b*precision) && */ -/* fabs(oldc - c) > fabs(c*precision)); */ -/* */ -/* memcpy(variance, newvar, vlen * sizeof(double)); */ -/* */ -/* cdsA->stats->hierarch_p1 = 
b; */ -/* cdsA->stats->hierarch_p2 = c; */ -/* cdsA->stats->hierarch_chi2 = chi2; */ -/* */ -/* free(newvar); */ -/* } */ - - -/* Fit of the variances/eigenvalues to an inverse gamma distribution with - a constrained, minimum c shape parameter. - min c = 1 guarantees that the distribution has a finite mean - min c = 2 -> finite variance - min c = 3 -> finite skewness - min c = 4 -> finite kurtosis */ -void -InvGammaFitVars_minc(CdsArray *cdsA, const double minc, int iterate) -{ - double *newvar = NULL; - double *evals = cdsA->var; - double precision = cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; - double nd, oldb, oldc, harmave, b, c, chi2, logL, mode; - int i, count, newlen, delay; - int maxcount = 300; - - newvar = malloc(vlen * sizeof(double)); - nd = 3.0 * cnum; - oldb = oldc = DBL_MAX; - delay = INT_MAX; - - if (cdsA->algo->covweight != 0) - { - double **evecs = MatAlloc(vlen, vlen); - - if (vlen - 3 < nd - 6) - newlen = vlen - 3; - else - newlen = nd - 6; - - eigensym((const double **) cdsA->CovMat, evals, evecs, vlen); - RevVecIp(evals, vlen); - - for (i = newlen; i < vlen; ++i) - evals[i] = 0.0; - - memcpy(newvar, evals, vlen * sizeof(double)); - - count = 0; - do - { - oldb = b; - oldc = c; - - if (count < delay) - chi2 = invgamma_fit(newvar, newlen, &b, &c, &logL); - else - chi2 = invgamma_fit(newvar, vlen, &b, &c, &logL); - - for (i = 0; i < newlen; ++i) - newvar[i] = (nd*evals[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - mode = b / (c+1.0); /* the mode of an inv gamma dist; the ML estimate of missing data */ - for (i = newlen; i < vlen; ++i) - newvar[i] = mode; - -/* printf("\n\n count: %d", count); */ -/* for (i = 0; i < vlen; ++i) */ -/* printf("\n%3d %8.3e", i, newvar[i]); */ - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f\n", - count, b, c, b / (c+1.0), logL, chi2); - fflush(NULL); - } - - if (count > maxcount) - { - printf("\n WARNING03: Failed to converge 
in InvGammaFitVars_minc(), round %d\n ", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - if (iterate == 0) - break; - - ++count; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); - - if (c < minc) - { - memcpy(newvar, evals, vlen * sizeof(double)); - - if (vlen - 3 < nd - 6) - newlen = vlen - 3; - else - newlen = nd - 6; - - c = minc; - count = 0; - do - { - oldb = b; - - if (count < delay) - b = minc * HarmonicAve(newvar, newlen); - else - b = minc * HarmonicAve(newvar, vlen); - - for (i = 0; i < newlen; ++i) - newvar[i] = (nd*evals[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - mode = b / (c+1.0); /* the mode of an inv gamma dist; the ML estimate of missing data */ - for (i = newlen; i < vlen; ++i) - newvar[i] = mode; - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f\n", - count, b, c, b / (c+1.0), logL, chi2); - fflush(NULL); - } - - if (count > maxcount) - { - printf("\n WARNING04: Failed to converge in InvGammaFitVars_minc(), round %d\n ", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - if (iterate == 0) - break; - - ++count; - } - while(fabs(oldb - b) > fabs(b*precision)); - - chi2 = chi_sqr_adapt(newvar, newlen, 0, &logL, b, c, invgamma_pdf, invgamma_lnpdf, invgamma_int); - } - -/* printf("\n\n count: %d", count); */ -/* for (i = 0; i < vlen; ++i) */ -/* printf("\n%3d %8.3e", i, newvar[i]); */ - -/* RevVecIp(evals, vlen); */ -/* EigenReconSym(cdsA->CovMat, (const double **) evecs, (const double *) evals, vlen); */ - - InvGammaAdjustCov(cdsA, b, c); - - for (i = 0; i < vlen; ++i) - cdsA->var[i] = cdsA->CovMat[i][i]; - - MatDestroy(&evecs); - } - else - { - memcpy(newvar, evals, vlen * sizeof(double)); - newlen = vlen - 3; - - count = 0; - do - { - oldb = b; - oldc = c; - - qsort(newvar, vlen, sizeof(double), dblcmp_rev); -/* printf("\n\n count: %d", count); */ -/* for (i = 0; i < vlen; ++i) */ -/* printf("\n%3d %8.3e", i, newvar[i]); */ - - if 
(count < delay) - chi2 = invgamma_fit(newvar, newlen, &b, &c, &logL); - else - chi2 = invgamma_fit(newvar, vlen, &b, &c, &logL); - - for (i = newlen; i < vlen; ++i) - newvar[i] = b / (c+1.0); - -/* printf("\n\n count: %d", count); */ -/* for (i = 0; i < vlen; ++i) */ -/* printf("\n%3d %8.3e", i, newvar[i]); */ - - if (c > FLT_MAX) - { - harmave = HarmonicAve(newvar, vlen); - memsetd(newvar, harmave, vlen); - break; - } - else if (b < DBL_EPSILON) - { - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*evals[i]) / (nd + 2.0*(1.0 + c)); - break; - } - else if (2.0 + 2.0 * c > 20.0 * nd) /* all are equal to the harmonic average */ - { - harmave = HarmonicAve(newvar, vlen); - for (i = 0; i < vlen; ++i) - newvar[i] = harmave; - break; - } - else - { - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*evals[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - } - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f\n", - count, b, c, b / (c+1.0), logL, chi2); - fflush(NULL); - } - - if (count > maxcount) - { - printf("\n WARNING04: Failed to converge in InvGammaFitVars_minc(), round %d\n ", - cdsA->algo->rounds); - fflush(NULL); - break; - } - -/* double trace1 = 0.0, trace2 = 0.0; */ -/* for (i = 0; i < vlen; ++i) */ -/* { */ -/* trace1 += newvar[i]; */ -/* trace2 += evals[i]; */ -/* } */ -/* */ -/* printf("\n trace 1: %f trace 2: %f", */ -/* trace1, trace2); */ -/* fflush(NULL); */ - - if (iterate == 0) - break; - - ++count; - } - while(fabs(oldb - b) > fabs(b*precision) && - fabs(oldc - c) > fabs(c*precision)); - - if (c < minc) - { - c = minc; - memcpy(newvar, evals, vlen * sizeof(double)); - newlen = vlen - 3; - - count = 0; - do - { - oldb = b; - -/* if (count >= delay) */ -/* newlen = vlen; */ - - qsort(newvar, vlen, sizeof(double), dblcmp); - -/* if (count >= delay) */ -/* for (i = 0; i < vlen - newlen; ++i) */ -/* newvar[i] = b / (c+1.0); */ - - if (count < delay) - b = minc * HarmonicAve(newvar, newlen); - else - b = minc * 
HarmonicAve(newvar, vlen); - - for (i = newlen; i < vlen; ++i) - newvar[i] = b / (c+1.0); - - if (b < DBL_EPSILON) - { - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*evals[i]) / (nd + 2.0*(1.0 + c)); - break; - } - else - { - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*evals[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - } - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f\n", - count, b, c, b / (c+1.0), logL, chi2); - fflush(NULL); - } - - if (count > maxcount) - { - printf("\n WARNING04: Failed to converge in InvGammaFitVars_minc(), round %d\n ", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - if (iterate == 0) - break; - - ++count; - } - while(fabs(oldb - b) > fabs(b*precision)); - } - - for (i = 0; i < vlen; ++i) - cdsA->var[i] = (nd*evals[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - chi2 = chi_sqr_adapt(evals, vlen, 0, &logL, b, c, invgamma_pdf, invgamma_lnpdf, invgamma_int); - } - -/* for (i = 0; i < vlen; ++i) */ -/* printf("\n%3d %e", i, newvar[i]); */ - - cdsA->stats->hierarch_p1 = b; - cdsA->stats->hierarch_p2 = c; - cdsA->stats->hierarch_chi2 = chi2; - - free(newvar); -} - - -/* -Straight-up EM fit, for marginal student-t, like -Mechelke and Habeck BMC Bioinformatics 2010, 11:363 -2010-10-31 -*/ -void -InvGammaEMFixedC(CdsArray *cdsA, const double c, int iterate) -{ - double *newvar = NULL; - double *variance = NULL; - double precision = FLT_EPSILON; // cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; - double nd, oldb, oldc, b = 0.0, chi2 = 0.0, logL; - int count; - - newvar = malloc(vlen * sizeof(double)); - nd = 3.0 * cnum; - oldb = oldc = DBL_MAX; - - if (cdsA->algo->varweight > 0) - { - variance = cdsA->var; - b = cdsA->stats->hierarch_p1; - - memcpy(newvar, variance, vlen * sizeof(double)); - - if (cdsA->algo->verbose != 0) - printf("\n0>>>>>>>>>>>>>"); - - count = 0; - do - { - oldb = b; - - b = HarmonicAve(newvar, vlen); - //b = HarmonicAveBayes(newvar, vlen, 
1.0); - //printf("\nb:%g",b); - //fflush(NULL); - - if (cdsA->algo->alignment == 1) - InvGammaAdjustVarOcc(cdsA->cds, newvar, vlen, cnum, variance, b, c); - else - InvGammaAdjustVar(newvar, vlen, cnum, variance, b, c); - - if (cdsA->algo->verbose != 0) - { - printf("\n>>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f", - count, b, c, b / (c+1.0), logL, chi2); - fflush(NULL); - } - - if (count > 300) - { - printf("\n WARNING04: Failed to converge in InvGammaBayesFixedCFitEvals(), round %d\n ", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - if (iterate == 0) - break; - - ++count; - } - while(fabs(b - oldb) > fabs(b*precision)); - - if (cdsA->algo->alignment == 1) - InvGammaAdjustVarOcc(cdsA->cds, newvar, vlen, cnum, variance, b, c); - else - InvGammaAdjustVar(newvar, vlen, cnum, variance, b, c); - - chi2 = chi_sqr_adapt(variance, vlen, 0, &logL, b, c, invgamma_pdf, invgamma_lnpdf, invgamma_int); - } - - cdsA->stats->hierarch_p1 = b; - cdsA->stats->hierarch_p2 = c; - cdsA->stats->hierarch_chi2 = chi2; - - free(newvar); -} - - -/* DLT 2008-03-28 new */ -void -InvGammaEMFixedCFitEvals(CdsArray *cdsA, const double c, int iterate) -{ - double *newvar = NULL; - double *evals = NULL; - double *variance = NULL; - double **evecs = NULL; - int *missi = NULL; - double precision = FLT_EPSILON; //cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; - double nd, oldb, oldc, b = 0.0, chi2 = 0.0, logL, xn1, expinvx, gt; - int i, j, count, newlen, missing; - - newvar = malloc(vlen * sizeof(double)); - nd = 3.0 * cnum; - oldb = oldc = DBL_MAX; - - if (cdsA->algo->varweight > 0) - { - variance = cdsA->var; - b = cdsA->stats->hierarch_p1; - missing = 4; - missi = malloc(missing * sizeof(int)); - - memcpy(newvar, variance, vlen * sizeof(double)); - - if (cdsA->algo->verbose != 0) - printf("\n0>>>>>>>>>>>>>"); - - count = 0; - do - { - oldb = b; - - /* qsort-dblcmp sorts small to big */ - qsort(newvar, vlen, sizeof(double), dblcmp); - - xn1 
= newvar[missing]; -/* if (cdsA->algo->bayes == 0) */ - invgamma_fixed_c_EM_fit(newvar, vlen, missing, &b, c, &logL); -/* else if (cdsA->algo->bayes == 1) */ -/* invgamma_fixed_c_EM_fit_bayes(newvar, vlen, missing, &b, c, &logL); */ - - if (cdsA->algo->alignment == 1) - InvGammaAdjustVarOcc(cdsA->cds, newvar, vlen, cnum, variance, b, c); - else - InvGammaAdjustVar(newvar, vlen, cnum, variance, b, c); - - if (cdsA->algo->verbose != 0) - { - printf("\n>>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f", - count, b, c, b / (c+1.0), logL, chi2); - fflush(NULL); - } - - if (count > 300) - { - printf("\n WARNING04: Failed to converge in InvGammaBayesFixedCFitEvals(), round %d\n ", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - if (iterate == 0) - break; - - ++count; - } - while(fabs(b - oldb) > fabs(b*precision)); - - expinvx = 1.0 / ExpInvXn(b, c, xn1); - -/* - printf("\n%g", 1.0 / expinvx); - printf("\n%g", 1 / xn1); - printf("\n%g\n", 2*(1/xn1 - 1/newvar[missing+1]) + 1/xn1); - */ - - gt = 0.0; - for (j = 0; j < missing; ++j) - { - missi[j] = findsmallest_gt(variance, vlen, gt); - - if (cdsA->algo->verbose != 0) - { - printf("\n>>>>>>>>>>>>>> %d", j); - printf("\n>>>>>>>>>>>>>> pre-var xn1 expinvx"); - printf("\n1>>>>>>>>>>>>> %8.3e %8.3e %8.3e\n", variance[missi[j]], xn1, expinvx); - fflush(NULL); - } - - gt = variance[missi[j]]; - /* this is because the inverse of the variance is always used in other - calculations/maximizations, yet I store it as the variance (uninverted) */ - } - - for (j = 0; j < missing; ++j) - variance[missi[j]] = expinvx; - - free(missi); - chi2 = chi_sqr_adapt(variance, vlen, 0, &logL, b, c, invgamma_pdf, invgamma_lnpdf, invgamma_int); - } - else if (cdsA->algo->covweight > 0) - { - evecs = cdsA->tmpmatKK2; - evals = cdsA->var; - -/* if (vlen - 3 < nd - 6) */ -/* newlen = vlen - 3; */ -/* else */ -/* newlen = nd - 6; */ - - if (vlen - 4 < nd - 4) - newlen = vlen - 4; - else - newlen = nd - 4; - -//newlen = vlen-3; 
- missing = vlen - newlen; - - eigensym((const double **) cdsA->CovMat, evals, evecs, vlen); - /* eigensym evals are small to large */ - -//VecPrint(evals, vlen); - //printf("missing:%d\n\n", missing); - - for (i = 0; i < missing; ++i) - evals[i] = 0.0; - - //VecPrint(evals, vlen); - - b = cdsA->stats->hierarch_p1; - - memcpy(newvar, evals, vlen * sizeof(double)); - - if (cdsA->algo->verbose != 0) - printf("\n0>>>>>>>>>>>>>"); - - count = 0; - do - { - oldb = b; - - qsort(newvar, vlen, sizeof(double), dblcmp); - - xn1 = newvar[missing]; - //chi2 = invgamma_fixed_c_EM_fit(newvar, vlen, missing, &b, c, &logL); - -/* if (cdsA->algo->bayes == 0) */ - chi2 = invgamma_fixed_c_EM_fit(newvar, vlen, missing, &b, c, &logL); -/* else if (cdsA->algo->bayes == 1) */ -/* chi2 = invgamma_fixed_c_EM_fit_bayes(newvar, vlen, missing, &b, c, &logL); */ - - for (i = missing; i < vlen; ++i) - newvar[i] = (nd*evals[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - if (cdsA->algo->verbose != 0) - { - printf("\n>>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f", - count, b, c, b / (c+1.0), logL, chi2); - fflush(NULL); - } - - if (count > 300) - { - printf("\n WARNING04: Failed to converge in InvGammaBayesFixedCFitEvals(), round %d\n ", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - count++; - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision)); - - if (cdsA->algo->verbose != 0) - { - printf("\n>>>>> Final: %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - InvGammaAdjustVar(evals+missing, vlen-missing, cnum, evals+missing, b, c); - - xn1 = evals[missing]; - - if (cdsA->algo->verbose != 0) - { - printf("\n1>>>> %8.3e %8.3e", evals[0], xn1); - fflush(NULL); - } - - expinvx = 1.0 / ExpInvXn(b, c, xn1); - - for (i = 0; i < missing; ++i) - evals[i] = expinvx; - - if (cdsA->algo->verbose != 0) - { - 
printf("\n2>>>> %8.3e %8.3e\n", xn1, evals[0]); - fflush(NULL); - } - - //chi2 = chi_sqr_adapt(evals, vlen, 0, &logL, b, c, invgamma_pdf, invgamma_lnpdf, invgamma_int); - //chi2 = chi_sqr_adapt(newvar, newlen, 0, &logL, b, c, invgamma_pdf, invgamma_lnpdf, invgamma_int); - - EigenReconSym(cdsA->CovMat, (const double **) evecs, evals, vlen); - - for (i = 0; i < vlen; ++i) - evals[i] = cdsA->CovMat[i][i]; - } - -/* for (i = 0; i < vlen; ++i) */ -/* printf("\n%3d %e", i, evals[i]); */ - - cdsA->stats->hierarch_p1 = b; - cdsA->stats->hierarch_p2 = c; - cdsA->stats->hierarch_chi2 = chi2; - - free(newvar); -} - - -void -InvGammaMLFixedCFitEvals(CdsArray *cdsA, const double c, int iterate) -{ - double *newvar = NULL; - double *evals = NULL; - double *variance = NULL; - double **evecs = NULL; - double precision = FLT_EPSILON; //cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; - double nd, oldb, oldc, b = 0.0, chi2 = 0.0, logL, xn1, expinvx; - int i, count, newlen, missing, smallest; - - newvar = malloc(vlen * sizeof(double)); - nd = 3.0 * cnum; - oldb = oldc = DBL_MAX; - - if (cdsA->algo->varweight != 0) - { - variance = cdsA->var; - b = cdsA->stats->hierarch_p1; - missing = 1; - - memcpy(newvar, variance, vlen * sizeof(double)); - - if (cdsA->algo->verbose != 0) - printf("\n0>>>>>>>>>>>>>"); - - count = 0; - do - { - oldb = b; - - /* qsort-dblcmp sorts small to big */ - qsort(newvar, vlen, sizeof(double), dblcmp); - - xn1 = newvar[1]; -/* if (cdsA->algo->bayes == 0) */ - invgamma_fixed_c_ML_fit(newvar, vlen, missing, &b, c, &logL); -/* else if (cdsA->algo->bayes == 1) */ -/* { */ -/* printf("\nXXXXXXXXXXXXXXXXXXXXXXXXX\n\n"); */ -/* exit(0); */ -/* chi2 = invgamma_fixed_c_ML_fit_bayes(newvar, vlen, missing, &b, c, &logL); */ -/* } */ - - if (cdsA->algo->alignment == 1) - InvGammaAdjustVarOcc(cdsA->cds, newvar, vlen, cnum, variance, b, c); - else - InvGammaAdjustVar(newvar, vlen, cnum, variance, b, c); - - if (cdsA->algo->verbose != 0) - { - 
printf("\n>>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f", - count, b, c, b / (c+1.0), logL, chi2); - fflush(NULL); - } - - if (count > 300) - { - printf("\n WARNING04: Failed to converge in InvGammaBayesFixedCFitEvals(), round %d\n ", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - if (iterate == 0) - break; - - ++count; - } - while(fabs(b - oldb) > fabs(b*precision)); - - InvGammaAdjustVar(variance, vlen, cnum, variance, b, c); - - smallest = findsmallest(variance, vlen); - expinvx = 1.0 / ExpInvXn(b, c, xn1); - - if (cdsA->algo->verbose != 0) - { - printf("\n>>>>>>>>>>>>>> pre-var xn1 expinvx"); - printf("\n1>>>>>>>>>>>>> %8.3e %8.3e %8.3e\n", variance[smallest], xn1, expinvx); - fflush(NULL); - } - - variance[smallest] = expinvx; - /* this is because the inverse of the variance is always used in other - calculations/maximizations, yet I store it as the variance (uninverted) */ - - chi2 = chi_sqr_adapt(variance, vlen, 0, &logL, b, c, invgamma_pdf, invgamma_lnpdf, invgamma_int); - } - else if (cdsA->algo->covweight != 0) - { - evecs = cdsA->tmpmatKK2; - evals = cdsA->var; - -/* if (vlen - 3 < nd - 6) */ -/* newlen = vlen - 3; */ -/* else */ -/* newlen = nd - 6; */ - - if (vlen - 4 < nd - 3) - newlen = vlen - 4; - else - newlen = nd - 3; - -//newlen = vlen-3; - missing = vlen - newlen; - - eigensym((const double **) cdsA->CovMat, evals, evecs, vlen); - /* eigensym evals are small to large */ - -//VecPrint(evals, vlen); - //printf("missing:%d\n\n", missing); - - for (i = 0; i < missing; ++i) - evals[i] = 0.0; - - //VecPrint(evals, vlen); - - b = cdsA->stats->hierarch_p1; - - memcpy(newvar, evals, vlen * sizeof(double)); - - if (cdsA->algo->verbose != 0) - printf("\n0>>>>>>>>>>>>>"); - - count = 0; - do - { - oldb = b; - - qsort(newvar, vlen, sizeof(double), dblcmp); - - xn1 = newvar[missing]; - //chi2 = invgamma_fixed_c_EM_fit(newvar, vlen, missing, &b, c, &logL); - -/* if (cdsA->algo->bayes == 0) */ - chi2 = 
invgamma_fixed_c_ML_fit(newvar, vlen, missing, &b, c, &logL); -/* else if (cdsA->algo->bayes == 1) */ -/* { */ -/* printf("\nXXXXXXXXXXXXXXXXXXXXXXXXX\n\n"); */ -/* exit(0); */ -/* chi2 = invgamma_fixed_c_ML_fit_bayes(newvar, vlen, missing, &b, c, &logL); */ -/* } */ - - for (i = missing; i < vlen; ++i) - newvar[i] = (nd*evals[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - if (cdsA->algo->verbose != 0) - { - printf("\n>>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f", - count, b, c, b / (c+1.0), logL, chi2); - fflush(NULL); - } - - if (count > 300) - { - printf("\n WARNING04: Failed to converge in InvGammaBayesFixedCFitEvals(), round %d\n ", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - count++; - - if (iterate == 0 || cdsA->algo->abort == 1) - break; - } - while(fabs(oldb - b) > fabs(b*precision)); - - if (cdsA->algo->verbose != 0) - { - printf("\n>>>>> Final: %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e", - count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); - fflush(NULL); - } - - InvGammaAdjustVar(evals+missing, vlen-missing, cnum, evals+missing, b, c); - - xn1 = evals[missing]; - - if (cdsA->algo->verbose != 0) - { - printf("\n1>>>> %8.3e %8.3e", evals[0], xn1); - fflush(NULL); - } - - expinvx = 1.0 / ExpInvXn(b, c, xn1); - - for (i = 0; i < missing; ++i) - evals[i] = expinvx; - - if (cdsA->algo->verbose != 0) - { - printf("\n2>>>> %8.3e %8.3e\n", xn1, evals[0]); - fflush(NULL); - } - - //chi2 = chi_sqr_adapt(evals, vlen, 0, &logL, b, c, invgamma_pdf, invgamma_lnpdf, invgamma_int); - //chi2 = chi_sqr_adapt(newvar, newlen, 0, &logL, b, c, invgamma_pdf, invgamma_lnpdf, invgamma_int); - - EigenReconSym(cdsA->CovMat, (const double **) evecs, evals, vlen); - - for (i = 0; i < vlen; ++i) - evals[i] = cdsA->CovMat[i][i]; - } - -/* for (i = 0; i < vlen; ++i) */ -/* printf("\n%3d %e", i, evals[i]); */ - - cdsA->stats->hierarch_p1 = b; - cdsA->stats->hierarch_p2 = c; - cdsA->stats->hierarch_chi2 
= chi2; - - free(newvar); -} - - - -void -InvGammaMLFitEvals(CdsArray *cdsA, int iterate) +InvGammaFitEvalsEMFixedC(CdsArray *cdsA, const double c, int iterate) { double *newvar = NULL; double *evals = NULL; double *variance = NULL; double **evecs = NULL; - double precision = cdsA->algo->precision; + int *missi = NULL; + double precision = algo->precision; const int vlen = cdsA->vlen, cnum = cdsA->cnum; - double nd, oldb, oldc, b, c, chi2 = 0.0, logL, xn1, expinvx; - int i, count, newlen, missing, smallest; + double nd, oldb, oldc, b = 0.0, chi2 = 0.0, logL, xn1, expinvx, gt; + int i, j, count, newlen, missing; newvar = malloc(vlen * sizeof(double)); nd = 3.0 * cnum; oldb = oldc = DBL_MAX; - if (cdsA->algo->varweight != 0) + if (algo->varweight > 0) { variance = cdsA->var; - b = cdsA->stats->hierarch_p1; - c = cdsA->stats->hierarch_p2; - - missing = 1; + b = stats->hierarch_p1; + missing = 4; + missi = malloc(missing * sizeof(int)); memcpy(newvar, variance, vlen * sizeof(double)); - if (cdsA->algo->verbose != 0) + if (algo->verbose) printf("\n0>>>>>>>>>>>>>"); count = 0; @@ -3552,29 +721,27 @@ { oldb = b; - if (cdsA->algo->verbose != 0) + /* qsort-dblcmp sorts small to big */ + qsort(newvar, vlen, sizeof(double), dblcmp); + xn1 = newvar[missing]; + invgamma_fixed_c_EM_fit(newvar, vlen, missing, &b, c, &logL); + + if (algo->alignment) + InvGammaAdjustVarNu(cdsA->cds, newvar, vlen, cnum, variance, b, c); + else + InvGammaAdjustVar(newvar, vlen, cnum, variance, b, c); + + if (algo->verbose) { printf("\n>>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f", count, b, c, b / (c+1.0), logL, chi2); fflush(NULL); } - if (cdsA->algo->alignment == 1) - InvGammaAdjustVarOcc(cdsA->cds, newvar, vlen, cnum, variance, b, c); - else - InvGammaAdjustVar(newvar, vlen, cnum, variance, b, c); - - /* qsort-dblcmp sorts small to big */ - qsort(newvar, vlen, sizeof(double), dblcmp); -// for (i = 0; i < vlen; ++i) -// printf("\n%3d %e", i, newvar[i]); - xn1 = newvar[1]; - 
invgamma_EMsmall_fit(newvar, vlen, missing, &b, &c, &logL); - if (count > 300) { - printf("\n WARNING04: Failed to converge in InvGammaMLFitEvals(), round %d\n ", - cdsA->algo->rounds); + printf("\n WARNING04: Failed to converge in InvGammaFitEvalsEMFixedC(), round %d\n ", + algo->rounds); fflush(NULL); break; } @@ -3584,56 +751,71 @@ ++count; } - while(fabs(oldb - b) > fabs(b*precision)); + while(fabs(b - oldb) > fabs(b*precision)); - InvGammaAdjustVar(variance, vlen, cnum, variance, b, c); + expinvx = 1.0 / ExpInvXn(b, c, xn1); - smallest = findsmallest(variance, vlen); +// printf("\n%g", 1.0 / expinvx); +// printf("\n%g", 1 / xn1); +// printf("\n%g\n", 2*(1/xn1 - 1/newvar[missing+1]) + 1/xn1); - if (cdsA->algo->verbose != 0) + gt = 0.0; + for (j = 0; j < missing; ++j) { - printf("\n1>>>> %8.3e %8.3e", variance[smallest], xn1); - fflush(NULL); - } + missi[j] = findsmallest_gt(variance, vlen, gt); - //variance[smallest] = ExpXn(b, c, xn1); - variance[smallest] = 1.0 / ExpInvXn(b, c, xn1); - /* this is because the inverse of the variance is always used in other - calculations/maximizations, yet I store it as the variance (uninverted) */ - //printf("\n-->-->-->-->--> %8.3e %8.3e\n", variance[smallest], ExpXn(b, c, xn1)); + if (algo->verbose) + { + printf("\n>>>>>>>>>>>>>> %d", j); + printf("\n>>>>>>>>>>>>>> pre-var xn1 expinvx"); + printf("\n1>>>>>>>>>>>>> %8.3e %8.3e %8.3e\n", variance[missi[j]], xn1, expinvx); + fflush(NULL); + } - if (cdsA->algo->verbose != 0) - { - printf("\n2>>>> %8.3e %8.3e\n", xn1, variance[smallest]); - fflush(NULL); + gt = variance[missi[j]]; + /* this is because the inverse of the variance is always used in other + calculations/maximizations, yet I store it as the variance (uninverted) */ } + for (j = 0; j < missing; ++j) + variance[missi[j]] = expinvx; + + free(missi); chi2 = chi_sqr_adapt(variance, vlen, 0, &logL, b, c, invgamma_pdf, invgamma_lnpdf, invgamma_int); } - else if (cdsA->algo->covweight != 0) + else if (algo->covweight > 0) { 
+ double *invevals = cdsA->tmpvecK; evecs = cdsA->tmpmatKK2; evals = cdsA->var; - if (vlen - 3 < nd - 6) - newlen = vlen - 3; +/* if (vlen - 3 < nd - 6) */ +/* newlen = vlen - 3; */ +/* else */ +/* newlen = nd - 6; */ + + if (vlen - 4 < nd - 4) + newlen = vlen - 4; else - newlen = nd - 6; + newlen = nd - 4; missing = vlen - newlen; - eigensym((const double **) cdsA->CovMat, evals, evecs, vlen); - /* eigensym evals are small to large */ + //eigensym((const double **) cdsA->CovMat, evals, evecs, vlen); + /* evals are small to large */ + EigenGSL((const double **) cdsA->CovMat, vlen, evals, evecs, 0); + //printf("missing:%d\n\n", missing); for (i = 0; i < missing; ++i) evals[i] = 0.0; - b = cdsA->stats->hierarch_p1; - c = cdsA->stats->hierarch_p2; + //VecPrint(evals, vlen); + + b = stats->hierarch_p1; memcpy(newvar, evals, vlen * sizeof(double)); - if (cdsA->algo->verbose != 0) + if (algo->verbose) printf("\n0>>>>>>>>>>>>>"); count = 0; @@ -3641,39 +823,39 @@ { oldb = b; - InvGammaAdjustVar(newvar+missing, vlen-missing, cnum, evals+missing, b, c); - qsort(newvar, vlen, sizeof(double), dblcmp); - if (cdsA->algo->verbose != 0) + xn1 = newvar[missing]; + //chi2 = invgamma_fixed_c_EM_fit(newvar, vlen, missing, &b, c, &logL); + + chi2 = invgamma_fixed_c_EM_fit(newvar, vlen, missing, &b, c, &logL); + + for (i = missing; i < vlen; ++i) + newvar[i] = (nd*evals[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); + + if (algo->verbose) { printf("\n>>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f", count, b, c, b / (c+1.0), logL, chi2); fflush(NULL); } - xn1 = newvar[missing]; - chi2 = invgamma_EMsmall_fit(newvar, vlen, missing, &b, &c, &logL); - -// for (i = missing; i < vlen; ++i) -// newvar[i] = (nd*evals[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - if (count > 300) { - printf("\n WARNING04: Failed to converge in InvGammaMLFitEvals(), round %d\n ", - cdsA->algo->rounds); + printf("\n WARNING04: Failed to converge in InvGammaFitEvalsEMFixedC(), round %d\n ", + algo->rounds); 
fflush(NULL); break; } count++; - if (iterate == 0 || cdsA->algo->abort == 1) + if (iterate == 0 || algo->abort) break; } while(fabs(oldb - b) > fabs(b*precision)); - if (cdsA->algo->verbose != 0) + if (algo->verbose) { printf("\n>>>>> Final: %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e", count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); @@ -3684,18 +866,18 @@ xn1 = evals[missing]; - if (cdsA->algo->verbose != 0) + if (algo->verbose) { printf("\n1>>>> %8.3e %8.3e", evals[0], xn1); fflush(NULL); } - expinvx = 1.0 / ExpInvXn(b, c, xn1);; + expinvx = 1.0 / ExpInvXn(b, c, xn1); for (i = 0; i < missing; ++i) evals[i] = expinvx; - if (cdsA->algo->verbose != 0) + if (algo->verbose) { printf("\n2>>>> %8.3e %8.3e\n", xn1, evals[0]); fflush(NULL); @@ -3707,225 +889,84 @@ EigenReconSym(cdsA->CovMat, (const double **) evecs, evals, vlen); for (i = 0; i < vlen; ++i) - evals[i] = cdsA->CovMat[i][i]; + invevals[i] = 1.0 / evals[i]; + + EigenReconSym(cdsA->WtMat, (const double **) evecs, invevals, vlen); + + for (i = 0; i < vlen; ++i) + cdsA->var[i] = cdsA->CovMat[i][i]; } -// for (i = 0; i < vlen; ++i) -// printf("\n%3d %e", i, newvar[i]); +/* for (i = 0; i < vlen; ++i) */ +/* printf("\n%3d %e", i, evals[i]); */ - cdsA->stats->hierarch_p1 = b; - cdsA->stats->hierarch_p2 = c; - cdsA->stats->hierarch_chi2 = chi2; + stats->hierarch_p1 = b; + stats->hierarch_p2 = c; + stats->hierarch_chi2 = chi2; free(newvar); } +/* ML-EM fit, fitting unknown b and c inverse gamma params (scale and shape, resp.) + Fits expectation of single smallest eigenvlaue for variance weighting. 
+*/ void -InvGammaFitVars_fixed_c(CdsArray *cdsA, const double c, int iterate) +InvGammaFitEvalsML(CdsArray *cdsA, int iterate) { double *newvar = NULL; - double *variance = cdsA->var; - double precision = cdsA->algo->precision; + double *evals = NULL; + double *variance = NULL; + double **evecs = NULL; + double precision = algo->precision; const int vlen = cdsA->vlen, cnum = cdsA->cnum; - double nd, oldb, oldc, b = 0.0, chi2 = 0.0, logL; - int i, count, newlen; + double nd, oldb, oldc, b, c, chi2 = 0.0, logL, xn1, expinvx; + int i, count, newlen, missing, smallest; newvar = malloc(vlen * sizeof(double)); nd = 3.0 * cnum; oldb = oldc = DBL_MAX; - if (cdsA->algo->covweight != 0) - { - double **evecs = MatAlloc(vlen, vlen); - - if (vlen - 3 < nd - 6) - newlen = vlen - 3; - else - newlen = nd - 6; - - eigensym((const double **) cdsA->CovMat, variance, evecs, vlen); + b = stats->hierarch_p1; + c = stats->hierarch_p2; - RevVecIp(variance, vlen); + if (algo->varweight) + { + variance = cdsA->var; - for (i = newlen; i < vlen; ++i) - variance[i] = 0.0; + missing = 1; memcpy(newvar, variance, vlen * sizeof(double)); - count = 0; - do - { - oldb = b; - - if (count > 3) - b = c * HarmonicAve(newvar, vlen); - else - /* b = (vlen * c - c - 1.0) * HarmonicAve(newvar, newlen) / newlen; */ - b = vlen * c * HarmonicAve(newvar, newlen) / newlen; - - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f\n", - count, b, c, b / (c+1.0), logL, chi2); - fflush(NULL); - } - - if (count > 300) - { - printf("\n WARNING04: Failed to converge in InvGammaFitVars_fixed_c(), round %d\n ", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - if (iterate == 0) - break; - - ++count; - } - while(fabs(oldb - b) > fabs(b*precision)); - - chi2 = chi_sqr_adapt(newvar, newlen, 0, &logL, b, c, invgamma_pdf, invgamma_lnpdf, invgamma_int); - - 
InvGammaAdjustCov(cdsA, b, c); - - for (i = 0; i < vlen; ++i) - variance[i] = cdsA->CovMat[i][i]; - - MatDestroy(&evecs); - } - else - { - memcpy(newvar, variance, vlen * sizeof(double)); - newlen = vlen - 3; + if (algo->verbose) + printf("\n0>>>>>>>>>>>>>"); count = 0; do { oldb = b; -/* if (count > 3) */ -/* { */ -/* b = c * HarmonicAve(newvar, vlen); */ -/* } */ -/* else */ - { - qsort(newvar, vlen, sizeof(double), dblcmp); - b = vlen * c * HarmonicAve(newvar, newlen) / newlen; - } - - if (b < DBL_EPSILON) - { - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i]) / (nd + 2.0*(1.0 + c)); - break; - } - else - { - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - } - - if (cdsA->algo->verbose != 0) + if (algo->verbose) { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f\n", + printf("\n>>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f", count, b, c, b / (c+1.0), logL, chi2); fflush(NULL); } - if (count > 300) - { - printf("\n WARNING04: Failed to converge in InvGammaFitVars_fixed_c(), round %d\n ", - cdsA->algo->rounds); - fflush(NULL); - break; - } - - if (iterate == 0) - break; - - ++count; - } - while(fabs(oldb - b) > fabs(b*precision)); - - for (i = 0; i < vlen; ++i) - variance[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - - chi2 = chi_sqr_adapt(variance, vlen, 0, &logL, b, c, invgamma_pdf, invgamma_lnpdf, invgamma_int); - } - -/* for (i = 0; i < vlen; ++i) */ -/* printf("\n%3d %e", i, newvar[i]); */ - - cdsA->stats->hierarch_p1 = b; - cdsA->stats->hierarch_p2 = c; - cdsA->stats->hierarch_chi2 = chi2; - - free(newvar); -} - - -void -InvGammaBayesFitVars_fixed_c(CdsArray *cdsA, const double c, int iterate) -{ - double *newvar = NULL; - double *variance = cdsA->var; - double precision = cdsA->algo->precision; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; - double nd, oldb, oldc, b = 0.0, chi2 = 0.0, logL; - int i, count, newlen; - - newvar = 
malloc(vlen * sizeof(double)); - nd = 3.0 * cnum; - oldb = oldc = DBL_MAX; - - if (cdsA->algo->covweight != 0) - { - double **evecs = MatAlloc(vlen, vlen); - - if (vlen - 3 < nd - 6) - newlen = vlen - 3; - else - newlen = nd - 6; - - eigensym((const double **) cdsA->CovMat, variance, evecs, vlen); - - RevVecIp(variance, vlen); - - for (i = newlen; i < vlen; ++i) - variance[i] = 0.0; - - memcpy(newvar, variance, vlen * sizeof(double)); - - count = 0; - do - { - oldb = b; - - if (count > 3) - b = (c + 1.0) * HarmonicAve(newvar, vlen); + if (algo->alignment) + InvGammaAdjustVarNu(cdsA->cds, newvar, vlen, cnum, variance, b, c); else - /* b = (vlen * c - c - 1.0) * HarmonicAve(newvar, newlen) / newlen; */ - b = vlen * (c + 1.0) * HarmonicAve(newvar, newlen) / newlen; - - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); + InvGammaAdjustVar(newvar, vlen, cnum, variance, b, c); - if (cdsA->algo->verbose != 0) - { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f\n", - count, b, c, b / (c+1.0), logL, chi2); - fflush(NULL); - } + /* qsort-dblcmp sorts small to big */ + qsort(newvar, vlen, sizeof(double), dblcmp); + xn1 = newvar[1]; + invgamma_EMsmall_fit(newvar, vlen, missing, &b, &c, &logL); if (count > 300) { - printf("\n WARNING04: Failed to converge in InvGammaFitVars_fixed_c(), round %d\n ", - cdsA->algo->rounds); + printf("\n WARNING04: Failed to converge in InvGammaFitEvalsML(), round %d\n ", + algo->rounds); fflush(NULL); break; } @@ -3937,305 +978,302 @@ } while(fabs(oldb - b) > fabs(b*precision)); - chi2 = chi_sqr_adapt(newvar, newlen, 0, &logL, b, c, invgamma_pdf, invgamma_lnpdf, invgamma_int); + InvGammaAdjustVar(variance, vlen, cnum, variance, b, c); - InvGammaAdjustCov(cdsA, b, c); + smallest = findsmallest(variance, vlen); - for (i = 0; i < vlen; ++i) - variance[i] = cdsA->CovMat[i][i]; + if (algo->verbose) + { + printf("\n1>>>> %8.3e %8.3e", variance[smallest], xn1); + fflush(NULL); + } + 
+ //variance[smallest] = ExpXn(b, c, xn1); + variance[smallest] = 1.0 / ExpInvXn(b, c, xn1); + /* this is because the inverse of the variance is always used in other + calculations/maximizations, yet I store it as the variance (uninverted) */ + //printf("\n-->-->-->-->--> %8.3e %8.3e\n", variance[smallest], ExpXn(b, c, xn1)); + + if (algo->verbose) + { + printf("\n2>>>> %8.3e %8.3e\n", xn1, variance[smallest]); + fflush(NULL); + } - MatDestroy(&evecs); + chi2 = chi_sqr_adapt(variance, vlen, 0, &logL, b, c, invgamma_pdf, invgamma_lnpdf, invgamma_int); } - else + else if (algo->covweight) { - memcpy(newvar, variance, vlen * sizeof(double)); - newlen = vlen - 3; + evecs = cdsA->tmpmatKK2; + evals = cdsA->var; + + if (vlen - 3 < nd - 6) + newlen = vlen - 3; + else + newlen = nd - 6; + + missing = vlen - newlen; + + //eigensym((const double **) cdsA->CovMat, evals, evecs, vlen); + /* eigensym evals are small to large */ + EigenGSL((const double **) cdsA->CovMat, vlen, evals, evecs, 0); + + for (i = 0; i < missing; ++i) + evals[i] = 0.0; + + memcpy(newvar, evals, vlen * sizeof(double)); + + if (algo->verbose) + printf("\n0>>>>>>>>>>>>>"); count = 0; do { oldb = b; -/* if (count > 3) */ -/* { */ -/* b = c * HarmonicAve(newvar, vlen); */ -/* } */ -/* else */ - { - qsort(newvar, vlen, sizeof(double), dblcmp); - b = vlen * (c + 1.0) * HarmonicAve(newvar, newlen) / newlen; - } + InvGammaAdjustVar(newvar+missing, vlen-missing, cnum, evals+missing, b, c); - if (b < DBL_EPSILON) - { - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i]) / (nd + 2.0*(1.0 + c)); - break; - } - else - { - for (i = 0; i < vlen; ++i) - newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - } + qsort(newvar, vlen, sizeof(double), dblcmp); - if (cdsA->algo->verbose != 0) + if (algo->verbose) { - printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f\n", + printf("\n>>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f", count, b, c, b / (c+1.0), logL, 
chi2); fflush(NULL); } + xn1 = newvar[missing]; + chi2 = invgamma_EMsmall_fit(newvar, vlen, missing, &b, &c, &logL); + +// for (i = missing; i < vlen; ++i) +// newvar[i] = (nd*evals[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); + if (count > 300) { - printf("\n WARNING04: Failed to converge in InvGammaFitVars_fixed_c(), round %d\n ", - cdsA->algo->rounds); + printf("\n WARNING04: Failed to converge in InvGammaFitEvalsML(), round %d\n ", + algo->rounds); fflush(NULL); break; } - if (iterate == 0) - break; + count++; - ++count; + if (iterate == 0 || algo->abort) + break; } while(fabs(oldb - b) > fabs(b*precision)); - for (i = 0; i < vlen; ++i) - variance[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); + if (algo->verbose) + { + printf("\n>>>>> Final: %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e", + count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); + fflush(NULL); + } - chi2 = chi_sqr_adapt(variance, vlen, 0, &logL, b, c, invgamma_pdf, invgamma_lnpdf, invgamma_int); - } + InvGammaAdjustVar(evals+missing, vlen-missing, cnum, evals+missing, b, c); -/* for (i = 0; i < vlen; ++i) */ -/* printf("\n%3d %e", i, newvar[i]); */ + xn1 = evals[missing]; - cdsA->stats->hierarch_p1 = b; - cdsA->stats->hierarch_p2 = c; - cdsA->stats->hierarch_chi2 = chi2; + if (algo->verbose) + { + printf("\n1>>>> %8.3e %8.3e", evals[0], xn1); + fflush(NULL); + } - free(newvar); -} + expinvx = 1.0 / ExpInvXn(b, c, xn1);; + for (i = 0; i < missing; ++i) + evals[i] = expinvx; -/* void */ -/* InvGammaFitVars_c1(CdsArray *cdsA, double *b, double *c) */ -/* { */ -/* const double *data = (const double *) cdsA->var; */ -/* const int num = cdsA->vlen; */ -/* double ave; */ -/* int i; */ -/* */ -/* ave = 0.0; */ -/* for (i = 0; i < num; ++i) */ -/* ave += (1.0 / data[i]); */ -/* */ -/* *b = num / ave; */ -/* */ -/* printf("\n Inverse Gamma: %7.3e %7.3e", *b, 1.0); */ -/* fflush(NULL); */ -/* } */ + if (algo->verbose) + { + printf("\n2>>>> %8.3e %8.3e\n", xn1, 
evals[0]); + fflush(NULL); + } + //chi2 = chi_sqr_adapt(evals, vlen, 0, &logL, b, c, invgamma_pdf, invgamma_lnpdf, invgamma_int); + //chi2 = chi_sqr_adapt(newvar, newlen, 0, &logL, b, c, invgamma_pdf, invgamma_lnpdf, invgamma_int); -void -InvGammaFitVars_GaussVarVar(CdsArray *cdsA, double *b, double *c) -{ - const double *data = (const double *) cdsA->var; - const int num = cdsA->vlen; - double harmave; - int i; + EigenReconSym(cdsA->CovMat, (const double **) evecs, evals, vlen); - *c = 0.5 * (num + 4.0); + for (i = 0; i < vlen; ++i) + evals[i] = cdsA->CovMat[i][i]; + } - harmave = 0.0; - for (i = 0; i < num; ++i) - harmave += (1.0 / data[i]); +// for (i = 0; i < vlen; ++i) +// printf("\n%3d %e", i, newvar[i]); - *b = *c * num / harmave; + stats->hierarch_p1 = b; + stats->hierarch_p2 = c; + stats->hierarch_chi2 = chi2; - printf(" Inverse Gamma: %7.3e %7.3e\n", *b, *c); - fflush(NULL); + free(newvar); } +/* This is the old approximate method, used in versions 1.0-1.1 */ +/* inverse gamma fit of variances, excluding the smallest 3 */ +/* This accounts for the fact that the smallest three eigenvalues of the covariance + matrix are always zero, i.e. the covariance matrix is necessarily of rank + vlen - 3 (or usually less, with inadequate amounts of data 3N-6). 
*/ void -InvGammaFitVars_Mode(CdsArray *cdsA, double *b, double *c, const double mode) +InvGammaFitEvals(CdsArray *cdsA, int iterate) { - double logL; - - invgamma_mode_fit((const double *) cdsA->var, cdsA->vlen, b, c, mode, &logL); - - printf(" Inverse Gamma: %7.3e %7.3e\n", *b, *c); - fflush(NULL); -} + double *newvar = NULL; + double *variance = cdsA->var; + //double **evecs = NULL; + double precision = algo->precision; + const int vlen = cdsA->vlen, cnum = cdsA->cnum; + double nd, oldb, oldc, b, c, chi2 = DBL_MAX, logL, harmave; + int i, count, newlen; + newvar = malloc(vlen * sizeof(double)); + nd = 3.0 * cnum; + oldb = oldc = DBL_MAX; -void -InvGammaStacyFitVars(CdsArray *cdsA, double *b, double *c) -{ - const double *data = (const double *) cdsA->var; - const int num = cdsA->vlen; - double *x = malloc(num * sizeof(double)); - double logL; - int i; + b = stats->hierarch_p1; + c = stats->hierarch_p2; - /* for robustness, toss the smallest variance */ - memcpy(x, data, num * sizeof(double)); - qsort(x, num, sizeof(double), dblcmp); + if (algo->varweight) + { + newlen = vlen - 3; - for (i = 0; i < num-1; ++i) - x[i] = (1.0 / x[i+1]); + memcpy(newvar, variance, vlen * sizeof(double)); - gamma_Stacyfit(x, num-1, b, c, &logL); - *b = 1.0 / *b; + count = 0; + do + { + oldb = b; + oldc = c; - /* printf("\n Inverse Gamma: %10.5f %10.5f", *b, *c); */ + qsort(newvar, vlen, sizeof(double), dblcmp_rev); + chi2 = invgamma_fit(newvar, newlen, &b, &c, &logL); - free(x); -} + if (2*c + 2 > 1000 * nd) + { + harmave = HarmonicAve(newvar + vlen - newlen, newlen); + for (i = 0; i < vlen; ++i) + variance[i] = harmave; -/* Fit a gamma distribution by method of moments (MMEs). 
*/ -void -InvGammaMMFitVars(CdsArray *cdsA, double *b, double *c) -{ - const double *data = (const double *) cdsA->var; - const int num = cdsA->vlen; - double *x = malloc(num * sizeof(double)); - double logL; - int i; + return; + } - for (i = 0; i < num; ++i) - x[i] = (1.0 / data[i]); + if (algo->alignment) + InvGammaAdjustVarNu(cdsA->cds, newvar, vlen, cnum, variance, b, c); + else + InvGammaAdjustVar(newvar, vlen, cnum, variance, b, c); - gamma_MMfit(x, num, b, c, &logL); - *b = 1.0 / *b; + /* newvar[findmin(variance, vlen)] = 2.0*b / (nd + 2.0*(1.0 + c)); */ - /* printf("\n Inverse Gamma: %10.5f %10.5f", *b, *c); */ + if (algo->verbose) + { + printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", + count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); + fflush(NULL); + } - free(x); -} + count++; + if (iterate == 0 || algo->abort) + break; + } + while(fabs(oldb - b) > fabs(b*precision) && + fabs(oldc - c) > fabs(c*precision)); -/* Var_IG = (ND/(ND + 2(c+1))) [(2b/ND) + Var_ML] */ -void -InvGammaAdjustVars(CdsArray *cdsA, const double b, const double c) -{ - int i; - double *variance = cdsA->var; - const int num = cdsA->vlen; - const double nd = 3.0 * cdsA->cnum; + if (algo->verbose) + { + printf(">>>>> Final: %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", + count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); + fflush(NULL); + } - for (i = 0; i < num; ++i) - { - /* printf("\n %10.5f", variance[i]); */ - /* variance[i] = (nd/(nd + 2.0*(1.0 + c))) * (variance[i] + 2.0*b/nd); */ - variance[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); - /* printf(" %10.5f", variance[i]); */ + if (algo->alignment) + InvGammaAdjustVarNu(cdsA->cds, variance, vlen, cnum, variance, b, c); + else + InvGammaAdjustVar(variance, vlen, cnum, variance, b, c); } -} - + else if (algo->covweight) + { + if (vlen - 3 < nd - 6) + newlen = vlen - 3; + else + newlen = nd - 6; -void 
-InvGammaAdjustCov(CdsArray *cdsA, const double b, const double c) -{ - int i, j; - const int vlen = cdsA->vlen; - const double nd = 3.0 * cdsA->cnum; + //evecs = cdsA->tmpmatKK2; - for (i = 0; i < vlen; ++i) - cdsA->CovMat[i][i] += 2.0 * b / nd; + //eigenvalsym((const double **) cdsA->CovMat, variance, evecs, vlen); + /* eigensym((const double **) cdsA->CovMat, variance, evecs, vlen); */ + EigenvalsGSL((const double **) cdsA->CovMat, vlen, variance); - for (i = 0; i < vlen; ++i) - for (j = 0; j < vlen; ++j) - cdsA->CovMat[i][j] *= nd / (nd + 2.0 * (1.0 + c)); -} + /* RevVecIp(variance, vlen); */ + for (i = 0; i < vlen - newlen; ++i) + variance[i] = 0.0; -void -InvGammaAdjustCovMode(CdsArray *cdsA, const double b, const double c) -{ -/* int i, j; */ -/* const int vlen = cdsA->vlen; */ -/* const double nd = 3.0 * cdsA->cnum; */ -/* */ -/* mode = b / (c+1.0); */ -/* */ -/* for (i = 0; i < vlen - newlen; ++i) */ -/* newvar[i] = mode; */ -/* for (i = 0; i < newlen; ++i) */ -/* printf("\n%3d %e", i, newvar[i]); */ -/* */ -/* for (i = 0; i < vlen; ++i) */ -/* cdsA->CovMat[i][i] += 2.0 * b / nd; */ -/* */ -/* for (i = 0; i < vlen; ++i) */ -/* for (j = 0; j < vlen; ++j) */ -/* cdsA->CovMat[i][j] *= nd / (nd + 2.0 * (1.0 + c)); */ -} +/* b = c = 0.0; */ + for (i = 0; i < vlen; ++i) + newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); -void -InvGammaAdjustCovND(CdsArray *cdsA, const double b, const double c) -{ - int i, j; - const int vlen = cdsA->vlen; + count = 0; + do + { + oldb = b; + oldc = c; - for (i = 0; i < vlen; ++i) - cdsA->CovMat[i][i] += 2.0 * b; + chi2 = invgamma_fit(newvar + vlen - newlen, newlen, &b, &c, &logL); - for (i = 0; i < vlen; ++i) - for (j = 0; j < vlen; ++j) - cdsA->CovMat[i][j] *= 1.0 / (3.0 + 2.0 * c); -} + for (i = 0; i < vlen; ++i) + newvar[i] = (nd*variance[i] + 2.0*b) / (nd + 2.0*(1.0 + c)); + if (algo->verbose) + { + printf(">>>>> %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", + count, b, c, b / 
(c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); + fflush(NULL); + } -/* Maximum likelihood fit to the reciprocal inverse gaussian distribution. - My own derivation: - mu = \Sum(1/x_i) / N - lambda = N / ( \Sum{x_i + 1/(x_i mu^2)} - (2 N / mu)) -*/ -void -RecipInvGaussFitVars(CdsArray *cdsA, double *mu, double *lambda) -{ - const double *data = (const double *) cdsA->var; - const int num = cdsA->vlen; - double sum_mu, sum_lambda; - const double numd = num; - int i; + if (count > 100) + { + printf("\n WARNING01: Failed to converge in InvGammaFitVars(), round %d\n", + algo->rounds); + fflush(NULL); + break; + } - sum_mu = 0.0; - for (i = 0; i < num; ++i) - sum_mu += 1.0 / data[i]; + count++; - *mu = sum_mu / numd; + if (iterate == 0 || algo->abort) + break; + } + while(fabs(oldb - b) > fabs(b*precision) && + fabs(oldc - c) > fabs(c*precision)); - sum_lambda = 0.0; - for (i = 0; i < num; ++i) - sum_lambda += (data[i] + 1.0 / (*mu * *mu * data[i])); + if (algo->verbose) + { + printf(">>>>> Final: %3d b:% -3.2e c:% -3.2e mode:%-10.5f logL:% -12.6f chi2:%-10.5f minvar:% -3.2e\n", + count, b, c, b / (c+1.0), logL, chi2, 2.0*b / (nd + 2.0*(1.0 + c))); + fflush(NULL); + } - *lambda = numd / (sum_lambda - 2.0 * numd / *mu); + InvGammaAdjustCov(cdsA, b, c); +/* printf("\n\n count: %d", count); */ +/* for (i = 0; i < vlen; ++i) */ +/* printf("\n%3d %8.3e %8.3e", i, newvar[i], mode); */ +/* EigenReconSym(cdsA->CovMat, (const double **) evecs, (const double *) newvar, vlen); */ - cdsA->stats->hierarch_p1 = *mu; - cdsA->stats->hierarch_p2 = *lambda; -} + for (i = 0; i < vlen; ++i) + variance[i] = cdsA->CovMat[i][i]; +/* printf("\n\n count: %d harm ave: %8.3e log ave: %8.3e", */ +/* count, HarmonicAve(variance, vlen), exp(LogarithmicAve(variance, vlen))); */ + } -/* var_RIG = 1/(2 lambda)[(ND-1) - sqrt{(ND-1)^2 - 4 lambda(lambda/mu^2 +ND var_ML)}] */ -void -RecipInvGaussAdjustVars(CdsArray *cdsA, - const double mu, const double lambda) -{ - int i; - double *variance = 
cdsA->var; - const int num = cdsA->vlen; - const double nd1 = 3.0 * num - 1.0; + stats->hierarch_p1 = b; + stats->hierarch_p2 = c; + stats->hierarch_chi2 = chi2; - for (i = 0; i < num; ++i) - { - printf(" %10.5f\n", variance[i]); - variance[i] = - nd1 - sqrt(nd1 * nd1 - 4.0 * lambda * (lambda/(mu*mu) + 3.0 * num * variance[i])); - variance[i] *= (0.5 / lambda); - printf(" %10.5f", variance[i]); - } + free(newvar); } + diff -Nru theseus-2.0.6/HierarchVars.h theseus-3.0.0/HierarchVars.h --- theseus-2.0.6/HierarchVars.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/HierarchVars.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,123 +26,19 @@ #ifndef HIERARCHVARS_SEEN #define HIERARCHVARS_SEEN -#include "pdbMalloc.h" - - -void -InvGammaEMFixedC(CdsArray *cdsA, const double c, int iterate); - -void -InvgaussFitVars(CdsArray *cdsA, double *mean, double *lambda); - -void -InvgaussAdjustVars(CdsArray *cdsA, - const double mean, const double lambda); - -void -LognormalFitVars(CdsArray *cdsA, double *zeta, double *sigma); - -void -LognormalAdjustVars(CdsArray *cdsA, double zeta, double sigma); - -void -InvGammaFitVars(CdsArray *cdsA, int iterate); - -void -ConjBayesAdjustVar(double *newvar, const double *var, const int vlen, const int cnum, const double phi); - -void -WishartAdjustVar(double *newvar, const double *var, const int vlen, const int cnum, const double phi); - -void -WishartAdjustVar2(double *newvar, const double *var, const int vlen, const int cnum, const double phi); - -void -WishartFitVar(CdsArray *cdsA, int iterate); - -void -WishartFitVar2(CdsArray *cdsA, int iterate); - void InvGammaFitEvals(CdsArray *cdsA, int iterate); void 
-InvGammaFitEvalsNoN(CdsArray *cdsA, int iterate); - -void -InvGammaFitModeEvals(CdsArray *cdsA, int iterate); - -void -InvGammaBayesFitEvals(CdsArray *cdsA, int iterate); - -void -InvGammaFitEvalsBfact(CdsArray *cdsA, int iterate); - -void -InvGammaFitEvals3(CdsArray *cdsA, int iterate); - -void -InvGammaBayesFitEvals3(CdsArray *cdsA, int iterate); - -void -InvGammaFitEvalsEq(CdsArray *cdsA, int iterate); - -void -InvGammaFitEvals2(CdsArray *cdsA, int iterate); - -void -InvGamma1FitEvals(CdsArray *cdsA, int iterate); - -void -InvGammaFitVarsND(CdsArray *cdsA, int iterate); - -void -InvGammaFitVars_minc(CdsArray *cdsA, const double minc, int iterate); - -void -InvGammaFitVars_fixed_c(CdsArray *cdsA, const double c, int iterate); - -void -InvGammaMLFixedCFitEvals(CdsArray *cdsA, const double c, int iterate); - -void -InvGammaEMFixedCFitEvals(CdsArray *cdsA, const double c, int iterate); - -void -InvGammaMLFitEvals(CdsArray *cdsA, int iterate); - -void -InvGammaBayesFitVars_fixed_c(CdsArray *cdsA, const double c, int iterate); - -void -InvGammaFitVars_GaussVarVar(CdsArray *cdsA, double *b, double *c); - -void -InvGammaFitVars_Mode(CdsArray *cdsA, double *b, double *c, const double mode); - -void -InvGammaStacyFitVars(CdsArray *cdsA, double *b, double *c); - -void -InvGammaMMFitVars(CdsArray *cdsA, double *b, double *c); - -void -InvGammaAdjustVars(CdsArray *cdsA, const double b, const double c); - -void -InvGammaAdjustCov(CdsArray *cdsA, const double b, const double c); - -void -InvGammaAdjustCovMode(CdsArray *cdsA, const double b, const double c); +InvGammaFitMarginalGSLBrent(CdsArray *cdsA); void -InvGammaAdjustCovND(CdsArray *cdsA, const double b, const double c); +InvGammaFitMarginalGSLRoot(CdsArray *cdsA); void -RecipInvGaussFitVars(CdsArray *cdsA, double *mu, double *lambda); +InvGammaFitEvalsEMFixedC(CdsArray *cdsA, const double c, int iterate); void -RecipInvGaussAdjustVars(CdsArray *cdsA, - const double mu, const double lambda); +InvGammaFitEvalsML(CdsArray 
*cdsA, int iterate); #endif diff -Nru theseus-2.0.6/internmat.h theseus-3.0.0/internmat.h --- theseus-2.0.6/internmat.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/internmat.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -566,36 +566,36 @@ }; /* Inverse Gamma 4.204e-03 4.202e-01 */ -double simevals[67] = -{ - 310.30, 25.815, 23.381, 4.2054, 2.8835, 2.0697, 1.7758, 0.97085, 0.61964, 0.44248, - 0.31220, 0.31016, 0.30472, 0.29729, 0.18909, 0.17261, 0.15538, 0.11715, 0.10828, 0.10749, - 0.10607, 0.10544, 0.10077, 0.071170, 0.070950, 0.066103, 0.064815, 0.056509, 0.055751, 0.046457, - 0.042207, 0.037712, 0.031944, 0.028278, 0.020632, 0.019811, 0.019278, 0.017944, 0.017393, 0.016847, - 0.016037, 0.014610, 0.013335, 0.013324, 0.012756, 0.012629, 0.010911, 0.010611, 0.010134, 0.0097525, - 0.0095817, 0.0080242, 0.0075681, 0.0067824, 0.0053294, 0.0051037, 0.0039548, 0.0037919, 0.0036660, 0.0036545, - 0.0034841, 0.0034483, 0.0028551, 0.0026013, 0.0020196, 0.0016284, 0.0011814 -}; +// static double simevals[67] = +// { +// 310.30, 25.815, 23.381, 4.2054, 2.8835, 2.0697, 1.7758, 0.97085, 0.61964, 0.44248, +// 0.31220, 0.31016, 0.30472, 0.29729, 0.18909, 0.17261, 0.15538, 0.11715, 0.10828, 0.10749, +// 0.10607, 0.10544, 0.10077, 0.071170, 0.070950, 0.066103, 0.064815, 0.056509, 0.055751, 0.046457, +// 0.042207, 0.037712, 0.031944, 0.028278, 0.020632, 0.019811, 0.019278, 0.017944, 0.017393, 0.016847, +// 0.016037, 0.014610, 0.013335, 0.013324, 0.012756, 0.012629, 0.010911, 0.010611, 0.010134, 0.0097525, +// 0.0095817, 0.0080242, 0.0075681, 0.0067824, 0.0053294, 0.0051037, 0.0039548, 0.0037919, 0.0036660, 0.0036545, +// 0.0034841, 0.0034483, 0.0028551, 
0.0026013, 0.0020196, 0.0016284, 0.0011814 +// }; /* Inverse Gamma 4.37e-03 3.95e-01 */ -double simevals2[67] = -{ - 5.587480e+00, 6.371975e+02, 4.592334e-03, 9.538267e-01, 3.205965e+00, - 2.038829e-03, 2.185746e-02, 8.378868e-02, 1.109509e-01, 4.275248e-03, - 5.172678e-03, 5.678137e-02, 2.392720e-02, 3.698641e+00, 8.570359e-01, - 3.907999e-01, 1.281967e-02, 4.530836e-02, 1.676085e-02, 9.889964e-03, - 2.630001e-03, 1.724972e-03, 1.261005e-01, 1.907326e+00, 5.899872e-02, - 3.226196e-02, 1.931122e-02, 4.912686e-02, 2.037596e-03, 5.260642e-03, - 4.347827e-02, 9.293019e-03, 2.499389e-03, 2.863749e-02, 2.168186e-02, - 1.952129e-02, 1.740615e-03, 2.212923e-02, 5.689231e-02, 1.538157e-01, - 3.438735e-02, 2.428651e-01, 3.179598e+00, 2.826661e-02, 1.165826e+01, - 2.880055e-02, 3.223456e-02, 5.549698e-02, 7.194860e-03, 1.437791e-03, - 1.077393e-01, 8.518754e-03, 4.028714e+01, 3.772714e-02, 2.027990e-03, - 5.645355e-03, 9.494095e-02, 7.281758e-02, 8.104411e-03, 2.027819e-02, - 1.676140e-01, 5.190187e-03, 9.170322e-01, 8.651568e-02, 1.501937e-02, - 1.105640e-02, 6.329355e-03 -}; +// static double simevals2[67] = +// { +// 5.587480e+00, 6.371975e+02, 4.592334e-03, 9.538267e-01, 3.205965e+00, +// 2.038829e-03, 2.185746e-02, 8.378868e-02, 1.109509e-01, 4.275248e-03, +// 5.172678e-03, 5.678137e-02, 2.392720e-02, 3.698641e+00, 8.570359e-01, +// 3.907999e-01, 1.281967e-02, 4.530836e-02, 1.676085e-02, 9.889964e-03, +// 2.630001e-03, 1.724972e-03, 1.261005e-01, 1.907326e+00, 5.899872e-02, +// 3.226196e-02, 1.931122e-02, 4.912686e-02, 2.037596e-03, 5.260642e-03, +// 4.347827e-02, 9.293019e-03, 2.499389e-03, 2.863749e-02, 2.168186e-02, +// 1.952129e-02, 1.740615e-03, 2.212923e-02, 5.689231e-02, 1.538157e-01, +// 3.438735e-02, 2.428651e-01, 3.179598e+00, 2.826661e-02, 1.165826e+01, +// 2.880055e-02, 3.223456e-02, 5.549698e-02, 7.194860e-03, 1.437791e-03, +// 1.077393e-01, 8.518754e-03, 4.028714e+01, 3.772714e-02, 2.027990e-03, +// 5.645355e-03, 9.494095e-02, 7.281758e-02, 8.104411e-03, 
2.027819e-02, +// 1.676140e-01, 5.190187e-03, 9.170322e-01, 8.651568e-02, 1.501937e-02, +// 1.105640e-02, 6.329355e-03 +// }; /* double vars[PROTLEN] = */ /* { */ diff -Nru theseus-2.0.6/libdistfit/beta_dist.c theseus-3.0.0/libdistfit/beta_dist.c --- theseus-2.0.6/libdistfit/beta_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/beta_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/beta_dist.h theseus-3.0.0/libdistfit/beta_dist.h --- theseus-2.0.6/libdistfit/beta_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/beta_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/betaprime_dist.c theseus-3.0.0/libdistfit/betaprime_dist.c --- theseus-2.0.6/libdistfit/betaprime_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/betaprime_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/betaprime_dist.h theseus-3.0.0/libdistfit/betaprime_dist.h --- theseus-2.0.6/libdistfit/betaprime_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/betaprime_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/betasym_dist.c theseus-3.0.0/libdistfit/betasym_dist.c --- theseus-2.0.6/libdistfit/betasym_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/betasym_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/betasym_dist.h theseus-3.0.0/libdistfit/betasym_dist.h --- theseus-2.0.6/libdistfit/betasym_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/betasym_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/binomial_dist.c theseus-3.0.0/libdistfit/binomial_dist.c --- theseus-2.0.6/libdistfit/binomial_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/binomial_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -55,10 +55,10 @@ if (p > 0.01) { - int nless = 0; - for (i = 0; i < n; ++i) - if(genrand_real2() < p) - ++nless; + int nless = 0; + for (i = 0; i < n; ++i) + if(genrand_real2() < p) + ++nless; return((double) nless); } @@ -188,13 +188,13 @@ } else { - *psigd = *psigdf = 0.0; - for (i = 0; i < x; ++i) - { - tmp = 1.0 / (n - i); - *psigd += tmp; - *psigdf -= tmp*tmp; - } + *psigd = *psigdf = 0.0; + for (i = 0; i < x; ++i) + { + tmp = 1.0 / (n - i); + *psigd += tmp; + *psigdf -= tmp*tmp; + } } } @@ -242,7 +242,7 @@ double tol = 1e-9; for (i = 0; i < num; ++i) - data[i] = RoundInt(x[i]); + data[i] = round(x[i]); max = findmax(data, num); @@ -251,7 +251,7 @@ Evans, Hastings, and Peacock, p 141. Mean and variance rearranged. 
*/ ave = average(data, num); - var = Variance(data, num, ave); + var = variance(data, num, ave); /* MMEs */ *p = pmme = (ave - var) / ave; @@ -264,8 +264,8 @@ if (*n < max) *n = max+1; - printf(" % 10.6e % 10.6e % 10.6e % 10.6e % 10.6e\n", - *n, *p, fx, dfx, fx/dfx); + printf(" % 10.6e % 10.6e % 10.6e % 10.6e % 10.6e\n", + *n, *p, fx, dfx, fx/dfx); for (i = 0; i < maxit; ++i) { @@ -290,7 +290,7 @@ i, *n, *p, fx, dfx, fx/dfx); } - *n = RoundInt(*n); + *n = round(*n); *p = ave / *n; /* if (i == maxit) */ @@ -325,7 +325,7 @@ int maxit = 100; for (i = 0; i < num; ++i) - data[i] = RoundInt(x[i]); + data[i] = round(x[i]); max = data[findmax(data, num)]; @@ -334,7 +334,7 @@ Evans, Hastings, and Peacock, p 141. Mean and variance rearranged. */ ave = average(data, num); - var = Variance(data, num, ave); + var = variance(data, num, ave); /* MMEs */ *p = pmme = (ave - var) / ave; @@ -350,8 +350,8 @@ *p = ave / *n; -/* printf("\n % 10.6e % 10.6e % 10.6e % 10.6e", *n, *p, ave, max); */ -/* fflush(NULL); */ +/* printf("\n % 10.6e % 10.6e % 10.6e % 10.6e", *n, *p, ave, max); */ +/* fflush(NULL); */ start = (int) *n; @@ -360,8 +360,8 @@ /* */ /* llogL = dist_logL(binomial_lnpdf, ln, lp, data, num); */ /* */ -/* printf("\n logL:% e n:%-3d p:% e", */ -/* llogL, ln, lp); */ +/* printf("\n logL:% e n:%-3d p:% e", */ +/* llogL, ln, lp); */ ln = start; pn = ln + 1; @@ -370,27 +370,27 @@ llogL = dist_logL(binomial_lnpdf, ln, lp, data, num); plogL = dist_logL(binomial_lnpdf, pn, pp, data, num); -/* printf("\n logL:% e n:%-3d p:% e", */ -/* llogL, ln, lp); */ -/* printf("\n logL:% e n:%-3d p:% e", */ -/* plogL, pn, pp); */ -/* fflush(NULL); */ - - if (plogL > llogL) - { - dir = 1; - pslope = 1; +/* printf("\n logL:% e n:%-3d p:% e", */ +/* llogL, ln, lp); */ +/* printf("\n logL:% e n:%-3d p:% e", */ +/* plogL, pn, pp); */ +/* fflush(NULL); */ + + if (plogL > llogL) + { + dir = 1; + pslope = 1; + } + else if (plogL < llogL) + { + dir = -1; + pslope = -1; + } + else + { + dir = 0; + pslope = 
0; } - else if (plogL < llogL) - { - dir = -1; - pslope = -1; - } - else - { - dir = 0; - pslope = 0; - } for (i = 0; i < maxit; ++i) { @@ -399,12 +399,12 @@ lp = pp; lslope = pslope; - if (pn == max && dir == -1) - { - *n = pn; - *p = pp; - break; - } + if (pn == max && dir == -1) + { + *n = pn; + *p = pp; + break; + } pn += dir; @@ -412,18 +412,18 @@ pp = ave / pn; plogL = dist_logL(binomial_lnpdf, pn, pp, data, num); - if (plogL > llogL) - pslope = 1 * dir; - else if (plogL < llogL) - pslope = -1 * dir; - else - { - pslope = 0; - } - -/* printf("\n%3d logL:% e n:%-3d p:% e slope:% d -- logL:% e n:%-3d p:% e slope:% d", */ -/* i, plogL, pn, pp, pslope, llogL, ln, lp, lslope); */ -/* fflush(NULL); */ + if (plogL > llogL) + pslope = 1 * dir; + else if (plogL < llogL) + pslope = -1 * dir; + else + { + pslope = 0; + } + +/* printf("\n%3d logL:% e n:%-3d p:% e slope:% d -- logL:% e n:%-3d p:% e slope:% d", */ +/* i, plogL, pn, pp, pslope, llogL, ln, lp, lslope); */ +/* fflush(NULL); */ if (pslope == 0) { diff -Nru theseus-2.0.6/libdistfit/binomial_dist.h theseus-3.0.0/libdistfit/binomial_dist.h --- theseus-2.0.6/libdistfit/binomial_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/binomial_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/cauchy_dist.c theseus-3.0.0/libdistfit/cauchy_dist.c --- theseus-2.0.6/libdistfit/cauchy_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/cauchy_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. 
Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/cauchy_dist.h theseus-3.0.0/libdistfit/cauchy_dist.h --- theseus-2.0.6/libdistfit/cauchy_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/cauchy_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/chi_dist.c theseus-3.0.0/libdistfit/chi_dist.c --- theseus-2.0.6/libdistfit/chi_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/chi_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/chi_dist.h theseus-3.0.0/libdistfit/chi_dist.h --- theseus-2.0.6/libdistfit/chi_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/chi_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/chisqr_dist.c theseus-3.0.0/libdistfit/chisqr_dist.c --- theseus-2.0.6/libdistfit/chisqr_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/chisqr_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/chisqr_dist.h theseus-3.0.0/libdistfit/chisqr_dist.h --- theseus-2.0.6/libdistfit/chisqr_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/chisqr_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/libdistfit/._chisqrgen_dist.c and /tmp/g2bOMTRwaC/theseus-3.0.0/libdistfit/._chisqrgen_dist.c differ diff -Nru theseus-2.0.6/libdistfit/chisqrgen_dist.c theseus-3.0.0/libdistfit/chisqrgen_dist.c --- theseus-2.0.6/libdistfit/chisqrgen_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/chisqrgen_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/chisqrgen_dist.h theseus-3.0.0/libdistfit/chisqrgen_dist.h --- theseus-2.0.6/libdistfit/chisqrgen_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/chisqrgen_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/libdistfit/._distfit.h and /tmp/g2bOMTRwaC/theseus-3.0.0/libdistfit/._distfit.h differ diff -Nru theseus-2.0.6/libdistfit/distfit.h theseus-3.0.0/libdistfit/distfit.h --- theseus-2.0.6/libdistfit/distfit.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/distfit.h 2014-05-13 16:48:52.000000000 +0000 @@ -88,108 +88,7 @@ EVD_fit(const double *x, const int n, double *rmu, double *rlambda, double *prob); #endif -#ifndef MATUTILS_SEEN -#define MATUTILS_SEEN -typedef struct -{ - int rows; - int cols; - int depth; - double ***matrix; - double **matrixc; - double *matrixd; -} Matrix3D; - -void -MatPrint(double **matrix, const int size); - -void -MatPrintRec(double **matrix, const int n, const int m); - -void -MatDestroy(double ***matrix_ptr); - -double -**MatAlloc(const int rows, const int cols); - -void -MatIntDestroy(int ***matrix_ptr); - -int -**MatIntInit(const int rows, const int cols); - -Matrix3D -*Mat3DInit(const int rows, const int cols, const int depth); - -void -Mat3DDestroy(Matrix3D **matrix3d_ptr); - -double -MatFrobNorm(const double **mat1, const double **mat2, const int row, const int col); - -double -MatDiff(const double **mat1, const double **mat2, const int row, const int col); - 
-void -MatCpySym(double **matrix2, const double **matrix1, const int dim); - -void -MatCpySymgen(double **matrix2, const double **matrix1, const int rows, const int cols); - -void -MatMultGenUSVOp(double **c, const double **u, double *s, const double **v, - const int udim, const int sdim, const int vdim); - -void -MatMultGen(double **C, const double **A, const int ni, const int nk, const double **B, const int nj); - -void -MatMultGenIp(double **A, const int nk, const int ni, const double **B, const int nj); - -void -MatTransMultGen(double **C, const double **A, const int ni, const int nk, const double **B, const int nj); - -void -MatTransMultGenIp(double **A, const int nk, const int ni, const double **B, const int nj); - -void -MatMultSym(double **C, const double **A, const double **B, const int len); - -void -MatMultSymDiag(double **C, const double **A, const double **B, const int len); - -void -MatTransIp(double **mat, const int dim); - -void -MatTransOp(double **outmat, const double **inmat, const int dim); - -void -cholesky(double **mat, const int dim, double *p); - -double -MatDet(const double **mat, const int dim); - -double -MatGenLnDet(const double **mat, const int dim); - -double -MatSymLnDet(const double **mat, const int dim); - -double -MatTrace(const double **mat, const int dim); - -int -TestZeroOffDiag(const double **mat, const int dim, const double precision); - -int -TestIdentMat(const double **mat, const int dim, const double precision); - -double -FrobDiffNormIdentMat(const double **mat, const int dim); - -#endif /* !MATRIXUTILS_SEEN */ #ifndef REGGAMMA_SEEN #define REGGAMMA_SEEN @@ -1324,9 +1223,6 @@ double -RoundInt(const double x); - -double average(const double *data, const int dim); double Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/libdistfit/._DLTmath.h and /tmp/g2bOMTRwaC/theseus-3.0.0/libdistfit/._DLTmath.h differ diff -Nru theseus-2.0.6/libdistfit/DLTmath.h theseus-3.0.0/libdistfit/DLTmath.h --- theseus-2.0.6/libdistfit/DLTmath.h 
2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/DLTmath.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/EVD_dist.c theseus-3.0.0/libdistfit/EVD_dist.c --- theseus-2.0.6/libdistfit/EVD_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/EVD_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -104,6 +104,13 @@ * * Return: (void) */ + /* + Lawless, Jerry F. (1982) +_Statistical Models and Methods for Lifetime Data_ + Wiley. + + For the 2003 book, see corresponding equations in chapter 5.2 + */ void Lawless416(const double *x, const int n, const double lambda, double *ret_f, double *ret_df) diff -Nru theseus-2.0.6/libdistfit/EVD_dist.h theseus-3.0.0/libdistfit/EVD_dist.h --- theseus-2.0.6/libdistfit/EVD_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/EVD_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/exp_dist.c theseus-3.0.0/libdistfit/exp_dist.c --- theseus-2.0.6/libdistfit/exp_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/exp_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/exp_dist.h theseus-3.0.0/libdistfit/exp_dist.h --- theseus-2.0.6/libdistfit/exp_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/exp_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/libdistfit/._gamma_dist.c and /tmp/g2bOMTRwaC/theseus-3.0.0/libdistfit/._gamma_dist.c differ diff -Nru theseus-2.0.6/libdistfit/gamma_dist.c theseus-3.0.0/libdistfit/gamma_dist.c --- theseus-2.0.6/libdistfit/gamma_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/gamma_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -547,7 +547,7 @@ void gamma_fit_no_stats(const double *data, const int num, double *b, double *c) { - double ave, /* var, */logterm, logdata, fx, dfx, fxdfx, guess_b, guess_c; + double ave, /* var, */logterm, logdata, fx, dfx, fxdfx; int i, maxiter = 500; double tol = FLT_EPSILON; @@ -604,8 +604,6 @@ if (*c > FLT_MAX) *c = FLT_MAX; - guess_b = *b; - guess_c = *c; /* Maximum likelihood fit. */ /* Use Newton-Raphson to find ML estimate of c Based on _Statistical Distributions_ 3rd ed. Evans, Hastings, and Peacock, p 41. diff -Nru theseus-2.0.6/libdistfit/gamma_dist.h theseus-3.0.0/libdistfit/gamma_dist.h --- theseus-2.0.6/libdistfit/gamma_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/gamma_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/libdistfit/._halfnormgamma_dist.c and /tmp/g2bOMTRwaC/theseus-3.0.0/libdistfit/._halfnormgamma_dist.c differ diff -Nru theseus-2.0.6/libdistfit/invchisqr_dist.c theseus-3.0.0/libdistfit/invchisqr_dist.c --- theseus-2.0.6/libdistfit/invchisqr_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/invchisqr_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/invchisqr_dist.h theseus-3.0.0/libdistfit/invchisqr_dist.h --- theseus-2.0.6/libdistfit/invchisqr_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/invchisqr_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/libdistfit/._invgamma_dist.c and /tmp/g2bOMTRwaC/theseus-3.0.0/libdistfit/._invgamma_dist.c differ diff -Nru theseus-2.0.6/libdistfit/invgamma_dist.c theseus-3.0.0/libdistfit/invgamma_dist.c --- theseus-2.0.6/libdistfit/invgamma_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/invgamma_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1208,13 +1208,13 @@ *c = *b; } - if (!isfinite(*b) || !isfinite(*c)) - { - printf("\n ERROR04: b(%e) or c(%e) parameter in invgamma_eq_bc_fit() not finite\n", - *b, *c); - fflush(NULL); - exit(EXIT_FAILURE); - } + if (!isfinite(*b) || !isfinite(*c)) + { + printf("\n ERROR04: b(%e) or c(%e) parameter in invgamma_eq_bc_fit() not finite\n", + *b, *c); + fflush(NULL); + exit(EXIT_FAILURE); + } /* Newton-Raphson to find ML estimate of c. 
We must find the root of: @@ -1233,15 +1233,15 @@ { evalinvgamma_eq_bc_ML(lnave, invave, *c, &fx, &dfx); - if (!isfinite(*c)) - { - printf("\n ERROR05: c(%e) parameter in invgamma_eq_bc_fit() not finite\n", - *c); + if (!isfinite(*c)) + { + printf("\n ERROR05: c(%e) parameter in invgamma_eq_bc_fit() not finite\n", + *c); printf("\n%3d: % 10.6e % 10.6e % 10.6e % 10.6e % 10.6e\n\n", i, *b, *c, fx, dfx, fx/dfx); - fflush(NULL); - exit(EXIT_FAILURE); - } + fflush(NULL); + exit(EXIT_FAILURE); + } fxdfx = fx/dfx; diff -Nru theseus-2.0.6/libdistfit/invgamma_dist.h theseus-3.0.0/libdistfit/invgamma_dist.h --- theseus-2.0.6/libdistfit/invgamma_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/invgamma_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/invgauss_dist.c theseus-3.0.0/libdistfit/invgauss_dist.c --- theseus-2.0.6/libdistfit/invgauss_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/invgauss_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/invgauss_dist.h theseus-3.0.0/libdistfit/invgauss_dist.h --- theseus-2.0.6/libdistfit/invgauss_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/invgauss_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/laplace_dist.c theseus-3.0.0/libdistfit/laplace_dist.c --- theseus-2.0.6/libdistfit/laplace_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/laplace_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/laplace_dist.h theseus-3.0.0/libdistfit/laplace_dist.h --- theseus-2.0.6/libdistfit/laplace_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/laplace_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/logistic_dist.c theseus-3.0.0/libdistfit/logistic_dist.c --- theseus-2.0.6/libdistfit/logistic_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/logistic_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/logistic_dist.h theseus-3.0.0/libdistfit/logistic_dist.h --- theseus-2.0.6/libdistfit/logistic_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/logistic_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/lognormal_dist.c theseus-3.0.0/libdistfit/lognormal_dist.c --- theseus-2.0.6/libdistfit/lognormal_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/lognormal_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -128,7 +128,7 @@ double lognormal_fit(const double *data, const int num, double *zeta, double *sigma, double *prob) { - double ave, avesqr, var, /* m, */ x, theta; + double ave, avesqr, var, x; int i; ave = avesqr = 0.0; @@ -165,9 +165,6 @@ *zeta = ave; *sigma = sqrt(var); - theta = 0.5*(sqrt(1.0 + 4.0*avesqr) - 1.0); - /* printf(" LogNormal theta: %f %e\n", theta, theta*theta + theta - avesqr); */ - /* printf("\n LogNormal logL: %f", lognormal_logL(*zeta, *sigma)); */ return(chi_sqr_adapt(data, num, 0, prob, *zeta, *sigma, lognormal_pdf, lognormal_lnpdf, lognormal_int)); } Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/libdistfit/._lognormal_dist.h and /tmp/g2bOMTRwaC/theseus-3.0.0/libdistfit/._lognormal_dist.h differ diff -Nru theseus-2.0.6/libdistfit/lognormal_dist.h theseus-3.0.0/libdistfit/lognormal_dist.h --- theseus-2.0.6/libdistfit/lognormal_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/lognormal_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/maxwell_dist.c theseus-3.0.0/libdistfit/maxwell_dist.c --- theseus-2.0.6/libdistfit/maxwell_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/maxwell_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/maxwell_dist.h theseus-3.0.0/libdistfit/maxwell_dist.h --- theseus-2.0.6/libdistfit/maxwell_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/maxwell_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/negbinom_dist.c theseus-3.0.0/libdistfit/negbinom_dist.c --- theseus-2.0.6/libdistfit/negbinom_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/negbinom_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -141,13 +141,13 @@ } else { - *psigd = *psigdf = 0.0; - for (j = 1; j <= x; ++j) - { - tmp = 1.0 / (a+x-j); - *psigd += tmp; - *psigdf -= tmp*tmp; - } + *psigd = *psigdf = 0.0; + for (j = 1; j <= x; ++j) + { + tmp = 1.0 / (a+x-j); + *psigd += tmp; + *psigdf -= tmp*tmp; + } } /* *psigd = *psigdf = 0.0; */ @@ -217,14 +217,14 @@ double tol = 1e-8; for (i = 0; i < num; ++i) - data[i] = RoundInt(x[i]); + data[i] = round(x[i]); /* Method of moments initial guess at shape parameters. See _Statistical Distributions_ 3rd ed. Evans, Hastings, and Peacock, p 141. Mean and variance rearranged. 
*/ ave = average(data, num); - var = Variance(data, num, ave); + var = variance(data, num, ave); /* MMEs */ *p = pmme = ave / var; @@ -265,7 +265,7 @@ /* ML estimate of p */ *p = (*a / (*a + ave)); - /* *a = RoundInt(*a); */ + /* *a = round(*a); */ } if (i == maxit) @@ -274,7 +274,7 @@ *p = pmme; } - *a = RoundInt(*a); + *a = round(*a); *p = (*a / (*a + ave)); /* printf("\n\nnegbinom logL %e\n", negbinom_logL(*a, *p)); */ diff -Nru theseus-2.0.6/libdistfit/negbinom_dist.h theseus-3.0.0/libdistfit/negbinom_dist.h --- theseus-2.0.6/libdistfit/negbinom_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/negbinom_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/normal_dist.c theseus-3.0.0/libdistfit/normal_dist.c --- theseus-2.0.6/libdistfit/normal_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/normal_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,7 +37,7 @@ double normal_dev(const double mean, const double var, const gsl_rng *r2) { - return (gsl_ran_gaussian(r2, sqrt(var)) + mean); + return (gsl_ran_gaussian_ziggurat(r2, sqrt(var)) + mean); } /* double */ diff -Nru theseus-2.0.6/libdistfit/normal_dist.h theseus-3.0.0/libdistfit/normal_dist.h --- theseus-2.0.6/libdistfit/normal_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/normal_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/pareto_dist.c theseus-3.0.0/libdistfit/pareto_dist.c --- theseus-2.0.6/libdistfit/pareto_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/pareto_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/pareto_dist.h theseus-3.0.0/libdistfit/pareto_dist.h --- theseus-2.0.6/libdistfit/pareto_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/pareto_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/libdistfit/._rayleigh_dist.c and /tmp/g2bOMTRwaC/theseus-3.0.0/libdistfit/._rayleigh_dist.c differ diff -Nru theseus-2.0.6/libdistfit/rayleigh_dist.c theseus-3.0.0/libdistfit/rayleigh_dist.c --- theseus-2.0.6/libdistfit/rayleigh_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/rayleigh_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/libdistfit/._rayleigh_dist.h and /tmp/g2bOMTRwaC/theseus-3.0.0/libdistfit/._rayleigh_dist.h differ diff -Nru theseus-2.0.6/libdistfit/rayleigh_dist.h theseus-3.0.0/libdistfit/rayleigh_dist.h --- theseus-2.0.6/libdistfit/rayleigh_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/rayleigh_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/recinvgauss_dist.c theseus-3.0.0/libdistfit/recinvgauss_dist.c --- theseus-2.0.6/libdistfit/recinvgauss_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/recinvgauss_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. 
Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/recinvgauss_dist.h theseus-3.0.0/libdistfit/recinvgauss_dist.h --- theseus-2.0.6/libdistfit/recinvgauss_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/recinvgauss_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/rice_dist.c theseus-3.0.0/libdistfit/rice_dist.c --- theseus-2.0.6/libdistfit/rice_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/rice_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -41,8 +41,8 @@ { double mean2 = M_SQRT1_2 * mean; double sigma = sqrt(var); - double var1 = gsl_ran_gaussian(r2, sigma) + mean2; - double var2 = gsl_ran_gaussian(r2, sigma) + mean2; + double var1 = gsl_ran_gaussian_ziggurat(r2, sigma) + mean2; + double var2 = gsl_ran_gaussian_ziggurat(r2, sigma) + mean2; return (sqrt(var1*var1 + var2*var2)); } Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/libdistfit/._rice_dist.h and /tmp/g2bOMTRwaC/theseus-3.0.0/libdistfit/._rice_dist.h differ diff -Nru theseus-2.0.6/libdistfit/rice_dist.h theseus-3.0.0/libdistfit/rice_dist.h --- theseus-2.0.6/libdistfit/rice_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/rice_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/statistics.c theseus-3.0.0/libdistfit/statistics.c --- theseus-2.0.6/libdistfit/statistics.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/statistics.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,7 +22,6 @@ -/_|:|_|_\- */ - #include #include #include @@ -37,21 +36,6 @@ #include "invgauss_dist.h" #include "statistics.h" -#define EPS1 0.001 -#define EPS2 1.0e-8 - - -double -RoundInt(const double x) -{ - double tmp; - - if (modf(x, &tmp) < 0.5) - return(tmp); - else - return(tmp + 1.0); -} - double average(const double *data, const int dim) @@ -67,7 +51,7 @@ double -Variance(const double *data, const int dim, const double mean) +variance(const double *data, const int dim, const double mean) { double v = 0.0, tmpv; int i = dim; @@ -334,9 +318,9 @@ /* fflush(NULL); */ } - expect = 0.0; - for (j = 0; j < nmix; ++j) - expect += mixp[j] * dist_int(array[i * nbins], array[num - 1], p1[j], p2[j]); + expect = 0.0; + for (j = 0; j < nmix; ++j) + expect += mixp[j] * dist_int(array[i * nbins], array[num - 1], p1[j], p2[j]); if (expect != 0.0) { @@ -582,628 +566,3 @@ } -double -kstwo(double *data1, - int n1, - double *data2, - int n2) -{ - int j1 = 1, j2 = 1; - double d, d1, d2, dt, en1, en2, en, fn1 = 0.0, fn2 = 0.0; - double prob; - - quicksort (data1, n1); - quicksort (data2, n2); - en1 = n1; - en2 = n2; - d = 0.0; -/* printf("\n N1 = %d", n1); */ -/* printf("\n N2 = %d", n2); */ - - while (j1 <= n1 && j2 <= n2) - { - if ((d1 = data1[j1]) <= (d2 = data2[j2])) - fn1 = j1++ / en1; - if (d2 <= d1) - fn2 = j2++ / en2; - if ((dt = fabs(fn2 - fn1)) > d) - d = dt; - } - -/* printf("\n Dmax = %f", d); */ - en = en1 * en2 / (en1 + en2); -/* printf("\n Ne = %d", (int) en); */ - prob = 1.0 - KS(d, en); - -/* en = sqrt(en1 * en2 / (en1 + en2)); */ -/* prob = probks((en + 0.12 + 0.11 / en) * d); */ - - return (prob); -} - - -double -ksone(double *data, int n, double (*func)(double)) -{ - int j; - double d, dt, ff, fn, fo = 0.0; - double prob; - double ave, stddev; - - quicksort(data, n); - - ave = 0.0; - for (j = 0; j < n; ++j) - 
ave += data[j]; - ave /= (double)n; - - stddev = 0.0; - for (j = 0; j < n; ++j) - stddev += mysquare(data[j] - ave); - stddev /= (double)(n - 1); - stddev = sqrt(stddev); - - for (j = 0; j < n; ++j) - data[j] = (data[j] - ave)/stddev; - - d = 0.0; - for (j = 0; j < n; ++j) - { - fn = j / (double) n; - ff = (*func)(data[j]); - /* printf("\n ff = %f", ff); */ - /* printf("\n data[%5d] = %f", j, data[j]); */ - dt = mymaxdbl(fabs(fo - ff), fabs(fn - ff)); - if (dt > d) - d = dt; - fo = fn; - } - - printf(" Dmax = %f\n", d); -/* printf("\n Ne = %d", (int) en); */ - prob = 1.0 - KS(d, n); - - return (prob); -} - - -double -probks(double alam) -{ - int j; - double a2, fac = 2.0, sum = 0.0, term, termbf = 0.0; - - a2 = -2.0 * alam * alam; - for (j = 1; j <= 100; j++) - { - term = fac * exp(a2 * j * j); - sum += term; - if (fabs(term) <= EPS1 * termbf || fabs(term) <= EPS2 * sum) - return (sum); - fac = -fac; - termbf = fabs(term); - } - return (1.0); -} - - -#undef EPS1 -#undef EPS2 - -void -mMultiply(double *A, double *B, double *C, int m) -{ - int i, j, k; - double s; - - for (i = 0; i < m; i++) - for (j = 0; j < m; j++) - { - s = 0.; - for (k = 0; k < m; k++) - s += A[i * m + k] * B[k * m + j]; - C[i * m + j] = s; - } -} - - -/* George Marsaglia, Wai Wan Tsang, and Jingbo Wang - "Evaluating Kolmogorov's Distribution" - Volume 8, 2003, Issue 18 - good to 7 digits *everywhere*, better if you uncomment the line */ -void -mPower(double *A, int eA, double *V, int *eV, int m, int n) -{ - double *B; - int eB, i; - - if (n == 1) - { - for (i = 0; i < m * m; i++) - V[i] = A[i]; - - *eV = eA; - - return; - } - - mPower(A, eA, V, eV, m, n / 2); - B = (double *) malloc((m * m) * sizeof(double)); - mMultiply(V, V, B, m); - eB = 2 * (*eV); - - if (n % 2 == 0) - { - for (i = 0; i < m * m; i++) - V[i] = B[i]; - - *eV = eB; - } - else - { - mMultiply(A, B, V, m); - *eV = eA + eB; - } - - if (V[(m / 2) * m + (m / 2)] > 1e140) - { - for (i = 0; i < m * m; i++) - V[i] = V[i] * 1e-140; - - 
*eV += 140; - } - - free(B); -} - - -double -KS(double d, int n) -{ - int k, m, i, j, g, eH, eQ; - double h, s, *H, *Q; - - /* OMIT NEXT TWO LINEs IF YOU REQUIRE > 7 DIGIT ACCURACY IN THE RIGHT TAIL */ - s = d * d * n; - - if (s > 7.24 || (s > 3.76 && n > 99)) - return (1 - 2 * exp(-(2.000071 + .331 / sqrt(n) + 1.409 / n) * s)); - - k = (int) (n * d) + 1; - m = 2 * k - 1; - h = k - n * d; - H = (double *) malloc((m * m) * sizeof(double)); - Q = (double *) malloc((m * m) * sizeof(double)); - - for (i = 0; i < m; i++) - for (j = 0; j < m; j++) - if (i - j + 1 < 0) - H[i * m + j] = 0; - else - H[i * m + j] = 1; - - for (i = 0; i < m; i++) - { - H[i * m] -= pow(h, i + 1); - H[(m - 1) * m + i] -= pow(h, (m - i)); - } - - H[(m - 1) * m] += (2 * h - 1 > 0 ? pow(2 * h - 1, m) : 0); - - for (i = 0; i < m; i++) - for (j = 0; j < m; j++) - if (i - j + 1 > 0) - for (g = 1; g <= i - j + 1; g++) - H[i * m + j] /= g; - - eH = 0; - mPower(H, eH, Q, &eQ, m, n); - s = Q[(k - 1) * m + k - 1]; - - for (i = 1; i <= n; i++) - { - s = s * i / n; - - if (s < 1e-140) - { - s *= 1e140; - eQ -= 140; - } - } - - s *= pow(10., eQ); - - free(H); - free(Q); - - return s; -} - - -double -nlogn(double n) -{ - return(n * log(n)); -} - - -double -Factorial(long unsigned int N) -{ - int i; - double F; - - if (N == 1 || N == 0) - return(1.0); - - F = 1.0; - for (i = 1; i <= N; ++i) - F *= (double) i; - - return (F); -} - - -double -Gosper(long unsigned int N) -{ - double F; - double dblN = (double)N; - - if (N == 1 || N == 0) - return(1.0); - - F = sqrt((2.0*dblN + 1.0/3.0) * MY_PI); - F *= pow((dblN / MY_E), dblN); - - return(F); -} - - -double -Stirling(long unsigned int N) -{ - double F; - double dblN = (double)N; - double series; - double sqrN, cubeN, pow4N, pow5N; - - if (N == 1 || N == 0) - return(1.0); - - sqrN = mysquare(dblN); - cubeN = sqrN*dblN; - pow4N = mysquare(sqrN); - pow5N = pow4N*dblN; - - F = sqrt(2.0 * MY_PI * dblN); - F *= pow((dblN / MY_E), dblN); - series = 1.0 + 1.0/(12.0 * 
dblN); - series += 1.0/(288.0 * sqrN); - series -= 139.0/(51840.0 * cubeN); - series -= 571.0/(2488320.0 * pow4N); - series += 163879.0/(209018880.0 * pow5N); - F *= series; - - return(F); -} - - -/* http://mathworld.wolfram.com/StirlingsSeries.html */ -double -LnFactorial(long unsigned int N) -{ - double S, cubeN, pow5N; - - if (N == 1 || N == 0) - return(0.0); - - cubeN = mycube(N); - pow5N = mycube(N) * mysquare(N); - /*pow7N = pow5N * mysquare(N);*/ - - S = log(2.0 * MY_PI)/2.0; - S += log(N)/2.0; - S += (nlogn(N) - N); - S += (1.0 / (12.0 * N)); - S += (1.0 / (360.0 * cubeN)); - S += (1.0 / (1260.0 * pow5N)); - /*S -= (1.0 / (1680.0 * pow7N)); this term makes no difference */ - - return (S); -} - - -double -LnFactorialPlus(long unsigned int N) -{ - double S, cubeN, pow5N, pow7N; - double dblN = (double)N; - - if (N == 1 || N == 0 || N == 1) - return(0.0); - - cubeN = mycube(dblN); - pow5N = mycube(dblN) * mysquare(dblN); - pow7N = pow5N * mysquare(dblN); - - S = log(2.0 * MY_PI)/2.0; - S += log(dblN)/2.0; - S += (nlogn(dblN) - dblN); - S += (1.0 / (12.0 * dblN)); - S += (1.0 / (360.0 * cubeN)); - S += (1.0 / (1260.0 * pow5N)); - S -= (1.0 / (1680.0 * pow7N)); - - return (S); -} - - -double -DblFactorialNaive(long unsigned int N) -{ - int i; - double DF; - - if (N == -1 || N == 0 || N == 1) - return(1.0); - - DF = 1.0; - for (i = N; i > 0; i=i - 2) - DF *= (double) i; - - return (DF); -} - - -double -DblFactorial(long unsigned int N) -{ - double DF, dblN = (double) N; - double twoPI = 2.0 / MY_PI; - - if (N == -1 || N == 0 || N == 1) - return(1.0); - - DF = pow(twoPI, (1.0 - cos(MY_PI * dblN))/4.0); - DF *= sqrt(MY_PI); - DF *= pow(dblN, (double)(N+1)/2.0); - DF *= exp(-dblN/2.0); - DF *= (1.0 + 1.0/(6.0 * dblN) + 1.0/(72.0 * mysquare(dblN)) - 139.0/(6480.0 * mycube(dblN))); - - return(DF); -} - - -/* http://functions.wolfram.com/GammaBetaErf/Factorial2/06/02/0002/ */ -double -LnDblFactorial(long unsigned int N) -{ - double DF, dblN = (double) N; - double twoPI 
= 2.0 / MY_PI; - double sqrN = mysquare(dblN); - double cubeN = sqrN*dblN; - double quadN = cubeN * dblN; - /* double pentN = quadN * dblN; */ - double series; - - if (N == -1 || N == 0 || N == 1) - return(0.0); - - DF = log(twoPI) * ((1.0 - cos(MY_PI*N))/4.0); - DF += log(MY_PI)/2.0; - DF += log(dblN) * ((double)(N+1)/2.0); - DF -= N/2.0; - series = 1.0 + 1.0/(6.0 * dblN); - series += 1.0 / (72.0 * sqrN); - series -= 139.0 / (6480.0 * cubeN); - series -= 571.0 / (155520.0 * quadN); - /*series -= 163879.0 / (6531840.0 * pentN);*/ - DF += log(series); - return(DF); -} - - -/************************************************************************ -* Combination * -* * -* This defines an iterative combination without needing three * -* factorials. * -* * -* The number of combinations is returned for N objects taken M at a * -* time, without regard for order. * -************************************************************************/ -double -Combination(double N, double M) -{ - int i; - double C; - double top_term = 1.0; - - if (N == M) - return (1.0); - else if ((N/M) < 2.0) - { - for (i = (M + 1.0); i <= N; ++i) - top_term *= (double) i; - C = top_term / Factorial(N - M); - } - else - { - for (i = (N - M + 1.0); i <= N; ++i) - top_term *= (double) i; - C = top_term / Factorial(M); - } - return (C); -} - - -double -MultinomCoeff(long unsigned int tries, long unsigned int *wins, int nclasses) -{ - int i; - double denom, mc; - - denom = 0.0; - for (i = 0; i < nclasses; ++i) - { - if (wins[i] < 170) - denom += log(Factorial(wins[i])); - else - denom += LnFactorialPlus(wins[i]); - } - - if (tries < 170) - mc = log(Factorial(tries)); - else - mc = LnFactorialPlus(tries); - - mc = exp(mc-denom); - - return (mc); -} - - -double -LnMultinomCoeff(long unsigned int tries, long unsigned int *wins, int nclasses) -{ - int i, tmp; - double denom, mc; - - tmp = 0; - for (i = 0; i < nclasses; ++i) - tmp += wins[i]; - - if (tmp != tries) - { - printf("\n\n %3d %3d \n\n", tmp, (int) 
tries); - exit(EXIT_FAILURE); - } - - denom = 0.0; - for (i = 0; i < nclasses; ++i) - { - if (wins[i] < 170) - denom += log(Factorial(wins[i])); - else - denom += LnFactorialPlus(wins[i]); - } - - if (tries < 170) - mc = log(Factorial(tries)); - else - mc = LnFactorialPlus(tries); - - mc -= denom; - - return (mc); -} - - -double -LnCombinationStirling(double N, double M) -{ - double C; - double lnfactN, lnfactM, lnfactNM; - - if (N < 170) - lnfactN = log(Factorial(N)); - else - lnfactN = LnFactorialPlus(N); - - if (M < 170) - lnfactM = log(Factorial(M)); - else - lnfactM = LnFactorialPlus(M); - - if (N-M < 170) - lnfactNM = log(Factorial(N-M)); - else - lnfactNM = LnFactorialPlus(N-M); - - C = lnfactN - lnfactM - lnfactNM; - - return (C); -} - - -/************************************************************************ -* Binomial_P * -* * -* Calculates the specific binomial probability. * -* * -* Takes N number of tries, M number of wins, for an individual * -* probability p of success for each win. * -* * -************************************************************************/ -double -Binomial_P(long unsigned int n, long unsigned int m, double p) -{ - double binom_p; - double t1, t2; - - t1 = Combination(n, m); - t2 = (double) pow((double) p, (double) m) * (double) pow((double) (1 - p), (double) (n - m)); - binom_p = t1 * t2; - - return (binom_p); -} - - -/* This works */ -double -Multinomial_P(long unsigned int tries, long unsigned int *wins, double *p, int nclasses) -{ - int i; - double multinom_p; - double t1, t2; - - int tmp = 0; - for (i = 0; i < nclasses; ++i) - tmp += wins[i]; - - if (tmp != tries) - printf("\n Multinomial_P BONK! 
%3d %3d \n", tmp, (int)tries); - - t1 = LnMultinomCoeff(tries, wins, nclasses); - - t2 = 0.0; - for (i = 0; i < nclasses; ++i) - { - if (p[i] == 0.0 || wins[i] == 0.0) - continue; - else - t2 += ((double) wins[i] * log(p[i])); - } - - multinom_p = t1 + t2; - - return (exp(multinom_p)); -} - - -/************************************************************************ -* Binomial_sum * -* * -* Sums the individual binomial probabilities for m wins **or more** * -* * -************************************************************************/ -double -Binomial_sum(long unsigned int n, long unsigned int m, double p) -{ - double bsum; - int i; - - bsum = 0.0; - for (i = m; i <= n; ++i) - bsum += Binomial_P(n, i, p); - - return (bsum); -} - - -double -student_t(double t, double df) -{ - return(InBeta((0.5 * df), 0.5, (df / (df + mysquare(t))))); -} diff -Nru theseus-2.0.6/libdistfit/statistics.h theseus-3.0.0/libdistfit/statistics.h --- theseus-2.0.6/libdistfit/statistics.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/statistics.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,13 +28,10 @@ double -RoundInt(const double x); - -double average(const double *data, const int dim); double -Variance(const double *data, const int dim, const double mean); +variance(const double *data, const int dim, const double mean); double VarVariance(const double *data, const int ddim, const int sdim); @@ -82,76 +79,4 @@ int dblcmp_rev(const void *dbl1, const void *dbl2); -double -normal_dist(double x); - -double -probks(double alam); - -double -kstwo(double *data1, int n1, double *data2, int n2); - -double -ksone(double *data, int n, double (*func)(double)); - -double -probks_dw(double ksd, int em); - -void -mMultiply(double *A, double *B, double *C, int m); - -void -mPower(double *A, int eA, double *V, int *eV, int m, int n); - -double -KS(double d, int n); - -double -F_prob(int dn, int dd, double fr); - -double -L504(int a, double f, int b, int iv); - -double -L401(int a, double f, int b, int iv); - -double -nlogn(double n); - -double -Factorial(long unsigned int N); - -double -Combination(double N, double M); - -double -LnCombinationStirling(double N, double M); - -double -MultinomCoeff(long unsigned int tries, long unsigned int *wins, int nclasses); - -double -LnMultinomCoeff(long unsigned int tries, long unsigned int *wins, int nclasses); - -double -Binomial_P(long unsigned int n, long unsigned int m, double p); - -double -Multinomial_P(long unsigned int tries, long unsigned int *wins, double *p, int classes); - -double -Multinomial_UpTail(long unsigned int tries, long unsigned int *wins, double *p, int classes); - -void -insort_multinom(double *rank, double *prob, long unsigned int *wins, int nclasses); - -double -Binomial_sum(long unsigned int n, long unsigned int m, double p); - -double -student_t(double t, double df); - -double -stdnormal(void); - #endif diff -Nru 
theseus-2.0.6/libdistfit/uniform_dist.c theseus-3.0.0/libdistfit/uniform_dist.c --- theseus-2.0.6/libdistfit/uniform_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/uniform_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/uniform_dist.h theseus-3.0.0/libdistfit/uniform_dist.h --- theseus-2.0.6/libdistfit/uniform_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/uniform_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/vonmises_dist.c theseus-3.0.0/libdistfit/vonmises_dist.c --- theseus-2.0.6/libdistfit/vonmises_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/vonmises_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2008 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -47,8 +47,8 @@ */ -/* -static double +/* +static double pseudo_vonmises_target(const double a, const double b, const double x) { return(a * cos(x) + b * cos(x)); @@ -57,8 +57,8 @@ -/* -static double +/* +static double mardia_gadsden_target(const double a, const double b, const double x) { return((a * cos(x) + b * cos(x)) * cos(x)); @@ -67,7 +67,7 @@ -/* +/* double pseudo_vonmises_met(const double a, const double b, const double x) { @@ -89,7 +89,7 @@ -/* +/* double mardia_gadsden_met(const double a, const double b, const double x) { @@ -124,7 +124,7 @@ } -/* +/* double vonmises_dev2(const double k) { @@ -147,7 +147,7 @@ z = cos(MY_PI * u); w = (1.0 + s*z) / (s + z); y = k * (s - w); - + if (w * (2.0 - w) - v > DBL_MIN) accept = 1; else if (log(w/v) + 1.0 > DBL_MIN) @@ -164,6 +164,67 @@ */ +double +vonmises_dev2(const double mean, const double k, const gsl_rng *r2) +{ + double result = 0.0; + + double a = 1.0 + sqrt(1 + 4.0 * (k * k)); + double b = (a - sqrt(2.0 * a))/(2.0 * k); + double r = (1.0 + b * b)/(2.0 * b); + + while (1) + { + double U1 = gsl_ran_flat(r2, 0.0, 1.0); + double z = cos(MY_PI * U1); + double f = (1.0 + r * z)/(r + z); + double c = k * (r - f); + double U2 = gsl_ran_flat(r2, 0.0, 1.0); + + if (c * (2.0 - c) - U2 > 0.0) + { + double U3 = gsl_ran_flat(r2, 0.0, 1.0); + double sign = 0.0; + + if (U3 - 0.5 < 0.0) + sign = -1.0; + + if (U3 - 0.5 > 0.0) + sign = 1.0; + + result = sign * acos(f) + mean; + + while (result >= 2.0 * MY_PI) + result -= 2.0 * MY_PI; + + break; + } + else + { + if(log(c/U2) + 1.0 - c >= 0.0) + { + double U3 = gsl_ran_flat(r2, 0.0, 1.0); + double sign = 0.0; + + if (U3 - 0.5 < 0.0) + sign = -1.0; + + if (U3 - 0.5 > 0.0) + sign = 1.0; + + result = sign * acos(f) + mean; + + while (result >= 2.0 * MY_PI) + result -= 2.0 * MY_PI; + + break; + } + } + } + + return(result); +} + /* D. 
J. Best and N. I. Fisher (1979) "Efficient simulation of the von Mises distribution." @@ -173,6 +234,9 @@ { double z, f, c, t, p, r; + if (b < DBL_EPSILON) + return(a); + t = 1.0 + sqrt(1.0 + 4.0*b*b); p = 0.5*(t - sqrt(2.0*t))/b; r = 0.5*(1.0 + p*p)/p; @@ -272,7 +336,7 @@ F1' = I_0(b) [\Sum{cos(x-a)} - N] - (N/2) I_1(b) */ void -evalvonmisesML(const double cosdif, const double b, const int num, +evalvonmisesML(const double cosdif, const double b, const int num, double *fx, double *dfx) { double i0b = BesselI0(b); @@ -284,7 +348,7 @@ void -evalvonmisesML_EHP(const double sincosterm, const double b, const int num, +evalvonmisesML_EHP(const double sincosterm, const double b, const int num, double *fx, double *dfx) { double i0b = BesselI0(b); @@ -296,7 +360,7 @@ static void -evalvonmisesMLmu(const double *data, const double a, const int num, +evalvonmisesMLmu(const double *data, const double a, const int num, double *fx, double *dfx) { double sinsum, cossum; @@ -357,19 +421,19 @@ } /* For a (mu), well, we use my own derivation that I've never seen - anywhere but it seems straight-forward to me. + anywhere but it seems straight-forward to me. Evans, Hastings, and Peacock 2000 (_Statistical Distributions_) give the MLE as: - + \hat{mu} = arctan[\Sum{sin x_i}/\Sum{cos x_i}] - + but this has problems, it gives a negative answer for real mu = 3, 4 etc. Part of the problem is fixed by using atan2, which computes the correct quadrant, but it still doesn't always work. 
So, I simply did the standard MLE derivation -- - take the log of the PDF, take the first derivative, set it + take the log of the PDF, take the first derivative, set it to zero, and you get: - + \Sum{sin(x - mu)} = 0 which can't be solved analytically, but can easily be solved Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/libdistfit/._vonmises_dist.h and /tmp/g2bOMTRwaC/theseus-3.0.0/libdistfit/._vonmises_dist.h differ diff -Nru theseus-2.0.6/libdistfit/vonmises_dist.h theseus-3.0.0/libdistfit/vonmises_dist.h --- theseus-2.0.6/libdistfit/vonmises_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/vonmises_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2008 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/libdistfit/._weibull_dist.c and /tmp/g2bOMTRwaC/theseus-3.0.0/libdistfit/._weibull_dist.c differ diff -Nru theseus-2.0.6/libdistfit/weibull_dist.c theseus-3.0.0/libdistfit/weibull_dist.c --- theseus-2.0.6/libdistfit/weibull_dist.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/weibull_dist.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdistfit/weibull_dist.h theseus-3.0.0/libdistfit/weibull_dist.h --- theseus-2.0.6/libdistfit/weibull_dist.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdistfit/weibull_dist.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/libdltmath/._DLTmath.h and /tmp/g2bOMTRwaC/theseus-3.0.0/libdltmath/._DLTmath.h differ diff -Nru theseus-2.0.6/libdltmath/DLTmath.h theseus-3.0.0/libdltmath/DLTmath.h --- theseus-2.0.6/libdltmath/DLTmath.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/DLTmath.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -59,6 +59,8 @@ #define POW4(a) ((a)*(a)*(a)*(a)) #define SIGN(a,b) ((b) >= 0.0 ? 
fabs(a) : -fabs(a)) +#endif + #ifndef MAT3UTILS_SEEN #define MAT3UTILS_SEEN @@ -71,6 +73,9 @@ int Mat3Eq(const double **matrix1, const double **matrix2, const double precision); +double +Mat3FrobDiff(const double **matrix1, const double **matrix2); + int Mat3FrobEq(const double **matrix1, const double **matrix2, const double precision); @@ -134,16 +139,41 @@ int VerifyRotMat(double **rotmat, double tol); -double -**ClosestRotMat(double **inmat); - void ClosestRotMatIp(double **inmat); double RotMat2AxisAngle(double **rot, double *v); -#endif /* !MATRIXUTILS_SEEN */ +double +RotMat2AxisAngleQuat(double **rot, double *v); + +#endif +/* + Theseus - maximum likelihood superpositioning of macromolecular structures + + Copyright (C) 2004-2014 Douglas L. Theobald + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: + + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA + + -/_|:|_|_\- +*/ + #ifndef MAT4UTILS_SEEN #define MAT4UTILS_SEEN @@ -159,7 +189,32 @@ void Mat4TransposeOp(double **matrix2, const double **matrix1); -#endif /* !MATRIXUTILS_SEEN */ +#endif /* !MATRIXUTILS_SEEN */ +/* + Theseus - maximum likelihood superpositioning of macromolecular structures + + Copyright (C) 2004-2014 Douglas L. 
Theobald + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: + + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA + + -/_|:|_|_\- +*/ + #ifndef MATUTILS_SEEN #define MATUTILS_SEEN @@ -186,7 +241,7 @@ **MatAlloc(const int rows, const int cols); void -MatIntDestroy(int ***matrix_ptr); +MatIntDestroy(int ***matrix); int **MatIntInit(const int rows, const int cols); @@ -207,7 +262,7 @@ MatCpySym(double **matrix2, const double **matrix1, const int dim); void -MatCpySymgen(double **matrix2, const double **matrix1, const int rows, const int cols); +MatCpyGen(double **matrix2, const double **matrix1, const int rows, const int cols); void MatMultGenUSVOp(double **c, const double **u, double *s, const double **v, @@ -237,7 +292,7 @@ void MatTransOp(double **outmat, const double **inmat, const int dim); -void +void cholesky(double **mat, const int dim, double *p); double @@ -261,7 +316,64 @@ double FrobDiffNormIdentMat(const double **mat, const int dim); -#endif /* !MATRIXUTILS_SEEN */ +#endif /* !MATRIXUTILS_SEEN */ +/* + Theseus - maximum likelihood superpositioning of macromolecular structures + + Copyright (C) 2004-2014 Douglas L. 
Theobald + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: + + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA + + -/_|:|_|_\- +*/ + +#ifndef MULTIVARGAMMA_SEEN +#define MULTIVARGAMMA_SEEN + +double +MultivarLnGamma(const int k, const double a); + +#endif +/* + Theseus - maximum likelihood superpositioning of macromolecular structures + + Copyright (C) 2004-2014 Douglas L. Theobald + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: + + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA + + -/_|:|_|_\- +*/ + #ifndef REGGAMMA_SEEN #define REGGAMMA_SEEN @@ -297,918 +409,94 @@ InGammaP(double a, double x); #endif -#ifndef VECUTILS_SEEN -#define VECUTILS_SEEN - -void -VecPrint(double *vec, const int size); - -void -InvRotVec(double *newvec, double *vec, double **rotmat); - -void -RotVec(double *newvec, double *vec, double **rotmat); - -int -VecEq(const double *vec1, const double *vec2, const int len, const double tol); - -void -RevVecIp(double *vec, const int len); - -double -VecSmallest(double *vec, const int len); - -double -VecBiggest(double *vec, const int len); - -#endif /* !MATRIXUTILS_SEEN */ -#ifndef EIGEN_SEEN -#define EIGEN_SEEN - -double -*NormalizeVec(double *vect); - -void -EigenSort(double **eigenvectors, double *eigenvalues); - -void -EigenSort3(double **eigenvectors, double *eigenvalues, double *tmpevec); - -void -EigenSort3b(double **eigenvectors, double *eigenvalues); - -void -EigenSort4(double **eigenvectors, double *eigenvalues); +/* + Theseus - maximum likelihood superpositioning of macromolecular structures -void -EvalSort4(double *eigenvalues); + Copyright (C) 2004-2014 Douglas L. Theobald -void -CopyEvec(double *evec1, double *evec2, int length); + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. -void -SwapEvec(double *evec1, double *evec2, int length); + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
-void -Swap3Evec(double *evec1, double *evec2, double *tmpevec); + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: -void -eigen3(double **z, double *eigenval); + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA -void -Eigen4Min(double **eigenvectors, double *eigenvalues); + -/_|:|_|_\- +*/ -void -eigen4(double **Q, double *eigenval); +#ifndef VECUTILS_SEEN +#define VECUTILS_SEEN void -eigenval4(double **Q, double *eigenval); - -double -pythag(double a, double b); +VecPrint(double *vec, const int size); void -tred24(double **a, double *d, double *e); +InvRotVec(double *newvec, double *vec, double **rotmat); void -tred24vals(double **a, double *d, double *e); +RotVec(double *newvec, double *vec, double **rotmat); void -tqli4(double *d, double *e, double **z); +InvRotVecAdd(double *newvec, double *vec, double **rotmat); void -tqli4vals(double *d, double *e, double **z); +RotVecAdd(double *newvec, double *vec, double **rotmat); int -jacobi3(double **a, double *d, double **v, double tol); - -int -jacobi3_cyc(double **a, double *d, double **v, double tol); - -void -jacobi4(double **a, double *d, double **v); - -void -rotate(double **a, double s, double tau, - int i, int j, int k, int l); - -double -InvSymEigenOp(double **invmat, const double **mat, int n, - double *evals, double **evecs, const double tol); - -void -eigensym(const double **mat, double *evals, double **evecs, int n); - -void -eigensym2(const double **mat, double *evals, double **evecs, int n, double *work); - -void -eigenvalsym(const double **mat, double *evals, double **evecs, int n); - -void -eigenvalsym2(const double **mat, double *evals, double **evecs, int n, double *work); - -void -eigengen(const double **mat, double *evals, double **evecs, int n); - -void -transevecs(double **mat, int len); - -void -eigen_quicksort(double *evals, double **evecs, int len); +VecEq(const double *vec1, const 
double *vec2, const int len, const double tol); void -EigenReconSym(double **mat, const double **evecs, const double *evals, const int n); - -int -SymbolicEigen4 (double **mat, double *evals); - -#endif - -#ifndef INTEGRATE_SEEN -#define INTEGRATE_SEEN - -double -trapzd(double (*func)(double, double, double), - double param1, double param2, double a, double b, int n); +RevVecIp(double *vec, const int len); double -integrate_qsimp(double (*func)(double, double, double), - double param1, double param2, double a, double b); +VecSmallest(double *vec, const int len); double -integrate_romberg(double (*f)(double a, double p1, double p2), - double p1, double p2, double a, double b); - -#endif - - -void -matinv(double **a, double **outmat, int N, int *indx); - -void -lubksb(double **a, int n, int *indx, double b[]); - -void -ludcmp(double **a, int n, int *indx, double *d); -#ifndef MYRANDOM_SEEN -#define MYRANDOM_SEEN - -void -init_genrand(unsigned long s); +VecBiggest(double *vec, const int len); -void -init_by_array(unsigned long init_key[], unsigned long key_length); - -unsigned long -genrand_int32(void); - -long -genrand_int31(void); - -double -genrand_real1(void); - -double -genrand_real2(void); - -double -genrand_real3(void); - -double -genrand_res53(void); - -double -expondev(void); - -double -gaussdev(void); - -double -Normal(void); - -void -shuffle(int *a, int n); - -void -shufflef(double *a, int n); - -#endif -#ifndef ALGO_BLAST_CORE__NCBIMATH -#define ALGO_BLAST_CORE__NCBIMATH - -/* $Id: ncbi_math.h,v 1.11 2005/03/10 16:12:59 papadopo Exp $ - * =========================================================================== - * - * PUBLIC DOMAIN NOTICE - * National Center for Biotechnology Information - * - * This software/database is a "United States Government Work" under the - * terms of the United States Copyright Act. It was written as part of - * the author's official duties as a United States Government employee and - * thus cannot be copyrighted. 
This software/database is freely available - * to the public for use. The National Library of Medicine and the U.S. - * Government have not placed any restriction on its use or reproduction. - * - * Although all reasonable efforts have been taken to ensure the accuracy - * and reliability of the software and data, the NLM and the U.S. - * Government do not and cannot warrant the performance or results that - * may be obtained by using this software or data. The NLM and the U.S. - * Government disclaim all warranties, express or implied, including - * warranties of performance, merchantability or fitness for any particular - * purpose. - * - * Please cite the author in any work or product based on this material. - * - * =========================================================================== - * - * Authors: Gish, Kans, Ostell, Schuler - * - * Version Creation Date: 10/23/91 - * - * ========================================================================== - */ - -/** @file ncbi_math.h - * Prototypes for portable math library (ported from C Toolkit) - */ - -/*#include -#include */ - -double -s_PolyGamma(double x, int order); - -/** Natural logarithm with shifted input - * @param x input operand (x > -1) - * @return log(x+1) - */ - -double BLAST_Log1p (double x); - -/** Exponentional with base e - * @param x input operand - * @return exp(x) - 1 - */ - -double BLAST_Expm1 (double x); - -/** Factorial function - * @param n input operand - * @return (double)(1 * 2 * 3 * ... * n) - */ - -double BLAST_Factorial(int n); - -/** Logarithm of the factorial - * @param x input operand - * @return log(1 * 2 * 3 * ... * x) - */ - -double BLAST_LnFactorial (double x); - -/** log(gamma(n)), integral n - * @param n input operand - * @return log(1 * 2 * 3 * ... 
(n-1)) - */ - -double BLAST_LnGammaInt (int n); - -/** Romberg numerical integrator - * @param f Pointer to the function to integrate; the first argument - * is the variable to integrate over, the second is a pointer - * to a list of additional arguments that f may need - * @param fargs Pointer to an array of extra arguments or parameters - * needed to compute the function to be integrated. None - * of the items in this list may vary over the region - * of integration - * @param p Left-hand endpoint of the integration interval - * @param q Right-hand endpoint of the integration interval - * (q is assumed > p) - * @param eps The relative error tolerance that indicates convergence - * @param epsit The number of consecutive diagonal entries in the - * Romberg array whose relative difference must be less than - * eps before convergence is assumed. This is presently - * limited to 1, 2, or 3 - * @param itmin The minimum number of diagnonal Romberg entries that - * will be computed - * @return The computed integral of f() between p and q - */ - -double BLAST_RombergIntegrate (double (*f) (double, void*), - void* fargs, double p, double q, - double eps, int epsit, int itmin); - -/** Greatest common divisor - * @param a First operand (any integer) - * @param b Second operand (any integer) - * @return The largest integer that evenly divides a and b - */ - -int BLAST_Gcd (int a, int b); - -/** Divide 3 numbers by their greatest common divisor - * @param a First integer [in] [out] - * @param b Second integer [in] [out] - * @param c Third integer [in] [out] - * @return The greatest common divisor - */ - -int BLAST_Gdb3(int* a, int* b, int* c); - -/** Nearest integer - * @param x Input to round (rounded value must be representable - * as a 32-bit signed integer) - * @return floor(x + 0.5); - */ - -long BLAST_Nint (double x); - -/** Integral power of x - * @param x floating-point base of the exponential - * @param n (integer) exponent - * @return x multiplied by itself n times - 
*/ - -double BLAST_Powi (double x, int n); - -/** Number of derivatives of log(x) to carry in gamma-related - computations */ -#define LOGDERIV_ORDER_MAX 4 -/** Number of derivatives of polygamma(x) to carry in gamma-related - computations for non-integral values of x */ -#define POLYGAMMA_ORDER_MAX LOGDERIV_ORDER_MAX - -/** value of pi is only used in gamma-related computations */ -#define NCBIMATH_PI 3.1415926535897932384626433832795 - -/** Natural log(2) */ -#define NCBIMATH_LN2 0.69314718055994530941723212145818 -/** Natural log(PI) */ -#define NCBIMATH_LNPI 1.1447298858494001741434273513531 - -#ifdef __cplusplus -} -#endif - -/* - * =========================================================================== - * - * $Log: ncbi_math.h,v $ - * Revision 1.11 2005/03/10 16:12:59 papadopo - * doxygen fixes - * - * Revision 1.10 2004/11/18 21:22:10 dondosha - * Added BLAST_Gdb3, used in greedy alignment; removed extern and added to all prototypes - * - * Revision 1.9 2004/11/02 13:54:33 papadopo - * small doxygen fixes - * - * Revision 1.8 2004/11/01 16:37:57 papadopo - * Add doxygen tags, remove unused constants - * - * Revision 1.7 2004/05/19 14:52:01 camacho - * 1. Added doxygen tags to enable doxygen processing of algo/blast/core - * 2. Standardized copyright, CVS $Id string, $Log and rcsid formatting and i - * location - * 3. 
Added use of @todo doxygen keyword - * - * Revision 1.6 2003/09/26 20:38:12 dondosha - * Returned prototype for the factorial function (BLAST_Factorial) - * - * Revision 1.5 2003/09/26 19:02:31 madden - * Prefix ncbimath functions with BLAST_ - * - * Revision 1.4 2003/09/10 21:35:20 dondosha - * Removed Nlm_ prefix from math functions - * - * Revision 1.3 2003/08/25 22:30:24 dondosha - * Added LnGammaInt definition and Factorial prototype - * - * Revision 1.2 2003/08/11 14:57:16 dondosha - * Added algo/blast/core path to all #included headers - * - * Revision 1.1 2003/08/02 16:32:11 camacho - * Moved ncbimath.h -> ncbi_math.h - * - * Revision 1.2 2003/08/01 21:18:48 dondosha - * Correction of a #include - * - * Revision 1.1 2003/08/01 21:03:40 madden - * Cleaned up version of file for C++ toolkit - * - * =========================================================================== - */ - - -#endif /* !ALGO_BLAST_CORE__NCBIMATH */ - -#ifndef QUICKSORT_SEEN -#define QUICKSORT_SEEN - -/*--------------- quicksort.h --------------*/ -/* - * The key TYPE. - * COARRAY_T is the type of the companion array - * The keys are the array items moved with the SWAP macro - * around using the SWAP macro. 
- * the comparison macros can compare either the key or things - * referenced by the key (if its a pointer) - */ -typedef double KEY_T; -typedef char *COARRAY_T; -/* - * The comparison macros: - * - * GT(x, y) as (strcmp((x),(y)) > 0) - * LT(x, y) as (strcmp((x),(y)) < 0) - * GE(x, y) as (strcmp((x),(y)) >= 0) - * LE(x, y) as (strcmp((x),(y)) <= 0) - * EQ(x, y) as (strcmp((x),(y)) == 0) - * NE(x, y) as (strcmp((x),(y)) != 0) - */ -#define GT(x, y) ((x) > (y)) -#define LT(x, y) ((x) < (y)) -#define GE(x, y) ((x) >= (y)) -#define LE(x, y) ((x) <= (y)) -#define EQ(x, y) ((x) == (y)) -#define NE(x, y) ((x) != (y)) - -/* - * Swap macro: - */ - -/* double tempd; */ -/* char *tempc; */ -/* */ -/* #define SWAPD(x, y) tempd = (x); (x) = (y); (y) = tempd */ -/* #define SWAPC(x, y) tempc = (x); (x) = (y); (y) = tempc */ - -extern void -swapd(double *x, double *y); - -extern void -swapc(char **x, char **y); - -extern void -insort2 (KEY_T *array1, COARRAY_T *array2, int len); - -extern void -insort2d (KEY_T *array1, KEY_T *array2, int len); - -extern void -insort (KEY_T *array1, int len); - -extern void -partial_quicksort2 (KEY_T *array1, COARRAY_T *array2, int lower, int upper); - -extern void -partial_quicksort2d (KEY_T *array1, KEY_T *array2, int lower, int upper); - -extern void -partial_quicksort (KEY_T *array, int lower, int upper); - -extern void -quicksort2 (KEY_T *array1, COARRAY_T *array2, int len); - -extern void -quicksort2d (KEY_T *array1, KEY_T *array2, int len); - -extern void -quicksort (KEY_T *array, int len); - -#endif -/* ------------------------------------------------------------- - * Name : rvms.h (header file for the library rvms.c) - * Author : Steve Park & Dave Geyer - * Language : ANSI C - * Latest Revision : 11-02-96 - * -------------------------------------------------------------- - */ - -#if !defined( _RVMS_ ) -#define _RVMS_ - -double LogFactorial(long n); -double LogChoose(long n, long m); - -double pdfBernoulli(double p, long x); -double 
cdfBernoulli(double p, long x); -long idfBernoulli(double p, double u); - -double pdfEquilikely(long a, long b, long x); -double cdfEquilikely(long a, long b, long x); -long idfEquilikely(long a, long b, double u); - -double pdfBinomial(long n, double p, long x); -double cdfBinomial(long n, double p, long x); -long idfBinomial(long n, double p, double u); - -double pdfGeometric(double p, long x); -double cdfGeometric(double p, long x); -long idfGeometric(double p, double u); - -double pdfPascal(long n, double p, long x); -double cdfPascal(long n, double p, long x); -long idfPascal(long n, double p, double u); - -double pdfPoisson(double m, long x); -double cdfPoisson(double m, long x); -long idfPoisson(double m, double u); - -double pdfUniform(double a, double b, double x); -double cdfUniform(double a, double b, double x); -double idfUniform(double a, double b, double u); - -double pdfExponential(double m, double x); -double cdfExponential(double m, double x); -double idfExponential(double m, double u); - -double pdfErlang(long n, double b, double x); -double cdfErlang(long n, double b, double x); -double idfErlang(long n, double b, double u); - -double pdfNormal(double m, double s, double x); -double cdfNormal(double m, double s, double x); -double idfNormal(double m, double s, double u); - -double pdfLognormal(double a, double b, double x); -double cdfLognormal(double a, double b, double x); -double idfLognormal(double a, double b, double u); - -double pdfChisquare(long n, double x); -double cdfChisquare(long n, double x); -double idfChisquare(long n, double u); - -double pdfStudent(long n, double x); -double cdfStudent(long n, double x); -double idfStudent(long n, double u); - -#endif -#ifndef SPECFUNC_SEEN -#define SPECFUNC_SEEN - -double -BesselI(const double nu, const double z); - -double -BesselI0(const double z); - -double -BesselI1(const double z); - -double -bessi(const int n, const double x); - -double -bessi0(const double x); - -double -bessi1(const 
double x); - -double -UpperIncompleteGamma(const double a, const double x); - -double -gammp(const double a, const double x); - -double -gammq(const double a, const double x); - -double -gcf(double a, double x); - -double -gser(double a, double x); - -double -IncompleteGamma(const double x, const double alpha); - -double -lngamma(const double xx); - -double -mygamma(const double xx); - -double -harmonic(int x); - -double -polygamma(int k, double x); - -double -betai(double a, double b, double x); - -double -betacf(double a, double b, double x); - -double -beta(double z, double w); - -double -mysquare(const double val); - -double -mycube(const double val); - -double -mypow4(double val); - -#endif -#ifndef MAT3UTILS_SEEN -#define MAT3UTILS_SEEN - -void -Mat3Print(double **matrix); - -double -**Mat3Ident(double **matrix); - -int -Mat3Eq(const double **matrix1, const double **matrix2, const double precision); - -int -Mat3FrobEq(const double **matrix1, const double **matrix2, const double precision); - -void -Mat3Cpy(double **matrix2, const double **matrix1); - -void -Mat3MultOp(double **C, const double **A, const double **B); - -void -Mat3MultIp(double **A, const double **B); - -void -Mat3MultUSVOp(double **C, const double **U, double *S, const double **V); - -void -Mat3PreMultIp(const double **A, double **B); - -void -Mat3Sqr(double **C, const double **A); - -void -Mat3SqrTrans2(double **C, const double **A); - -void -Mat3SqrTrans1(double **C, const double **A); - -void -Mat3TransSqr(double **C, const double **A); - -void -Mat3MultTransA(double **C, const double **A, const double **B); - -void -Mat3MultTransB(double **C, const double **A, const double **B); - -void -Mat3Add(double **C, const double **A, const double **B); - -void -Mat3Sub(double **A, double **B, double **C); - -void -Mat3TransposeIp(double **matrix); - -void -Mat3TransposeOp(double **matrix2, const double **matrix1); - -double -Mat3Det(const double **matrix); - -void -Mat3Invert(double **outmat, const 
double **inmat); - -void -Mat3SymInvert(double **outmat, const double **inmat); - -void -Mat3MultVec(double *outv, const double **inmat, const double *vec); - -int -VerifyRotMat(double **rotmat, double tol); - -double -**ClosestRotMat(double **inmat); - -void -ClosestRotMatIp(double **inmat); - -#endif /* !MATRIXUTILS_SEEN */ -#ifndef MAT4UTILS_SEEN -#define MAT4UTILS_SEEN - -void -Mat4Print(double **matrix); - -void -Mat4Copy(double **matrix2, const double **matrix1); - -void -Mat4TransposeIp(double **matrix); - -void -Mat4TransposeOp(double **matrix2, const double **matrix1); - -#endif /* !MATRIXUTILS_SEEN */ -#ifndef MATUTILS_SEEN -#define MATUTILS_SEEN - -typedef struct -{ - int rows; - int cols; - int depth; - double ***matrix; - double **matrixc; - double *matrixd; -} Matrix3D; - -void -MatPrint(double **matrix, const int size); - -void -MatPrintRec(double **matrix, const int n, const int m); - -void -MatDestroy(double ***matrix_ptr); - -double -**MatAlloc(const int rows, const int cols); - -void -MatIntDestroy(int ***matrix_ptr); - -int -**MatIntInit(const int rows, const int cols); - -Matrix3D -*Mat3DInit(const int rows, const int cols, const int depth); - -void -Mat3DDestroy(Matrix3D **matrix3d_ptr); - -double -MatFrobNorm(const double **mat1, const double **mat2, const int row, const int col); - -double -MatDiff(const double **mat1, const double **mat2, const int row, const int col); - -void -MatCpySym(double **matrix2, const double **matrix1, const int dim); - -void -MatCpySymgen(double **matrix2, const double **matrix1, const int rows, const int cols); - -void -MatMultGenUSVOp(double **c, const double **u, double *s, const double **v, - const int udim, const int sdim, const int vdim); - -void -MatMultGen(double **C, const double **A, const int ni, const int nk, const double **B, const int nj); - -void -MatMultGenIp(double **A, const int nk, const int ni, const double **B, const int nj); - -void -MatTransMultGen(double **C, const double **A, const int 
ni, const int nk, const double **B, const int nj); - -void -MatTransMultGenIp(double **A, const int nk, const int ni, const double **B, const int nj); - -void -MatMultSym(double **C, const double **A, const double **B, const int len); - -void -MatMultSymDiag(double **C, const double **A, const double **B, const int len); - -void -MatTransIp(double **mat, const int dim); - -void -MatTransOp(double **outmat, const double **inmat, const int dim); - -void -cholesky(double **mat, const int dim, double *p); - -double -MatDet(const double **mat, const int dim); - -double -MatGenLnDet(const double **mat, const int dim); - -double -MatSymLnDet(const double **mat, const int dim); - -double -MatTrace(const double **mat, const int dim); - -int -TestZeroOffDiag(const double **mat, const int dim, const double precision); - -int -TestIdentMat(const double **mat, const int dim, const double precision); - -double -FrobDiffNormIdentMat(const double **mat, const int dim); - -#endif /* !MATRIXUTILS_SEEN */ -#ifndef REGGAMMA_SEEN -#define REGGAMMA_SEEN - -/* double */ -/* IncompleteGamma (double theA, double theX); */ -/* */ -/* double */ -/* regularizedGammaP(double a, double x, double epsilon, int maxIterations); */ -/* */ -/* double */ -/* regularizedGammaQ(double a, double x, double epsilon, int maxIterations); */ -/* */ -/* double */ -/* gamain( double x, double p, double g ); */ -/* */ -/* double */ -/* gamln( double x ); */ -/* */ -/* void */ -/* grat1(double a, double x, double r, double *p, double *q, */ -/* double eps); */ - -double -InBeta(double a, double b, double x); - -double -InGamma(double a, double x); - -double -InGammaQ(double a, double x); - -double -InGammaP(double a, double x); - -#endif -#ifndef VECUTILS_SEEN -#define VECUTILS_SEEN - -void -VecPrint(double *vec, const int size); - -void -InvRotVec(double *newvec, double *vec, double **rotmat); - -void -RotVec(double *newvec, double *vec, double **rotmat); - -int -VecEq(const double *vec1, const double *vec2, 
const int len, const double tol); - -void -RevVecIp(double *vec, const int len); - -double -VecSmallest(double *vec, const int len); - -double -VecBiggest(double *vec, const int len); - -#endif /* !MATRIXUTILS_SEEN */ -#ifndef EIGEN_SEEN -#define EIGEN_SEEN - -double -*NormalizeVec(double *vect); +#endif /* !MATRIXUTILS_SEEN */ +/* + Theseus - maximum likelihood superpositioning of macromolecular structures -void -EigenSort(double **eigenvectors, double *eigenvalues); + Copyright (C) 2004-2014 Douglas L. Theobald -void -EigenSort3(double **eigenvectors, double *eigenvalues, double *tmpevec); + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. -void -EigenSort4(double **eigenvectors, double *eigenvalues); + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
-void -EvalSort4(double *eigenvalues); + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: -void -CopyEvec(double *evec1, double *evec2, int length); + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA -void -SwapEvec(double *evec1, double *evec2, int length); + -/_|:|_|_\- +*/ -void -Swap3Evec(double *evec1, double *evec2, double *tmpevec); +#ifndef EIGEN_SEEN +#define EIGEN_SEEN void eigen3(double **z, double *eigenval); void -Eigen4Min(double **eigenvectors, double *eigenvalues); - -void eigen4(double **Q, double *eigenval); void @@ -1247,33 +535,89 @@ double *evals, double **evecs, const double tol); void -eigensym(const double **mat, double *evals, double **evecs, int n); +EigenReconSym(double **mat, const double **evecs, const double *evals, const int n); + +#endif + +/* + Theseus - maximum likelihood superpositioning of macromolecular structures + + Copyright (C) 2004-2010 Douglas L. Theobald + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: + + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA + + -/_|:|_|_\- +*/ +#ifndef EIGEN_GSL_SEEN +#define EIGEN_GSL_SEEN void -eigensym2(const double **mat, double *evals, double **evecs, int n, double *work); +EigenvalsGSL(const double **mat, const int dim, double *eval); void -eigenvalsym(const double **mat, double *evals, double **evecs, int n); +EigenvalsGSLDest(double **mat, const int dim, double *eval); void -eigenvalsym2(const double **mat, double *evals, double **evecs, int n, double *work); +EigenGSL(const double **mat, const int dim, double *eval, double **evec, int order); void -eigengen(const double **mat, double *evals, double **evecs, int n); +EigenGSLDest(double **mat, const int dim, double *eval, double **evec, int order); void -transevecs(double **mat, int len); +CalcGSLSVD3(double **a, double **u, double *s, double **vt); void -eigen_quicksort(double *evals, double **evecs, int len); +svdGSLDest(double **A, const int dim, double *singval, double **V); void -EigenReconSym(double **mat, const double **evecs, const double *evals, const int n); +svdGSLJacobiDest(double **A, const int dim, double *singval, double **V); -int -SymbolicEigen4 (double **mat, double *evals); +void +CholeskyGSLDest(double **A, const int dim); + +void +PseudoinvSymGSL(const double **inmat, double **outmat, int n, double tol); #endif +/* + Theseus - maximum likelihood superpositioning of macromolecular structures + + Copyright (C) 2004-2014 Douglas L. Theobald + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: + + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA + + -/_|:|_|_\- +*/ #ifndef INTEGRATE_SEEN #define INTEGRATE_SEEN @@ -1290,64 +634,32 @@ integrate_romberg(double (*f)(double a, double p1, double p2), double p1, double p2, double a, double b); +#endif +/* + Theseus - maximum likelihood superpositioning of macromolecular structures -int -Dgesvd(char jobu, char jobvt, int m, int n, - double **a, int lda, double *s, - double **u, int ldu, - double **vt, int ldvt, - double *work, int lwork); - -int -dgesvd_opt_dest(double **a, int m, int n, - double **u, double *s, double **vt); - -int -dgesvd_opt_save(double **a, int m, int n, - double **u, double *s, double **vt); - -int -Dsyev(char jobz_v, char uplo_u, - int n, double **amat, double *w, - double *work, int lwork); - -int -dsyev_opt_dest(double **amat, int n, double *w); - -int -dsyev_opt_save(double **amat, int n, double **evecs, double *evals); - -int -Dsyevr(char jobz, char range, char uplo, int n, - double **a, int lda, - double vl, double vu, - int il, int iu, - double abstol, int m, double *w, - double **z__, int ldz, int *isuppz, - double *work, int lwork, - int *iwork, int liwork); + Copyright (C) 2004-2014 Douglas L. Theobald -int -dsyevr_opt_dest(double **mat, int n, - int lower, int upper, - double *evals, double **evecs, - double abstol); + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
-int -dsyevr_opt_save(const double **amat, int n, - int lower, int upper, - double *evals, double **evecs); + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. -void -dpotr_invert(double **mat, int idim); + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: -int -dpotrf_opt_dest(double **amat, int dim); + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA -int -pseudoinv_sym(double **inmat, double **outmat, int n, const double tol); + -/_|:|_|_\- +*/ -#endif void matinv(double **a, double **outmat, int N, int *indx); @@ -1356,6 +668,31 @@ void ludcmp(double **a, int n, int *indx, double *d); +/* + Theseus - maximum likelihood superpositioning of macromolecular structures + + Copyright (C) 2004-2014 Douglas L. Theobald + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: + + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA + + -/_|:|_|_\- +*/ + #ifndef MYRANDOM_SEEN #define MYRANDOM_SEEN @@ -1399,6 +736,31 @@ shufflef(double *a, int n); #endif +/* + Theseus - maximum likelihood superpositioning of macromolecular structures + + Copyright (C) 2004-2014 Douglas L. 
Theobald + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: + + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA + + -/_|:|_|_\- +*/ + #ifndef ALGO_BLAST_CORE__NCBIMATH #define ALGO_BLAST_CORE__NCBIMATH @@ -1438,48 +800,48 @@ * Prototypes for portable math library (ported from C Toolkit) */ -/*#include +/*#include #include */ -double +double s_PolyGamma(double x, int order); /** Natural logarithm with shifted input * @param x input operand (x > -1) * @return log(x+1) */ - + double BLAST_Log1p (double x); -/** Exponentional with base e +/** Exponentional with base e * @param x input operand * @return exp(x) - 1 */ - + double BLAST_Expm1 (double x); /** Factorial function * @param n input operand * @return (double)(1 * 2 * 3 * ... * n) */ - + double BLAST_Factorial(int n); -/** Logarithm of the factorial +/** Logarithm of the factorial * @param x input operand * @return log(1 * 2 * 3 * ... * x) */ - + double BLAST_LnFactorial (double x); -/** log(gamma(n)), integral n +/** log(gamma(n)), integral n * @param n input operand * @return log(1 * 2 * 3 * ... 
(n-1)) */ - + double BLAST_LnGammaInt (int n); -/** Romberg numerical integrator +/** Romberg numerical integrator * @param f Pointer to the function to integrate; the first argument * is the variable to integrate over, the second is a pointer * to a list of additional arguments that f may need @@ -1491,25 +853,25 @@ * @param q Right-hand endpoint of the integration interval * (q is assumed > p) * @param eps The relative error tolerance that indicates convergence - * @param epsit The number of consecutive diagonal entries in the + * @param epsit The number of consecutive diagonal entries in the * Romberg array whose relative difference must be less than - * eps before convergence is assumed. This is presently + * eps before convergence is assumed. This is presently * limited to 1, 2, or 3 * @param itmin The minimum number of diagnonal Romberg entries that * will be computed * @return The computed integral of f() between p and q */ - -double BLAST_RombergIntegrate (double (*f) (double, void*), - void* fargs, double p, double q, + +double BLAST_RombergIntegrate (double (*f) (double, void*), + void* fargs, double p, double q, double eps, int epsit, int itmin); -/** Greatest common divisor +/** Greatest common divisor * @param a First operand (any integer) * @param b Second operand (any integer) * @return The largest integer that evenly divides a and b */ - + int BLAST_Gcd (int a, int b); /** Divide 3 numbers by their greatest common divisor @@ -1518,29 +880,29 @@ * @param c Third integer [in] [out] * @return The greatest common divisor */ - + int BLAST_Gdb3(int* a, int* b, int* c); -/** Nearest integer +/** Nearest integer * @param x Input to round (rounded value must be representable * as a 32-bit signed integer) * @return floor(x + 0.5); */ - + long BLAST_Nint (double x); -/** Integral power of x +/** Integral power of x * @param x floating-point base of the exponential * @param n (integer) exponent * @return x multiplied by itself n times */ - + double BLAST_Powi 
(double x, int n); -/** Number of derivatives of log(x) to carry in gamma-related +/** Number of derivatives of log(x) to carry in gamma-related computations */ -#define LOGDERIV_ORDER_MAX 4 -/** Number of derivatives of polygamma(x) to carry in gamma-related +#define LOGDERIV_ORDER_MAX 4 +/** Number of derivatives of polygamma(x) to carry in gamma-related computations for non-integral values of x */ #define POLYGAMMA_ORDER_MAX LOGDERIV_ORDER_MAX @@ -1608,187 +970,39 @@ #endif /* !ALGO_BLAST_CORE__NCBIMATH */ -#ifndef QUICKSORT_SEEN -#define QUICKSORT_SEEN - -/*--------------- quicksort.h --------------*/ -/* - * The key TYPE. - * COARRAY_T is the type of the companion array - * The keys are the array items moved with the SWAP macro - * around using the SWAP macro. - * the comparison macros can compare either the key or things - * referenced by the key (if its a pointer) - */ -typedef double KEY_T; -typedef char *COARRAY_T; -/* - * The comparison macros: - * - * GT(x, y) as (strcmp((x),(y)) > 0) - * LT(x, y) as (strcmp((x),(y)) < 0) - * GE(x, y) as (strcmp((x),(y)) >= 0) - * LE(x, y) as (strcmp((x),(y)) <= 0) - * EQ(x, y) as (strcmp((x),(y)) == 0) - * NE(x, y) as (strcmp((x),(y)) != 0) - */ -#define GT(x, y) ((x) > (y)) -#define LT(x, y) ((x) < (y)) -#define GE(x, y) ((x) >= (y)) -#define LE(x, y) ((x) <= (y)) -#define EQ(x, y) ((x) == (y)) -#define NE(x, y) ((x) != (y)) /* - * Swap macro: - */ - -/* double tempd; */ -/* char *tempc; */ -/* */ -/* #define SWAPD(x, y) tempd = (x); (x) = (y); (y) = tempd */ -/* #define SWAPC(x, y) tempc = (x); (x) = (y); (y) = tempc */ - -extern void -swapd(double *x, double *y); - -extern void -swapc(char **x, char **y); - -extern void -insort2 (KEY_T *array1, COARRAY_T *array2, int len); - -extern void -insort2d (KEY_T *array1, KEY_T *array2, int len); - -extern void -insort (KEY_T *array1, int len); - -extern void -partial_quicksort2 (KEY_T *array1, COARRAY_T *array2, int lower, int upper); - -extern void -partial_quicksort2d 
(KEY_T *array1, KEY_T *array2, int lower, int upper); - -extern void -partial_quicksort (KEY_T *array, int lower, int upper); - -extern void -quicksort2 (KEY_T *array1, COARRAY_T *array2, int len); - -extern void -quicksort2d (KEY_T *array1, KEY_T *array2, int len); - -extern void -quicksort (KEY_T *array, int len); - -#endif -/* ------------------------------------------------------------- - * Name : rvms.h (header file for the library rvms.c) - * Author : Steve Park & Dave Geyer - * Language : ANSI C - * Latest Revision : 11-02-96 - * -------------------------------------------------------------- - */ - -#if !defined( _RVMS_ ) -#define _RVMS_ - -double LogFactorial(long n); -double LogChoose(long n, long m); - -double pdfBernoulli(double p, long x); -double cdfBernoulli(double p, long x); -long idfBernoulli(double p, double u); - -double pdfEquilikely(long a, long b, long x); -double cdfEquilikely(long a, long b, long x); -long idfEquilikely(long a, long b, double u); - -double pdfBinomial(long n, double p, long x); -double cdfBinomial(long n, double p, long x); -long idfBinomial(long n, double p, double u); - -double pdfGeometric(double p, long x); -double cdfGeometric(double p, long x); -long idfGeometric(double p, double u); - -double pdfPascal(long n, double p, long x); -double cdfPascal(long n, double p, long x); -long idfPascal(long n, double p, double u); - -double pdfPoisson(double m, long x); -double cdfPoisson(double m, long x); -long idfPoisson(double m, double u); - -double pdfUniform(double a, double b, double x); -double cdfUniform(double a, double b, double x); -double idfUniform(double a, double b, double u); - -double pdfExponential(double m, double x); -double cdfExponential(double m, double x); -double idfExponential(double m, double u); - -double pdfErlang(long n, double b, double x); -double cdfErlang(long n, double b, double x); -double idfErlang(long n, double b, double u); - -double pdfNormal(double m, double s, double x); -double 
cdfNormal(double m, double s, double x); -double idfNormal(double m, double s, double u); - -double pdfLognormal(double a, double b, double x); -double cdfLognormal(double a, double b, double x); -double idfLognormal(double a, double b, double u); - -double pdfChisquare(long n, double x); -double cdfChisquare(long n, double x); -double idfChisquare(long n, double u); - -double pdfStudent(long n, double x); -double cdfStudent(long n, double x); -double idfStudent(long n, double u); - -#endif - -#ifndef EIGEN_GSL_SEEN -#define EIGEN_GSL_SEEN - -void -EigenvalsGSL(double **mat, const int dim, double *eval); - -void -EigenvalsGSLDest(double **mat, const int dim, double *eval); - -void -EigenGSL(double **mat, const int dim, double *eval, double **evec, int order); - -void -EigenGSLDest(double **mat, const int dim, double *eval, double **evec, int order); - -void -svdGSLDest(double **A, const int dim, double *singval, double **V); + Theseus - maximum likelihood superpositioning of macromolecular structures -void -CholeskyGSLDest(double **A, const int dim); + Copyright (C) 2004-2014 Douglas L. Theobald -void -PseudoinvSymGSL(double **inmat, double **outmat, int n, double tol); + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. -#endif + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
-#ifndef MULTIVARGAMMA_SEEN -#define MULTIVARGAMMA_SEEN + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: -double -MultivarLnGamma(const int k, const double a); + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA -#endif + -/_|:|_|_\- +*/ #ifndef SPECFUNC_SEEN #define SPECFUNC_SEEN double +Hermite(const int n, const double x); + +double BesselI(const double nu, const double z); double @@ -1845,8 +1059,6 @@ double beta(double z, double w); -#endif - int findmin(const double *vec, const int len); Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/libdltmath/._eigen.c and /tmp/g2bOMTRwaC/theseus-3.0.0/libdltmath/._eigen.c differ diff -Nru theseus-2.0.6/libdltmath/eigen.c theseus-3.0.0/libdltmath/eigen.c --- theseus-2.0.6/libdltmath/eigen.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/eigen.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -53,247 +53,23 @@ Reuction to tridiagonal of real, symmetric matrix a with dimension 3. z = input matrix -- is replace by orthogonal matrix Q of transformation on output eigenval = diagonal elements of Q (order 3) - - This part of the function takes less than 60% of the time of the Numerical Recipes + + This part of the function takes less than 60% of the time of the Numerical Recipes tred2 function for a matrix of order 3. 
- + Tridiagonal QL implicit: returns eigenvalues and eigenvectors of a real, symmetric, tridiagonal matrix (such as output by tred2) - + eigenval = holds diagonal elements of tridiagonal matrix, order n; returns eigenvalues z = tred2 output matrix z (else identity matrix to get eigenvectors) - + z[k] returns normalized eigenvector for eigenvalue eigenval[k] */ /* normalize a vector and return a pointer to it */ /* It changes the value of the vector!! */ -double -*NormalizeVec(double *vect) -{ - double len; - - len = 1.0 / sqrt(vect[0]*vect[0] + vect[1]*vect[1] + vect[2]*vect[2]); - - vect[0] *= len; - vect[1] *= len; - vect[2] *= len; - - return (vect); -} - - -void -EigenSort(double **eigenvectors, double *eigenvalues) -{ - double v; - - if (eigenvalues[0] < eigenvalues[1]) - { - v = eigenvalues[0]; - eigenvalues[0] = eigenvalues[1]; - eigenvalues[1] = v; - SwapEvec(eigenvectors[0], eigenvectors[1], 3); - } - - if (eigenvalues[0] < eigenvalues[2]) - { - v = eigenvalues[0]; - eigenvalues[0] = eigenvalues[2]; - eigenvalues[2] = v; - SwapEvec(eigenvectors[0], eigenvectors[2], 3); - } - - if (eigenvalues[1] < eigenvalues[2]) - { - v = eigenvalues[1]; - eigenvalues[1] = eigenvalues[2]; - eigenvalues[2] = v; - SwapEvec(eigenvectors[1], eigenvectors[2], 3); - } -} - - -void -EigenSort3(double **eigenvectors, double *eigenvalues, double *tmpevec) -{ - double v; - - if (eigenvalues[0] < eigenvalues[1]) - { - v = eigenvalues[0]; - eigenvalues[0] = eigenvalues[1]; - eigenvalues[1] = v; - Swap3Evec(eigenvectors[0], eigenvectors[1], tmpevec); - } - - if (eigenvalues[0] < eigenvalues[2]) - { - v = eigenvalues[0]; - eigenvalues[0] = eigenvalues[2]; - eigenvalues[2] = v; - Swap3Evec(eigenvectors[0], eigenvectors[2], tmpevec); - } - - if (eigenvalues[1] < eigenvalues[2]) - { - v = eigenvalues[1]; - eigenvalues[1] = eigenvalues[2]; - eigenvalues[2] = v; - Swap3Evec(eigenvectors[1], eigenvectors[2], tmpevec); - } -} - - -void -EigenSort3b(double **eigenvectors, double *eigenvalues) 
-{ - double v, tmpevec[3]; - - if (eigenvalues[0] < eigenvalues[1]) - { - v = eigenvalues[0]; - eigenvalues[0] = eigenvalues[1]; - eigenvalues[1] = v; - Swap3Evec(eigenvectors[0], eigenvectors[1], &tmpevec[0]); - } - - if (eigenvalues[0] < eigenvalues[2]) - { - v = eigenvalues[0]; - eigenvalues[0] = eigenvalues[2]; - eigenvalues[2] = v; - Swap3Evec(eigenvectors[0], eigenvectors[2], &tmpevec[0]); - } - - if (eigenvalues[1] < eigenvalues[2]) - { - v = eigenvalues[1]; - eigenvalues[1] = eigenvalues[2]; - eigenvalues[2] = v; - Swap3Evec(eigenvectors[1], eigenvectors[2], &tmpevec[0]); - } -} - - -void -EigenSort4(double **eigenvectors, double *eigenvalues) -{ - double v; - - if (eigenvalues[0] < eigenvalues[1]) - { - v = eigenvalues[0]; - eigenvalues[0] = eigenvalues[1]; - eigenvalues[1] = v; - SwapEvec(eigenvectors[0], eigenvectors[1], 4); - } - if (eigenvalues[0] < eigenvalues[2]) - { - v = eigenvalues[0]; - eigenvalues[0] = eigenvalues[2]; - eigenvalues[2] = v; - SwapEvec(eigenvectors[0], eigenvectors[2], 4); - } - if (eigenvalues[0] < eigenvalues[3]) - { - v = eigenvalues[0]; - eigenvalues[0] = eigenvalues[3]; - eigenvalues[3] = v; - SwapEvec(eigenvectors[0], eigenvectors[3], 4); - } - if (eigenvalues[1] < eigenvalues[2]) - { - v = eigenvalues[1]; - eigenvalues[1] = eigenvalues[2]; - eigenvalues[2] = v; - SwapEvec(eigenvectors[1], eigenvectors[2], 4); - } - if (eigenvalues[1] < eigenvalues[3]) - { - v = eigenvalues[1]; - eigenvalues[1] = eigenvalues[3]; - eigenvalues[3] = v; - SwapEvec(eigenvectors[1], eigenvectors[3], 4); - } - if (eigenvalues[2] < eigenvalues[3]) - { - v = eigenvalues[2]; - eigenvalues[2] = eigenvalues[3]; - eigenvalues[3] = v; - SwapEvec(eigenvectors[2], eigenvectors[3], 4); - } -} - - -void -EvalSort4(double *eigenvalues) -{ - double v; - - if (eigenvalues[0] < eigenvalues[1]) - { - v = eigenvalues[0]; - eigenvalues[0] = eigenvalues[1]; - eigenvalues[1] = v; - } - if (eigenvalues[0] < eigenvalues[2]) - { - v = eigenvalues[0]; - eigenvalues[0] = 
eigenvalues[2]; - eigenvalues[2] = v; - } - if (eigenvalues[0] < eigenvalues[3]) - { - v = eigenvalues[0]; - eigenvalues[0] = eigenvalues[3]; - eigenvalues[3] = v; - } - if (eigenvalues[1] < eigenvalues[2]) - { - v = eigenvalues[1]; - eigenvalues[1] = eigenvalues[2]; - eigenvalues[2] = v; - } - if (eigenvalues[1] < eigenvalues[3]) - { - v = eigenvalues[1]; - eigenvalues[1] = eigenvalues[3]; - eigenvalues[3] = v; - } - if (eigenvalues[2] < eigenvalues[3]) - { - v = eigenvalues[2]; - eigenvalues[2] = eigenvalues[3]; - eigenvalues[3] = v; - } -} - - -void -SwapEvec(double *evec1, double *evec2, int length) -{ - double *tmpevec = (double *) malloc(length * sizeof(double)); - - memcpy(tmpevec, evec1, length * sizeof(double)); - memcpy(evec1, evec2, length * sizeof(double)); - memcpy(evec2, tmpevec, length * sizeof(double)); - - free(tmpevec); -} - - -void -Swap3Evec(double *evec1, double *evec2, double *tmpevec) -{ - memcpy(tmpevec, evec1, 3 * sizeof(double)); - memcpy(evec1, evec2, 3 * sizeof(double)); - memcpy(evec2, tmpevec, 3 * sizeof(double)); -} - - void eigen3(double **z, double *eigenval) { @@ -389,7 +165,7 @@ if (iter++ == 300) { - fprintf(stderr, + fprintf(stderr, "\n ERROR: Too many iterations in tqli eigen3() in eigen3.c \n"); exit(EXIT_FAILURE); } @@ -483,7 +259,7 @@ if (iter++ == 300) { - fprintf(stderr, + fprintf(stderr, "\n ERROR: Too many iterations in tqli eigen3() in eigen3.c \n"); exit(EXIT_FAILURE); } @@ -544,48 +320,21 @@ z = input matrix -- is replace by orthogonal matrix Q of transformation on output eigenval = diagonal elements of Q (order 4) - + Tridiagonal QL implicit = tqli(): returns eigenvalues and eigenvectors of a real, symmetric, tridiagonal matrix (such as output by tred2) - + eigenval = holds diagonal elements of tridiagonal matrix, order n; returns eigenvalues z = tred2() output matrix z (else identity matrix to get eigenvectors) - + z[k] or *evals returns normalized eigenvector for eigenvalue eigenval[k] - + eigen4() uses only lower 
left half of matrix, whereas jacobi4() needs upper right (at least) */ void -Eigen4Min(double **eigenvector, double *eigenvalue) -{ - double smallestval; - double *smallestvec; - int i, index = 0; - - smallestval = eigenvalue[0]; - smallestvec = eigenvector[0]; - - for (i = 1; i < 4; ++i) - { - if (eigenvalue[i] < smallestval) - { - smallestval = eigenvalue[i]; - smallestvec = eigenvector[i]; - index = i; - } - } - - eigenvalue[index] = eigenvalue[3]; - eigenvector[index] = eigenvector[3]; - eigenvalue[3] = smallestval; - eigenvector[3] = smallestvec; -} - - -void eigen4(double **Q, double *evals) { double e[4]; @@ -621,7 +370,7 @@ { for (k = 0; k < l + 1; k++) scale += fabs(a[i][k]); - + if (scale == 0.0) e[i] = a[i][l]; else @@ -631,13 +380,13 @@ a[i][k] /= scale; h += a[i][k] * a[i][k]; } - + f = a[i][l]; g = (f >= 0.0 ? -sqrt(h) : sqrt(h)); e[i] = scale * g; h -= f * g; a[i][l] = f - g; - + f = 0.0; for (j = 0; j < l + 1; j++) { @@ -715,7 +464,7 @@ { for (k = 0; k < l + 1; k++) scale += fabs(a[i][k]); - + if (scale == 0.0) e[i] = a[i][l]; else @@ -725,13 +474,13 @@ a[i][k] /= scale; h += a[i][k] * a[i][k]; } - + f = a[i][l]; g = (f >= 0.0 ? -sqrt(h) : sqrt(h)); e[i] = scale * g; h -= f * g; a[i][l] = f - g; - + f = 0.0; for (j = 0; j < l + 1; j++) { @@ -977,7 +726,7 @@ { t = (a[j][k]) / h; } - else + else { theta = 0.5 * h / (a[j][k]); t = 1.0 / (fabs(theta) + sqrt(1.0 + theta * theta)); @@ -1068,7 +817,7 @@ than the cyclic Jacobi. On output, elements of a above the diagonal are destroyed (i.e. The matrix a gets trashed. If you need to keep the - values of a, call jacobi with a copy of a). + values of a, call jacobi with a copy of a). d[3] returns the eigenvalues of a. v[3][3] is a matrix whose rows contain, on output, the normalized eigenvectors of a. @@ -1174,7 +923,7 @@ as the cyclic Jacobi. On output, elements of a above the diagonal are destroyed (i.e. The matrix a gets trashed. If you need to keep the - values of a, call jacobi with a copy of a). 
+ values of a, call jacobi with a copy of a). d[3] returns the eigenvalues of a. v[3][3] is a matrix whose rows contain, on output, the normalized eigenvectors of a. @@ -1303,8 +1052,6 @@ memcpy(tmpmat[0], mat[0], n * n * sizeof(double)); - //DSYEV(&jobz_v, &uplo_u, &n, &evecs[0][0], &n, evals, work, &lwork, &info); - EigenGSLDest(tmpmat, n, evals, evecs, 0); cond = evals[n-1] / evals[0]; @@ -1334,234 +1081,8 @@ } -/* Entire decomposition, evals and evecs */ -/* eigensym evals are small to large */ -/* Symmetric matrix decomposition, using LAPACK dsyev - Obviously it doesn't matter if you pass the matrix or its transpose; - however, LAPACK matrix ordering is FORTRAN-style, not C, so the - eigenvectors need to be transposed. */ -void -eigensym(const double **mat, double *evals, double **evecs, int n) -{ - EigenGSL((double **) mat, n, evals, evecs, 0); -} - - -/* Entire decomposition, evals and evecs */ -/* eigensym evals are small to large */ -/* exactly the same as eigensym above, but work space is passed */ -/* void */ -/* eigensym2(const double **mat, double *evals, double **evecs, int n, double *work) */ -/* { */ -/* int info = 0; */ -/* char jobz_v = 'V', uplo_u = 'U'; */ -/* int lwork = n * n; */ -/* double tmp; */ -/* int i, j; */ -/* */ -/* memcpy(&evecs[0][0], &mat[0][0], n * n * sizeof(double)); */ -/* DSYEV(&jobz_v, &uplo_u, &n, &evecs[0][0], &n, evals, work, &lwork, &info); */ -/* */ -/* for (i = 0; i < n; ++i) */ -/* { */ -/* for (j = 0; j < i; ++j) */ -/* { */ -/* tmp = evecs[i][j]; */ -/* evecs[i][j] = evecs[j][i]; */ -/* evecs[j][i] = tmp; */ -/* } */ -/* } */ -/* } */ - - -void -eigenvalsym(const double **mat, double *evals, double **evecs, int n) -{ - EigenvalsGSL((double **) mat, n, evals); -} - - -/* void */ -/* eigenvalsym2(const double **mat, double *evals, double **evecs, int n, double *work) */ -/* { */ -/* int info = 0; */ -/* char jobz_v = 'N', uplo_u = 'U'; */ -/* int lwork = n * n; */ -/* */ -/* memcpy(&evecs[0][0], &mat[0][0], n * n * 
sizeof(double)); */ -/* DSYEV(&jobz_v, &uplo_u, &n, &evecs[0][0], &n, evals, work, &lwork, &info); */ -/* } */ - - -#define CUTOFF 15 - -#define GT(x, y) ((x) > (y)) -#define LT(x, y) ((x) < (y)) -#define GE(x, y) ((x) >= (y)) -#define LE(x, y) ((x) <= (y)) -#define EQ(x, y) ((x) == (y)) -#define NE(x, y) ((x) != (y)) - -double tempd; - -#define SWAPD(x, y) tempd = (x); (x) = (y); (y) = tempd - - -static void -myswapd(double *x, double *y) -{ - double tempd; - - tempd = *x; - *x = *y; - *y = tempd; -} - - -static void -myswapdd(double **x, double **y) -{ - double *tempd; - - tempd = *x; - *x = *y; - *y = tempd; -} - - -static void -partial_eigen_quicksort(double *array1, double **array2, int lower, int upper) -{ - int i, j, random_index; - double pivot; - - if (upper - lower > CUTOFF) - { - myswapd(&array1[lower], &array1[(upper+lower)/2]); - myswapdd(&array2[lower], &array2[(upper+lower)/2]); - i = lower; - j = upper + 1; - - /* pivot = array1[lower]; */ - srand(time(NULL)); - random_index = (int) ( (double)(upper - lower) * (double) rand() / (double) RAND_MAX ) + lower; - pivot = array1[random_index]; - - while (1) - { - do - { - ++i; - } - while (LT(array1[i], pivot)); - - do - { - --j; - } - while (GT(array1[j], pivot)); - - if (j > i) - { - myswapd(&array1[i], &array1[j]); - myswapdd(&array2[i], &array2[j]); - } - else - { - break; - } - } - - myswapd(&array1[lower], &array1[j]); - myswapdd(&array2[lower], &array2[j]); - - partial_eigen_quicksort(array1, array2, lower, j - 1); - partial_eigen_quicksort(array1, array2, i, upper); - } -} - - -static void -eigen_insort(double *array1, double **array2, int len) -{ - int i, j; - double temp1; - double *temp2; - - for (i = 1; i < len; ++i) - { - j = i; - temp1 = array1[j]; - temp2 = array2[j]; - - while ((j > 0) && GT(array1[j-1], temp1)) - { - array1[j] = array1[j-1]; - array2[j] = array2[j-1]; - --j; - } - - array1[j] = temp1; - array2[j] = temp2; - } -} - - -void -transevecs(double **mat, int len) -{ - int i, j; 
- double tmp; - - for (i = 0; i < len; ++i) - { - for (j = 0; j < i; ++j) - { - tmp = mat[i][j]; - mat[i][j] = mat[j][i]; - mat[j][i] = tmp; - } - } -} - - -void -eigen_quicksort(double *evals, double **evecs, int len) -{ - double *tmpvals; - - tmpvals = malloc((len+1) * sizeof(double)); - memcpy(tmpvals, evals, len * sizeof(double)); - tmpvals[len] = DBL_MAX; - partial_eigen_quicksort(tmpvals, evecs, 0, len - 1); - eigen_insort(tmpvals, evecs, len); - memcpy(evals, tmpvals, len * sizeof(double)); - - free(tmpvals); -} - - -/* Calculates A = LDL^t, where L is a matrix of right eigenvectors and D is - a diagonal matrix of eigenvalues, in one fell swoop. Except here the - eigenvalues are delivered as a 1 x n vector. */ -/* void */ -/* EigenReconSym(double **mat, const double **evecs, const double *evals, const int n) */ -/* { */ -/* int i, j, k; */ -/* */ -/* for (i = 0; i < n; ++i) */ -/* { */ -/* for (j = 0; j < n; ++j) */ -/* { */ -/* mat[i][j] = 0.0; */ -/* for (k = 0; k < n; ++k) */ -/* mat[i][j] += (evecs[k][i] * evals[k] * evecs[k][j]); */ -/* } */ -/* } */ -/* } */ - -/* Calculates A = LDL^t, where L is a matrix of right eigenvectors and D is +/* Calculates A = LDL^t, where L is a matrix of right eigenvectors and D is a diagonal matrix of eigenvalues, in one fell swoop. Except here the eigenvalues are delivered as a 1 x n vector. */ /* This function is consistent with eigensym() above - 2006-05-10 */ diff -Nru theseus-2.0.6/libdltmath/eigen_gsl.c theseus-3.0.0/libdltmath/eigen_gsl.c --- theseus-2.0.6/libdltmath/eigen_gsl.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/eigen_gsl.c 2014-05-13 16:48:52.000000000 +0000 @@ -34,14 +34,15 @@ #include #include "DLTmath.h" -/* + +/* Calculate eigenvalues of a square, symmetric, real matrix, using GSL. -Eigenvalues are returned in ascending order, smallest first. -Pointer *eval must be allocated. -Input matrix **cov is NOT perturbed. +Eigenvalues are returned in ascending order, smallest first. 
+Pointer *eval must be allocated. +Input matrix **cov is NOT perturbed. */ void -EigenvalsGSL(double **mat, const int dim, double *eval) +EigenvalsGSL(const double **mat, const int dim, double *eval) { double *mat_cpy = NULL; @@ -85,7 +86,7 @@ be mutually orthogonal and normalised to unit magnitude. */ void -EigenGSL(double **mat, const int dim, double *eval, double **evec, int order) +EigenGSL(const double **mat, const int dim, double *eval, double **evec, int order) { double *mat_cpy = NULL; @@ -107,6 +108,7 @@ free(mat_cpy); } + /* This one destroys half of the input matrix **mat */ void EigenGSLDest(double **mat, const int dim, double *eval, double **evec, int order) @@ -127,6 +129,16 @@ void +CalcGSLSVD3(double **a, double **u, double *s, double **vt) +{ + memcpy(u[0], a[0], 9 * sizeof(double)); + // GSL says Jacobi SVD is more accurate the Golub + svdGSLJacobiDest(u, 3, s, vt); + Mat3TransposeIp(vt); +} + + +void svdGSLDest(double **A, const int dim, double *singval, double **V) { gsl_matrix_view a = gsl_matrix_view_array(A[0], dim, dim); @@ -141,6 +153,17 @@ void +svdGSLJacobiDest(double **A, const int dim, double *singval, double **V) +{ + gsl_matrix_view a = gsl_matrix_view_array(A[0], dim, dim); + gsl_matrix_view v = gsl_matrix_view_array(V[0], dim, dim); + gsl_vector_view singv = gsl_vector_view_array(singval, dim); + + gsl_linalg_SV_decomp_jacobi(&a.matrix, &v.matrix, &singv.vector); +} + + +void CholeskyGSLDest(double **A, const int dim) { gsl_matrix_view a = gsl_matrix_view_array(A[0], dim, dim); @@ -187,13 +210,13 @@ /* Calculates the Moore-Penrose pseudoinverse of a symmetric, square matrix. Uses GSL to do the singular value decomposition inmat = U S V^T . - Then constructs the pseudoinverse by V S^-1 U^T . + Then constructs the pseudoinverse by V S^-1 U^T . Note that here S^-1 is the inverse of only the nonzero elements of S. 
Also note that GSL returns V and not V^T (unlike LAPACK), so we have to account for that in the matrix multiplication, since U & V are asymmetric in general. */ void -PseudoinvSymGSL(double **inmat, double **outmat, int n, double tol) +PseudoinvSymGSL(const double **inmat, double **outmat, int n, double tol) { double **u = MatAlloc(n, n); double **v = MatAlloc(n, n); @@ -215,9 +238,11 @@ s[i] = 0.0; } - for (i = 0; i < n; ++i) - for (j = 0; j < n; ++j) - outmat[i][j] = 0.0; + memset(outmat[0], 0, n*n*sizeof(double)); + +// for (i = 0; i < n; ++i) +// for (j = 0; j < n; ++j) +// outmat[i][j] = 0.0; /* (i x k)(k x j) = (i x j) */ for (i = 0; i < n; ++i) diff -Nru theseus-2.0.6/libdltmath/eigen_gsl.h theseus-3.0.0/libdltmath/eigen_gsl.h --- theseus-2.0.6/libdltmath/eigen_gsl.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/eigen_gsl.h 2014-05-13 16:48:52.000000000 +0000 @@ -26,24 +26,30 @@ #define EIGEN_GSL_SEEN void -EigenvalsGSL(double **mat, const int dim, double *eval); +EigenvalsGSL(const double **mat, const int dim, double *eval); void EigenvalsGSLDest(double **mat, const int dim, double *eval); void -EigenGSL(double **mat, const int dim, double *eval, double **evec, int order); +EigenGSL(const double **mat, const int dim, double *eval, double **evec, int order); void EigenGSLDest(double **mat, const int dim, double *eval, double **evec, int order); void +CalcGSLSVD3(double **a, double **u, double *s, double **vt); + +void svdGSLDest(double **A, const int dim, double *singval, double **V); void +svdGSLJacobiDest(double **A, const int dim, double *singval, double **V); + +void CholeskyGSLDest(double **A, const int dim); void -PseudoinvSymGSL(double **inmat, double **outmat, int n, double tol); +PseudoinvSymGSL(const double **inmat, double **outmat, int n, double tol); #endif diff -Nru theseus-2.0.6/libdltmath/eigen.h theseus-3.0.0/libdltmath/eigen.h --- theseus-2.0.6/libdltmath/eigen.h 2013-06-05 17:20:03.000000000 +0000 +++ 
theseus-3.0.0/libdltmath/eigen.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,40 +26,10 @@ #ifndef EIGEN_SEEN #define EIGEN_SEEN -double -*NormalizeVec(double *vect); - -void -EigenSort(double **eigenvectors, double *eigenvalues); - -void -EigenSort3(double **eigenvectors, double *eigenvalues, double *tmpevec); - -void -EigenSort3b(double **eigenvectors, double *eigenvalues); - -void -EigenSort4(double **eigenvectors, double *eigenvalues); - -void -EvalSort4(double *eigenvalues); - -void -CopyEvec(double *evec1, double *evec2, int length); - -void -SwapEvec(double *evec1, double *evec2, int length); - -void -Swap3Evec(double *evec1, double *evec2, double *tmpevec); - void eigen3(double **z, double *eigenval); void -Eigen4Min(double **eigenvectors, double *eigenvalues); - -void eigen4(double **Q, double *eigenval); void @@ -98,31 +68,7 @@ double *evals, double **evecs, const double tol); void -eigensym(const double **mat, double *evals, double **evecs, int n); - -void -eigensym2(const double **mat, double *evals, double **evecs, int n, double *work); - -void -eigenvalsym(const double **mat, double *evals, double **evecs, int n); - -void -eigenvalsym2(const double **mat, double *evals, double **evecs, int n, double *work); - -void -eigengen(const double **mat, double *evals, double **evecs, int n); - -void -transevecs(double **mat, int len); - -void -eigen_quicksort(double *evals, double **evecs, int len); - -void EigenReconSym(double **mat, const double **evecs, const double *evals, const int n); -int -SymbolicEigen4 (double **mat, double *evals); - #endif diff -Nru theseus-2.0.6/libdltmath/integrate.c theseus-3.0.0/libdltmath/integrate.c --- 
theseus-2.0.6/libdltmath/integrate.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/integrate.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -111,7 +111,7 @@ // Routines: // // Rombergs_Integration_Method // ////////////////////////////////////////////////////////////////////////////////*/ -#include /* // required for fabs() */ +#include /* // required for fabs() */ static const double richardson[] = { 3.333333333333333333e-01, 6.666666666666666667e-02, 1.587301587301587302e-02, @@ -169,11 +169,11 @@ /* // // */ double romberg_int(double a, double h, double tolerance, - int max_cols, double (*f) (double), int *err) + int max_cols, double (*f) (double), int *err) { double upper_limit = a + h; //upper limit of integration - double dt[MAX_COLUMNS]; + double dt[MAX_COLUMNS]; //dt[i] is the last element in column i. double integral = 0.5 * ((*f) (a) + (*f) (a + h)); double x, old_h, delta = 0.0; @@ -191,32 +191,32 @@ for (k = 1; k < max_cols; k++) { - old_h = h; - - /* // Calculate T(f,h/2,a,b) using T(f,h,a,b) // */ - h *= 0.5; - integral = 0.0; - for (x = a + h; x < upper_limit; x += old_h) - integral += (*f) (x); - - integral = h * integral + 0.5 * dt[0]; - - /* // Calculate the Richardson Extrapolation to the limit // */ - for (j = 0; j < k; j++) - { - delta = integral - dt[j]; - dt[j] = integral; - integral += richardson[j] * delta; - } - - /* // If the magnitude of the change in the extrapolated estimate // */ - /* // for the integral is less than the preassigned tolerance, // */ - /* // return the estimate with err = 0. 
// */ - if (fabs(delta) < tolerance) - return (integral); + old_h = h; + + /* // Calculate T(f,h/2,a,b) using T(f,h,a,b) // */ + h *= 0.5; + integral = 0.0; + for (x = a + h; x < upper_limit; x += old_h) + integral += (*f) (x); - /* // Store the current esimate in the kth column. // */ - dt[k] = integral; + integral = h * integral + 0.5 * dt[0]; + + /* // Calculate the Richardson Extrapolation to the limit // */ + for (j = 0; j < k; j++) + { + delta = integral - dt[j]; + dt[j] = integral; + integral += richardson[j] * delta; + } + + /* // If the magnitude of the change in the extrapolated estimate // */ + /* // for the integral is less than the preassigned tolerance, // */ + /* // return the estimate with err = 0. // */ + if (fabs(delta) < tolerance) + return (integral); + + /* // Store the current esimate in the kth column. // */ + dt[k] = integral; } /* // The process didn't converge within the preassigned tolerance // */ @@ -235,7 +235,7 @@ double h = b - a; double upper_limit = a + h; //upper limit of integration - double dt[MAX_COLUMNS]; + double dt[MAX_COLUMNS]; //dt[i] is the last element in column i. double integral = 0.5 * ((*f)(a, p1, p2) + (*f)(a+h, p1, p2)); double x, old_h, delta = 0.0; @@ -254,32 +254,32 @@ for (k = 1; k < max_cols; k++) { - old_h = h; - - /* // Calculate T(f,h/2,a,b) using T(f,h,a,b) // */ - h *= 0.5; - integral = 0.0; - for (x = a + h; x < upper_limit; x += old_h) - integral += (*f)(x, p1, p2); - - integral = h * integral + 0.5 * dt[0]; - - /* // Calculate the Richardson Extrapolation to the limit // */ - for (j = 0; j < k; j++) - { - delta = integral - dt[j]; - dt[j] = integral; - integral += richardson[j] * delta; - } - - /* // If the magnitude of the change in the extrapolated estimate // */ - /* // for the integral is less than the preassigned tolerance, // */ - /* // return the estimate with err = 0. 
// */ - if (fabs(delta) < tolerance) - return (integral); + old_h = h; + + /* // Calculate T(f,h/2,a,b) using T(f,h,a,b) // */ + h *= 0.5; + integral = 0.0; + for (x = a + h; x < upper_limit; x += old_h) + integral += (*f)(x, p1, p2); + + integral = h * integral + 0.5 * dt[0]; + + /* // Calculate the Richardson Extrapolation to the limit // */ + for (j = 0; j < k; j++) + { + delta = integral - dt[j]; + dt[j] = integral; + integral += richardson[j] * delta; + } + + /* // If the magnitude of the change in the extrapolated estimate // */ + /* // for the integral is less than the preassigned tolerance, // */ + /* // return the estimate with err = 0. // */ + if (fabs(delta) < tolerance) + return (integral); - /* // Store the current esimate in the kth column. // */ - dt[k] = integral; + /* // Store the current esimate in the kth column. // */ + dt[k] = integral; } /* // The process didn't converge within the preassigned tolerance // */ diff -Nru theseus-2.0.6/libdltmath/integrate.h theseus-3.0.0/libdltmath/integrate.h --- theseus-2.0.6/libdltmath/integrate.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/integrate.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdltmath/ludcmp.c theseus-3.0.0/libdltmath/ludcmp.c --- theseus-2.0.6/libdltmath/ludcmp.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/ludcmp.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdltmath/ludcmp.h theseus-3.0.0/libdltmath/ludcmp.h --- theseus-2.0.6/libdltmath/ludcmp.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/ludcmp.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdltmath/Makefile theseus-3.0.0/libdltmath/Makefile --- theseus-2.0.6/libdltmath/Makefile 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/Makefile 2014-05-13 16:48:52.000000000 +0000 @@ -30,9 +30,6 @@ myrandom.o: myrandom.c $(CC) $(OPT) $(CFLAGS) $(INCDIR) -c myrandom.c -quicksort.o: quicksort.c - $(CC) $(OPT) $(CFLAGS) $(INCDIR) -c quicksort.c - statistics.o: statistics.c $(CC) $(OPT) $(CFLAGS) $(INCDIR) -c statistics.c @@ -48,6 +45,9 @@ ncbi_math.o: ncbi_math.c $(CC) $(OPT) $(CFLAGS) $(INCDIR) -c ncbi_math.c +quicksort.o: quicksort.c + $(CC) $(OPT) $(CFLAGS) $(INCDIR) -c quicksort.c + eigen_gsl.o: eigen_gsl.c $(CC) $(OPT) $(CFLAGS) $(INCDIR) -c eigen_gsl.c diff -Nru theseus-2.0.6/libdltmath/Mat3Utils.c theseus-3.0.0/libdltmath/Mat3Utils.c --- theseus-2.0.6/libdltmath/Mat3Utils.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/Mat3Utils.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -56,7 +56,7 @@ **Mat3Ident(double **matrix) { matrix[0][0] = matrix[1][1] = matrix[2][2] = 1.0; - matrix[0][1] = matrix[0][2] = matrix[1][0] = + matrix[0][1] = matrix[0][2] = matrix[1][0] = matrix[1][2] = matrix[2][0] = matrix[2][1] = 0.0; return(matrix); @@ -83,6 +83,26 @@ } +double +Mat3FrobDiff(const double **mat1, const double **mat2) +{ + int i, j; + double frobnorm, tmp; + + frobnorm = 0.0; + for (i = 0; i < 3; ++i) + { + for (j = 0; j < 3; ++j) + { + tmp = mat2[i][j] - mat1[i][j]; + frobnorm += tmp * tmp; + } + } + + return(sqrt(frobnorm / 3.0)); +} + + /* check for the equivalence of two matrices based on the Frobenius norm criterion (more statistically justified than the above) */ int @@ -101,7 +121,7 @@ } } - if (sqrt(frobnorm / 9.0) < precision) + if (sqrt(frobnorm / 3.0) < precision) return(1); else return(0); @@ -118,15 +138,15 @@ /* A x B = C */ void Mat3MultOp(double **C, const double **A, const double **B) -{ +{ C[0][0] = A[0][0]*B[0][0] + A[0][1]*B[1][0] + A[0][2]*B[2][0]; C[1][0] = A[1][0]*B[0][0] + A[1][1]*B[1][0] + A[1][2]*B[2][0]; - C[2][0] = A[2][0]*B[0][0] + A[2][1]*B[1][0] + A[2][2]*B[2][0]; - + C[2][0] = A[2][0]*B[0][0] + A[2][1]*B[1][0] + A[2][2]*B[2][0]; + C[0][1] = A[0][0]*B[0][1] + A[0][1]*B[1][1] + A[0][2]*B[2][1]; C[1][1] = A[1][0]*B[0][1] + A[1][1]*B[1][1] + A[1][2]*B[2][1]; C[2][1] = A[2][0]*B[0][1] + A[2][1]*B[1][1] + A[2][2]*B[2][1]; - + C[0][2] = A[0][0]*B[0][2] + A[0][1]*B[1][2] + A[0][2]*B[2][2]; C[1][2] = A[1][0]*B[0][2] + A[1][1]*B[1][2] + A[1][2]*B[2][2]; C[2][2] = A[2][0]*B[0][2] + A[2][1]*B[1][2] + A[2][2]*B[2][2]; @@ -141,16 +161,16 @@ C00 = A[0][0]*B[0][0] + A[0][1]*B[1][0] + A[0][2]*B[2][0]; C10 = A[1][0]*B[0][0] + A[1][1]*B[1][0] + A[1][2]*B[2][0]; - C20 = A[2][0]*B[0][0] + A[2][1]*B[1][0] + A[2][2]*B[2][0]; - + C20 = A[2][0]*B[0][0] + A[2][1]*B[1][0] + A[2][2]*B[2][0]; + 
C01 = A[0][0]*B[0][1] + A[0][1]*B[1][1] + A[0][2]*B[2][1]; C11 = A[1][0]*B[0][1] + A[1][1]*B[1][1] + A[1][2]*B[2][1]; C21 = A[2][0]*B[0][1] + A[2][1]*B[1][1] + A[2][2]*B[2][1]; - + C02 = A[0][0]*B[0][2] + A[0][1]*B[1][2] + A[0][2]*B[2][2]; C12 = A[1][0]*B[0][2] + A[1][1]*B[1][2] + A[1][2]*B[2][2]; C22 = A[2][0]*B[0][2] + A[2][1]*B[1][2] + A[2][2]*B[2][2]; - + A[0][0] = C00; A[1][0] = C10; A[2][0] = C20; @@ -168,15 +188,15 @@ /* C = USV, where S is diagonal */ void Mat3MultUSVOp(double **C, const double **U, double *S, const double **V) -{ +{ C[0][0] = (U[0][0] * S[0]) * V[0][0] + (U[0][1] * S[1]) * V[1][0] + (U[0][2] * S[2]) * V[2][0]; C[1][0] = (U[1][0] * S[0]) * V[0][0] + (U[1][1] * S[1]) * V[1][0] + (U[1][2] * S[2]) * V[2][0]; - C[2][0] = (U[2][0] * S[0]) * V[0][0] + (U[2][1] * S[1]) * V[1][0] + (U[2][2] * S[2]) * V[2][0]; - + C[2][0] = (U[2][0] * S[0]) * V[0][0] + (U[2][1] * S[1]) * V[1][0] + (U[2][2] * S[2]) * V[2][0]; + C[0][1] = (U[0][0] * S[0]) * V[0][1] + (U[0][1] * S[1]) * V[1][1] + (U[0][2] * S[2]) * V[2][1]; C[1][1] = (U[1][0] * S[0]) * V[0][1] + (U[1][1] * S[1]) * V[1][1] + (U[1][2] * S[2]) * V[2][1]; C[2][1] = (U[2][0] * S[0]) * V[0][1] + (U[2][1] * S[1]) * V[1][1] + (U[2][2] * S[2]) * V[2][1]; - + C[0][2] = (U[0][0] * S[0]) * V[0][2] + (U[0][1] * S[1]) * V[1][2] + (U[0][2] * S[2]) * V[2][2]; C[1][2] = (U[1][0] * S[0]) * V[0][2] + (U[1][1] * S[1]) * V[1][2] + (U[1][2] * S[2]) * V[2][2]; C[2][2] = (U[2][0] * S[0]) * V[0][2] + (U[2][1] * S[1]) * V[1][2] + (U[2][2] * S[2]) * V[2][2]; @@ -191,16 +211,16 @@ C00 = B[0][0]*A[0][0] + B[0][1]*A[1][0] + B[0][2]*A[2][0]; C10 = B[1][0]*A[0][0] + B[1][1]*A[1][0] + B[1][2]*A[2][0]; - C20 = B[2][0]*A[0][0] + B[2][1]*A[1][0] + B[2][2]*A[2][0]; - + C20 = B[2][0]*A[0][0] + B[2][1]*A[1][0] + B[2][2]*A[2][0]; + C01 = B[0][0]*A[0][1] + B[0][1]*A[1][1] + B[0][2]*A[2][1]; C11 = B[1][0]*A[0][1] + B[1][1]*A[1][1] + B[1][2]*A[2][1]; C21 = B[2][0]*A[0][1] + B[2][1]*A[1][1] + B[2][2]*A[2][1]; - + C02 = B[0][0]*A[0][2] + 
B[0][1]*A[1][2] + B[0][2]*A[2][2]; C12 = B[1][0]*A[0][2] + B[1][1]*A[1][2] + B[1][2]*A[2][2]; C22 = B[2][0]*A[0][2] + B[2][1]*A[1][2] + B[2][2]*A[2][2]; - + B[0][0] = C00; B[1][0] = C10; B[2][0] = C20; @@ -218,15 +238,15 @@ /* A x A = C */ void Mat3Sqr(double **C, const double **A) -{ +{ C[0][0] = A[0][0]*A[0][0] + A[0][1]*A[1][0] + A[0][2]*A[2][0]; C[1][0] = A[1][0]*A[0][0] + A[1][1]*A[1][0] + A[1][2]*A[2][0]; - C[2][0] = A[2][0]*A[0][0] + A[2][1]*A[1][0] + A[2][2]*A[2][0]; - + C[2][0] = A[2][0]*A[0][0] + A[2][1]*A[1][0] + A[2][2]*A[2][0]; + C[0][1] = A[0][0]*A[0][1] + A[0][1]*A[1][1] + A[0][2]*A[2][1]; C[1][1] = A[1][0]*A[0][1] + A[1][1]*A[1][1] + A[1][2]*A[2][1]; C[2][1] = A[2][0]*A[0][1] + A[2][1]*A[1][1] + A[2][2]*A[2][1]; - + C[0][2] = A[0][0]*A[0][2] + A[0][1]*A[1][2] + A[0][2]*A[2][2]; C[1][2] = A[1][0]*A[0][2] + A[1][1]*A[1][2] + A[1][2]*A[2][2]; C[2][2] = A[2][0]*A[0][2] + A[2][1]*A[1][2] + A[2][2]*A[2][2]; @@ -236,15 +256,15 @@ /* A x ~A = C */ void Mat3SqrTrans2(double **C, const double **A) -{ +{ C[0][0] = A[0][0]*A[0][0] + A[0][1]*A[0][1] + A[0][2]*A[0][2]; C[1][0] = A[1][0]*A[0][0] + A[1][1]*A[0][1] + A[1][2]*A[0][2]; - C[2][0] = A[2][0]*A[0][0] + A[2][1]*A[0][1] + A[2][2]*A[0][2]; - + C[2][0] = A[2][0]*A[0][0] + A[2][1]*A[0][1] + A[2][2]*A[0][2]; + C[0][1] = A[0][0]*A[1][0] + A[0][1]*A[1][1] + A[0][2]*A[1][2]; C[1][1] = A[1][0]*A[1][0] + A[1][1]*A[1][1] + A[1][2]*A[1][2]; C[2][1] = A[2][0]*A[1][0] + A[2][1]*A[1][1] + A[2][2]*A[1][2]; - + C[0][2] = A[0][0]*A[2][0] + A[0][1]*A[2][1] + A[0][2]*A[2][2]; C[1][2] = A[1][0]*A[2][0] + A[1][1]*A[2][1] + A[1][2]*A[2][2]; C[2][2] = A[2][0]*A[2][0] + A[2][1]*A[2][1] + A[2][2]*A[2][2]; @@ -254,15 +274,15 @@ /* ~A x A = C */ void Mat3SqrTrans1(double **C, const double **A) -{ +{ C[0][0] = A[0][0]*A[0][0] + A[1][0]*A[1][0] + A[2][0]*A[2][0]; C[1][0] = A[0][1]*A[0][0] + A[1][1]*A[1][0] + A[2][1]*A[2][0]; - C[2][0] = A[0][2]*A[0][0] + A[1][2]*A[1][0] + A[2][2]*A[2][0]; - + C[2][0] = A[0][2]*A[0][0] + A[1][2]*A[1][0] 
+ A[2][2]*A[2][0]; + C[0][1] = A[0][0]*A[0][1] + A[1][0]*A[1][1] + A[2][0]*A[2][1]; C[1][1] = A[0][1]*A[0][1] + A[1][1]*A[1][1] + A[2][1]*A[2][1]; C[2][1] = A[0][2]*A[0][1] + A[1][2]*A[1][1] + A[2][2]*A[2][1]; - + C[0][2] = A[0][0]*A[0][2] + A[1][0]*A[1][2] + A[2][0]*A[2][2]; C[1][2] = A[0][1]*A[0][2] + A[1][1]*A[1][2] + A[2][1]*A[2][2]; C[2][2] = A[0][2]*A[0][2] + A[1][2]*A[1][2] + A[2][2]*A[2][2]; @@ -272,15 +292,15 @@ /* ~A x ~A = C */ void Mat3TransSqr(double **C, const double **A) -{ +{ C[0][0] = A[0][0]*A[0][0] + A[0][1]*A[1][0] + A[0][2]*A[2][0]; C[0][1] = A[1][0]*A[0][0] + A[1][1]*A[1][0] + A[1][2]*A[2][0]; - C[0][2] = A[2][0]*A[0][0] + A[2][1]*A[1][0] + A[2][2]*A[2][0]; - + C[0][2] = A[2][0]*A[0][0] + A[2][1]*A[1][0] + A[2][2]*A[2][0]; + C[1][0] = A[0][0]*A[0][1] + A[0][1]*A[1][1] + A[0][2]*A[2][1]; C[1][1] = A[1][0]*A[0][1] + A[1][1]*A[1][1] + A[1][2]*A[2][1]; C[1][2] = A[2][0]*A[0][1] + A[2][1]*A[1][1] + A[2][2]*A[2][1]; - + C[2][0] = A[0][0]*A[0][2] + A[0][1]*A[1][2] + A[0][2]*A[2][2]; C[2][1] = A[1][0]*A[0][2] + A[1][1]*A[1][2] + A[1][2]*A[2][2]; C[2][2] = A[2][0]*A[0][2] + A[2][1]*A[1][2] + A[2][2]*A[2][2]; @@ -290,15 +310,15 @@ /* ~A x B = C */ void Mat3MultTransA(double **C, const double **A, const double **B) -{ +{ C[0][0] = A[0][0]*B[0][0] + A[1][0]*B[1][0] + A[2][0]*B[2][0]; C[1][0] = A[0][1]*B[0][0] + A[1][1]*B[1][0] + A[2][1]*B[2][0]; - C[2][0] = A[0][2]*B[0][0] + A[1][2]*B[1][0] + A[2][2]*B[2][0]; - + C[2][0] = A[0][2]*B[0][0] + A[1][2]*B[1][0] + A[2][2]*B[2][0]; + C[0][1] = A[0][0]*B[0][1] + A[1][0]*B[1][1] + A[2][0]*B[2][1]; C[1][1] = A[0][1]*B[0][1] + A[1][1]*B[1][1] + A[2][1]*B[2][1]; C[2][1] = A[0][2]*B[0][1] + A[1][2]*B[1][1] + A[2][2]*B[2][1]; - + C[0][2] = A[0][0]*B[0][2] + A[1][0]*B[1][2] + A[2][0]*B[2][2]; C[1][2] = A[0][1]*B[0][2] + A[1][1]*B[1][2] + A[2][1]*B[2][2]; C[2][2] = A[0][2]*B[0][2] + A[1][2]*B[1][2] + A[2][2]*B[2][2]; @@ -308,15 +328,15 @@ /* A x ~B = C */ void Mat3MultTransB(double **C, const double **A, const double 
**B) -{ +{ C[0][0] = A[0][0]*B[0][0] + A[0][1]*B[0][1] + A[0][2]*B[0][2]; C[1][0] = A[1][0]*B[0][0] + A[1][1]*B[0][1] + A[1][2]*B[0][2]; - C[2][0] = A[2][0]*B[0][0] + A[2][1]*B[0][1] + A[2][2]*B[0][2]; - + C[2][0] = A[2][0]*B[0][0] + A[2][1]*B[0][1] + A[2][2]*B[0][2]; + C[0][1] = A[0][0]*B[1][0] + A[0][1]*B[1][1] + A[0][2]*B[1][2]; C[1][1] = A[1][0]*B[1][0] + A[1][1]*B[1][1] + A[1][2]*B[1][2]; C[2][1] = A[2][0]*B[1][0] + A[2][1]*B[1][1] + A[2][2]*B[1][2]; - + C[0][2] = A[0][0]*B[2][0] + A[0][1]*B[2][1] + A[0][2]*B[2][2]; C[1][2] = A[1][0]*B[2][0] + A[1][1]*B[2][1] + A[1][2]*B[2][2]; C[2][2] = A[2][0]*B[2][0] + A[2][1]*B[2][1] + A[2][2]*B[2][2]; @@ -325,15 +345,15 @@ void Mat3Add(double **C, const double **A, const double **B) -{ +{ C[0][0] = A[0][0] + B[0][0]; C[1][0] = A[1][0] + B[1][0]; - C[2][0] = A[2][0] + B[2][0]; - + C[2][0] = A[2][0] + B[2][0]; + C[0][1] = A[0][1] + B[0][1]; C[1][1] = A[1][1] + B[1][1]; C[2][1] = A[2][1] + B[2][1]; - + C[0][2] = A[0][2] + B[0][2]; C[1][2] = A[1][2] + B[1][2]; C[2][2] = A[2][2] + B[2][2]; @@ -342,15 +362,15 @@ void Mat3Sub(double **A, double **B, double **C) -{ +{ C[0][0] = A[0][0] - B[0][0]; C[1][0] = A[1][0] - B[1][0]; - C[2][0] = A[2][0] - B[2][0]; - + C[2][0] = A[2][0] - B[2][0]; + C[0][1] = A[0][1] - B[0][1]; C[1][1] = A[1][1] - B[1][1]; C[2][1] = A[2][1] - B[2][1]; - + C[0][2] = A[0][2] - B[0][2]; C[1][2] = A[1][2] - B[1][2]; C[2][2] = A[2][2] - B[2][2]; @@ -407,7 +427,7 @@ Mat3Det(const double **matrix) { double det; - + det = matrix[0][0] * (matrix[1][1] * matrix[2][2] - matrix[1][2] * matrix[2][1]) - matrix[1][0] * (matrix[0][1] * matrix[2][2] - matrix[0][2] * matrix[2][1]) + matrix[2][0] * (matrix[0][1] * matrix[1][2] - matrix[0][2] * matrix[1][1]); @@ -492,6 +512,7 @@ printf(" ERROR: rotation matrix not normalized\n"); printf(" row %d sum = %f, column %d sum = %f\n", i, sum1, j, sum2); fflush(NULL); + return(0); } } @@ -537,11 +558,12 @@ } MatDestroy(&testmat); + return(1); } -/* returns the closest orthogonal, 
normalized, rotation matrix to the +/* returns the closest orthogonal, normalized, rotation matrix to the input matrix */ void ClosestRotMatIp(double **inmat) @@ -555,7 +577,7 @@ Mat3TransposeIp(vt); Mat3Cpy(u, (const double **) inmat); - //dgesvd_opt_dest(inmat, 3, 3, u, s, vt); + // dgesvd_opt_dest(inmat, 3, 3, u, s, vt); /* this guarantees that the determinant of the rot mat is positive, as required */ @@ -568,123 +590,124 @@ MatDestroy(&u); MatDestroy(&vt); + free(s); } /* Converts an orthogonal 3x3 rotation matrix to its axis/angle representation. It returns the angle, and the axis unit - vector is supplied in v[] + vector is supplied in v[] Based on: http://www.euclideanspace.com/maths/geometry/rotations/conversions/matrixToAngle/index.htm */ double RotMat2AxisAngle(double **rot, double *v) { - double epsilon = FLT_EPSILON; - double angle, x, y, z, s, tx, ty, tz; - int xZero, yZero, zZero, xyPos, xzPos, yzPos; + double epsilon = FLT_EPSILON; + double angle, x, y, z, s, tx, ty, tz; + int xZero, yZero, zZero, xyPos, xzPos, yzPos; //MatPrint(rot, 3); - if (fabs(rot[0][1] - rot[1][0]) < epsilon && - fabs(rot[0][2] - rot[2][0]) < epsilon && - fabs(rot[1][2] - rot[2][1]) < epsilon) - {// singularity found + if (fabs(rot[0][1] - rot[1][0]) < epsilon && + fabs(rot[0][2] - rot[2][0]) < epsilon && + fabs(rot[1][2] - rot[2][1]) < epsilon) + {// singularity found if (rot[0][0] > 0.0 && rot[1][1] > 0.0 && rot[2][2] > 0.0) - { - // this singularity is identity matrix so angle = 0 - // note epsilon is greater in this case since we only have to distinguish between 0 and 180 degrees - //angle = 0.0; - v[0] = 1.0; // axis is arbitrary - v[1] = 0.0; - v[2] = 0.0; - - return(0.0); - } - else - { - // otherwise this singularity is angle = 180 - angle = MY_PI; - - x = 0.5 * (rot[0][0] + 1.0); - - if (x > 0.0) // can only take square root of positive number, always true for orthogonal matrix - x = sqrt(x); - else - x = 0.0; // in case matrix has become de-orthogonalised - - y = 0.5 
* (rot[1][1] + 1.0); - - if (y > 0.0) // can only take square root of positive number, always true for orthogonal matrix - y = sqrt(y); - else - y = 0.0; // in case matrix has become de-orthogonalised - - z = 0.5 * (rot[2][2] + 1.0); - - if (z > 0.0) // can only take square root of positive number, always true for orthogonal matrix - z = sqrt(z); - else - z = 0.0; // in case matrix has become de-orthogonalised - - xZero = (fabs(x) < epsilon); - yZero = (fabs(y) < epsilon); - zZero = (fabs(z) < epsilon); - xyPos = (rot[0][1] > 0.0); - xzPos = (rot[0][2] > 0.0); - yzPos = (rot[1][2] > 0.0); - - if (xZero && !yZero && !zZero) // implements last 6 rows of above table - { - if (!yzPos) - y = -y; - } - else if (yZero && !zZero) - { - if (!xzPos) - z = -z; - } - else if (zZero) - { - if (!xyPos) - x = -x; - } - - v[0] = x; - v[1] = y; - v[2] = z; - - return(angle); - } - } - else - { - tx = rot[2][1] - rot[1][2]; - ty = rot[0][2] - rot[2][0]; - tz = rot[1][0] - rot[0][1]; - s = sqrt(tx*tx + ty*ty + tz*tz); // used to normalise - - if (fabs(s) < epsilon) - s = 1.0; // prevent divide by zero, should not happen if matrix is orthogonal - - angle = acos(0.5 * (rot[0][0] + rot[1][1] + rot[2][2] - 1.0)); - x = (rot[2][1] - rot[1][2]) / s; - y = (rot[0][2] - rot[2][0]) / s; - z = (rot[1][0] - rot[0][1]) / s; - - v[0] = x; - v[1] = y; - v[2] = z; + { + // this singularity is identity matrix so angle = 0 + // note epsilon is greater in this case since we only have to distinguish between 0 and 180 degrees + //angle = 0.0; + v[0] = 1.0; // axis is arbitrary + v[1] = 0.0; + v[2] = 0.0; + + return(0.0); + } + else + { + // otherwise this singularity is angle = 180 + angle = MY_PI; + + x = 0.5 * (rot[0][0] + 1.0); - return(angle); + if (x > 0.0) // can only take square root of positive number, always true for orthogonal matrix + x = sqrt(x); + else + x = 0.0; // in case matrix has become de-orthogonalised + + y = 0.5 * (rot[1][1] + 1.0); + + if (y > 0.0) // can only take square root of 
positive number, always true for orthogonal matrix + y = sqrt(y); + else + y = 0.0; // in case matrix has become de-orthogonalised + + z = 0.5 * (rot[2][2] + 1.0); + + if (z > 0.0) // can only take square root of positive number, always true for orthogonal matrix + z = sqrt(z); + else + z = 0.0; // in case matrix has become de-orthogonalised + + xZero = (fabs(x) < epsilon); + yZero = (fabs(y) < epsilon); + zZero = (fabs(z) < epsilon); + xyPos = (rot[0][1] > 0.0); + xzPos = (rot[0][2] > 0.0); + yzPos = (rot[1][2] > 0.0); + + if (xZero && !yZero && !zZero) // implements last 6 rows of above table + { + if (!yzPos) + y = -y; + } + else if (yZero && !zZero) + { + if (!xzPos) + z = -z; + } + else if (zZero) + { + if (!xyPos) + x = -x; + } + + v[0] = x; + v[1] = y; + v[2] = z; + + return(angle); + } + } + else + { + tx = rot[2][1] - rot[1][2]; + ty = rot[0][2] - rot[2][0]; + tz = rot[1][0] - rot[0][1]; + s = sqrt(tx*tx + ty*ty + tz*tz); // used to normalise + + if (fabs(s) < epsilon) + s = 1.0; // prevent divide by zero, should not happen if matrix is orthogonal + + angle = acos(0.5 * (rot[0][0] + rot[1][1] + rot[2][2] - 1.0)); + x = (rot[2][1] - rot[1][2]) / s; + y = (rot[0][2] - rot[2][0]) / s; + z = (rot[1][0] - rot[0][1]) / s; + + v[0] = x; + v[1] = y; + v[2] = z; + + return(angle); } } /* Converts an orthogonal 3x3 rotation matrix to its axis/angle representation. It returns the angle, and the axis unit - vector is supplied in v[] + vector is supplied in v[] Based on: http://www.euclideanspace.com/maths/geometry/rotations/conversions/matrixToAngle/index.htm First converts a rot matrix to a quaternion, then to the angle/axis representation. 
@@ -696,42 +719,42 @@ double trace, angle, s, t, invt, w, x, y, z; /* convert to quaternion */ - trace = rot[0][0] + rot[1][1] + rot[2][2] + 1.0; + trace = rot[0][0] + rot[1][1] + rot[2][2] + 1.0; if( trace > FLT_EPSILON ) { - s = 0.5 / sqrt(trace); - w = 0.25 / s; - x = ( rot[2][1] - rot[1][2] ) * s; - y = ( rot[0][2] - rot[2][0] ) * s; - z = ( rot[1][0] - rot[0][1] ) * s; + s = 0.5 / sqrt(trace); + w = 0.25 / s; + x = ( rot[2][1] - rot[1][2] ) * s; + y = ( rot[0][2] - rot[2][0] ) * s; + z = ( rot[1][0] - rot[0][1] ) * s; } else { - if (rot[0][0] > rot[1][1] && rot[0][0] > rot[2][2]) - { - s = 2.0 * sqrt( 1.0 + rot[0][0] - rot[1][1] - rot[2][2]); - x = 0.25 * s; - y = (rot[0][1] + rot[1][0] ) / s; - z = (rot[0][2] + rot[2][0] ) / s; - w = (rot[1][2] - rot[2][1] ) / s; - } - else if (rot[1][1] > rot[2][2]) - { - s = 2.0 * sqrt(1.0 + rot[1][1] - rot[0][0] - rot[2][2]); - x = (rot[0][1] + rot[1][0] ) / s; - y = 0.25 * s; - z = (rot[1][2] + rot[2][1] ) / s; - w = (rot[0][2] - rot[2][0] ) / s; - } - else - { - s = 2.0 * sqrt(1.0 + rot[2][2] - rot[0][0] - rot[1][1]); - x = (rot[0][2] + rot[2][0] ) / s; - y = (rot[1][2] + rot[2][1] ) / s; - z = 0.25 * s; - w = (rot[0][1] - rot[1][0] ) / s; - } + if (rot[0][0] > rot[1][1] && rot[0][0] > rot[2][2]) + { + s = 2.0 * sqrt( 1.0 + rot[0][0] - rot[1][1] - rot[2][2]); + x = 0.25 * s; + y = (rot[0][1] + rot[1][0] ) / s; + z = (rot[0][2] + rot[2][0] ) / s; + w = (rot[1][2] - rot[2][1] ) / s; + } + else if (rot[1][1] > rot[2][2]) + { + s = 2.0 * sqrt(1.0 + rot[1][1] - rot[0][0] - rot[2][2]); + x = (rot[0][1] + rot[1][0] ) / s; + y = 0.25 * s; + z = (rot[1][2] + rot[2][1] ) / s; + w = (rot[0][2] - rot[2][0] ) / s; + } + else + { + s = 2.0 * sqrt(1.0 + rot[2][2] - rot[0][0] - rot[1][1]); + x = (rot[0][2] + rot[2][0] ) / s; + y = (rot[1][2] + rot[2][1] ) / s; + z = 0.25 * s; + w = (rot[0][1] - rot[1][0] ) / s; + } } /* Now convert that quaternion to angle/axis */ diff -Nru theseus-2.0.6/libdltmath/Mat3Utils.h 
theseus-3.0.0/libdltmath/Mat3Utils.h --- theseus-2.0.6/libdltmath/Mat3Utils.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/Mat3Utils.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,6 +35,9 @@ int Mat3Eq(const double **matrix1, const double **matrix2, const double precision); +double +Mat3FrobDiff(const double **matrix1, const double **matrix2); + int Mat3FrobEq(const double **matrix1, const double **matrix2, const double precision); @@ -104,4 +107,7 @@ double RotMat2AxisAngle(double **rot, double *v); -#endif /* !MATRIXUTILS_SEEN */ +double +RotMat2AxisAngleQuat(double **rot, double *v); + +#endif diff -Nru theseus-2.0.6/libdltmath/Mat4Utils.c theseus-3.0.0/libdltmath/Mat4Utils.c --- theseus-2.0.6/libdltmath/Mat4Utils.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/Mat4Utils.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdltmath/Mat4Utils.h theseus-3.0.0/libdltmath/Mat4Utils.h --- theseus-2.0.6/libdltmath/Mat4Utils.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/Mat4Utils.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdltmath/MatUtils.c theseus-3.0.0/libdltmath/MatUtils.c --- theseus-2.0.6/libdltmath/MatUtils.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/MatUtils.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,7 +27,7 @@ #include #include #include -#include "eigen.h" +#include "eigen_gsl.h" #include "MatUtils.h" @@ -58,7 +58,7 @@ printf("\n"); for (i = 0; i < n; ++i) { - printf(" [\n"); + printf(" ["); for (j = 0; j < m; ++j) printf(" % 14.8f", matrix[i][j]); printf(" ]\n"); @@ -76,11 +76,11 @@ if (matrix != NULL) { - if (matrix[0] != NULL) - { - free(matrix[0]); - matrix[0] = NULL; - } + if (matrix[0] != NULL) + { + free(matrix[0]); + matrix[0] = NULL; + } free(matrix); *matrix_ptr = NULL; @@ -99,7 +99,7 @@ if (matspace == NULL) { perror("\n ERROR"); - printf("\n ERROR: Failure to allocate matrix space in MatAlloc(): (%d x %d)\n", rows, cols); + printf("\n ERROR: Failure to allocate matrix in MatAlloc(): (%d x %d)\n", rows, cols); exit(EXIT_FAILURE); } @@ -108,14 +108,14 @@ if (matrix == NULL) { perror("\n ERROR"); - printf("\n ERROR: Failure to allocate room for row pointers in MatAlloc(): (%d)\n", rows); + printf("\n ERROR: Failure to allocate row pointers in MatAlloc(): (%d)\n", rows); exit(EXIT_FAILURE); } /* now 'point' the pointers */ for (i = 0; i < rows; i++) matrix[i] = matspace + (i * cols); - + return(matrix); } @@ -127,11 +127,11 @@ if (matrix != NULL) { - if (matrix[0] != NULL) - { - free(matrix[0]); - matrix[0] = NULL; - } + if (matrix[0] != NULL) + { + 
free(matrix[0]); + matrix[0] = NULL; + } free(matrix); *matrix_ptr = NULL; @@ -150,7 +150,7 @@ if (matspace == NULL) { perror("\n ERROR"); - printf("\n ERROR: Failure to allocate room for pointers (%d x %d)\n", rows, cols); + printf("\n ERROR: Failure to allocate matrix (%d x %d)\n", rows, cols); exit(EXIT_FAILURE); } @@ -159,14 +159,14 @@ if (matrix == NULL) { perror("\n ERROR"); - printf("\n ERROR: Failure to allocate room for pointers (%d)\n", rows); + printf("\n ERROR: Failure to allocate pointers (%d)\n", rows); exit(EXIT_FAILURE); } /* now 'point' the pointers */ for (i = 0; i < rows; i++) matrix[i] = matspace + (i * cols); - + return(matrix); } @@ -193,7 +193,7 @@ matrix3d->matrixd == NULL) { perror("\n ERROR"); - puts("\n ERROR: Failure to allocate room for pointers \n\n"); + puts("\n ERROR: Failure to allocate pointers \n\n"); exit(EXIT_FAILURE); } @@ -270,7 +270,7 @@ void -MatCpySymgen(double **matrix2, const double **matrix1, const int rows, const int cols) +MatCpyGen(double **matrix2, const double **matrix1, const int rows, const int cols) { memcpy(&matrix2[0][0], &matrix1[0][0], rows * cols * sizeof(double)); } @@ -343,7 +343,7 @@ } } - MatCpySymgen(A, (const double **) tmpmat, ni, nj); + MatCpyGen(A, (const double **) tmpmat, ni, nj); MatDestroy(&tmpmat); } @@ -384,7 +384,7 @@ } } - MatCpySymgen(A, (const double **) tmpmat, ni, nj); + MatCpyGen(A, (const double **) tmpmat, ni, nj); MatDestroy(&tmpmat); } @@ -451,7 +451,7 @@ } -void +void cholesky(double **mat, const int dim, double *p) { int i, j, k; @@ -500,7 +500,8 @@ int i; double lndet; - eigensym(mat, evals, evecs, dim); + //eigensym(mat, evals, evecs, dim); + EigenGSL((const double **) mat, dim, evals, evecs, 0); lndet = 0.0; for (i = 0; i < dim; ++i) @@ -540,7 +541,7 @@ if (i !=j && mat[i][j] > precision) return(0); - return(1); + return(1); } @@ -557,13 +558,13 @@ { if (i == j) { - tmp = 1.0 - mat[i][i]; - frobnorm += tmp * tmp; + tmp = 1.0 - mat[i][i]; + frobnorm += tmp * tmp; } else { - tmp 
= mat[i][j]; - frobnorm += tmp * tmp; + tmp = mat[i][j]; + frobnorm += tmp * tmp; } } } @@ -590,13 +591,13 @@ { if (i == j) { - tmp = 1.0 - mat[i][i]; - frobnorm += tmp * tmp; + tmp = 1.0 - mat[i][i]; + frobnorm += tmp * tmp; } else { - tmp = mat[i][j]; - frobnorm += tmp * tmp; + tmp = mat[i][j]; + frobnorm += tmp * tmp; } } } diff -Nru theseus-2.0.6/libdltmath/MatUtils.h theseus-3.0.0/libdltmath/MatUtils.h --- theseus-2.0.6/libdltmath/MatUtils.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/MatUtils.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -70,7 +70,7 @@ MatCpySym(double **matrix2, const double **matrix1, const int dim); void -MatCpySymgen(double **matrix2, const double **matrix1, const int rows, const int cols); +MatCpyGen(double **matrix2, const double **matrix1, const int rows, const int cols); void MatMultGenUSVOp(double **c, const double **u, double *s, const double **v, Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/libdltmath/._MultivarGamma.c and /tmp/g2bOMTRwaC/theseus-3.0.0/libdltmath/._MultivarGamma.c differ Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/libdltmath/._MultivarGamma.h and /tmp/g2bOMTRwaC/theseus-3.0.0/libdltmath/._MultivarGamma.h differ diff -Nru theseus-2.0.6/libdltmath/MultivarGamma.h theseus-3.0.0/libdltmath/MultivarGamma.h --- theseus-2.0.6/libdltmath/MultivarGamma.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/MultivarGamma.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdltmath/myrandom.c theseus-3.0.0/libdltmath/myrandom.c --- theseus-2.0.6/libdltmath/myrandom.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/myrandom.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdltmath/myrandom.h theseus-3.0.0/libdltmath/myrandom.h --- theseus-2.0.6/libdltmath/myrandom.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/myrandom.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdltmath/ncbi_math.c theseus-3.0.0/libdltmath/ncbi_math.c --- theseus-2.0.6/libdltmath/ncbi_math.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/ncbi_math.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdltmath/ncbi_math.h theseus-3.0.0/libdltmath/ncbi_math.h --- theseus-2.0.6/libdltmath/ncbi_math.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/ncbi_math.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdltmath/RegGamma.c theseus-3.0.0/libdltmath/RegGamma.c --- theseus-2.0.6/libdltmath/RegGamma.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/RegGamma.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdltmath/RegGamma.h theseus-3.0.0/libdltmath/RegGamma.h --- theseus-2.0.6/libdltmath/RegGamma.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/RegGamma.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdltmath/specfunc.c theseus-3.0.0/libdltmath/specfunc.c --- theseus-2.0.6/libdltmath/specfunc.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/specfunc.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -449,68 +449,6 @@ #undef EPS -// double -// lgamma(const double xx) -// { -// #ifndef __APPLE__ -// -// int j; -// double x, y, tmp, ser; -// static double cof[6] = -// { -// 76.18009172947146, -86.50532032941677, -// 24.01409824083091, -1.231739572450155, -// 0.1208650973866179e-2, -0.5395239384953e-5 -// }; -// -// y = x = xx; -// tmp = x + 5.5; -// tmp -= (x + 0.5) * log(tmp); -// ser = 1.000000000190015; -// -// for (j = 0; j <= 5; j++) -// ser += (cof[j] / ++y); -// -// return(-tmp + log(2.5066282746310005 * ser/x)); -// -// #else -// -// extern double lgamma_r(double x, int *signgam); /* intrinsic function in math.h in OSX, linux, IRIX */ -// int signgam; -// double val; -// -// val = lgamma_r(xx, &signgam); -// -// return(val); -// -// #endif -// } -// -// -// double -// tgamma(const double xx) -// { -// #ifndef __APPLE__ -// -// return(exp(lgamma(xx))); -// -// #else -// -// extern double lgamma_r(double x, int *signgam); /* intrinsic function in math.h in OSX, linux, IRIX */ -// int *signgam = malloc(sizeof(int)); -// double lg; -// -// lg = lgamma_r(xx, signgam); -// lg = *signgam * exp(lg); -// -// free(signgam); -// -// return(lg); -// -// #endif -// } - - #define MAXIT 200 #define EPS 1e12 diff -Nru theseus-2.0.6/libdltmath/specfunc.h 
theseus-3.0.0/libdltmath/specfunc.h --- theseus-2.0.6/libdltmath/specfunc.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/specfunc.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libdltmath/VecUtils.c theseus-3.0.0/libdltmath/VecUtils.c --- theseus-2.0.6/libdltmath/VecUtils.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/VecUtils.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -44,7 +44,7 @@ void -InvRotVec(double *newvec, double *vec, double **rotmat) +RotVec(double *newvec, double *vec, double **rotmat) { int j, k; @@ -52,13 +52,13 @@ { newvec[j] = 0.0; for (k = 0; k < 3; ++k) - newvec[j] += (vec[k] * rotmat[j][k]); + newvec[j] += (vec[k] * rotmat[k][j]); } } void -RotVec(double *newvec, double *vec, double **rotmat) +InvRotVec(double *newvec, double *vec, double **rotmat) { int j, k; @@ -66,30 +66,30 @@ { newvec[j] = 0.0; for (k = 0; k < 3; ++k) - newvec[j] += (vec[k] * rotmat[k][j]); + newvec[j] += (vec[k] * rotmat[j][k]); } } void -InvRotVecAdd(double *newvec, double *vec, double **rotmat) +RotVecAdd(double *newvec, double *vec, double **rotmat) { int j, k; for (j = 0; j < 3; ++j) for (k = 0; k < 3; ++k) - newvec[j] += (vec[k] * rotmat[j][k]); + newvec[j] += (vec[k] * rotmat[k][j]); } void -RotVecAdd(double *newvec, double *vec, double **rotmat) +InvRotVecAdd(double *newvec, 
double *vec, double **rotmat) { int j, k; for (j = 0; j < 3; ++j) for (k = 0; k < 3; ++k) - newvec[j] += (vec[k] * rotmat[k][j]); + newvec[j] += (vec[k] * rotmat[j][k]); } @@ -124,12 +124,12 @@ int i; double tmp; - for (i = 0; i < (len - 1) / 2; ++i) - { - tmp = vec[i]; - vec[i] = vec[len-i-1]; - vec[len-i-1] = tmp; - } + for (i = 0; i < (len - 1) / 2; ++i) + { + tmp = vec[i]; + vec[i] = vec[len-i-1]; + vec[len-i-1] = tmp; + } } @@ -139,9 +139,9 @@ int i; double smallest = DBL_MAX; - for (i = 0; i < len; ++i) - if (smallest > vec[i]) - smallest = vec[i]; + for (i = 0; i < len; ++i) + if (smallest > vec[i]) + smallest = vec[i]; return(smallest); } @@ -153,9 +153,9 @@ int i; double biggest = -DBL_MAX; - for (i = 0; i < len; ++i) - if (biggest < vec[i]) - biggest = vec[i]; + for (i = 0; i < len; ++i) + if (biggest < vec[i]) + biggest = vec[i]; return(biggest); } diff -Nru theseus-2.0.6/libdltmath/VecUtils.h theseus-3.0.0/libdltmath/VecUtils.h --- theseus-2.0.6/libdltmath/VecUtils.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdltmath/VecUtils.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,17 +30,17 @@ VecPrint(double *vec, const int size); void -InvRotVec(double *newvec, double *vec, double **rotmat); - -void RotVec(double *newvec, double *vec, double **rotmat); void -InvRotVecAdd(double *newvec, double *vec, double **rotmat); +InvRotVec(double *newvec, double *vec, double **rotmat); void RotVecAdd(double *newvec, double *vec, double **rotmat); +void +InvRotVecAdd(double *newvec, double *vec, double **rotmat); + int VecEq(const double *vec1, const double *vec2, const int len, const double tol); Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/libDLTutils/._DLTutils.c and /tmp/g2bOMTRwaC/theseus-3.0.0/libDLTutils/._DLTutils.c differ diff -Nru theseus-2.0.6/libDLTutils/DLTutils.c theseus-3.0.0/libDLTutils/DLTutils.c --- theseus-2.0.6/libDLTutils/DLTutils.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libDLTutils/DLTutils.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -34,40 +34,6 @@ #include #include "DLTutils.h" -/* #undef __APPLE__ */ -#if 0 //defined(__APPLE__) - #if !defined(inline) - #define inline __inline__ - #endif - #include - #include - #include - #include - #include - #include - - static union - { - Nanoseconds ns; - UInt64 i; - } apple_time; - - /* Returns elapsed time since the machine started up. 
- Original code: Ian Ollmann */ - UInt64 - getTime(void) - { - apple_time.ns = AbsoluteToNanoseconds(UpTime()); - return(apple_time.i); - } - - double - seconds(void) - { - return(1e-9 * (double) getTime()); - } -#endif - void myfloath(void) Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/libDLTutils/._DLTutils.h and /tmp/g2bOMTRwaC/theseus-3.0.0/libDLTutils/._DLTutils.h differ diff -Nru theseus-2.0.6/libDLTutils/DLTutils.h theseus-3.0.0/libDLTutils/DLTutils.h --- theseus-2.0.6/libDLTutils/DLTutils.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libDLTutils/DLTutils.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,33 +28,15 @@ #include -#define SCREAMS(string_val) fprintf(stderr, "\n!SCREAMS! %s:%d:%s= %s", __FILE__, __LINE__, #string_val, string_val); fflush(NULL) -#define SCREAMC(char_val) fprintf(stderr, "\n!SCREAMC! %s:%d:%s= %c", __FILE__, __LINE__, #char_val, char_val); fflush(NULL) -#define SCREAMD(integer_val) fprintf(stderr, "\n!SCREAMD! %s:%d:%s= %d", __FILE__, __LINE__, #integer_val, integer_val); fflush(NULL) -#define SCREAMF(double_val) fprintf(stderr, "\n!SCREAMF! %s:%d:%s= %f", __FILE__, __LINE__, #double_val, double_val); fflush(NULL) -#define SCREAME(double_val) fprintf(stderr, "\n!SCREAME! %s:%d:%s= %e", __FILE__, __LINE__, #double_val, double_val); fflush(NULL) -#define SCREAMP(pointer_val) fprintf(stderr, "\n!SCREAMP! %s:%d:%s= %p", __FILE__, __LINE__, #pointer_val, pointer_val); fflush(NULL) +#define SCREAMS(string_val) fprintf(stderr, "\n!SCREAMS! %s:%d:%s= %s", __FILE__, __LINE__, #string_val, string_val); fflush(NULL) +#define SCREAMC(char_val) fprintf(stderr, "\n!SCREAMC! 
%s:%d:%s= %c", __FILE__, __LINE__, #char_val, char_val); fflush(NULL) +#define SCREAMD(integer_val) fprintf(stderr, "\n!SCREAMD! %s:%d:%s= %d", __FILE__, __LINE__, #integer_val, integer_val); fflush(NULL) +#define SCREAMF(double_val) fprintf(stderr, "\n!SCREAMF! %s:%d:%s= %f", __FILE__, __LINE__, #double_val, double_val); fflush(NULL) +#define SCREAME(double_val) fprintf(stderr, "\n!SCREAME! %s:%d:%s= %e", __FILE__, __LINE__, #double_val, double_val); fflush(NULL) +#define SCREAMP(pointer_val) fprintf(stderr, "\n!SCREAMP! %s:%d:%s= %p", __FILE__, __LINE__, #pointer_val, pointer_val); fflush(NULL) #define BUFFLEN FILENAME_MAX -#if 0 //defined(__APPLE__) - #if !defined(inline) - #define inline __inline__ - #endif - #include - #include - #include - #include - #include - #include - - UInt64 - getTime(void); - - double - seconds(void); -#endif - double *memsetd(double *dest, const double val, size_t len); diff -Nru theseus-2.0.6/libdssplite/dssplite.c theseus-3.0.0/libdssplite/dssplite.c --- theseus-2.0.6/libdssplite/dssplite.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdssplite/dssplite.c 2014-05-13 16:48:52.000000000 +0000 @@ -3,7 +3,7 @@ * * * dssplite v.0.8 * * * -* Copyright (C) 2003-2009 Douglas L. Theobald * +* Copyright (C) 2003-2014 Douglas L. 
Theobald * * * * dssplite is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published * @@ -167,7 +167,7 @@ int vlen) /* used to be that len = # of CA atoms, not full x vector length */ { DSSP *dssp = NULL; - int Hbonds, len; + int len; char *summary; /* Setup DSSP structure */ @@ -184,7 +184,7 @@ /* do the DSSP algorithm, Doug-style */ GetCONHCA(dssp); - Hbonds = FlagHBonds(dssp); + FlagHBonds(dssp); FlagBends(dssp); FlagTurns(dssp); diff -Nru theseus-2.0.6/libdssplite/dssplite.h theseus-3.0.0/libdssplite/dssplite.h --- theseus-2.0.6/libdssplite/dssplite.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libdssplite/dssplite.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2007 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/libmsa/DLTutils.h theseus-3.0.0/libmsa/DLTutils.h --- theseus-2.0.6/libmsa/DLTutils.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libmsa/DLTutils.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2009 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,24 +37,6 @@ #define BUFFLEN FILENAME_MAX -#if defined(__APPLE__) - #if !defined(inline) - #define inline __inline__ - #endif - #include - #include - #include - #include - #include - #include - - UInt64 - getTime(void); - - double - seconds(void); -#endif - double *memsetd(double *dest, const double val, size_t len); Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/libmsa/._msa.c and /tmp/g2bOMTRwaC/theseus-3.0.0/libmsa/._msa.c differ diff -Nru theseus-2.0.6/libmsa/msa.c theseus-3.0.0/libmsa/msa.c --- theseus-2.0.6/libmsa/msa.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libmsa/msa.c 2014-05-13 16:48:52.000000000 +0000 @@ -15,7 +15,7 @@ * dtheobald@gmail.com * dtheobald@brandeis.edu * - * Copyright: Copyright (c) 2004-2010 Douglas L. Theobald + * Copyright: Copyright (c) 2004-2014 Douglas L. Theobald * * msa.c is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published @@ -730,9 +730,9 @@ } } - for (i = endcol; i < msa->seqlen; ++i, ++m) - for (k = 0; k < msa->seqnum; ++k) - tmpmsa->seq[k][m] = msa->seq[k][i]; + for (i = endcol; i < msa->seqlen; ++i, ++m) + for (k = 0; k < msa->seqnum; ++k) + tmpmsa->seq[k][m] = msa->seq[k][i]; //printf("\n### endcol:%d m:%d", endcol, m); @@ -1330,37 +1330,37 @@ /* next count number of sequences in first block in file */ fseek(msafile, seq_start, SEEK_SET); /* move to beginning of name0, block0 */ if (fgets(line, 512, msafile) == NULL) - { + { fprintf(stderr, "\n ERROR010: CLUSTAL file \"%s\" appears to have no data \n\n", msafile_name); exit(EXIT_FAILURE); - } + } /* get the name of sequence #0 */ if (sscanf(line, "%30s", name) == EOF) - { + { fprintf(stderr, "\n ERROR011: CLUSTAL file \"%s\" appears to have no data \n\n", msafile_name); exit(EXIT_FAILURE); - } + } msa->allocnum = 1; while(!feof(msafile)) { - if 
(fgets(line, 512, msafile) == NULL) - break; + if (fgets(line, 512, msafile) == NULL) + break; - if (isspace(line[0]) || sscanf(line, "%30s", newname) == EOF) - break; - else - { - if (strncmp(name, newname, 64) == 0) - break; - else - ++msa->allocnum; - } + if (isspace(line[0]) || sscanf(line, "%30s", newname) == EOF) + break; + else + { + if (strncmp(name, newname, 64) == 0) + break; + else + ++msa->allocnum; + } } //printf("\nmsa->allocnum = %d", msa->allocnum); fflush(NULL); /* get the (maximum) length of the alignment (per sequence) */ @@ -2124,22 +2124,22 @@ MSAalloc(msa, msa->allocnum, msa->seqlen, 128); seqpos = k = 0; - for(j = 0; j < msa->allocnum; ++j) - { - fgets(line, bufflen, msafile); /* read line */ - sscanf(line, "%s", msa->name[j]); /* read name */ - //printf("\n%s", msa->name[j]); - - k = 0; - for (i = strlen(msa->name[j]); i < strlen(line); ++i) - { - if (isalpha(line[i]) || line[i] == '-' || line[i] == '?' || line[i] == '.') - { - msa->seq[j][k] = line[i]; - ++k; - } - } - } + for(j = 0; j < msa->allocnum; ++j) + { + fgets(line, bufflen, msafile); /* read line */ + sscanf(line, "%s", msa->name[j]); /* read name */ + //printf("\n%s", msa->name[j]); + + k = 0; + for (i = strlen(msa->name[j]); i < strlen(line); ++i) + { + if (isalpha(line[i]) || line[i] == '-' || line[i] == '?' || line[i] == '.') + { + msa->seq[j][k] = line[i]; + ++k; + } + } + } /* now read sequences from file into MSA object */ while(!feof(msafile)) Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/libmsa/._msa.h and /tmp/g2bOMTRwaC/theseus-3.0.0/libmsa/._msa.h differ diff -Nru theseus-2.0.6/libmsa/msa.h theseus-3.0.0/libmsa/msa.h --- theseus-2.0.6/libmsa/msa.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/libmsa/msa.h 2014-05-13 16:48:52.000000000 +0000 @@ -4,7 +4,7 @@ * * * msa.c * * * -* Copyright (C) 2003 Douglas L. Theobald * +* Copyright (C) 2003-2014 Douglas L. 
Theobald * * * * msa.c is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published * diff -Nru theseus-2.0.6/lodmats.c theseus-3.0.0/lodmats.c --- theseus-2.0.6/lodmats.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/lodmats.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/lodmats.h theseus-3.0.0/lodmats.h --- theseus-2.0.6/lodmats.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/lodmats.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/Makefile theseus-3.0.0/Makefile --- theseus-2.0.6/Makefile 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/Makefile 2014-05-13 16:48:52.000000000 +0000 @@ -7,17 +7,16 @@ lodmats.o MultiPose.o MultiPose2MSA.o MultiPoseMix.o \ PCAstats.o pdbIO.o pdbMalloc.o pdbStats.o pdbUtils.o QuarticHornFrag.o \ RandCds.o pdbSSM.o \ -ProcGSLSVD.o ProcGSLSVDOcc.o ProcJacobiSVD.o \ +ProcGSLSVD.o ProcGSLSVDNu.o ProcJacobiSVD.o \ GibbsMet.o \ qcprot.o \ termcol.o theseuslib.o -all: libs progs +all: libs ltheseus progs -libs: ldistfit lDLTutils lmsa ldltmath ldssplite ltheseus - -progs: theseus +libs: ldistfit lDLTutils lmsa ldltmath ldssplite +progs: theseus libs ldistfit: ( cd libdistfit; $(MAKE) && cp libdistfit.a ../lib ) @@ -123,8 +122,8 @@ ProcGSLSVD.o: ProcGSLSVD.c $(CC) $(OPT) $(CFLAGS) $(INCDIR) -c ProcGSLSVD.c -ProcGSLSVDOcc.o: ProcGSLSVDOcc.c - $(CC) $(OPT) $(CFLAGS) $(INCDIR) -c ProcGSLSVDOcc.c +ProcGSLSVDNu.o: ProcGSLSVDNu.c + $(CC) $(OPT) $(CFLAGS) $(INCDIR) -c ProcGSLSVDNu.c termcol.o: termcol.c $(CC) $(OPT) $(CFLAGS) $(INCDIR) -c termcol.c @@ -152,5 +151,5 @@ clean: find . -name '*.[oa]' -exec rm -f {} \; - rm -f theseus &> /dev/null + rm -f theseus distfit diff -Nru theseus-2.0.6/make.inc theseus-3.0.0/make.inc --- theseus-2.0.6/make.inc 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/make.inc 2014-05-13 16:48:52.000000000 +0000 @@ -3,42 +3,31 @@ # If you system has no ranlib, set RANLIB = echo. 
# -#ARCH = libtool -#ARCHFLAGS = -o -ARCH = ar -ARCHFLAGS = -rvs RANLIB = ranlib +# for universal OSX binary +ARCH = libtool +ARCHFLAGS = -static -o + +# for normal arch-native tuned OSX binary +# ARCH = ar +# ARCHFLAGS = -rvs + #MACOSX_DEPLOYMENT_TARGET=10.4 #export MACOSX_DEPLOYMENT_TARGET -# LOCALLIBDIR = /usr/local/lib -# LIBS = -ldistfit -lmsa -ldssplite -ldltmath -lDLTutils -ltheseus -# SYSLIBS = -lpthread -lgsl -lgslcblas -lm -lc -# LIBDIR = -L./lib -# INSTALLDIR = /usr/local/bin -# #OPT = -O0 -ggdb -# OPT = -O3 -ffast-math -fstrict-aliasing -funroll-loops -fomit-frame-pointer -# WARN = -Werror -Wno-error=unused-result -Wall -pedantic -std=c99 -# CFLAGS += $(WARN) #-force_cpusubtype_ALL -mmacosx-version-min=10.4 -arch x86_64 -arch i386 #-DNDEBUG -# # CPPFLAGS are ignored by the rest of the build system but necessary for hardening flags. -# CFLAGS += $(CPPFLAGS) -# # Filter out -O2 which overwrites the default -O3 because OPT is used before CFLAGS -# CFLAGS := $(filter-out -O2,$(CFLAGS)) -# CC = gcc - - -#Mac OSX +# #Mac OSX +# # # LOCALLIBDIR = /usr/local/lib # LIBS = -lgsl -lgslcblas -ldistfit -lmsa -ldssplite -ldltmath -lDLTutils -ltheseus -# SYSLIBS = -lpthread -lgsl -lgslcblas -lm -lc +# SYSLIBS = -lpthread -lm -lc # LIBDIR = -L./lib # INSTALLDIR = /usr/local/bin # #OPT = -O0 -ggdb -# OPT = -O3 -ffast-math -fstrict-aliasing -funroll-loops -fomit-frame-pointer -# WARN = -Wall -pedantic -std=c99 # for APPLE MACOSX -# CFLAGS += $(WARN) +# OPT = -O3 -ffast-math -mtune=native# -fstrict-aliasing -funroll-loops -fomit-frame-pointer +# WARN = -Wall -Werror -pedantic -std=c99# -Wno-error=unused-result +# CFLAGS += $(WARN) # # CPPFLAGS are ignored by the rest of the build system but necessary for hardening flags # CFLAGS += $(CPPFLAGS) # # Filter out -O2 which overwrites the default -O3 because OPT is used before CFLAGS @@ -50,22 +39,19 @@ # LOCALLIBDIR = /usr/local/lib LIBS = -lgsl -lgslcblas -ldistfit -lmsa -ldssplite -ldltmath -lDLTutils -ltheseus -#SYSLIBS = 
-framework CoreServices SYSLIBS = -lpthread -lgsl -lgslcblas -lm -lc LIBDIR = -L./lib -#LAPACKLIB = -framework vecLib INSTALLDIR = /usr/local/bin #OPT = -O0 -ggdb -OPT = -O3 -ffast-math -fstrict-aliasing -funroll-loops -fomit-frame-pointer +OPT = -O3 -ffast-math #-fstrict-aliasing -funroll-loops -fomit-frame-pointer #WARN = -Werror -Wno-error=unused-result -Wall -pedantic -std=c99 WARN = -Werror -Wall -pedantic -std=c99 # for APPLE MACOSX CFLAGS += $(WARN) -force_cpusubtype_ALL -mmacosx-version-min=10.4 -arch x86_64 -arch i386 #-DNDEBUG -#CFLAGS += $(WARN) # CPPFLAGS are ignored by the rest of the build system but necessary for hardening flags CFLAGS += $(CPPFLAGS) # Filter out -O2 which overwrites the default -O3 because OPT is used before CFLAGS CFLAGS := $(filter-out -O2,$(CFLAGS)) -CC = /usr/bin/gcc +CC = gcc # Ubuntu LINUX Dual processor Core2 x86_32, gulon (2010-09-07) @@ -80,6 +66,7 @@ # INCDIR = -I/usr/local/include/ # INSTALLDIR = /usr/local/bin # OPT = -O3 -ffast-math -fstrict-aliasing -funroll-loops -fomit-frame-pointer +# # OPT = -m64 -O3 -march=native -funroll-loops -ffast-math -mfpmath=sse # #OPT = -O0 -ggdb # WARN = -Wall -pedantic -std=c99 -Wno-unused-result # CFLAGS = $(WARN) -pthread @@ -114,14 +101,3 @@ # CFLAGS = $(WARN) -DNDEBUG # CC = /usr/local/cross-tools/i386-mingw32msvc/bin/cc -# IRIX new 32-bit binary -- this probably needs some work to get it to compile -# SYSLIBS = -lm -# LIBS = -ldistfit -lmsa -ldssplite -ldltmath -lDLTutils -# LIBDIR = -L/usr/lib32/ -L./lib -# INSTALLDIR = /home/wuttke/bin/ -# OPT = -Ofast -n32 -mips4 -# WARN = #-ansi #-fullwarn -woff 1209 -# CFLAGS = $(WARN) -# CC = cc - - diff -Nru theseus-2.0.6/monte_carlo_marginal.c theseus-3.0.0/monte_carlo_marginal.c --- theseus-2.0.6/monte_carlo_marginal.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/monte_carlo_marginal.c 2014-05-13 16:48:52.000000000 +0000 @@ -37,7 +37,17 @@ * 2011_04_15_nnn Started source * 
*****************************************************************************/ -// gcc -O3 -ffast-math -Wall -std=c99 -pedantic -mtune=native -o monte_carlo_marginal -lgsl -lgslcblas -lpthread -lm monte_carlo_marginal.c; sudo cp monte_carlo_marginal /usr/local/bin/ +/* +gcc -O3 -ffast-math -Wall -std=c99 -pedantic -mtune=native -o monte_carlo_marginal \ +-lgsl -lgslcblas -lpthread -lm monte_carlo_marginal.c; \ +sudo cp monte_carlo_marginal /usr/local/bin/ +*/ + +// ./monte_carlo_marginal -d20 -n 10000 -i 1000000 -l 1e-10 -ep -s10 +// ./monte_carlo_marginal -d2 -n 10000 -i 1000000 -l 1e-10 -gp +// ./monte_carlo_marginal -d10 -n 100 -i 100000 -l 10 -egp +// ./monte_carlo_marginal -d1 -n 1000000 -i 100000 -l 1 -e -s1 + #include #include @@ -61,9 +71,14 @@ #include #include #include +#include +#include -#define VERSION "0.6" -#define LN2PI (M_LN2 + M_LNPI) +#define VERSION "0.7" +#define LN2PI ( M_LN2 + M_LNPI ) +#define DIGAMMA2 ( gsl_sf_psi_int(2) ) +#define DIGAMMA3 ( gsl_sf_psi_int(3) ) +#define DIGAMMA4 ( gsl_sf_psi_int(4) ) double burnin = 0.0; @@ -76,21 +91,23 @@ int thrdnum = 1; int parallel = 0; int entropy_calc = 0; +int klentk = 1; /* K-L entropy calc k value (kth smallest dist) */ +int randmeth = 1; -int dim = 1; /* number of params */ +int dim = 1; /* number of params */ int hdim = 1; /* # hierarchical params */ int ndata = 100; int nd; /* data globals */ double **data = NULL; /* data */ -double *y = NULL; /* data averages */ +double *y = NULL; /* data averages */ double *x2 = NULL; double yt, yt2, x2t; /* parameter globals */ double *pave = NULL; /* parameter averages */ -double *h = NULL; /* hyperparameter */ +double *h = NULL; /* hyperparameter */ double *lnpost = NULL; double *lnlike = NULL; double *lnprior = NULL; @@ -138,6 +155,7 @@ double maxlike; double lndet; double lnfish; + double kl; double pi_avelnlike; double pi_avelnpost; @@ -203,6 +221,7 @@ mcdat->maxlike = INFINITY; mcdat->lndet = INFINITY; mcdat->lnfish = INFINITY; + mcdat->kl = INFINITY; 
mcdat->pi_avelnlike = INFINITY; mcdat->pi_avelnpost = INFINITY; @@ -223,6 +242,35 @@ Usage(void); +double +average(const double *data, const int dim) +{ + double m = 0.0; + int i = dim; + + while(i-- > 0) + m += *data++; + + return(m / (double) dim); +} + + +double +variance(const double *data, const int dim, const double mean) +{ + double v = 0.0, tmpv; + int i = dim; + + while(i-- > 0) + { + tmpv = *data++ - mean; + v += (tmpv * tmpv); + } + + return(v / dim); +} + + void VecPrint(double *vec, const int size) { @@ -379,51 +427,6 @@ } -static void -RandFillVec(double *vec, int len, int randmeth, const gsl_rng *r2) -{ - int j; - - for (j = 0; j < len; ++j) - { - switch (randmeth) - { - case 1: - case 'n': /* normal */ - //vec[j] = normal_dev(0.0, 1.0); - vec[j] = gsl_ran_gaussian(r2, 1.0); - /* printf("\n%f", vec[j]); */ - break; - case 2: - case 'l': /* logistic */ - vec[j] = gsl_ran_logistic(r2, 1.0); - break; - case 3: - case 'L': /* Laplacian */ - vec[j] = gsl_ran_laplace(r2, 1.0); - break; - case 4: - case 'C': /* Cauchy */ - vec[j] = gsl_ran_cauchy(r2, 1.0); - break; - case 5: - case 'g': /* gamma */ - vec[j] = gsl_ran_gamma(r2, 1.0, 1.0); - break; - case 12: - case 'e': /* exponential */ - vec[j] = gsl_ran_exponential(r2, 1.0); - break; - default: - printf("\n ERROR888: Bad random param -R '%c' \n", - (char) randmeth); - Usage(); - exit(EXIT_FAILURE); - } - } -} - - double RandScale(double variance, int randmeth, double b) { @@ -463,8 +466,103 @@ } +static void +RandFillVec(double *vec, int len, int randmeth, const gsl_rng *r2) +{ + int j; + double a = 1; + double b = 4; + double u; + double alpha; + double beta; + double mu = 0.0; + + + switch (randmeth) + { + case 1: + case 'n': /* normal */ + for (j = 0; j < len; ++j) + vec[j] = gsl_ran_gaussian(r2, 1.0); + break; + + case 2: + case 'l': /* logistic */ + for (j = 0; j < len; ++j) + vec[j] = gsl_ran_logistic(r2, 1.0); + break; + + case 3: + case 'L': /* Laplacian */ + for (j = 0; j < len; ++j) + vec[j] = 
gsl_ran_laplace(r2, 1.0); + break; + + case 4: + case 'C': /* Cauchy */ + for (j = 0; j < len; ++j) + vec[j] = gsl_ran_cauchy(r2, 1.0); + break; + + case 5: + case 'g': /* gamma */ + alpha = 3.0; + beta = 1.0; + + printf("\nentropy:%g %g %g\n", alpha, beta, alpha - log(beta) + lgamma(alpha) + (1-alpha)*gsl_sf_psi(alpha)); + + for (j = 0; j < len; ++j) + vec[j] = gsl_ran_gamma(r2, alpha, beta); + break; + + case 12: + case 'e': /* exponential */ + for (j = 0; j < len; ++j) + vec[j] = gsl_ran_exponential(r2, 1.0); + break; + + case 13: + case 'b': /* beta */ + for (j = 0; j < len; ++j) + vec[j] = gsl_ran_beta(r2, a,b); + + printf("\nentropy:%g %g %g\n", a, b, gsl_sf_lnbeta(a,b) + -(a-1.0)*gsl_sf_psi(a) + -(b-1.0)*gsl_sf_psi(b) + +(a+b-2.0)*gsl_sf_psi(a+b)); + break; + + case 14: + case 'E': /* extreme value */ + + beta = 0.1; + + for (j = 0; j < len; ++j) + { + u = gsl_rng_uniform(r2); + vec[j] = mu - beta * log(-log(u)); + } + + printf("\nentropy:%g %g %g\n", mu, beta, log(beta)+M_EULER+1.0); + break; + + default: + printf("\n ERROR888: Bad random param -r '%c' \n", + (char) randmeth); + Usage(); + exit(EXIT_FAILURE); + } +} + + +/* +The analyitical entropy of the sampled distribution is easy to calculate: +The total entropy is the sum of the raw uncorrelated entropy and +1/2 log(det(C)), where C is the covariance matrix used to correlate and +scale the raw variates. 
+*/ void -RandVec(double **vec, const int len, const int iters, const gsl_rng *r2) +RandVec(double **vec, const int len, const int iters, int randmeth, const gsl_rng *r2) { int i, j, k; double **covmat = MatAlloc(len, len); @@ -475,82 +573,91 @@ double **tmpvec = MatAlloc(len, iters); double lndet; - for (i = 0; i < len; ++i) - for (j = 0; j < i; ++j) - tmpmat[i][j] = gsl_ran_flat(r2, -1.0, 1.0); for (i = 0; i < len; ++i) - tmpmat[i][i] = gsl_ran_flat(r2, 0.0, 1.0); + RandFillVec(tmpvec[i], iters, randmeth, r2); - MatPrintLowerDiag(tmpmat, len); +// for (i = 0; i < iters; ++i) +// for (j = 0; j < len; ++j) +// vec[j][i] = tmpvec[j][i]; - for (i = 0; i < len; ++i) - for (j = 0; j < len; ++j) - for (k = 0; k < len; ++k) - cormat[i][k] += tmpmat[i][j] * tmpmat[k][j]; + if (1) + { + for (i = 0; i < len; ++i) + for (j = 0; j < i; ++j) + tmpmat[i][j] = gsl_ran_flat(r2, -1.0, 1.0); - printf("\n\"correlation matrix\":"); - MatPrintLowerDiag(cormat, len); + for (i = 0; i < len; ++i) + tmpmat[i][i] = gsl_ran_flat(r2, 0.0, 1.0); -// PrintCovMatGnuPlot((const double **) covmat, len, mystrcat(cdsA->algo->rootname, "_cor.mat")); + MatPrintLowerDiag(tmpmat, len); - for (i = 0; i < len; ++i) - diag[i] = gsl_ran_gamma(r2, 2.0, 10.0); + for (i = 0; i < len; ++i) + for (j = 0; j < len; ++j) + for (k = 0; k < len; ++k) + cormat[i][k] += tmpmat[i][j] * tmpmat[k][j]; - for (i = 0; i < len; ++i) - for (j = 0; j < len; ++j) - covmat[i][j] = cormat[i][j] * sqrt(diag[i] * diag[j]); + printf("\n\"correlation matrix\":"); + MatPrintLowerDiag(cormat, len); - for (i = 0; i < len; ++i) - covmat[i][i] += 1.0; + // PrintCovMatGnuPlot((const double **) covmat, len, mystrcat(cdsA->algo->rootname, "_cor.mat")); - printf("\ncovariance matrix:"); - MatPrintLowerDiag(covmat, len); + for (i = 0; i < len; ++i) + diag[i] = gsl_ran_gamma(r2, 2.0, 10.0); - for (i = 0; i < len; ++i) - diag[i] = covmat[i][i]; + for (i = 0; i < len; ++i) + for (j = 0; j < len; ++j) + covmat[i][j] = cormat[i][j] * sqrt(diag[i] * 
diag[j]); - printf("\nvariances:\n"); + for (i = 0; i < len; ++i) + covmat[i][i] += 1.0; - for (i = 0; i < len; ++i) - printf("%-3d %f\n", i, diag[i]); + printf("\ncovariance matrix:"); + MatPrintLowerDiag(covmat, len); - for (i = 0; i < len; ++i) - for (j = 0; j < len; ++j) - cormat[i][j] = covmat[i][j] / sqrt(diag[i] * diag[j]); + for (i = 0; i < len; ++i) + diag[i] = covmat[i][i]; - printf("\ntrue correlation matrix:"); - MatPrintLowerDiag(cormat, len); + printf("\nvariances:\n"); - EigenvalsGSL(covmat, len, eval); + for (i = 0; i < len; ++i) + printf("%-3d %f\n", i, diag[i]); - printf("\neigenvalues:\n"); + for (i = 0; i < len; ++i) + for (j = 0; j < len; ++j) + cormat[i][j] = covmat[i][j] / sqrt(diag[i] * diag[j]); - for (i = 0; i < len; ++i) - printf("%-3d %f\n", i, eval[i]); + printf("\ntrue correlation matrix:"); + MatPrintLowerDiag(cormat, len); - lndet = 0.0; - for(i = 0; i < len; ++i) - lndet += log(eval[i]); + EigenvalsGSL(covmat, len, eval); - printf("logdet: %f\n", lndet); + printf("\neigenvalues:\n"); - double entropy = 0.5 * len * log(2.0 * M_PI * M_E) + 0.5 * lndet; - printf("\nentropy: %14.3f", entropy); + for (i = 0; i < len; ++i) + printf("%-3d %f\n", i, eval[i]); - CholeskyGSLDest(covmat, len); - printf("\nCholesky lower diagonal matrix:"); - MatPrintLowerDiag(covmat, len); + lndet = 0.0; + for(i = 0; i < len; ++i) + lndet += log(eval[i]); - fflush(NULL); + printf("logdet: %f\n", lndet); + printf("half logdet: %f\n", 0.5*lndet); - for (i = 0; i < len; ++i) - RandFillVec(tmpvec[i], iters, 1, r2); + double entropy = 0.5 * len * log(2.0 * M_PI * M_E) + 0.5 * lndet; + printf("\nentropy: %14.3f", entropy); - for (i = 0; i < iters; ++i) - for (j = 0; j < len; ++j) - for (k = 0; k <= j; ++k) /* because covmat is lower diagonal, uppper should be all zeros */ - vec[j][i] += covmat[j][k] * tmpvec[k][i]; + CholeskyGSLDest(covmat, len); + printf("\nCholesky lower diagonal matrix:"); + MatPrintLowerDiag(covmat, len); + + fflush(NULL); + + for (i = 0; i < 
iters; ++i) + for (j = 0; j < len; ++j) + for (k = 0; k <= j; ++k) /* because covmat is lower diagonal, upper should be all zeros */ + vec[j][i] += covmat[j][k] * tmpvec[k][i]; + } // for (i = 0; i < iters; ++i) // { @@ -570,8 +677,128 @@ } +/* Euclidean norm, L_2 */ +inline double +Norm2(double **x, const int i, const int j, double dim) +{ + int k; + double dist, tmpx; + + dist = 0.0; + for (k = 0; k < dim; ++k) + { + tmpx = x[k][i] - x[k][j]; + dist += tmpx * tmpx; + } + + return(dist); +} + + +/* Max norm, L_inf */ +inline double +NormMax(double **x, const int i, const int j, double dim) +{ + int k; + double dist, max; + + max = DBL_MIN; + for (k = 0; k < dim; ++k) + { + dist = fabs(x[k][i] - x[k][j]); + if (max < dist) + max = dist; + } + + return(max); +} + + +/* Manhattan norm, L_1 */ +inline double +NormManhattan(double **x, const int i, const int j, double dim) +{ + int k; + double dist; + + dist = 0.0; + for (k = 0; k < dim; ++k) + dist += fabs(x[k][i] - x[k][j]); + + return(dist); +} + + +/* Naive brute force search */ +double +*FindLogDists(double **x, const int dim, const int n, const int k) +{ + double *dists = NULL; + double *tmpdist = NULL; + size_t *smalls = NULL; + int i, j; + + dists = malloc(n * sizeof(double)); + tmpdist = malloc(n * sizeof(double)); + smalls = malloc(k * sizeof(size_t)); + + for (i = 0; i < n; ++i) + { + for (j = 0; j < n; ++j) + { + //tmpdist[j] = Norm2(x, i, j, dim); // Euclidian norm L_2 + //tmpdist[j] = NormMax(x, i, j, dim); // Max norm + tmpdist[j] = NormManhattan(x, i, j, dim); // Manhattan norm L_1 + } + + tmpdist[i] = DBL_MAX; // distance to itself = 0, so kill this one + + //gsl_sort_smallest(smalls, k, tmpdist, 1, n); + gsl_sort_smallest_index(smalls, k, tmpdist, 1, n); + + //dists[i] = 0.5*log(smalls[k-1]); // Euclidian norm L_2 + dists[i] = log(tmpdist[smalls[k-1]]); // Max norm or Manhattan + } + + free(smalls); + free(tmpdist); + + return(dists); +} + + +/* Kozachenko-Leonenko non-parametric entropy estimate */ 
+/* We can use any norm -- Euclidean and max-norm are popular choices, + but I like Manhattan because its easy and fast */ +double +CalcKLentropy(double **x, const int dim, const int n, const int k) +{ + double *dists = NULL; + double entropy, volume, term1; + + dists = FindLogDists(x, dim, n, k); + + /* http://en.wikipedia.org/wiki/Volume_of_an_n-ball#Balls_in_Lp_norms */ + //volume = 0.5*dim*M_LNPI - lgamma(0.5*dim + 1.0); // Euclidian norm L_2 + //volume = dim * log(2); // Max norm L_inf + volume = dim * log(2) - lgamma(dim + 1.0); // Manhattan norm L_1 + + term1 = dim * average(dists, n); + + entropy = term1 + volume + gsl_sf_psi_int(n) - gsl_sf_psi_int(k); + + printf("\nvolume: %g\n", volume); + printf("term1: %g\n", term1); + printf("lnN+g: %g\n", log(n) + M_EULER); + printf("lnN+g: %g\n", gsl_sf_psi_int(n) + M_EULER); + printf("lnN+g: %g\n", gsl_sf_psi_int(n) + DIGAMMA2); // for k=2 + + return(entropy); +} + + /* -Calculate harmonic mean estimator, which should never be used, but we determine it for fun +Calculate harmonic mean estimator, which should not be used, but we determine it for fun and to see how bad it actually is. As boni, we get the log arithmetic mean likelihood and log geometric mean likelihood. */ @@ -630,35 +857,6 @@ } -double -average(const double *data, const int dim) -{ - double m = 0.0; - int i = dim; - - while(i-- > 0) - m += *data++; - - return(m / (double) dim); -} - - -double -variance(const double *data, const int dim, const double mean) -{ - double v = 0.0, tmpv; - int i = dim; - - while(i-- > 0) - { - tmpv = *data++ - mean; - v += (tmpv * tmpv); - } - - return(v / dim); -} - - /* Calculate the bias in the entropy estimate due to deviation from Normality. Based on on Edgeworth expansion of a PDF in terms of its cumulants (moments). 
@@ -1311,14 +1509,14 @@ double CalcLaplaceMetUni(MonteCarloDat *mcdat) { - int d, i; + int i; int maxind; double lndet, lapmet, lnh, lnfish; double maxpost, maxprior, maxlike; double ave, var; - d = mcdat->dim; + //d = mcdat->dim; iters = mcdat->iters; printf("Calculating Laplace approximation ...\n"); @@ -1458,7 +1656,7 @@ static void *sim_gauss_pth(void *simdata_ptr) { - SimData *simdata = (SimData *) simdata_ptr; + SimData *simdata = (SimData *) simdata_ptr; int i; const int idim = (const int) simdata->idim; double **x = simdata->x; @@ -1471,7 +1669,7 @@ /* Every thread gets its own rng generator -- otherwise, we get data race junk in valgrind */ T = gsl_rng_ranlxs2; r2 = gsl_rng_alloc(T); - seed = time(NULL) + (unsigned long int) pthread_self() % gsl_rng_max(r2); + seed = time(NULL) + (unsigned long int) pthread_self() + getpid() + clock(); //printf("\nseed[%d]:%ld %ld\n", pthread_self(), seed, time(NULL)); gsl_rng_set(r2, seed); @@ -1537,7 +1735,7 @@ static void *sim_expo_pth(void *simdata_ptr) { - SimData *simdata = (SimData *) simdata_ptr; + SimData *simdata = (SimData *) simdata_ptr; int i; const int idim = (const int) simdata->idim; double **x = simdata->x; @@ -1550,7 +1748,7 @@ /* Every thread gets its own rng generator -- otherwise, we get data race junk in valgrind */ T = gsl_rng_ranlxs2; r2 = gsl_rng_alloc(T);; - seed = time(NULL) + (unsigned long int) pthread_self() % gsl_rng_max(r2); + seed = time(NULL) + (unsigned long int) pthread_self() + getpid() + clock(); //printf("\nseed[%d]:%ld %ld\n", pthread_self(), seed, time(NULL)); gsl_rng_set(r2, seed); @@ -1626,7 +1824,7 @@ fflush(NULL); for (i = 0; i < dim; ++i) { - //tmpmu = gsl_ran_gaussian(r2, 10.0); + //tmpmu = gsl_ran_gaussian(r2, 1000.0); //printf("\nmu[%d]: %g", i, tmpmu); tmpmu = 1.0; for (j = 0; j < ndata; ++j) @@ -1820,6 +2018,9 @@ printf("\n%-25s% 16.4f", "explnpost:", explnpost); printf("\n%-25s% 16.4f", "avelnlike/n:", avelnlike/mcdat->ndata); + printf("\n%-25s% 16.4f", "ave 
Kulback-Leibler:", avelnpost - avelnprior); + if (isfinite(explnprior) && isfinite(explnpost)) + printf("\n%-25s% 16.4f", "exp Kulback-Leibler:", explnpost - explnprior); if (isfinite(avelnscore)) { @@ -2828,9 +3029,11 @@ printf(" -g Gaussian models \n"); printf(" -H calculate Edgworth entropy of simulation \n"); printf(" -i # of samples or sampling iterations \n"); + printf(" -k kth smallest distance for K-L entropy calc [1] \n"); printf(" -l lambda, prior precision \n"); printf(" -n # of data points per dimension \n"); printf(" -p parallel simulation \n"); + printf(" -r pdf for generating random variates \n"); printf(" -s seed for random number generators \n"); printf(" -v version and info \n"); printf("I===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-==I\n"); @@ -2847,7 +3050,7 @@ int option; /* get the options */ - while ((option = getopt(argc, argv, "b:d:efgHi:l:m:n:ps:t:v")) != -1) + while ((option = getopt(argc, argv, "b:d:efgHi:k:l:m:n:pr:s:t:v")) != -1) { switch (option) { @@ -2894,6 +3097,10 @@ iters = (int) strtol(optarg, NULL, 10); break; + case 'k': + klentk = (int) strtol(optarg, NULL, 10); + break; + case 'l': lambda_0 = (double) strtod(optarg, NULL); break; @@ -2906,6 +3113,10 @@ parallel = 1; break; + case 'r': + randmeth = (int) strtol(optarg, NULL, 10); + break; + case 's': seed = (int) strtol(optarg, NULL, 10); break; @@ -2972,9 +3183,21 @@ gsl_rng_env_setup(); if (seed == 0) - gsl_rng_default_seed = time(NULL); + { + unsigned long int clockn, timen, pidn; + + pidn = getpid(); + timen = time(NULL); + clockn = clock(); + gsl_rng_default_seed = pidn + clockn + timen; + + printf("\npid:%lu time:%lu clock:%lu\nseed:%lu\n\n", pidn, timen, clockn, gsl_rng_default_seed); + } else + { gsl_rng_default_seed = seed; + } + T = gsl_rng_ranlxd2; r2 = gsl_rng_alloc(T); //gsl_rng_set (r2, 1); @@ -2993,18 +3216,27 @@ y = calloc(dim, sizeof(double)); x2 = calloc(dim, sizeof(double)); + 
/************************************************************************************/ if (entropy_calc == 1) { double entropy; - RandVec(x, dim, iters, r2); + RandVec(x, dim, iters, randmeth, r2); entropy = CalcEdgeworthVanHulleEntropy(x, dim, iters); printf("\n-d ln(n): %14.3f", -dim * log(ndata)); printf("\nentropy: %14.3f", entropy); printf ("\n\n"); + + fflush(NULL); + + entropy = CalcKLentropy(x, dim, iters, klentk); + + printf("\nentropy: %14.3f", entropy); + printf ("\n\n"); + fflush(NULL); exit(EXIT_SUCCESS); @@ -3013,13 +3245,13 @@ /************************************************************************************/ if (parallel == 1) { - //SimGaussPth(data, simdata, callThd, &attr, thrdnum); - SimExpoPth(data, simdata, callThd, &attr, thrdnum); + SimGaussPth(data, simdata, callThd, &attr, thrdnum); + //SimExpoPth(data, simdata, callThd, &attr, thrdnum); } else { - //SimGauss(r2); - SimExpo(r2); + SimGauss(r2); + //SimExpo(r2); } CalcCumulants(); @@ -3078,6 +3310,9 @@ mcdat->lapmet = CalcLaplaceMet(mcdat); mcdat->edge_entropy = CalcEdgeworthVanHulleEntropy(x, dim, iters); +// printf("\nKozachenko ##############************** %g *****************##############################\n", +// CalcKLentropy(x, dim, iters, klentk)); + fflush(NULL); mcdat->expmet = mcdat->avelnprior + mcdat->avelnlike + mcdat->edge_entropy; mcdat->hme = CalcHarmonicMean(mcdat, lnlike, iters); @@ -3104,6 +3339,9 @@ MCGaussRef(mcdat); PrintRefMargEstimates(mcdat); + + printf("\nlogLmax: %f\n", -0.5*nd*log(2.0*M_PI) - 0.5*x2t + 0.5*yt2/ndata); + free(mcdat); } @@ -3135,6 +3373,9 @@ PrintMargEstimates(mcdat); + printf("\nlogLmax: %f\n", -0.5*nd*log(2.0*M_PI) - 0.5*x2t + 0.5*yt*yt/nd); + printf("\ndiff: %f\n", -0.5*(yt*yt/nd - yt2/ndata + dim - 1.0)); + free(mcdat); } @@ -3242,7 +3483,6 @@ free(mcdat); } - /************************************************************************************/ if (expo_model == 1) { Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/._MultiPose2MSA.c and 
/tmp/g2bOMTRwaC/theseus-3.0.0/._MultiPose2MSA.c differ diff -Nru theseus-2.0.6/MultiPose2MSA.c theseus-3.0.0/MultiPose2MSA.c --- theseus-2.0.6/MultiPose2MSA.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/MultiPose2MSA.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,11 +31,12 @@ #include "Error.h" #include "lodmats.h" #include "pdbMalloc.h" +#include "pdbUtils.h" #include "Cds.h" #include "PDBCds.h" #include "MultiPose2MSA.h" #include "ProcGSLSVD.h" -#include "ProcGSLSVDOcc.h" +#include "ProcGSLSVDNu.h" #include "msa.h" @@ -48,6 +49,7 @@ static const char atoms0[] = ":CA :C1*:C1'"; static const char atoms1[] = ":N :C :O :CA :"; + static int atom_selxn(char *name, int mode) { @@ -127,21 +129,21 @@ ++len; aalen = 0; - for (j = 0; j < pdbA->cds[i]->vlen; ++j) - if (atom_selxn(pdbA->cds[i]->name[j], atomsel) == 1 && - (pdbA->cds[i]->altLoc[j] == ' ' || pdbA->cds[i]->altLoc[j] == 'A')) - ++aalen; + for (j = 0; j < pdbA->cds[i]->vlen; ++j) + if (atom_selxn(pdbA->cds[i]->name[j], atomsel) && + (pdbA->cds[i]->altLoc[j] == ' ' || pdbA->cds[i]->altLoc[j] == 'A')) + ++aalen; baseA->cds[i]->aalen = aalen; if (len != aalen) { - fprintf(stderr, "\n\n ERROR1122: PDB file '%s' and sequence '%s' in '%s'", - pdbA->cds[i]->filename, msa->name[map[i]], msa->filename); - fprintf(stderr, "\n have different lengths (%d vs %d)\n\n", - aalen, len); + fprintf(stderr, "\n\n ERROR1122: PDB file '%s' and sequence '%s' in '%s'", + pdbA->cds[i]->filename, msa->name[map[i]], msa->filename); + fprintf(stderr, "\n have different lengths (%d vs %d)\n\n", + aalen, len); PrintTheseusTag(); - exit(EXIT_FAILURE); + exit(EXIT_FAILURE); } } } @@ -159,7 +161,7 @@ char *seq = NULL; 
int resSeq = 0; char alnindex[5]; - PDBCds *cdsi = NULL; + PDBCds *cdsi = NULL; for (i = 0; i < cnum; ++i) { @@ -207,7 +209,7 @@ MSA *msa = pdbA->seq2pdb->msa; char *seq = NULL; int resSeq = 0; - PDBCds *cdsi = NULL; + PDBCds *cdsi = NULL; int *singletons = pdbA->seq2pdb->singletons; for (i = 0; i < cnum; ++i) @@ -226,7 +228,7 @@ MSA). So we check that m is OK before accessing it. */ while (j < msa->seqlen && k < cdsi->vlen) { - if (singletons[j] == 1) + if (singletons[j]) { if (seq[j] == '-') /* IS a gap */ { @@ -290,7 +292,7 @@ static Seq2PDB *GetMapFile(char *mapfile_name) { - Seq2PDB *seq2pdb; + Seq2PDB *seq2pdb = NULL; FILE *mapfile = NULL; int i, numscanned, seqnum, maxseqnum; char line[FILENAME_MAX + 256]; @@ -421,14 +423,14 @@ free(msaname_root); } - if (k == msa->seqnum) - { - fprintf(stderr, - "\n ERROR_689: Sequence #%d (%s) in mapfile has no corresponding sequence in the alignment\n", - j+1, seq2pdb->pdbfile_name[j]); - PrintTheseusTag(); - exit(EXIT_FAILURE); - } + if (k == msa->seqnum) + { + fprintf(stderr, + "\n ERROR_689: Sequence #%d (%s) in mapfile has no corresponding sequence in the alignment\n", + j+1, seq2pdb->pdbfile_name[j]); + PrintTheseusTag(); + exit(EXIT_FAILURE); + } free(filename_root); free(mappdbname_root); @@ -444,11 +446,11 @@ if (j == seq2pdb->seqnum) { - fprintf(stderr, - "\n ERROR_690: PDB file #%d (%s) has no corresponding sequence in the alignment\n", - i+1, pdbA->cds[i]->filename); - PrintTheseusTag(); - exit(EXIT_FAILURE); + fprintf(stderr, + "\n ERROR_690: PDB file #%d (%s) has no corresponding sequence in the alignment\n", + i+1, pdbA->cds[i]->filename); + PrintTheseusTag(); + exit(EXIT_FAILURE); } } } @@ -486,11 +488,11 @@ if (j == msa->seqnum) { - fprintf(stderr, - "\n ERROR690: PDB file #%d (%s) has no corresponding sequence in the alignment\n", - i+1, pdbA->cds[i]->filename); - PrintTheseusTag(); - exit(EXIT_FAILURE); + fprintf(stderr, + "\n ERROR690: PDB file #%d (%s) has no corresponding sequence in the alignment\n", 
+ i+1, pdbA->cds[i]->filename); + PrintTheseusTag(); + exit(EXIT_FAILURE); } } } @@ -562,104 +564,110 @@ int *lower = pdbA->lower; int range_num = pdbA->range_num; - for (j = 0; j < cnum; ++j) - { - k = map[j]; - m = n = p = 0; - while(m < pdbA->cds[j]->vlen && n < vlen && p < alignlen) - { - /* m = PDB length */ - /* n = baseA cds length */ - /* p = sequence alignment length */ - /* k = sequence index */ - /* j = pdb and baseA cds index */ + for (j = 0; j < cnum; ++j) + { + k = map[j]; + m = n = p = 0; + while(m < pdbA->cds[j]->vlen && n < vlen && p < alignlen) + { + /* m = PDB length */ + /* n = baseA cds length */ + /* p = sequence alignment length */ + /* k = sequence index */ + /* j = pdb and baseA cds index */ /* printf("\n1 n:%d(%d) atomname:pdbA->cds[%d]->name[%d] %s", */ /* n, alignlen, j, m, pdbA->cds[j]->name[m]); */ /* fflush(NULL); */ /* if (j == cnum - 1) */ /* printf("\naltLoc:%c", pdbA->cds[j]->altLoc[m]); */ - if (atom_selxn(pdbA->cds[j]->name[m], baseA->algo->atoms) == 1 && - (pdbA->cds[j]->altLoc[m] == ' ' || pdbA->cds[j]->altLoc[m] == 'A')) - { + if (atom_selxn(pdbA->cds[j]->name[m], algo->atoms) && + (pdbA->cds[j]->altLoc[m] == ' ' || pdbA->cds[j]->altLoc[m] == 'A')) + { /* printf("\n1 m:%d(%d) msa->seq[%d(%d)][%d(%d)] = %c", */ /* m, pdbA->cds[j]->vlen, k, cnum, n, alignlen, msa->seq[k][n]); */ /* fflush(NULL); */ - if (range_selxn(p, lower, upper, range_num) == 1 - baseA->algo->revsel && /* in-range, or out-of-range if revsel == 1 */ - singletons[p] == 0) /* not a singleton */ - { - if (msa->seq[k][p] != '-') /* not a gap */ - { - strncpy(baseA->cds[j]->resName[n], pdbA->cds[j]->resName[m], 3); - baseA->cds[j]->chainID[n] = pdbA->cds[j]->chainID[m]; - baseA->cds[j]->resSeq[n] = pdbA->cds[j]->resSeq[m]; - baseA->cds[j]->x[n] = pdbA->cds[j]->x[m]; - baseA->cds[j]->y[n] = pdbA->cds[j]->y[m]; - baseA->cds[j]->z[n] = pdbA->cds[j]->z[m]; - baseA->cds[j]->o[n] = 1.0; - baseA->cds[j]->b[n] = pdbA->cds[j]->tempFactor[m]; - - ++m; - ++n; - } - else /* 
is a gap */ - { - strncpy(baseA->cds[j]->resName[n], "GAP", 3); - baseA->cds[j]->chainID[n] = pdbA->cds[j]->chainID[m]; - baseA->cds[j]->resSeq[n] = 0; - baseA->cds[j]->x[n] = 0.0; - baseA->cds[j]->y[n] = 0.0; - baseA->cds[j]->z[n] = 0.0; - baseA->cds[j]->o[n] = 0.0; - baseA->cds[j]->b[n] = 99.99; - - ++n; - } - } - else /* out of range */ - { - if (msa->seq[k][p] != '-') /* not a gap */ - { - ++m; - } + if (range_selxn(p, lower, upper, range_num) - algo->revsel && /* in-range, or out-of-range if revsel == 1 */ + singletons[p] == 0) /* not a singleton */ + { + if (msa->seq[k][p] != '-') /* not a gap */ + { + strncpy(baseA->cds[j]->resName[n], pdbA->cds[j]->resName[m], 3); + baseA->cds[j]->chainID[n] = pdbA->cds[j]->chainID[m]; + baseA->cds[j]->resSeq[n] = pdbA->cds[j]->resSeq[m]; + baseA->cds[j]->x[n] = pdbA->cds[j]->x[m]; + baseA->cds[j]->y[n] = pdbA->cds[j]->y[m]; + baseA->cds[j]->z[n] = pdbA->cds[j]->z[m]; + baseA->cds[j]->o[n] = pdbA->cds[j]->occupancy[m]; + baseA->cds[j]->b[n] = pdbA->cds[j]->tempFactor[m]; + baseA->cds[j]->nu[n] = 1; + baseA->cds[j]->mu[n] = 0; + + ++m; + ++n; + } + else /* is a gap */ + { + strncpy(baseA->cds[j]->resName[n], "GAP", 3); + baseA->cds[j]->chainID[n] = pdbA->cds[j]->chainID[m]; + baseA->cds[j]->resSeq[n] = 0; + baseA->cds[j]->x[n] = 0.0; + baseA->cds[j]->y[n] = 0.0; + baseA->cds[j]->z[n] = 0.0; + baseA->cds[j]->o[n] = 0.0; + baseA->cds[j]->b[n] = 99.99; + baseA->cds[j]->nu[n] = 0; + baseA->cds[j]->mu[n] = 1; + + ++n; + } + } + else /* out of range */ + { + if (msa->seq[k][p] != '-') /* not a gap */ + { + ++m; + } - } + } - ++p; -/* if (j == cnum-1) */ + ++p; +/* if (j == cnum-1) */ /* printf("\n2 m:%d(%d) msa->seq[%d(%d)][%d(%d)] = %c", */ /* m, pdbA->cds[j]->vlen, k, cnum, n, alignlen, msa->seq[k][n]); */ /* fflush(NULL); */ - } - else /* not the proper atom slxn */ - { - ++m; - } -/* if (j == cnum-1) */ + } + else /* not the proper atom slxn */ + { + ++m; + } +/* if (j == cnum-1) */ /* printf("\n2 n:%d(%d) 
atomname:pdbA->cds[%d]->name[%d] %s", */ /* n, alignlen, j, m, pdbA->cds[j]->name[m]); */ /* fflush(NULL); */ - } - /*******************************************************************************************/ - /* if the end of the PDB is before the end of the alignment */ - if (n < vlen && m != 0 && n != 0) - { -/* printf("\nHere: m:%4d n:%4d p:%4d j:%4d -- %4d %4d %4d %4d", */ -/* m, n, p, j, pdbA->cds[j]->vlen, vlen, alignlen, cnum); */ - - for (q = n; q < vlen; ++q) - { - strncpy(baseA->cds[j]->resName[q], "GAP", 3); - baseA->cds[j]->chainID[q] = pdbA->cds[j]->chainID[m-1]; - baseA->cds[j]->resSeq[q] = 0; - baseA->cds[j]->x[q] = 0.0; - baseA->cds[j]->y[q] = 0.0; - baseA->cds[j]->z[q] = 0.0; - baseA->cds[j]->o[q] = 0.0; - baseA->cds[j]->b[q] = 66.66; - } - } - } + } + /*******************************************************************************************/ + /* if the end of the PDB is before the end of the alignment */ + if (n < vlen && m != 0 && n != 0) + { +/* printf("\nHere: m:%4d n:%4d p:%4d j:%4d -- %4d %4d %4d %4d", */ +/* m, n, p, j, pdbA->cds[j]->vlen, vlen, alignlen, cnum); */ + + for (q = n; q < vlen; ++q) + { + strncpy(baseA->cds[j]->resName[q], "GAP", 3); + baseA->cds[j]->chainID[q] = pdbA->cds[j]->chainID[m-1]; + baseA->cds[j]->resSeq[q] = 0; + baseA->cds[j]->x[q] = 0.0; + baseA->cds[j]->y[q] = 0.0; + baseA->cds[j]->z[q] = 0.0; + baseA->cds[j]->o[q] = 0.0; + baseA->cds[j]->b[q] = 66.66; + baseA->cds[j]->nu[n] = 0; + baseA->cds[j]->mu[n] = 1; + } + } + } } @@ -780,31 +788,31 @@ if (revsel == 0) { - vlen = 0; - for (j = 0; j < pdbA->range_num; ++j) - vlen += (pdbA->upper[j] - pdbA->lower[j] + 1); + vlen = 0; + for (j = 0; j < pdbA->range_num; ++j) + vlen += (pdbA->upper[j] - pdbA->lower[j] + 1); /* don't count singletons that are in selected ranges */ for (j = 0; j < pdbA->range_num; ++j) for (i = 0; i < alignlen; ++i) - if (i >= pdbA->lower[j] && i <= pdbA->upper[j] && singletons[i] == 1) + if (i >= pdbA->lower[j] && i <= pdbA->upper[j] && 
singletons[i]) vlen--; } else { - vlen = alignlen; - for (j = 0; j < pdbA->range_num; ++j) - vlen -= (pdbA->upper[j] - pdbA->lower[j] + 1); + vlen = alignlen; + for (j = 0; j < pdbA->range_num; ++j) + vlen -= (pdbA->upper[j] - pdbA->lower[j] + 1); singleton_cnt = 0; for (i = 0; i < alignlen; ++i) - if (singletons[i] == 1) + if (singletons[i]) ++singleton_cnt; /* don't count singletons that are in (un)selected ranges */ for (j = 0; j < pdbA->range_num; ++j) for (i = 0; i < alignlen; ++i) - if (i >= pdbA->lower[j] && i <= pdbA->upper[j] && singletons[i] == 1) + if (i >= pdbA->lower[j] && i <= pdbA->upper[j] && singletons[i]) singleton_cnt--; vlen -= singleton_cnt; @@ -831,9 +839,8 @@ { int i, alnlen, vlen; const int cnum = pdbA->cnum; - MSA *msa; + MSA *msa = NULL; //int *singletons = NULL; - Algorithm *algo = baseA->algo; if (mapfile_name == NULL) { @@ -869,7 +876,7 @@ DefaultSeq2PDBMap(pdbA, pdbA->seq2pdb, msa); GetSingletons(pdbA->seq2pdb->singletons, msa); - if (algo->missing == 1) + if (algo->missing) GetUbiqs(pdbA->seq2pdb->singletons, msa); // for (i=0;icds; - Cds *avecds = cdsA->avecds; + Cds **cds = cdsA->cds; + Cds *avecds = cdsA->avecds; const double *wts = (const double *) cdsA->w; double deviation = 0.0, deviation_sum = 0.0; int i; for (i = 0; i < cdsA->cnum; ++i) { - if(cdsA->algo->covweight != 0) + if(algo->covweight) { - deviation = ProcGSLSVDCovOcc(cds[i], avecds, cds[i]->matrix, + deviation = ProcGSLSVDCovNu(cds[i], avecds, cds[i]->matrix, (const double **) cdsA->WtMat, cdsA->tmpmat3a, cdsA->tmpmat3b, cdsA->tmpmat3c, cdsA->tmpvec3a); } - else if(cdsA->algo->varweight != 0 || cdsA->algo->leastsquares != 0) + else if(algo->varweight || algo->leastsquares) { - deviation = ProcGSLSVDOcc(cds[i], avecds, + deviation = ProcGSLSVDNu(cds[i], avecds, cds[i]->matrix, wts, cdsA->tmpmat3a, @@ -949,12 +956,12 @@ cds[i]->wRMSD_from_mean = sqrt(deviation / cdsA->vlen); deviation_sum += deviation; - if (cdsA->algo->verbose == 1) + if (algo->verbose) { /* rmsd from 
mean would usually need a 2 in denom,\ but this is already deviation from mean, since structure #2 is the average structure */ - printf("CalcRotationsOcc:%5d %8.3f %13.3f \n", + printf("CalcRotationsNu:%5d %8.3f %13.3f \n", i+1, cds[i]->wRMSD_from_mean, deviation); @@ -965,6 +972,57 @@ } +// DLT OP +double +CalcRotationsNu2(CdsArray *cdsA) +{ + Cds **cds = cdsA->cds; + const Cds *avecds = cdsA->avecds; + const double *wts = (const double *) cdsA->w; + const double **wtmat = (const double **) cdsA->WtMat; + Cds *tcds = NULL; + double deviation, deviation_sum; + double norm1, norm2, innprod; + const int vlen = cdsA->vlen; + int i; + + + if (algo->covweight) + { + tcds = cdsA->tcds; + MatMultCdsMultMatDiag(tcds, wtmat, avecds); + } + else if (algo->varweight) + { + tcds = cdsA->tcds; + MatDiagMultCdsMultMatDiag(tcds, wts, avecds); + } + else if (algo->leastsquares) + { + tcds = cdsA->avecds; + } + + deviation = deviation_sum = 0.0; + for (i = 0; i < cdsA->cnum; ++i) + { + deviation = ProcGSLSVDvanNu2((const double **) cds[i]->sc, + (const double **) tcds->wc, + (const int *) cds[i]->nu, + vlen, cds[i]->matrix, + cdsA->tmpmat3a, + cdsA->tmpmat3b, + cdsA->tmpmat3c, + cdsA->tmpvec3a, + &norm1, &norm2, &innprod); + + cds[i]->wRMSD_from_mean = sqrt(deviation / (3 * cdsA->vlen)); + deviation_sum += deviation; + } + + return(deviation_sum); +} + + /* char aa1[] = "ARNDCQEGHILKMFPSTWYV"; */ /* char aa3[] = "ALAARGASNASPCYSGLNGLUGLYHISILELEULYSMETPHEPROSERTHRTRPTYRVAL"; */ @@ -973,7 +1031,7 @@ { int i, j, count, aaindex; char *pindex = NULL; - FILE *fp; + FILE *fp = NULL; char outfile[FILENAME_MAX]; for (i = 0; i < pdbA->cnum; ++i) @@ -998,16 +1056,16 @@ strncmp(pdbA->cds[i]->name[j], "C1'", 3) == 0) && (pdbA->cds[i]->altLoc[j] == ' ' || pdbA->cds[i]->altLoc[j] == 'A')) { - if (count % 72 == 0) - fputc('\n', fp); + if (count % 72 == 0) + fputc('\n', fp); pindex = strstr(aan3, pdbA->cds[i]->resName[j]); if (pindex == NULL) fputc('X', fp); else { - aaindex = (int) (pindex - 
&aan3[0])/3; - fputc(aan1[aaindex], fp); + aaindex = (int) (pindex - &aan3[0])/3; + fputc(aan1[aaindex], fp); } ++count; @@ -1022,3 +1080,4 @@ fclose(fp); } } + diff -Nru theseus-2.0.6/MultiPose2MSA.h theseus-3.0.0/MultiPose2MSA.h --- theseus-2.0.6/MultiPose2MSA.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/MultiPose2MSA.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -36,7 +36,10 @@ Align2MSA(PDBCdsArray *pdbA, CdsArray *baseA, char *msafile_name, char *mapfile_name); double -CalcRotationsOcc(CdsArray *cdsA); +CalcRotationsNu(CdsArray *cdsA); + +double +CalcRotationsNu2(CdsArray *cdsA); void pdb2fst(PDBCdsArray *pdbA); Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/._MultiPose.c and /tmp/g2bOMTRwaC/theseus-3.0.0/._MultiPose.c differ diff -Nru theseus-2.0.6/MultiPose.c theseus-3.0.0/MultiPose.c --- theseus-2.0.6/MultiPose.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/MultiPose.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -23,572 +23,348 @@ -/_|:|_|_\- */ -#include -#include "Threads.h" -#include "ProcGSLSVD.h" -#include "ProcGSLSVDOcc.h" -#include "qcprot.h" #include "MultiPose_local.h" #include "MultiPose.h" -void -SuperPose2Anchor(CdsArray *scratchA, CdsArray *baseA, char *anchorf_name) -{ - double **anchormat = MatAlloc(3, 3); - double *anchortrans = malloc(3 * sizeof(double)); - double *tmpanchortrans = malloc(3 * sizeof(double)); - double *trans = malloc(3 * sizeof(double)); - double norm1, norm2, innprod; - int i, j, anchor = 0; - - for (i = 0; i < baseA->cnum; ++i) - { - if (strncmp(anchorf_name, baseA->cds[i]->filename, FILENAME_MAX - 1) == 0) - { - anchor = i; - break; - } - } - - SuperPose(scratchA->cds[anchor], baseA->cds[anchor], anchormat, anchortrans, - &norm1, &norm2, &innprod); - - for (i = 0; i < baseA->cnum; ++i) - { - InvRotVec(tmpanchortrans, anchortrans, scratchA->cds[i]->matrix); - - for (j = 0; j < 3; ++j) - scratchA->cds[i]->center[j] = scratchA->cds[i]->translation[j] = - scratchA->cds[i]->center[j] - tmpanchortrans[j]; - - Mat3MultIp(scratchA->cds[i]->matrix, (const double **) anchormat); - } - - for (j = 0; j < 3; ++j) - scratchA->avecds->center[j] = scratchA->avecds->translation[j] = - anchortrans[j]; +static double +CalcScaleFactors(CdsArray *cdsA); - Mat3Cpy(scratchA->avecds->matrix, (const double **) anchormat); +static double +CalcScaleFactorsML(CdsArray *cdsA); - free(trans); - free(anchortrans); - free(tmpanchortrans); - MatDestroy(&anchormat); -} +static void +*CalcRot_pth(void *rotdata_ptr); +static double +CalcRotations_pth(CdsArray *cdsA, RotData **rotdata, pthread_t *callThd, + pthread_attr_t *attr, const int thrdnum); -/* static void */ -/* CenMassWtHVarIp_old3D(Cds *cds, const double *wts, const double wtnorm, */ -/* const double *mean, const double *var, const double precision) */ -/* { */ -/* 
int i; */ -/* double tempx, tempy, tempz; */ -/* double wti, wtsum; */ -/* const double *x = (const double *) cds->x, */ -/* *y = (const double *) cds->y, */ -/* *z = (const double *) cds->z; */ -/* */ -/* tempx = tempy = tempz = wtsum = 0.0; */ -/* for (i = 0; i < cds->vlen; ++i) */ -/* { */ -/* wti = wts[i]; */ -/* wtsum += wti; */ -/* tempx += (wti * x[i]); */ -/* tempy += (wti * y[i]); */ -/* tempz += (wti * z[i]); */ -/* } */ -/* */ -/* if (wtsum < precision * wtnorm / var[0]) */ -/* { */ -/* */ -/* cds->center[0] = - wtnorm * mean[0]; */ -/* cds->center[1] = - wtnorm * mean[1]; */ -/* cds->center[2] = - wtnorm * mean[2]; */ -/* } */ -/* else */ -/* { */ -/* cds->center[0] = (tempx - wtnorm*mean[0] / var[0]) / (wtsum + wtnorm / var[0]); */ -/* cds->center[1] = (tempy - wtnorm*mean[1] / var[1]) / (wtsum + wtnorm / var[1]); */ -/* cds->center[2] = (tempz - wtnorm*mean[2] / var[2]) / (wtsum + wtnorm / var[2]); */ -/* } */ -/* } */ - - -/* For superimposing to an alignment, we don't need to weight by occupancy - since we are using pseudo-coordinates here from the E-M expectation step */ -/* static void */ -/* CalcTranslations_old3D(CdsArray *scratchA, Algorithm *algo) */ -/* { */ -/* Cds **cds = scratchA->cds; */ -/* int i, j, cnt; */ -/* double chi2; */ -/* */ -/* if (algo->notrans == 0) */ -/* { */ -/* if (algo->htrans == 1 && algo->rounds > 1) */ -/* { */ -/* double logL, lvar, varsum; */ -/* double *mean = malloc(3 * sizeof(double)); */ -/* double *var = malloc(3 * sizeof(double)); */ -/* double *trans = malloc(scratchA->cnum * sizeof(double)); */ -/* */ -/* varsum = FLT_MAX; */ -/* cnt = 0; */ -/* do */ -/* { */ -/* lvar = varsum; */ -/* ++cnt; */ -/* varsum = 0.0; */ -/* for (j = 0; j < 3; ++j) */ -/* { */ -/* for (i = 0; i < scratchA->cnum; ++i) */ -/* trans[i] = -cds[i]->center[j]; */ -/* */ -/* chi2 = normal_fit((const double *) trans, scratchA->cnum, &mean[j], &var[j], &logL); */ -/* varsum += var[j]; */ -/* */ -/* fflush(NULL); */ -/* } */ -/* */ -/* 
for (i = 0; i < scratchA->cnum; ++i) */ -/* CenMassWtHVarIp(cds[i], scratchA->w, scratchA->stats->wtnorm, mean, var, algo->precision); */ -/* break; */ -/* } */ -/* while(fabs(lvar - varsum)/varsum > algo->precision); */ -/* */ -/* scratchA->stats->htrans_ave = mean[0]; */ -/* scratchA->stats->htrans_var = var[0]; */ -/* scratchA->stats->htrans_chi2 = chi2; */ -/* */ -/* free(trans); */ -/* free(var); */ -/* free(mean); */ -/* } */ -/* else */ -/* { */ -/* for (i = 0; i < scratchA->cnum; ++i) */ -/* { */ -/* if (algo->covweight == 0) */ -/* { */ -/* if (algo->alignment == 1 && algo->rounds < 3) */ -/* CenMassWtIpOcc(cds[i], scratchA->w); */ -/* else */ -/* CenMassWtIp(cds[i], scratchA->w); */ -/* } */ -/* else */ -/* CenMassCov(cds[i], (const double **) scratchA->WtMat); */ -/* } */ -/* } */ -/* } */ -/* } */ +static void +InvGammaAdjustEvals(double *newevals, const int vlen, const int cnum, + double *evals, const double b, const double c); static void -CenMassWtHVarIp(Cds *cds, const double *wts, const double wtnorm, - const double *mean, const double var, const double precision) +CalcTranslationsOp(CdsArray *cdsA, Algorithm *algo) { int i; - double tempx, tempy, tempz; - double wti, wtsum; - const double *x = (const double *) cds->x, - *y = (const double *) cds->y, - *z = (const double *) cds->z; - - tempx = tempy = tempz = wtsum = 0.0; - for (i = 0; i < cds->vlen; ++i) - { - wti = wts[i]; - wtsum += wti; - tempx += (wti * x[i]); - tempy += (wti * y[i]); - tempz += (wti * z[i]); - } -/* printf("\nwtsum: %f wtnorm: %f", wtsum, wtnorm); */ -/* printf("\n% f % f % f %f %f %f", tempx / wtsum, tempy / wtsum, tempz / wtsum, wtsum, wtnorm, wtnorm / var); */ - - printf("\nbefore: % f % f % f", cds->center[0], cds->center[1], cds->center[2]); - if (var * wtsum < precision * wtnorm) - { - cds->center[0] = -mean[0]; - cds->center[1] = -mean[1]; - cds->center[2] = -mean[2]; - } - else - { - cds->center[0] = (tempx*var - wtnorm*mean[0]) / (wtsum*var + wtnorm); - 
cds->center[1] = (tempy*var - wtnorm*mean[1]) / (wtsum*var + wtnorm); - cds->center[2] = (tempz*var - wtnorm*mean[2]) / (wtsum*var + wtnorm); - } - - printf("\nafter: % f % f % f", cds->center[0], cds->center[1], cds->center[2]); - fflush(NULL); -} - - -void -CalcTranslationsOp(CdsArray *scratchA, CdsArray *baseA, Algorithm *algo) -{ - int i, j, cnt; - double chi2; - if (algo->notrans == 0) + for (i = 0; i < cdsA->cnum; ++i) { - if (algo->htrans == 1 && algo->rounds > 1) + if (algo->covweight == 0) { - double logL, lvar, varsum/* , chi2 */; - double *mean = malloc(3 * sizeof(double)); - double *var = malloc(3 * sizeof(double)); - double *trans = malloc(scratchA->cnum * sizeof(double)); - - varsum = FLT_MAX; - cnt = 0; - do + if (algo->alignment) { - lvar = varsum; - ++cnt; - varsum = 0.0; - for (j = 0; j < 3; ++j) - { - for (i = 0; i < scratchA->cnum; ++i) - trans[i] = -baseA->cds[i]->center[j]; - - chi2 = normal_fit((const double *) trans, scratchA->cnum, &mean[j], &var[j], &logL); - varsum += var[j]; - printf("\n %3d:%d chi2: %f mean: %f var: %f logL: %f wtnorm: %f", - cnt, j, chi2, mean[j], var[j], logL, scratchA->stats->wtnorm); - fflush(NULL); - } - - printf("\n %3d: varsum:%f", cnt, varsum); - - for (i = 0; i < scratchA->cnum; ++i) - CenMassWtHVarIp(baseA->cds[i], scratchA->w, scratchA->stats->wtnorm, mean, varsum, algo->precision); + CenMassWtNu2((const double **) cdsA->cds[i]->sc, + (const double **) cdsA->avecds->wc, + (const int *) cdsA->cds[i]->nu, + (const double *) cdsA->w, + cdsA->vlen, + (const double **) cdsA->cds[i]->matrix, + cdsA->cds[i]->center); + } + else + { + CenMassWt2((const double **) cdsA->cds[i]->sc, + (const double *) cdsA->w, + cdsA->vlen, + cdsA->cds[i]->center); } - while(fabs(lvar - varsum) > algo->precision * varsum); - - scratchA->stats->htrans_ave = mean[0]; - scratchA->stats->htrans_var = varsum; - scratchA->stats->htrans_chi2 = chi2; - - free(trans); - free(var); - free(mean); } else { - for (i = 0; i < scratchA->cnum; ++i) - 
{ - if (algo->covweight == 0) - { - if (algo->alignment == 1 && algo->rounds < 3) - { - CenMassWtIpOcc(baseA->cds[i], scratchA->w); - } - else - { - if (algo->commandeur == 1) - CenMassWtIpEM(baseA->cds[i], scratchA->avecds, scratchA->w); - else - CenMassWtIp(baseA->cds[i], scratchA->w); - } - } - else - { - CenMassCov(baseA->cds[i], (const double **) scratchA->WtMat); - } - - //printf("\n********** cen[%d]: %f %f %f", i+1, baseA->cds[i]->center[0], baseA->cds[i]->center[1], baseA->cds[i]->center[2]); - } + CenMassCov2((const double **) cdsA->cds[i]->sc, + cdsA->cds[i]->cc, + (const double **) cdsA->WtMat, + cdsA->vlen, + cdsA->cds[i]->center); } } - -// fflush(NULL); - -/* for (i = 0; i < scratchA->cnum; ++i) */ -/* memcpy(scratchA->cds[i]->center, baseA->cds[i]->center, 3 * sizeof(double)); */ } -void -CalcTranslationsIp(CdsArray *scratchA, Algorithm *algo) +double +CalcRotations(CdsArray *cdsA) { - Cds **cds = scratchA->cds; - int i, j, cnt; - double chi2; - - if (algo->notrans == 0) - { - if (algo->htrans == 1 && algo->rounds > 1) - { - double logL, lvar, varsum/* , chi2 */; - double *mean = malloc(3 * sizeof(double)); - double *var = malloc(3 * sizeof(double)); - double *trans = malloc(scratchA->cnum * sizeof(double)); - - varsum = FLT_MAX; - cnt = 0; - do - { - lvar = varsum; - ++cnt; - varsum = 0.0; - for (j = 0; j < 3; ++j) - { - for (i = 0; i < scratchA->cnum; ++i) - trans[i] = -cds[i]->center[j]; + Cds **cds = cdsA->cds; + const Cds *avecds = cdsA->avecds; + const double *wts = (const double *) cdsA->w; + Cds *tcds = NULL; + double deviation = 0.0, deviation_sum = 0.0; + int i; - chi2 = normal_fit((const double *) trans, scratchA->cnum, &mean[j], &var[j], &logL); - varsum += var[j]; - printf("\n %3d:%d chi2: %f mean: %f var: %f logL: %f wtnorm: %f", - cnt, j, chi2, mean[j], var[j], logL, scratchA->stats->wtnorm); - fflush(NULL); - } - printf("\n %3d: varsum:%f", cnt, varsum); + if (algo->covweight) + { + tcds = cdsA->tcds; + MatMultCdsMultMatDiag(tcds, 
(const double **) cdsA->WtMat, avecds); + } + else if (algo->varweight) + { + tcds = cdsA->tcds; + MatDiagMultCdsMultMatDiag(tcds, wts, avecds); + } + else if (algo->leastsquares) + { + tcds = cdsA->avecds; + } - for (i = 0; i < scratchA->cnum; ++i) - CenMassWtHVarIp(cds[i], scratchA->w, scratchA->stats->wtnorm, mean, varsum, algo->precision); - break; /* iterating converges to singularities */ - } - while(fabs(lvar - varsum) > algo->precision * varsum); + for (i = 0; i < cdsA->cnum; ++i) + { + if (algo->tenberge) + { + AveCdsTB(cdsA, i); + MatDiagMultCdsMultMatDiag(tcds, wts, avecds); + } - scratchA->stats->htrans_ave = mean[0]; - scratchA->stats->htrans_var = varsum; - scratchA->stats->htrans_chi2 = chi2; - - free(trans); - free(var); - free(mean); + /* note that the avecds are already multiplied by the weight matrices */ + if (algo->pu) + { + double **qcpcds = MatAlloc(3, cds[i]->vlen); + double **qcptcds = MatAlloc(3, cds[i]->vlen); + size_t len = cds[i]->vlen * sizeof(double); + memcpy(qcpcds[0], cds[i]->x, len); + memcpy(qcpcds[1], cds[i]->y, len); + memcpy(qcpcds[2], cds[i]->z, len); + memcpy(qcptcds[0], tcds->x, len); + memcpy(qcptcds[1], tcds->y, len); + memcpy(qcptcds[2], tcds->z, len); + deviation = CalcRMSDRotationalMatrix(qcpcds, qcptcds, cds[i]->vlen, &cds[i]->matrix[0][0], NULL); + deviation = deviation * deviation * cds[i]->vlen; +// printf("\nqcp deviation: %g", deviation); +// MatPrint(cds[i]->matrix, 3); + MatDestroy(&qcpcds); + MatDestroy(&qcptcds); } else { - for (i = 0; i < scratchA->cnum; ++i) - { - if (algo->covweight == 0) - { -/* if (algo->alignment == 1 && algo->rounds < 3) */ -/* { */ -/* CenMassWtIpOcc(scratchA->cds[i], scratchA->w); */ -/* } */ -/* else */ -/* { */ - if (algo->commandeur == 1) - CenMassWtIpEM(scratchA->cds[i], scratchA->avecds, scratchA->w); - else - CenMassWtIp(scratchA->cds[i], scratchA->w); -/* } */ - } - else - { - CenMassCov(scratchA->cds[i], (const double **) scratchA->WtMat); - } - } + deviation = 
ProcGSLSVDvan(cds[i], + tcds, + cds[i]->matrix, + cdsA->tmpmat3a, + cdsA->tmpmat3b, + cdsA->tmpmat3c, + cdsA->tmpvec3a); +// printf("\nSVD deviation: %g", deviation); +// MatPrint(cds[i]->matrix, 3); } + + /* find global rmsd and average cds (both held in structure) */ + cds[i]->wRMSD_from_mean = sqrt(deviation / (3 * cdsA->vlen)); + deviation_sum += deviation; } + + return(deviation_sum); } -void -MatMultCdsMultMatDiag(Cds *outcds, const double **matK, const Cds *cds) +double +CalcRotations2(CdsArray *cdsA) { - int i, k; - const int vlen = cds->vlen; - double **TmpMat = MatAlloc(vlen, 3); - double matKik; + Cds **cds = cdsA->cds; + const Cds *avecds = cdsA->avecds; + const double *wts = (const double *) cdsA->w; + const double **wtmat = (const double **) cdsA->WtMat; + Cds *tcds = NULL; + double deviation, deviation_sum; + double norm1, norm2, innprod; + const int vlen = cdsA->vlen; + int i; - for (i = 0; i < vlen; ++i) + if (algo->covweight) { - for (k = 0; k < vlen; ++k) - { - matKik = matK[i][k]; - TmpMat[i][0] += matKik * cds->x[k]; - TmpMat[i][1] += matKik * cds->y[k]; - TmpMat[i][2] += matKik * cds->z[k]; - } + tcds = cdsA->tcds; + MatMultCdsMultMatDiag(tcds, wtmat, avecds); } - - for (i = 0; i < vlen; ++i) + else if (algo->varweight) + { + tcds = cdsA->tcds; + MatDiagMultCdsMultMatDiag(tcds, wts, avecds); + } + else if (algo->leastsquares) { - outcds->x[i] = TmpMat[i][0]; - outcds->y[i] = TmpMat[i][1]; - outcds->z[i] = TmpMat[i][2]; + tcds = cdsA->avecds; } - MatDestroy(&TmpMat); -} - - -void -MatMultCdsMultMat(Cds *outcds, const double **matK, const Cds *cds, const double **matD) -{ - int i, k; - const int vlen = cds->vlen; - double **TmpMat = MatAlloc(vlen, 3); - double matKik; - double xi, yi, zi; - - for (i = 0; i < vlen; ++i) + // Assumes Cds have been wt centered previously + if (algo->alignment) { - for (k = 0; k < vlen; ++k) + for (i = 0; i < cdsA->cnum; ++i) { - matKik = matK[i][k]; - TmpMat[i][0] += matKik * cds->x[k]; - TmpMat[i][1] += matKik * 
cds->y[k]; - TmpMat[i][2] += matKik * cds->z[k]; + for (int j = 0; j < cdsA->vlen; ++j) + { + if (cdsA->cds[i]->nu[j] == 0) + { + cds[i]->x[j] = 0.0; + cds[i]->y[j] = 0.0; + cds[i]->z[j] = 0.0; + } + } } } - for (i = 0; i < vlen; ++i) + deviation = deviation_sum = 0.0; + for (i = 0; i < cdsA->cnum; ++i) { - xi = TmpMat[i][0]; - yi = TmpMat[i][1]; - zi = TmpMat[i][2]; +// if (algo->alignment) +// { +// deviation = ProcGSLSVDvanNu2((const double **) cds[i]->sc, +// (const double **) tcds->wc, +// (const int *) cds[i]->nu, +// vlen, +// cds[i]->matrix, +// cdsA->tmpmat3a, +// cdsA->tmpmat3b, +// cdsA->tmpmat3c, +// cdsA->tmpvec3a, +// &norm1, &norm2, &innprod); +// // printf("\nSVD deviation: %g", deviation); +// // MatPrint(cds[i]->matrix, 3); +// } +// else + + if (algo->tenberge) + { + AveCdsTB(cdsA, i); + MatDiagMultCdsMultMatDiag(tcds, wts, avecds); + } + + /* note that the avecds are already multiplied by the weight matrices */ + if (algo->pu) + { + deviation = CalcRMSDRotationalMatrix(cds[i]->wc, tcds->wc, vlen, &cds[i]->matrix[0][0], NULL); + deviation = deviation * deviation * vlen; +// printf("\nqcp deviation: %g", deviation); +// MatPrint(cds[i]->matrix, 3); + } + else + { + deviation = ProcGSLSVDvan2((const double **) cds[i]->wc, // sc works for non-missing data + (const double **) tcds->wc, + vlen, + cds[i]->matrix, + cdsA->tmpmat3a, + cdsA->tmpmat3b, + cdsA->tmpmat3c, + cdsA->tmpvec3a, + &norm1, &norm2, &innprod); +// printf("\nSVD deviation: %g", deviation); +// MatPrint(cds[i]->matrix, 3); + } - outcds->x[i] = xi * matD[0][0] + yi * matD[1][0] + zi * matD[2][0]; - outcds->y[i] = xi * matD[0][1] + yi * matD[1][1] + zi * matD[2][1]; - outcds->z[i] = xi * matD[0][2] + yi * matD[1][2] + zi * matD[2][2]; + /* find global rmsd and average cds (both held in structure) */ + cds[i]->wRMSD_from_mean = sqrt(deviation / (3 * vlen)); + deviation_sum += deviation; } - MatDestroy(&TmpMat); + return(deviation_sum); } -void -MatDiagMultCdsMultMat(Cds *outcds, const 
double *diag, const Cds *cds, const double **matD) +static void +*CalcRot_pth(void *rotdata_ptr) { int i; - const int vlen = cds->vlen; - double diagi; - double xi, yi, zi; + double deviation = 0.0; + RotData *rotdata = (RotData *) rotdata_ptr; + Cds *cds = NULL; - for (i = 0; i < vlen; ++i) + for (i = rotdata->start; i < rotdata->end; ++i) { - diagi = diag[i]; - xi = diagi * cds->x[i]; - yi = diagi * cds->y[i]; - zi = diagi * cds->z[i]; - - outcds->x[i] = xi * matD[0][0] + yi * matD[1][0] + zi * matD[2][0]; - outcds->y[i] = xi * matD[0][1] + yi * matD[1][1] + zi * matD[2][1]; - outcds->z[i] = xi * matD[0][2] + yi * matD[1][2] + zi * matD[2][2]; - } -} + cds = rotdata->cds[i]; + /* note that the avecds are already multiplied by the weight matrices */ +// deviation = CalcRMSDRotationalMatrix(cds, rotdata->tcds, cds->vlen, &cds->matrix[0][0], NULL); + /* rotate the scratch cds with new rotation matrix */ + RotateCdsIp(cds, (const double **) cds->matrix); -void -MatDiagMultCdsMultMatDiag(Cds *outcds, const double *wtK, const Cds *cds) -{ - int i; - double wtKi; - const double *x = (const double *) cds->x, - *y = (const double *) cds->y, - *z = (const double *) cds->z; - - for (i = 0; i < cds->vlen; ++i) - { - wtKi = wtK[i]; - - outcds->x[i] = wtKi * x[i]; - outcds->y[i] = wtKi * y[i]; - outcds->z[i] = wtKi * z[i]; + /* find global rmsd and average cds (both held in structure) */ + cds->wRMSD_from_mean = sqrt(deviation / (3 * rotdata->vlen)); } + + pthread_exit((void *) 0); } -double -CalcRotations(CdsArray *cdsA) +static double +CalcRotations_pth(CdsArray *cdsA, RotData **rotdata, pthread_t *callThd, + pthread_attr_t *attr, const int thrdnum) { Cds **cds = cdsA->cds; const Cds *avecds = cdsA->avecds; const double *wts = (const double *) cdsA->w; Cds *tcds = cdsA->tcds; - double deviation = 0.0, deviation_sum = 0.0; - int i; + double deviation_sum = 0.0; + int i, rc = 0, incr; + const int cnum = cdsA->cnum; + + if (algo->covweight) + { + MatMultCdsMultMatDiag(tcds, + 
(const double **) cdsA->WtMat, + avecds); + } + else if (algo->varweight || algo->leastsquares) + { + MatDiagMultCdsMultMatDiag(tcds, wts, avecds); + } - if (cdsA->algo->norot == 0) + incr = cnum / thrdnum; + + for (i = 0; i < thrdnum - 1; ++i) { - if (cdsA->algo->method == 3) /* default */ + rotdata[i]->cds = cds; + rotdata[i]->tcds = tcds; + rotdata[i]->start = i * incr; + rotdata[i]->end = i*incr + incr; + rotdata[i]->vlen = cdsA->vlen; + + rc = pthread_create(&callThd[i], attr, CalcRot_pth, (void *) rotdata[i]); + + if (rc) { - if (cdsA->algo->covweight == 1) - { - MatMultCdsMultMatDiag(tcds, - (const double **) cdsA->WtMat, - avecds); - } - else if (cdsA->algo->varweight == 1 || cdsA->algo->leastsquares == 1) - { - MatDiagMultCdsMultMatDiag(tcds, - wts, - avecds); - } + printf("ERROR811: return code from pthread_create() %d is %d\n", i, rc); + exit(EXIT_FAILURE); + } + } - for (i = 0; i < cdsA->cnum; ++i) - { - if (cdsA->algo->tenberge == 1) - { - AveCdsTB(cdsA, i); - MatDiagMultCdsMultMatDiag(tcds, - wts, - avecds); - } + rotdata[thrdnum - 1]->cds = cds; + rotdata[thrdnum - 1]->tcds = tcds; + rotdata[thrdnum - 1]->start = (thrdnum - 1) * incr; + rotdata[thrdnum - 1]->end = cnum; + rotdata[thrdnum - 1]->vlen = cdsA->vlen; - /* note that the avecds are already multiplied by the weight matrices */ + rc = pthread_create(&callThd[thrdnum - 1], attr, CalcRot_pth, (void *) rotdata[thrdnum - 1]); - if (cdsA->algo->pu == 1) - { - double **qcpcds = MatAlloc(3, cds[i]->vlen); - double **qcptcds = MatAlloc(3, cds[i]->vlen); - size_t len = cds[i]->vlen * sizeof(double); - memcpy(qcpcds[0], cds[i]->x, len); - memcpy(qcpcds[1], cds[i]->y, len); - memcpy(qcpcds[2], cds[i]->z, len); - memcpy(qcptcds[0], tcds->x, len); - memcpy(qcptcds[1], tcds->y, len); - memcpy(qcptcds[2], tcds->z, len); - deviation = CalcRMSDRotationalMatrix(qcpcds, qcptcds, cds[i]->vlen, &cds[i]->matrix[0][0], NULL); - deviation = deviation * deviation * cds[i]->vlen; - //printf("\nqcp deviation: %g", 
deviation); - //MatPrint(cds[i]->matrix, 3); - MatDestroy(&qcpcds); - MatDestroy(&qcptcds); - } - else - { -/* deviation = Kabsch(cds[i], */ -/* tcds, */ -/* cds[i]->matrix, */ -/* cdsA->tmpmat3a, */ -/* cdsA->tmpmat3b, */ -/* cdsA->tmpmat3c, */ -/* cdsA->tmpvec3a); */ - - deviation = ProcGSLSVDvan(cds[i], - tcds, - cds[i]->matrix, - cdsA->tmpmat3a, - cdsA->tmpmat3b, - cdsA->tmpmat3c, - cdsA->tmpvec3a); - //printf("\nSVD deviation: %g", deviation); - //MatPrint(cds[i]->matrix, 3); - } + if (rc) + { + printf("ERROR811: return code from pthread_create() %d is %d\n", i, rc); + exit(EXIT_FAILURE); + } - /* find global rmsd and average cds (both held in structure) */ - cds[i]->wRMSD_from_mean = sqrt(deviation / (3 * cdsA->vlen)); - deviation_sum += deviation; - } + for (i = 0; i < thrdnum; ++i) + { + rc = pthread_join(callThd[i], (void **) NULL); + + if (rc) + { + printf("ERROR812: return code from pthread_join() %d is %d\n", i, rc); + exit(EXIT_FAILURE); } } + for (i = 0; i < cnum; ++i) + deviation_sum += 3 * cdsA->vlen * cds[i]->wRMSD_from_mean * cds[i]->wRMSD_from_mean; + return(deviation_sum); } /* This is the classic iterative (not eigendecomp) solution given by Gower 1975 and in Gower and Dijksterhuis 2004, Ch 9, page 113, Eqn 9.21 */ -double +static double CalcScaleFactors(CdsArray *cdsA) { Cds *cdsi = NULL; @@ -597,10 +373,10 @@ double *wts = cdsA->w; int i; const int cnum = cdsA->cnum, vlen = cdsA->vlen; - double scaleprod, selfprod, innprod, norm, avecdstr, oldscale, factor; + double scalesum, selfprod, innprod, norm, avecdstr; - if (cdsA->algo->leastsquares == 1) + if (algo->leastsquares) { avecdstr = TrCdsInnerProd(avecds, vlen); @@ -608,7 +384,7 @@ for (i = 0; i < cnum; ++i) norm += TrCdsInnerProd(cds[i], vlen); } - else if (cdsA->algo->varweight == 1) + else if (algo->varweight) { avecdstr = TrCdsInnerProdWt(avecds, vlen, wts); @@ -621,25 +397,24 @@ norm = avecdstr = 1.0; } - for (i = 0; i < vlen; ++i) - wts[i] = 1.0 / cdsA->var[i]; +// for (i = 0; i < 
vlen; ++i) // DLT OP +// wts[i] = 1.0 / cdsA->var[i]; // DLT OP - scaleprod = 0.0; + scalesum = 0.0; for (i = 0; i < cnum; ++i) { cdsi = cdsA->cds[i]; - oldscale = cdsi->scale; - if (cdsA->algo->leastsquares == 1) + if (algo->leastsquares) { - selfprod = TrCdsInnerProd(cdsi, vlen) / (oldscale * oldscale); - innprod = TrCdsInnerProd2(cdsi, avecds, vlen) / oldscale; + selfprod = TrCdsInnerProd(cdsi, vlen); + innprod = TrCdsInnerProd2(cdsi, avecds, vlen); } - else if (cdsA->algo->varweight == 1) + else if (algo->varweight) { - selfprod = TrCdsInnerProdWt(cdsi, vlen, wts) / (oldscale * oldscale); - innprod = TrCdsInnerProdWt2(cdsi, avecds, vlen, wts) / oldscale-1.0; + selfprod = TrCdsInnerProdWt(cdsi, vlen, wts); + innprod = TrCdsInnerProdWt2(cdsi, avecds, vlen, wts) - 1.0; } else { @@ -647,1009 +422,271 @@ } cdsi->scale = norm * innprod / (cnum * avecdstr * selfprod); - cdsi->scale = (sqrt(innprod*innprod + 12.0 * (double) vlen * selfprod) + innprod) / (2.0 * selfprod); + //cdsi->scale = (sqrt(innprod*innprod + 12.0 * (double) vlen * selfprod) + innprod) / (2.0 * selfprod); //cds[i]->scale = innprod / selfprod; - scaleprod += log(cds[i]->scale); - factor = cdsi->scale / oldscale; - ScaleCds(cdsi, factor); - printf("\nfactor[%3d] = %12.6e -- scale = %12.6e", i+1, factor, cdsi->scale); + scalesum += log(cds[i]->scale); + //printf("\nscale[%3d] = %12.6e", i+1, cdsi->scale); } - scaleprod = exp(scaleprod / (double) cnum); + scalesum = exp(scalesum / (double) cnum); double bsum = 0.0; for (i = 0; i < cnum; ++i) bsum += cdsA->cds[i]->scale; - for (i = 0; i < cnum; ++i) - printf("\nscale[%3d]: %12.6f", i+1, 15.5 * 30.0 * cdsA->cds[i]->scale / bsum); + //for (i = 0; i < cnum; ++i) + // printf("\nscale[%3d]: %12.6f", i+1, 15.5 * 30.0 * cdsA->cds[i]->scale / bsum); -// for (i = 0; i < cnum; ++i) -// cds[i]->scale /= scaleprod; + for (i = 0; i < cnum; ++i) + ScaleCds(cdsi, cdsA->cds[i]->scale); - return(scaleprod); + return(scalesum); } -void -ConstrainCovMat(CdsArray *cdsA) +/* 
ML solution, scale of structure #1 constrained to be 1 */ +static double +CalcScaleFactorsML(CdsArray *cdsA) { - int i, j; - double tmpx, tmpy, tmpz; - double lagrange, vari; + Cds **cds = cdsA->cds; + Cds *avecds = cdsA->avecds; + Cds *cdsi = NULL; + int i; const int cnum = cdsA->cnum, vlen = cdsA->vlen; - double *var = cdsA->var; - const Cds **cds = (const Cds **) cdsA->cds; - Cds *cdsj; - const double *avex = (const double *) cdsA->avecds->x, - *avey = (const double *) cdsA->avecds->y, - *avez = (const double *) cdsA->avecds->z; + const double *wts = (const double *) cdsA->w; + double scalesum, ft, gt, sigma2, lagrangian; + //int scaleanchor = algo->scaleanchor; - lagrange = 0.0; - for (j = 0; j < cnum; ++j) + if (algo->leastsquares) { - cdsj = (Cds *) cds[j]; - + sigma2 = 0.0; for (i = 0; i < vlen; ++i) + sigma2 += cdsA->var[i]; + sigma2 /= (double) vlen; + + //printf("\nsigma2 = %12.6e \n", sigma2); + + for (i = 0; i < cnum; ++i) { - tmpx = cdsj->x[i] - avex[i]; - vari = tmpx * tmpx; - tmpy = cdsj->y[i] - avey[i]; - vari += tmpy * tmpy; - tmpz = cdsj->z[i] - avez[i]; - vari += tmpz * tmpz; + cdsi = cdsA->cds[i]; + ft = TrCdsInnerProd(cdsi, vlen); + gt = TrCdsInnerProd2(cdsi, avecds, vlen); - lagrange += vari / var[i]; +// if (i == scaleanchor) +// cdsi->scale = 1.0; +// else + cdsi->scale = (sqrt(gt*gt + 12.0 * vlen * sigma2 * ft) + gt) / (2.0 * ft); } } + else if (algo->varweight) + { + double term, var3Ni, phi; - lagrange = lagrange / (3.0 * cnum * vlen) - 1.0; + phi = 2.0*stats->hierarch_p1; - if (lagrange < 0.0) - lagrange = 0.0; + term = 0.0; + for (i = 0; i < vlen; ++i) + { + var3Ni = cdsA->samplevar3N[i]; + term += var3Ni / (var3Ni + phi); + } - printf("\nlagrange = % 12.6e", lagrange); + //printf("term = % 12.4e\n", term); - for (i = 0; i < vlen; ++i) - var[i] -= lagrange * (avex[i]*avex[i] + avey[i]*avey[i] + avez[i]*avez[i]) / (3.0 * cnum); -} + lagrangian = (3.0 * cnum + 1.0) * term / cnum - 3.0 * vlen; + //printf("term2 = % 12.4e\n", term); + for 
(i = 0; i < cnum; ++i) + { + cdsi = cdsA->cds[i]; + ft = TrCdsInnerProdWt(cdsi, vlen, wts); + gt = TrCdsInnerProdWt2(cdsi, avecds, vlen, wts); + // printf("gt = % 12.4e\n", gt); + gt += lagrangian; -/* This is the unconstrained ML solution */ -double -CalcScaleFactorsML(CdsArray *cdsA) -{ - Cds **cds = cdsA->cds; - Cds *avecds = cdsA->avecds; - Cds *cdsi = NULL; - double *wts = cdsA->w; - int i; - const int cnum = cdsA->cnum, vlen = cdsA->vlen; - double bsum, scaleprod, phi, gamma, sigma2, oldscale, factor, nkd = 3.0 * cnum * vlen; -// double var = cdsA->stats->var; - - CalcCovariances(cdsA); - CalcWts(cdsA); - - scaleprod = 0.0; - if (cdsA->algo->leastsquares == 1) - { - sigma2 = 0.0; - for (i = 0; i < vlen; ++i) - sigma2 += cdsA->var[i]; - sigma2 /= (double) vlen; - - bsum = 1.0; - - printf("\nsigma2 = %12.6e \n", sigma2); - - for (i = 0; i < cnum; ++i) - { - cdsi = cdsA->cds[i]; - oldscale = cdsi->scale; - phi = bsum * TrCdsInnerProd(cdsi, vlen) / (oldscale * oldscale); - gamma = TrCdsInnerProd2(cdsi, avecds, vlen) / oldscale; - cdsi->scale = (sqrt(gamma*gamma + 12.0 * vlen * sigma2 * phi) + gamma) / (2.0 * phi); - scaleprod += log(cdsi->scale); - factor = cdsi->scale / oldscale; - ScaleCds(cdsi, factor); - printf("\nfactor[%3d] = %12.6e -- scale = %12.6e", i+1, factor, cdsi->scale); - } - - /* This is to verify that our implicit constraint is actually in effect. 
*/ - bsum = 0.0; - for (i = 0; i < cnum; ++i) - bsum += log(cdsA->cds[i]->scale); - - printf("\nblogsum = %12.6e", bsum); - - bsum = 0.0; - for (i = 0; i < cnum; ++i) - bsum += TrCdsInnerProd(cds[i], vlen) - TrCdsInnerProd2(cds[i], avecds, vlen); - - printf("\nbsum = %12.6e %12.6e % 12.6e", bsum/sigma2, nkd, bsum/sigma2 - nkd); - //bsum = (bsum / (3.0 * cnum * vlen)) + 1.0; - - scaleprod = exp(scaleprod / (double) cnum); - } - else if (cdsA->algo->varweight == 1) - { - for (i = 0; i < vlen; ++i) - wts[i] = 1.0 / cdsA->var[i]; - - double constraint = 0.0; - - for (i = 0; i < vlen; ++i) - constraint += wts[i] * (avecds->x[i] * avecds->x[i] + - avecds->y[i] * avecds->y[i] + - avecds->z[i] * avecds->z[i]); - - constraint = constraint / (3.0 * vlen) + 1.0; - printf("\nconstraint = % 12.6e", constraint); +// if (i == scaleanchor) +// cdsi->scale = 1.0; +// else + cdsi->scale = (sqrt(gt*gt + 12.0 * vlen * ft) + gt) / (2.0 * ft); + //cdsi->scale = (sqrt(gt*gt + 4.0 * (3.0 * vlen + 1.0) * ft) + gt) / (2.0 * ft); - for (i = 0; i < cnum; ++i) - { - cdsi = cdsA->cds[i]; - oldscale = cdsi->scale; - phi = constraint * TrCdsInnerProdWt(cdsi, vlen, wts) / (oldscale * oldscale); - gamma = TrCdsInnerProdWt2(cdsi, avecds, vlen, wts) / oldscale; - cdsi->scale = (sqrt(gamma*gamma + 12.0 * vlen * phi) + gamma) / (2.0 * phi); - scaleprod += log(cdsi->scale); - factor = cdsi->scale / oldscale; - ScaleCds(cdsi, factor); - printf("\nfactor[%3d] = %12.6e -- scale = %12.6e", i+1, factor, cdsi->scale); +// printf("scale[%3d] = % 12.4e\n", i+1, cdsi->scale); } - /* This is to verify that our implicit constraint is actually in effect. 
*/ - bsum = 0.0; - for (i = 0; i < cnum; ++i) - bsum += TrCdsInnerProdWt(cds[i], vlen, wts) - TrCdsInnerProdWt2(cds[i], avecds, vlen, wts); - - printf("\nbsum = %12.6e %12.6e % 12.6e", bsum, nkd, bsum - nkd); - - double phisum = 0.0; - for (i = 0; i < cnum; ++i) - phisum += TrCdsInnerProdWt(cds[i], vlen, wts); - - double gammasum = 0.0; - for (i = 0; i < cnum; ++i) - gammasum +=TrCdsInnerProdWt2(cds[i], avecds, vlen, wts); - - printf("\nphisum, gammasum: % 12.6e % 12.6e % 12.6e", phisum, gammasum, 3.0*vlen); - - scaleprod = exp(scaleprod / (double) cnum); - -/* CalcRotations(cdsA); */ -/* for (i = 0; i < cnum; ++i) */ -/* RotateCdsIp(cds[i], (const double **) cds[i]->matrix); */ -/* AveCds(cdsA); */ - CalcCovariances(cdsA); - //ConstrainCovMat(cdsA); - CalcWts(cdsA); +// printf("%5d\n", algo->rounds); } else { - gamma = phi = 1.0; for (i = 0; i < cnum; ++i) cds[i]->scale = 1.0; - scaleprod = 1.0; - } - -/* bsum = 0.0; */ -/* for (i = 0; i < cnum; ++i) */ -/* bsum += cdsA->cds[i]->scale; */ -/* */ -/* for (i = 0; i < cnum; ++i) */ -/* printf("\nscale[%3d]: %12.6f", i+1, 15.5 * 30.0 * cdsA->cds[i]->scale / bsum); */ - -/* for (i = 0; i < cnum; ++i) */ -/* cds[i]->scale /= scaleprod; */ - - return(scaleprod); -} - - -static void -evallognormal(const double beta, const double phi, const double gamma, const double mu, const int vlen, const double lambda, double *fx, double *dfx) -{ - *fx = phi * beta * beta - gamma * beta + log(beta)/mu - 3.0 * vlen - lambda; - *dfx = 2.0 * beta * phi - gamma + 1.0 / (mu * beta); -} - - -static double -NewtRaphScaleLogNorm(const double init, const double phi, const double gamma, const double mu, const int vlen, const double lambda, const double tol) -{ - int i; - double beta, fx, dfx; - - /* Use Newton-Raphson to find ML estimate of lognormally distributed - scale factors. 
- - must find root of: - - F1 = = 0 - - where the first derivative with repect to the lognormal scale - estimate x (dF1/dx) is: - - F1' = - */ - beta = init; - for (i = 0; i < 200; ++i) - { - evallognormal(beta, phi, gamma, mu, vlen, lambda, &fx, &dfx); - - if (fabs(fx) < tol) - break; /* success */ - - beta -= (fx / dfx); /* Newton-Raphson correction */ - } - - if (i == 200) - beta = init; - - printf("\n init, beta: %10.5f %10.5f", init, beta); - return(beta); -} - - -static double -CalcMu(CdsArray *cdsA) -{ - int i; - const int cnum = cdsA->cnum; - double logb, logbsum; - - logbsum = 0.0; - for (i = 0; i < cnum; ++i) - { - logb = log(cdsA->cds[i]->scale); - logbsum += logb*logb; } - printf("\n logbsum: %10.5f", logbsum); - - return(0.5 * (sqrt(1.0 + 4.0 * logbsum / cnum) - 1.0)); -} - - -static double -CalcTheta(CdsArray *cdsA) -{ - int i; - const int cnum = cdsA->cnum; - double logb, logbsum; - logbsum = 0.0; + double aveb = 0.0; for (i = 0; i < cnum; ++i) - { - logb = log(cdsA->cds[i]->scale); - logbsum += logb*logb; - } - - return(logbsum / cnum); -} - - -double -CalcScaleFactorsMLLogNorm(CdsArray *cdsA) -{ - Cds **cds = cdsA->cds; - Cds *cdsi = NULL; - Cds *avecds = cdsA->avecds; - double *wts = cdsA->w; - const int cnum = cdsA->cnum, vlen = cdsA->vlen; - double scaleprod, init, mu, theta, lambda, phi, gamma, sigma2, oldscale, factor; - int i; - double tol = cdsA->algo->precision; - //double *variance = cdsA->var; - - scaleprod = 0.0; - if (cdsA->algo->leastsquares == 1) - { - lambda = 0.0; - for (i = 0; i < cnum; ++i) - lambda += TrCdsInnerProd(cds[i], vlen) - TrCdsInnerProd2(cds[i], avecds, vlen); - - lambda = (lambda - 3.0 * vlen * cnum) / cnum; - - sigma2 = 0.0; - for (i = 0; i < vlen; ++i) - sigma2 += cdsA->var[i]; - sigma2 /= (double) vlen; - - printf("\nsigma2 = %12.6e \n", sigma2); - - for (i = 0; i < cnum; ++i) - { -/* lambda = 0.0; */ -/* for (j = 0; j < cnum; ++j) */ -/* lambda += TrCdsInnerProd(cds[j], vlen) - TrCdsInnerProd2(cds[j], avecds, 
vlen); */ -/* */ -/* lambda = (lambda - 3.0 * vlen * cnum + cnum) / cnum; */ -/* printf("\nlambda = %12.6e \n", lambda); */ - lambda = 0.0; - - cdsi = cdsA->cds[i]; - oldscale = cdsi->scale; - phi = TrCdsInnerProd(cdsi, vlen) / (sigma2 * oldscale * oldscale); - gamma = TrCdsInnerProd2(cdsi, avecds, vlen) / (sigma2 * oldscale); - init = gamma / phi; - mu = CalcMu(cdsA); - theta = CalcTheta(cdsA); - printf("\nmu = %12.6e \n", mu); - printf("\ntheta = %12.6e \n", theta); - cdsi->scale = NewtRaphScaleLogNorm(init, phi, gamma, mu, vlen, lambda, tol); - //cdsi->scale = (sqrt(gamma*gamma + 12.0 * (double) vlen * sigma2 * phi) + gamma) / (2.0 * phi); - scaleprod += log(cdsi->scale); - factor = cdsi->scale / oldscale; - ScaleCds(cdsi, factor); - printf("\nfactor[%3d] = %12.6e -- scale = %12.6e", i+1, factor, cdsi->scale); - } - - scaleprod = exp(scaleprod / (double) cnum); - } - else if (cdsA->algo->varweight == 1) - { -/* int j; */ -/* lambda = 0.0; */ -/* for (j = 0; j < cnum; ++j) */ -/* lambda += TrCdsInnerProdWt(cds[j], vlen, wts) - TrCdsInnerProdWt2(cds[j], avecds, vlen, wts); */ -/* */ -/* lambda = (lambda - 3.0 * vlen * cnum) / cnum; */ -/* printf("\nlambda = %12.6e \n", lambda); */ - - lambda = 0.0; - - for (i = 0; i < vlen; ++i) - wts[i] = 1.0 / cdsA->var[i]; - - for (i = 0; i < cnum; ++i) - { - cdsi = cdsA->cds[i]; - oldscale = cdsi->scale; - phi = TrCdsInnerProdWt(cdsi, vlen, wts) / (oldscale * oldscale); - gamma = TrCdsInnerProdWt2(cdsi, avecds, vlen, wts) / oldscale; - - if (cdsA->algo->rounds > 8) - init = cdsi->scale; - else - init = gamma / phi; - - mu = CalcMu(cdsA); - theta = CalcTheta(cdsA); - printf("\nmu = %12.6e ", mu); - printf("\ntheta = %12.6e ", theta); - cdsi->scale = NewtRaphScaleLogNorm(init, phi, gamma, mu, vlen, lambda, tol); - //cdsi->scale = (sqrt(gamma*gamma + 12.0 * (double) vlen * phi) + gamma) / (2.0 * phi); - scaleprod += log(cdsi->scale); - factor = cdsi->scale / oldscale; - ScaleCds(cdsi, factor); - printf("\nfactor[%3d] = %12.6e -- 
scale = %12.6e", i+1, factor, cdsi->scale); - } - } - else - { - phi = gamma = 1.0; - for (i = 0; i < cnum; ++i) - cds[i]->scale = 1.0; - scaleprod = 1.0; - } + aveb += cdsA->cds[i]->scale; + scalesum = aveb; + aveb /= cnum; - double bsum = 0.0; - for (i = 0; i < cnum; ++i) - bsum += cdsA->cds[i]->scale; +// printf("scalesum: % 12.6e % 12.6e\n", scalesum, aveb); for (i = 0; i < cnum; ++i) - printf("\nscale[%3d]: %12.6f", i+1, 15.5 * 30.0 * cdsA->cds[i]->scale / bsum); - - return(scaleprod); -} - + cdsA->cds[i]->scale /= aveb; -/* */ -double -CalcScaleFactorsMLLog(CdsArray *cdsA) -{ - Cds **cds = cdsA->cds; - Cds *avecds = cdsA->avecds; - const double *wts = (const double *) cdsA->w; - int i, cnum = cdsA->cnum, vlen = cdsA->vlen; - double scaleprod, selfprod, innprod, sigma2, theta; - - theta = 0.0; for (i = 0; i < cnum; ++i) - theta += cds[i]->scale * cds[i]->scale; - theta /= (double) cnum; - - CalcCovariances(cdsA); - CalcWts(cdsA); - - sigma2 = 0.0; - for (i = 0; i < vlen; ++i) - sigma2 += cdsA->var[i]; - sigma2 /= (double) vlen; - - printf("\nsigma2 = %12.6e \n", sigma2); - - scaleprod = 0.0; - if (cdsA->algo->leastsquares == 1) - { - for (i = 0; i < cnum; ++i) - { - innprod = TrCdsInnerProd2(cds[i], avecds, vlen); - selfprod = TrCdsInnerProd(cds[i], vlen); - cds[i]->scale = - (sqrt(innprod*innprod + 4.0 * (3.0 * (double) vlen - 1.0 - log(cds[i]->scale)/theta) * sigma2 * selfprod) + innprod) / (2.0 * selfprod); - scaleprod += log(cds[i]->scale); - } - - scaleprod = exp(scaleprod / (double) cnum); - } - else if (cdsA->algo->varweight == 1) - { - for (i = 0; i < cnum; ++i) - { - innprod = TrCdsInnerProdWt2(cds[i], avecds, vlen, wts); - selfprod = TrCdsInnerProdWt(cds[i], vlen, wts); - cds[i]->scale = (sqrt(innprod*innprod + 4.0 * (3.0 * (double) vlen - 1.0 - log(cds[i]->scale)/theta) * selfprod) + innprod) / (2.0 * selfprod); - scaleprod += log(cds[i]->scale); - } - - scaleprod = exp(scaleprod / (double) cnum); - } - else - { - innprod = selfprod = 1.0; - for (i 
= 0; i < cnum; ++i) - cds[i]->scale = 1.0; - scaleprod = 1.0; - } - - return(scaleprod); -} - - -/* This is the constrained ML solution, without the scale factor Jacobian in the PDF */ -double -CalcScaleFactorsMLConstr(CdsArray *cdsA) -{ - Cds **cds = cdsA->cds; - Cds *avecds = cdsA->avecds; - const double *wts = (const double *) cdsA->w; - int i, cnum = cdsA->cnum, vlen = cdsA->vlen; - double scaleprod, selfprod, innprod, sigma2, trsig; - double *varu = malloc(vlen * sizeof(double)); - - CalcCovariances(cdsA); - - memcpy(varu, cdsA->var, vlen); - - CalcWts(cdsA); - - sigma2 = 0.0; - for (i = 0; i < vlen; ++i) - sigma2 += cdsA->var[i]; - sigma2 /= (double) vlen; - - printf("\nsigma2 = %12.6e \n", sigma2); - - scaleprod = 0.0; - - if (cdsA->algo->leastsquares == 1) - { - for (i = 0; i < cnum; ++i) - { - innprod = TrCdsInnerProd2(cds[i], avecds, vlen); - selfprod = TrCdsInnerProd(cds[i], vlen); - cds[i]->scale = (sqrt(innprod*innprod + 12.0 * (double) vlen * sigma2 * selfprod) + innprod) / (2.0 * selfprod); - scaleprod += log(cds[i]->scale); - } - } - else if (cdsA->algo->varweight == 1) - { - trsig = 0.0; - for (i = 0; i < vlen; ++i) - trsig += varu[i]/cdsA->var[i]; - - printf("\ntrsig = %12.6e \n", trsig); - - for (i = 0; i < cnum; ++i) - { - innprod = TrCdsInnerProdWt2(cds[i], avecds, vlen, wts); - selfprod = TrCdsInnerProdWt(cds[i], vlen, wts); - cds[i]->scale = (sqrt(innprod*innprod + 12.0 * trsig * selfprod) + innprod) / (2.0 * selfprod); - scaleprod += log(cds[i]->scale); - } - } - else - { - innprod = selfprod = 1.0; - cds[i]->scale = 1.0; - scaleprod += log(cds[i]->scale); - } - - free(varu); - - return(exp(scaleprod / (double) cnum)); -} - + ScaleCds(cdsi, cdsA->cds[i]->scale); -/* This is the constrained ML solution, with (or without) the scale factor Jacobian in the PDF */ -double -CalcScaleFactorsMLGoodall(CdsArray *cdsA) -{ - Cds **cds = cdsA->cds; - Cds *avecds = cdsA->avecds; - const double *wts = (const double *) cdsA->w; - int i, cnum = cdsA->cnum, 
vlen = cdsA->vlen; - double scaleprod, selfprod, innprod, sigma2; - - CalcCovariances(cdsA); - CalcWts(cdsA); - - sigma2 = 0.0; - for (i = 0; i < vlen; ++i) - sigma2 += cdsA->var[i]; - sigma2 /= (double) vlen; - - printf("\nsigma2 = %12.6e \n", sigma2); - - scaleprod = 0.0; - for (i = 0; i < cnum; ++i) - { - if (cdsA->algo->leastsquares == 1) - { - innprod = TrCdsInnerProd2(cds[i], avecds, vlen); - selfprod = TrCdsInnerProd(cds[i], vlen); - } - else if (cdsA->algo->varweight == 1) - { - innprod = TrCdsInnerProdWt2(cds[i], avecds, vlen, wts); - selfprod = TrCdsInnerProdWt(cds[i], vlen, wts); - } - else - { - innprod = selfprod = 1.0; - } - - cds[i]->scale = (sqrt(innprod*innprod + 12.0 * (double) vlen * sigma2 * selfprod) + innprod) / (2.0 * selfprod); - scaleprod += log(cds[i]->scale); - } - - return(exp(scaleprod / (double) cnum)); -} - - -/* constrained LS, so that \Prod_i^N scale_i = 1 */ -double -CalcScaleFactorsML2(CdsArray *cdsA) -{ - Cds **cds = cdsA->cds; - Cds *avecds = cdsA->avecds; - const double *wts = (const double *) cdsA->w; - int i, cnum = cdsA->cnum, vlen = cdsA->vlen; - double scaleprod, selfprod, innprod; - - - scaleprod = 1.0; - for (i = 0; i < cnum; ++i) - { - if (cdsA->algo->leastsquares == 1) - { - innprod = TrCdsInnerProd2(cds[i], avecds, vlen); - selfprod = TrCdsInnerProd(cds[i], vlen); - } - else if (cdsA->algo->varweight == 1) - { - innprod = TrCdsInnerProdWt2(cds[i], avecds, vlen, wts); - selfprod = TrCdsInnerProdWt(cds[i], vlen, wts); - } - else - { - innprod = selfprod = 1.0; - } - - cds[i]->scale = innprod / selfprod; - scaleprod *= cds[i]->scale; - } - - scaleprod = powf(scaleprod, 1.0 / (double) cnum); - - for (i = 0; i < cnum; ++i) - cds[i]->scale /= scaleprod; - - return(scaleprod); -} - - -void -ScaleCdsArray(CdsArray *cdsA) -{ - Cds **cds = cdsA->cds; - int i; + /* This is to verify that our implicit constraint is actually in effect. 
*/ +// double bsum, ftsum, gtsum; +// double nkd = 3.0 * cnum * vlen; +// bsum = 0.0; +// for (i = 0; i < cnum; ++i) +// bsum += TrCdsInnerProdWt(cds[i], vlen, wts) - TrCdsInnerProdWt2(cds[i], avecds, vlen, wts); +// +// printf("bsum = % 12.6e % 12.6e % 12.6e % 12.6e\n", bsum, nkd, cnum*lagrangian, bsum - nkd - cnum*lagrangian); +// +// ftsum = 0.0; +// for (i = 0; i < cnum; ++i) +// ftsum += TrCdsInnerProdWt(cds[i], vlen, wts); +// +// gtsum = 0.0; +// for (i = 0; i < cnum; ++i) +// gtsum += TrCdsInnerProdWt2(cds[i], avecds, vlen, wts); +// +// printf("ftsum, gtsum: % 12.6e % 12.6e % 12.6e\n", ftsum, gtsum, 3.0*vlen); - for (i = 0; i < cdsA->cnum; ++i) - { - ScaleCds(cds[i], cds[i]->scale); - printf("Scale[%3d]: %12.6f\n", i, cds[i]->scale); - } + return(scalesum); } static void -*CalcRot_pth(void *rotdata_ptr) -{ - int i; - double deviation = 0.0; - RotData *rotdata = (RotData *) rotdata_ptr; - Cds *cds; - - for (i = rotdata->start; i < rotdata->end; ++i) - { - cds = rotdata->cds[i]; - /* note that the avecds are already multiplied by the weight matrices */ -// deviation = CalcRMSDRotationalMatrix(cds, rotdata->tcds, cds->vlen, &cds->matrix[0][0], NULL); - - /* rotate the scratch cds with new rotation matrix */ - RotateCdsIp(cds, (const double **) cds->matrix); - - /* find global rmsd and average cds (both held in structure) */ - cds->wRMSD_from_mean = sqrt(deviation / (3 * rotdata->vlen)); - } - - pthread_exit((void *) 0); -} - - -static double -CalcRotations_pth(CdsArray *cdsA, RotData **rotdata, pthread_t *callThd, - pthread_attr_t *attr, const int thrdnum) +InvGammaAdjustEvals(double *newevals, const int vlen, const int cnum, + double *evals, const double b, const double c) { - Cds **cds = cdsA->cds; - const Cds *avecds = cdsA->avecds; - const double *wts = (const double *) cdsA->w; - Cds *tcds = cdsA->tcds; - double deviation_sum = 0.0; - int i, rc = 0, incr; - - if (cdsA->algo->covweight == 1) - { - MatMultCdsMultMatDiag(tcds, - (const double **) 
cdsA->WtMat, - avecds); - } - else if (cdsA->algo->varweight == 1 || cdsA->algo->leastsquares == 1) - { - MatDiagMultCdsMultMatDiag(tcds, wts, avecds); - } - - incr = cdsA->cnum / thrdnum; - - for (i = 0; i < thrdnum - 1; ++i) - { - rotdata[i]->cds = cds; - rotdata[i]->tcds = tcds; - rotdata[i]->start = i * incr; - rotdata[i]->end = i*incr + incr; - rotdata[i]->vlen = cdsA->vlen; - - rc = pthread_create(&callThd[i], attr, CalcRot_pth, (void *) rotdata[i]); - - if (rc) - { - printf("ERROR811: return code from pthread_create() %d is %d\n", i, rc); - exit(EXIT_FAILURE); - } - } - - rotdata[thrdnum - 1]->cds = cds; - rotdata[thrdnum - 1]->tcds = tcds; - rotdata[thrdnum - 1]->start = (thrdnum - 1) * incr; - rotdata[thrdnum - 1]->end = cdsA->cnum; - rotdata[thrdnum - 1]->vlen = cdsA->vlen; - - rc = pthread_create(&callThd[thrdnum - 1], attr, CalcRot_pth, (void *) rotdata[thrdnum - 1]); - - if (rc) - { - printf("ERROR811: return code from pthread_create() %d is %d\n", i, rc); - exit(EXIT_FAILURE); - } - - for (i = 0; i < thrdnum; ++i) - { - rc = pthread_join(callThd[i], (void **) NULL); - - if (rc) - { - printf("ERROR812: return code from pthread_join() %d is %d\n", i, rc); - exit(EXIT_FAILURE); - } - } - - for (i = 0; i < cdsA->cnum; ++i) - deviation_sum += 3 * cdsA->vlen * cds[i]->wRMSD_from_mean * cds[i]->wRMSD_from_mean; + int i; - return(deviation_sum); + for (i = 0; i < vlen; ++i) + newevals[i] = (3.0*cnum*evals[i] + 2.0*b) / (3.0*cnum + 2.0*c); + // this is required for an EM algorithm of the variances + //printf("%3d %26.6f\n", i, var[i]); } void HierarchVars(CdsArray *cdsA) { - int i; - double mean, mu, lambda, zeta, sigma; - - switch(cdsA->algo->hierarch) + switch(algo->hierarch) { case 0: break; /* Assuming a known shape param c, real ML-EM fit */ case 1: - if (cdsA->algo->rounds > 4) - InvGammaEMFixedCFitEvals(cdsA, 0.5, 1); + if (algo->rounds > 4) + InvGammaFitEvalsEMFixedC(cdsA, 0.5, 1); else - InvGammaEMFixedCFitEvals(cdsA, 0.5, 0); + 
InvGammaFitEvalsEMFixedC(cdsA, 0.5, 0); break; /* real ML-EM fit, fitting unknown b and c inverse gamma params (scale and shape, resp.) */ case 2: - if (cdsA->algo->rounds > 4) - InvGammaMLFitEvals(cdsA, 1); + if (algo->rounds > 4) + InvGammaFitEvalsML(cdsA, 1); else - InvGammaMLFitEvals(cdsA, 0); + InvGammaFitEvalsML(cdsA, 0); break; case 3: - InvGammaFitEvalsBfact(cdsA, 1); - break; - - case 4: /* This is the old approximate method, used in versions 1.0-1.1 */ /* inverse gamma fit of variances, excluding the smallest 3 */ /* This accounts for the fact that the smallest three eigenvalues of the covariance matrix are always zero, i.e. the covariance matrix is necessarily of rank vlen - 3 (or usually less, with inadequate amounts of data 3N-6). */ - if (cdsA->algo->rounds > 4) + if (algo->rounds > 4) InvGammaFitEvals(cdsA, 1); else InvGammaFitEvals(cdsA, 0); break; - case 5: /* inverse gamma fit of variances, excluding the smallest 3 */ - /* This accounts for the fact that the smallest three eigenvalues of the covariance - matrix are always zero, i.e. the covariance matrix is necessarily of rank - vlen - 3 (or usually less, with inadequate amounts of data 3N-6). - __Bayesian Bernardo reference prior on the scale and shape params__. 
*/ - if (cdsA->algo->rounds > 4) - InvGammaBayesFitEvals(cdsA, 1); - else - InvGammaBayesFitEvals(cdsA, 0); - break; - - case 6: - if (cdsA->algo->rounds > 4) - InvGammaEMFixedC(cdsA, 0.5, 1); - else - InvGammaEMFixedC(cdsA, 0.5, 0); -/* if (cdsA->algo->rounds > 4) */ -/* InvGammaBayesFitEvals3(cdsA, 1); */ -/* else */ -/* InvGammaBayesFitEvals3(cdsA, 0); */ - break; - -// case 7: -// /* InvGammaFitVars_minc(cdsA, 1.0, 1); */ -// if (cdsA->algo->rounds > 4) -// InvGammaFitEvalsEq(cdsA, 1); -// else -// InvGammaFitEvalsEq(cdsA, 0); -// -// if (cdsA->algo->verbose != 0) -// printf(" HierarchVars() chi2:%f\n", cdsA->stats->hierarch_chi2); -// break; - - case 7: - if (cdsA->algo->rounds > 4) - InvGammaMLFixedCFitEvals(cdsA, cdsA->algo->minc, 1); - else - InvGammaMLFixedCFitEvals(cdsA, cdsA->algo->minc, 0); - break; - - case 8: - if (cdsA->algo->rounds > 4) - InvGammaMLFixedCFitEvals(cdsA, 0.5, 1); - else - InvGammaMLFixedCFitEvals(cdsA, 0.5, 0); - break; - -/* case 8: */ -/* // InvGammaMMFitVars(cdsA, &b, &c); */ -/* if (cdsA->algo->rounds > 4) */ -/* InvGammaFitVars_minc(cdsA, cdsA->algo->minc, 1); */ -/* else */ -/* InvGammaFitVars_minc(cdsA, cdsA->algo->minc, 0); */ -/* break; */ - - case 9: - if (cdsA->algo->rounds > 4) - InvGammaFitVars_fixed_c(cdsA, cdsA->algo->minc, 1); - else - InvGammaFitVars_fixed_c(cdsA, cdsA->algo->minc, 0); - break; - - case 10: - InvGammaFitVars_minc(cdsA, cdsA->algo->minc, 0); - break; - - case 11: - if (cdsA->algo->rounds > 4) - InvGammaFitModeEvals(cdsA, 1); + case 4: + if (algo->rounds < 15) + InvGammaFitEvalsEMFixedC(cdsA, 0.5, 1); else - InvGammaFitModeEvals(cdsA, 0); + InvGammaFitMarginalGSLBrent(cdsA); + //InvGammaMarginalFitEvals(cdsA); break; - case 12: /* inverse gamma fit of variances, excluding the smallest 3 */ - /* This accounts for the fact that the smallest three eigenvalues of the covariance - matrix are always zero, i.e. 
the covariance matrix is necessarily of rank - vlen - 3 (or usually less, with inadequate amounts of data 3N-6). - No iterations */ - InvGammaFitEvals(cdsA, 0); + case 5: + InvGammaFitMarginalGSLBrent(cdsA); break; - case 13: /* Bayesian diagonal Wishart prior (proportional to the identity mat) on the - variances/covmat, assuming improper reference prior on the precision - hyperparameter */ - WishartFitVar(cdsA, 1); - break; + case 6: + { + const int vlen = cdsA->vlen, cnum = cdsA->cnum; + double phi; + const double nu = algo->covnu; + double *evals = cdsA->evals; + double *invevals = cdsA->tmpvecK; + double **evecs = cdsA->tmpmatKK2; + double *tmpevals = cdsA->samplevar3N; + int i, j; - case 14: - WishartFitVar2(cdsA, 1); -// if (cdsA->algo->rounds > 4) -// InvGammaBayesFitVars_fixed_c(cdsA, cdsA->algo->minc, 1); -// else -// InvGammaBayesFitVars_fixed_c(cdsA, cdsA->algo->minc, 0); -// -// if (cdsA->algo->verbose != 0) -// printf(" HierarchVars() chi2:%f\n", cdsA->stats->hierarch_chi2); - break; + /* evals are small to large */ + //eigensym((const double **) cdsA->CovMat, tmpevals, evecs, vlen); + EigenGSL((const double **) cdsA->CovMat, vlen, tmpevals, evecs, 0); - case 15: - if (cdsA->algo->rounds >= 10) - WishartAdjustVar(cdsA->var, cdsA->var, cdsA->vlen, cdsA->cnum, cdsA->stats->lsvar); - break; + //VecPrint(tmpevals, vlen); - case 16: - if (cdsA->algo->rounds >= 10) - ConjBayesAdjustVar(cdsA->var, cdsA->var, cdsA->vlen, cdsA->cnum, cdsA->stats->lsvar); - break; + stats->hierarch_p1 = algo->minc; + phi = 2.0 * algo->minc; - case 17: /* inverse gamma fit of variances, excluding the smallest 3 */ - /* This accounts for the fact that the smallest three eigenvalues of the covariance - matrix are always zero, i.e. the covariance matrix is necessarily of rank - vlen - 3 (or usually less, with inadequate amounts of data 3N-6). 
*/ - if (cdsA->algo->rounds > 4) - InvGammaFitEvalsNoN(cdsA, 1); - else - InvGammaFitEvalsNoN(cdsA, 0); - break; + InvGammaAdjustEvals(evals, vlen, cnum, tmpevals, phi, nu); + EigenReconSym(cdsA->CovMat, (const double **) evecs, evals, vlen); - case 18: - WishartAdjustVar(cdsA->var, cdsA->var, cdsA->vlen, cdsA->cnum, cdsA->algo->param[0]); - break; - - case 19: - WishartFitVar2(cdsA, 1); - break; - -/* for (i = 0; i < cdsA->vlen; ++i) */ -/* cdsA->var[i] = cdsA->CovMat[i][i]; */ -/* cdsA->algo->covweight = 0; */ -/* cdsA->algo->varweight = 1; */ -/* InvGammaFitVars(cdsA, 1); */ -/* cdsA->algo->covweight = 1; */ -/* cdsA->algo->varweight = 0; */ -/* CovMat2CorMat(cdsA->CovMat, cdsA->vlen); */ -/* CorMat2CovMat(cdsA->CovMat, (const double *) cdsA->var, cdsA->vlen); */ -/* break; */ - - case 20: /* ML fit of variances to a reciprocal inverse gaussian dist */ - RecipInvGaussFitVars(cdsA, &mu, &lambda); - RecipInvGaussAdjustVars(cdsA, mu, lambda); - break; - - case 21: /* ML fit of variances to a lognorml distribution */ - LognormalFitVars(cdsA, &zeta, &sigma); - LognormalAdjustVars(cdsA, zeta, sigma); - break; + for (i = 0; i < vlen; ++i) + invevals[i] = 1.0 / evals[i]; - case 22: - InvgaussFitVars(cdsA, &mean, &lambda); - InvgaussAdjustVars(cdsA, zeta, sigma); - break; + EigenReconSym(cdsA->WtMat, (const double **) evecs, invevals, vlen); - case 30: /* inv gamma fit to eigenvalues of covariance mat, but only weighting by variances */ - cdsA->algo->covweight = 1; - cdsA->algo->varweight = 0; - if (cdsA->algo->alignment == 1) - CalcCovMatOcc(cdsA); - else - CalcCovMat(cdsA); - InvGammaFitEvals(cdsA, 1); - cdsA->algo->covweight = 0; - cdsA->algo->varweight = 1; - for (i = 0; i < cdsA->vlen; ++i) - cdsA->var[i] = cdsA->CovMat[i][i]; - break; + if (algo->rounds < 3) + { + for (i = 0; i < vlen; ++i) + for (j = 0; j < i; ++j) + cdsA->WtMat[i][j] = cdsA->WtMat[j][i] = 0.0; + } - case 31: /* inv gamma fit to eigenvalues of covariance mat, but only weighting by variances */ - 
cdsA->algo->covweight = 1; - cdsA->algo->varweight = 0; - if (cdsA->algo->alignment == 1) - CalcCovMatOcc(cdsA); - else - CalcCovMat(cdsA); - InvGammaFitVars(cdsA, 0); /* no iterations */ - cdsA->algo->covweight = 0; - cdsA->algo->varweight = 1; - for (i = 0; i < cdsA->vlen; ++i) - cdsA->var[i] = cdsA->CovMat[i][i]; + for (i = 0; i < vlen; ++i) + cdsA->var[i] = cdsA->CovMat[i][i]; + + //chi2 = chi_sqr_adapt(evals, vlen, 0, &logL, 0.5*phi, 0.5*nu, + // invgamma_pdf, invgamma_lnpdf, invgamma_int); + } break; default: - printf("\n ERROR: Bad -g option \"%d\" \n", cdsA->algo->hierarch); + printf("\n ERROR: Bad -g option \"%d\" \n", algo->hierarch); Usage(0); exit(EXIT_FAILURE); break; - - if (cdsA->algo->verbose != 0) - printf(" HierarchVars() chi2:%f\n", cdsA->stats->hierarch_chi2); } - if (cdsA->algo->lele5 == 1 && cdsA->algo->covweight != 0) - { - /* Correct Lele's 5-landmark testset covariance matrix (only two off-diags are non-zero) */ - cdsA->CovMat[0][1] = 0.0; - cdsA->CovMat[0][2] = 0.0; - cdsA->CovMat[0][3] = 0.0; - cdsA->CovMat[0][4] = 0.0; - - cdsA->CovMat[1][0] = 0.0; - cdsA->CovMat[1][2] = 0.0; - cdsA->CovMat[1][4] = 0.0; - - cdsA->CovMat[2][0] = 0.0; - cdsA->CovMat[2][1] = 0.0; - cdsA->CovMat[2][3] = 0.0; - cdsA->CovMat[2][4] = 0.0; - - cdsA->CovMat[3][0] = 0.0; - cdsA->CovMat[3][2] = 0.0; - cdsA->CovMat[3][4] = 0.0; - - cdsA->CovMat[4][0] = 0.0; - cdsA->CovMat[4][1] = 0.0; - cdsA->CovMat[4][2] = 0.0; - cdsA->CovMat[4][3] = 0.0; - } + if (algo->verbose) + printf(" HierarchVars() chi2:%f\n", stats->hierarch_chi2); } int CheckConvergenceInner(CdsArray *cdsA, const double precision) { - Algorithm *algo = cdsA->algo; + int i; - if (algo->abort == 1) + if (algo->abort) return(1); for (i = 0; i < cdsA->cnum; ++i) { - if (TestIdentMat((const double **) cdsA->cds[i]->matrix, 3, precision) == 0) - /* if (Mat3FrobEq((const double **) cdsA->cds[i]->last_matrix, (const double **) cdsA->cds[i]->matrix, precision) == 0) */ + if (Mat3FrobEq((const double **) 
cdsA->cds[i]->last_matrix, + (const double **) cdsA->cds[i]->matrix, precision) == 0) return(0); } @@ -1660,338 +697,189 @@ int CheckConvergenceOuter(CdsArray *cdsA, int round, const double precision) { - Algorithm *algo = cdsA->algo; int i; if (round >= algo->iterations) + { return(1); - - if (algo->abort == 1) + } + else if (algo->abort) + { return(1); - -/* else if (algo->alignment == 1 && round < 10) */ -/* return(0); */ + } else if (round > 6) { -/* if (Mat3FrobEq((const double **) cdsA->cds[0]->matrix, (const double **) cdsA->cds[0]->last_matrix, algo->precision) == 0) */ -/* return(1); */ -/* else */ -/* return(0); */ - - cdsA->stats->precision = 0.0; + stats->precision = 0.0; for (i = 0; i < cdsA->cnum; ++i) - cdsA->stats->precision += FrobDiffNormIdentMat((const double **) cdsA->cds[i]->matrix, 3); - cdsA->stats->precision /= cdsA->cnum; + { + stats->precision += Mat3FrobDiff((const double **) cdsA->cds[0]->matrix, + (const double **) cdsA->cds[0]->last_matrix); + } - if (cdsA->stats->precision > precision) + stats->precision /= cdsA->cnum; + + if (stats->precision > precision) return(0); else return(1); } else + { return(0); + } } -double -SuperPoseArray2Orig(CdsArray *cdsA, CdsArray *targetA, double *sumdev) +void +InitializeStates(CdsArray *cdsA) { - int i, j; - const int vlen = cdsA->vlen; + int i; + int slxn; /* index of random coords to select as first */ const int cnum = cdsA->cnum; - double *trans = malloc(3 * sizeof(double)); - double norm1, norm2, innprod, fpe; - Cds **cds = cdsA->cds; - PDBCdsArray *pdbA; - - /* Superimpose originals on ML superimposed family, just to clean up - any floating point problems due to extensive iteration, - and to get the proper rotations & translations to apply to the original - PDB cds. 
*/ - pdbA = PDBCdsArrayInit(); - PDBCdsArrayAlloc(pdbA, cnum, vlen); - - for (i = 0; i < cnum; ++i) - CopyCds2PDB(pdbA->cds[i], cds[i]); + const int vlen = cdsA->vlen; + Cds **cds = cdsA->cds; + Cds *avecds = cdsA->avecds; - if (cdsA->algo->verbose == 1) - { - char *ca_name = mystrcat(cdsA->algo->rootname, "_MultiPose_CA.pdb"); - WriteModelFile(pdbA, ca_name); - free(ca_name); - } + const gsl_rng_type *T = gsl_rng_ranlxs2; + gsl_rng *r2 = gsl_rng_alloc(T); - *sumdev = fpe = 0.0; for (i = 0; i < cnum; ++i) { - *sumdev += fabs(SuperPose(targetA->cds[i], cds[i], cds[i]->matrix, trans, - &norm1, &norm2, &innprod)); - - fpe += fabs(norm1 - innprod)/innprod; - - for (j = 0; j < 3; ++j) - cds[i]->center[j] = cds[i]->translation[j] = - targetA->cds[i]->center[j] = targetA->cds[i]->translation[j] = - -trans[j]; - -/* printf("\n new trans: %f %f %f", */ -/* cds[i]->center[0], cds[i]->center[1], cds[i]->center[2]); */ - } - - free(trans); - PDBCdsArrayDestroy(&pdbA); - - *sumdev = sqrt(*sumdev / (cnum * vlen)); - - return(fpe / cnum); -} - - -double -CalcInnProd(const Cds *cds1, const Cds *cds2) -{ - int i; - double innprod, tmpx, tmpy, tmpz; - - innprod = 0.0; - for (i = 0; i < cds1->vlen; ++i) - { - tmpx = cds1->x[i] * cds2->x[i]; - /* printf("\n% 8.3f %8.3f", cds1->x[i], cds2->x[i]); */ - tmpy = cds1->y[i] * cds2->y[i]; - tmpz = cds1->z[i] * cds2->z[i]; - innprod += (tmpx*tmpx + tmpy*tmpy + tmpz*tmpz); + MatCpyGen(cds[i]->sc, (const double **) cds[i]->wc, 3, vlen); + memcpy(cds[i]->so, cds[i]->o, vlen * sizeof(double)); + memcpy(cds[i]->sb, cds[i]->b, vlen * sizeof(double)); } - return(innprod / cds1->vlen); -} - - -static void -WriteInstModelFile(char *fext, CdsArray *cdsA) -{ - int i; - PDBCdsArray *mpA; - mpA = PDBCdsArrayInit(); - char *fext_name = NULL; - - PDBCdsArrayAlloc(mpA, cdsA->cnum, cdsA->vlen); - - for (i = 0; i < mpA->cnum; ++i) - CopyCds2PDB(mpA->cds[i], cdsA->cds[i]); - - fext_name = mystrcat(cdsA->algo->rootname, fext); - WriteTheseusModelFileNoStats(mpA, 
cdsA->algo, fext_name); - - free(fext_name); - PDBCdsArrayDestroy(&mpA); -} - - -/* The real thing */ -int -MultiPose(CdsArray *baseA) -{ - /* FILE *fp; */ - int i, round, innerround; - int slxn; /* index of random coord to select as first */ - double frobnorm, sumdev, percent, lastpercent, logL, lastlogL, lastscale; - double deviation_sum = 0.0; - const int cnum = baseA->cnum; - const int vlen = baseA->vlen; - Algorithm *algo = NULL; - Statistics *stats = NULL; - Cds **cds = NULL; - Cds *avecds = NULL; - CdsArray *scratchA = NULL; /* working scratch array, also holds average cds, */ - /* rotation matrices, translation and center vectors */ - -#if 0 //defined(__APPLE__) - double starttime, endtime; - double init, setup = 0.0, innerloop, exitloop; - - starttime = seconds(); -#endif - - gsl_rng *r2 = NULL; - const gsl_rng_type *T = NULL; - T = gsl_rng_ranlxs2; - r2 = gsl_rng_alloc(T); - -/* for (i = 0; i < cnum; ++i) */ -/* for (int j = 0; j < vlen; ++j) */ -/* baseA->cds[i]->z[j] = 0.0; */ - - /* setup scratchA */ - scratchA = CdsArrayInit(); - CdsArrayAlloc(scratchA, cnum, vlen); - CdsArraySetup(scratchA); - - baseA->scratchA = scratchA; - - /* duplicate baseA -- copy to scratchA */ - CdsArrayCopy(scratchA, baseA); - - /* setup local aliases based on scratchA */ - algo = scratchA->algo; - stats = scratchA->stats; - cds = scratchA->cds; - avecds = scratchA->avecds; - - if (algo->covweight == 1) + if (algo->covweight) { - SetupCovWeighting(scratchA); /* DLT debug */ - SetupCovWeighting(baseA); /* DLT debug */ + SetupCovWeighting(cdsA); } - memsetd(scratchA->w, 1.0, vlen); - memsetd(baseA->w, 1.0, vlen); - - stats->hierarch_p1 = 1.0; - stats->hierarch_p2 = 1.0; + memsetd(cdsA->evals, 1.0, vlen); + memsetd(cdsA->w, 1.0, vlen); - //algo->constant = 0.001; + stats->hierarch_p1 = 1; + stats->hierarch_p2 = 0.5; -#if 0 //defined(__APPLE__) - endtime = seconds(); - init = (double) (endtime - starttime) / 0.001; - starttime = seconds(); -#endif + algo->covnu = vlen; /* 
Initialize the algorithm -- we need a centered mean structure as first guess */ - /* determine a structure to use as the initial mean structure */ - if (algo->embedave != 0 /*|| algo->alignment == 1*/) + if (algo->embedave) { printf(" Calculating distance matrix for embedding average ... \n"); fflush(NULL); CdsCopyAll(avecds, cds[0]); - DistMatsAlloc(scratchA); + DistMatsAlloc(cdsA); - if (algo->alignment == 1) - CalcMLDistMatOcc(scratchA); + if (algo->alignment) + CalcMLDistMatNu(cdsA); else - CalcMLDistMat(scratchA); + CalcMLDistMat(cdsA); printf(" Embedding average structure (ML) ... \n"); fflush(NULL); - EmbedAveCds(scratchA); + EmbedAveCds(cdsA); for (i = 0; i < vlen; ++i) avecds->resSeq[i] = i+1; - // DistMatsDestroy(scratchA); // DLT debug FIX + DistMatsDestroy(cdsA); printf(" Finished embedding \n"); fflush(NULL); - if (algo->write_file == 1) + if (algo->write_file) { char *embed_ave_name = mystrcat(algo->rootname, "_embed_ave.pdb"); - WriteAveCdsFile(scratchA, embed_ave_name); + WriteAveCdsFile(cdsA, embed_ave_name); free(embed_ave_name); } } else { - //slxn = (int) (genrand_real2() * cnum); slxn = gsl_rng_uniform_int(r2, cnum); - CdsCopyAll(avecds, baseA->cds[slxn]); + CdsCopyAll(avecds, cds[slxn]); } - if (algo->notrans == 0) + if (algo->dotrans) { CenMass(avecds); ApplyCenterIp(avecds); } - if (algo->seed == 1) + if (algo->seed) { - CalcStats(scratchA); - round = 10; + CalcStats(cdsA); } - if (algo->bfact > 0) - { - for (i = 0; i < cnum; ++i) - Bfacts2PrVars(scratchA, i); - } + CalcDf(cdsA); - //if (algo->alignment == 1) - CalcDf(scratchA); + gsl_rng_free(r2); + r2 = NULL; +} - if (algo->scale > 0) - { - //double fac, facsum = 0.0; - for (i = 0; i < cnum; ++i) - { - //fac = genrand_real2() * 100.0; - //facsum += fac; - //printf("\nfac[%3d]: % 12.6f", i+1, fac); - //ScaleCds(scratchA->cds[i], 1.0 / (i+1.0)); - //ScaleCds(scratchA->cds[i], 1.0 / fac); - //scratchA->cds[i]->scale = 1.0 / (i+1.0); - } - //printf("\nfacsum: %12.6f", facsum); - } + +/* The 
real thing */ +int +MultiPose(CdsArray *cdsA) +{ + int i, round, innerround, maybe; + double frobnorm, mlogL, lastmlogL, lastscale, scalesum; + const int cnum = cdsA->cnum; + const int vlen = cdsA->vlen; + Cds **cds = cdsA->cds; /* The EM algorithm */ + // Calculate, in this order: + // translations + // rotations + // scale + // mean + // covariance + // hierarchical params (phi) + /* The outer loop: - (1) First calculates the translations - (2) Does inner loop -- calc rotations and average till convergence - (3) Holding the superposition constant, calculates the covariance - matrices and corresponding weight matrices, looping till - convergence when using a dimensional/axial covariance matrix */ + (1) First calculate the translations + (2) Inner loop -- calc rotations, scales, and mean, until convergence + (3) Holding the superposition constant, calculate the covariance + matrix, corresponding weight matrix, and hierarchical params */ round = 0; - percent = lastpercent = 0.0; - logL = lastlogL = lastscale = -DBL_MAX; + maybe = 0; + mlogL = lastmlogL = lastscale = -DBL_MAX; + scalesum = 1.0; while(1) { -/* if (round % 62 == 0) */ -/* printf("\n "); */ -/* else */ -/* putchar('.'); */ -/* fflush(NULL); */ - - if (algo->nullrun == 1) + if (algo->nullrun) break; - lastlogL = logL; ++round; - baseA->algo->rounds = algo->rounds = round; + algo->rounds = round; - if (algo->verbose == 1) + if (algo->verbose) { - printf("\n\n\nNew Outer Round:%3d ////////////////////////////////////////////////////////////", - round); + printf("\n\n\nNew Outer Round:%3d ///////////////////////////////////////////", round); fflush(NULL); } - /* Calculate the minimum variance empirically -- this is really just inherent floating point error */ - if (round == 2 && algo->constant < 0.0) + /* Find weighted center of all cds, for translation vectors */ + if (algo->dotrans) { - SuperPoseArray2Orig(scratchA, baseA, &sumdev); - algo->constant = sumdev * sumdev; -/* printf("\n Minimum variance: 
%8.3e (sigma:%8.3e)", sumdev*sumdev, sumdev); */ -/* fflush(NULL); */ - } - - /* Find weighted center and translate all cds */ - CalcTranslationsIp(scratchA, algo); - //CalcTranslationsOp(scratchA, baseA, algo); - - for (i = 0; i < cnum; ++i) - ApplyCenterIp(cds[i]); - //ApplyCenterOp(cds[i], (const Cds *) baseA->cds[i]); + CalcTranslationsOp(cdsA, algo); // VecPrint(cds[0]->center, 3); - /* save the translation vector for each coord in the array */ - for (i = 0; i < cnum; ++i) - memcpy(cds[i]->translation, cds[i]->center, 3 * sizeof(double)); + // save the translation vector for each coord in the array + // wait to translate coords right before rotating coords + for (i = 0; i < cnum; ++i) + memcpy(cds[i]->translation, cds[i]->center, 3 * sizeof(double)); + } - /* when superimposing to an alignemnt, initially iterate unweighted LS for a few rounds */ - //if (algo->alignment == 1 && round < 10) /* DLT debug -- I changed this just to find the LS answer first */ - // memsetd(scratchA->w, 1.0, vlen); + if (algo->dorot) + { + /* save the old rotation matrices to test convergence at bottom of outer loop */ + for (i = 0; i < cnum; ++i) + MatCpySym(cds[i]->last_outer_matrix, (const double **) cds[i]->matrix, 3); + } /* Inner loop: (1) Calc rotations given weights/weight matrices @@ -2003,521 +891,168 @@ innerround = 0; do { -/* putchar('*'); */ -/* fflush(NULL); */ ++innerround; algo->innerrounds += innerround; -/* char *tempstr = malloc(512 * sizeof(char)); */ -/* sprintf(tempstr, "_mp_%d.pdb", algo->innerrounds); */ -/* WriteInstModelFile(tempstr, scratchA); */ -/* free(tempstr); */ - - if (algo->verbose == 1) + if (algo->verbose) { - printf("\n New Inner Round:%d \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\n", innerround); + printf("\n New Inner Round:%d \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\n", innerround); fflush(NULL); } - /* save the old rotation matrices to test convergence at bottom of loop */ - for (i = 0; i < cnum; ++i) - MatCpySym(cds[i]->last_matrix, (const double **) 
cds[i]->matrix, 3); - -//////////////////////////////////////////////////////////////////////////////////////////////////// -/* double *mytrans = malloc(3 * sizeof(double)); */ -/* */ -/* mytrans[0] = 20.0; mytrans[1] = 30.0; mytrans[2] = 40.0; */ -/* */ -/* TransCdsIp(cds[0], mytrans); */ -/* CalcRotations(scratchA); */ -/* */ -/* printf("\nBefore[%d][%d]:", innerround, round); */ -/* Mat3Print(cds[0]->matrix); */ -/* */ -/* mytrans[0] = -20.0; mytrans[1] = -30.0; mytrans[2] = -40.0; */ -/* TransCdsIp(cds[0], mytrans); */ -/* */ -/* free(mytrans); */ - - /* find the optimal rotation matrices */ - if (algo->alignment == 1 /* && (round == 1 || cnum == 2) */) - deviation_sum = CalcRotationsOcc(scratchA); - else - deviation_sum = CalcRotations(scratchA); - -/* printf("\nAfter:"); */ -/* Mat3Print(cds[0]->matrix); */ -//////////////////////////////////////////////////////////////////////////////////////////////////// - - if (algo->verbose == 1 && innerround == 1) + // translate/center static cds with new translation vector from static coords, + // put in working coords + // this is inefficient // DLT OP // DLT FIX --- maybe have a new vec w/this calc only once? 
+ if (algo->dotrans) { - frobnorm = 0.0; for (i = 0; i < cnum; ++i) - /* frobnorm += MatFrobNorm((const double **) cds[i]->last_matrix, (const double **) cds[i]->matrix, 3, 3); */ - frobnorm += FrobDiffNormIdentMat((const double **) cds[i]->matrix, 3); - frobnorm /= cnum; - - printf("-----<<<<< %3d Frobenius Norm (Outer): % 8.3e //////////////////////////////\n", - round, frobnorm); - fflush(NULL); - } - - if (innerround == 1 && - CheckConvergenceOuter(scratchA, round, algo->precision) == 1) - goto outsidetheloops; - - if (stats->precision > 0.0) - percent = 100.0 * log(fabs(stats->precision))/log(algo->precision); - else - percent = 0.0; - -// if (percent > lastpercent) -// { -// lastpercent = percent; -// printf(" %5.1f%%\n", percent); -// /* printf("\n%e\n", stats->precision); */ -// printf("\033[<1>A"); /* moves the cursor up one line */ -// fflush(NULL); -// } - - /* rotate the scratch cds with new rotation matrix */ - for (i = 0; i < cnum; ++i) - { - RotateCdsIp(cds[i], (const double **) cds[i]->matrix); - //printf("\n\nCds %d\n", i); - //PrintCds(cds[i]); - } - - if (algo->scale > 0) - { - lastscale = cds[0]->scale; - - double scaleprod; - - if (algo->scale == 1) - scaleprod = CalcScaleFactorsML(scratchA); - else if (algo->scale == 2) - scaleprod = CalcScaleFactors(scratchA); - else if (algo->scale == 3) - scaleprod = CalcScaleFactorsMLLogNorm(scratchA); - else - scaleprod = 1.0; - - printf("\n%5d scaleprod = %12.6f\n", round, scaleprod); - } - - /* find global rmsd and average cds (both held in structure) */ - if (algo->noave == 0) - { - if (algo->alignment == 1) - { - AveCdsOcc(scratchA); - EM_MissingCds(scratchA); - //printf("\n\nAveCds\n"); - //PrintCds(scratchA->avecds); - } - else { - AveCds(scratchA); - -/* Calculate the ML estimate of a hierarchical mean, where the variance-weighted atoms - are normally distributed with mean zero */ -/* See pdbUtils.c */ -/* double oldpsi = 0.0, psi = 0.0; */ -/* int q; */ -/* */ -/* q = 0; */ -/* do */ -/* { */ -/* 
oldpsi = psi; */ -/* psi = HierAveCds(scratchA); */ -/* //printf("\n psi[%d] = %e", q, psi); */ -/* q++; */ -/* } */ -/* while(fabs(psi - oldpsi) > psi * algo->precision); */ + TranslateCdsOp2(cds[i]->wc, + (const double **) cds[i]->sc, + vlen, + (const double *) cds[i]->center); } - //PrintCds(scratchA->avecds); } - if (algo->mbias == 1) - UnbiasMean(scratchA); - - stats->wRMSD_from_mean = sqrt(deviation_sum / (3 * vlen * cnum)); - - if (algo->verbose == 1) + if (algo->dorot) { - frobnorm = 0.0; - for (i = 0; i < cnum; ++i) - frobnorm += FrobDiffNormIdentMat((const double **) cds[i]->matrix, 3); - frobnorm /= cnum; - printf(" ----->>>>> %3d Frobenius Norm (Inner %d): % e\n", round, innerround, frobnorm); - printf(" End Inner Round:%d \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\n", innerround); - fflush(NULL); - } + // save the old rotation matrices to test convergence at bottom of inner loop + for (i = 0; i < cnum; ++i) + MatCpySym(cds[i]->last_matrix, (const double **) cds[i]->matrix, 3); + +// if (algo->alignment) +// CalcRotationsNu2(cdsA); +// else + CalcRotations2(cdsA); // DLT OP - if (algo->noinnerloop == 1) - break; - else if (innerround > 160) - { - putchar(','); - fflush(NULL); - break; + // rotate working cds with new rotation matrix from static coords, put in wrking coords + for (i = 0; i < cnum; ++i) + { + RotateCdsIp2(cds[i]->wc, vlen, (const double **) cds[i]->matrix); + } } - } - //while(CheckConvergenceInner(scratchA, algo->precision) == 0 && - // fabs(cds[0]->scale - lastscale) > cds[0]->scale * algo->precision); - while(CheckConvergenceInner(scratchA, algo->precision) == 0); - - if (round < 10) - { - if (algo->alignment == 1) - VarianceCdsOcc(scratchA); - else - VarianceCds(scratchA); - - stats->lsvar = stats->stddev * stats->stddev; - } -/* printf("\nvar = %10.5e", VarianceCdsOcc(scratchA)); */ -/* printf("\nrmsd = %10.5e\n", CalcPRMSD(scratchA)); */ - if (algo->instfile == 1) - WriteInstModelFile("_inst.pdb", scratchA); - - /* Weighting by dimensional, 
axial Xi covariance matrix, here diagonal. */ - /* Holding the superposition constant, calculates the covariance - matrices and corresponding weight matrices, looping till - convergence. */ - CalcCovariances(scratchA); - - //if (algo->scale > 0) - // ConstrainCovMat(scratchA); - - if (scratchA->algo->lele5 == 1) - { - /* Correct Lele's 5-landmark testset covariance matrix (only two off-diags are non-zero) */ - scratchA->CovMat[0][1] = 0.0; - scratchA->CovMat[0][2] = 0.0; - scratchA->CovMat[0][3] = 0.0; - scratchA->CovMat[0][4] = 0.0; - - scratchA->CovMat[1][0] = 0.0; - scratchA->CovMat[1][2] = 0.0; - scratchA->CovMat[1][4] = 0.0; - - scratchA->CovMat[2][0] = 0.0; - scratchA->CovMat[2][1] = 0.0; - scratchA->CovMat[2][3] = 0.0; - scratchA->CovMat[2][4] = 0.0; - - scratchA->CovMat[3][0] = 0.0; - scratchA->CovMat[3][2] = 0.0; - scratchA->CovMat[3][4] = 0.0; - - scratchA->CovMat[4][0] = 0.0; - scratchA->CovMat[4][1] = 0.0; - scratchA->CovMat[4][2] = 0.0; - scratchA->CovMat[4][3] = 0.0; - } - - if (CheckZeroVariances(scratchA) == 1) - { - algo->varweight = 0; - algo->covweight = 0; - algo->leastsquares = 1; - } - - /* calculate the weights/weight matrices */ - /* and first the hierarchical adjustment */ - CalcWts(scratchA); - - if (algo->printlogL == 1) - { - if (algo->leastsquares == 1) - CalcNormResidualsLS(scratchA); - else - CalcNormResiduals(scratchA); - logL = CalcLogL(scratchA); - printf("----> %4d logL: % e % e <----\n", round, logL, logL - lastlogL); - } - - if (algo->verbose == 1) - { - printf("END Outer Round:%3d ////////////////////////////////////////////////////////////\n\n", - round); - fflush(NULL); - } - } - - outsidetheloops: - -/* for (i = 0; i < cnum; ++i) */ -/* printf("\ntrans [%3d]: %f %f %f", */ -/* i+1, */ -/* cds[i]->transsum[0], */ -/* cds[i]->transsum[1], */ -/* cds[i]->transsum[2]); */ - - if (algo->seed == 1) - round -= 10; - - if (algo->bayes > 0) - { - #include "GibbsMet.h" - printf(" Calculating Gibbs-Metropolis Bayesian superposition ... 
\n"); - fflush(NULL); - GibbsMet(scratchA); - } - -#if 0 //defined(__APPLE__) - endtime = seconds(); - innerloop = (double) (endtime - starttime) / 0.001; - starttime = seconds(); -#endif - - printf(" Calculating statistics ... \n"); - fflush(NULL); - - if (algo->instfile == 1) - WriteInstModelFile("_inst_final.pdb", scratchA); - -/* - fp = fopen("distcor.txt", "w"); - if (scratchA->CovMat == NULL) - scratchA->CovMat = MatAlloc(vlen, vlen); - - CalcCovMat(scratchA); - DistMatsAlloc(cdsA); - - CalcMLDistMat(scratchA); - - for (i = 0; i < vlen; ++i) - for (j = 0; j < i; ++j) - fprintf(fp, "%6d % 10.3f % 8.3e\n", - i-j, - scratchA->Dij_matrix[i][j], - scratchA->CovMat[i][j] / sqrt(scratchA->CovMat[i][i] * scratchA->CovMat[j][j])); - - fclose(fp); -*/ - -/* if (algo->weight == 200) */ -/* unremlvar(scratchA); */ - -/* #include "internmat.h" */ -/* AveCds(scratchA); */ -/* CalcCovMat(scratchA); */ -/* PrintCovMatGnuPlot((const double **) scratchA->CovMat, vlen, "cov.mat"); */ -/* for (i = 0; i < vlen; ++i) */ -/* for (j = 0; j < vlen; ++j) */ -/* scratchA->CovMat[i][j] -= internmat[i][j]; */ -/* PrintCovMatGnuPlot((const double **) scratchA->CovMat, vlen, "covdiff.mat"); */ - -/* CovMat2CorMat(scratchA->CovMat, vlen); */ -/* PrintCovMatGnuPlot((const double **) scratchA->CovMat, vlen, "corr.mat"); */ -/* memcpy(&scratchA->CovMat[0][0], &internmat[0][0], vlen * vlen * sizeof(double)); */ -/* PrintCovMatGnuPlot((const double **) scratchA->CovMat, vlen, "cov_true.mat"); */ -/* CovMat2CorMat(scratchA->CovMat, vlen); */ -/* PrintCovMatGnuPlot((const double **) scratchA->CovMat, vlen, "corr_true.mat"); */ - -/* CovMatsDestroy(scratchA); */ /* DLT debug */ -/* CovMatsDestroy(baseA); */ - - if (algo->covweight == 1 && (algo->write_file > 0 || algo->info != 0)) - { - double *evals = malloc(vlen * sizeof(double)); - double **evecs = scratchA->tmpmatKK2; - char *mp_cov_name = NULL; - - eigenvalsym((const double **) scratchA->CovMat, evals, evecs, vlen); - /* VecPrint(evals, vlen); 
*/ - mp_cov_name = mystrcat(algo->rootname, "_mp_cov.mat"); - PrintCovMatGnuPlot((const double **) scratchA->CovMat, vlen, mp_cov_name); - free(mp_cov_name); -/* CovMat2CorMat(scratchA->CovMat, vlen); */ -/* PrintCovMatGnuPlot((const double **) scratchA->CovMat, vlen, mystrcat(algo->rootname, "_cor.mat")); */ - CalcPRMSD(scratchA); - WriteInstModelFile("_mp.pdb", scratchA); - free(evals); - } - - { /* Write out a taxa distance matrix in NEXUS format */ - #include "DistMat.h" - - DISTMAT *distmat = NULL; - double sum; - int j,k; - int cnt; - char num[32]; - char *ptr = NULL; - char *tree_name = NULL; - - distmat = DISTMATalloc(cnum); - - for (i = 0; i < cnum; ++i) - { - strcpy(distmat->taxa[i], cds[i]->filename); - ptr = strrchr(distmat->taxa[i], '.'); - if (ptr != NULL) - *ptr = '\0'; - sprintf(num, "_%d", i); - strcat(distmat->taxa[i], num); - } - -// for (i = 0; i < cnum; ++i) -// { -// for (j = 0; j <= i; ++j) -// { -// sum = 0.0; -// for (k = 0; k < vlen; ++k) -// sum += SqrCdsDistMahal2((const Cds *) cds[i], k, -// (const Cds *) cds[j], k, -// (const double) scratchA->w[k]); -// -// distmat->dist[i][j] = sqrt(sum); -// } -// } - - for (i = 0; i < cnum; ++i) - { - for (j = 0; j < i; ++j) + if (algo->verbose && innerround == 1) { - sum = 0.0; - cnt = 0; - for (k = 0; k < vlen; ++k) - { - if (cds[i]->o[k] > 0 && cds[j]->o[k] > 0) - { - cnt += scratchA->w[k]; - sum += SqrCdsDistMahal2((const Cds *) cds[i], k, - (const Cds *) cds[j], k, - (const double) scratchA->w[k]); - } - } + frobnorm = 0.0; + for (i = 0; i < cnum; ++i) + frobnorm += FrobDiffNormIdentMat((const double **) cds[i]->matrix, 3); // DLT FIX: now obsolete for OP + frobnorm /= cnum; - distmat->dist[i][j] = sqrt(sum/(3.0*cnt)); + printf("-----<<<<< %3d Frobenius Norm (Outer): % 8.3e ///////\n", round, frobnorm); + fflush(NULL); } - } - tree_name = mystrcat(algo->rootname, "_ML_tree.nxs"); - print_NX_distmat(distmat, tree_name); + if (algo->scale > 0) + { + lastscale = scalesum; -// //double total = 
0.0; -// for (i = 0; i < cnum; ++i) -// { -// for (j = 0; j < i; ++j) -// { -// sum = 0.0; -// for (k = 0; k < vlen; ++k) -// sum += SqrCdsDist((const Cds *) cds[i], k, -// (const Cds *) cds[j], k); -// -// distmat->dist[i][j] = sqrt(sum/vlen); -// //total += sum/vlen; -// } -// } + if (algo->scale) + scalesum = CalcScaleFactorsML(cdsA); + else if (algo->scale == 2) + scalesum = CalcScaleFactors(cdsA); + } - for (i = 0; i < cnum; ++i) - { - for (j = 0; j < i; ++j) + /* find global rmsd and average cds (both held in structure) */ + if (algo->doave) { - sum = 0.0; - cnt = 0; - for (k = 0; k < vlen; ++k) + if (algo->alignment) { - if (cds[i]->o[k] > 0 && cds[j]->o[k] > 0) - { - ++cnt; - sum += SqrCdsDist((const Cds *) cds[i], k, - (const Cds *) cds[j], k); - } + AveCdsNu(cdsA); + // EM_MissingCds(cdsA); // DLT OP + } + else + { + AveCds(cdsA); //PrintCds(cdsA->avecds); } - distmat->dist[i][j] = sqrt(sum/cnt); + if (algo->mbias) + UnbiasMean(cdsA); } - } - //printf("\nrmsd? %g\n", sqrt(total/((cnum*cnum- cnum)/2))); // verified same as paRMSD + if ((innerround == 1) && // DLT + CheckConvergenceOuter(cdsA, round, algo->precision)) // DLT + maybe = 1; // DLT - tree_name = mystrcat(algo->rootname, "_LS_tree.nxs"); - print_NX_distmat(distmat, tree_name); - - if (tree_name != NULL) - free(tree_name); - - DISTMATdestroy(&distmat); - } + if (algo->verbose) + { + frobnorm = 0.0; + for (i = 0; i < cnum; ++i) + frobnorm += FrobDiffNormIdentMat((const double **) cds[i]->matrix, 3); + frobnorm /= cnum; + printf(" -->> %3d Frobenius Norm (Inner %d): % e\n", round, innerround, frobnorm); + printf(" End Inner Round:%d \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\n", innerround); + fflush(NULL); + } - CalcStats(scratchA); - stats->fperr = SuperPoseArray2Orig(scratchA, baseA, &stats->minvar); + if (algo->noinnerloop) + break; - if (algo->ssm == 1) - { - printf(" Calculating SSM ... 
\n"); - fflush(NULL); + if (algo->abort) + break; - #include "pdbSSM.h" - SSM *ssm = SSMInit(); - SSMAlloc(ssm, scratchA); - //for (i=0; i < 2; ++i) - SSMCalc(ssm, scratchA); + if (innerround > 200) + { + putchar('.'); + fflush(NULL); + break; + } + } + while((CheckConvergenceInner(cdsA, algo->precision) == 0) && + (fabs(scalesum - lastscale) > algo->precision/cnum)); - printf(" Writing SSM ... \n"); - fflush(NULL); + /* Holding the superposition constant, calculates the covariance matrices */ + if (algo->docovars) + CalcCovariances(cdsA); - WriteSSM(ssm); - SSMDestroy(&ssm); - } + if (algo->dohierarch) + { + if (algo->varweight || algo->covweight) + HierarchVars(cdsA); + } - if (baseA->anchorf_name != NULL) /* orient entire family to a user-specified structure */ - SuperPose2Anchor(scratchA, baseA, baseA->anchorf_name); - else if (algo->princaxes == 1) /* orient the family perpendicular to principal axes of the average cds -- */ - RotPrincAxes(scratchA); /* makes for nice viewing */ + if (CheckZeroVariances(cdsA)) + { + algo->varweight = 0; + algo->covweight = 0; + algo->leastsquares = 1; + printf("\n ----- WARNING: LEAST SQUARES INVOKED [%d] ----- \n", round); + fflush(NULL); + } - if (algo->write_file == 1) - { - char *transf_name = mystrcat(algo->rootname, "_transf2.txt"); - WriteTransformations(scratchA, transf_name); - free(transf_name); - } + /* calculate the weights or weight matrices */ + CalcWts(cdsA); - if (algo->olve == 1 && algo->write_file == 1) - { - PDBCdsArray *olveA; - printf(" Writing Olve's file ... 
\n"); - fflush(NULL); + if (algo->instfile) + WriteInstModelFile("_inst.pdb", cdsA); - olveA = PDBCdsArrayInit(); - PDBCdsArrayAlloc(olveA, cnum, vlen); + if (algo->verbose) + { + printf("END Outer Round:%3d /////////////////////////////////////////////\n\n", round); + fflush(NULL); + } - for (i = 0; i < cnum; ++i) - CopyCds2PDB(olveA->cds[i], cds[i]); + lastmlogL = mlogL; + mlogL = CalcMgLogL(cdsA); - char *olve_name = mystrcat(algo->rootname, "_olve.pdb"); - WriteOlveModelFile(olveA, algo, stats, olve_name); - free(olve_name); - PDBCdsArrayDestroy(&olveA); - } + if (algo->printlogL) + { + printf("----> %4d mlogL: % 22.3f % e % e <----\n", + round, mlogL, mlogL - lastmlogL, stats->hierarch_p1); + } - CopyStats(baseA, scratchA); + if (round >= algo->iterations) + break; - /* wRMSD_from_mean does not need 2 in denominator, since it is already from the average */ - stats->wRMSD_from_mean = sqrt(deviation_sum / (double) (vlen * cnum)); + if (algo->abort) + break; -#if 0 //defined(__APPLE__) - endtime = seconds(); - exitloop = (double) (endtime - starttime) / 0.001; - if (algo->verbose == 1) - { - printf(" init setup inner loop exit loop \n"); - printf(" %7.2f %7.2f %7.2f %7.2f (ms) \n", init, setup, innerloop, exitloop); - fflush(NULL); + if (maybe && (fabs(lastmlogL - mlogL) < algo->precision)) + break; } -#endif - - CdsArrayDestroy(&scratchA); - gsl_rng_free(r2); - r2 = NULL; + if (algo->instfile) + WriteInstModelFile("_inst_final.pdb", cdsA); return(round); } @@ -2526,33 +1061,16 @@ int MultiPose_pth(CdsArray *baseA) { - /* FILE *fp; */ int i, round, innerround; int slxn; /* index of random coord to select as first */ - double frobnorm, sumdev, percent, lastpercent; - double deviation_sum = 0.0; const int cnum = baseA->cnum; const int vlen = baseA->vlen; double *evals = malloc(3 * sizeof(double)); Algorithm *algo = NULL; Statistics *stats = NULL; - Cds **cds = NULL; - Cds *avecds = NULL; - CdsArray *scratchA = NULL; - - const int thrdnum = baseA->algo->threads; - 
RotData **rotdata = malloc(thrdnum * sizeof(RotData *)); - AveData **avedata = malloc(thrdnum * sizeof(AveData *)); - pthread_t *callThd = malloc(thrdnum * sizeof(pthread_t)); - pthread_attr_t attr; - - -#if 0 //defined(__APPLE__) - double starttime, endtime; - double init, setup = 0.0, innerloop, exitloop; - - starttime = seconds(); -#endif + Cds **cds = NULL; + Cds *avecds = NULL; + CdsArray *scratchA = NULL; gsl_rng *r2 = NULL; const gsl_rng_type *T = NULL; @@ -2560,6 +1078,13 @@ T = gsl_rng_ranlxs2; r2 = gsl_rng_alloc(T); + // THREAD STUFF ///////////////////////////////////////////////////// + const int thrdnum = algo->threads; + RotData **rotdata = malloc(thrdnum * sizeof(RotData *)); + AveData **avedata = malloc(thrdnum * sizeof(AveData *)); + pthread_t *callThd = malloc(thrdnum * sizeof(pthread_t)); + pthread_attr_t attr; + for (i = 0; i < thrdnum; ++i) { rotdata[i] = malloc(sizeof(RotData)); @@ -2571,9 +1096,7 @@ /* printf("\nDefault stack size = %d", (int) stacksize); */ pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); - -/* if (baseA->algo->weight == 200) */ -/* remlvar(baseA); */ + // THREAD STUFF ///////////////////////////////////////////////////// /* setup scratchA */ scratchA = CdsArrayInit(); @@ -2586,73 +1109,21 @@ CdsArrayCopy(scratchA, baseA); /* setup local aliases based on scratchA */ - algo = scratchA->algo; - stats = scratchA->stats; cds = scratchA->cds; avecds = scratchA->avecds; - SetupCovWeighting(scratchA); /* DLT debug */ - SetupCovWeighting(baseA); /* DLT debug */ - stats->hierarch_p1 = 1.0; stats->hierarch_p2 = 1.0; -#if 0 //defined(__APPLE__) - endtime = seconds(); - init = (double) (endtime - starttime) / 0.001; - starttime = seconds(); -#endif - - if (algo->embedave != 0 /*|| algo->alignment == 1*/) // DLTIP - { - printf(" Calculating distance matrix for embedding average ... 
\n"); - fflush(NULL); - - CdsCopyAll(avecds, cds[0]); - DistMatsAlloc(scratchA); - - if (algo->alignment == 1) - CalcMLDistMatOcc(scratchA); - else - CalcMLDistMat(scratchA); - - printf(" Embedding average structure (ML) ... \n"); - fflush(NULL); - - EmbedAveCds(scratchA); - - for (i = 0; i < vlen; ++i) - avecds->resSeq[i] = i+1; - - printf(" Finished embedding \n"); - fflush(NULL); - - if (algo->write_file == 1) - { - char *embed_ave_name = mystrcat(algo->rootname, "_embed_ave.pdb"); - WriteAveCdsFile(scratchA, embed_ave_name); - free(embed_ave_name); - } - } - else - { - //slxn = (int) (genrand_real2() * cnum); - slxn = gsl_rng_uniform_int(r2, cnum); - CdsCopyAll(avecds, baseA->cds[slxn]); - } + slxn = gsl_rng_uniform_int(r2, cnum); + CdsCopyAll(avecds, baseA->cds[slxn]); - if (algo->notrans == 0) + if (algo->dotrans) { CenMass(avecds); ApplyCenterIp(avecds); } - if (algo->seed == 1) - { - CalcStats(scratchA); - round = 10; - } - /* The outer loop: (1) First calculates the translations (2) Does inner loop -- calc rotations and average till convergence @@ -2660,51 +1131,27 @@ matrices and corresponding weight matrices, looping till convergence when using a dimensional/axial covariance matrix */ - percent = lastpercent = 0.0; round = 0; while(1) { -/* if (round % 62 == 0) */ -/* printf(" \n"); */ -/* else */ -/* putchar('.'); */ -/* fflush(NULL); */ - - if (algo->nullrun == 1) + if (algo->nullrun) break; ++round; - baseA->algo->rounds = algo->rounds = round; + algo->rounds = round; - if (algo->verbose == 1) + /* Find weighted center and translate all cds */ + if (algo->dotrans) { - printf("\n\n\nNew Outer Round:%3d ////////////////////////////////////////////////////////////", - round); - fflush(NULL); - } + CalcTranslationsOp(baseA, algo); // DLT OP + for (i = 0; i < cnum; ++i) + ApplyCenterIp(cds[i]); - /* Calculate the minimum variance empirically */ - if (round == 2 && algo->constant < 0.0) - { - SuperPoseArray2Orig(scratchA, baseA, &sumdev); - algo->constant = 
sumdev * sumdev; -/* printf("\n Minimum variance: %8.3e (sigma:%8.3e)", sumdev*sumdev, sumdev); */ -/* fflush(NULL); */ + /* save the translation vector for each coord in the array */ + for (i = 0; i < cnum; ++i) + memcpy(cds[i]->translation, cds[i]->center, 3 * sizeof(double)); } - /* Find weighted center and translate all cds */ - CalcTranslationsIp(scratchA, algo); - for (i = 0; i < cnum; ++i) - ApplyCenterIp(cds[i]); - - /* save the translation vector for each coord in the array */ - for (i = 0; i < cnum; ++i) - memcpy(cds[i]->translation, cds[i]->center, 3 * sizeof(double)); - - /* when superimposing to an alignemnt, initially iterate into unwted LS for a few rounds */ -// if (algo->alignment == 1 && round < 5) -// memsetd(scratchA->w, 1.0, vlen); - /* Inner loop: (1) Calc rotations given weights/weight matrices (2) Rotate cds with new rotations @@ -2715,91 +1162,51 @@ innerround = 0; do { -/* putchar('*'); */ -/* fflush(NULL); */ ++innerround; algo->innerrounds += innerround; - if (algo->verbose == 1) - { - printf("\n New Inner Round:%d \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\n", innerround); - fflush(NULL); - } - /* save the old rotation matrices to test convergence at bottom of loop */ for (i = 0; i < cnum; ++i) MatCpySym(cds[i]->last_matrix, (const double **) cds[i]->matrix, 3); /* find the optimal rotation matrices */ - if (algo->alignment == 1 /* && (round == 1 || cnum == 2) */) - deviation_sum = CalcRotationsOcc(scratchA); - else - deviation_sum = CalcRotations_pth(scratchA, rotdata, callThd, &attr, thrdnum); - - if (algo->verbose == 1 && innerround == 1) + if (algo->dorot) { - frobnorm = 0.0; - for (i = 0; i < cnum; ++i) - /* frobnorm += MatFrobNorm((const double **) cds[i]->last_matrix, (const double **) cds[i]->matrix, 3, 3); */ - frobnorm += FrobDiffNormIdentMat((const double **) cds[i]->matrix, 3); - frobnorm /= cnum; - - printf("-----<<<<< %3d Frobenius Norm (Outer): % 8.3e //////////////////////////////\n", - round, frobnorm); - fflush(NULL); + if 
(algo->alignment) + { + CalcRotationsNu(scratchA); + } + else + { + // THREAD STUFF ///////////////////////////////////////////////////// + CalcRotations_pth(scratchA, rotdata, callThd, &attr, thrdnum); + // THREAD STUFF ///////////////////////////////////////////////////// + } } if (innerround == 1 && - CheckConvergenceOuter(scratchA, round, algo->precision) == 1) + CheckConvergenceOuter(scratchA, round, algo->precision)) goto outsidetheloops; - if (stats->precision > 0.0) - percent = 100.0 * log(fabs(stats->precision))/log(algo->precision); - else - percent = 0.0; - - if (percent > lastpercent) - { - lastpercent = percent; - printf(" %5.1f%%\n", percent); - /* printf("\n%e\n", stats->precision); */ - printf("\033[<1>A"); /* moves the cursor up one line */ - fflush(NULL); - } - /* find global rmsd and average cds (both held in structure) */ - if (algo->noave == 0) + if (algo->doave) { - if (algo->alignment == 1) + if (algo->alignment) { - AveCdsOcc(scratchA); + AveCdsNu(scratchA); EM_MissingCds(scratchA); - /* PrintCds(scratchA->avecds); */ } else { + // THREAD STUFF ///////////////////////////////////////////////////// AveCds_pth(scratchA, avedata, callThd, &attr, thrdnum); - /* AveCds(scratchA); */ + // THREAD STUFF ///////////////////////////////////////////////////// } } - if (algo->mbias == 1) - UnbiasMean(scratchA); - - stats->wRMSD_from_mean = sqrt(deviation_sum / (3 * vlen * cnum)); - - if (algo->verbose == 1) - { - frobnorm = 0.0; - for (i = 0; i < cnum; ++i) - frobnorm += FrobDiffNormIdentMat((const double **) cds[i]->matrix, 3); - frobnorm /= cnum; - printf(" ----->>>>> %3d Frobenius Norm (Inner %d): % e\n", round, innerround, frobnorm); - printf(" End Inner Round:%d \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\n", innerround); - fflush(NULL); - } + //stats->wRMSD_from_mean = sqrt(deviation_sum / (3 * vlen * cnum)); - if (algo->noinnerloop == 1) + if (algo->noinnerloop) break; else if (innerround > 160) { @@ -2810,129 +1217,22 @@ } 
while(CheckConvergenceInner(scratchA, algo->precision) == 0); - /* Weighting by dimensional, axial Xi covariance matrix, here diagonal. */ - /* Holding the superposition constant, calculates the covariance - matrices and corresponding weight matrices, looping till - convergence. */ - CalcCovariances(scratchA); + if (algo->docovars) + { + CalcCovariances(scratchA); + if (algo->varweight || algo->covweight) + HierarchVars(scratchA); + } - /* calculate the weights/weight matrices */ CalcWts(scratchA); - /* printf("\n----> logL: %e <----", CalcLogL(scratchA)); */ } outsidetheloops: - if (algo->seed == 1) - round -= 10; - -#if 0 //defined(__APPLE__) - endtime = seconds(); - innerloop = (double) (endtime - starttime) / 0.001; - starttime = seconds(); -#endif - - printf(" Calculating statistics ... \n"); - fflush(NULL); - -/* - fp = fopen("distcor.txt", "w"); - if (scratchA->CovMat == NULL) - scratchA->CovMat = MatAlloc(vlen, vlen); - - CalcCovMat(scratchA); - DistMatsAlloc(cdsA); - - CalcMLDistMat(scratchA); - - for (i = 0; i < vlen; ++i) - for (j = 0; j < i; ++j) - fprintf(fp, "%6d % 10.3f % 8.3e\n", - i-j, - scratchA->Dij_matrix[i][j], - scratchA->CovMat[i][j] / sqrt(scratchA->CovMat[i][i] * scratchA->CovMat[j][j])); - - fclose(fp); -*/ - -/* if (algo->weight == 200) */ -/* unremlvar(scratchA); */ - -/* #include "internmat.h" */ -/* AveCds(scratchA); */ -/* CalcCovMat(scratchA); */ -/* PrintCovMatGnuPlot((const double **) scratchA->CovMat, vlen, "cov.mat"); */ -/* for (i = 0; i < vlen; ++i) */ -/* for (j = 0; j < vlen; ++j) */ -/* scratchA->CovMat[i][j] -= internmat[i][j]; */ -/* PrintCovMatGnuPlot((const double **) scratchA->CovMat, vlen, "covdiff.mat"); */ - -/* CovMat2CorMat(scratchA->CovMat, vlen); */ -/* PrintCovMatGnuPlot((const double **) scratchA->CovMat, vlen, "corr.mat"); */ -/* memcpy(&scratchA->CovMat[0][0], &internmat[0][0], vlen * vlen * sizeof(double)); */ -/* PrintCovMatGnuPlot((const double **) scratchA->CovMat, vlen, "cov_true.mat"); */ -/* 
CovMat2CorMat(scratchA->CovMat, vlen); */ -/* PrintCovMatGnuPlot((const double **) scratchA->CovMat, vlen, "corr_true.mat"); */ - -/* CovMatsDestroy(scratchA); */ /* DLT debug */ -/* CovMatsDestroy(baseA); */ - - CalcStats(scratchA); - stats->fperr = SuperPoseArray2Orig(scratchA, baseA, &stats->minvar); - - /* orient the family perpendicular to principal axes of the average cds -- - makes for nice viewing */ - if (baseA->anchorf_name != NULL) - SuperPose2Anchor(scratchA, baseA, baseA->anchorf_name); - else if (algo->princaxes == 1) - RotPrincAxes(scratchA); - - if (algo->write_file == 1) - { - char *transf_name = mystrcat(algo->rootname, "_transf.txt"); - WriteTransformations(scratchA, transf_name); - free(transf_name); - } - - if (algo->olve == 1 && algo->write_file == 1) - { - PDBCdsArray *olveA; - printf(" Writing Olve's file ... \n"); - fflush(NULL); - - olveA = PDBCdsArrayInit(); - PDBCdsArrayAlloc(olveA, cnum, vlen); - - for (i = 0; i < cnum; ++i) - CopyCds2PDB(olveA->cds[i], cds[i]); - - /* WriteTheseusModelFile(olveA, algo, stats, "theseus_olve.pdb"); */ - char *olve_name = mystrcat(algo->rootname, "_olve.pdb"); - WriteOlveModelFile(olveA, algo, stats, olve_name); - free(olve_name); - PDBCdsArrayDestroy(&olveA); - } - - CopyStats(baseA, scratchA); - - /* wRMSD_from_mean does not need 2 in denominator, since it is already from the average */ - stats->wRMSD_from_mean = sqrt(deviation_sum / (double) (vlen * cnum)); - - if (algo->write_file == 1) - { - char *cov_name = mystrcat(algo->rootname, "_cov.mat"); - char *cor_name = mystrcat(algo->rootname, "_cor.mat"); - CalcCovMat(scratchA); - PrintCovMatGnuPlot((const double **) scratchA->CovMat, vlen, cov_name); - CovMat2CorMat(scratchA->CovMat, vlen); - PrintCovMatGnuPlot((const double **) scratchA->CovMat, vlen, cor_name); - free(cov_name); - free(cor_name); - } - CdsArrayDestroy(&scratchA); free(evals); + // THREAD STUFF ///////////////////////////////////////////////////// pthread_attr_destroy(&attr); for (i = 
0; i < thrdnum; ++i) free(rotdata[i]); @@ -2941,17 +1241,7 @@ free(rotdata); free(avedata); free(callThd); - -#if 0 //defined(__APPLE__) - endtime = seconds(); - exitloop = (double) (endtime - starttime) / 0.001; - if (algo->verbose == 1) - { - printf(" init setup inner loop exit loop \n"); - printf(" %7.2f %7.2f %7.2f %7.2f (ms) \n", init, setup, innerloop, exitloop); - fflush(NULL); - } -#endif + // THREAD STUFF ///////////////////////////////////////////////////// gsl_rng_free(r2); r2 = NULL; @@ -2960,179 +1250,64 @@ } -void -RotPrincAxes(CdsArray *cdsA) -{ - int i; - double **x90z90 = MatAlloc(3,3); - /* double x90[3][3] = {{ 1, 0, 0}, { 0, 0, 1}, { 0,-1, 0}}; */ - /* double z90[3][3] = {{ 0, 1, 0}, {-1, 0, 0}, { 0, 0, 1}}; */ - /* double x90z90[3][3] = {{ 0, 1, 0}, { 0, 0, 1}, { 1, 0, 0}}; */ - - /* this orients the least -> most variable axes along x, y, z respectively */ - CalcCdsPrincAxes(cdsA->avecds, cdsA->avecds->matrix); - - memset(&x90z90[0][0], 0, 9 * sizeof(double)); - x90z90[0][1] = x90z90[1][2] = x90z90[2][0] = 1.0; - - /* Rotate the family 90deg along x and then along z. - This puts the most variable axis horizontal, the second most variable - axis vertical, and the least variable in/out of screen. 
*/ - Mat3MultIp(cdsA->avecds->matrix, (const double **) x90z90); - - for (i = 0; i < cdsA->cnum; ++i) - Mat3MultIp(cdsA->cds[i]->matrix, (const double **) cdsA->avecds->matrix); - - MatDestroy(&x90z90); -} - - /* Calculates weights corresponding to the atomic, row-wise covariance matrix only */ void CalcWts(CdsArray *cdsA) { - int i, j; - Algorithm *algo = cdsA->algo; + int i; + double *variance = cdsA->var; double *weight = cdsA->w; const int vlen = cdsA->vlen; - if (algo->leastsquares != 0) + if (algo->leastsquares) { for (i = 0; i < vlen; ++i) weight[i] = 1.0; - - return; } - - if (algo->varweight != 0) + else if (algo->varweight) { for (i = 0; i < vlen; ++i) - if (variance[i] < algo->constant) - variance[i] = algo->constant; - - HierarchVars(cdsA); - - for (i = 0; i < vlen; ++i) { if (variance[i] >= DBL_MAX) weight[i] = 0.0; - else if (variance[i] == 0.0) + else if (variance[i] <= 0.0) weight[i] = 0.0; else weight[i] = 1.0 / variance[i]; } -/* if (algo->alignment == 1) */ -/* cdsA->stats->wtnorm = NormalizeWeightsOcc(weight, cdsA->avecds->o, vlen); */ -/* else */ - cdsA->stats->wtnorm = NormalizeWeights(weight, vlen); - -// for (i=0; icovweight != 0) - { - if (cdsA->algo->rounds < 5) - { - for (i = 0; i < vlen; ++i) - for (j = 0; j < i; ++j) - cdsA->CovMat[i][j] = cdsA->CovMat[j][i] = 0.0; - } - - if (algo->hierarch != 0 && algo->rounds > 2) - HierarchVars(cdsA); - - /* minimum variance boundary condition */ - for (i = 0; i < vlen; ++i) - if (cdsA->CovMat[i][i] < algo->constant) - cdsA->CovMat[i][i] = algo->constant; - - /* CovInvWeightLAPACK(cdsA); */ - /* pseudoinv_sym(cdsA->CovMat, cdsA->WtMat, vlen, DBL_MIN); */ - InvSymEigenOp(cdsA->WtMat, (const double **) cdsA->CovMat, vlen, cdsA->tmpvecK, cdsA->tmpmatKK1, DBL_MIN); - - cdsA->stats->wtnorm = NormalizeCovMat(cdsA->WtMat, vlen); - - cdsA->stats->trace_inv_sigma = 0.0; - for (i = 0; i < vlen; ++i) - for (j = 0; j < vlen; ++j) - cdsA->stats->trace_inv_sigma += cdsA->WtMat[i][j]; - } -} - - -void 
-CalcWtsFinal(CdsArray *cdsA) -{ - int i; - double *weight = cdsA->w; - const double *variance = (const double *) cdsA->var; - - for (i = 0; i < cdsA->vlen; ++i) - { - if (variance[i] >= DBL_MAX) - weight[i] = 0.0; - else - weight[i] = 1.0 / variance[i]; - } - - NormalizeWeights(weight, cdsA->vlen); -} - - -double -SuperPose(Cds *cds1, Cds *cds2, double **rotmat, double *trans, - double *norm1, double *norm2, double *innprod) -{ - const int vlen = cds1->vlen; - double **tmpmat1 = MatAlloc(3, 3); - double **tmpmat2 = MatAlloc(3, 3); - double **tmpmat3 = MatAlloc(3, 3); - double *tmpvec = malloc(3 * sizeof(double)); - double *newtrans = malloc(3 * sizeof(double)); - double *cen1 = calloc(3, sizeof(double)); - double *cen2 = calloc(3, sizeof(double)); - double sumdev; - int i; - - CenMassOccVec(cds1, cen1); - CenMassOccVec(cds2, cen2); - - NegTransCdsIp(cds1, cen1); - NegTransCdsIp(cds2, cen2); - - sumdev = ProcGSLSVDvanOcc(cds1, cds2, rotmat, - tmpmat1, tmpmat2, tmpmat3, tmpvec, - norm1, norm2, innprod); - - if (sumdev > 1) - { - printf(" ERROR1111: -> sumdev: % 12.7e % 12.7e \n", - 0.5 * sumdev / vlen, sqrt(fabs(0.5 * sumdev / vlen)) ); - printf(" ERROR1111: Please report to dtheobald@brandeis.edu \n"); -// PrintTheseusTag(); -// exit(EXIT_FAILURE); +// if (algo->scale > 0) +// { +// double sum = 0.0; +// for (i = 0; i < vlen; ++i) +// { +// sum += weight[i]; +// } +// +// for (i = 0; i < vlen; ++i) +// { +// variance[i] *= sum; +// weight[i] /= (sum/vlen); +// } +// } } - TransCdsIp(cds1, cen1); - TransCdsIp(cds2, cen2); - - InvRotVec(newtrans, cen2, rotmat); - -/* printf("\n nt: %f %f %f", */ -/* newtrans[0], newtrans[1], newtrans[2]); */ - for (i = 0; i < 3; ++i) - trans[i] = newtrans[i] - cen1[i]; - - MatDestroy(&tmpmat1); - MatDestroy(&tmpmat2); - MatDestroy(&tmpmat3); - free(tmpvec); - free(newtrans); - free(cen1); - free(cen2); - - return(sumdev); +// else if (algo->covweight) +// // WtMat is calculated in HeriarchVars +// { +// // if (algo->rounds < 3) +// 
// { +// // for (i = 0; i < vlen; ++i) +// // for (j = 0; j < i; ++j) +// // cdsA->CovMat[i][j] = cdsA->CovMat[j][i] = 0.0; +// // } +// +// /* CovInvWeightLAPACK(cdsA); */ +// /* pseudoinv_sym(cdsA->CovMat, cdsA->WtMat, vlen, DBL_MIN); */ +// //InvSymEigenOp(cdsA->WtMat, (const double **) cdsA->CovMat, vlen, cdsA->tmpvecK, cdsA->tmpmatKK1, DBL_MIN); +// +// // WtMat is calculated in HeriarchVars +// } } Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/._MultiPose.h and /tmp/g2bOMTRwaC/theseus-3.0.0/._MultiPose.h differ diff -Nru theseus-2.0.6/MultiPose.h theseus-3.0.0/MultiPose.h --- theseus-2.0.6/MultiPose.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/MultiPose.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,34 +29,13 @@ #include "Cds.h" void -SuperPose2Anchor(CdsArray *scratchA, CdsArray *baseA, char *anchorf_name); - -void CalcTranslationsIp(CdsArray *scratchA, Algorithm *algo); double CalcRotations(CdsArray *cdsA); -double -CalcScaleFactorsML(CdsArray *cdsA); - -double -CalcScaleFactorsMLConstr(CdsArray *cdsA); - -double -CalcScaleFactorsMLLogNorm(CdsArray *cdsA); - -double -CalcScaleFactorsML2(CdsArray *cdsA); - -double -CalcScaleFactorsMLGoodall(CdsArray *cdsA); - -double -CalcScaleFactors(CdsArray *cdsA); - void -ScaleCdsArray(CdsArray *cdsA); +HierarchVars(CdsArray *cdsA); int CheckConvergenceInner(CdsArray *cdsA, const double precision); @@ -64,32 +43,16 @@ int CheckConvergenceOuter(CdsArray *cdsA, int round, const double precision); -double -SuperPoseArray2Orig(CdsArray *cdsA, CdsArray *targetA, double *sumdev); - void -HierarchVars(CdsArray *cdsA); - -int -MultiPose_pth(CdsArray *baseA); +InitializeStates(CdsArray *cdsA); int 
MultiPose(CdsArray *baseA); -void -RotPrincAxes(CdsArray *cdsA); - -void -SuperJack(CdsArray *baseA); +int +MultiPose_pth(CdsArray *baseA); void CalcWts(CdsArray *cdsA); -void -CalcWtsFinal(CdsArray *cdsA); - -double -SuperPose(Cds *cds1, Cds *cds2, double **rotmat, double *trans, - double *, double *, double *); - #endif diff -Nru theseus-2.0.6/MultiPose_local.h theseus-3.0.0/MultiPose_local.h --- theseus-2.0.6/MultiPose_local.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/MultiPose_local.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,6 +28,7 @@ #include #include #include +#include #include "CovMat.h" #include "DLTutils.h" #include "Error.h" @@ -37,15 +38,20 @@ #include "pdbIO.h" #include "pdbUtils.h" #include "ProcGSLSVD.h" -#include "ProcGSLSVDOcc.h" +#include "ProcGSLSVDNu.h" #include "Embed.h" #include "FragDist.h" #include "CovMat.h" #include "RandCds.h" #include "msa.h" #include "MultiPose2MSA.h" - +#include "Threads.h" +#include "ProcGSLSVD.h" +#include "ProcGSLSVDNu.h" +#include "qcprot.h" +#include "GibbsMet.h" +#include "internmat.h" #include "DLTmath.h" -#include #include "distfit.h" +#include diff -Nru theseus-2.0.6/MultiPoseMix.c theseus-3.0.0/MultiPoseMix.c --- theseus-2.0.6/MultiPoseMix.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/MultiPoseMix.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,6 +29,7 @@ #include "Threads.h" #include "MultiPose_local.h" #include "pdbStats.h" +#include "pdbUtils.h" #include "distfit.h" #include "ProcGSLSVD.h" #include "MultiPoseMix.h" @@ -73,7 +74,7 @@ structural superposition. NOTA BENE: This function assumes that the variances, covariance matrices, - hierarchical model parameters, average coordinates, rotations, and + hierarchical model parameters, average coordinates, rotations, and translations have all been pre-calculated. Even when not calculating the optimal ML rotations and translation transformations, the other parameters in general must be estimated iteratively, as described below. @@ -99,19 +100,19 @@ const double ndk2 = 0.5 * ndk; const double *var = (const double *) cdsA->var; double lndetrow , frobterm, logL; - Algorithm *algo = cdsA->algo; + lndetrow = frobterm = 0.0; - if (algo->leastsquares == 1) + if (algo->leastsquares) { frobterm = FrobTermAtom(cdsA, atom); } - else if (algo->varweight == 1) + else if (algo->varweight) { lndetrow = log(var[atom]); - if (algo->hierarch != 0) + if (algo->hierarch) frobterm = FrobTermAtom(cdsA, atom); else frobterm = -ndk2; @@ -186,7 +187,7 @@ dist = SqrCdsDist(mixA[j]->cds[i], m, mixA[j]->avecds, m); pij = probs[j][m] * normal_pdf(sqrt(dist), 0.0, 3.0 * mixA[j]->var[m]); - sump += pij / summix; + sump += pij / summix; } probs[j][m] = sump / cnum; @@ -221,14 +222,14 @@ for (k = 0; k < mixn; ++k) { - for (i = 0; i < vlen; ++i) - { - dist = 0.0; - for (j = 0; j < cnum; ++j) - dist += SqrCdsDist(mixA[k]->cds[j], i, mixA[k]->avecds, i); - - probs[k][i] *= normal_pdf(sqrt(dist), 0.0, 3.0 * cnum * mixA[k]->var[i]); - } + for (i = 0; i < vlen; ++i) + { + dist = 0.0; + for (j = 0; j < cnum; ++j) + dist += SqrCdsDist(mixA[k]->cds[j], i, mixA[k]->avecds, i); + + probs[k][i] *= normal_pdf(sqrt(dist), 0.0, 3.0 * cnum * 
mixA[k]->var[i]); + } } for (m = 0; m < vlen; ++m) @@ -328,7 +329,7 @@ InitializeMix(CdsArray *cdsA, double **probs) { int i, j; - const int vlen = cdsA->vlen, mixn = cdsA->algo->mixture; + const int vlen = cdsA->vlen, mixn = algo->mixture; double ave, sum, aveprob; const gsl_rng_type *T = NULL; @@ -384,28 +385,28 @@ const int vlen = mixA[0]->vlen, cnum = mixA[0]->cnum; double sump, summix, dist, pij, aveprob, sum; - for (m = 0; m < vlen; ++m) - { - for (j = 0; j < subset - 1; ++j) - { - sump = 0.0; - for (i = 0; i < cnum; ++i) - { - summix = 0.0; - for (k = 0; k < subset - 1; ++k) - { - dist = SqrCdsDist(mixA[k]->cds[i], m, mixA[k]->avecds, m); - summix += probs[k][m] * normal_pdf(sqrt(dist), 0.0, 3.0 * mixA[k]->var[m]); - } - - dist = SqrCdsDist(mixA[j]->cds[i], m, mixA[j]->avecds, m); - pij = probs[j][m] * normal_pdf(sqrt(dist), 0.0, 3.0 * mixA[j]->var[m]); - sump += pij / summix; - } - - probs[j][m] = sump / cnum; - } - } + for (m = 0; m < vlen; ++m) + { + for (j = 0; j < subset - 1; ++j) + { + sump = 0.0; + for (i = 0; i < cnum; ++i) + { + summix = 0.0; + for (k = 0; k < subset - 1; ++k) + { + dist = SqrCdsDist(mixA[k]->cds[i], m, mixA[k]->avecds, m); + summix += probs[k][m] * normal_pdf(sqrt(dist), 0.0, 3.0 * mixA[k]->var[m]); + } + + dist = SqrCdsDist(mixA[j]->cds[i], m, mixA[j]->avecds, m); + pij = probs[j][m] * normal_pdf(sqrt(dist), 0.0, 3.0 * mixA[j]->var[m]); + sump += pij / summix; + } + + probs[j][m] = sump / cnum; + } + } aveprob = 0.0; for (i = 0; i < vlen; ++i) @@ -413,18 +414,18 @@ aveprob += log(probs[j][i]); aveprob = exp(aveprob/vlen); - for (i = 0; i < vlen; ++i) - probs[subset - 1][i] = aveprob; + for (i = 0; i < vlen; ++i) + probs[subset - 1][i] = aveprob; + + for (i = 0; i < vlen; ++i) + { + sum = 0.0; + for (j = 0; j < subset; ++j) + sum += probs[j][i]; - for (i = 0; i < vlen; ++i) - { - sum = 0.0; - for (j = 0; j < subset; ++j) - sum += probs[j][i]; - - for (j = 0; j < subset; ++j) - probs[j][i] /= sum; - } + for (j = 0; j < subset; ++j) 
+ probs[j][i] /= sum; + } } @@ -439,46 +440,46 @@ /* VecPrint(probs[0], vlen); */ - count = 0; - while(1) - { - ++count; + count = 0; + while(1) + { + ++count; - memcpy(oldprobs, probs[0], vlen * sizeof(double)); + memcpy(oldprobs, probs[0], vlen * sizeof(double)); - for (i = 0; i < 2; ++i) - MultiPoseMix(mixA[i], probs[i], vars[i]); + for (i = 0; i < 2; ++i) + MultiPoseMix(mixA[i], probs[i], vars[i]); - NewCalcMixDens(mixA, 2, probs); + NewCalcMixDens(mixA, 2, probs); - if (CheckConvergenceMix(probs[0], oldprobs, vlen, 1e-2) == 1 && count > 5) - break; - } + if (CheckConvergenceMix(probs[0], oldprobs, vlen, 1e-2) && count > 5) + break; + } /* VecPrint(probs[0], vlen); */ for (n = 3; n < mixn; ++n) { - for (j = 0; j < n; ++j) - AveCds(mixA[j]); - + for (j = 0; j < n; ++j) + AveCds(mixA[j]); + CalcSubsetProbs(mixA, n, probs); - count = 0; - while(1) - { - ++count; - - memcpy(oldprobs, probs[0], vlen * sizeof(double)); - - for (i = 0; i < n; ++i) - MultiPoseMix(mixA[i], probs[i], vars[i]); - - NewCalcMixDens(mixA, n, probs); - - if (CheckConvergenceMix(probs[0], oldprobs, vlen, 1e-3) == 1 && count > 5) - break; - } + count = 0; + while(1) + { + ++count; + + memcpy(oldprobs, probs[0], vlen * sizeof(double)); + + for (i = 0; i < n; ++i) + MultiPoseMix(mixA[i], probs[i], vars[i]); + + NewCalcMixDens(mixA, n, probs); + + if (CheckConvergenceMix(probs[0], oldprobs, vlen, 1e-3) && count > 5) + break; + } /* VecPrint(probs[0], vlen); */ } @@ -501,12 +502,12 @@ /* VecPrint(probs[0], vlen); */ - count = 0; - while(1) - { - ++count; + count = 0; + while(1) + { + ++count; - memcpy(oldprobs, probs[0], vlen * sizeof(double)); + memcpy(oldprobs, probs[0], vlen * sizeof(double)); rc = 0; for (i = 0; i < 2; ++i) @@ -515,88 +516,88 @@ mixdata[i]->probs = probs[i]; mixdata[i]->vars = vars[i]; - rc = pthread_create(&callThd[i], attr, MultiPoseMix_pth, (void *) mixdata[i]); + rc = pthread_create(&callThd[i], attr, MultiPoseMix_pth, (void *) mixdata[i]); - if (rc) - { - printf("ERROR811: 
return code from pthread_create() %d is %d\n", i, rc); - fflush(NULL); - exit(EXIT_FAILURE); - } + if (rc) + { + printf("ERROR811: return code from pthread_create() %d is %d\n", i, rc); + fflush(NULL); + exit(EXIT_FAILURE); + } } rc = 0; - for (i = 0; i < 2; ++i) - { - rc = pthread_join(callThd[i], (void **) NULL); - - if (rc) - { - printf("ERROR812: return code from pthread_join() %d is %d\n", i, rc); - fflush(NULL); - exit(EXIT_FAILURE); - } - } - - NewCalcMixDens(mixA, 2, probs); - - if (CheckConvergenceMix(probs[0], oldprobs, vlen, 1e-2) == 1 && count > 5) - break; - } + for (i = 0; i < 2; ++i) + { + rc = pthread_join(callThd[i], (void **) NULL); + + if (rc) + { + printf("ERROR812: return code from pthread_join() %d is %d\n", i, rc); + fflush(NULL); + exit(EXIT_FAILURE); + } + } + + NewCalcMixDens(mixA, 2, probs); + + if (CheckConvergenceMix(probs[0], oldprobs, vlen, 1e-2) && count > 5) + break; + } /* VecPrint(probs[0], vlen); */ for (n = 3; n < mixn; ++n) { - for (j = 0; j < n; ++j) - AveCds(mixA[j]); - + for (j = 0; j < n; ++j) + AveCds(mixA[j]); + CalcSubsetProbs(mixA, n, probs); - count = 0; - while(1) - { - ++count; - - memcpy(oldprobs, probs[0], vlen * sizeof(double)); - - rc = 0; - for (i = 0; i < n; ++i) - { - mixdata[i]->cdsA = mixA[i]; - mixdata[i]->probs = probs[i]; - mixdata[i]->vars = vars[i]; - - /* MultiPoseMix_pth((void *) mixdata[i]); */ - - rc = pthread_create(&callThd[i], attr, MultiPoseMix_pth, (void *) mixdata[i]); - - if (rc) - { - printf("ERROR813: return code from pthread_create() %d is %d\n", i, rc); - fflush(NULL); - exit(EXIT_FAILURE); - } - } - - rc = 0; - for (i = 0; i < n; ++i) - { - rc = pthread_join(callThd[i], (void **) NULL); - - if (rc) - { - printf("ERROR814: return code from pthread_join() %d is %d\n", i, rc); - fflush(NULL); - exit(EXIT_FAILURE); - } - } - - NewCalcMixDens(mixA, n, probs); - - if (CheckConvergenceMix(probs[0], oldprobs, vlen, 1e-3) == 1 && count > 5) - break; - } + count = 0; + while(1) + { + ++count; + + 
memcpy(oldprobs, probs[0], vlen * sizeof(double)); + + rc = 0; + for (i = 0; i < n; ++i) + { + mixdata[i]->cdsA = mixA[i]; + mixdata[i]->probs = probs[i]; + mixdata[i]->vars = vars[i]; + + /* MultiPoseMix_pth((void *) mixdata[i]); */ + + rc = pthread_create(&callThd[i], attr, MultiPoseMix_pth, (void *) mixdata[i]); + + if (rc) + { + printf("ERROR813: return code from pthread_create() %d is %d\n", i, rc); + fflush(NULL); + exit(EXIT_FAILURE); + } + } + + rc = 0; + for (i = 0; i < n; ++i) + { + rc = pthread_join(callThd[i], (void **) NULL); + + if (rc) + { + printf("ERROR814: return code from pthread_join() %d is %d\n", i, rc); + fflush(NULL); + exit(EXIT_FAILURE); + } + } + + NewCalcMixDens(mixA, n, probs); + + if (CheckConvergenceMix(probs[0], oldprobs, vlen, 1e-3) && count > 5) + break; + } /* VecPrint(probs[0], vlen); */ } @@ -714,17 +715,17 @@ for (j = 0; j < mixn; ++j) { - dist = 0.0; - for (k = 0; k < cnum; ++k) - dist += SqrCdsDist(mixA[j]->cds[k], i, mixA[j]->avecds, i); + dist = 0.0; + for (k = 0; k < cnum; ++k) + dist += SqrCdsDist(mixA[j]->cds[k], i, mixA[j]->avecds, i); if (probs[j][i] == 0.0) printf(" %10.3f", 0.0); else { logL[j] = log(probs[j][i]) + normal_lnpdf(sqrt(dist), 0.0, 3.0 * cnum * mixA[j]->var[i]); - printf(" %10.3f", logL[j]); - } + printf(" %10.3f", logL[j]); + } mxlogL[j] += logL[j]; } @@ -741,11 +742,11 @@ smallest = logL[j]; } - logLR = largest - smallest; - printf(" %10.3f", logLR); + logLR = largest - smallest; + printf(" %10.3f", logLR); - if (logLR >= 3.0) - printf(" *"); + if (logLR >= 3.0) + printf(" *"); } printf(" \n"); @@ -770,8 +771,8 @@ { double n, p; - cdsA->stats->nparams = p = CalcParamNum(cdsA); - cdsA->stats->ndata = n = 3.0 * cdsA->cnum * cdsA->vlen; + stats->nparams = p = CalcParamNum(cdsA); + stats->ndata = n = 3.0 * cdsA->cnum * cdsA->vlen; return(- p * n / (n - p - 1)); } @@ -786,10 +787,10 @@ for (i = 0; i < mixn; ++i) { - if (mixA[i]->algo->leastsquares == 1) - CalcNormResidualsLS(mixA[i]); - else - 
CalcNormResiduals(mixA[i]); + if (algo->leastsquares) + CalcNormResidualsLS(mixA[i]); + else + CalcNormResiduals(mixA[i]); } *logL = 0.0; @@ -802,9 +803,6 @@ *logL += log(Lik); } - for (i = 0; i < mixn; ++i) - *logL += CalcHierarchLogL(mixA[i]); - *AIC = *logL; for (i = 0; i < mixn; ++i) *AIC += CalcAICcorrxn(mixA[i]); @@ -817,14 +815,14 @@ Mixture(CdsArray *cdsA, PDBCdsArray *pdbA) { int count, i; - const int vlen = cdsA->vlen, cnum = cdsA->cnum, mixn = cdsA->algo->mixture; + const int vlen = cdsA->vlen, cnum = cdsA->cnum, mixn = algo->mixture; double **probs = calloc(mixn, sizeof(double *)); double *aveprobs = calloc(mixn, sizeof(double)); int *slope = calloc(mixn, sizeof(int)); double *oldprobs = calloc(vlen, sizeof(double)); CdsArray **mixA = NULL; - Algorithm *algo = cdsA->algo; - /* Statistics *stats = cdsA->stats; */ + + /* S */ PDBCdsArray *pdb2A = NULL; double **newprobs = MatAlloc(vlen, cnum); double **vars = MatAlloc(mixn, vlen); @@ -842,7 +840,7 @@ CdsArrayAlloc(mixA[i], cnum, vlen); CdsArraySetup(mixA[i]); CdsArrayCopy(mixA[i], cdsA); - mixA[i]->algo->write_file = 0; + algo->write_file = 0; } printf(" Initializing mixture iterations ... \n"); @@ -879,7 +877,7 @@ /* VecPrint(probs[0], vlen); */ /* VecPrint(probs[1], vlen); */ - if (CheckConvergenceMix(probs[0], oldprobs, vlen, algo->precision) == 1 && count > 5) + if (CheckConvergenceMix(probs[0], oldprobs, vlen, algo->precision) && count > 5) break; } @@ -910,7 +908,7 @@ /* for (j = 0; j < cnum; ++j) */ /* TransformPDBCdsIp(pdb2A->cds[j]); */ /* */ -/* if (algo->alignment == 1) */ +/* if (algo->alignment) */ /* Align2segID(pdb2A); */ /* */ /* printf("\n Writing transformed coordinates PDB file ... 
"); */ @@ -974,20 +972,20 @@ Mixture_pth(CdsArray *cdsA, PDBCdsArray *pdbA) { int count, i, j, lineskip; - const int vlen = cdsA->vlen, cnum = cdsA->cnum, mixn = cdsA->algo->mixture; + const int vlen = cdsA->vlen, cnum = cdsA->cnum, mixn = algo->mixture; double **probs = calloc(mixn, sizeof(double *)); double *aveprobs = calloc(mixn, sizeof(double)); int *slope = calloc(mixn, sizeof(int)); double *oldprobs = calloc(vlen, sizeof(double)); double **vars = MatAlloc(mixn, vlen); - CdsArray **mixA = NULL; - Algorithm *algo = cdsA->algo; + CdsArray **mixA = NULL; + double **newprobs = MatAlloc(vlen, cnum); double logL, AIC, Lik; int rc; MixData **mixdata = NULL; - pthread_t *callThd; + pthread_t *callThd = NULL; pthread_attr_t attr; callThd = malloc(mixn * sizeof(pthread_t)); @@ -1011,7 +1009,7 @@ CdsArrayAlloc(mixA[i], cnum, vlen); CdsArraySetup(mixA[i]); CdsArrayCopy(mixA[i], cdsA); - mixA[i]->algo->write_file = 0; + algo->write_file = 0; } printf(" Initializing mixture iterations ... \n"); @@ -1042,51 +1040,51 @@ mixdata[i]->cdsA = mixA[i]; mixdata[i]->probs = probs[i]; mixdata[i]->vars = vars[i]; - + /* MultiPoseMix_pth((void *) mixdata[i]); */ - rc = pthread_create(&callThd[i], &attr, MultiPoseMix_pth, (void *) mixdata[i]); - - if (rc) - { - printf("ERROR815: return code from pthread_create() %d is %d\n", i, rc); - fflush(NULL); - exit(EXIT_FAILURE); - } + rc = pthread_create(&callThd[i], &attr, MultiPoseMix_pth, (void *) mixdata[i]); + + if (rc) + { + printf("ERROR815: return code from pthread_create() %d is %d\n", i, rc); + fflush(NULL); + exit(EXIT_FAILURE); + } } rc = 0; - for (i = 0; i < mixn; ++i) - { - rc = pthread_join(callThd[i], (void **) NULL); - - if (rc) - { - printf("ERROR816: return code from pthread_join() %d is %d\n", i, rc); - fflush(NULL); - exit(EXIT_FAILURE); - } - } - - for (i = 0; i < mixn; ++i) - { - printf(" Mixture %d: %4d rounds\n", i, mixdata[i]->rounds); - fflush(NULL); + for (i = 0; i < mixn; ++i) + { + rc = pthread_join(callThd[i], 
(void **) NULL); + + if (rc) + { + printf("ERROR816: return code from pthread_join() %d is %d\n", i, rc); + fflush(NULL); + exit(EXIT_FAILURE); + } + } + + for (i = 0; i < mixn; ++i) + { + printf(" Mixture %d: %4d rounds\n", i, mixdata[i]->rounds); + fflush(NULL); } NewCalcMixDens(mixA, mixn, probs); AveProb(aveprobs, mixn, probs, vlen); - printf("\n mxp:"); - for (j = 0; j < mixn; ++j) - printf(" % 6.4f", aveprobs[j]); - printf("\n"); + printf("\n mxp:"); + for (j = 0; j < mixn; ++j) + printf(" % 6.4f", aveprobs[j]); + printf("\n"); - if (CheckConvergenceMix(probs[0], oldprobs, vlen, algo->precision) == 1 && count > 5) + if (CheckConvergenceMix(probs[0], oldprobs, vlen, algo->precision) && count > 5) break; lineskip = 3 + mixn; - printf("\033[<%d>A", lineskip); + printf("\033[<%d>A", lineskip); } pthread_attr_destroy(&attr); @@ -1095,10 +1093,10 @@ for (i = 0; i < mixn; ++i) { - if (algo->leastsquares == 1) - CalcNormResidualsLS(mixA[i]); - else - CalcNormResiduals(mixA[i]); + if (algo->leastsquares) + CalcNormResidualsLS(mixA[i]); + else + CalcNormResiduals(mixA[i]); } logL = 0.0; @@ -1111,8 +1109,8 @@ logL += log(Lik); } - for (i = 0; i < mixn; ++i) - logL += CalcHierarchLogL(mixA[i]); +// for (i = 0; i < mixn; ++i) +// logL += CalcHierarchLogL(mixA[i]); AIC = logL; for (i = 0; i < mixn; ++i) @@ -1146,38 +1144,19 @@ /* For superimposing to an alignment, we don't need to weight by occupancy since we are using pseudo-coordinates here from the E-M expectation step */ + // DLT OP FIX comment static void CalcTranslations(CdsArray *scratchA, Algorithm *algo) { Cds **cds = scratchA->cds; int i; - for (i = 0; i < scratchA->cnum; ++i) - { - if (algo->alignment == 1 && algo->rounds < 3) - CenMassWtIpOcc(cds[i], scratchA->w); - else - CenMassWtIp(cds[i], scratchA->w); - } -} - - -static void -MatDiagMultCdsMultMatDiag(Cds *outcds, const double *wtK, const Cds *cds) -{ - int i; - double wtKi; - const double *x = (const double *) cds->x, - *y = (const double *) cds->y, - *z 
= (const double *) cds->z; - - for (i = 0; i < cds->vlen; ++i) - { - wtKi = wtK[i]; - - outcds->x[i] = wtKi * x[i]; - outcds->y[i] = wtKi * y[i]; - outcds->z[i] = wtKi * z[i]; + for (i = 0; i < scratchA->cnum; ++i) + { + if (algo->alignment && algo->rounds < 3) + CenMassWtIpNu(cds[i], scratchA->w); + else + CenMassWtIp(cds[i], scratchA->w); } } @@ -1192,25 +1171,25 @@ double deviation = 0.0, deviation_sum = 0.0; int i; - MatDiagMultCdsMultMatDiag(tcds, wts, avecds); + MatDiagMultCdsMultMatDiag(tcds, wts, avecds); - for (i = 0; i < cdsA->cnum; ++i) - { - /* note that the avecds are already multiplied by the weight matrices */ - deviation = ProcGSLSVDvan(cds[i], - tcds, - cds[i]->matrix, - cdsA->tmpmat3a, - cdsA->tmpmat3b, - cdsA->tmpmat3c, - cdsA->tmpvec3a); + for (i = 0; i < cdsA->cnum; ++i) + { + /* note that the avecds are already multiplied by the weight matrices */ + deviation = ProcGSLSVDvan(cds[i], + tcds, + cds[i]->matrix, + cdsA->tmpmat3a, + cdsA->tmpmat3b, + cdsA->tmpmat3c, + cdsA->tmpvec3a); /* RotateCdsIp(cds[i], (const double **) cds[i]->matrix); */ - /* find global rmsd and average cds (both held in structure) */ - cds[i]->wRMSD_from_mean = sqrt(deviation / (3 * cdsA->vlen)); - deviation_sum += deviation; - } + /* find global rmsd and average cds (both held in structure) */ + cds[i]->wRMSD_from_mean = sqrt(deviation / (3 * cdsA->vlen)); + deviation_sum += deviation; + } return(deviation_sum); } @@ -1219,10 +1198,7 @@ static void HierarchVars(CdsArray *cdsA) { - int i; - double mean, mu, lambda, b, c, zeta, sigma; - - switch(cdsA->algo->hierarch) + switch(algo->hierarch) { case 0: break; @@ -1231,94 +1207,17 @@ /* This accounts for the fact that the smallest eigenvalue of the covariance matrix is always zero, i.e. 
the covariance matrix is necessarily of rank vlen - 1 */ - if (cdsA->algo->rounds > 4) + if (algo->rounds > 4) InvGammaFitEvals(cdsA, 1); else InvGammaFitEvals(cdsA, 0); - if (cdsA->algo->verbose != 0) - printf(" HierarchVars() chi2:%f\n", cdsA->stats->hierarch_chi2); - break; - - case 2: - InvGammaFitVars(cdsA, 1); - if (cdsA->algo->verbose != 0) - printf(" HierarchVars() chi2:%f\n", cdsA->stats->hierarch_chi2); - break; - - case 3: - InvGamma1FitEvals(cdsA, 1); - break; - - case 4: - InvGammaFitVars_minc(cdsA, 1.0, 1); - break; - - case 5: - InvGammaMMFitVars(cdsA, &b, &c); - break; - - case 6: - InvGammaStacyFitVars(cdsA, &b, &c); - break; - - case 7: - for (i = 0; i < cdsA->vlen; ++i) - cdsA->var[i] = cdsA->CovMat[i][i]; - cdsA->algo->covweight = 0; - cdsA->algo->varweight = 1; - InvGammaFitVars(cdsA, 1); - cdsA->algo->covweight = 1; - cdsA->algo->varweight = 0; - CovMat2CorMat(cdsA->CovMat, cdsA->vlen); - CorMat2CovMat(cdsA->CovMat, (const double *) cdsA->var, cdsA->vlen); - break; - - case 8: /* ML fit of variances to a reciprocal inverse gaussian dist */ - RecipInvGaussFitVars(cdsA, &mu, &lambda); - RecipInvGaussAdjustVars(cdsA, mu, lambda); - break; - - case 9: /* ML fit of variances to a lognorml distribution */ - LognormalFitVars(cdsA, &zeta, &sigma); - LognormalAdjustVars(cdsA, zeta, sigma); - break; - - case 10: - InvgaussFitVars(cdsA, &mean, &lambda); - InvgaussAdjustVars(cdsA, zeta, sigma); - break; - - case 12: /* inv gamma fit to eigenvalues of covariance mat, but only weighting by variances */ - cdsA->algo->covweight = 1; - cdsA->algo->varweight = 0; - if (cdsA->algo->alignment == 1) - CalcCovMatOcc(cdsA); - else - CalcCovMat(cdsA); - InvGammaFitEvals(cdsA, 1); - cdsA->algo->covweight = 0; - cdsA->algo->varweight = 1; - for (i = 0; i < cdsA->vlen; ++i) - cdsA->var[i] = cdsA->CovMat[i][i]; - break; - - case 13: /* inv gamma fit to eigenvalues of covariance mat, but only weighting by variances */ - cdsA->algo->covweight = 1; - cdsA->algo->varweight = 
0; - if (cdsA->algo->alignment == 1) - CalcCovMatOcc(cdsA); - else - CalcCovMat(cdsA); - InvGammaFitVars(cdsA, 0); /* no iterations */ - cdsA->algo->covweight = 0; - cdsA->algo->varweight = 1; - for (i = 0; i < cdsA->vlen; ++i) - cdsA->var[i] = cdsA->CovMat[i][i]; + if (algo->verbose) + printf(" HierarchVars() chi2:%f\n", stats->hierarch_chi2); break; default: - printf("\n ERROR: Bad -g option \"%d\" \n", cdsA->algo->hierarch); + printf("\n ERROR: Bad -g option \"%d\" \n", algo->hierarch); Usage(0); exit(EXIT_FAILURE); break; @@ -1345,26 +1244,26 @@ static int CheckConvergenceOuter(CdsArray *cdsA, int round, const double precision) { - Algorithm *algo = cdsA->algo; + int i; if (round >= algo->iterations) return(1); - if (algo->abort == 1) + if (algo->abort) return(1); -/* else if (algo->alignment == 1 && round < 10) */ +/* else if (algo->alignment && round < 10) */ /* return(0); */ else if (round > 6) { /* if (Mat3FrobEq((const double **) mat1, (const double **) mat2, algo->precision) == 0) */ - cdsA->stats->precision = 0.0; + stats->precision = 0.0; for (i = 0; i < cdsA->cnum; ++i) - cdsA->stats->precision += FrobDiffNormIdentMat((const double **) cdsA->cds[i]->matrix, 3); - cdsA->stats->precision /= cdsA->cnum; + stats->precision += FrobDiffNormIdentMat((const double **) cdsA->cds[i]->matrix, 3); + stats->precision /= cdsA->cnum; - if (cdsA->stats->precision > precision) + if (stats->precision > precision) return(0); else return(1); @@ -1380,7 +1279,6 @@ { int i, round, innerround; int slxn; /* index of random coord to select as first */ - double deviation_sum = 0.0; const int cnum = baseA->cnum; const int vlen = baseA->vlen; double *evals = malloc(3 * sizeof(double)); @@ -1388,7 +1286,6 @@ Statistics *stats = NULL; Cds **cds = NULL; Cds *avecds = NULL; - Cds *tcds = NULL; CdsArray *scratchA = NULL; gsl_rng *r2 = NULL; @@ -1407,11 +1304,8 @@ CdsArrayCopy(scratchA, baseA); /* setup local aliases based on scratchA */ - algo = scratchA->algo; - stats = 
scratchA->stats; cds = scratchA->cds; avecds = scratchA->avecds; - tcds = scratchA->tcds; memcpy(scratchA->w, probs, vlen * sizeof(double)); memcpy(baseA->w, probs, vlen * sizeof(double)); @@ -1422,7 +1316,7 @@ stats->hierarch_p1 = 0.0; stats->hierarch_p2 = 0.0; - if (algo->embedave != 0) + if (algo->embedave) { printf(" Calculating distance matrix for embedding average ... \n"); fflush(NULL); @@ -1430,8 +1324,8 @@ CdsCopyAll(avecds, cds[0]); DistMatsAlloc(scratchA); - if (algo->alignment == 1) - CalcMLDistMatOcc(scratchA); + if (algo->alignment) + CalcMLDistMatNu(scratchA); else CalcMLDistMat(scratchA); @@ -1453,7 +1347,7 @@ CdsCopyAll(avecds, baseA->cds[slxn]); } - if (algo->notrans == 0) + if (algo->dotrans) { CenMassWtIp(avecds, scratchA->w); ApplyCenterIp(avecds); @@ -1463,8 +1357,8 @@ (1) First calculates the translations (2) Does inner loop -- calc rotations and average till convergence (3) Holding the superposition constant, calculates the covariance - matrices and corresponding weight matrices, looping till - convergence when using a dimensional/axial covariance matrix + matrices and corresponding weight matrices, looping till + convergence when using a dimensional/axial covariance matrix */ round = 0; while(1) @@ -1476,7 +1370,7 @@ /* fflush(NULL); */ ++round; - baseA->algo->rounds = algo->rounds = round; + algo->rounds = round; /* Find weighted center and translate all cds */ CalcTranslations(scratchA, algo); @@ -1488,7 +1382,7 @@ memcpy(cds[i]->translation, cds[i]->center, 3 * sizeof(double)); /* when superimposing to an alignemnt, initially iterate into unwted LS for a few rounds */ - if (algo->alignment == 1 && round < 5) + if (algo->alignment && round < 5) memsetd(scratchA->w, 1.0, vlen); /* Inner loop: @@ -1509,13 +1403,13 @@ MatCpySym(cds[i]->last_matrix, (const double **) cds[i]->matrix, 3); /* find the optimal rotation matrices */ - if (algo->alignment == 1 /* && (round == 1 || cnum == 2) */) - deviation_sum = CalcRotationsOcc(scratchA); + if 
(algo->alignment /* && (round == 1 || cnum == 2) */) + CalcRotationsNu(scratchA); else - deviation_sum = CalcRotations(scratchA); + CalcRotations(scratchA); if (innerround == 1 && - CheckConvergenceOuter(scratchA, round, algo->precision) == 1) + CheckConvergenceOuter(scratchA, round, algo->precision)) goto outsidetheloops; /* rotate the scratch cds with new rotation matrix */ @@ -1523,17 +1417,17 @@ RotateCdsIp(cds[i], (const double **) cds[i]->matrix); /* find global rmsd and average cds (both held in structure) */ - if (algo->alignment == 1) - { - AveCdsOcc(scratchA); - EM_MissingCds(scratchA); - } - else - { - AveCds(scratchA); - } + if (algo->alignment) + { + AveCdsNu(scratchA); + EM_MissingCds(scratchA); + } + else + { + AveCds(scratchA); + } - stats->wRMSD_from_mean = sqrt(deviation_sum / (3 * vlen * cnum)); + //stats->wRMSD_from_mean = sqrt(deviation_sum / (3 * vlen * cnum)); if (innerround > 160) { @@ -1546,7 +1440,7 @@ /* Weighting by dimensional, axial Xi covariance matrix, here diagonal. */ /* Holding the superposition constant, calculates the covariance - matrices and corresponding weight matrices, looping till + matrices and corresponding weight matrices, looping till convergence. 
*/ CalcCovariances(scratchA); @@ -1578,15 +1472,15 @@ CalcWtsMix(CdsArray *cdsA, const double *probs) { int i; - Algorithm *algo = cdsA->algo; + double *variance = cdsA->var; double *weight = cdsA->w; const int vlen = cdsA->vlen; - if (algo->noave == 0) + if (algo->doave) AveCds(cdsA); - if (algo->leastsquares != 0) + if (algo->leastsquares) { for (i = 0; i < vlen; ++i) weight[i] = probs[i]; @@ -1594,12 +1488,8 @@ return; } - if (algo->varweight != 0) + if (algo->varweight) { - for (i = 0; i < vlen; ++i) - if (variance[i] < probs[i] * algo->constant) - variance[i] = probs[i] * algo->constant; - HierarchVars(cdsA); for (i = 0; i < vlen; ++i) @@ -1612,6 +1502,4 @@ weight[i] = probs[i] / variance[i]; } } - - /* cdsA->stats->wtnorm = NormalizeWeights(weight, vlen); */ } diff -Nru theseus-2.0.6/MultiPoseMix.h theseus-3.0.0/MultiPoseMix.h --- theseus-2.0.6/MultiPoseMix.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/MultiPoseMix.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/myassert.h theseus-3.0.0/myassert.h --- theseus-2.0.6/myassert.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/myassert.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/PCAstats.c theseus-3.0.0/PCAstats.c --- theseus-2.0.6/PCAstats.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/PCAstats.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -41,56 +41,39 @@ #include "pdbUtils.h" -#if 0 // defined(__APPLE__) - #include - #include - #include - #include - #include - #include - - static double start_time, end_time; -#endif - - void CalcPCA(CdsArray *cdsA) { int i, j; int vlen = (int) cdsA->vlen; - int upper, lower, pcanum; + int pcanum; double **CovMat = cdsA->CovMat; double sum, runsum; - PDBCds *pdbave; + PDBCds *pdbave = NULL; char pcafile_name[256]; - FILE *pcavecs_fp, *pcastats_fp; + FILE *pcavecs_fp = NULL, *pcastats_fp = NULL; double biggest, bstick; char aster; char *cov_name = NULL, *cor_name = NULL, *pcvecs_name = NULL, *pcstats_name = NULL; - #if 0 //defined(__APPLE__) - double milliseconds; - start_time = seconds(); - #endif - pdbave = cdsA->pdbA->avecds; - pcanum = cdsA->algo->pca; + pcanum = algo->pca; if (pcanum > cdsA->vlen) - pcanum = cdsA->algo->pca = cdsA->vlen; + pcanum = algo->pca = cdsA->vlen; if (pcanum > cdsA->cnum - 1) - pcanum = cdsA->algo->pca = cdsA->cnum - 1; + pcanum = algo->pca = cdsA->cnum - 1; - cov_name = mystrcat(cdsA->algo->rootname, "_cov.mat"); + cov_name = mystrcat(algo->rootname, "_cov.mat"); PrintCovMatGnuPlot((const double **) CovMat, vlen, cov_name); /* convert it to a correlation matrix */ - if (cdsA->algo->cormat == 1) + if (algo->cormat) { CovMat2CorMat(CovMat, vlen); - cor_name = 
mystrcat(cdsA->algo->rootname, "_cor.mat"); + cor_name = mystrcat(algo->rootname, "_cor.mat"); PrintCovMatGnuPlot((const double **) CovMat, vlen, cor_name); } @@ -101,9 +84,6 @@ for (i = 0; i < vlen; ++i) sum += CovMat[i][i]; - lower = vlen - pcanum + 1; /* careful -- inclusive indices */ - upper = vlen - 0; - //cdsA->pcamat = MatAlloc(pcanum, vlen); cdsA->pcamat = MatAlloc(vlen, vlen); cdsA->pcavals = malloc(vlen * sizeof(double)); @@ -120,14 +100,8 @@ //MatPrint(cdsA->pcamat, vlen); - #if 0 //defined(__APPLE__) - end_time = seconds(); - milliseconds = (double) (end_time - start_time) / 0.001; - printf(" PCs calculated in %.3f ms\n", milliseconds); - #endif - - pcvecs_name = mystrcat(cdsA->algo->rootname, "_pcvecs.txt"); - pcstats_name = mystrcat(cdsA->algo->rootname, "_pcstats.txt"); + pcvecs_name = mystrcat(algo->rootname, "_pcvecs.txt"); + pcstats_name = mystrcat(algo->rootname, "_pcstats.txt"); pcavecs_fp = myfopen(pcvecs_name, "w"); pcastats_fp = myfopen(pcstats_name, "w"); @@ -186,15 +160,15 @@ biggest = fabs(cdsA->pcamat[i][j]); /* rescale (for rasmol really) so that the largest eigenvalue component - is = 99.99, i.e. the largest value allowable in the b-value column + is = 99.99, i.e. 
the largest value allowable in the b-value column of a PDB file */ /* biggest = 1.0; */ for (j = 0; j < vlen; ++j) cdsA->pdbA->avecds->tempFactor[j] = cdsA->pcamat[i][j] * (99.99 / biggest); - sprintf(pcafile_name, "%s_pc%d_ave.pdb", cdsA->algo->rootname, i+1); + sprintf(pcafile_name, "%s_pc%d_ave.pdb", algo->rootname, i+1); - //strcpy(pcafile_name, mystrcat(cdsA->algo->rootname, "_pc")); + //strcpy(pcafile_name, mystrcat(algo->rootname, "_pc")); //pcafile_name[11] = '\0'; //strncat(pcafile_name, itoa(i+1, &numstring[0], 10), 5); //strncat(pcafile_name, "_ave.pdb", 8); @@ -204,11 +178,11 @@ if (pcanum == vlen) { - char *pcvecs_mat_name = mystrcat(cdsA->algo->rootname, "_pcvecs.mat"); + char *pcvecs_mat_name = mystrcat(algo->rootname, "_pcvecs.mat"); - for (i = 0; i < vlen; ++i) - for (j = 0; j < pcanum; ++j) - cdsA->pcamat[j][i] *= sqrt(cdsA->pcavals[j]); + for (i = 0; i < vlen; ++i) + for (j = 0; j < pcanum; ++j) + cdsA->pcamat[j][i] *= sqrt(cdsA->pcavals[j]); PrintCovMatGnuPlot((const double **) cdsA->pcamat, vlen, pcvecs_mat_name); free(pcvecs_mat_name); @@ -216,7 +190,7 @@ fclose(pcastats_fp); fclose(pcavecs_fp); - + if (cov_name != NULL) free(cov_name); @@ -237,7 +211,7 @@ int i, j; int vlen = (int) 3 * cdsA->vlen; double **mat = NULL; - int upper, lower, pcanum; + int pcanum; double **evecs = NULL, *evals = NULL; double sum, runsum; PDBCds *pdbave = NULL; @@ -246,18 +220,13 @@ double biggest, bstick; char aster; - #if 0 //defined(__APPLE__) - double milliseconds; - start_time = seconds(); - #endif - pdbave = cdsA->pdbA->avecds; mat = MatAlloc(vlen, vlen); - if (cdsA->algo->pca > cdsA->cnum - 1) - pcanum = cdsA->algo->pca = cdsA->cnum - 1; + if (algo->pca > cdsA->cnum - 1) + pcanum = algo->pca = cdsA->cnum - 1; else - pcanum = cdsA->algo->pca; + pcanum = algo->pca; /* copy over the covariance matrix */ memcpy(mat[0], cdsA->FullCovMat[0], vlen * vlen * sizeof(double)); @@ -265,7 +234,7 @@ /* fflush(NULL); */ /* convert it to a correlation matrix */ - if 
(cdsA->algo->cormat == 1) + if (algo->cormat) CovMat2CorMat(mat, vlen); /* find the total variance */ @@ -273,8 +242,6 @@ for (i = 0; i < vlen; ++i) sum += mat[i][i]; - lower = vlen - pcanum + 1; /* careful -- inclusive indices */ - upper = vlen - 0; evecs = MatAlloc(vlen, vlen); evals = malloc(vlen * sizeof(double)); @@ -290,12 +257,6 @@ PrintCovMatGnuPlot((const double **) evecs, vlen, "evecs.mat"); - #if 0 //defined(__APPLE__) - end_time = seconds(); - milliseconds = (double) (end_time - start_time) / 0.001; - printf(" PCs calculated in %.3f ms (Apple CoreServices)\n", milliseconds); - #endif - pcavecs_fp = fopen("pcavecs.txt", "w"); pcastats_fp = fopen("pcastats.txt", "w"); if (pcavecs_fp == NULL || pcastats_fp == NULL) @@ -352,14 +313,14 @@ biggest = fabs(evecs[i][j]); /* rescale (for rasmol really) so that the largest eigenvalue component - is = 99.99, i.e. the largest value allowable in the b-value column + is = 99.99, i.e. the largest value allowable in the b-value column of a PDB file */ for (j = 0; j < vlen; ++j) cdsA->pdbA->avecds->tempFactor[j] = evecs[i][j] * (99.99 / biggest); - sprintf(pcafile_name, "%s_pc%d", cdsA->algo->rootname, i+1); + sprintf(pcafile_name, "%s_pc%d", algo->rootname, i+1); - //strncpy(pcafile_name, mystrcat(cdsA->algo->rootname, "_pc"), 11); + //strncpy(pcafile_name, mystrcat(algo->rootname, "_pc"), 11); //pcafile_name[11] = '\0'; //strncat(pcafile_name, itoa(i+1, &numstring[0], 10), 5); @@ -377,86 +338,6 @@ void -CalcStructPCA(CdsArray *cdsA) -{ - int i, j; - int cnum = (int) cdsA->cnum; - double **mat = MatAlloc(cnum, cnum); - double **evecs = MatAlloc(cnum, cnum); - double *w = NULL; - double sum, runsum; - FILE *pcavecs_fp = NULL, *pcastats_fp = NULL; - - if (cdsA->SCovMat == NULL) - cdsA->SCovMat = MatAlloc(cnum, cnum); - - CalcStructCovMat(cdsA); - - /* copy over the covariance matrix */ - memcpy(mat[0], cdsA->SCovMat[0], cnum * cnum * sizeof(double)); - -/* MatPrint(cdsA->SCovMat, cnum); */ -/* fflush(NULL); */ - - /* 
convert it to a correlation matrix */ - if (cdsA->algo->cormat == 1) - CovMat2CorMat(mat, cnum); - - /* find the total variance */ - sum = 0.0; - for (i = 0; i < cnum; ++i) - sum += mat[i][i]; - - w = (double *) malloc(cnum * sizeof(double)); - - EigenGSLDest(mat, cnum, w, evecs, 1); - MatTransIp(evecs, cnum); - - pcavecs_fp = myfopen(mystrcat(cdsA->algo->rootname, "_struct_pcvecs.txt"), "w"); - pcastats_fp = myfopen(mystrcat(cdsA->algo->rootname, "_struct_pcstats.txt"), "w"); - if (pcavecs_fp == NULL || pcastats_fp == NULL) - { - fprintf(stderr, "\n ERROR: Could not open PCA files \n"); - PrintTheseusTag(); - exit(EXIT_FAILURE); - } - - runsum = 0.0; - fprintf(pcastats_fp, "eigenv raw raw_%% cumul_%% \n"); - - for (i = cnum - 1; i >= 0; --i) - { - runsum += w[i]; - fprintf(pcastats_fp, "%-6i %8.3f %8.3f %8.3f\n", - i+1, w[i], w[i] * 100.0 / sum, runsum * 100.0 / sum); - } - fputc('\n', pcastats_fp); - - fprintf(pcavecs_fp, "model "); - for (j = 0; j < cnum; ++j) - fprintf(pcavecs_fp, " %3d ", j+1); - fputc('\n', pcavecs_fp); - - for (i = 0; i < cnum; ++i) - { - fprintf(pcavecs_fp, "%-4d ", i+1); - - for (j = 0; j < cnum; ++j) - fprintf(pcavecs_fp, "%8.3f ", sqrt(w[j]) * mat[j][i]); - - fputc('\n', pcavecs_fp); - } - - cdsA->modpcamat = mat; - cdsA->modpcavals = w; - - fclose(pcastats_fp); - fclose(pcavecs_fp); - MatDestroy(&evecs); -} - - -void WritePCAFile(PDBCdsArray *parray, CdsArray *cdsA, const char *outfile_root) { FILE *pdbfile = NULL; @@ -471,24 +352,24 @@ char covcor_str[16] = "correlation"; /* find largest absolute value in the eigenvector PCA */ - for (i = 0; i < cdsA->algo->pca; ++i) + for (i = 0; i < algo->pca; ++i) { biggest = -DBL_MAX; for (j = 0; j < cvlen; ++j) { /* printf("\n%3d %3d % f", i, j, mat[i][j]); */ - if (biggest < fabs(mat[cdsA->algo->pca - 1 - i][j])) - biggest = fabs(mat[cdsA->algo->pca - 1 - i][j]); + if (biggest < fabs(mat[algo->pca - 1 - i][j])) + biggest = fabs(mat[algo->pca - 1 - i][j]); } /* } */ biggest = 99.99 / biggest; -/* 
for (i = 0; i < cdsA->algo->pca; ++i) */ +/* for (i = 0; i < algo->pca; ++i) */ /* { */ /* rescale (for rasmol really) so that the largest eigenvector component - is = 99.99, i.e. the largest value allowable in the b-value column + is = 99.99, i.e. the largest value allowable in the b-value column of a PDB file */ - if (cdsA->algo->atoms == 0) + if (algo->atoms == 0) { m = 0; for (j = 0; j < cvlen; ++j) @@ -509,7 +390,7 @@ cdsA->cds[0]->chainID[j] == parray->cds[0]->chainID[m] && cdsA->cds[0]->resSeq[j] == parray->cds[0]->resSeq[m]) { - tempFactor = mat[cdsA->algo->pca - 1 - i][j] * biggest; + tempFactor = mat[algo->pca - 1 - i][j] * biggest; /* printf("\n%4d %4d % f", j, m, tempFactor); */ for (k = 0; k < parray->cnum; ++k) parray->cds[k]->tempFactor[m] = tempFactor; @@ -528,7 +409,7 @@ { for (j = 0; j < cvlen; ++j) { - tempFactor = mat[cdsA->algo->pca - 1 - i][j] * biggest; + tempFactor = mat[algo->pca - 1 - i][j] * biggest; for (k = 0; k < parray->cnum; ++k) parray->cds[k]->tempFactor[j] = tempFactor; @@ -553,88 +434,7 @@ exit(EXIT_FAILURE); } - if (cdsA->algo->cormat == 0) - strncpy(covcor_str, "covariance", 10); - - fprintf(pdbfile, "REMARK ===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-=\n"); - fprintf(pdbfile, "REMARK + File made by Douglas Theobald's THESEUS program\n"); - fprintf(pdbfile, "REMARK + Multiple maximum likelihood superpositioning\n"); - fprintf(pdbfile, "REMARK + Principal component %3d of %s matrix in B-factor column\n", i+1, covcor_str); - fprintf(pdbfile, "REMARK + All B-factors scaled by %12.3f\n", biggest); - fprintf(pdbfile, "REMARK + dtheobald@brandeis.edu\n"); - fprintf(pdbfile, "REMARK =-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===\n"); - - for (j = 0; j < parray->cnum; ++j) - { - fprintf(pdbfile, "MODEL %8d\n", j+1); - PrintPDBCds(pdbfile, parray->cds[j]); - fprintf(pdbfile, "ENDMDL\n"); - } - fprintf(pdbfile, "END\n"); - - fclose(pdbfile); - } - - PCADestroy(cdsA); -} - - -void 
-WriteModelPCAFile(PDBCdsArray *parray, CdsArray *cdsA, const char *outfile_root) -{ - FILE *pdbfile = NULL; - char pcafile_name[256]; - int i, j, k; - double biggest = -DBL_MAX; - const double **mat = (const double **) cdsA->modpcamat; - const int pvlen = parray->vlen; - const int cnum = cdsA->cnum; - double tempFactor; - char covcor_str[16] = "correlation"; - const int pcanum = cdsA->cnum; - - /* find largest absolute value in the eigenvector PCA */ - for (i = 0; i < pcanum; ++i) - { - biggest = -DBL_MAX; - for (j = 0; j < cnum; ++j) - { - if (biggest < fabs(mat[pcanum - 1 - i][j])) - biggest = fabs(mat[pcanum - 1 - i][j]); - } - - biggest = 99.99 / biggest; - - /* rescale (for rasmol really) so that the largest eigenvector component - is = 99.99, i.e. the largest value allowable in the b-value column - of a PDB file */ - for (j = 0; j < cnum; ++j) - { - tempFactor = mat[pcanum - 1 - i][j] * biggest; - - for (k = 0; k < pvlen; ++k) - parray->cds[j]->tempFactor[k] = tempFactor; - } - - sprintf(pcafile_name, "%s_mod_pca%d.pdb", outfile_root, i+1); - -/* strncpy(pcafile_name, outfile_root, strlen(outfile_root)); */ -/* pcafile_name[strlen(outfile_root)] = '\0'; */ -/* strncat(pcafile_name, "_mod_pca", 8); */ -/* strncat(pcafile_name, itoa(i+1, &numstring[0], 10), 5); */ -/* strcat(pcafile_name, ".pdb"); */ - - pdbfile = fopen(pcafile_name, "w"); - if (pdbfile ==NULL) - { - perror("\n ERROR"); - fprintf(stderr, - "\n ERROR99: could not open file '%s' for writing. 
\n\n", pcafile_name); - PrintTheseusTag(); - exit(EXIT_FAILURE); - } - - if (cdsA->algo->cormat == 0) + if (algo->cormat == 0) strncpy(covcor_str, "covariance", 10); fprintf(pdbfile, "REMARK ===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-=\n"); @@ -673,7 +473,7 @@ double *vals = cdsA->pcavals; const int cvlen = 3 * cdsA->vlen; const int pvlen = parray->vlen; - PDBCds *pcacds; + PDBCds *pcacds = NULL; char covcor_str[16] = "correlation"; pcacds = PDBCdsInit(); @@ -684,24 +484,24 @@ memset(pcacds->y, 0, pvlen * sizeof(double)); memset(pcacds->z, 0, pvlen * sizeof(double)); - if (cdsA->algo->pca > cdsA->cnum - 1) - pcanum = cdsA->algo->pca = cdsA->cnum - 1; + if (algo->pca > cdsA->cnum - 1) + pcanum = algo->pca = cdsA->cnum - 1; else - pcanum = cdsA->algo->pca; + pcanum = algo->pca; CalcFullCovMat(cdsA); /* Multiply each PCA by the sqrt of the corresponding eigenvalue. If correlation matrix was used, we need to get back into std deviation space, so multiply by the sqrt of the corresponding variance */ - if (cdsA->algo->cormat == 1) + if (algo->cormat) { for (i = 0; i < pcanum; ++i) for (j = 0; j < cvlen; ++j) /* vecs[i][j] *= sqrt(cdsA->FullCovMat[j][j] * vals[i]); */ vecs[i][j] *= sqrt(cdsA->FullCovMat[j][j]); } - else if (cdsA->algo->cormat == 0) + else if (algo->cormat == 0) { for (i = 0; i < pcanum; ++i) for (j = 0; j < cvlen; ++j) @@ -759,7 +559,7 @@ exit(EXIT_FAILURE); } - if (cdsA->algo->cormat == 0) + if (algo->cormat == 0) strncpy(covcor_str, "covariance", 10); fprintf(pdbfile, "REMARK ===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-=\n"); @@ -790,30 +590,30 @@ double *vals = cdsA->pcavals; const int cvlen = 3 * cdsA->vlen; const int pvlen = parray->vlen; - PDBCds *pcacds; + PDBCds *pcacds = NULL; //char numstring[5], tmpstring[5]; pcacds = PDBCdsInit(); PDBCdsAlloc(pcacds, pvlen); PDBCdsCopyAll(pcacds, parray->cds[0]); - if (cdsA->algo->pca > cdsA->cnum - 1) - pcanum = cdsA->algo->pca = cdsA->cnum - 1; + if 
(algo->pca > cdsA->cnum - 1) + pcanum = algo->pca = cdsA->cnum - 1; else - pcanum = cdsA->algo->pca; + pcanum = algo->pca; CalcFullCovMat(cdsA); /* Multiply each PCA by the sqrt of the corresponding eigenvalue. If correlation matrix was used, we need to get back into std deviation space, so multiply by the sqrt of the corresponding variance */ - if (cdsA->algo->cormat == 1) + if (algo->cormat) { for (i = 0; i < pcanum; ++i) for (j = 0; j < cvlen; ++j) vecs[i][j] *= sqrt(cdsA->FullCovMat[j][j] * vals[i]); } - else if (cdsA->algo->cormat == 0) + else if (algo->cormat == 0) { for (i = 0; i < pcanum; ++i) for (j = 0; j < cvlen; ++j) @@ -834,7 +634,7 @@ for (j = 0; j < cvlen; j += 3) { /* skip inital PDBCds that may have been selected out */ - while (strncmp(cdsA->cds[0]->resName[j/3], parray->cds[0]->resName[m], 3) != 0 || + while (strncmp(cdsA->cds[0]->resName[j/3], parray->cds[0]->resName[m], 3) || cdsA->cds[0]->chainID[j/3] != parray->cds[0]->chainID[m] || cdsA->cds[0]->resSeq[j/3] != parray->cds[0]->resSeq[m]) { diff -Nru theseus-2.0.6/PCAstats.h theseus-3.0.0/PCAstats.h --- theseus-2.0.6/PCAstats.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/PCAstats.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -33,15 +33,9 @@ Calc3NPCA(CdsArray *cdsA); void -CalcStructPCA(CdsArray *cdsA); - -void WritePCAFile(PDBCdsArray *parray, CdsArray *cdsA, const char *outfile_root); void -WriteModelPCAFile(PDBCdsArray *parray, CdsArray *cdsA, const char *outfile_root); - -void WritePCAMorphFile(PDBCdsArray *parray, CdsArray *cdsA, const char *outfile_root); void diff -Nru theseus-2.0.6/PDBCds.h theseus-3.0.0/PDBCds.h --- theseus-2.0.6/PDBCds.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/PDBCds.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,14 +26,18 @@ #ifndef PDBCOORDS_SEEN #define PDBCOORDS_SEEN -#include "DLTmath.h" #include "msa.h" #include "Cds.h" +typedef struct Seq2PDB Seq2PDB; +typedef struct PDBCds PDBCds; +typedef struct PDBCdsArray PDBCdsArray; + + /* A structure for mapping sequence names in an alignment to their corresponding pdbfiles */ -typedef struct Seq_2_PDB +struct Seq2PDB { int seqnum; char **pdbfile_name; @@ -45,17 +49,20 @@ int *map; MSA *msa; int *singletons; -} Seq2PDB; +}; /* PDBCds is mostly for reading/writing PDB files */ -typedef struct PDB_Cds +struct PDBCds { char filename[FILENAME_MAX]; int model; + + int vlen; /* number of coordinates */ + double **matrix; double *translation; - int vlen; /* number of coordinates */ + double scale; /* PDB ATOM/HETATM fields */ char **record; /* ATOM or HETATM */ @@ -77,6 +84,8 @@ char **element; char **charge; + int *nu; + /* not to be accessed - for space only */ char *record_space; char *name_space; @@ -84,20 +93,20 @@ char *segID_space; 
char *element_space; char *charge_space; -} PDBCds; +}; -typedef struct PDB_Cds_Array +struct PDBCdsArray { - PDBCds **cds; /* pointer to an array of cnum pointers to Cds */ - PDBCds *avecds; /* average Cds of all in CdsArray */ - struct Cds_Array *cdsA; /* associated CdsArray - do not free */ - struct Cds_Array *scratchA; /* do not free */ - int vlen; /* number of coordinates */ - int cnum; /* number of Cds in array */ - int *upper, *lower; - int range_num; - Seq2PDB *seq2pdb; -} PDBCdsArray; + PDBCds **cds; /* pointer to an array of cnum pointers to Cds */ + PDBCds *avecds; /* average Cds of all in CdsArray */ + struct CdsArray *cdsA; /* associated CdsArray - do not free */ + struct CdsArray *scratchA; /* do not free */ + int vlen; /* number of coordinates */ + int cnum; /* number of Cds in array */ + int *upper, *lower; + int range_num; + Seq2PDB *seq2pdb; +}; #endif Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/._pdbIO.c and /tmp/g2bOMTRwaC/theseus-3.0.0/._pdbIO.c differ diff -Nru theseus-2.0.6/pdbIO.c theseus-3.0.0/pdbIO.c --- theseus-2.0.6/pdbIO.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/pdbIO.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -41,6 +41,47 @@ #include "DLTmath.h" +static int +atom_selxn(char *name, int mode, char *useratoms); + +static int +range_selxn(int chainID, char *chains, int resSeq, int *lower, int *upper, int range_num); + +static void +XPLORcorrections(PDBCds *pdbcds, int record); + +static void +ScanPDBLine(char *buff, PDBCds *cp, int j, int amber); + +static int +ScanTPSLine(char *buff, PDBCds *cp, int j); + +static int +GetSlxnLen(PDBCdsArray *pdbA, const int crds, Algorithm *algo, char *chains, + int *lower, int *upper, const int range_num, int *selection_index, int rev); + +static void +PrintCds2File(FILE *pdbfile, Cds *cds); + +static void +PrintNuPDBCds(FILE *pdbfile, PDBCds *pdbcds); + +static int +IsNameCAorP(char *name); + +static void +PrintTPSCds(FILE *pdbfile, PDBCds *pdbcds); + +static void +PrintTheseusModelHeader(FILE *pdbfile); + +static void +PrintModelFileStats(FILE *pdbfile, PDBCdsArray *pdbA, Algorithm *algo, Statistics *stats); + +static void +WritePDBCdsFile(PDBCds *cds, char *file_name); + + static const char atoms0[] = "CA :C1*:C1'"; static const char atoms1[] = "N :C :O :CA :P :C1*:C1':O3*:O3':O5*:O5':C3*:C3':C4*:C4':C5*:C5'"; static const char atoms3[] = "CA :CB :P :C1*:C1'"; @@ -197,12 +238,13 @@ 31 - 38 Real(8.3) x Orthogonal coordinates for X in Angstroms. 39 - 46 Real(8.3) y Orthogonal coordinates for Y in Angstroms. 47 - 54 Real(8.3) z Orthogonal coordinates for Z in Angstroms. -55 - 60 Real(6.2) occupancy Occupancy. +55 - 60 Real(6.2) occupancy Nuupancy. 61 - 66 Real(6.2) tempFactor Temperature factor. 77 - 78 LString(2) element Element symbol, right-justified. 79 - 80 LString(2) charge Charge on the atom. 
*/ + static void ScanPDBLine(char *buff, PDBCds *cp, int j, int amber) { @@ -237,42 +279,63 @@ sscanf(&buff[30], "%8lf", &cp->x[j]); /* 11 30-37 8 Real(8.3) x Orthogonal coordinates for X */ sscanf(&buff[38], "%8lf", &cp->y[j]); /* 12 38-45 8 Real(8.3) y Orthogonal coordinates for Y */ sscanf(&buff[46], "%8lf", &cp->z[j]); /* 13 46-53 8 Real(8.3) z Orthogonal coordinates for Z */ - sscanf(&buff[54], "%6lf", &cp->occupancy[j]); /* 14 54-59 6 Real(6.2) occupancy Occupancy. */ + sscanf(&buff[54], "%6lf", &cp->occupancy[j]); /* 14 54-59 6 Real(6.2) occupancy Nuupancy. */ sscanf(&buff[60], "%6lf", &cp->tempFactor[j]);/* 15 60-65 6 Real(6.2) tempFactor Temperature factor. */ sscanf(&buff[72], "%4c", cp->segID[j]); /* 16 72-75 4 LString(4) segID Segment identifier, left-just. */ sscanf(&buff[76], "%2c", cp->element[j]); /* 17 76-77 2 LString(2) element Element symbol, right-just. */ sscanf(&buff[78], "%2c", cp->charge[j]); /* 18 78-79 2 LString(2) charge Charge on the atom. */ } else /* standard PDB format http://www.rcsb.org/pdb/file_formats/pdb/pdbguide2.2/guide2.2_frame.html */ +// { +// /* 1 2 3 4 5 6 7 8 +// 012345678901234567890123456789012345678901234567890123456789012345678901234567890 +// ATOM 145 N VAL A 25 32.433 16.336 57.540 1.00 11.92 A1 N +// ATOM 146 CA VAL A 25 31.132 16.439 58.160 1.00 11.85 A1 C +// ATOM 15088 1HG1 ILE 28 -3.430 4.303 -6.057 1.00 0.00 H +// ATOM 7580 HD23 LEU 1724 111.285 90.889 -61.535 +// */ +// /* # COLs LEN DATA TYPE FIELD DEFINITION */ +// sscanf(&buff[0], "%6c", cp->record[j]); /* 1 0-5 6 Record name "ATOM " */ +// sscanf(&buff[6], "%5u", &cp->serial[j]); /* 2 6-10 5 Integer serial Atom serial number. */ +// sscanf(&buff[12], "%1c", &cp->Hnum[j]); /* 3 12 1 hydrogen number, usu (!official). */ +// sscanf(&buff[13], "%3c", cp->name[j]); /* 4 13-15 4(3) Atom name Atom name. */ +// sscanf(&buff[16], "%1c", &cp->altLoc[j]); /* 5 16 1 Character altLoc Alternate location indicator. 
*/ +// sscanf(&buff[17], "%3c", cp->resName[j]); /* 6 17-19 3 Residue name resName Residue name. */ +// sscanf(&buff[20], "%1c", &cp->xchainID[j]); /* 7 20 1 Character xchainID Chain identifier (!official). */ +// sscanf(&buff[21], "%1c", &cp->chainID[j]); /* 8 21 1 Character chainID Chain identifier. */ +// sscanf(&buff[22], "%4d", &cp->resSeq[j]); /* 9 22-25 4 Integer resSeq Residue sequence number. */ +// sscanf(&buff[26], "%1c", &cp->iCode[j]); /* 10 26 1 AChar iCode Code for insertion of residues. */ +// sscanf(&buff[30], "%8lf", &cp->x[j]); /* 11 30-37 8 Real(8.3) x Orthogonal coordinates for X */ +// sscanf(&buff[38], "%8lf", &cp->y[j]); /* 12 38-45 8 Real(8.3) y Orthogonal coordinates for Y */ +// sscanf(&buff[46], "%8lf", &cp->z[j]); /* 13 46-53 8 Real(8.3) z Orthogonal coordinates for Z */ +// sscanf(&buff[54], "%6lf", &cp->occupancy[j]); /* 14 54-59 6 Real(6.2) occupancy Nuupancy. */ +// sscanf(&buff[60], "%6lf", &cp->tempFactor[j]);/* 15 60-65 6 Real(6.2) tempFactor Temperature factor. */ +// sscanf(&buff[72], "%4c", cp->segID[j]); /* 16 72-75 4 LString(4) segID Segment identifier, left-just. */ +// sscanf(&buff[76], "%2c", cp->element[j]); /* 17 76-77 2 LString(2) element Element symbol, right-just. */ +// sscanf(&buff[78], "%2c", cp->charge[j]); /* 18 78-79 2 LString(2) charge Charge on the atom. */ +// +// //printf("%4d:%4d %d %d \'%s\'\n", i, j, cp->serial[j], cp->Hnum[j], cp->name[j]); +// } { - /* 1 2 3 4 5 6 7 8 - 012345678901234567890123456789012345678901234567890123456789012345678901234567890 - ATOM 145 N VAL A 25 32.433 16.336 57.540 1.00 11.92 A1 N - ATOM 146 CA VAL A 25 31.132 16.439 58.160 1.00 11.85 A1 C - ATOM 15088 1HG1 ILE 28 -3.430 4.303 -6.057 1.00 0.00 H - ATOM 7580 HD23 LEU 1724 111.285 90.889 -61.535 - */ - /* # COLs LEN DATA TYPE FIELD DEFINITION */ - sscanf(&buff[0], "%6c", cp->record[j]); /* 1 0-5 6 Record name "ATOM " */ - sscanf(&buff[6], "%5u", &cp->serial[j]); /* 2 6-10 5 Integer serial Atom serial number. 
*/ - sscanf(&buff[12], "%1c", &cp->Hnum[j]); /* 3 12 1 hydrogen number, usu (!official). */ - sscanf(&buff[13], "%3c", cp->name[j]); /* 4 13-15 4(3) Atom name Atom name. */ - sscanf(&buff[16], "%1c", &cp->altLoc[j]); /* 5 16 1 Character altLoc Alternate location indicator. */ - sscanf(&buff[17], "%3c", cp->resName[j]); /* 6 17-19 3 Residue name resName Residue name. */ - sscanf(&buff[20], "%1c", &cp->xchainID[j]); /* 7 20 1 Character xchainID Chain identifier (!official). */ - sscanf(&buff[21], "%1c", &cp->chainID[j]); /* 8 21 1 Character chainID Chain identifier. */ - sscanf(&buff[22], "%4d", &cp->resSeq[j]); /* 9 22-25 4 Integer resSeq Residue sequence number. */ - sscanf(&buff[26], "%1c", &cp->iCode[j]); /* 10 26 1 AChar iCode Code for insertion of residues. */ - sscanf(&buff[30], "%8lf", &cp->x[j]); /* 11 30-37 8 Real(8.3) x Orthogonal coordinates for X */ - sscanf(&buff[38], "%8lf", &cp->y[j]); /* 12 38-45 8 Real(8.3) y Orthogonal coordinates for Y */ - sscanf(&buff[46], "%8lf", &cp->z[j]); /* 13 46-53 8 Real(8.3) z Orthogonal coordinates for Z */ - sscanf(&buff[54], "%6lf", &cp->occupancy[j]); /* 14 54-59 6 Real(6.2) occupancy Occupancy. */ - sscanf(&buff[60], "%6lf", &cp->tempFactor[j]);/* 15 60-65 6 Real(6.2) tempFactor Temperature factor. */ - sscanf(&buff[72], "%4c", cp->segID[j]); /* 16 72-75 4 LString(4) segID Segment identifier, left-just. */ - sscanf(&buff[76], "%2c", cp->element[j]); /* 17 76-77 2 LString(2) element Element symbol, right-just. */ - sscanf(&buff[78], "%2c", cp->charge[j]); /* 18 78-79 2 LString(2) charge Charge on the atom. */ - - //printf("%4d:%4d %d %d \'%s\'\n", i, j, cp->serial[j], cp->Hnum[j], cp->name[j]); + sscanf(buff, "%6c%5u%*1c%1c%3c%1c%3c%1c%1c%4d%1c%*3c%8lf%8lf%8lf%6lf%6lf%*6c%4c%2c%2c", + cp->record[j], /* 1 0-5 6 Record name "ATOM " */ + &cp->serial[j], /* 2 6-10 5 Integer serial Atom serial number. */ + &cp->Hnum[j], /* 3 12 1 hydrogen number, usu (!official). */ + cp->name[j], /* 4 13-15 4(3) Atom name Atom name. 
*/ + &cp->altLoc[j], /* 5 16 1 Character altLoc Alternate location indicator. */ + cp->resName[j], /* 6 17-19 3 Residue name resName Residue name. */ + &cp->xchainID[j], /* 7 20 1 Character xchainID Chain identifier (!official). */ + &cp->chainID[j], /* 8 21 1 Character chainID Chain identifier. */ + &cp->resSeq[j], /* 9 22-25 4 Integer resSeq Residue sequence number. */ + &cp->iCode[j], /* 10 26 1 AChar iCode Code for residue insertion. */ + &cp->x[j], /* 11 30-37 8 Real(8.3) x Orthogonal coordinates for X */ + &cp->y[j], /* 12 38-45 8 Real(8.3) y Orthogonal coordinates for Y */ + &cp->z[j], /* 13 46-53 8 Real(8.3) z Orthogonal coordinates for Z */ + &cp->occupancy[j], /* 14 54-59 6 Real(6.2) occupancy Nuupancy. */ + &cp->tempFactor[j],/* 15 60-65 6 Real(6.2) tempFactor Temperature factor. */ + cp->segID[j], /* 16 72-75 4 LString(4) segID Segment identifier, left-just. */ + cp->element[j], /* 17 76-77 2 LString(2) element Element symbol, right-just. */ + cp->charge[j]); /* 18 78-79 2 LString(2) charge Charge on the atom. 
*/ } } @@ -281,14 +344,14 @@ int ReadPDBCds(char *pdbfile_name, PDBCdsArray *pdbA, int cds_i, int modelnum, int amber, int fix_atom_names) { - //char buff[99]; int bufflen = 256; char *buff = NULL; int i, j, pos; FILE *pdbfile = NULL; char pdbdir[FILENAME_MAX], dirpdbfile_name[FILENAME_MAX]; - PDBCds **cds = pdbA->cds; - PDBCds *cp = NULL; + PDBCds **cds = pdbA->cds; + PDBCds *cp = NULL; + buff = calloc(bufflen, sizeof(char)); if (buff == NULL) @@ -307,7 +370,6 @@ if (getenv("PDBDIR") != NULL) { strncpy(pdbdir, getenv("PDBDIR"), FILENAME_MAX); - strncpy(pdbdir, getenv("PDBDIR"), FILENAME_MAX); strncpy(dirpdbfile_name, pdbdir, FILENAME_MAX - 1); strncat(dirpdbfile_name, pdbfile_name, FILENAME_MAX - strlen(pdbfile_name) - 1); @@ -335,7 +397,7 @@ { if (strncmp(buff, "MODEL", 5) == 0 || strncmp(buff, "REMARK FRAME:", 13) == 0 || - strncmp(buff, "REMARK OPLS-AA", 15) == 0) // DLTPU + strncmp(buff, "REMARK OPLS", 12) == 0) { cp->model = i; break; @@ -361,11 +423,14 @@ strncmp(buff, "HETATM", 6) == 0) { ScanPDBLine(buff, cp, j, amber); - if (fix_atom_names == 1) + if (fix_atom_names) XPLORcorrections(cp, j); if (isfinite(cp->occupancy[j]) == 0) + { cp->occupancy[j] = 1.0; + cp->nu[j] = 1; + } ++j; } @@ -375,363 +440,349 @@ //PrintPDBCds(stdout, pdbA->cds[0]); free(buff); fclose(pdbfile); + return(EXIT_SUCCESS); } -// CdsArray -// *GetDefaultCdsSel(CdsArray *baseA, PDBCdsArray *pdbA) -// { -// int vlen, slxnlen = 0; -// int *selection_index = NULL; /* array of ints corresponding to selected atoms */ -// int i, lower, upper, pdbi; -// -// if (pdbA->cnum < 2) -// { -// fprintf(stderr, "\n ERROR23: Number of cds in pdbfiles is less than 2. 
\n"); -// PrintTheseusTag(); -// exit(EXIT_FAILURE); -// } -// -// CdsArrayAllocNum(baseA, pdbA->cnum); -// -// for (pdbi = 0; pdbi < pdbA->cnum; ++pdbi) -// { -// lower = 0; -// upper = vlen = pdbA->cds[pdbi]->vlen; -// -// if (upper - 4 < lower) -// { -// fprintf(stderr, "\n ERROR: upper residue bound must be at least 3 greater than the lower bound. \n"); -// PrintTheseusTag(); -// exit(EXIT_FAILURE); -// } -// -// selection_index = (int *) malloc (vlen * sizeof(int)); -// if (selection_index == NULL) -// { -// perror("\n\n ERROR"); -// fprintf(stderr, "\n ERROR: could not allocate memory for selection_index in GetCdsSelection(). \n"); -// PrintTheseusTag(); -// exit(EXIT_FAILURE); -// } -// -// slxnlen = 0; -// for (i = 0; i < vlen; ++i) -// { -// if ( strncmp(pdbA->cds[pdbi]->record[i], "ATOM ", 6) == 0 -// && ( pdbA->cds[pdbi]->altLoc[i] == ' ' -// || pdbA->cds[pdbi]->altLoc[i] == 'A' -// || pdbA->cds[pdbi]->altLoc[i] == '1') -// /* && strncmp(pdbA->cds[pdbi]->resName[i], "GLY", 3) != 0 */ /* DLT - skip glycines for now */ -// && atom_selxn(pdbA->cds[pdbi]->name[i], baseA->algo->atoms, baseA->algo->atomslxn /* DLT - use '3' to get CBs */) == 1) -// { -// selection_index[slxnlen] = i; -// ++slxnlen; -// } -// } -// -// baseA->cds[pdbi] = CdsInit(); -// CdsAlloc(baseA->cds[pdbi], slxnlen); -// -// baseA->cds[pdbi]->model = pdbA->cds[pdbi]->model; -// strcpy(baseA->cds[pdbi]->filename, pdbA->cds[pdbi]->filename); -// -// for (i = 0; i < slxnlen; ++i) -// { -// strncpy(baseA->cds[pdbi]->resName[i], pdbA->cds[pdbi]->resName[selection_index[i]], 4); -// baseA->cds[pdbi]->resSeq[i] = pdbA->cds[pdbi]->resSeq[selection_index[i]]; -// baseA->cds[pdbi]->x[i] = pdbA->cds[pdbi]->x[selection_index[i]]; -// baseA->cds[pdbi]->y[i] = pdbA->cds[pdbi]->y[selection_index[i]]; -// baseA->cds[pdbi]->z[i] = pdbA->cds[pdbi]->z[selection_index[i]]; -// baseA->cds[pdbi]->o[i] = pdbA->cds[pdbi]->occupancy[selection_index[i]]; -// baseA->cds[pdbi]->b[i] = 
pdbA->cds[pdbi]->tempFactor[selection_index[i]]; -// } -// -// free(selection_index); -// } -// -// baseA->vlen = slxnlen; -// baseA->avecds = CdsInit(); -// CdsAlloc(baseA->avecds, slxnlen); -// /*PrintCds(baseA->cds[0]); -// PrintCds(baseA->cds[1]);*/ -// -// return (baseA); -// } - - -/* static int */ -/* isdelim(char character, char *delims) */ -/* { */ -/* int i, len; */ -/* */ -/* len = strlen(delims); */ -/* */ -/* for (i = 0; i < len; ++i) */ -/* { */ -/* if (character == delims[i]) */ -/* return(1); */ -/* } */ -/* return (0); */ -/* } */ - - static int -GetSlxnLen(PDBCdsArray *pdbA, const int crds, Algorithm *algo, char *chains, - int *lower, int *upper, const int range_num, int *selection_index, int rev) +ScanTPSLine(char *buff, PDBCds *cp, int j) { - int i, vlen; +// char *endline = NULL; + int numret; - vlen = 0; - for (i = 0; i < pdbA->cds[crds]->vlen; ++i) - { -/* printf("\n%s, %s, %c, %c, %d", */ -/* pdbA->cds[crds]->record[i], */ -/* pdbA->cds[crds]->name[i], */ -/* pdbA->cds[crds]->altLoc[i], */ -/* pdbA->cds[crds]->chainID[i], */ -/* pdbA->cds[crds]->resSeq[i]); */ -/* fflush(NULL); */ - if ( ( strncmp(pdbA->cds[crds]->record[i], "ATOM ", 6) == 0 - || strncmp(pdbA->cds[crds]->record[i], "HETATM", 6) == 0) - && atom_selxn(pdbA->cds[crds]->name[i], algo->atoms, algo->atomslxn) == 1 - && (pdbA->cds[crds]->altLoc[i] == ' ' || pdbA->cds[crds]->altLoc[i] == 'A') - && range_selxn(pdbA->cds[crds]->chainID[i], chains, pdbA->cds[crds]->resSeq[i], lower, upper, range_num) == rev) /* DLT debug FIX THIS!!!! 
*/ - { - if (selection_index != NULL) - selection_index[vlen] = i; - ++vlen; - } - } +// endline = strpbrk(&buff[54], "\r\n\f"); +// if (endline != NULL) /* kill the newline, if there */ +// *endline = '\0'; + + strncpy(cp->record[j], "ATOM ", 6); + cp->serial[j] = j; + cp->Hnum[j] = ' '; + strncpy(cp->name[j], "CA ", 3); + cp->altLoc[j] = ' '; + strncpy(cp->resName[j], "LM ", 3); + cp->xchainID[j] = ' '; + cp->chainID[j] = 'A'; + cp->resSeq[j] = j; + cp->iCode[j] = ' '; + numret = sscanf(buff, "%lf%lf%lf", &cp->x[j], &cp->y[j], &cp->z[j]); + cp->occupancy[j] = 1.0; + cp->tempFactor[j] = 1.0; + strncpy(cp->segID[j], " ", 4); + strncpy(cp->element[j], " ", 2); + strncpy(cp->charge[j], " ", 2); + cp->nu[j] = 1; - return(vlen); + return(numret); } -void -GetCdsSelection(CdsArray *baseA, PDBCdsArray *pdbA) +/* reads coordinates for each model in a Rohlf-style tps morphometrics file */ +int +ReadTPSCds(char *pdbfile_name, PDBCdsArray *pdbA, int cds_i, int modelnum) { - int vlen, last_len; - int *selection_index = NULL; /* array of ints corresponding to selected atoms */ - int i, j; - char *chains = NULL; /* array of chars holding chain IDs of ranges in the selection criteria */ - int *upper = NULL, *lower = NULL; /* an array of ints holding the upper and lower range bounds */ - int selection_len; - char **endptr = NULL; - int range_num = 1; /* number of residue ranges specified in selection */ - char **selections = NULL; /* an array of range_num strings to hold each range selection */ - char delims[] = ":"; + int bufflen = 256; + char *buff = NULL; + int i, j, pos, numret; + FILE *pdbfile = NULL; + char pdbdir[FILENAME_MAX], dirpdbfile_name[FILENAME_MAX]; + PDBCds **cds = pdbA->cds; + PDBCds *cp = NULL; + - if (baseA->algo->selection != NULL) + buff = calloc(bufflen, sizeof(char)); + if (buff == NULL) { - selection_len = strlen(baseA->algo->selection); + perror("\n\n ERROR"); + fprintf(stderr, "\n\n ERROR_071: could not allocate memory for buff. 
\n"); + PrintTheseusTag(); + exit(EXIT_FAILURE); + } - for(i = 0; i < selection_len; ++i) + buff[bufflen-1] = '\0'; + + pdbfile = fopen(pdbfile_name, "r"); + if (pdbfile == NULL) + { + if (getenv("PDBDIR") != NULL) { - if (baseA->algo->selection[i] == ':') - ++range_num; + strncpy(pdbdir, getenv("PDBDIR"), FILENAME_MAX); + strncpy(dirpdbfile_name, pdbdir, FILENAME_MAX - 1); + strncat(dirpdbfile_name, pdbfile_name, FILENAME_MAX - strlen(pdbfile_name) - 1); + + pdbfile = fopen(dirpdbfile_name, "r"); } - selections = (char **) calloc(range_num, sizeof(char *)); - lower = (int *) calloc(range_num, sizeof(int)); - upper = (int *) calloc(range_num, sizeof(int)); - chains = (char *) calloc(range_num, sizeof(char)); - if (selections == NULL || lower == NULL || upper == NULL || chains == NULL) + if (pdbfile == NULL) { - perror("\n ERROR"); - fprintf(stderr, "\n ERROR1000: could not allocate memory for selections in GetCdsSelection(). \n"); + perror("\n\n ERROR"); + fprintf(stderr, "\n\n ERROR71: file \"%s\" not found. \n", pdbfile_name); PrintTheseusTag(); exit(EXIT_FAILURE); } + } - for (i = 0; i < range_num; ++i) + for (i = 0; i < modelnum; ++i) + { + pos = cds_i + i; + cp = cds[pos]; + + /* move to next model */ + if (modelnum > 1) { - selections[i] = (char *) calloc(128, sizeof(char)); - if (selections[i] == NULL) + while (fgets(buff, bufflen, pdbfile) != NULL) { - perror("\n ERROR"); - fprintf(stderr, "\n ERROR1001: could not allocate memory for selections[] in GetCdsSelection(). 
\n"); - PrintTheseusTag(); - exit(EXIT_FAILURE); + if (strncmp(buff, "LM3", 3) == 0) + { + cp->model = i; + break; + } } } - /* if the user specified XPLOR/CNS-style atoms, translate them to standard PDB format (DNA primes should be "*", not "'") */ - for (i = 0; i < strlen(baseA->algo->selection); ++i) - if (baseA->algo->selection[i] == '\'') - baseA->algo->selection[i] = '*'; - - /* copy each range selection string into the 'selections[]' array */ - mystrncpy(selections[0], strtok(baseA->algo->selection, delims), 127); - for (i = 1; i < range_num; ++i) - mystrncpy(selections[i], strtok(NULL, delims), 127); - -/* for (i = 0; i < range_num; ++i) */ -/* printf"\n selections[%d] = %s", i, selections[i]); */ + // printf("i=%d\n", i); + // fflush(NULL); - for (j = 0; j < range_num; ++j) + /* read in the model's cds */ + j = 0; + while (fgets(buff, bufflen, pdbfile)) { - /* parse residue number range */ - selection_len = strlen(selections[j]); - - i = 0; - while(isspace(selections[j][i]) && i < selection_len) - ++i; - - if (isalpha(selections[j][i])) + //printf("BUFF[%d]:%s\n", j, buff);fflush(NULL); + if (j >= cp->vlen || + (strncmp(buff, "IMAGE", 5) == 0) || + (strncmp(buff, "ID", 2) == 0) || + (strncmp(buff, "SCALE", 5) == 0)) { - chains[j] = toupper(selections[j][i]); - ++i; + //printf("INNERBUFF:%s\n", buff);fflush(NULL); + if (strncmp(buff, "ID", 2) == 0) + { + sscanf(buff, "ID=%s", cp->filename); + //printf("ID=%s\n", cp->filename);fflush(NULL); + } + + break; } else - chains[j] = 0; - - if (isalpha(selections[j][i])) { - fprintf(stderr, "\n ERROR1002: incorrect format for chainID selection (too many characters). 
\n"); - Usage(0); - exit(EXIT_FAILURE); + numret = ScanTPSLine(buff, cp, j); + //printf("LM %4d [%4d]: %16f %16f %16f\n", j, numret, cp->x[j], cp->y[j], cp->z[j]); + + if (numret == 3) + ++j; } + } - if (isdigit(selections[j][i])) - { - lower[j] = (int) strtol(&selections[j][i], endptr, 10); + //mystrncpy(cp->filename, pdbfile_name, FILENAME_MAX - 1); + } - while(selections[j][i] != '-' && i < selection_len) - ++i; + //PrintPDBCds(stdout, pdbA->cds[0]); - ++i; - while(isspace(selections[j][i]) && i < selection_len) - ++i; + free(buff); + fclose(pdbfile); - upper[j] = (int) strtol(&selections[j][i], endptr, 10); - } - else - { - lower[j] = 0; - upper[j] = pdbA->vlen - 1; - } - } - } - else - { - range_num = 1; - selections = (char **) calloc(1, sizeof(char *)); - lower = (int *) calloc(1, sizeof(int)); - upper = (int *) calloc(1, sizeof(int)); - chains = (char *) calloc(1, sizeof(char)); - selections[0] = (char *) calloc(128, sizeof(char)); - if (selections == NULL || lower == NULL || upper == NULL || chains == NULL || selections[0] == NULL) - { - perror("\n ERROR"); - fprintf(stderr, "\n ERROR1003: could not allocate memory for selections in GetCdsSelection(). \n"); - PrintTheseusTag(); - exit(EXIT_FAILURE); - } + return(EXIT_SUCCESS); +} - lower[0] = -INT_MAX; - upper[0] = INT_MAX; - } -/* if (upper[0] - 4 < lower[0]) */ -/* { */ -/* fprintf(stderr, "\n ERROR876: upper residue bound must be at least 3 greater than the lower bound. 
%d %d \n", */ -/* upper[0], lower[0]); */ -/* PrintTheseusTag(); */ -/* exit(EXIT_FAILURE); */ -/* } */ +PDBCdsArray +*GetTPSCds(char **argv_array, int narguments) +{ + int i, j, k; + int *models_per_tps = NULL; + int *len_array = NULL; + int cnum, models, vlen, last_vlen; + char tpsfile_name[FILENAME_MAX], pdbdir[FILENAME_MAX], + dirtpsfile_name[FILENAME_MAX]; + char buff[256]; + FILE *tpsfile = NULL; + PDBCdsArray *pdbA = NULL; - selection_index = (int *) calloc (pdbA->vlen, sizeof(int)); - if (selection_index == NULL) + if (getenv("PDBDIR") != NULL) + strncpy(pdbdir, getenv("PDBDIR"), FILENAME_MAX); + + vlen = last_vlen = 0; + + models_per_tps = (int *) calloc(narguments, sizeof(int)); + len_array = (int *) calloc(narguments, sizeof(int)); + if (models_per_tps == NULL || len_array == NULL) { perror("\n ERROR"); - fprintf(stderr, "\n ERROR1004: could not allocate memory for selection_index in GetCdsSelection(). \n"); + fprintf(stderr, + "\n ERROR1: could not allocate memory for 'models_per_tps' or 'len_array' in GetTPSCds(). \n"); PrintTheseusTag(); exit(EXIT_FAILURE); } - /* First count the number of selected atoms for each structure, and make sure they are all the same */ - last_len = vlen = 0; - for (i = 0; i < pdbA->cnum; ++i) + /* count models and atoms, verify consistency and allocate */ + cnum = 0; + for (i = 0; i < narguments; ++i) { -// printf("\n%s, %s, %c, %c, %d", -// pdbA->cds[i]->record[i], -// pdbA->cds[i]->name[i], -// pdbA->cds[i]->altLoc[i], -// pdbA->cds[i]->chainID[i], -// pdbA->cds[i]->resSeq[i]); + strncpy(tpsfile_name, argv_array[i], 255); + + tpsfile = fopen(tpsfile_name, "r"); + if (tpsfile == NULL) + { + strncpy(dirtpsfile_name, pdbdir, FILENAME_MAX - 1); + strncat(dirtpsfile_name, tpsfile_name, FILENAME_MAX - strlen(tpsfile_name) - 1); + + tpsfile = fopen(dirtpsfile_name, "r"); + + if (tpsfile == NULL) + { + perror("\n ERROR"); + fprintf(stderr, "\n ERROR73: file \"%s\" not found. 
\n", tpsfile_name); + PrintTheseusTag(); + exit(EXIT_FAILURE); + } + } + + models = 0; + last_vlen = 0; + while (fgets(buff, (int) sizeof(buff), tpsfile) != NULL) + { + if (strncmp(buff, "LM3", 3) == 0) // DLT + { + sscanf(buff, "LM3=%d", &vlen); + +// printf("\nvlen=%d\n", vlen); +// fflush(NULL); + + if ((models > 0) && (vlen != last_vlen)) + { + fprintf(stderr, + "\n ERROR1005: cds #%d in \"%s\" has a different landmark number (%d) than the preceding cds (%d). \n", + models + 1, tpsfile_name, vlen, last_vlen); + PrintTheseusTag(); + exit(EXIT_FAILURE); + } + else if (vlen < 3) + { + fprintf(stderr, + "\n ERROR1006: cds #%d in \"%s\" has no selected landmarks. \n", + models + 1, tpsfile_name); + PrintTheseusTag(); + exit(EXIT_FAILURE); + } + else + { + last_vlen = vlen; + } + + ++models; + } + } + +// printf("\nmodels=%d\n",models); // fflush(NULL); - last_len = vlen; - if (baseA->algo->revsel == 0) - vlen = GetSlxnLen(pdbA, i, baseA->algo, chains, lower, upper, range_num, NULL, 1); + + if (models) + { + models_per_tps[i] = models; + cnum += models; + } else - vlen = GetSlxnLen(pdbA, i, baseA->algo, chains, lower, upper, range_num, NULL, 0); - - if (i > 0 && vlen != last_len) { - fprintf(stderr, - "\n ERROR1: length of selection is (%d) different from preceding cds (%d) in GetCdsSelection() \n", vlen, i+1); - PrintTheseusTag(); - exit(EXIT_FAILURE); + models_per_tps[i] = 1; + ++cnum; + vlen = 0; + rewind(tpsfile); + + while (fgets(buff, sizeof(buff), tpsfile) != NULL) + { + if (strncmp(buff, "LM3", 3) == 0) // DLT + { + sscanf(buff, "LM3=%d", &vlen); + +// printf("\nvlen=%d\n", vlen); +// fflush(NULL); + } + else if ((strncmp(buff, "IMAGE", 5) == 0) || + (strncmp(buff, "ID", 2) == 0) || + (strncmp(buff, "SCALE", 5) == 0)) + { + break; + } + } } + fclose(tpsfile); + if (vlen < 3) { - fprintf(stderr, - "\n ERROR3: 'vlen' is too small (%d) in GetCdsSelection(); not enough atoms selected in cds %d. 
\n", vlen, i+1); + fprintf(stderr, "\n ERROR1007: 'vlen' is too small (%d) in GetTPSCds()", vlen); + fprintf(stderr, "\n Too few landmarks read. \n"); PrintTheseusTag(); exit(EXIT_FAILURE); } + + len_array[i] = vlen; +// printf("\n vlen[%d] = %d\n", i, vlen); } - /* Now allocate based on this length */ - /* baseA = CdsArrayInit(); */ /* NB!!: we don't need to initialize, since the pointer was passed above */ - CdsArrayAlloc(baseA, pdbA->cnum, vlen); - CdsArraySetup(baseA); + pdbA = PDBCdsArrayInit(); + PDBCdsArrayAllocNum(pdbA, cnum); - /* Now get the selections again and read them in this time */ - for (j = 0; j < pdbA->cnum; ++j) + k = 0; + for (i = 0; i < narguments; ++i) { - if (baseA->algo->revsel == 0) - vlen = GetSlxnLen(pdbA, j, baseA->algo, chains, lower, upper, range_num, selection_index, 1); - else - vlen = GetSlxnLen(pdbA, j, baseA->algo, chains, lower, upper, range_num, selection_index, 0); - - baseA->cds[j]->model = pdbA->cds[j]->model; - strncpy(baseA->cds[j]->filename, pdbA->cds[j]->filename, FILENAME_MAX - 1); - - for (i = 0; i < baseA->vlen; ++i) + for (j = 0; j < models_per_tps[i]; ++j) { - strncpy(baseA->cds[j]->resName[i], pdbA->cds[j]->resName[selection_index[i]], 3); - baseA->cds[j]->chainID[i] = pdbA->cds[j]->chainID[selection_index[i]]; - baseA->cds[j]->resSeq[i] = pdbA->cds[j]->resSeq[selection_index[i]]; - baseA->cds[j]->x[i] = pdbA->cds[j]->x[selection_index[i]]; - baseA->cds[j]->y[i] = pdbA->cds[j]->y[selection_index[i]]; - baseA->cds[j]->z[i] = pdbA->cds[j]->z[selection_index[i]]; - baseA->cds[j]->o[i] = 1.0;/* pdbA->cds[j]->occupancy[selection_index[i]]; */ /* DLT debug - shouldn't need to do this, I think prob is in SuperPose() */ - baseA->cds[j]->b[i] = pdbA->cds[j]->tempFactor[selection_index[i]]; + PDBCdsAlloc(pdbA->cds[k], len_array[i]); + ++k; } } -/* PrintCds(baseA->cds[0]); */ -/* PrintCds(baseA->cds[1]); */ + /* read in all cds for all pdb files */ + for (i = 0, j = 0; i < narguments && j < cnum; ++i) + { + 
strncpy(tpsfile_name, argv_array[i], FILENAME_MAX - 1); + ReadTPSCds(tpsfile_name, pdbA, j, models_per_tps[i]); + j += models_per_tps[i]; + } - for (i = 0; i < range_num; ++i) - free(selections[i]); + free(models_per_tps); + free(len_array); - free(selections); - free(selection_index); - free(upper); - free(lower); - free(chains); + return(pdbA); +} + + +static int +GetSlxnLen(PDBCdsArray *pdbA, const int crds, Algorithm *algo, char *chains, + int *lower, int *upper, const int range_num, int *selection_index, int rev) +{ + int i, vlen; + + vlen = 0; + for (i = 0; i < pdbA->cds[crds]->vlen; ++i) + { +/* printf("\n%s, %s, %c, %c, %d", */ +/* pdbA->cds[crds]->record[i], */ +/* pdbA->cds[crds]->name[i], */ +/* pdbA->cds[crds]->altLoc[i], */ +/* pdbA->cds[crds]->chainID[i], */ +/* pdbA->cds[crds]->resSeq[i]); */ +/* fflush(NULL); */ + if ( ( strncmp(pdbA->cds[crds]->record[i], "ATOM ", 6) == 0 + || strncmp(pdbA->cds[crds]->record[i], "HETATM", 6) == 0) + && atom_selxn(pdbA->cds[crds]->name[i], algo->atoms, algo->atomslxn) + && (pdbA->cds[crds]->altLoc[i] == ' ' || pdbA->cds[crds]->altLoc[i] == 'A') + && range_selxn(pdbA->cds[crds]->chainID[i], chains, pdbA->cds[crds]->resSeq[i], lower, upper, range_num) == rev) /* DLT debug FIX THIS!!!! 
*/ + { + if (selection_index != NULL) + selection_index[vlen] = i; + ++vlen; + } + } + + return(vlen); } void -GetCdsSelection_old(CdsArray *baseA, PDBCdsArray *pdbA) +GetCdsSelection(CdsArray *baseA, PDBCdsArray *pdbA) { - int vlen; + int vlen, last_len; int *selection_index = NULL; /* array of ints corresponding to selected atoms */ int i, j; char *chains = NULL; /* array of chars holding chain IDs of ranges in the selection criteria */ @@ -742,13 +793,13 @@ char **selections = NULL; /* an array of range_num strings to hold each range selection */ char delims[] = ":"; - if (baseA->algo->selection != NULL) + if (algo->selection != NULL) { - selection_len = strlen(baseA->algo->selection); + selection_len = strlen(algo->selection); for(i = 0; i < selection_len; ++i) { - if (baseA->algo->selection[i] == ':') + if (algo->selection[i] == ':') ++range_num; } @@ -777,12 +828,12 @@ } /* if the user specified XPLOR/CNS-style atoms, translate them to standard PDB format (DNA primes should be "*", not "'") */ - for (i = 0; i < strlen(baseA->algo->selection); ++i) - if (baseA->algo->selection[i] == '\'') - baseA->algo->selection[i] = '*'; + for (i = 0; i < strlen(algo->selection); ++i) + if (algo->selection[i] == '\'') + algo->selection[i] = '*'; /* copy each range selection string into the 'selections[]' array */ - mystrncpy(selections[0], strtok(baseA->algo->selection, delims), 127); + mystrncpy(selections[0], strtok(algo->selection, delims), 127); for (i = 1; i < range_num; ++i) mystrncpy(selections[i], strtok(NULL, delims), 127); @@ -870,71 +921,56 @@ exit(EXIT_FAILURE); } - vlen = 0; - - if (baseA->algo->revsel == 0) + /* First count the number of selected atoms for each structure, and make sure they are all the same */ + last_len = vlen = 0; + for (i = 0; i < pdbA->cnum; ++i) { - for (i = 0; i < pdbA->vlen; ++i) +// printf("\n%s, %s, %c, %c, %d", +// pdbA->cds[i]->record[i], +// pdbA->cds[i]->name[i], +// pdbA->cds[i]->altLoc[i], +// pdbA->cds[i]->chainID[i], +// 
pdbA->cds[i]->resSeq[i]); +// fflush(NULL); + last_len = vlen; + if (algo->revsel == 0) + vlen = GetSlxnLen(pdbA, i, algo, chains, lower, upper, range_num, NULL, 1); + else + vlen = GetSlxnLen(pdbA, i, algo, chains, lower, upper, range_num, NULL, 0); + + if (i > 0 && vlen != last_len) { -/* printf("\n%s, %s, %c, %c, %d", */ -/* pdbA->cds[0]->record[i], */ -/* pdbA->cds[0]->name[i], */ -/* pdbA->cds[0]->altLoc[i], */ -/* pdbA->cds[0]->chainID[i], */ -/* pdbA->cds[0]->resSeq[i]); */ - if ( ( strncmp(pdbA->cds[0]->record[i], "ATOM ", 6) == 0 - || strncmp(pdbA->cds[0]->record[i], "HETATM", 6) == 0) - && atom_selxn(pdbA->cds[0]->name[i], baseA->algo->atoms, baseA->algo->atomslxn) == 1 - && (pdbA->cds[0]->altLoc[i] == ' ' || pdbA->cds[0]->altLoc[i] == 'A') - && range_selxn(pdbA->cds[0]->chainID[i], chains, pdbA->cds[0]->resSeq[i], lower, upper, range_num) == 1) /* DLT debug FIX THIS!!!! */ - { - selection_index[vlen] = i; - ++vlen; - } + fprintf(stderr, + "\n ERROR1: length of selection is (%d) different from preceding cds (%d) in GetCdsSelection() \n", vlen, i+1); + PrintTheseusTag(); + exit(EXIT_FAILURE); } - } - else - { - for (i = 0; i < pdbA->vlen; ++i) + + if (vlen < 3) { -/* printf("\n\n%s, %s, %c, %c, %d\n", */ -/* pdbA->cds[0]->record[i], */ -/* pdbA->cds[0]->name[i], */ -/* pdbA->cds[0]->altLoc[i], */ -/* pdbA->cds[0]->chainID[i], */ -/* pdbA->cds[0]->resSeq[i]); */ - if ( ( strncmp(pdbA->cds[0]->record[i], "ATOM ", 6) == 0 - || strncmp(pdbA->cds[0]->record[i], "HETATM", 6) == 0) - && atom_selxn(pdbA->cds[0]->name[i], baseA->algo->atoms, baseA->algo->atomslxn) == 1 - && (pdbA->cds[0]->altLoc[i] == ' ' || pdbA->cds[0]->altLoc[i] == 'A') - && range_selxn(pdbA->cds[0]->chainID[i], chains, pdbA->cds[0]->resSeq[i], lower, upper, range_num) == 0) /* DLT debug FIX THIS!!!! */ - { - selection_index[vlen] = i; - ++vlen; - } + fprintf(stderr, + "\n ERROR3: 'vlen' is too small (%d) in GetCdsSelection(); not enough atoms selected in cds %d. 
\n", vlen, i+1); + PrintTheseusTag(); + exit(EXIT_FAILURE); } } - if (vlen < 3) - { - fprintf(stderr, - "\n ERROR3: 'vlen' is too small (%d) in GetCdsSelection(); not enough atoms selected. \n", vlen); - PrintTheseusTag(); - exit(EXIT_FAILURE); - } - + /* Now allocate based on this length */ /* baseA = CdsArrayInit(); */ /* NB!!: we don't need to initialize, since the pointer was passed above */ CdsArrayAlloc(baseA, pdbA->cnum, vlen); CdsArraySetup(baseA); - for (i = 0; i < pdbA->cnum; ++i) - { - baseA->cds[i]->model = pdbA->cds[i]->model; - strncpy(baseA->cds[i]->filename, pdbA->cds[i]->filename, FILENAME_MAX - 1); - } - + /* Now get the selections again and read them in this time */ for (j = 0; j < pdbA->cnum; ++j) { + if (algo->revsel == 0) + vlen = GetSlxnLen(pdbA, j, algo, chains, lower, upper, range_num, selection_index, 1); + else + vlen = GetSlxnLen(pdbA, j, algo, chains, lower, upper, range_num, selection_index, 0); + + baseA->cds[j]->model = pdbA->cds[j]->model; + strncpy(baseA->cds[j]->filename, pdbA->cds[j]->filename, FILENAME_MAX - 1); + for (i = 0; i < baseA->vlen; ++i) { strncpy(baseA->cds[j]->resName[i], pdbA->cds[j]->resName[selection_index[i]], 3); @@ -943,8 +979,10 @@ baseA->cds[j]->x[i] = pdbA->cds[j]->x[selection_index[i]]; baseA->cds[j]->y[i] = pdbA->cds[j]->y[selection_index[i]]; baseA->cds[j]->z[i] = pdbA->cds[j]->z[selection_index[i]]; - baseA->cds[j]->o[i] = 1.0;/* pdbA->cds[j]->occupancy[selection_index[i]]; */ /* DLT debug - shouldn't need to do this, I think prob is in SuperPose() */ + baseA->cds[j]->o[i] = pdbA->cds[j]->occupancy[selection_index[i]]; baseA->cds[j]->b[i] = pdbA->cds[j]->tempFactor[selection_index[i]]; + baseA->cds[j]->nu[i] = 1; + baseA->cds[j]->mu[i] = 0; } } @@ -962,8 +1000,6 @@ } - - PDBCdsArray *GetPDBCds(char **argv_array, int narguments, int fmodel, int amber, int fix_atom_names) { @@ -973,7 +1009,7 @@ int cnum, models, vlen, last_vlen; char pdbfile_name[FILENAME_MAX], pdbdir[FILENAME_MAX], 
dirpdbfile_name[FILENAME_MAX]; - char buff[99]; + char buff[256]; FILE *pdbfile = NULL; PDBCdsArray *pdbA = NULL; @@ -1022,27 +1058,25 @@ { if (strncmp(buff, "MODEL", 5) == 0 || strncmp(buff, "REMARK FRAME:", 13) == 0 || - strncmp(buff, "REMARK OPLS-AA", 15) == 0) // DLTPU + strncmp(buff, "REMARK OPLS", 12) == 0) { vlen = 0; while (fgets(buff, sizeof(buff), pdbfile)) { - if ((strncmp(buff, "END", 3) == 0) || (strncmp(buff, "ENDMDL", 6) == 0)) // DLTPU + if ((strncmp(buff, "END", 3) == 0) || (strncmp(buff, "ENDMDL", 6) == 0)) break; else if (strncmp(buff, "ATOM ", 6) == 0 || strncmp(buff, "HETATM", 6) == 0) ++vlen; } /* printf("\n vlen1 = %d", vlen); */ - if (fmodel == 1) + if (fmodel) break; else if ((models > 0) && (vlen != last_vlen)) { fprintf(stderr, "\n WARNING1005: cds #%d in \"%s\" has a different atom number (%d) than the preceding cds (%d). \n", models + 1, pdbfile_name, vlen, last_vlen); -/* PrintTheseusTag(); */ -/* exit(EXIT_FAILURE); */ } else if (vlen < 3) { @@ -1061,12 +1095,12 @@ } } - if (models != 0 && fmodel == 0) + if (models && fmodel == 0) { models_per_pdb[i] = models; cnum += models; } - else if (models != 0 && fmodel == 1) /* read only the first model */ + else if (models && fmodel == 1) /* read only the first model */ { models_per_pdb[i] = 1; cnum += 1; @@ -1125,6 +1159,7 @@ free(models_per_pdb); free(len_array); + return(pdbA); } @@ -1151,7 +1186,7 @@ cds->o[i]); } - printf(" END \n\n"); + printf(" END \n\n"); fflush(NULL); } @@ -1174,13 +1209,13 @@ fprintf(pdbfile, /* r s H n aL rN x c rSiC x y z o tF sI e c */ "%-6.6s%5u %1c%3.3s%1c%-3.3s%1c%1c%4d%1c %8.3f%8.3f%8.3f%6.2f%6.2f %-4.4s%2.2s%2.2s\n", - pdbcds->record[i], serial, pdbcds->Hnum[i], + pdbcds->record[i], serial, pdbcds->Hnum[i], pdbcds->name[i], pdbcds->altLoc[i], pdbcds->resName[i], pdbcds->xchainID[i], pdbcds->chainID[i], pdbcds->resSeq[i], pdbcds->iCode[i], pdbcds->x[i], pdbcds->y[i], pdbcds->z[i], pdbcds->occupancy[i], pdbcds->tempFactor[i], pdbcds->segID[i], 
pdbcds->element[i], pdbcds->charge[i]); - /* fflush(NULL); */ + // fflush(NULL); ++serial; /* add TER cards at end of ATOM chains */ @@ -1199,7 +1234,6 @@ IsNameCAorP(pdbcds->name[i]) && IsNameCAorP(pdbcds->name[i+1]) && strncmp(pdbcds->name[i], pdbcds->name[i+1], 3) != 0) - /* (SqrPDBCdsDist(pdbcds, i, pdbcds, i + 1) > 64) */ /* DLT debug -- this should just check backbone atoms */ ) { fprintf(pdbfile, @@ -1215,7 +1249,7 @@ } -void +static void PrintCds2File(FILE *pdbfile, Cds *cds) { int i; @@ -1268,15 +1302,15 @@ } -void -PrintOccPDBCds(FILE *pdbfile, PDBCds *pdbcds) +static void +PrintNuPDBCds(FILE *pdbfile, PDBCds *pdbcds) { int i; unsigned int serial = 1; for (i = 0; i < pdbcds->vlen; ++i) { - if (pdbcds->occupancy[i] == 1.0) + if (pdbcds->nu[i]) { /* r s Hn ar xc r i x y z o tF sI e c */ /* ATOM 1949 1HB ARG A 255 19.326 -3.835 -3.438 1.00 1.31 H */ @@ -1309,7 +1343,6 @@ IsNameCAorP(pdbcds->name[i]) && IsNameCAorP(pdbcds->name[i+1]) && strncmp(pdbcds->name[i], pdbcds->name[i+1], 3) != 0) - /* (SqrPDBCdsDist(pdbcds, i, pdbcds, i + 1) > 64) */ /* DLT debug -- this should just check backbone atoms */ ) { fprintf(pdbfile, @@ -1326,7 +1359,7 @@ } -int +static int IsNameCAorP(char *name) { if (strlen(name) < 3) @@ -1338,93 +1371,6 @@ } -// int -// ReadCds(char *pdbfile_name, CdsArray *cdsA, int cds_index, int modelnum) -// { -// char buff[99], alt; -// int i = 0; -// int ncoord; -// FILE *pdbfile = NULL; -// -// pdbfile = fopen(pdbfile_name, "r"); -// if (pdbfile == NULL) -// { -// fprintf(stderr, "\n ERROR: file \"%s\" not found. 
\n", pdbfile_name); -// PrintTheseusTag(); -// exit(EXIT_FAILURE); -// } -// -// if (cdsA->cds[cds_index]->vlen == 0) -// { -// while (fgets(buff, sizeof(buff), pdbfile)) -// { -// if (strncmp(buff, "ENDMDL", 6) == 0 || -// strncmp(buff, "END ", 6) == 0) -// break; -// -// if (!strncmp(buff, "ATOM ", 6) -// && ( strncmp(&buff[13], "CA ", 3) == 0 -// || strncmp(&buff[13], "P ", 3) == 0)) -// { -// alt = buff[16]; /* alternate position indicator */ -// if (alt == ' ' || alt == 'A' || alt == '1' || alt == 'L' || alt == 'O') -// ++i; -// } -// } -// -// cdsA->vlen = i; -// rewind(pdbfile); -// } -// -// for (i = 0; i < modelnum; ++i) -// { -// /* move to next model */ -// while (fgets(buff, sizeof(buff), pdbfile)) -// { -// if (!strncmp(buff, "MODEL", 5)) -// { -// cdsA->cds[cds_index + i]->model = i; -// break; -// } -// } -// -// /* read in the model's cds */ -// ncoord = 0; -// while (fgets(buff, sizeof(buff), pdbfile)) -// { -// if (!strncmp(buff, "ENDMDL", 6) || -// !strncmp(buff, "END ", 6)) -// break; -// -// if (!strncmp(buff, "ATOM ", 6) && -// (!strncmp(&buff[13], "CA ", 3) || -// /*!strncmp(&buff[13], "CB ", 3) ||*/ -// !strncmp(&buff[13], "P ", 3))) -// { -// alt = buff[16]; /* alternate position indicator */ -// if (alt == ' ' || alt == 'A' || alt == '1' || alt == 'L' || alt == 'O') -// { -// sscanf(&buff[17], -// "%3c%*1c%4d%*4c%8lf%8lf%8lf%6lf%6lf", -// cdsA->cds[cds_index + i]->resName[ncoord], -// &cdsA->cds[cds_index + i]->resSeq[ncoord], -// &cdsA->cds[cds_index + i]->x[ncoord], -// &cdsA->cds[cds_index + i]->y[ncoord], -// &cdsA->cds[cds_index + i]->z[ncoord], -// &cdsA->cds[cds_index + i]->o[ncoord], -// &cdsA->cds[cds_index + i]->b[ncoord]); -// -// ncoord++; -// } -// } -// } -// } -// -// fclose(pdbfile); -// return (EXIT_SUCCESS); -// } - - void WriteModelFile(PDBCdsArray *pdbA, char *outfile_name) { @@ -1446,11 +1392,91 @@ for (i = 0; i < pdbA->cnum; ++i) { - fprintf(pdbfile, "MODEL %8d\n", i+1); + fprintf(pdbfile, "MODEL %4d\n", i+1); 
PrintPDBCds(pdbfile, pdbA->cds[i]); fprintf(pdbfile, "ENDMDL\n"); } - fprintf(pdbfile, "END\n"); + fprintf(pdbfile, "END \n"); + + fclose(pdbfile); +} + + +/* Prints out a set of PDB coordinates (one model) + adding TER cards when appropriate + and renumbering the 'serial' field from 1 + it can handle two character chain IDs (xchainID & chainID) */ +static void +PrintTPSCds(FILE *pdbfile, PDBCds *pdbcds) +{ + int i; + + fprintf(pdbfile, "LM3=%d\n", pdbcds->vlen); + + for (i = 0; i < pdbcds->vlen; ++i) + { + /* r s Hn ar xc r i x y z o tF sI e c */ + /* ATOM 1949 1HB ARG A 255 19.326 -3.835 -3.438 1.00 1.31 H */ + + fprintf(pdbfile, "%-.20f %-.20f %-.20f\n", pdbcds->x[i], pdbcds->y[i], pdbcds->z[i]); + // fflush(NULL); + } + + fflush(NULL); +} + + +void +WriteTheseusTPSModelFile(PDBCdsArray *pdbA, char *outfile_name) +{ + FILE *pdbfile = NULL; + int i; + + /* ////////////////////////////////////////////////////////////// */ + pdbfile = myfopen(outfile_name, "w"); + if (pdbfile == NULL) + { + perror("\n ERROR"); + fprintf(stderr, + "\n ERROR99: could not open file '%s' for writing. \n", outfile_name); + PrintTheseusTag(); + exit(EXIT_FAILURE); + } + + for (i = 0; i < pdbA->cnum; ++i) + { + PrintTPSCds(pdbfile, pdbA->cds[i]); +// fprintf(pdbfile, "ID=%d\n\n", i+1); + fprintf(pdbfile, "ID=%s\n\n", pdbA->cds[i]->filename); + } + + fclose(pdbfile); +} + + +/* writes a pdb file of the average cds */ +void +WriteAveTPSCdsFile(PDBCdsArray *pdbA, char *outfile_name) +{ + FILE *pdbfile = NULL; + + /* char avecds_filename[512]; */ + + /* strcpy(avecds_filename, getroot(outfile_name)); */ + /* strcat(outfile_name, "_ave.pdb"); */ + + pdbfile = myfopen(outfile_name, "w"); + if (pdbfile ==NULL) + { + perror("\n ERROR"); + fprintf(stderr, + "\n ERROR99: could not open file '%s' for writing. 
\n\n", outfile_name); + PrintTheseusTag(); + exit(EXIT_FAILURE); + } + + PrintTPSCds(pdbfile, pdbA->avecds); + fprintf(pdbfile, "ID=%s\n\n", "ave"); fclose(pdbfile); } @@ -1489,11 +1515,11 @@ for (i = 0; i < pdbA->cnum; ++i) { - fprintf(pdbfile, "MODEL %8d\n", i+1); + fprintf(pdbfile, "MODEL %4d\n", i+1); PrintPDBCds(pdbfile, pdbA->cds[i]); fprintf(pdbfile, "ENDMDL\n"); } - fprintf(pdbfile, "END\n"); + fprintf(pdbfile, "END \n"); fclose(pdbfile); } @@ -1531,11 +1557,11 @@ for (i = 0; i < pdbA->cnum; ++i) { - fprintf(pdbfile, "MODEL %8d\n", i+1); + fprintf(pdbfile, "MODEL %4d\n", i+1); PrintPDBCds(pdbfile, pdbA->cds[i]); fprintf(pdbfile, "ENDMDL\n"); } - fprintf(pdbfile, "END\n"); + fprintf(pdbfile, "END \n"); fclose(pdbfile); } @@ -1560,15 +1586,53 @@ } PrintTheseusModelHeader(pdbfile); + fprintf(pdbfile, "REMARK Log Likelihood %11.2f\n", stats->logL); + fprintf(pdbfile, "REMARK Log Marginal Likelihood %11.2f\n", stats->mlogL); + //PrintModelFileStats(pdbfile, pdbA, algo, stats); + + for (i = 0; i < cdsA->cnum; ++i) + { + fprintf(pdbfile, "MODEL %4d\n", i+1); + PrintCds2File(pdbfile, cdsA->cds[i]); + fprintf(pdbfile, "ENDMDL\n"); + } + fprintf(pdbfile, "END \n"); + + fclose(pdbfile); +} + + +void +OverWriteTheseusCdsModelFile(CdsArray *cdsA, char *outfile_name) +{ + FILE *pdbfile = NULL; + int i; + + /* ////////////////////////////////////////////////////////////// */ + + pdbfile = fopen(outfile_name, "w"); + if (pdbfile == NULL) + { + perror("\n ERROR"); + fprintf(stderr, + "\n ERROR99: could not open file '%s' for writing. 
\n", outfile_name); + PrintTheseusTag(); + exit(EXIT_FAILURE); + } + + PrintTheseusModelHeader(pdbfile); + fprintf(pdbfile, "REMARK Log Likelihood %11.2f\n", stats->logL); + fprintf(pdbfile, "REMARK Log Marginal Likelihood %11.2f\n", stats->mlogL); + fprintf(pdbfile, "REMARK RMSD %22.6f\n", stats->ave_paRMSD); //PrintModelFileStats(pdbfile, pdbA, algo, stats); for (i = 0; i < cdsA->cnum; ++i) { - fprintf(pdbfile, "MODEL %8d\n", i+1); + fprintf(pdbfile, "MODEL %4d\n", i+1); PrintCds2File(pdbfile, cdsA->cds[i]); fprintf(pdbfile, "ENDMDL\n"); } - fprintf(pdbfile, "END\n"); + fprintf(pdbfile, "END \n"); fclose(pdbfile); } @@ -1602,7 +1666,7 @@ PrintTheseusModelHeader(pdbfile); PrintModelFileStats(pdbfile, pdbA, algo, stats); PrintPDBCds(pdbfile, pdbA->cds[i]); - fprintf(pdbfile, "END\n"); + fprintf(pdbfile, "END \n"); fclose(pdbfile); } } @@ -1630,23 +1694,30 @@ for (i = 0; i < pdbA->cnum; ++i) { - fprintf(pdbfile, "MODEL %8d\n", i+1); - PrintOccPDBCds(pdbfile, pdbA->cds[i]); + fprintf(pdbfile, "MODEL %4d\n", i+1); + PrintNuPDBCds(pdbfile, pdbA->cds[i]); fprintf(pdbfile, "ENDMDL\n"); } - fprintf(pdbfile, "END\n"); + fprintf(pdbfile, "END \n"); fclose(pdbfile); } -void +static void PrintTheseusModelHeader(FILE *pdbfile) { time_t tod; time(&tod); + fprintf(pdbfile, "REMARK 3 \n"); + fprintf(pdbfile, "REMARK 3 REFINEMENT. 
\n"); + fprintf(pdbfile, "REMARK 3 PROGRAM : THESEUS %-10s \n", VERSION); + fprintf(pdbfile, "REMARK 3 AUTHORS : DOUGLAS THEOBALD \n"); + fprintf(pdbfile, "REMARK 3 OTHER REFINEMENT REMARKS: MAXIMUM LIKELIHOOD SUPERPOSITION \n"); + fprintf(pdbfile, "REMARK 3 \n"); + fprintf(pdbfile, "REMARK ===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-=\n"); fprintf(pdbfile, "REMARK + File made by THESEUS +\n"); fprintf(pdbfile, "REMARK + Multiple maximum likelihood superpositioning +\n"); @@ -1658,10 +1729,11 @@ fprintf(pdbfile, "REMARK on machine '%-.55s'\n", getenv("HOST")); fprintf(pdbfile, "REMARK in directory '%-.55s'\n", getenv("PWD")); fprintf(pdbfile, "REMARK =-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===\n"); + fprintf(pdbfile, "REMARK\n"); } -void +static void PrintModelFileStats(FILE *pdbfile, PDBCdsArray *pdbA, Algorithm *algo, Statistics *stats) { int i; @@ -1670,25 +1742,23 @@ fprintf(pdbfile, "REMARK %-.71s\n", &algo->cmdline[i]); fprintf(pdbfile, "REMARK\n"); for (i = 0; i < pdbA->cnum; ++i) - fprintf(pdbfile, "REMARK MODEL %8d %s %6d\n", i+1, pdbA->cds[i]->filename, pdbA->cds[i]->vlen); + fprintf(pdbfile, "REMARK MODEL %4d %s %6d\n", i+1, pdbA->cds[i]->filename, pdbA->cds[i]->vlen); fprintf(pdbfile, "REMARK\n"); fprintf(pdbfile, "REMARK =-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===\n"); fprintf(pdbfile, "REMARK unweighted %10.5f\n", stats->stddev); fprintf(pdbfile, "REMARK Classical pairwise %10.5f\n", stats->ave_paRMSD); fprintf(pdbfile, "REMARK Maximum Likelihood %10.5f\n", stats->mlRMSD); - if (algo->hierarch != 0) + if (algo->hierarch) fprintf(pdbfile, "REMARK Hierarchical var (%3.2e, %3.2e) chi^2 %10.5f\n", stats->hierarch_p1, stats->hierarch_p2, stats->hierarch_chi2); - if (algo->htrans != 0) - fprintf(pdbfile, "REMARK Translation normal chi^2 %10.5f\n", stats->htrans_chi2); - fprintf(pdbfile, "REMARK Log Likelihood %11.2f\n", stats->logL); + fprintf(pdbfile, "REMARK Log 
Marginal Likelihood %11.2f\n", stats->mlogL); fprintf(pdbfile, "REMARK AIC %11.2f\n", stats->AIC); fprintf(pdbfile, "REMARK BIC %11.2f\n", stats->BIC); fprintf(pdbfile, "REMARK Rotational, translational, covar chi^2 %11.2f\n", stats->chi2); - if (algo->hierarch != 0) + if (algo->hierarch) { fprintf(pdbfile, "REMARK Hierarchical var (%3.2e, %3.2e) chi^2 %11.2f\n", stats->hierarch_p1, stats->hierarch_p2, stats->hierarch_chi2); @@ -1696,14 +1766,12 @@ stats->omnibus_chi2); } - if (algo->htrans != 0) - fprintf(pdbfile, "REMARK Translation normal chi^2 %10.5f\n", stats->htrans_chi2); - fprintf(pdbfile, "REMARK skewness %7.3f\n", stats->skewness[3]); fprintf(pdbfile, "REMARK skewness Z-value %7.3f\n", fabs(stats->skewness[3]/stats->SES)); fprintf(pdbfile, "REMARK kurtosis %7.3f\n", stats->kurtosis[3]); fprintf(pdbfile, "REMARK kurtosis Z-value %7.3f\n", fabs(stats->kurtosis[3]/stats->SEK)); fprintf(pdbfile, "REMARK =-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===\n"); + fprintf(pdbfile, "NUMMDL %-d\n", pdbA->cnum); } @@ -1718,7 +1786,7 @@ } -void +static void WritePDBCdsFile(PDBCds *cds, char *file_name) { FILE *pdbfile = NULL; @@ -1735,7 +1803,8 @@ PrintTheseusModelHeader(pdbfile); PrintPDBCds(pdbfile, cds); - fprintf(pdbfile, "END\n\n"); + fprintf(pdbfile, "END \n\n"); + fflush(NULL); fclose(pdbfile); } @@ -1769,7 +1838,8 @@ PrintTheseusModelHeader(pdbfile); PrintCds2File(pdbfile, cdsA->avecds); - fprintf(pdbfile, "END\n"); + fprintf(pdbfile, "END \n\n"); + fflush(NULL); fclose(pdbfile); } @@ -1797,7 +1867,8 @@ PrintTheseusModelHeader(pdbfile); PrintPDBCds(pdbfile, pdbA->avecds); - fprintf(pdbfile, "END\n\n"); + fprintf(pdbfile, "END \n\n"); + fflush(NULL); fclose(pdbfile); } @@ -1835,7 +1906,7 @@ { int i, cnum, vlen; FILE *fp = NULL; - PDBCdsArray *pdbA; + PDBCdsArray *pdbA = NULL; char bincheck[14]; fp = fopen(filename, "rb"); @@ -1954,7 +2025,7 @@ { fwrite(&rows, sizeof(int), 1, fp); fwrite(&cols, sizeof(int), 1, fp); - fwrite(&mat[0][0], 
sizeof(double), rows * cols, fp); + fwrite(mat[0], sizeof(double), rows * cols, fp); } @@ -1965,5 +2036,436 @@ fread(&rows, sizeof(int), 1, fp); fread(&cols, sizeof(int), 1, fp); - fread(&mat[0][0], sizeof(double), rows * cols, fp); + fread(mat[0], sizeof(double), rows * cols, fp); +} + + +int +ConvertLele_freeform(char *fp_name, const int dim, const int forms, const int lmarks) +{ + int i, j, k, lines, numscanned; + FILE *fp0 = NULL, *fp1 = NULL; + double *vals = calloc(dim, sizeof(double)); + + fp0 = fopen(fp_name, "r"); + fp1 = fopen("lele.pdb", "w"); + if (fp0 == NULL || fp1 == NULL) + { + fprintf(stderr, "\n ERROR6969: cannot open file \"%s\" \n", fp_name); + exit(EXIT_FAILURE); + } + + i = j = 0; + lines = 0; + for (i = 0; i < forms; ++i) + { + fprintf(fp1, "MODEL %8d\n", i+1); + + for (j = 0; j < lmarks; ++j) + { + for (k = 0; k < dim; ++k) + { + numscanned = fscanf(fp0, "%le ", &vals[k]); + //printf("\n**** %f", vals[k]); + //fflush(NULL); + + if (numscanned < 1 || numscanned == EOF) + { + fprintf(stderr, + "\n ERROR6968: %d number of coordinates on line %d \n", + numscanned, lines); + exit(EXIT_FAILURE); + } + } + + /* r s Hn ar xc r i x y z o tF sI e c */ + /* ATOM 1949 1HB ARG A 255 19.326 -3.835 -3.438 1.00 1.31 H */ + + fprintf(fp1, + /* r s H n aL rN x c rSiC x y z o tF sI e c */ + "%-6.6s%5u %3.3s %-3.3s %1c%4d %8.3f%8.3f%8.3f%6.2f%6.2f\n", + "ATOM ", i*lmarks + j, "CA ", "ALA", 'A', j+1, + vals[0], vals[1], vals[2], + 1.0, 10.0); + + ++lines; + } + + fprintf(fp1, "ENDMDL\n"); + } + + fprintf(fp1, "END\n\n"); + fclose(fp0); + fclose(fp1); + free(vals); + + return(1); +} + + +int +ConvertDryden(char *fp_name, const int dim, const int forms, const int lmarks) +{ + int i, j, lines, numscanned; + FILE *fp0 = NULL, *fp1 = NULL; + double vals[2]; + char line[512]; + + fp0 = fopen(fp_name, "r"); + fp1 = fopen("dryden.pdb", "w"); + if (fp0 == NULL || fp1 == NULL) + { + fprintf(stderr, "\n ERROR6969: cannot open file \"%s\" \n", fp_name); + 
exit(EXIT_FAILURE); + } + +/* *length = 0; */ +/* while(1) */ +/* { */ +/* ch = getc(fp); */ +/* */ +/* if (ch == EOF || ch == '\n') */ +/* ++(*length); */ +/* */ +/* if (ch == EOF) */ +/* break; */ +/* } */ +/* */ +/* array = calloc((*length + 1), sizeof(double)); */ + +/* rewind(fp); */ + + fgets(line, 512, fp0); + + i = j = 0; + lines = 0; + for (i = 0; i < forms; ++i) + { + fprintf(fp1, "MODEL %8d\n", i+1); + + fscanf(fp0, "%*s"); + + for (j = 0; j < lmarks; ++j) + { + numscanned = fscanf(fp0, "%le %le ", &vals[0], &vals[1]); + + if (numscanned < dim || numscanned == EOF) + { + fprintf(stderr, + "\n ERROR6968: %d number of coordinates on line %d \n", + numscanned, lines); + exit(EXIT_FAILURE); + } + + /* r s Hn ar xc r i x y z o tF sI e c */ + /* ATOM 1949 1HB ARG A 255 19.326 -3.835 -3.438 1.00 1.31 H */ + + fprintf(fp1, + /* r s H n aL rN x c rSiC x y z o tF sI e c */ + "%-6.6s%5u %3.3s %-3.3s %1c%4d %8.3f%8.3f%8.3f%6.2f%6.2f\n", + "ATOM ", i*lmarks + j, "CA ", "ALA", 'A', j+1, + vals[0], vals[1], 0.0, + 1.0, 10.0); + + ++lines; + } + + fprintf(fp1, "ENDMDL\n"); + } + + fprintf(fp1, "END\n\n"); + fclose(fp0); + fclose(fp1); + + return(1); +} + + +int +ConvertLele(char *fp_name, const int dim, const int forms, const int lmarks) +{ + int i, j, lines, numscanned; + FILE *fp0 = NULL, *fp1 = NULL; + double vals[3]; + char line[512]; + + fp0 = fopen(fp_name, "r"); + fp1 = fopen("lele.pdb", "w"); + if (fp0 == NULL || fp1 == NULL) + { + fprintf(stderr, "\n ERROR6969: cannot open file \"%s\" \n", fp_name); + exit(EXIT_FAILURE); + } + +/* *length = 0; */ +/* while(1) */ +/* { */ +/* ch = getc(fp); */ +/* */ +/* if (ch == EOF || ch == '\n') */ +/* ++(*length); */ +/* */ +/* if (ch == EOF) */ +/* break; */ +/* } */ +/* */ +/* array = calloc((*length + 1), sizeof(double)); */ + +/* rewind(fp); */ + + i = j = 0; + lines = 0; + for (i = 0; i < forms; ++i) + { + fprintf(fp1, "MODEL %8d\n", i+1); + + for (j = 0; j < lmarks; ++j) + { + fgets(line, 512, fp0); + numscanned = 
sscanf(line, "%le %le %le ", &vals[0], &vals[1], &vals[2]); + + if (numscanned < dim || numscanned == EOF) + { + fprintf(stderr, + "\n ERROR6968: %d number of coordinates on line %d \n", + numscanned, lines); + exit(EXIT_FAILURE); + } + + /* r s Hn ar xc r i x y z o tF sI e c */ + /* ATOM 1949 1HB ARG A 255 19.326 -3.835 -3.438 1.00 1.31 H */ + + fprintf(fp1, + /* r s H n aL rN x c rSiC x y z o tF sI e c */ + "%-6.6s%5u %3.3s %-3.3s %1c%4d %8.3f%8.3f%8.3f%6.2f%6.2f\n", + "ATOM ", i*lmarks + j, "CA ", "ALA", 'A', j+1, + vals[0], vals[1], vals[2], + 1.0, 10.0); + + ++lines; + } + + fprintf(fp1, "ENDMDL\n"); + } + + fprintf(fp1, "END\n\n"); + fclose(fp0); + fclose(fp1); + + return(1); +} + + +void +WriteLeleModelFile(PDBCdsArray *pdbAr) +{ + FILE *pdbfile = NULL; + int i, j; + char outfile_name[] = "lele.txt"; + + pdbfile = myfopen(outfile_name, "w"); + if (pdbfile == NULL) + { + perror("\n ERROR"); + fprintf(stderr, + "\n ERROR99: could not open file '%s' for writing. \n", outfile_name); + PrintTheseusTag(); + exit(EXIT_FAILURE); + } + + for (i = 0; i < pdbAr->cnum; ++i) + { + for (j = 0; j < pdbAr->vlen; ++j) + { + fprintf(pdbfile, "%.3f\t%.3f\t%.3f\n", + pdbAr->cds[i]->x[j], + pdbAr->cds[i]->y[j], + pdbAr->cds[i]->z[j]); + } + } + + fprintf(pdbfile, "\n"); + + fclose(pdbfile); +} + + +void +WriteInstModelFile(char *fext, CdsArray *cdsA) +{ + int i; + PDBCdsArray *mpA = PDBCdsArrayInit(); + char *fext_name = NULL; + + PDBCdsArrayAlloc(mpA, cdsA->cnum, cdsA->vlen); + + for (i = 0; i < mpA->cnum; ++i) + CopyCds2PDB(mpA->cds[i], cdsA->cds[i]); + + fext_name = mystrcat(algo->rootname, fext); + WriteTheseusModelFileNoStats(mpA, algo, fext_name); + + free(fext_name); + PDBCdsArrayDestroy(&mpA); +} + + +void +WriteEdgarSSM(CdsArray *cdsA) +{ + #include "pdbSSM.h" + + printf(" Calculating SSM ... \n"); + fflush(NULL); + + + SSM *ssm = SSMInit(); + SSMAlloc(ssm, cdsA); + SSMCalc(ssm, cdsA); + + printf(" Writing SSM ... 
\n"); + fflush(NULL); + + WriteSSM(ssm); + SSMDestroy(&ssm); +} + + +void +WriteOlveFiles(CdsArray *cdsA) +{ + char *olve_name = NULL; + PDBCdsArray *olveA = NULL; + Cds **cds = cdsA->cds; + const int cnum = cdsA->cnum; + const int vlen = cdsA->vlen; + int i; + + printf(" Writing Olve's files ... \n"); + fflush(NULL); + + olveA = PDBCdsArrayInit(); + PDBCdsArrayAlloc(olveA, cnum, vlen); + + for (i = 0; i < cnum; ++i) + CopyCds2PDB(olveA->cds[i], cds[i]); + + olve_name = mystrcat(algo->rootname, "_olve.pdb"); + WriteOlveModelFile(olveA, algo, stats, olve_name); + free(olve_name); + PDBCdsArrayDestroy(&olveA); +} + + +/* Write out a taxa distance matrix in NEXUS format */ +void +WriteDistMatTree(CdsArray *cdsA) +{ + #include "DistMat.h" + + DISTMAT *distmat = NULL; + Cds **cds = cdsA->cds; + const int cnum = cdsA->cnum; + const int vlen = cdsA->vlen; + double sum; + int i, j,k; + int cnt; + char num[32]; + char *ptr = NULL; + char *tree_name = NULL; + + distmat = DISTMATalloc(cnum); + + for (i = 0; i < cnum; ++i) + { + strcpy(distmat->taxa[i], cds[i]->filename); + ptr = strrchr(distmat->taxa[i], '.'); + if (ptr != NULL) + *ptr = '\0'; + sprintf(num, "_%d", i); + strcat(distmat->taxa[i], num); + } + +// for (i = 0; i < cnum; ++i) +// { +// for (j = 0; j <= i; ++j) +// { +// sum = 0.0; +// for (k = 0; k < vlen; ++k) +// sum += SqrCdsDistMahal2((const Cds *) cds[i], k, +// (const Cds *) cds[j], k, +// (const double) scratchA->w[k]); +// +// distmat->dist[i][j] = sqrt(sum); +// } +// } + + for (i = 0; i < cnum; ++i) + { + for (j = 0; j < i; ++j) + { + sum = 0.0; + cnt = 0; + for (k = 0; k < vlen; ++k) + { + if (cds[i]->nu[k] && cds[j]->nu[k]) + { + cnt += cdsA->w[k]; + sum += SqrCdsDistMahal2((const Cds *) cds[i], k, + (const Cds *) cds[j], k, + (const double) cdsA->w[k]); + } + } + + distmat->dist[i][j] = sqrt(sum/(3.0*cnt)); + } + } + + tree_name = mystrcat(algo->rootname, "_ML_tree.nxs"); + print_NX_distmat(distmat, tree_name); + free(tree_name); + +// //double total 
= 0.0; +// for (i = 0; i < cnum; ++i) +// { +// for (j = 0; j < i; ++j) +// { +// sum = 0.0; +// for (k = 0; k < vlen; ++k) +// sum += SqrCdsDist((const Cds *) cds[i], k, +// (const Cds *) cds[j], k); +// +// distmat->dist[i][j] = sqrt(sum/vlen); +// //total += sum/vlen; +// } +// } + + for (i = 0; i < cnum; ++i) + { + for (j = 0; j < i; ++j) + { + sum = 0.0; + cnt = 0; + for (k = 0; k < vlen; ++k) + { + if (cds[i]->nu[k] && cds[j]->nu[k]) + { + ++cnt; + sum += SqrCdsDist((const Cds *) cds[i], k, + (const Cds *) cds[j], k); + } + } + + distmat->dist[i][j] = sqrt(sum/cnt); + } + } + + //printf("\nrmsd? %g\n", sqrt(total/((cnum*cnum- cnum)/2))); // verified same as paRMSD + + tree_name = mystrcat(algo->rootname, "_LS_tree.nxs"); + print_NX_distmat(distmat, tree_name); + + free(tree_name); + DISTMATdestroy(&distmat); } + diff -Nru theseus-2.0.6/pdbIO.h theseus-3.0.0/pdbIO.h --- theseus-2.0.6/pdbIO.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/pdbIO.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,24 +31,15 @@ int ReadPDBCds(char *pdbfile_name, PDBCdsArray *pdbA, int cds_i, int modelnum, int amber, int fix_atom_names); -/* void */ -/* XPLORcorrections(PDBCds *pdbcds, int record); */ +int +ReadTPSCds(char *pdbfile_name, PDBCdsArray *pdbA, int cds_i, int modelnum); + +PDBCdsArray +*GetTPSCds(char **argv_array, int narguments); void GetCdsSelection(CdsArray *baseA, PDBCdsArray *pdbA); -CdsArray -*GetCdsSel(CdsArray *baseA, PDBCdsArray *pdbA); - -/* CdsArray */ -/* *GetDefaultCdsSel(CdsArray *baseA, PDBCdsArray *pdbA); */ - -/* static int */ -/* atom_selxn(char *record, int mode); */ - -/* int */ -/* range_selxn(int chainID, char *chains, int resSeq, int *lower, int *upper, int range_num); */ - PDBCdsArray *GetPDBCds(char **argv_array, int narguments, int fmodel, int amber, int fix_atom_names); @@ -59,19 +50,13 @@ PrintPDBCds(FILE *pdbfile, PDBCds *pdbcds); void -PrintCds2File(FILE *pdbfile, Cds *cds); +WriteModelFile(PDBCdsArray *pdbA, char *outfile_name); void -PrintOccPDBCds(FILE *pdbfile, PDBCds *pdbcds); - -int -IsNameCAorP(char *name); - -/* int */ -/* ReadCds(char *pdbfile_name, CdsArray *cdsA, int cds_index, int modelnum); */ +WriteTheseusTPSModelFile(PDBCdsArray *pdbA, char *outfile_name); void -WriteModelFile(PDBCdsArray *pdbA, char *outfile_name); +WriteAveTPSCdsFile(PDBCdsArray *pdbA, char *outfile_name); void WriteTheseusModelFile(PDBCdsArray *pdbA, Algorithm *algo, Statistics *stats, char *outfile_name); @@ -83,13 +68,10 @@ WriteTheseusCdsModelFile(CdsArray *cdsA, char *outfile_name); void -WriteTheseusPDBFiles(PDBCdsArray *pdbA, Algorithm *algo, Statistics *stats); - -void -PrintTheseusModelHeader(FILE *pdbfile); +OverWriteTheseusCdsModelFile(CdsArray *cdsA, char *outfile_name); void -PrintModelFileStats(FILE *pdbfile, PDBCdsArray *pdbA, Algorithm *algo, Statistics *stats); 
+WriteTheseusPDBFiles(PDBCdsArray *pdbA, Algorithm *algo, Statistics *stats); void WriteOlveModelFile(PDBCdsArray *pdbA, Algorithm *algo, Statistics *stats, char *outfile_name); @@ -98,9 +80,6 @@ WriteCdsFile(Cds *cds, char *outfile_name); void -WritePDBCdsFile(PDBCds *cds, char *file_name); - -void WriteAveCdsFile(CdsArray *cdsA, char *outfile_name); void @@ -127,4 +106,28 @@ void ReadBinMatrix(double **mat, FILE *fp); +int +ConvertLele_freeform(char *fp_name, const int dim, const int forms, const int lmarks); + +int +ConvertDryden(char *fp_name, const int dim, const int forms, const int lmarks); + +int +ConvertLele(char *fp_name, const int dim, const int forms, const int lmarks); + +void +WriteLeleModelFile(PDBCdsArray *pdbAr); + +void +WriteInstModelFile(char *fext, CdsArray *cdsA); + +void +WriteEdgarSSM(CdsArray *cdsA); + +void +WriteOlveFiles(CdsArray *cdsA); + +void +WriteDistMatTree(CdsArray *cdsA); + #endif Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/._pdbMalloc.c and /tmp/g2bOMTRwaC/theseus-3.0.0/._pdbMalloc.c differ diff -Nru theseus-2.0.6/pdbMalloc.c theseus-3.0.0/pdbMalloc.c --- theseus-2.0.6/pdbMalloc.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/pdbMalloc.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -125,6 +125,18 @@ free(algo->argv); } + if (algo->selection != NULL) + { + free(algo->selection); + algo->selection = NULL; + } + + if (algo->atomslxn != NULL) + { + free(algo->atomslxn); + algo->atomslxn = NULL; + } + free(algo); } @@ -146,17 +158,13 @@ mystrcpy(algo->rootname, "theseus"); algo->weight = 0; algo->verbose = 0; /* verbose behavior = 1 */ - algo->method = 3; /* 7: LAPACK SVD, 10: my classic Jacobi SVD */ - algo->print_trans = 0; algo->write_file = 1; - algo->print_weight = 0; algo->precision = 1e-7; - algo->iterations = 200; + algo->iterations = 400; algo->selection = NULL; algo->atomslxn = NULL; algo->revsel = 0; algo->atoms = 0; - algo->reflection = 0; algo->embedave = 0; /* 2 = biased ML embedded structure */ algo->landmarks = 0; algo->writestats = 1; @@ -172,24 +180,22 @@ algo->princaxes = 1; algo->nullrun = 0; algo->binary = 0; - algo->modelpca = 0; - algo->raxes[0] = - algo->raxes[1] = - algo->raxes[2] = 1.0; algo->mbias = 0; - algo->notrans = 0; - algo->norot = 0; + algo->doave = 1; + algo->domp = 1; + algo->dotrans = 1; + algo->dorot = 1; + algo->dohierarch = 1; + algo->docovars = 1; algo->alignment = 0; algo->fmodel = 0; algo->covweight = 0; algo->varweight = 1; - algo->hierarch = 1; + algo->hierarch = 5; algo->leastsquares = 0; algo->filenum = 0; algo->infiles = NULL; - algo->noave = 0; algo->noinnerloop = 0; - algo->htrans = 0; algo->rounds = 0; algo->innerrounds = 0; algo->fasta = 0; @@ -204,10 +210,8 @@ algo->param[0] = algo->param[1] = 1.0; algo->radii[0] = algo->radii[1] = algo->radii[2] = 50.0; algo->ssm = 0; - algo->lele5 = 0; algo->bayes = 0; algo->ipmat = 0; - algo->commandeur = 0; algo->missing = 0; algo->scale = 0; algo->instfile = 0; @@ -216,6 +220,10 @@ algo->amber = 0; algo->atom_names = 0; algo->scalefactor = 1.0; + algo->morphfile = 0; + algo->scaleanchor = 0; + algo->randgibbs 
= 0; + algo->covnu = 1; return(algo); } @@ -273,17 +281,17 @@ cdsA->cds = NULL; cdsA->avecds = NULL; + cdsA->ac = NULL; cdsA->tcds = NULL; - cdsA->jkcds = NULL; + cdsA->tc = NULL; cdsA->w = NULL; cdsA->var = NULL; + cdsA->evals = NULL; + cdsA->samplevar3N = NULL; cdsA->df = NULL; cdsA->S2 = NULL; - cdsA->algo = AlgorithmInit(); - cdsA->stats = StatsInit(); - cdsA->residuals = NULL; cdsA->Var_matrix = NULL; @@ -292,28 +300,21 @@ cdsA->CovMat = NULL; cdsA->WtMat = NULL; cdsA->FullCovMat = NULL; - cdsA->MVCovMat = NULL; - cdsA->SCovMat = NULL; cdsA->pcamat = NULL; cdsA->pcavals = NULL; - cdsA->modpcamat = NULL; - cdsA->modpcavals = NULL; - cdsA->tmpmat1 = NULL; - cdsA->tmpmat2 = NULL; cdsA->tmpmatKK1 = NULL; cdsA->tmpmatKK2 = NULL; cdsA->tmpvecK = NULL; - cdsA->tmpmat3K = NULL; - cdsA->tmpmatK3a = NULL; - cdsA->tmpmatK3b = NULL; cdsA->tmpmat3a = MatAlloc(3, 3); cdsA->tmpmat3b = MatAlloc(3, 3); cdsA->tmpmat3c = MatAlloc(3, 3); cdsA->tmpmat3d = MatAlloc(3, 3); + cdsA->tmpvec3a = malloc(3 * sizeof(double)); + return(cdsA); } @@ -350,12 +351,14 @@ cdsA->avecds = CdsInit(); CdsAlloc(cdsA->avecds, vlen); + cdsA->ac = MatAlloc(3, vlen); cdsA->tcds = CdsInit(); + cdsA->tc = MatAlloc(3, vlen); CdsAlloc(cdsA->tcds, vlen); - cdsA->jkcds = CdsInit(); - CdsAlloc(cdsA->jkcds, vlen); cdsA->var = (double *) calloc(vlen, sizeof(double)); + cdsA->evals = (double *) calloc(vlen, sizeof(double)); + cdsA->samplevar3N = (double *) calloc(vlen, sizeof(double)); cdsA->w = (double *) calloc(vlen, sizeof(double)); cdsA->df = (int *) calloc(vlen, sizeof(int)); } @@ -373,6 +376,8 @@ CdsArraySetup(CdsArray *cdsA) { memsetd(cdsA->var, 1.0, cdsA->vlen); + memsetd(cdsA->evals, 1.0, cdsA->vlen); + memsetd(cdsA->samplevar3N, 1.0, cdsA->vlen); memsetd(cdsA->w, 1.0, cdsA->vlen); } @@ -399,29 +404,16 @@ CdsDestroy(&(cdsA->avecds)); CdsDestroy(&(cdsA->tcds)); - CdsDestroy(&(cdsA->jkcds)); - free(cdsA->stats); - cdsA->stats = NULL; - - if (cdsA->algo->selection != NULL) - { - free(cdsA->algo->selection); - 
cdsA->algo->selection = NULL; - } - - if (cdsA->algo->atomslxn != NULL) - { - free(cdsA->algo->atomslxn); - cdsA->algo->atomslxn = NULL; - } - - AlgorithmDestroy(cdsA->algo); + MatDestroy(&(cdsA->ac)); + MatDestroy(&(cdsA->tc)); DistMatsDestroy(cdsA); CovMatsDestroy(cdsA); PCADestroy(cdsA); + free(cdsA->tmpvec3a); + if (cdsA->residuals != NULL) { free(cdsA->residuals); @@ -432,6 +424,10 @@ cdsA->w = NULL; free(cdsA->var); cdsA->var = NULL; + free(cdsA->evals); + cdsA->evals = NULL; + free(cdsA->samplevar3N); + cdsA->samplevar3N = NULL; free(cdsA->df); cdsA->df = NULL; @@ -482,10 +478,6 @@ MatDestroy(&(cdsA->tmpmatKK2)); if (cdsA->tmpvecK != NULL) free(cdsA->tmpvecK); - if (cdsA->tmpmat1 != NULL) - MatDestroy(&(cdsA->tmpmat1)); - if (cdsA->tmpmat2 != NULL) - MatDestroy(&(cdsA->tmpmat2)); if (cdsA->tmpmat3a != NULL) MatDestroy(&(cdsA->tmpmat3a)); if (cdsA->tmpmat3b != NULL) @@ -494,16 +486,6 @@ MatDestroy(&(cdsA->tmpmat3c)); if (cdsA->tmpmat3d != NULL) MatDestroy(&(cdsA->tmpmat3d)); - if (cdsA->MVCovMat != NULL) - MatDestroy(&(cdsA->MVCovMat)); - if (cdsA->SCovMat != NULL) - MatDestroy(&(cdsA->SCovMat)); - if (cdsA->tmpmatK3a != NULL) - MatDestroy(&(cdsA->tmpmatK3a)); - if (cdsA->tmpmatK3b != NULL) - MatDestroy(&(cdsA->tmpmatK3b)); - if (cdsA->tmpmat3K != NULL) - MatDestroy(&(cdsA->tmpmat3K)); } @@ -531,7 +513,7 @@ Cds *CdsInit(void) { - int i, j; + int i; Cds *cds = NULL; cds = (Cds *) malloc(sizeof(Cds)); @@ -542,37 +524,43 @@ cds->resName_space = NULL; cds->chainID = NULL; cds->resSeq = NULL; + cds->wc = NULL; cds->x = NULL; cds->y = NULL; cds->z = NULL; cds->o = NULL; cds->b = NULL; - cds->residual_x = NULL; - cds->residual_y = NULL; - cds->residual_z = NULL; + cds->nu = NULL; + cds->mu = NULL; + cds->sc = NULL; + cds->sx = NULL; + cds->sy = NULL; + cds->sz = NULL; + cds->so = NULL; + cds->sb = NULL; + cds->cc = NULL; cds->covx = NULL; cds->covy = NULL; cds->covz = NULL; - cds->innerprod = NULL; - cds->vlen = 0; - cds->innerprod2 = NULL; + cds->residual_x = NULL; 
+ cds->residual_y = NULL; + cds->residual_z = NULL; + cds->outerprod = NULL; + + cds->vlen = 0; + cds->innerprod = MatAlloc(3, 3); cds->matrix = MatAlloc(3, 3); cds->last_matrix = MatAlloc(3, 3); + cds->last_outer_matrix = MatAlloc(3, 3); cds->evecs = MatAlloc(4,4); for (i = 0; i < 4; ++i) cds->evecs[i][0] = 1.0; for (i = 0; i < 3; ++i) - for (j = 0; j < 3; ++j) - cds->matrix[i][j] = cds->last_matrix[i][j] = 0.0; - - cds->tmpmat3a = MatAlloc(3, 3); - cds->tmpmat3b = MatAlloc(3, 3); - cds->tmpmat3c = MatAlloc(3, 3); - cds->tmpmat3d = MatAlloc(3, 3); + cds->matrix[i][i] = cds->last_matrix[i][i] = cds->last_outer_matrix[i][i] = 1.0; cds->bfact_c = 1.0; cds->scale = 1.0; @@ -588,39 +576,44 @@ cds->vlen = vlen; - cds->resName = (char **) calloc(vlen, sizeof(char *)); - cds->resName_space = (char *) calloc(4 * vlen, sizeof(char)); - cds->chainID = (char *) calloc(vlen, sizeof(char)); - cds->resSeq = (int *) calloc(vlen, sizeof(int)); - cds->x = (double *) calloc(vlen, sizeof(double)); - cds->y = (double *) calloc(vlen, sizeof(double)); - cds->z = (double *) calloc(vlen, sizeof(double)); - cds->o = (double *) calloc(vlen, sizeof(double)); - cds->b = (double *) calloc(vlen, sizeof(double)); - cds->prvar = (double *) calloc(vlen, sizeof(double)); - cds->residual_x = (double *) calloc(vlen, sizeof(double)); - cds->residual_y = (double *) calloc(vlen, sizeof(double)); - cds->residual_z = (double *) calloc(vlen, sizeof(double)); - cds->covx = (double *) calloc(vlen, sizeof(double)); - cds->covy = (double *) calloc(vlen, sizeof(double)); - cds->covz = (double *) calloc(vlen, sizeof(double)); + cds->resName = (char **) calloc(vlen, sizeof(char *)); + cds->resName_space = (char *) calloc(4 * vlen, sizeof(char)); + cds->chainID = (char *) calloc(vlen, sizeof(char)); + cds->resSeq = (int *) calloc(vlen, sizeof(int)); + cds->wc = MatAlloc(5, vlen); + cds->x = cds->wc[0]; + cds->y = cds->wc[1]; + cds->z = cds->wc[2]; + cds->o = cds->wc[3]; + cds->b = cds->wc[4]; + cds->nu = (int *) 
calloc(vlen, sizeof(int)); + cds->mu = (int *) calloc(vlen, sizeof(int)); + cds->sc = MatAlloc(5, vlen); + cds->sx = cds->sc[0]; + cds->sy = cds->sc[1]; + cds->sz = cds->sc[2]; + cds->so = cds->sc[3]; + cds->sb = cds->sc[4]; + cds->cc = MatAlloc(3, vlen);// DLT could be moved to CovMatAlloc + cds->covx = cds->cc[0]; + cds->covy = cds->cc[1]; + cds->covz = cds->cc[2]; + cds->prvar = (double *) calloc(vlen, sizeof(double)); + cds->residual_x = (double *) calloc(vlen, sizeof(double)); + cds->residual_y = (double *) calloc(vlen, sizeof(double)); + cds->residual_z = (double *) calloc(vlen, sizeof(double)); if ( cds->resName == NULL || cds->resName_space == NULL || cds->chainID == NULL || cds->resSeq == NULL - || cds->x == NULL - || cds->y == NULL - || cds->z == NULL - || cds->o == NULL - || cds->b == NULL + || cds->wc == NULL + || cds->sc == NULL + || cds->cc == NULL || cds->prvar == NULL || cds->residual_x == NULL || cds->residual_y == NULL - || cds->residual_z == NULL - || cds->covx == NULL - || cds->covy == NULL - || cds->covz == NULL) + || cds->residual_z == NULL) { perror("\n ERROR"); fprintf(stderr, "\n ERROR5: could not allocate memory in function CdsAlloc(). 
\n"); @@ -641,14 +634,12 @@ int i; memsetd(cds->translation, 0.0, 3); - memsetd(cds->jktranslation, 0.0, 3); - memsetd(cds->transsum, 0.0, 3); for (i = 0; i < 4; ++i) cds->evecs[i][0] = 1.0; for (i = 0; i < 3; ++i) - cds->matrix[i][i] = cds->last_matrix[i][i] = 1.0; + cds->matrix[i][i] = cds->last_matrix[i][i] = cds->last_outer_matrix[i][i] = 1.0; } @@ -661,34 +652,25 @@ free(cds->resName_space); free(cds->chainID); free(cds->resName); - free(cds->x); - free(cds->y); - free(cds->z); - free(cds->o); - free(cds->b); + MatDestroy(&(cds->wc)); + MatDestroy(&(cds->sc)); + MatDestroy(&(cds->cc)); + free(cds->nu); + free(cds->mu); free(cds->prvar); free(cds->residual_x); free(cds->residual_y); free(cds->residual_z); - free(cds->covx); - free(cds->covy); - free(cds->covz); MatDestroy(&(cds->matrix)); MatDestroy(&(cds->last_matrix)); + MatDestroy(&(cds->last_outer_matrix)); + MatDestroy(&(cds->innerprod)); - if (cds->innerprod != NULL) - MatDestroy(&(cds->innerprod)); - - if (cds->innerprod2 != NULL) - MatDestroy(&(cds->innerprod2)); + if (cds->outerprod != NULL) + MatDestroy(&(cds->outerprod)); MatDestroy(&(cds->evecs)); - MatDestroy(&(cds->tmpmat3a)); - MatDestroy(&(cds->tmpmat3b)); - MatDestroy(&(cds->tmpmat3c)); - MatDestroy(&(cds->tmpmat3d)); - free(cds); *cds_ptr = NULL; } @@ -697,7 +679,7 @@ PDBCdsArray *PDBCdsArrayInit(void) { - PDBCdsArray *pdbA = NULL; + PDBCdsArray *pdbA = NULL; pdbA = (PDBCdsArray *) malloc(sizeof(PDBCdsArray)); if (pdbA == NULL) @@ -805,6 +787,7 @@ pdbcds = (PDBCds *) malloc(sizeof(PDBCds)); pdbcds->vlen = 0; /* so that we know later if this has been allocated or not */ + pdbcds->scale = 1.0; pdbcds->translation = calloc(3, sizeof(double)); pdbcds->matrix = MatAlloc(3, 3); memset(&pdbcds->matrix[0][0], 0, 9 * sizeof(double)); @@ -840,6 +823,8 @@ pdbcds->element = (char **) calloc(vlen, sizeof(char *)); pdbcds->charge = (char **) calloc(vlen, sizeof(char *)); + pdbcds->nu = (int *) calloc(vlen, sizeof(int)); + /* allocate space for the fields in 
total */ pdbcds->record_space = (char *) calloc(8 * vlen, sizeof(char)); pdbcds->name_space = (char *) calloc(4 * vlen, sizeof(char)); @@ -866,6 +851,7 @@ || pdbcds->segID == NULL || pdbcds->element == NULL || pdbcds->charge == NULL + || pdbcds->nu == NULL || pdbcds->record_space == NULL || pdbcds->name_space == NULL || pdbcds->resName_space == NULL @@ -926,6 +912,8 @@ free(pdbcds->element); free(pdbcds->charge); + free(pdbcds->nu); + free(pdbcds); *pdbcds_ptr = NULL; } diff -Nru theseus-2.0.6/pdbMalloc.h theseus-3.0.0/pdbMalloc.h --- theseus-2.0.6/pdbMalloc.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/pdbMalloc.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -41,6 +41,9 @@ Algorithm *AlgorithmInit(void); +void +AlgorithmDestroy(Algorithm *algo); + Statistics *StatsInit(void); diff -Nru theseus-2.0.6/pdbSSM.c theseus-3.0.0/pdbSSM.c --- theseus-2.0.6/pdbSSM.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/pdbSSM.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -54,25 +54,25 @@ where =N is the number of structures. -Following the first line are N sequences in FASTA format. Presumably you will -use the standard amino acid alphabet here, but you can use any symbols you like --- e.g. an X at each position just as a placeholder. 
MUSCLE will use a sequence -for use in the output alignment and to get the length so that the similarity +Following the first line are N sequences in FASTA format. Presumably you will +use the standard amino acid alphabet here, but you can use any symbols you like +-- e.g. an X at each position just as a placeholder. MUSCLE will use a sequence +for use in the output alignment and to get the length so that the similarity matrix dimension is known in advance. -Following the FASTA data are N(N-1)/2 similarity matrices. A similarity matrix +Following the FASTA data are N(N-1)/2 similarity matrices. A similarity matrix is formatted as follows: #MATRIX, #ENDMATRIX -where and are "structure indexes". A structure index=1, 2 ... N as +where and are "structure indexes". A structure index=1, 2 ... N as defined by the order they appear in the FASTA data section of the file. -The matrices must appear in order of increasing i, then within a given i in -order of increasing j, including only matrices in which i < j (of course, the -matrix for i,j contains the same data as the matrix for j,i). So, for example, +The matrices must appear in order of increasing i, then within a given i in +order of increasing j, including only matrices in which i < j (of course, the +matrix for i,j contains the same data as the matrix for j,i). So, for example, if N=4 then the order is: #MATRIX1,2 @@ -93,10 +93,10 @@ The data contains L_i lines, where L_i is the length of the i'th structure. One line contains L_j floating-point values, where L_j is the length of the j'th -structure. A floating-point value may be formatted in any way readable by the C +structure. A floating-point value may be formatted in any way readable by the C language atof() function. -The first value starts in the first column of the line, values are separated by +The first value starts in the first column of the line, values are separated by exactly one space character. 
*/ @@ -104,7 +104,7 @@ SSM *SSMInit(void) { - SSM *ssm; + SSM *ssm = NULL; ssm = (SSM *) malloc(sizeof(SSM)); if (ssm == NULL) @@ -131,7 +131,7 @@ ssm->n = cnum = cdsA->cnum; ssm->L = (int *) malloc(cnum * sizeof(int)); - + for (i = 0; i < cnum; ++i) { ssm->L[i] = cdsA->cds[i]->aalen; @@ -181,7 +181,7 @@ int cnum = cdsA->cnum; double *lnvar = malloc(cdsA->vlen * sizeof(double)); double *invvar = malloc(cdsA->vlen * sizeof(double)); - + for (i = 0; i < cdsA->vlen; ++i) lnvar[i] = log(cdsA->var[i]); @@ -195,34 +195,34 @@ { for (m = p = 0; m < cdsA->vlen; ++m) { - if (cdsA->cds[i]->o[m] > 0.0) + if (cdsA->cds[i]->nu[m]) { - for (n = q = 0; n < cdsA->vlen; ++n) - { - if (cdsA->cds[j]->o[n] > 0.0) - { -/* printf("[%d][%d][%d]:%e %e %e %e %e %e\n", k, p, q, */ -/* sqrt(SqrCdsDist(cdsA->cds[i], m, cdsA->avecds, m)), */ -/* sqrt(SqrCdsDist(cdsA->cds[i], m, cdsA->avecds, n)), */ -/* sqrt(SqrCdsDist(cdsA->cds[j], n, cdsA->avecds, m)), */ -/* sqrt(SqrCdsDist(cdsA->cds[j], n, cdsA->avecds, n)), */ -/* lnvar[i], lnvar[j]); */ -/* fflush(NULL); */ - - ssm->mat[k][p][q] = - (invvar[m] * SqrCdsDist(cdsA->cds[i], m, cdsA->avecds, m) + - invvar[n] * SqrCdsDist(cdsA->cds[i], m, cdsA->avecds, n) + - invvar[m] * SqrCdsDist(cdsA->cds[j], n, cdsA->avecds, m) + - invvar[n] * SqrCdsDist(cdsA->cds[j], n, cdsA->avecds, n) + - 2.0 * (lnvar[i] + lnvar[j])) * -0.25; - -// printf("[%d][%d][%d]:%e\n", k, p, q, ssm->mat[k][p][q]); -// fflush(NULL); - - q++; - } - } - //printf("\nq:%d", q); + for (n = q = 0; n < cdsA->vlen; ++n) + { + if (cdsA->cds[j]->nu[n]) + { +/* printf("[%d][%d][%d]:%e %e %e %e %e %e\n", k, p, q, */ +/* sqrt(SqrCdsDist(cdsA->cds[i], m, cdsA->avecds, m)), */ +/* sqrt(SqrCdsDist(cdsA->cds[i], m, cdsA->avecds, n)), */ +/* sqrt(SqrCdsDist(cdsA->cds[j], n, cdsA->avecds, m)), */ +/* sqrt(SqrCdsDist(cdsA->cds[j], n, cdsA->avecds, n)), */ +/* lnvar[i], lnvar[j]); */ +/* fflush(NULL); */ + + ssm->mat[k][p][q] = + (invvar[m] * SqrCdsDist(cdsA->cds[i], m, cdsA->avecds, m) + + invvar[n] * 
SqrCdsDist(cdsA->cds[i], m, cdsA->avecds, n) + + invvar[m] * SqrCdsDist(cdsA->cds[j], n, cdsA->avecds, m) + + invvar[n] * SqrCdsDist(cdsA->cds[j], n, cdsA->avecds, n) + + 2.0 * (lnvar[i] + lnvar[j])) * -0.25; + +// printf("[%d][%d][%d]:%e\n", k, p, q, ssm->mat[k][p][q]); +// fflush(NULL); + + q++; + } + } + //printf("\nq:%d", q); p++; } diff -Nru theseus-2.0.6/pdbSSM.h theseus-3.0.0/pdbSSM.h --- theseus-2.0.6/pdbSSM.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/pdbSSM.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/._pdbStats.c and /tmp/g2bOMTRwaC/theseus-3.0.0/._pdbStats.c differ diff -Nru theseus-2.0.6/pdbStats.c theseus-3.0.0/pdbStats.c --- theseus-2.0.6/pdbStats.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/pdbStats.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2010 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,75 +24,62 @@ */ #include "pdbStats_local.h" -#include -#include -#include "myassert.h" static void Vars2Bfacts(CdsArray *cdsA); -void -CheckVars(CdsArray *cdsA) -{ - int i; +static void +WriteVariance(CdsArray *cdsA, char *outfile_name); - for(i = 0; i < cdsA->vlen; ++i) - { - if (!isfinite(cdsA->var[i]) || cdsA->var[i] < DBL_EPSILON) - { - printf("Bad variance: %4d % e\n", i, cdsA->var[i]); - fflush(NULL); - } - } -} +static void +Vars2Bfacts(CdsArray *cdsA); +static double +SqrCoordMag(const Cds *cds, const int vec); -void -CopyStats(CdsArray *cdsA1, CdsArray *cdsA2) -{ - int i; - Cds **cds1 = cdsA1->cds, **cds2 = cdsA2->cds; - Statistics *stats1 = cdsA1->stats, *stats2 = cdsA2->stats; - const int cnum = cdsA1->cnum; - const int vlen = cdsA1->vlen; +static double +RadiusGyration(Cds *cds, const double *weights); - memcpy(stats1, stats2, sizeof(Statistics)); +static void +WriteResiduals(CdsArray *cdsA, char *outfile_name); - memcpy(stats1->skewness, stats2->skewness, 4 * sizeof(double)); - memcpy(stats1->kurtosis, stats2->kurtosis, 4 * sizeof(double)); +static void +MomentsCds(CdsArray *cdsA); - cdsA1->avecds->radgyr = cdsA2->avecds->radgyr; +static void +CalcAIC(CdsArray *cdsA); - memcpy(cdsA1->var, cdsA2->var, vlen * sizeof(double)); - memcpy(cdsA1->w, cdsA2->w, vlen * sizeof(double)); +static void +CalcBIC(CdsArray *cdsA); - for (i = 0; i < cnum; ++i) - { - cds1[i]->radgyr = cds2[i]->radgyr; - MatCpySym(cds1[i]->matrix, (const double **) cds2[i]->matrix, 3); - MatCpySym(cds1[i]->evecs, (const double **) cds2[i]->evecs, 4); - memcpy(cds1[i]->evals, cds2[i]->evals, 4 * sizeof(double)); - memcpy(cds1[i]->center, cds2[i]->center, 4 * sizeof(double)); - memcpy(cds1[i]->translation, cds2[i]->translation, 4 * sizeof(double)); - cds1[i]->ref_wRMSD_from_mean = cds2[i]->ref_wRMSD_from_mean; - cds1[i]->wRMSD_from_mean = 
cds2[i]->wRMSD_from_mean; - } +// static double +// FrobTermDiag(CdsArray *cdsA); +// +// static double +// FrobTerm2(CdsArray *cdsA); +// +// static double +// CalcLogScaleJacob(CdsArray *cdsA); - CdsCopyAll(cdsA1->avecds, cdsA2->avecds); +// static double ** +// CalcCov(CdsArray *cdsA); - if (cdsA2->algo->pca > 0 && cdsA2->pcamat != NULL) - { - if (cdsA1->pcamat != NULL) - MatDestroy(&cdsA1->pcamat); +// static double * +// CalcVar(CdsArray *cdsA); - if (cdsA1->pcavals != NULL) - free(cdsA1->pcavals); - cdsA1->pcamat = MatAlloc(cdsA2->algo->pca, vlen); - cdsA1->pcavals = malloc(vlen * sizeof(double)); +void +CheckVars(CdsArray *cdsA) +{ + int i; - memcpy(cdsA1->pcamat[0], cdsA2->pcamat[0], cdsA2->algo->pca * vlen * sizeof(double)); - memcpy(cdsA1->pcavals, cdsA2->pcavals, vlen * sizeof(double)); + for(i = 0; i < cdsA->vlen; ++i) + { + if (!isfinite(cdsA->var[i]) || cdsA->var[i] < DBL_EPSILON) + { + printf("Bad variance: %4d % e\n", i, cdsA->var[i]); + fflush(NULL); + } } } @@ -106,58 +93,13 @@ { cdsA->df[i] = 0; for (j = 0; j < cdsA->cnum; ++j) - cdsA->df[i] += cdsA->cds[j]->o[i]; + cdsA->df[i] += cdsA->cds[j]->nu[i]; //cdsA->df[i] *= 3; } } -/* Calculates the atomic variances for a family of Cds */ -/* returns the standard deviation */ -double -VarianceCdsNoVec(CdsArray *cdsA) -{ - int i, j; - double sqrx, sqry, sqrz, sqrdist; - double tmpx, tmpy, tmpz; - double variance; - const int cnum = cdsA->cnum, vlen = cdsA->vlen; - const double idf = 1.0 / (3 * cnum); - double *var = cdsA->var; - const Cds **cds = (const Cds **) cdsA->cds; - Cds *cdsj; - const double *avex = (const double *) cdsA->avecds->x, - *avey = (const double *) cdsA->avecds->y, - *avez = (const double *) cdsA->avecds->z; - - variance = 0.0; - for (i = 0; i < vlen; ++i) - { - sqrx = sqry = sqrz = 0.0; - for (j = 0; j < cnum; ++j) - { - cdsj = (Cds *) cds[j]; - tmpx = cdsj->x[i] - avex[i]; - sqrx += tmpx * tmpx; - tmpy = cdsj->y[i] - avey[i]; - sqry += tmpy * tmpy; - tmpz = cdsj->z[i] - avez[i]; - 
sqrz += tmpz * tmpz; - } - - sqrdist = sqrx + sqry + sqrz; - var[i] = sqrdist * idf; - variance += sqrdist; - } - - variance /= (vlen * cnum); - cdsA->stats->stddev = sqrt(variance); - - return(cdsA->stats->stddev); -} - - double VarianceCds(CdsArray *cdsA) { @@ -165,10 +107,10 @@ double tmpx, tmpy, tmpz; double variance; const int cnum = cdsA->cnum, vlen = cdsA->vlen; - const double idf = 1.0 / (3 * cnum); + const double idf = 1.0 / (3.0 * cnum); double *var = cdsA->var; - const Cds **cds = (const Cds **) cdsA->cds; - Cds *cdsj; + const Cds **cds = (const Cds **) cdsA->cds; + Cds *cdsj = NULL; const double *avex = (const double *) cdsA->avecds->x, *avey = (const double *) cdsA->avecds->y, *avez = (const double *) cdsA->avecds->z; @@ -182,32 +124,49 @@ for (i = 0; i < vlen; ++i) { tmpx = cdsj->x[i] - avex[i]; - var[i] += tmpx * tmpx; tmpy = cdsj->y[i] - avey[i]; - var[i] += tmpy * tmpy; tmpz = cdsj->z[i] - avez[i]; - var[i] += tmpz * tmpz; + var[i] += tmpx * tmpx + tmpy * tmpy + tmpz * tmpz; } } - for (i = 0; i < vlen; ++i) - var[i] *= idf; +// if (algo->scale > 0) +// { +// double lndet = 0.0; +// for (i = 0; i < vlen; ++i) +// lndet += log(var[i]); +// +// lndet /= vlen; +// double det = exp(lndet); +// +// for (i = 0; i < vlen; ++i) +// var[i] /= det; +// +// variance = det; +// stats->var = variance; +// stats->stddev = sqrt(variance); +// } +// else + { + for (i = 0; i < vlen; ++i) + var[i] *= idf; - variance = 0.0; - for (i = 0; i < vlen; ++i) - variance += var[i]; + variance = 0.0; + for (i = 0; i < vlen; ++i) + variance += var[i]; - variance /= vlen; - cdsA->stats->var = variance; - cdsA->stats->stddev = sqrt(variance); + variance /= vlen; + stats->var = variance; + stats->stddev = sqrt(variance); + } - return(cdsA->stats->stddev); + return(stats->stddev); } /* Same as VarianceCds() but weights by occupancies */ double -VarianceCdsOcc(CdsArray *cdsA) +VarianceCdsNu(CdsArray *cdsA) { int i, j; double sqrx, sqry, sqrz, sqrdist; @@ -215,33 +174,33 @@ double 
variance; const int cnum = cdsA->cnum, vlen = cdsA->vlen; double *var = cdsA->var; - const Cds **cds = (const Cds **) cdsA->cds; - Cds *cdsj = NULL; + const Cds **cds = (const Cds **) cdsA->cds; + Cds *cdsj = NULL; const double *avex = (const double *) cdsA->avecds->x, *avey = (const double *) cdsA->avecds->y, *avez = (const double *) cdsA->avecds->z; - double occ, occsum; + double nu, nusum; variance = 0.0; for (i = 0; i < vlen; ++i) { sqrx = sqry = sqrz = 0.0; - occsum = 0.0; + nusum = 0.0; for (j = 0; j < cnum; ++j) { cdsj = (Cds *) cds[j]; - occ = cdsj->o[i]; + nu = cdsj->nu[i]; tmpx = cdsj->x[i] - avex[i]; - sqrx += occ * tmpx * tmpx; + sqrx += nu * tmpx * tmpx; tmpy = cdsj->y[i] - avey[i]; - sqry += occ * tmpy * tmpy; + sqry += nu * tmpy * tmpy; tmpz = cdsj->z[i] - avez[i]; - sqrz += occ * tmpz * tmpz; + sqrz += nu * tmpz * tmpz; - occsum += occ; + nusum += nu; /* printf("\n%4d %4d %e - %e %e %e - %e %e %e", */ -/* i, j, occ, */ +/* i, j, nu, */ /* cdsj->x[i], cdsj->y[i], cdsj->z[i], */ /* avex[i], avey[i], avez[i]); */ } @@ -249,113 +208,27 @@ sqrdist = sqrx + sqry + sqrz; //var[i] = sqrdist / (3.0 * cdsA->df[i]); - var[i] = sqrdist / (3.0 * occsum); // should be the same as with df[i] + var[i] = sqrdist / (3.0 * nusum); // should be the same as with df[i] variance += var[i]; /* printf("\nvar[%3d]:%f\n", i, var[i]); */ } //WriteVariance(cdsA, "jacob.log"); exit(1); variance /= (double) vlen; - cdsA->stats->stddev = sqrt(variance); + stats->stddev = sqrt(variance); - return(cdsA->stats->stddev); + return(stats->stddev); } -/* double */ -/* CalcOrderParams(CdsArray *cdsA) */ -/* { */ -/* int i, j; */ -/* double tmpx, tmpy, tmpz; */ -/* const int cnum = cdsA->cnum, vlen = cdsA->vlen; */ -/* const double idf = 1.0 / (3 * cnum); */ -/* double *S2 = cdsA->S2; */ -/* const Cds **cds = (const Cds **) cdsA->cds; */ -/* Cds *cdsj; */ -/* const double *avex = (const double *) cdsA->avecds->x, */ -/* *avey = (const double *) cdsA->avecds->y, */ -/* *avez = (const double 
*) cdsA->avecds->z; */ -/* */ -/* memset(S2, 0, vlen * sizeof(double)); */ -/* */ -/* for (i = 0; i < vlen; ++i) */ -/* { */ -/* cdsj = (Cds *) cds[j]; */ -/* */ -/* for (j = 0; j < cnum; ++j) */ -/* { */ -/* tmpx = cdsj->x[i] - avex[i]; */ -/* var[i] += tmpx * tmpx; */ -/* tmpy = cdsj->y[i] - avey[i]; */ -/* var[i] += tmpy * tmpy; */ -/* tmpz = cdsj->z[i] - avez[i]; */ -/* var[i] += tmpz * tmpz; */ -/* } */ -/* } */ -/* */ -/* return(cdsA->stats->stddev); */ -/* } */ - - -/* void */ -/* WriteTransformations(CdsArray *cdsA, char *outfile_name) */ -/* { */ -/* FILE *transfile = NULL; */ -/* int i, j; */ -/* */ -/* transfile = myfopen(outfile_name, "w"); */ -/* if (transfile == NULL) */ -/* { */ -/* perror("\n ERROR"); */ -/* fprintf(stderr, */ -/* "\n ERROR99: could not open file '%s' for writing. \n", outfile_name); */ -/* PrintTheseusTag(); */ -/* exit(EXIT_FAILURE); */ -/* } */ -/* */ -/* fprintf(transfile, "# Translation vectors\n"); */ -/* */ -/* for (i = 0; i < cdsA->cnum; ++i) */ -/* { */ -/* fprintf(transfile, */ -/* "MODEL %3d, t: %9.4f %9.4f %9.4f\n", */ -/* i+1, */ -/* cdsA->cds[i]->translation[0], */ -/* cdsA->cds[i]->translation[1], */ -/* cdsA->cds[i]->translation[2]); */ -/* } */ -/* */ -/* fprintf(transfile, "\n# Rotation matrices\n"); */ -/* */ -/* for (i = 0; i < cdsA->cnum; ++i) */ -/* { */ -/* fprintf(transfile, "MODEL %3d, R: ", i+1); */ -/* */ -/* for (j = 0; j < 3; ++j) */ -/* { */ -/* fprintf(transfile, */ -/* "% 10.7f % 10.7f % 10.7f ", */ -/* cdsA->cds[i]->matrix[j][0], */ -/* cdsA->cds[i]->matrix[j][1], */ -/* cdsA->cds[i]->matrix[j][2]); */ -/* } */ -/* */ -/* fputc('\n', transfile); */ -/* } */ -/* */ -/* fprintf(transfile, "\n\n"); */ -/* fflush(NULL); */ -/* */ -/* fclose(transfile); */ -/* } */ - - void WriteTransformations(CdsArray *cdsA, char *outfile_name) { FILE *transfile = NULL; int i, j; - double angle, *v = malloc(3*sizeof(double)); + int cnum = cdsA->cnum; + double angle; + double *v = malloc(3*sizeof(double)); transfile = 
myfopen(outfile_name, "w"); if (transfile == NULL) @@ -367,23 +240,31 @@ exit(EXIT_FAILURE); } + fprintf(transfile, "# Convention: X_sup = (X_orig + 1t')R ,\n"); + fprintf(transfile, "# where the X structures are Kx3 row x col matrices (of K atoms),\n"); + fprintf(transfile, "# R is an orthogonal 3x3 rotation matrix,\n"); + fprintf(transfile, "# t is a 3x1 translation column vector, and\n"); + fprintf(transfile, "# 1 is a Kx1 column vector of ones, such that\n"); + fprintf(transfile, "# 1t' is a Kx3 matrix of identical rows, in which\n"); + fprintf(transfile, "# each row is the row vector t'.\n\n"); + fprintf(transfile, "# Translation vectors\n"); - for (i = 0; i < cdsA->cnum; ++i) + for (i = 0; i < cnum; ++i) { fprintf(transfile, - "MODEL %3d, t: %9.4f %9.4f %9.4f\n", + "MODEL %4d t: %9.4f %9.4f %9.4f\n", i+1, - cdsA->cds[i]->translation[0], - cdsA->cds[i]->translation[1], - cdsA->cds[i]->translation[2]); + -cdsA->cds[i]->translation[0], + -cdsA->cds[i]->translation[1], + -cdsA->cds[i]->translation[2]); } fprintf(transfile, "\n# Rotation matrices\n"); - for (i = 0; i < cdsA->cnum; ++i) + for (i = 0; i < cnum; ++i) { - fprintf(transfile, "MODEL %3d, R: ", i+1); + fprintf(transfile, "MODEL %4d R: ", i+1); for (j = 0; j < 3; ++j) { @@ -399,58 +280,49 @@ fprintf(transfile, "\n# Rotations, angle-axis representation\n"); - for (i = 0; i < cdsA->cnum; ++i) + for (i = 0; i < cnum; ++i) { angle = RotMat2AxisAngle(cdsA->cds[i]->matrix, v); fprintf(transfile, - "MODEL %3d, Angle: % 10.7f Axis: % 10.7f % 10.7f % 10.7f\n", + "MODEL %4d Angle: % 10.7f Axis: % 10.7f % 10.7f % 10.7f\n", i+1, angle, v[0], v[1], v[2]); } - fprintf(transfile, "\n\n"); - fflush(NULL); - - free(v); - fclose(transfile); -} - - -double -CalcResRMSD(CdsArray *cdsA, int res) -{ - int i, j, cnt; - double scd; - const int cnum = cdsA->cnum; - const Cds *cdsi, *cdsj; - const Cds **cds = (const Cds **) cdsA->cds; - - scd = 0.0; - cnt = 0; - for (i = 0; i < cnum; ++i) + if (algo->scale > 0) { - cdsi = cds[i]; 
+ fprintf(transfile, "\n# Scale factors\n"); - if (cdsi->o[res] > 0) + for (i = 0; i < cnum; ++i) { - for (j = 0; j < i; ++j) - { - cdsj = cds[j]; - - if (cdsj->o[res] > 0) - { - scd += SqrCdsDist(cdsi, res, cdsj, res); - cnt++; - } - } + fprintf(transfile, + "MODEL %4d beta: %18.10f\n", + i+1, + cdsA->cds[i]->scale); } } - return(sqrt(scd / cnt)); + fprintf(transfile, "\n# Mean\n"); + + for (i = 0; i < cdsA->vlen; ++i) + { + fprintf(transfile, + "LM %5d M: % 18.12f % 18.12f % 18.12f\n", + i+1, + cdsA->avecds->x[i], + cdsA->avecds->y[i], + cdsA->avecds->z[i]); + } + + fprintf(transfile, "\n\n"); + fflush(NULL); + + free(v); + fclose(transfile); } -void +static void WriteVariance(CdsArray *cdsA, char *outfile_name) { FILE *varfile = NULL; @@ -470,12 +342,11 @@ fprintf(varfile, " #ATOM resName resSeq variance std_dev RMSD\n"); - if (cdsA->algo->varweight == 1 || cdsA->algo->leastsquares == 1) + if (algo->varweight || algo->leastsquares) { for (i = 0; i < cdsA->vlen; ++i) { dfi = cdsA->df[i]; - //rmsd = CalcResRMSD(cdsA, i); /* we have to factor in the fact that RMSD is over all axes, whereas my variance is *per* axis (must multiple variance by a factor of 3) */ @@ -492,7 +363,7 @@ notcore = 0; for (j=0; j < cdsA->cnum; ++j) - if (cdsA->cds[j]->o[i] == 0) + if (cdsA->cds[j]->nu[i] == 0) notcore = 1; if (notcore == 0) @@ -501,7 +372,7 @@ fprintf(varfile, "\n"); } } - else if (cdsA->algo->covweight == 1) + else if (algo->covweight) { for (i = 0; i < cdsA->vlen; ++i) { @@ -564,96 +435,6 @@ /* Eqn 1 and following paragraph, Kearsley, S.K. (1990) "An algorithm for the simultaneous superposition of a structural series." Journal of Computational Chemistry 11(10):1187-1192. 
*/ -/* double */ -/* CalcPRMSD(CdsArray *cdsA) */ -/* { */ -/* int i, j, k; */ -/* double sqrdist; */ -/* double wsqrdist; */ -/* double paRMSD, pawRMSD; */ -/* const int cnum = cdsA->cnum, vlen = cdsA->vlen; */ -/* const Cds **cds = (const Cds **) cdsA->cds; */ -/* const Cds *cdsi, *cdsj; */ -/* const double *w = (const double *) cdsA->w; */ -/* */ -/* sqrdist = wsqrdist = 0.0; */ -/* for (i = 0; i < cnum; ++i) */ -/* { */ -/* cdsi = cds[i]; */ -/* */ -/* for (j = 0; j < i; ++j) */ -/* { */ -/* cdsj = cds[j]; */ -/* */ -/* for (k = 0; k < vlen; ++k) */ -/* { */ -/* sqrdist += SqrCdsDist(cdsi, k, cdsj, k); */ -/* wsqrdist += (w[k] * SqrCdsDist(cdsi, k, cdsj, k)); */ -/* } */ -/* } */ -/* } */ -/* */ -/* paRMSD = (2.0 * sqrdist) / (double) (vlen * cnum * (cnum - 1)); */ -/* pawRMSD = (2.0 * wsqrdist) / (double) (vlen * cnum * (cnum - 1)); */ -/* cdsA->stats->ave_pawRMSD = sqrt(pawRMSD); */ -/* cdsA->stats->ave_paRMSD = sqrt(paRMSD); */ -/* */ -/* return (cdsA->stats->ave_paRMSD); */ -/* } */ - - -/* average of all possible unique pairwise RMSDs */ -/* Eqn 1 and following paragraph, Kearsley, S.K. (1990) "An algorithm for the - simultaneous superposition of a structural series." Journal of Computational - Chemistry 11(10):1187-1192. 
*/ -double -CalcPRMSD_old(CdsArray *cdsA) -{ - int i, j, k; - double sqrdist; - double wsqrdist; - double wtsum; - double paRMSD, pawRMSD; - const int cnum = cdsA->cnum, vlen = cdsA->vlen; - const Cds **cds = (const Cds **) cdsA->cds; - const Cds *cdsi, *cdsj; - const double *w = (const double *) cdsA->w; - - - sqrdist = wsqrdist = wtsum = 0.0; - for (i = 0; i < cnum; ++i) - { - cdsi = cds[i]; - - for (j = 0; j < i; ++j) - { - cdsj = cds[j]; - - for (k = 0; k < vlen; ++k) - { - if (cdsi->o[k] > 0 && cdsj->o[k] > 0) - { - wtsum += 1.0; - sqrdist += SqrCdsDist(cdsi, k, cdsj, k); - wsqrdist += (w[k] * SqrCdsDist(cdsi, k, cdsj, k)); - } - } - } - } - -// paRMSD = (2.0 * sqrdist) / (double) (vlen * cnum * (cnum - 1)); -// printf("\npaRMSD = %8.3e\n", sqrt(paRMSD)); - paRMSD = sqrdist / wtsum; -// printf("\npaRMSD = %8.3e\n", sqrt(paRMSD)); - //pawRMSD = (2.0 * wsqrdist) / (double) (vlen * cnum * (cnum - 1)); - pawRMSD = wsqrdist / wtsum; - cdsA->stats->ave_pawRMSD = sqrt(pawRMSD); - cdsA->stats->ave_paRMSD = sqrt(paRMSD); - - return (cdsA->stats->ave_paRMSD); -} - - double CalcPRMSD(CdsArray *cdsA) { @@ -664,7 +445,7 @@ double paRMSD, pawRMSD; const int cnum = cdsA->cnum, vlen = cdsA->vlen; const Cds **cds = (const Cds **) cdsA->cds; - const Cds *cdsi, *cdsj; + const Cds *cdsi = NULL, *cdsj = NULL; const double *w = (const double *) cdsA->w; @@ -678,13 +459,13 @@ { cdsi = cds[i]; - if (cdsi->o[k] > 0) + if (cdsi->nu[k]) { for (j = 0; j < i; ++j) { cdsj = cds[j]; - if (cdsj->o[k] > 0) + if (cdsj->nu[k]) { x = SqrCdsDist(cdsi, k, cdsj, k); //scd += x; @@ -704,12 +485,11 @@ // printf("\npaRMSD = %8.3e\n", sqrt(paRMSD)); paRMSD = sqrdist / wtsum; // printf("\npaRMSD = %8.3e\n", sqrt(paRMSD)); - //pawRMSD = (2.0 * wsqrdist) / (double) (vlen * cnum * (cnum - 1)); pawRMSD = wsqrdist / wtsum; - cdsA->stats->ave_pawRMSD = sqrt(pawRMSD); - cdsA->stats->ave_paRMSD = sqrt(paRMSD); + stats->ave_pawRMSD = sqrt(pawRMSD); + stats->ave_paRMSD = sqrt(paRMSD); - return 
(cdsA->stats->ave_paRMSD); + return (stats->ave_paRMSD); } @@ -722,42 +502,35 @@ { int i, vlen = cdsA->vlen; double *variance = cdsA->var; - Algorithm *algo = cdsA->algo; + double *evals = cdsA->evals; - if (algo->covweight == 1) + if (algo->covweight) { - cdsA->stats->mlRMSD = sqrt(cdsA->stats->wtnorm); + double det = 0.0; + for (i = 0; i < vlen; ++i) + det += log(evals[i]); + det /= vlen; + stats->mlRMSD = sqrt(exp(det)); } - else if (algo->varweight == 1) + else if (algo->varweight) { - cdsA->stats->mlRMSD = 0.0; + stats->mlRMSD = 0.0; for (i = 0; i < vlen; ++i) - cdsA->stats->mlRMSD += (1.0 / variance[i]); + stats->mlRMSD += (1.0 / variance[i]); - cdsA->stats->mlRMSD = sqrt(vlen / cdsA->stats->mlRMSD); - -/* double *newvar = malloc(vlen * sizeof(double));; */ -/* memcpy(newvar, variance, vlen * sizeof(double)); */ -/* qsort(newvar, vlen, sizeof(double), dblcmp); */ -/* */ -/* cdsA->stats->mlRMSD = 0.0; */ -/* for (i = 1; i < vlen; ++i) */ -/* cdsA->stats->mlRMSD += log(newvar[i]); */ -/* */ -/* cdsA->stats->mlRMSD = exp(3.0 * cdsA->stats->mlRMSD / (vlen-1)); */ -/* free(newvar); */ + stats->mlRMSD = sqrt(vlen / stats->mlRMSD); } else { - cdsA->stats->mlRMSD = 0.0; + stats->mlRMSD = 0.0; for (i = 0; i < vlen; ++i) - cdsA->stats->mlRMSD += variance[i]; + stats->mlRMSD += variance[i]; - cdsA->stats->mlRMSD = sqrt(cdsA->stats->mlRMSD / vlen); + stats->mlRMSD = sqrt(stats->mlRMSD / vlen); } - return(cdsA->stats->mlRMSD); + return(stats->mlRMSD); } @@ -775,32 +548,6 @@ } -/* double */ -/* SqrCdsDistMahal(const Cds *cds1, const int atom1, */ -/* const Cds *cds2, const int atom2, */ -/* const double *weights) */ -/* { */ -/* double xdist, ydist, zdist; */ -/* */ -/* xdist = weights[0] * mysquare(cds2->x[atom2] - cds1->x[atom1]); */ -/* ydist = weights[1] * mysquare(cds2->y[atom2] - cds1->y[atom1]); */ -/* zdist = weights[2] * mysquare(cds2->z[atom2] - cds1->z[atom1]); */ -/* */ -/* return(xdist + ydist + zdist); */ -/* } */ - - -/* double */ -/* SqrCdsDistMahal(const 
Cds *cds1, const int atom1, */ -/* const Cds *cds2, const int atom2, */ -/* const double *weights) */ -/* { */ -/* return(weights[0] * mysquare(cds2->x[atom2] - cds1->x[atom1]) + */ -/* weights[1] * mysquare(cds2->y[atom2] - cds1->y[atom1]) + */ -/* weights[2] * mysquare(cds2->z[atom2] - cds1->z[atom1])); */ -/* } */ - - double SqrCdsDistMahal2(const Cds *cds1, const int atom1, const Cds *cds2, const int atom2, @@ -812,79 +559,7 @@ } -double -SqrPDBCdsDist(PDBCds *cds1, int atom1, PDBCds *cds2, int atom2) -{ - double xdist, ydist, zdist; - - xdist = cds2->x[atom2] - cds1->x[atom1]; - ydist = cds2->y[atom2] - cds1->y[atom1]; - zdist = cds2->z[atom2] - cds1->z[atom1]; - - return(xdist * xdist + ydist * ydist + zdist * zdist); -} - - -double -CdsDist(Cds *cds1, int atom1, Cds *cds2, int atom2) -{ - double dist; - double xdist, ydist, zdist; - double xx, yy, zz; - double sum; - - xdist = cds2->x[atom2] - cds1->x[atom1]; - ydist = cds2->y[atom2] - cds1->y[atom1]; - zdist = cds2->z[atom2] - cds1->z[atom1]; - - xx = xdist * xdist; - yy = ydist * ydist; - zz = zdist * zdist; - - sum = xx + yy + zz; - dist = sqrt(sum); - - return(dist); -} - - -double -VecMag(const double *vec) -{ - double dist; - double xx, yy, zz; - double sum; - - xx = vec[0] * vec[0]; - yy = vec[1] * vec[1]; - zz = vec[2] * vec[2]; - - sum = xx + yy + zz; - dist = sqrt(sum); - - return(dist); -} - - -double -CoordMag(const Cds *cds, const int vec) -{ - double dist; - double xx, yy, zz; - double sum; - - xx = cds->x[vec] * cds->x[vec]; - yy = cds->y[vec] * cds->y[vec]; - zz = cds->z[vec] * cds->z[vec]; - - sum = xx + yy + zz; - dist = sqrt(sum); - - return(dist); -} - - -double +static double SqrCoordMag(const Cds *cds, const int vec) { double xx, yy, zz; @@ -900,23 +575,7 @@ } -double -CoordMult(const Cds *cds1, const Cds *cds2, const int vec) -{ - double xx, yy, zz; - double mag; - - xx = cds1->x[vec] * cds2->x[vec]; - yy = cds1->y[vec] * cds2->y[vec]; - zz = cds1->z[vec] * cds2->z[vec]; - - mag = xx + 
yy + zz; - - return(mag); -} - - -double +static double RadiusGyration(Cds *cds, const double *weights) { double sum; @@ -932,196 +591,39 @@ } -double -TraceCds(const Cds *cds1, const Cds *cds2, const double *weights) +static void +WriteResiduals(CdsArray *cdsA, char *outfile_name) { - double sum; + FILE *residualfile = NULL; int i; - sum = 0.0; - for (i = 0; i < cds1->vlen; ++i) - sum += (weights[i] * CoordMult(cds1, cds2, i)); - sum /= cds1->vlen; + residualfile = myfopen(outfile_name, "w"); + if (residualfile == NULL) + { + perror("\n ERROR"); + fprintf(stderr, + "\n ERROR99: could not open file '%s'. \n", outfile_name); + PrintTheseusTag(); + exit(EXIT_FAILURE); + } + + for (i = 0; i < cdsA->cnum * cdsA->vlen * 3; ++i) + { + fprintf(residualfile, + "%-3d %12.6f\n", + i+1, + cdsA->residuals[i]); + } + fputc('\n', residualfile); - return(sqrt(sum)); + fclose(residualfile); } -/* calculate all weighted residuals */ -void -CalcResiduals(CdsArray *cdsA) +static void +MomentsCds(CdsArray *cdsA) { - int i, j; - const int cnum = cdsA->cnum, vlen = cdsA->vlen; - const double *w = (const double *) cdsA->w; - double *sqrtw = malloc(cdsA->vlen * sizeof(double)); - double weight; - const double *avex = (const double *) cdsA->avecds->x, - *avey = (const double *) cdsA->avecds->y, - *avez = (const double *) cdsA->avecds->z; - Cds *cds; - - for (j = 0; j < vlen; ++j) - sqrtw[j] = sqrt(w[j]); - - for (i = 0; i < cnum; ++i) - { - for (j = 0; j < vlen; ++j) - { - weight = /* sqrtw[j] */ 1.0; - cds = cdsA->cds[i]; - cds->residual_x[j] = weight * (cds->x[j] - avex[j]); - cds->residual_y[j] = weight * (cds->y[j] - avey[j]); - cds->residual_z[j] = weight * (cds->z[j] - avez[j]); - } - } - - free(sqrtw); - /* StudentizeResiduals(cdsA); */ -} - - -void -StudentizeResiduals(CdsArray *cdsA) -{ - int i, j; - double sum, h, tmp; - const double ninv = 1.0 / cdsA->vlen; - const int cnum = cdsA->cnum, vlen = cdsA->vlen; - const Cds **cds = (const Cds **) cdsA->cds; - const double *var = 
(const double *) cdsA->var; - const double *avex = (const double *) cdsA->avecds->x, - *avey = (const double *) cdsA->avecds->y, - *avez = (const double *) cdsA->avecds->z; - - for (i = 0; i < cnum; ++i) - { - for (j = 0; j < vlen; ++j) - { - cds[i]->residual_x[j] = (avex[j] - cds[i]->x[j]); - cds[i]->residual_y[j] = (avey[j] - cds[i]->y[j]); - cds[i]->residual_z[j] = (avez[j] - cds[i]->z[j]); - } - } - - sum = 0.0; - for (i = 0; i < cnum; ++i) - { - for (j = 0; j < vlen; ++j) - { - sum += mysquare(cds[i]->residual_x[j]); - sum += mysquare(cds[i]->residual_y[j]); - sum += mysquare(cds[i]->residual_z[j]); - } - } - sum /= 3.0; - - for (i = 0; i < cnum; ++i) - { - for (j = 0; j < vlen; ++j) - { - h = ninv + mysquare(cds[i]->residual_x[j]) / sum; - tmp = var[j] * (1.0 - h); - cds[i]->residual_x[j] /= tmp; - cds[i]->residual_y[j] /= tmp; - cds[i]->residual_z[j] /= tmp; - } - } -} - - -void -PrintResiduals(CdsArray *cdsA) -{ - int i, j; - const Cds **cds = (const Cds **) cdsA->cds; - - putchar('\n'); - for (i = 0; i < cdsA->cnum; ++i) - { - for (j = 0; j < cdsA->vlen; ++j) - { - fprintf(stderr, "\n%-3d %12.6f %12.6f %12.6f", - j+1, - cds[i]->residual_x[j], - cds[i]->residual_y[j], - cds[i]->residual_z[j]); - } - } -} - - -void -WriteResiduals(CdsArray *cdsA, char *outfile_name) -{ - FILE *residualfile = NULL; - int i; - - residualfile = myfopen(outfile_name, "w"); - if (residualfile == NULL) - { - perror("\n ERROR"); - fprintf(stderr, - "\n ERROR99: could not open file '%s'. 
\n", outfile_name); - PrintTheseusTag(); - exit(EXIT_FAILURE); - } - - for (i = 0; i < cdsA->cnum * cdsA->vlen * 3; ++i) - { - fprintf(residualfile, - "%-3d %12.6f\n", - i+1, - cdsA->residuals[i]); - } - fputc('\n', residualfile); - - fclose(residualfile); -} - - -double -Durbin_Watson(CdsArray *cdsA) -{ - int i, j, jm; - double sumn, sumd; - Cds *cds; - - sumn = 0.0; - for (i = 0; i < cdsA->cnum; ++i) - { - for (j = 1; j < cdsA->vlen; ++j) - { - cds = cdsA->cds[i]; - jm = j-1; - sumn += mysquare(cds->residual_x[j] - cds->residual_x[jm]); - sumn += mysquare(cds->residual_y[j] - cds->residual_y[jm]); - sumn += mysquare(cds->residual_z[j] - cds->residual_z[jm]); - } - } - - sumd = 0.0; - for (i = 0; i < cdsA->cnum; ++i) - { - for (j = 0; j < cdsA->vlen; ++j) - { - cds = cdsA->cds[i]; - sumd += mysquare(cds->residual_x[j]); - sumd += mysquare(cds->residual_y[j]); - sumd += mysquare(cds->residual_z[j]); - } - } - - cdsA->stats->dw = sumn / sumd; - - return(cdsA->stats->dw); -} - - -void -MomentsCds(CdsArray *cdsA) -{ - double ave, median, adev, mdev, sdev, var, skew, kurt, hrange, lrange; + double ave, median, adev, mdev, sdev, var, skew, kurt, hrange, lrange; moments((const double *) cdsA->residuals, cdsA->cnum * cdsA->vlen * 3, /* data array and length */ @@ -1131,217 +633,8 @@ &skew, &kurt, /* skewness and kurtosis */ &hrange, &lrange); /* range of data, high and low */ - cdsA->stats->skewness[3] = skew; - cdsA->stats->kurtosis[3] = kurt; -} - - -void -SkewnessCds(CdsArray *cdsA) -{ - int i; - double skew; - - skew = 0.0; - for (i = 0; i < cdsA->cnum * cdsA->vlen * 3; ++i) - skew += mycube(cdsA->residuals[i]); - - cdsA->stats->skewness[3] = skew / (cdsA->cnum * cdsA->vlen * 3); -} - - -/* find the multivariate normal skewness - Mardia, K.V. (1970) "Measures of multivariate skewness and kurtosis with applications." - Biometrika 57, 519-530. 
-*/ - - -/* takes a dataset and finds the kurtosis */ -void -KurtosisCds(CdsArray *cdsA) -{ - int i; - double kurt; - - kurt = 0.0; - for (i = 0; i < cdsA->cnum * cdsA->vlen * 3; ++i) - kurt += mypow4(cdsA->residuals[i]); - - cdsA->stats->kurtosis[3] = kurt / (cdsA->cnum * cdsA->vlen * 3) - 3.0; -} - - -double -CoordxMatxCoord(const double *v1, const double *v2, const double **sigma) -{ - int j, k; - double vm[3] = {0.0, 0.0, 0.0}; - double val; - - for (j = 0; j < 3; ++j) - { - vm[j] = 0.0; - for (k = 0; k < 3; ++k) - vm[j] += (v1[k] * sigma[k][j]); - } - - val = 0.0; - for (j = 0; j < 3; ++j) - val += (vm[j] * v2[j]); - - /* printf("\n %f", val); */ - - return(val); -} - - -double -CalcANOVAF(CdsArray *cdsA) -{ - int i, j; - double *array1, *array2; - long unsigned int signsum; - int wilcoxZplus, mean, sigma; - CdsArray *anova; - - anova = CdsArrayInit(); - CdsArrayAlloc(anova, (cdsA->cnum * 2), cdsA->vlen); - anova->algo->method = cdsA->algo->method; - anova->algo->writestats = 0; - - for (i = 0; i < cdsA->cnum; ++i) - { - for (j = 0; j < cdsA->vlen; ++j) - { - anova->cds[i]->x[j] = cdsA->cds[i]->x[j]; - anova->cds[i]->y[j] = cdsA->cds[i]->y[j]; - anova->cds[i]->z[j] = cdsA->cds[i]->z[j]; - } - } - - for (i = 0; i < cdsA->cnum; ++i) - { - for (j = 0; j < cdsA->vlen; ++j) - { - anova->cds[i + cdsA->cnum]->x[j] = -(cdsA->cds[i]->x[j]); - anova->cds[i + cdsA->cnum]->y[j] = cdsA->cds[i]->y[j]; - anova->cds[i + cdsA->cnum]->z[j] = cdsA->cds[i]->z[j]; - } - } - - /*for (i = 0; i < anova->cnum; ++i) - PrintCds(anova->cds[i]); - fflush(NULL);*/ - - AveCds(anova); - cdsA->algo->rounds = MultiPose(anova); - - /* Kolmogorov-Smirnov distribution comparison */ - array1 = malloc((cdsA->vlen+1) * sizeof(double)); - array2 = malloc((cdsA->vlen+1) * sizeof(double)); - - memcpy(array1, anova->var, cdsA->vlen * sizeof(double)); - memcpy(array2, cdsA->var, cdsA->vlen * sizeof(double)); - -/* for (i = 0; i < cdsA->vlen; ++i) */ -/* { */ -/* array1[i] = anova->var[i]; */ -/* 
array2[i] = cdsA->var[i]; */ -/* } */ - - cdsA->stats->KSp = kstwo(array1, cdsA->vlen, array2, cdsA->vlen) / 2.0; - - /* one-tailed, paired sign-test distribution comparison */ - signsum = 0.0; - for (i = 0; i < cdsA->vlen; ++i) - { - if (anova->var[i] > cdsA->var[i]) - ++signsum; - } - - cdsA->stats->signp = Binomial_sum((long unsigned int) cdsA->vlen, signsum, 0.5); - - /* one-tailed, Wilcoxon ranked sign test */ - for (i = 0; i < cdsA->vlen; ++i) - { - array2[i] = anova->var[i] - cdsA->var[i]; - array1[i] = fabs(array1[i]); - } - - array1[cdsA->vlen] = array2[cdsA->vlen] = DBL_MAX; - quicksort2d(array1, array2, cdsA->vlen); - - wilcoxZplus = 0; - for (i = 0; i < cdsA->vlen; ++i) - { - if(array2[i] > 0.0) - wilcoxZplus += i; - } - - sigma = sqrt((double)cdsA->vlen * ((double)cdsA->vlen + 1) * (2.0 * (double)cdsA->vlen + 1) / 24.0); - mean = (double)cdsA->vlen * ((double)cdsA->vlen + 1) / 4.0; - cdsA->stats->wilcoxonp = normal_pdf((double)wilcoxZplus, mean, mysquare(sigma)); - - cdsA->stats->anova_RMSD = anova->stats->wRMSD_from_mean; - - CalcLogL(anova); - CalcAIC(anova); - - cdsA->stats->anova_logL = anova->stats->logL; - cdsA->stats->anova_AIC = anova->stats->AIC; - - CdsArrayDestroy(&anova); - free(array1); - free(array2); - - return(cdsA->stats->refl_RMSD); -} - - -void -CalcNormResidualsOld(CdsArray *cdsA) -{ - int i, j, k, m; - double logL, rootv; - const int cnum = cdsA->cnum, vlen = cdsA->vlen; - const double *var = (const double *) cdsA->var; - const Cds **cds = (const Cds **) cdsA->cds; - const Cds *cdsm; - const double *avex = (const double *) cdsA->avecds->x, - *avey = (const double *) cdsA->avecds->y, - *avez = (const double *) cdsA->avecds->z; - double *invvar = malloc(vlen * sizeof(double)); - double *normresid = NULL; - - if (cdsA->residuals == NULL) - cdsA->residuals = calloc(vlen * 3 * cnum, sizeof(double)); - - normresid = cdsA->residuals; - - for (i = 0; i < vlen; ++i) - invvar[i] = 1.0 / var[i]; - - /* memset(&SumMat[0][0], 0, 9 * 
sizeof(double)); */ - j = 0; - - for (k = 0; k < vlen; ++k) - { - rootv = sqrt(invvar[k]); - for (m = 0; m < cnum; ++m) - { - cdsm = (Cds *) cds[m]; - normresid[j] = (cdsm->x[k] - avex[k]) * rootv; - ++j; - normresid[j] = (cdsm->y[k] - avey[k]) * rootv; - ++j; - normresid[j] = (cdsm->z[k] - avez[k]) * rootv; - ++j; - } - } - - cdsA->stats->chi2 = chi_sqr_adapt(normresid, vlen * 3 * cnum, 0, &logL, 0.0, 1.0, normal_pdf, normal_lnpdf, normal_int); - - free(invvar); + stats->skewness[3] = skew; + stats->kurtosis[3] = kurt; } @@ -1352,8 +645,8 @@ double logL, rootv; const int cnum = cdsA->cnum, vlen = cdsA->vlen; const double *var = (const double *) cdsA->var; - const Cds **cds = (const Cds **) cdsA->cds; - const Cds *cdsm; + const Cds **cds = (const Cds **) cdsA->cds; + const Cds *cdsm = NULL; const double *avex = (const double *) cdsA->avecds->x, *avey = (const double *) cdsA->avecds->y, *avez = (const double *) cdsA->avecds->z; @@ -1376,7 +669,7 @@ { cdsm = (Cds *) cds[m]; - if (cdsm->o[k] == 1) + if (cdsm->nu[k]) { normresid[j] = (cdsm->x[k] - avex[k]) * rootv; ++j; @@ -1390,8 +683,8 @@ //VecPrint(normresid, j-1); exit(1); - cdsA->stats->chi2 = chi_sqr_adapt(normresid, j-1, 0, &logL, 0.0, 1.0, normal_pdf, normal_lnpdf, normal_int); -// printf("\nchi^2: %f\n", cdsA->stats->chi2); + stats->chi2 = chi_sqr_adapt(normresid, j-1, 0, &logL, 0.0, 1.0, normal_pdf, normal_lnpdf, normal_int); +// printf("\nchi^2: %f\n", stats->chi2); } @@ -1402,8 +695,8 @@ double logL, avevar; const int cnum = cdsA->cnum, vlen = cdsA->vlen; const double *var = (const double *) cdsA->var; - const Cds **cds = (const Cds **) cdsA->cds; - const Cds *cdsm; + const Cds **cds = (const Cds **) cdsA->cds; + const Cds *cdsm = NULL; const double *avex = (const double *) cdsA->avecds->x, *avey = (const double *) cdsA->avecds->y, *avez = (const double *) cdsA->avecds->z; @@ -1418,7 +711,7 @@ for (m = 0; m < vlen; ++m) avevar += var[m]; avevar /= vlen; -// printf("\n##### %f %f", avevar, 
cdsA->stats->stddev*cdsA->stats->stddev); fflush(NULL); +// printf("\n##### %f %f", avevar, stats->stddev*stats->stddev); fflush(NULL); j = 0; for (k = 0; k < vlen; ++k) @@ -1426,8 +719,8 @@ for (m = 0; m < cnum; ++m) { cdsm = (Cds *) cds[m]; - // printf("\n%4d %4d %f", k, m, cdsm->o[k]); fflush(NULL); - if (cdsm->o[k] == 1) + // printf("\n%4d %4d %d", k, m, cdsm->nu[k]); fflush(NULL); + if (cdsm->nu[k]) { normresid[j] = (cdsm->x[k] - avex[k]); ++j; @@ -1442,551 +735,150 @@ //VecPrint(normresid, j-1); exit(1); - cdsA->stats->chi2 = chi_sqr_adapt(normresid, j-1, 0, &logL, 0.0, 1.0, normal_pdf, normal_lnpdf, normal_int); -// printf("\nchi^2: %f\n", cdsA->stats->chi2); + stats->chi2 = chi_sqr_adapt(normresid, j-1, 0, &logL, 0.0, 1.0, normal_pdf, normal_lnpdf, normal_int); +// printf("\nchi^2: %f\n", stats->chi2); } double FrobTerm(CdsArray *cdsA) { - int i; - double trace; - const int cnum = cdsA->cnum, vlen = cdsA->vlen; - const int len = vlen * 3 * cnum; - double *residuals = cdsA->residuals; - - trace = 0.0; - for (i = 0; i < len; ++i) - trace += residuals[i] * residuals[i]; - - //cdsA->stats->chi2 = chi_sqr_adapt(residuals, len, 0, &logL, 0.0, 1.0, normal_pdf, normal_lnpdf, normal_int); - - return(-0.5 * trace); -} - - -static double -FrobTermDiag(CdsArray *cdsA) -{ - int k, m;; + int k, m; const int cnum = cdsA->cnum, vlen = cdsA->vlen; - const double *var = (const double *) cdsA->var; - const Cds **cds = (const Cds **) cdsA->cds; - const Cds *cdsm = NULL; - const double *avex = (const double *) cdsA->avecds->x, - *avey = (const double *) cdsA->avecds->y, - *avez = (const double *) cdsA->avecds->z; - double fterm, tmpx, tmpy, tmpz; - double *newvar = malloc(vlen * sizeof(double)); - - memcpy(newvar, cdsA->var, vlen * sizeof(double)); - qsort(newvar, vlen, sizeof(double), dblcmp_rev); - - fterm = 0.0; - for (k = 0; k < vlen; ++k) - { - if (var[k] != newvar[vlen-1] && - var[k] != newvar[vlen-2] && - var[k] != newvar[vlen-3]) - { - for (m = 0; m < cnum; ++m) - { - 
cdsm = (Cds *) cds[m]; - - tmpx = cdsm->x[k] - avex[k]; - tmpy = cdsm->y[k] - avey[k]; - tmpz = cdsm->z[k] - avez[k]; - - fterm += (tmpx*tmpx + tmpy*tmpy + tmpz*tmpz) / var[k]; - } - } - } - - free(newvar); - - return(fterm); -} - - -double -FrobTerm2(CdsArray *cdsA) -{ - int i, j, k, m; - double trace; - const int cnum = cdsA->cnum, vlen = cdsA->vlen; - const Cds **cds = (const Cds **) cdsA->cds; - const Cds *cdsi; - const double *avex = (const double *) cdsA->avecds->x, - *avey = (const double *) cdsA->avecds->y, - *avez = (const double *) cdsA->avecds->z; - double **ErrMat = MatAlloc(vlen, 3); - double **TmpMat = MatAlloc(3, vlen); - double **SumMat = MatAlloc(3, 3); - double **InvCovMat = MatAlloc(vlen, vlen); - - memset(&SumMat[0][0], 0, 9 * sizeof(double)); - - //pseudoinv_sym(cdsA->CovMat, InvCovMat, vlen, DBL_MIN); - PseudoinvSymGSL(cdsA->CovMat, InvCovMat, vlen, DBL_MIN); - - for (m = 0; m < cnum; ++m) - { - for (i = 0; i < vlen; ++i) - { - cdsi = (Cds *) cds[m]; - ErrMat[i][0] = cdsi->x[i] - avex[i]; - ErrMat[i][1] = cdsi->y[i] - avey[i]; - ErrMat[i][2] = cdsi->z[i] - avez[i]; - } - - /* (i x k)(k x j) = (i x j) */ - for (i = 0; i < 3; ++i) - { - for (j = 0; j < vlen; ++j) - { - TmpMat[i][j] = 0.0; - for (k = 0; k < vlen; ++k) - TmpMat[i][j] += ErrMat[k][i] * InvCovMat[k][j]; - } - } - - for (i = 0; i < 3; ++i) - { - for (j = 0; j < 3; ++j) - { - for (k = 0; k < vlen; ++k) - SumMat[i][j] += TmpMat[i][k] * ErrMat[k][j]; - } - } - } - - trace = SumMat[0][0] + SumMat[1][1] + SumMat[2][2]; - - MatDestroy(&ErrMat); - MatDestroy(&SumMat); - MatDestroy(&TmpMat); - MatDestroy(&InvCovMat); - - return(-0.5 * trace); -} - - -double -CalcHierarchLogL(CdsArray *cdsA) -{ - Algorithm *algo = cdsA->algo; - Statistics *stats = cdsA->stats; - const int vlen = cdsA->vlen, cnum = cdsA->cnum; - const int nd = cnum * 3; - double logL; - - switch(algo->hierarch) - { - case 0: - { - return(0.0); - break; - } - - case 1: - case 2: - case 7: - { - if (algo->varweight != 0) - { - 
double *newvar = malloc(vlen * sizeof(double)); - double b, c, xn1; - - b = stats->hierarch_p1; - c = stats->hierarch_p2; - - memcpy(newvar, cdsA->var, vlen * sizeof(double)); - qsort(newvar, vlen, sizeof(double), dblcmp_rev); - /* qsort-dblcmp_rev sorts big to small */ - xn1 = newvar[vlen-4]; - - logL = invgamma_logL(newvar, vlen-3, b, c); - //- b * ExpInvXn(xn1, b, c) - (1+c)*ExpLogXn(xn1, b, c) - //-c * log(b) - lgamma(c) - //+ log(invgamma_cdf(xn1, b, c)); - - free(newvar); - - return(logL); - /* return(dist_logL(invgamma_lnpdf, stats->hierarch_p1, stats->hierarch_p2, cdsA->var, cdsA->vlen - 3)); */ - /* invgamma_fit(newvar, cdsA->vlen - 3, &stats->hierarch_p1, &stats->hierarch_p12, &logL); */ - /* return(cdsA->vlen * invgamma_logL(stats->hierarch_p1, stats->hierarch_p2)); */ - } - else if (cdsA->algo->covweight != 0) - { - double **evecs = cdsA->tmpmatKK2; - int newlen; - - if (vlen - 3 < nd - 6) - newlen = vlen - 3; - else - newlen = nd - 6; - - eigenvalsym((const double **) cdsA->CovMat, cdsA->var, evecs, vlen); - logL = invgamma_logL(cdsA->var + vlen - newlen, newlen, stats->hierarch_p1, stats->hierarch_p2); - - return(logL); - } - - break; - } - - case 4: /* ML fit of variances to an inverse gamma distribution - 2 param */ - { - if (algo->varweight != 0) - { - double *newvar = malloc(vlen * sizeof(double)); - - memcpy(newvar, cdsA->var, vlen * sizeof(double)); - qsort(newvar, vlen, sizeof(double), dblcmp_rev); - logL = invgamma_logL(newvar, vlen - 3, stats->hierarch_p1, stats->hierarch_p2); - free(newvar); - - return(logL); - /* return(dist_logL(invgamma_lnpdf, stats->hierarch_p1, stats->hierarch_p2, cdsA->var, cdsA->vlen - 3)); */ - /* invgamma_fit(newvar, cdsA->vlen - 3, &stats->hierarch_p1, &stats->hierarch_p12, &logL); */ - /* return(cdsA->vlen * invgamma_logL(stats->hierarch_p1, stats->hierarch_p2)); */ - } - else if (cdsA->algo->covweight != 0) - { - double **evecs = cdsA->tmpmatKK2; - int newlen; - - if (vlen - 3 < nd - 6) - newlen = vlen - 3; - 
else - newlen = nd - 6; - - eigenvalsym((const double **) cdsA->CovMat, cdsA->var, evecs, vlen); - logL = invgamma_logL(cdsA->var + vlen - newlen, newlen, stats->hierarch_p1, stats->hierarch_p2); - - return(logL); - } - - break; - } - - case 3: - case 5: - case 6: - case 8: - case 9: - case 10: - case 11: - case 12: - case 13: - case 14: - case 15: - case 16: - case 17: - case 18: - case 19: - { - return(invgamma_logL(cdsA->var, cdsA->vlen, stats->hierarch_p1, stats->hierarch_p2)); - break; - } - -// case 8: /* Reciprocal Inverse Gaussian */ /* DLT debug */ -// /* return(dist_logL(recinvgauss_lnpdf, stats->hierarch_p1, stats->hierarch_p2, cdsA->var, cdsA->vlen)); */ -// break; - - // case 9: /* Lognormal */ - // return(cdsA->vlen * lognormal_logL(stats->hierarch_p1, stats->hierarch_p2)); - // break; - -// case 10: -// return(dist_logL(invgauss_lnpdf, stats->hierarch_p1, stats->hierarch_p2, cdsA->var, cdsA->vlen)); -// break; - - default: - { - printf("\n ERROR: Bad -g option \"%d\" \n", algo->hierarch); - Usage(0); - exit(EXIT_FAILURE); - break; - } - } - - return(0.0); -} - - -static double -CalcLogScaleJacob(CdsArray *cdsA) -{ - double scales; - int i; - - scales = 0.0; - for (i = 0; i < cdsA->cnum; ++i) - scales += log(cdsA->cds[i]->scale); - - return(3.0 * cdsA->vlen * scales); -} - - -/* Calculates the likelihood for a specified Gaussian model, given a - structural superposition. - - NOTA BENE: This function assumes that the variances, covariance matrices, - hierarchical model parameters, average coordinates, rotations, and - translations have all been pre-calculated. Even when not calculating the - optimal ML rotations and translation transformations, the other parameters - in general must be estimated iteratively, as described below. - - This is not nearly as trivial as it may first appear. 
For the dimensionally - weighted case, this involves an iterative ML estimate of the covariance - matrices, even when the atomic row-wise matrix is assumed to be diagonal or - proportional to the identity matrix. The way I do it, the superposition as a - whole is rotated to bring it into alignment with the principal axes of the - dimensional covariance matrix. Furthermore, the first term of the likelihood - equation (the Mahalonobius Frobenius matrix norm term) is normally equal to - NKD/2 at the maximum. However, when using shrinkage or hierarchical estimates - of the covariance matrices, this convenient simplification no longer holds, - and the double matrix-weighted Frobenius norm must be calcualted explicitly. -*/ -double -CalcLogL(CdsArray *cdsA) -{ - const int vlen = cdsA->vlen; - const double cnum = cdsA->cnum; - const double nk = cnum * vlen; - const double nd = cnum * 3.0; - const double ndk = nk * 3.0; - const double ndk2 = 0.5 * ndk; - const double *var = (const double *) cdsA->var; - double lndetrow , frobterm, igL, scales; - Algorithm *algo = cdsA->algo; - Statistics *stats = cdsA->stats; - int i; - - lndetrow = frobterm = igL = 0.0; - - if (algo->leastsquares == 1) - { - frobterm = FrobTerm(cdsA); - lndetrow = 2.0 * vlen * log(cdsA->stats->stddev); - } - else if (algo->varweight == 1) - { - if (algo->hierarch != 0) - { - double *newvar = malloc(vlen * sizeof(double)); - double xn1; - - memcpy(newvar, var, vlen * sizeof(double)); - qsort(newvar, vlen, sizeof(double), dblcmp_rev); - /* qsort-dblcmp_rev sorts big to small */ - - xn1 = newvar[vlen - 4]; - - lndetrow = 0.0; - for (i = 0; i < vlen-3; ++i) - lndetrow += log(newvar[i]); - - //lndetrow += ExpLogXn(stats->hierarch_p1, stats->hierarch_p2, xn1); - - frobterm = FrobTermDiag(cdsA); - igL = CalcHierarchLogL(cdsA); - - free(newvar); - } - else - { - lndetrow = 0.0; - for (i = 0; i < vlen; ++i) - lndetrow += log(var[i]); - - frobterm = -ndk2 /* FrobTerm(cdsA) */; - } - } - else if (algo->covweight == 
1) - { - lndetrow = MatSymLnDet((const double **) cdsA->CovMat, vlen); - - if (algo->hierarch != 0) - { - frobterm = FrobTerm2(cdsA); - igL = CalcHierarchLogL(cdsA); - } - else - { - frobterm = -ndk2; - } - } - - if (algo->scale > 0) - scales = CalcLogScaleJacob(cdsA); - else - scales = 0.0; - - if (algo->verbose == 1) - { - printf("! scales frobterm -ndk2 igL lndetrow covs\n"); - printf("! % 12.4f % 12.4f % 12.4f % 12.4f % 12.4f % 12.4f\n", - scales, frobterm, -ndk2, igL, - 0.5 * nd * lndetrow, - - 0.5 * nd * lndetrow); - } - -/* printf("\n _>_>_>_>_>_>_>_>_> Frobterm: %f, -NDK/2: %f", FrobTerm(cdsA), -ndk2); */ - - stats->logL = scales - + frobterm - - ndk2 * log(2.0*MY_PI) - - 0.5 * nd * lndetrow - + igL; - - return(stats->logL); -} - - -static double ** -CalcCov(CdsArray *cdsA) -{ - double newx1, newy1, newz1, newx2, newy2, newz2; - double covsum; - double *cdskx, *cdsky, *cdskz; - int i, j, k; - const int cnum = cdsA->cnum; - const int vlen = cdsA->vlen; - const Cds **cds = (const Cds **) cdsA->cds; - const Cds *cdsk; - double **CovMat = MatAlloc(vlen, vlen); - const double *avex = (const double *) cdsA->avecds->x, - *avey = (const double *) cdsA->avecds->y, - *avez = (const double *) cdsA->avecds->z; - - - /* calculate covariance matrix of atoms across structures, - based upon current superposition, put in CovMat */ - for (i = 0; i < vlen; ++i) - { - for (j = 0; j <= i; ++j) - { - covsum = 0.0; - for (k = 0; k < cnum; ++k) - { - cdsk = cds[k]; - cdskx = cdsk->x; - cdsky = cdsk->y; - cdskz = cdsk->z; + const Cds **cds = (const Cds **) cdsA->cds; + const Cds *cdsm = NULL; + const double *avex = (const double *) cdsA->avecds->x, + *avey = (const double *) cdsA->avecds->y, + *avez = (const double *) cdsA->avecds->z; + double fterm, tmpx, tmpy, tmpz; - newx1 = cdskx[i] - avex[i]; - newy1 = cdsky[i] - avey[i]; - newz1 = cdskz[i] - avez[i]; - newx2 = cdskx[j] - avex[j]; - newy2 = cdsky[j] - avey[j]; - newz2 = cdskz[j] - avez[j]; + fterm = 0.0; + for (k = 0; k < 
vlen; ++k) + { + for (m = 0; m < cnum; ++m) + { + cdsm = (Cds *) cds[m]; - #ifdef FP_FAST_FMA - covsum += fma(newx1, newx2, fma(newy1, newy2, newz1 * newz2)); - #else - covsum += (newx1 * newx2 + newy1 * newy2 + newz1 * newz2); - #endif - } + tmpx = cdsm->x[k] - avex[k]; + tmpy = cdsm->y[k] - avey[k]; + tmpz = cdsm->z[k] - avez[k]; - CovMat[i][j] = CovMat[j][i] = covsum; /* sample variance, ML biased not n-1 definition */ + fterm += (tmpx*tmpx + tmpy*tmpy + tmpz*tmpz); } } - return(CovMat); + return(fterm / stats->var); } -static double * -CalcVar(CdsArray *cdsA) +double +FrobTermDiag(CdsArray *cdsA) { - double newx, newy, newz; - double varsum; - int i, k; - const int cnum = cdsA->cnum; - const int vlen = cdsA->vlen; - const Cds **cds = (const Cds **) cdsA->cds; - const Cds *cdsk; - double *var = malloc(vlen * sizeof(double)); + int k, m; + const int cnum = cdsA->cnum, vlen = cdsA->vlen; + const double *var = (const double *) cdsA->var; + const Cds **cds = (const Cds **) cdsA->cds; + const Cds *cdsm = NULL; const double *avex = (const double *) cdsA->avecds->x, *avey = (const double *) cdsA->avecds->y, *avez = (const double *) cdsA->avecds->z; + double fterm, tmpx, tmpy, tmpz, invvark; - AveCds(cdsA); - - /* calculate covariance matrix of atoms across structures, - based upon current superposition, put in CovMat */ - for (i = 0; i < vlen; ++i) + fterm = 0.0; + for (k = 0; k < vlen; ++k) { - varsum = 0.0; - for (k = 0; k < cnum; ++k) + invvark = 1.0 / var[k]; + for (m = 0; m < cnum; ++m) { - cdsk = cds[k]; + cdsm = (Cds *) cds[m]; - newx = cdsk->x[i] - avex[i]; - newy = cdsk->y[i] - avey[i]; - newz = cdsk->z[i] - avez[i]; + tmpx = cdsm->x[k] - avex[k]; + tmpy = cdsm->y[k] - avey[k]; + tmpz = cdsm->z[k] - avez[k]; - #ifdef FP_FAST_FMA - varsum += fma(newx, newx, fma(newy, newy, newz * newz)); - #else - varsum += (newx * newx + newy * newy + newz * newz); - #endif + fterm += (tmpx*tmpx + tmpy*tmpy + tmpz*tmpz) * invvark; } - - var[i] = varsum; /* sample variance, ML 
biased not n-1 definition */ } - return(var); + return(fterm); } double -CalcMgLogLCov(CdsArray *cdsA) +FrobTerm2(CdsArray *cdsA) { - const int vlen = cdsA->vlen; - const double cnum = cdsA->cnum; - double **CovMat = NULL; - double *eval = malloc(vlen * sizeof(double)); - double mglogl, term, lndet; - const double psi = cdsA->stats->hierarch_p1; - int i; + int i, j, k, m; + double trace; + const int cnum = cdsA->cnum, vlen = cdsA->vlen; + const Cds **cds = (const Cds **) cdsA->cds; + const Cds *cdsi = NULL; + const double *avex = (const double *) cdsA->avecds->x, + *avey = (const double *) cdsA->avecds->y, + *avez = (const double *) cdsA->avecds->z; + double **ErrMat = MatAlloc(vlen, 3); + double **TmpMat = MatAlloc(3, vlen); + double **SumMat = MatAlloc(3, 3); + double **InvCovMat = MatAlloc(vlen, vlen); - term = 0.5 * (3.0 * cnum - vlen + 2.0); + memset(&SumMat[0][0], 0, 9 * sizeof(double)); -/* printf("\nterm:%g\n", term); */ -/* fflush(NULL); */ + //pseudoinv_sym(cdsA->CovMat, InvCovMat, vlen, DBL_MIN); + PseudoinvSymGSL((const double **) cdsA->CovMat, InvCovMat, vlen, DBL_MIN); - mglogl = MultivarLnGamma(vlen, term) - - 3.0 * cnum * log(M_PI) - - 0.5 * vlen * (vlen - 1.0) * log(2.0) - + 0.5 * log(psi); + for (m = 0; m < cnum; ++m) + { + for (i = 0; i < vlen; ++i) + { + cdsi = (Cds *) cds[m]; + ErrMat[i][0] = cdsi->x[i] - avex[i]; + ErrMat[i][1] = cdsi->y[i] - avey[i]; + ErrMat[i][2] = cdsi->z[i] - avez[i]; + } -/* printf("\nmglogl:%g\n", mglogl); */ -/* fflush(NULL); */ + /* (i x k)(k x j) = (i x j) */ + for (i = 0; i < 3; ++i) + { + for (j = 0; j < vlen; ++j) + { + TmpMat[i][j] = 0.0; + for (k = 0; k < vlen; ++k) + TmpMat[i][j] += ErrMat[k][i] * InvCovMat[k][j]; + } + } - CovMat = CalcCov(cdsA); + for (i = 0; i < 3; ++i) + { + for (j = 0; j < 3; ++j) + { + for (k = 0; k < vlen; ++k) + SumMat[i][j] += TmpMat[i][k] * ErrMat[k][j]; + } + } + } - for (i = 0; i < vlen; ++i) - CovMat[i][i] += psi; + trace = SumMat[0][0] + SumMat[1][1] + SumMat[2][2]; - 
EigenvalsGSLDest(CovMat, vlen, eval); + MatDestroy(&ErrMat); + MatDestroy(&SumMat); + MatDestroy(&TmpMat); + MatDestroy(&InvCovMat); - lndet = 0.0; - for (i = 0; i < vlen; ++i) - lndet += log(eval[i]); + return(trace); +} - mglogl -= term * lndet; - cdsA->stats->mglogl = mglogl; +double +CalcLogScaleJacob(CdsArray *cdsA) +{ + double scales; + int i; - free(eval); - MatDestroy(&CovMat); + scales = 0.0; + for (i = 0; i < cdsA->cnum; ++i) + scales += log(cdsA->cds[i]->scale); - return(mglogl); + return(3.0 * cdsA->vlen * scales); } @@ -1994,39 +886,62 @@ CalcMgLogL(CdsArray *cdsA) { const int vlen = cdsA->vlen; - const double cnum = cdsA->cnum; - double *var3N = NULL; - double mglogl, term, lndet; - const double psi = cdsA->stats->hierarch_p1; + const int cnum = cdsA->cnum; + double *var3N = cdsA->samplevar3N; + double *evals = cdsA->evals; + double mglogl = 0.0, term, lndet, sum; + const double phi = 2.0*stats->hierarch_p1; + double nu = algo->covnu; int i; - term = 0.5 * (3.0 * cnum + 1.0); /* n = 1 corresponds to c = 0.5 for inverse gamma */ - mglogl = vlen * gsl_sf_lngamma(term) - - 1.5 * cnum * log(M_PI); + CalcVar(cdsA); + + if (algo->leastsquares) + { + term = 0.5 * (3.0 * vlen * cnum - 2.0); // with Jeffreys prior on phi this is just 3KN/2 + sum = 0.0; + for (i = 0; i < vlen; ++i) + sum += var3N[i]; -/* printf("\nmglogl:%g", mglogl); */ -/* fflush(NULL); */ + sum *= 0.5; - mglogl += 0.5 * vlen * log(psi); + mglogl = gsl_sf_lngamma(term) + - 0.5 * (3.0 * vlen * cnum) * log(2.0*M_PI) + -term * log(sum); + } + else if (algo->varweight) + { + term = 0.5 * (3.0 * cnum + 1.0); /* n = 1 corresponds to c = 0.5 for inverse gamma */ -/* printf("\npsi:%g (%g)", psi, 0.5 * vlen * log(psi)); */ -/* fflush(NULL); */ + lndet = 0.0; + for (i = 0; i < vlen; ++i) + lndet += log(var3N[i] + phi); - var3N = CalcVar(cdsA); + mglogl = vlen * gsl_sf_lngamma(term) + - term * vlen * log(M_PI) + + 0.5 * vlen * log(phi) + - term * lndet; - lndet = 0.0; - for (i = 0; i < vlen; ++i) - 
lndet += log(var3N[i] + psi); + //printf("mglogl: % 20.10f % 20.10f % 20.10e % 20.10f\n", term, lndet, phi, mglogl); + } + else if (algo->covweight > 0) + { + term = 0.5 * (3.0 * cnum + nu); -/* printf("\nlndet:%g (%g)\n\n", lndet, -term * lndet); */ -/* fflush(NULL); */ + mglogl = MultivarLnGamma(vlen, term) + - MultivarLnGamma(vlen, 0.5 * nu) + - 3.0 * cnum * vlen * log(M_PI) + + 0.5 * vlen * nu * log(phi); - mglogl -= term * lndet; + lndet = 0.0; + for (i = 0; i < vlen; ++i) + lndet += log(phi + 3.0 * cnum * evals[i]); - cdsA->stats->mglogl = mglogl; + mglogl -= term * lndet; + } - free(var3N); + stats->mlogL = mglogl; return(mglogl); } @@ -2036,7 +951,7 @@ double CalcParamNum(CdsArray *cdsA) { - Algorithm *algo = cdsA->algo; + const double vlen = cdsA->vlen; const double cnum = cdsA->cnum; double params; @@ -2044,13 +959,13 @@ params = 0.0; /* for the atomic covariances/variances */ - if (algo->leastsquares == 1) + if (algo->leastsquares) params += 1.0; - if (algo->varweight != 0) - params += vlen; +// if (algo->varweight) +// params += vlen; - if (algo->covweight != 0) + if (algo->covweight) params += vlen * (vlen + 1.0) / 2.0; /* for the hierarchical parameters */ @@ -2060,6 +975,8 @@ break; case 1: /* ML fit of variances to an inverse gamma distribution - 1 param */ + case 5: + case 6: params += 1.0; break; @@ -2067,48 +984,45 @@ case 2: case 3: case 4: - case 5: - case 6: case 7: case 8: case 9: case 10: case 11: - case 12: case 13: params += 2.0; break; } /* for the mean */ - if (algo->noave == 0) + if (algo->doave) params += 3.0 * vlen; /* translations */ - if (algo->notrans == 0) + if (algo->dotrans) params += 3.0 * cnum; /* rotations */ - if (algo->norot == 0) + if (algo->dorot) params += 3.0 * cnum; return(params); } -void +static void CalcAIC(CdsArray *cdsA) { double n, p; - cdsA->stats->nparams = p = CalcParamNum(cdsA); - cdsA->stats->ndata = n = 3.0 * cdsA->cnum * cdsA->vlen; + stats->nparams = p = CalcParamNum(cdsA); + stats->ndata = n = 3.0 * 
cdsA->cnum * cdsA->vlen; - cdsA->stats->AIC = cdsA->stats->logL - p * n / (n - p - 1); + stats->AIC = stats->mlogL - p * n / (n - p - 1); } -void +static void CalcBIC(CdsArray *cdsA) { double n, p; @@ -2116,7 +1030,106 @@ p = CalcParamNum(cdsA); n = 3.0 * cdsA->cnum * cdsA->vlen; - cdsA->stats->BIC = cdsA->stats->logL - (log(n) * p / 2.0); + stats->BIC = stats->mlogL - (log(n) * p / 2.0); +} + + +double ** +CalcCov(CdsArray *cdsA) +{ + double newx1, newy1, newz1, newx2, newy2, newz2; + double covsum; + double *cdskx = NULL, *cdsky = NULL, *cdskz = NULL; + int i, j, k; + const int cnum = cdsA->cnum; + const int vlen = cdsA->vlen; + const Cds **cds = (const Cds **) cdsA->cds; + const Cds *cdsk = NULL; + double **CovMat = MatAlloc(vlen, vlen); + const double *avex = (const double *) cdsA->avecds->x, + *avey = (const double *) cdsA->avecds->y, + *avez = (const double *) cdsA->avecds->z; + + + /* calculate covariance matrix of atoms across structures, + based upon current superposition, put in CovMat */ + for (i = 0; i < vlen; ++i) + { + for (j = 0; j <= i; ++j) + { + covsum = 0.0; + for (k = 0; k < cnum; ++k) + { + cdsk = cds[k]; + cdskx = cdsk->x; + cdsky = cdsk->y; + cdskz = cdsk->z; + + newx1 = cdskx[i] - avex[i]; + newy1 = cdsky[i] - avey[i]; + newz1 = cdskz[i] - avez[i]; + + newx2 = cdskx[j] - avex[j]; + newy2 = cdsky[j] - avey[j]; + newz2 = cdskz[j] - avez[j]; + + #ifdef FP_FAST_FMA + covsum += fma(newx1, newx2, fma(newy1, newy2, newz1 * newz2)); + #else + covsum += (newx1 * newx2 + newy1 * newy2 + newz1 * newz2); + #endif + } + + CovMat[i][j] = CovMat[j][i] = covsum; /* sample variance, ML biased not n-1 definition */ + } + } + + return(CovMat); +} + + +/* NB: Calculates var * 3 * N */ +void +CalcVar(CdsArray *cdsA) +{ + double newx, newy, newz; + double varsum, stddev; + int i, k; + const int cnum = cdsA->cnum; + const int vlen = cdsA->vlen; + const Cds **cds = (const Cds **) cdsA->cds; + const Cds *cdsk = NULL; + const double *avex = (const double *) 
cdsA->avecds->x, + *avey = (const double *) cdsA->avecds->y, + *avez = (const double *) cdsA->avecds->z; + + //if (algo->doave) + //AveCds(cdsA); + + stddev = 0.0; + for (i = 0; i < vlen; ++i) + { + varsum = 0.0; + for (k = 0; k < cnum; ++k) + { + cdsk = cds[k]; + + newx = cdsk->x[i] - avex[i]; + newy = cdsk->y[i] - avey[i]; + newz = cdsk->z[i] - avez[i]; + + #ifdef FP_FAST_FMA + varsum += fma(newx, newx, fma(newy, newy, newz * newz)); + #else + varsum += (newx * newx) + (newy * newy) + (newz * newz); + #endif + } + + cdsA->samplevar3N[i] = varsum; + stddev += varsum; + } + + stats->stddev = sqrt(stddev/(3.0*cnum*vlen)); } @@ -2199,7 +1212,7 @@ const int vlen = scratchA->vlen; int i; double term, trsiginv; - Cds *cds = scratchA->avecds; + Cds *cds = scratchA->avecds; trsiginv = 0.0; for (i = 0; i < vlen; ++i) @@ -2232,16 +1245,15 @@ void CalcStats(CdsArray *incdsA) { - int i; + int i, j; int tmph; double smallestRMSD, n; - Algorithm *algo = incdsA->algo; const int cnum = incdsA->cnum; const int vlen = incdsA->vlen; double *evals = malloc(3 * sizeof(double)); double **rotmat = MatAlloc(3, 3); double **lastmat = MatAlloc(3,3); - CdsArray *cdsA = NULL; + CdsArray *cdsA = NULL; // cdsA = CdsArrayInit(); // CdsArrayAlloc(cdsA, cnum, vlen); @@ -2250,7 +1262,7 @@ cdsA = incdsA; -// if (algo->covweight == 1) +// if (algo->covweight) // SetupCovWeighting(cdsA); // else @@ -2258,20 +1270,14 @@ if (cdsA->CovMat == NULL) cdsA->CovMat = MatAlloc(vlen, vlen); - //if (algo->alignment == 1) + //if (algo->alignment) CalcDf(cdsA); - if (algo->bfact > 0) - { - for (i = 0; i < cnum; ++i) - Bfacts2PrVars(cdsA, i); - } - - if (algo->noave == 0) + if (algo->doave) { - if (algo->alignment == 1) + if (algo->alignment) { - AveCdsOcc(cdsA); + AveCdsNu(cdsA); EM_MissingCds(cdsA); //printf("\n\nAveCds\n"); //PrintCds(scratchA->avecds); @@ -2281,29 +1287,36 @@ AveCds(cdsA); } - if (algo->mbias == 1) + if (algo->mbias) UnbiasMean(cdsA); } - CalcCovariances(cdsA); - /* CheckVars(cdsA); */ + 
CalcVar(cdsA); + + if (algo->docovars) + CalcCovariances(cdsA); + + if (algo->dohierarch) + { + if (algo->varweight || algo->covweight) + HierarchVars(cdsA); + } + CalcWts(cdsA); /* CheckVars(cdsA); */ - if (algo->leastsquares == 1) + if (algo->leastsquares) CalcNormResidualsLS(cdsA); else CalcNormResiduals(cdsA); - /* CheckVars(cdsA); */ - if (algo->write_file == 1) + if (algo->write_file) { char *residuals_name = mystrcat(algo->rootname, "_residuals.txt"); WriteResiduals(cdsA, residuals_name); free(residuals_name); } - CalcLogL(cdsA); CalcMgLogL(cdsA); CalcAIC(cdsA); CalcBIC(cdsA); @@ -2312,42 +1325,57 @@ Vars2Bfacts(cdsA); -/* SkewnessCds(cdsA); */ -/* KurtosisCds(cdsA); */ MomentsCds(cdsA); - cdsA->stats->omnibus_chi2 = (vlen * cdsA->stats->hierarch_chi2 + vlen * cnum * 3 * cdsA->stats->chi2) / (vlen * cnum * 3 + vlen); - cdsA->stats->omnibus_chi2_P = chisqr_sdf(vlen * cdsA->stats->hierarch_chi2 + vlen * cnum * 3 * cdsA->stats->chi2, - vlen * cnum * 3 + vlen, 0); + if (algo->leastsquares) + { + stats->omnibus_chi2 = stats->chi2; // DLT FIX + stats->omnibus_chi2_P = 1.0; // DLT FIX + } + else + { + stats->omnibus_chi2 = (vlen * stats->hierarch_chi2 + vlen * cnum * 3 * stats->chi2) / (vlen * cnum * 3 + vlen); + stats->omnibus_chi2_P = chisqr_sdf(vlen * stats->hierarch_chi2 + vlen * cnum * 3 * stats->chi2, + vlen * cnum * 3 + vlen, 0); + } n = (double) vlen * cnum * 3; - cdsA->stats->SES = sqrt((6.0 * n * (n-1)) / ((n-2) * (n+1) * (n+3))); /* exact formulas */ - cdsA->stats->SEK = sqrt((24.0 * n * (n-1) * (n-1)) / ((n-3) * (n-2) * (n+3) * (n+5))); + stats->SES = sqrt((6.0 * n * (n-1)) / ((n-2) * (n+1) * (n+3))); /* exact formulas */ + stats->SEK = sqrt((24.0 * n * (n-1) * (n-1)) / ((n-3) * (n-2) * (n+3) * (n+5))); - if (algo->write_file == 1) + if (algo->write_file) { char *variances_name = mystrcat(algo->rootname, "_variances.txt"); WriteVariance(cdsA, variances_name); free(variances_name); } - if (algo->covweight == 1 && (algo->write_file > 0 || algo->info != 0) 
&& algo->pca == 0) + if (algo->covweight && (algo->write_file > 0 || algo->info) && algo->pca == 0) { -/* if (algo->alignment == 1) */ -/* CalcCovMatOcc(cdsA); */ +/* if (algo->alignment) */ +/* CalcCovMatNu(cdsA); */ /* else */ /* CalcCovMat(cdsA); */ char *cov_name = mystrcat(algo->rootname, "_cov.mat"); char *cor_name = mystrcat(algo->rootname, "_cor.mat"); + double nu = algo->covnu; + double fact = (3.0*cnum+nu)/(3.0*cnum+nu-vlen-1.0); + + for (i = 0; i < vlen; ++i) + for (j = 0; j < vlen; ++j) + cdsA->CovMat[i][j] *= fact; PrintCovMatGnuPlot((const double **) cdsA->CovMat, vlen, cov_name); + + write_C_mat((const double **) cdsA->CovMat, vlen, 4, 10); + CovMat2CorMat(cdsA->CovMat, vlen); PrintCovMatGnuPlot((const double **) cdsA->CovMat, vlen, cor_name); free(cov_name); free(cor_name); } - if (algo->fullpca == 1) + if (algo->fullpca) { printf(" Calculating anisotropic Principal Components of the superposition ... \n"); fflush(NULL); @@ -2363,41 +1391,26 @@ printf(" Calculating isotropic Principal Components of the superposition ... \n"); fflush(NULL); - if (algo->alignment == 1) - CalcCovMatOcc(cdsA); + if (algo->alignment) + CalcCovMatNu(cdsA); else CalcCovMat(cdsA); - tmph = cdsA->algo->hierarch; - -/* if (cdsA->algo->hierarch >= 1 && cdsA->algo->hierarch <= 8) */ /* DLT -- I don't understand why I was using this; do I need it? */ -/* cdsA->algo->hierarch = 7; */ -/* else */ -/* cdsA->algo->hierarch = 12; */ + tmph = algo->hierarch; HierarchVars(cdsA); - cdsA->algo->hierarch = tmph; + algo->hierarch = tmph; //#include "internmat.h" //memcpy(&cdsA->CovMat[0][0], &internmat[0][0], vlen * vlen * sizeof(double)); CalcPCA(cdsA); /* PCA analysis of covariance matrix */ } - if (algo->modelpca == 1) - { - printf(" Calculating Principal Components across models ... 
\n"); - fflush(NULL); - - CalcStructPCA(cdsA); - } - - if (algo->stats == 1) + if (algo->stats) { RadiusGyration(cdsA->avecds, cdsA->w); for (i = 0; i < cnum; ++i) RadiusGyration(cdsA->cds[i], cdsA->w); - - Durbin_Watson(cdsA); } printf(" Calculating likelihood statistics ... \n"); @@ -2409,16 +1422,13 @@ if (smallestRMSD > cdsA->cds[i]->wRMSD_from_mean) { smallestRMSD = cdsA->cds[i]->wRMSD_from_mean; - cdsA->stats->median = i; + stats->median = i; } } - // CopyStats(incdsA, cdsA); - free(evals); MatDestroy(&rotmat); MatDestroy(&lastmat); - // CdsArrayDestroy(&cdsA); } @@ -2427,10 +1437,9 @@ { CalcStats(cdsA); - cdsA->stats->starting_stddev = cdsA->stats->stddev; - cdsA->stats->starting_paRMSD = cdsA->stats->ave_paRMSD; - cdsA->stats->starting_pawRMSD = cdsA->stats->ave_pawRMSD; - cdsA->stats->starting_ave_wRMSD_from_mean = cdsA->stats->ave_pawRMSD * sqrt((double)(cdsA->cnum - 1) / (double)(2 * cdsA->cnum)); - cdsA->stats->starting_mlRMSD = cdsA->stats->mlRMSD; - cdsA->stats->starting_logL = cdsA->stats->logL; + stats->starting_stddev = stats->stddev; + stats->starting_paRMSD = stats->ave_paRMSD; + stats->starting_pawRMSD = stats->ave_pawRMSD; + stats->starting_mlRMSD = stats->mlRMSD; + stats->starting_logL = stats->logL; } diff -Nru theseus-2.0.6/pdbStats.h theseus-3.0.0/pdbStats.h --- theseus-2.0.6/pdbStats.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/pdbStats.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,13 +29,7 @@ #include "pdbMalloc.h" void -CalcStats(CdsArray *cdsA); - -void -CalcPreStats(CdsArray *cdsA); - -void -CopyStats(CdsArray *cdsA1, CdsArray *cdsA2); +CheckVars(CdsArray *cdsA); void CalcDf(CdsArray *cdsA); @@ -44,15 +38,12 @@ VarianceCds(CdsArray *cdsA); double -VarianceCdsOcc(CdsArray *cdsA); +VarianceCdsNu(CdsArray *cdsA); void WriteTransformations(CdsArray *cdsA, char *outfile_name); void -WriteVariance(CdsArray *cdsA, char *outfile_name); - -void Bfacts2PrVars(CdsArray *cdsA, int coord); double @@ -62,80 +53,25 @@ CalcMLRMSD(CdsArray *cdsA); double -SqrCdsDist(const Cds *cds1, const int atom1, const Cds *cds2, const int atom2); - -double -SqrCdsDistMahal(const Cds *cds1, const int atom1, - const Cds *cds2, const int atom2, - const double *weights); +SqrCdsDist(const Cds *cds1, const int atom1, + const Cds *cds2, const int atom2); double SqrCdsDistMahal2(const Cds *cds1, const int atom1, - const Cds *cds2, const int atom2, - const double weight); - -double -SqrPDBCdsDist(PDBCds *cds1, int atom1, PDBCds *cds2, int atom2); - -double -CdsDist(Cds *cds1, int atom1, Cds *cds2, int atom2); - -double -VecMag(const double *vec); - -double -CoordMag(const Cds *cds, const int vec); - -double -SqrCoordMag(const Cds *cds, const int vec); - -double -CoordMult(const Cds *cds1, const Cds *cds2, const int vec); - -double -RadiusGyration(Cds *cds, const double *weights); - -double -TraceCds(const Cds *cds1, const Cds *cds2, const double *weights); - -void -CalcResiduals(CdsArray *cdsA); + const Cds *cds2, const int atom2, + const double weight); void -StudentizeResiduals(CdsArray *cdsA); - -void -PrintResiduals(CdsArray *cdsA); - -void -WriteResiduals(CdsArray *cdsA, char *outfile_name); - -double -Durbin_Watson(CdsArray *cdsA); - -void -ExpectationVector(CdsArray *cdsA); - -void -MomentsCds(CdsArray *cdsA); - -void 
-SkewnessCds(CdsArray *cdsA); +CalcNormResiduals(CdsArray *cdsA); void -KurtosisCds(CdsArray *cdsA); - -double -CoordxMatxCoord(const double *v1, const double *v2, const double **sigma); - -double -CalcANOVAF(CdsArray *cdsA); +CalcNormResidualsLS(CdsArray *cdsA); double -CalcHierarchLogL(CdsArray *cdsA); +FrobTerm(CdsArray *cdsA); double -CalcLogL(CdsArray *cdsA); +CalcMgLogLCov(CdsArray *cdsA); double CalcMgLogL(CdsArray *cdsA); @@ -144,10 +80,7 @@ CalcParamNum(CdsArray *cdsA); void -CalcAIC(CdsArray *cdsA); - -void -CalcBIC(CdsArray *cdsA); +CalcVar(CdsArray *cdsA); double TrCdsInnerProd(Cds *cds, const int len); @@ -165,12 +98,9 @@ UnbiasMean(CdsArray *scratchA); void -CalcNormResiduals(CdsArray *cdsA); +CalcStats(CdsArray *incdsA); void -CalcNormResidualsLS(CdsArray *cdsA); - -double -FrobTerm(CdsArray *cdsA); +CalcPreStats(CdsArray *cdsA); #endif diff -Nru theseus-2.0.6/pdbStats_local.h theseus-3.0.0/pdbStats_local.h --- theseus-2.0.6/pdbStats_local.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/pdbStats_local.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -41,6 +41,7 @@ #include "distfit.h" #include "DLTmath.h" #include "pdbStats.h" +#include "myassert.h" +#include +#include -extern int -MultiPose(CdsArray *baseA); /* theseus.c */ Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/._pdbUtils.c and /tmp/g2bOMTRwaC/theseus-3.0.0/._pdbUtils.c differ diff -Nru theseus-2.0.6/pdbUtils.c theseus-3.0.0/pdbUtils.c --- theseus-2.0.6/pdbUtils.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/pdbUtils.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -41,7 +41,7 @@ void -CdsCopyXYZ(Cds *cds1, const Cds *cds2) +CdsCopyXYZ(Cds *cds1, const Cds *cds2) // DLT FIX MAT COORDS { memcpy(cds1->x, cds2->x, cds2->vlen * sizeof(double)); memcpy(cds1->y, cds2->y, cds2->vlen * sizeof(double)); @@ -50,7 +50,7 @@ void -CdsCopy(Cds *cds1, const Cds *cds2) +CdsCopy(Cds *cds1, const Cds *cds2) // DLT FIX MAT COORDS { strncpy(cds1->filename, cds2->filename, FILENAME_MAX-1); memcpy(cds1->resSeq, cds2->resSeq, cds2->vlen * sizeof(int)); @@ -61,54 +61,8 @@ } -/* copy cds, omitting the given atom */ void -CdsDelete(CdsArray *cdsA, const int omit) -{ - int i, j; - - if(omit == cdsA->vlen - 1) - { - cdsA->vlen--; - cdsA->avecds->vlen--; - - for (i = 0; i < cdsA->cnum; ++i) - cdsA->cds[i]->vlen--; - } - else - { - cdsA->vlen--; - cdsA->avecds->vlen--; - - for (i = 0; i < cdsA->cnum; ++i) - cdsA->cds[i]->vlen--; - - memmove(&cdsA->avecds->resSeq[omit], &cdsA->avecds->resSeq[omit+1], - (cdsA->vlen - omit) * sizeof(int)); - memmove(&cdsA->avecds->x[omit], &cdsA->avecds->x[omit+1], - (cdsA->vlen - omit) 
* sizeof(double)); - memmove(&cdsA->avecds->y[omit], &cdsA->avecds->y[omit+1], - (cdsA->vlen - omit) * sizeof(double)); - memmove(&cdsA->avecds->z[omit], &cdsA->avecds->z[omit+1], - (cdsA->vlen - omit) * sizeof(double)); - - for (j = 0; j < cdsA->cnum; ++j) - { - memmove(&cdsA->cds[j]->resSeq[omit], &cdsA->cds[j]->resSeq[omit+1], - (cdsA->vlen - omit) * sizeof(int)); - memmove(&cdsA->cds[j]->x[omit], &cdsA->cds[j]->x[omit+1], - (cdsA->vlen - omit) * sizeof(double)); - memmove(&cdsA->cds[j]->y[omit], &cdsA->cds[j]->y[omit+1], - (cdsA->vlen - omit) * sizeof(double)); - memmove(&cdsA->cds[j]->z[omit], &cdsA->cds[j]->z[omit+1], - (cdsA->vlen - omit) * sizeof(double)); - } - } -} - - -void -CdsArrayCopy(CdsArray *cdsA1, const CdsArray *cdsA2) +CdsArrayCopy(CdsArray *cdsA1, const CdsArray *cdsA2) // DLT FIX MAT COORDS { int i; @@ -121,42 +75,13 @@ CdsCopyAll(cdsA1->avecds, cdsA2->avecds); - AlgorithmCopy(cdsA1->algo, cdsA2->algo); - StatisticsCopy(cdsA1->stats, cdsA2->stats); - memcpy(cdsA1->var, cdsA2->var, cdsA2->vlen * sizeof(double)); memcpy(cdsA1->w, cdsA2->w, cdsA2->vlen * sizeof(double)); } void -AlgorithmCopy(Algorithm *algo1, const Algorithm *algo2) -{ - memcpy(algo1, algo2, sizeof(Algorithm)); - algo1->argv = NULL; /* DLT debug -- these should be copied */ - algo1->infiles = NULL; /* DLT debug -- these should be copied */ - if (algo2->selection != NULL) - { - algo1->selection = (char *) malloc((strlen(algo2->selection) + 1) * sizeof(char)); - strcpy(algo1->selection, algo2->selection); - } - if (algo2->atomslxn != NULL) - { - algo1->atomslxn = (char *) malloc((strlen(algo2->atomslxn) + 1) * sizeof(char)); - strcpy(algo1->atomslxn, algo2->atomslxn); - } -} - - -void -StatisticsCopy(Statistics *stats1, const Statistics *stats2) -{ - memcpy(stats1, stats2, sizeof(Statistics)); -} - - -void -CdsCopyAll(Cds *cds1, const Cds *cds2) +CdsCopyAll(Cds *cds1, const Cds *cds2) // DLT FIX MAT COORDS { int i; @@ -174,6 +99,8 @@ memcpy(cds1->z, cds2->z, cds2->vlen * 
sizeof(double)); memcpy(cds1->o, cds2->o, cds2->vlen * sizeof(double)); memcpy(cds1->b, cds2->b, cds2->vlen * sizeof(double)); + memcpy(cds1->nu, cds2->nu, cds2->vlen * sizeof(int)); + memcpy(cds1->mu, cds2->mu, cds2->vlen * sizeof(int)); memcpy(cds1->residual_x, cds2->residual_x, cds2->vlen * sizeof(double)); memcpy(cds1->residual_y, cds2->residual_y, cds2->vlen * sizeof(double)); memcpy(cds1->residual_z, cds2->residual_z, cds2->vlen * sizeof(double)); @@ -220,38 +147,60 @@ memcpy(cds1->occupancy, cds2->occupancy, cds2->vlen * sizeof(double)); memcpy(cds1->tempFactor, cds2->tempFactor, cds2->vlen * sizeof(double)); + memcpy(cds1->nu, cds2->nu, cds2->vlen * sizeof(int)); + MatCpySym(cds1->matrix, (const double **) cds2->matrix, 3); memcpy(cds1->translation, cds2->translation, 3 * sizeof(double)); } void -CdsAdd(Cds *cds1, const Cds *cds2) +MatMultCdsMultMatDiag(Cds *outcds, const double **matK, const Cds *cds) { - int i; + int i, j; + const int vlen = cds->vlen; + const double *x = (const double *) cds->x, + *y = (const double *) cds->y, + *z = (const double *) cds->z; + double tmpx, tmpy, tmpz; + double matKij; - cds1->vlen = cds2->vlen; - for (i = 0; i < cds2->vlen; ++i) + for (i = 0; i < vlen; ++i) { - cds1->x[i] += cds2->x[i]; - cds1->y[i] += cds2->y[i]; - cds1->z[i] += cds2->z[i]; - } + tmpx = tmpy = tmpz = 0.0; + for (j = 0; j < vlen; ++j) + { + matKij = matK[i][j]; + tmpx += matKij * x[j]; + tmpy += matKij * y[j]; + tmpz += matKij * z[j]; + } - cds1->RMSD_from_mean += cds2->RMSD_from_mean; - cds1->wRMSD_from_mean += cds2->wRMSD_from_mean; + outcds->x[i] = tmpx; + outcds->y[i] = tmpy; + outcds->z[i] = tmpz; + } } void -RotMatAddIp(double **mat1, const double **mat2) +MatDiagMultCdsMultMatDiag(Cds *outcds, const double *wtK, const Cds *cds) { - int i, j; + int i; + double wtKi; + const double *x = (const double *) cds->x, + *y = (const double *) cds->y, + *z = (const double *) cds->z; - for (i = 0; i < 3; ++i) - for (j = 0; j < 3; ++j) - mat1[i][j] += 
mat2[i][j]; + for (i = 0; i < cds->vlen; ++i) + { + wtKi = wtK[i]; + + outcds->x[i] = wtKi * x[i]; + outcds->y[i] = wtKi * y[i]; + outcds->z[i] = wtKi * z[i]; + } } @@ -293,8 +242,8 @@ for (i = 0; i < cds->vlen; ++i) { strcpy(pdbcds->record[i], "ATOM"); - pdbcds->serial[i] = i+1; - pdbcds->Hnum[i] = ' '; + pdbcds->serial[i] = i+1; + pdbcds->Hnum[i] = ' '; if (strncmp(cds->resName[i], "ADE", 3) == 0 || strncmp(cds->resName[i], "CYT", 3) == 0 || @@ -312,9 +261,13 @@ strncmp(cds->resName[i], " DI", 3) == 0 || /* remediated PDB residue names */ strncmp(cds->resName[i], " DU", 3) == 0 || /* remediated PDB residue names */ strncmp(cds->resName[i], " U", 3) == 0) + { strcpy(pdbcds->name[i], "P "); + } else + { strcpy(pdbcds->name[i], "CA "); + } pdbcds->altLoc[i] = ' '; strncpy(pdbcds->resName[i], cds->resName[i], 3); @@ -345,32 +298,8 @@ if (pdbA->cds[i]->vlen != vlen) { fprintf(stderr, - "\n WARNING20: PDB coordinates %d and %d are of unequal length [%d vs %d]. \n\n", - 0, i, vlen, pdbA->cds[i]->vlen); - //PrintTheseusTag(); - //exit(EXIT_FAILURE); - } - } - - return(vlen); -} - - -int -NMRCheckCdsArray(CdsArray *pdbA) -{ - int i; - int vlen = pdbA->cds[0]->vlen; - - for(i = 1; i < pdbA->cnum; ++i) - { - if (pdbA->cds[i]->vlen != vlen) - { - fprintf(stderr, - "\n WARNING20: PDB coordinates %d and %d are of unequal length [%d vs %d]. \n\n", + "\n WARNING20: PDB coordinates %d and %d are unequal length [%d vs %d]. 
\n\n", 0, i, vlen, pdbA->cds[i]->vlen); - //PrintTheseusTag(); - //exit(EXIT_FAILURE); } } @@ -395,16 +324,6 @@ for (i = 0; i < pdbcds->vlen; ++i) { -/* MatPrint(pdbcds->matrix, 3); */ -/* fprintf(stderr, */ -/* " translation = %8.3f %8.3f %8.3f \n", */ -/* pdbcds->translation[0], */ -/* pdbcds->translation[1], */ -/* pdbcds->translation[2]); */ -/* fprintf(stderr, */ -/* " before: x = %8.3f y = %8.3f z = %8.3f \n", */ -/* pdbcds->x[i], pdbcds->y[i], pdbcds->z[i]); */ - xt = x[i] - transx; yt = y[i] - transy; zt = z[i] - transz; @@ -412,27 +331,33 @@ x[i] = (xt * rmat00) + (yt * rmat10) + (zt * rmat20); y[i] = (xt * rmat01) + (yt * rmat11) + (zt * rmat21); z[i] = (xt * rmat02) + (yt * rmat12) + (zt * rmat22); + } -/* fprintf(stderr, */ -/* " after rotation: x = %8.3f y = %8.3f z = %8.3f \n", */ -/* pdbcds->x[i], pdbcds->y[i], pdbcds->z[i]); */ -/* exit(0); */ + if (algo->scale > 0) + { + double scale = pdbcds->scale; + for (i = 0; i < pdbcds->vlen; ++i) + { + x[i] *= scale; + y[i] *= scale; + z[i] *= scale; + } } } void -RotateCdsOp(const Cds *cds1, const double **rmat, Cds *cds2) +RotateCdsOp(double **c2, const double **c1, const double **rmat, const int vlen) { int i; double xt, yt, zt; - double *x1 = cds1->x, *y1 = cds1->y, *z1 = cds1->z, - *x2 = cds2->x, *y2 = cds2->y, *z2 = cds2->z; + const double *x1 = c1[0], *y1 = c1[1], *z1 = c1[2]; + double *x2 = c2[0], *y2 = c2[1], *z2 = c2[2]; const double rmat00 = rmat[0][0], rmat01 = rmat[0][1], rmat02 = rmat[0][2], rmat10 = rmat[1][0], rmat11 = rmat[1][1], rmat12 = rmat[1][2], rmat20 = rmat[2][0], rmat21 = rmat[2][1], rmat22 = rmat[2][2]; - for (i = 0; i < cds1->vlen; ++i) + for (i = 0; i < vlen; ++i) { xt = x1[i]; yt = y1[i]; @@ -445,6 +370,9 @@ } +/* This is equivalent to XR, where X = (k x 3) and R = (3 x 3), + where the convention is (row x col). 
+ Recall that x[i] = X[i][0], y[i] = X[i][1], z[i] = X[i][2] */ void RotateCdsIp(Cds *cds, const double **rmat) { @@ -468,15 +396,29 @@ } +/* This is equivalent to XR, where X = (k x 3) and R = (3 x 3), + where the convention is (row x col). + Recall that x[i] = X[i][0], y[i] = X[i][1], z[i] = X[i][2] */ void -RotateCdsArrayIp(CdsArray *cdsA, const double **rmat) +RotateCdsIp2(double **c1, const int vlen, const double **rmat) { int i; + double xt, yt, zt; + double *x = c1[0], *y = c1[1], *z = c1[2]; + const double rmat00 = rmat[0][0], rmat01 = rmat[0][1], rmat02 = rmat[0][2], + rmat10 = rmat[1][0], rmat11 = rmat[1][1], rmat12 = rmat[1][2], + rmat20 = rmat[2][0], rmat21 = rmat[2][1], rmat22 = rmat[2][2]; - for (i = 0; i < cdsA->cnum; ++i) - RotateCdsIp(cdsA->cds[i], rmat); + for (i = 0; i < vlen; ++i) + { + xt = x[i]; + yt = y[i]; + zt = z[i]; - RotateCdsIp(cdsA->avecds, rmat); + x[i] = (xt * rmat00) + (yt * rmat10) + (zt * rmat20); + y[i] = (xt * rmat01) + (yt * rmat11) + (zt * rmat21); + z[i] = (xt * rmat02) + (yt * rmat12) + (zt * rmat22); + } } @@ -522,79 +464,6 @@ } -double -NormalizeWeightsOcc(double *w, double *o, int vlen) -{ - int i; - double weightsum; - double normalize; - - weightsum = 0.0; - for (i = 0; i < vlen; ++i) - weightsum += o[i] * w[i]; - - normalize = vlen / weightsum; - - for (i = 0; i < vlen; ++i) - w[i] *= normalize; - - return(normalize); -} - - -double -NormalizeWeights(double *w, int vlen) -{ - int i; - double weightsum; - double normalize; - -/* normalize by trace of weight matrix */ - weightsum = 0.0; - for (i = 0; i < vlen; ++i) - weightsum += w[i]; - - /* printf("\nweightsum: %f ", weightsum); */ - - normalize = vlen / weightsum; -/* printf("normalize: %f", normalize); */ - for (i = 0; i < vlen; ++i) - w[i] *= normalize; - - return(normalize); - -/* normalize by determinant of weight matrix */ -/* weightsum = 1.0; */ -/* for (i = 0; i < cds->vlen; ++i) */ -/* weightsum *= w[i]; */ -/* */ -/* normalize = pow(weightsum, -1.0 / 
cds->vlen); */ -/* */ -/* for (i = 0; i < cds->vlen; ++i) */ -/* w[i] *= normalize; */ - -/* normalize by trace of covariance matrix */ -/* weightsum = 0.0; */ -/* for (i = 0; i < cds->vlen; ++i) */ -/* weightsum += vars[i]; */ -/* */ -/* normalize = cds->vlen / weightsum; */ -/* */ -/* for (i = 0; i < cds->vlen; ++i) */ -/* w[i] *= normalize; */ - -/* normalize by determinant of covariance matrix */ -/* weightsum = 1.0; */ -/* for (i = 0; i < cds->vlen; ++i) */ -/* weightsum *= vars[i]; */ -/* */ -/* normalize = pow(weightsum, 1.0 / cds->vlen); */ -/* */ -/* for (i = 0; i < cds->vlen; ++i) */ -/* w[i] *= normalize; */ -} - - void CenMass(Cds *cds) { @@ -620,18 +489,40 @@ void -CenMassWtOp(Cds *cds, const CdsArray *weights) +CenMass2(const double **cds, const int vlen, double *center) +{ + int i; + double tempx, tempy, tempz; + const double *x = cds[0], + *y = cds[1], + *z = cds[2]; + + tempx = tempy = tempz = 0.0; + for (i = 0; i < vlen; ++i) + { + tempx += x[i]; + tempy += y[i]; + tempz += z[i]; + } + + center[0] = tempx / vlen; + center[1] = tempy / vlen; + center[2] = tempz / vlen; +} + + +void +CenMassWt2(const double **cds, const double *wts, const int vlen, double *center) { int i; double tempx, tempy, tempz; - const double *wts = (const double *) weights->w; double wti, wtsum; - const double *x = (const double *) cds->x, - *y = (const double *) cds->y, - *z = (const double *) cds->z; + const double *x = cds[0], + *y = cds[1], + *z = cds[2]; tempx = tempy = tempz = wtsum = 0.0; - for (i = 0; i < cds->vlen; ++i) + for (i = 0; i < vlen; ++i) { wti = wts[i]; wtsum += wti; @@ -640,12 +531,43 @@ tempz += (wti * z[i]); } - cds->center[0] = tempx / wtsum; - cds->center[1] = tempy / wtsum; - cds->center[2] = tempz / wtsum; + center[0] = tempx / wtsum; + center[1] = tempy / wtsum; + center[2] = tempz / wtsum; +} -/* printf("\n********** %f %f %f", cds->center[0], cds->center[1], cds->center[2]); */ -/* fflush(NULL); */ + +void +CenMassWtNu2(const double **cds, const 
double **ave, const int *nu, const double *wts, const int vlen, const double **rmat, double *center) +{ + int i, nui; + double tempx, tempy, tempz; + double wti, wtsum; + const double *x = cds[0], + *y = cds[1], + *z = cds[2]; + const double *ax = ave[0], + *ay = ave[1], + *az = ave[2]; + const double rmat00 = rmat[0][0], rmat01 = rmat[0][1], rmat02 = rmat[0][2], + rmat10 = rmat[1][0], rmat11 = rmat[1][1], rmat12 = rmat[1][2], + rmat20 = rmat[2][0], rmat21 = rmat[2][1], rmat22 = rmat[2][2]; + + tempx = tempy = tempz = wtsum = 0.0; + for (i = 0; i < vlen; ++i) + { + wti = wts[i]; + nui = nu[i]; + wtsum += wti * nui; + // DLT OP this is inefficient as hell + tempx += wti * (nui*x[i] + (1-nui) * ((ax[i]*rmat00) + (ay[i]*rmat01) + (az[i]*rmat02))); + tempy += wti * (nui*y[i] + (1-nui) * ((ax[i]*rmat10) + (ay[i]*rmat11) + (az[i]*rmat12))); + tempz += wti * (nui*z[i] + (1-nui) * ((ax[i]*rmat20) + (ay[i]*rmat21) + (az[i]*rmat22))); + } + + center[0] = tempx / wtsum; + center[1] = tempy / wtsum; + center[2] = tempz / wtsum; } @@ -676,7 +598,6 @@ cds->center[2] = tempz / wtsum; /* printf("wtsum = %8.3f\n", wtsum); */ -/* printf("wtocc = %8.3f\n", wtocc); */ /* fflush(NULL); */ /* printf("\nDT: % 8.3f % 8.3f % 8.3f\n", */ @@ -701,7 +622,6 @@ /* weights->CovMat[i][j] = internmat[i][j]; */ /* */ /* CovInvWeightLAPACK((CdsArray *) weights); */ -/* NormalizeCovMat(weights->WtMat, cds->vlen); */ wtsum = 0.0; for (i = 0; i < cds->vlen; ++i) @@ -726,6 +646,36 @@ } +/* calculate inv covariance matrix weighted cds + \Sigma^-1 * \CdsMat */ +void +CalcCovCds(Cds *cds, const double **wtmat) +{ + int i, k; + double *covx = cds->covx, + *covy = cds->covy, + *covz = cds->covz; + const double *x = (const double *) cds->x, + *y = (const double *) cds->y, + *z = (const double *) cds->z; + double wtmatik; + + for (i = 0; i < cds->vlen; ++i) + { + covx[i] = covy[i] = covz[i] = 0.0; + + for (k = 0; k < cds->vlen; ++k) + { + wtmatik = wtmat[i][k]; + + covx[i] += (wtmatik * x[k]); + covy[i] += 
(wtmatik * y[k]); + covz[i] += (wtmatik * z[k]); + } + } +} + + void CenMassCov(Cds *cds, const double **wtmat) { @@ -734,17 +684,18 @@ double *covx = cds->covx, *covy = cds->covy, *covz = cds->covz; + double vlen = cds->vlen; double wtsum; CalcCovCds(cds, wtmat); wtsum = 0.0; - for (i = 0; i < cds->vlen; ++i) - for (j = 0; j < cds->vlen; ++j) + for (i = 0; i < vlen; ++i) + for (j = 0; j < vlen; ++j) wtsum += wtmat[i][j]; tempx = tempy = tempz = 0.0; - for (i = 0; i < cds->vlen; ++i) + for (i = 0; i < vlen; ++i) { tempx += covx[i]; tempy += covy[i]; @@ -757,246 +708,126 @@ } -/*void*/ -/*CenMassCovOcc(Cds *cds, const double **wtmat)*/ -/*{*/ -/* double tempx, tempy, tempz;*/ -/* int i, j;*/ -/* const double *occ = (const double *) cds->o;*/ -/* double *covx = cds->covx,*/ -/* *covy = cds->covy,*/ -/* *covz = cds->covz;*/ -/* double wtsum;*/ -/**/ -/* wtsum = 0.0;*/ -/* for (i = 0; i < cds->vlen; ++i)*/ -/* for (j = 0; j < cds->vlen; ++j)*/ -/* wtsum += occ[i] * occ[j] * wtmat[i][j];*/ -/**/ -/* tempx = tempy = tempz = 0.0;*/ -/* for (i = 0; i < cds->vlen; ++i)*/ -/* {*/ -/* tempx += occ[i] * covx[i];*/ -/* tempy += occ[i] * covy[i];*/ -/* tempz += occ[i] * covz[i];*/ -/* }*/ -/**/ -/* cds->center[0] = tempx / wtsum;*/ -/* cds->center[1] = tempy / wtsum;*/ -/* cds->center[2] = tempz / wtsum;*/ -/*}*/ -/**/ -/**/ -/*void*/ -/*CenMassWtOpOcc(Cds *cds, const CdsArray *weights)*/ -/*{*/ -/* int i;*/ -/* double tempx, tempy, tempz;*/ -/* const double *wts = (const double *) weights->w;*/ -/* const double *occ = (const double *) cds->o;*/ -/* double wti, wtsum;*/ -/* const double *x = (const double *) cds->x,*/ -/* *y = (const double *) cds->y,*/ -/* *z = (const double *) cds->z;*/ -/**/ -/* tempx = tempy = tempz = wtsum = 0.0;*/ -/* for (i = 0; i < cds->vlen; ++i)*/ -/* {*/ -/* wti = wts[i] * occ[i];*/ -/* wtsum += wti;*/ -/* tempx += (wti * x[i]);*/ -/* tempy += (wti * y[i]);*/ -/* tempz += (wti * z[i]);*/ -/* }*/ -/**/ -/* cds->center[0] = tempx / wtsum;*/ -/* 
cds->center[1] = tempy / wtsum;*/ -/* cds->center[2] = tempz / wtsum;*/ -/*}*/ -/**/ -/**/ -/*void*/ -/*CenMassOcc(Cds *cds)*/ -/*{*/ -/* int i;*/ -/* double tempx, tempy, tempz, occi, occsum;*/ -/* const double *occ = (const double *) cds->o;*/ -/* const double *x = (const double *) cds->x,*/ -/* *y = (const double *) cds->y,*/ -/* *z = (const double *) cds->z;*/ -/**/ -/* tempx = tempy = tempz = occsum = 0.0;*/ -/* for (i = 0; i < cds->vlen; ++i)*/ -/* {*/ -/* occi = occ[i];*/ -/* occsum += occi;*/ -/* tempx += (occi * x[i]);*/ -/* tempy += (occi * y[i]);*/ -/* tempz += (occi * z[i]);*/ -/* }*/ -/**/ -/* cds->center[0] = tempx / occsum;*/ -/* cds->center[1] = tempy / occsum;*/ -/* cds->center[2] = tempz / occsum;*/ -/*}*/ +/* calculate inv covariance matrix weighted cds + \Sigma^-1 * \CdsMat */ +static void +CalcCovCds2(double **cc, const double **c, const double **wtmat, const int vlen) +{ + int i, k; + double *covx = cc[0], + *covy = cc[1], + *covz = cc[2]; + const double *x = c[0], + *y = c[1], + *z = c[2]; + double wtmatik; + //double xk, yk, zk; + double covxi, covyi, covzi; + + for (i = 0; i < vlen; ++i) + { + covxi = covyi = covzi = 0.0; + for (k = 0; k < vlen; ++k) + { + wtmatik = wtmat[i][k]; + + covxi += wtmatik * x[k]; + covyi += wtmatik * y[k]; + covzi += wtmatik * z[k]; + } + + covx[i] = covxi; + covy[i] = covyi; + covz[i] = covzi; + } +} void -CenMassOccVec(Cds *cds, double *cenmass) +CenMassCov2(const double **c, double **cc, const double **wtmat, const int vlen, double *center) { - int i; - double tempx, tempy, tempz, occi, occsum; - const double *occ = (const double *) cds->o; - const double *x = (const double *) cds->x, - *y = (const double *) cds->y, - *z = (const double *) cds->z; + double tempx, tempy, tempz; + int i, j; + double *covx = cc[0], + *covy = cc[1], + *covz = cc[2]; + double wtsum; - tempx = tempy = tempz = occsum = 0.0; - for (i = 0; i < cds->vlen; ++i) + CalcCovCds2(cc, c, wtmat, vlen); + + wtsum = 0.0; + for (i = 0; i < vlen; 
++i) + for (j = 0; j < vlen; ++j) + wtsum += wtmat[i][j]; + + //printf("wtsum: %f\n", wtsum); + + tempx = tempy = tempz = 0.0; + for (i = 0; i < vlen; ++i) { - occi = occ[i]; - occsum += occi; - tempx += (occi * x[i]); - tempy += (occi * y[i]); - tempz += (occi * z[i]); + tempx += covx[i]; + tempy += covy[i]; + tempz += covz[i]; } - cenmass[0] = tempx / occsum; - cenmass[1] = tempy / occsum; - cenmass[2] = tempz / occsum; + center[0] = tempx / wtsum; + center[1] = tempy / wtsum; + center[2] = tempz / wtsum; } void -CenMassWtIpOcc(Cds *cds, const double *wts) +CenMassNuVec(const double **c, const int *nu, double *cenmass, const int vlen) { int i; - double tempx, tempy, tempz; - const double *occ = (const double *) cds->o; - double wti, wtsum; - const double *x = (const double *) cds->x, - *y = (const double *) cds->y, - *z = (const double *) cds->z; + double tempx, tempy, tempz, nui, nusum; + const double *x = (const double *) c[0], + *y = (const double *) c[1], + *z = (const double *) c[2]; - tempx = tempy = tempz = wtsum = 0.0; - for (i = 0; i < cds->vlen; ++i) + tempx = tempy = tempz = nusum = 0.0; + for (i = 0; i < vlen; ++i) { - wti = wts[i] * occ[i]; - wtsum += wti; - tempx += (wti * x[i]); - tempy += (wti * y[i]); - tempz += (wti * z[i]); + nui = nu[i]; + nusum += nui; + tempx += (nui * x[i]); + tempy += (nui * y[i]); + tempz += (nui * z[i]); } - cds->center[0] = tempx / wtsum; - cds->center[1] = tempy / wtsum; - cds->center[2] = tempz / wtsum; -// printf("wtsum = %8.3f\n", wtsum); -// fflush(); - -/* printf("\n% 8.3f % 8.3f % 8.3f", */ -/* cds->center[0], cds->center[1], cds->center[2]); */ -/* fflush(NULL); */ + cenmass[0] = tempx / nusum; + cenmass[1] = tempy / nusum; + cenmass[2] = tempz / nusum; } - - -/* cdsi->x[j] = avex[j]*rmat00 + avey[j]*rmat01 + avez[j]*rmat02; */ -/* cdsi->y[j] = avex[j]*rmat10 + avey[j]*rmat11 + avez[j]*rmat12; */ -/* cdsi->z[j] = avex[j]*rmat20 + avey[j]*rmat21 + avez[j]*rmat22; */ - void -CenMassWtIpEM(Cds *cds, const Cds 
*avecds, const double *wts) +CenMassWtIpNu(Cds *cds, const double *wts) { int i; double tempx, tempy, tempz; - const double *occ = (const double *) cds->o; double wti, wtsum; const double *x = (const double *) cds->x, *y = (const double *) cds->y, *z = (const double *) cds->z; - double rmat00, rmat01, rmat02, - rmat10, rmat11, rmat12, - rmat20, rmat21, rmat22; - double **rmat = cds->matrix; - double *avex = avecds->x, - *avey = avecds->y, - *avez = avecds->z; - - rmat00 = rmat[0][0], rmat01 = rmat[0][1], rmat02 = rmat[0][2], - rmat10 = rmat[1][0], rmat11 = rmat[1][1], rmat12 = rmat[1][2], - rmat20 = rmat[2][0], rmat21 = rmat[2][1], rmat22 = rmat[2][2]; + const int *nu = (const int *) cds->nu; tempx = tempy = tempz = wtsum = 0.0; for (i = 0; i < cds->vlen; ++i) { - wti = wts[i]; -// printf("wt[%3d] = %8.3f\n", i, wti); - - if (occ[i] == 1.0) - { - //wtsum += 1.0; - wtsum += wti; - tempx += (wti * x[i]); - tempy += (wti * y[i]); - tempz += (wti * z[i]); - } - else if (occ[i] == 0.0) - { - tempx += (wti * (avex[i]*rmat00 + avey[i]*rmat01 + avez[i]*rmat02)); - tempy += (wti * (avex[i]*rmat10 + avey[i]*rmat11 + avez[i]*rmat12)); - tempz += (wti * (avex[i]*rmat20 + avey[i]*rmat21 + avez[i]*rmat22)); - -/* tempx += (wti * (avex[i]*rmat00 + avey[i]*rmat10 + avez[i]*rmat20)); */ -/* tempy += (wti * (avex[i]*rmat01 + avey[i]*rmat11 + avez[i]*rmat21)); */ -/* tempz += (wti * (avex[i]*rmat02 + avey[i]*rmat12 + avez[i]*rmat22)); */ - } + wti = wts[i] * nu[i]; + wtsum += wti; + tempx += (wti * x[i]); + tempy += (wti * y[i]); + tempz += (wti * z[i]); } cds->center[0] = tempx / wtsum; cds->center[1] = tempy / wtsum; cds->center[2] = tempz / wtsum; - -/* printf("wtsum = %8.3f\n", wtsum); */ -/* fflush(NULL); */ - -/* printf("\nEM: % 8.3f % 8.3f % 8.3f", */ -/* cds->center[0], cds->center[1], cds->center[2]); */ -/* fflush(NULL); */ } -/*void*/ -/*CenMassCovOpOcc(Cds *cds, const CdsArray *weights)*/ -/*{*/ -/* double tempx, tempy, tempz;*/ -/* int i, j;*/ -/* const double *occ = 
(const double *) cds->o;*/ -/* double *covx = cds->covx,*/ -/* *covy = cds->covy,*/ -/* *covz = cds->covz;*/ -/* double wtsum;*/ -/**/ -/* wtsum = 0.0;*/ -/* for (i = 0; i < cds->vlen; ++i)*/ -/* for (j = 0; j < cds->vlen; ++j)*/ -/* wtsum += occ[i] * occ[j] * weights->WtMat[i][j];*/ -/**/ -/* tempx = tempy = tempz = 0.0;*/ -/* for (i = 0; i < cds->vlen; ++i)*/ -/* {*/ -/* tempx += occ[i] * covx[i];*/ -/* tempy += occ[i] * covy[i];*/ -/* tempz += occ[i] * covz[i];*/ -/* }*/ -/**/ -/* cds->center[0] = tempx / wtsum;*/ -/* cds->center[1] = tempy / wtsum;*/ -/* cds->center[2] = tempz / wtsum;*/ -/*}*/ - - void ApplyCenter(Cds *cds, const double cenx, const double ceny, const double cenz) { @@ -1069,15 +900,34 @@ void -TransCdsIp(Cds *cds, const double *trans) +TranslateCdsOp2(double **cds2, const double **cds1, const int vlen, const double *center) { int i; - double *x = cds->x, *y = cds->y, *z = cds->z; + const double *x1 = cds1[0], *y1 = cds1[1], *z1 = cds1[2]; + double *x2 = cds2[0], *y2 = cds2[1], *z2 = cds2[2]; + const double cenx = center[0], + ceny = center[1], + cenz = center[2]; + + for (i = 0; i < vlen; ++i) + { + x2[i] = x1[i] - cenx; + y2[i] = y1[i] - ceny; + z2[i] = z1[i] - cenz; + } +} + + +void +TransCdsIp(double **c, const double *trans, const int vlen) +{ + int i; + double *x = c[0], *y = c[1], *z = c[2]; const double transx = trans[0], transy = trans[1], transz = trans[2]; - for (i = 0; i < cds->vlen; ++i) + for (i = 0; i < vlen; ++i) { x[i] += transx; y[i] += transy; @@ -1087,15 +937,15 @@ void -NegTransCdsIp(Cds *cds, const double *trans) +NegTransCdsIp(double **c, const double *trans, const int vlen) { int i; - double *x = cds->x, *y = cds->y, *z = cds->z; + double *x = c[0], *y = c[1], *z = c[2]; const double transx = trans[0], transy = trans[1], transz = trans[2]; - for (i = 0; i < cds->vlen; ++i) + for (i = 0; i < vlen; ++i) { x[i] -= transx; y[i] -= transy; @@ -1112,8 +962,8 @@ int i, j; double *avex = cdsA->avecds->x, *avey = cdsA->avecds->y, 
*avez = cdsA->avecds->z; - const Cds **cds = (const Cds **) cdsA->cds; - Cds *cdsi; + const Cds **cds = (const Cds **) cdsA->cds; + Cds *cdsi = NULL; for (i = 0; i < cdsA->cnum; ++i) { @@ -1121,7 +971,7 @@ for (j = 0; j < cdsA->vlen; ++j) { - if (cdsi->o[j] == 0.0) + if (cdsi->nu[j] == 0) { cdsi->x[j] = avex[j]; cdsi->y[j] = avey[j]; @@ -1140,8 +990,8 @@ double *avex = scratchA->avecds->x, *avey = scratchA->avecds->y, *avez = scratchA->avecds->z; - const Cds **cds = (const Cds **) baseA->cds; - Cds *cdsi; + const Cds **cds = (const Cds **) baseA->cds; + Cds *cdsi = NULL; double rmat00, rmat01, rmat02, rmat10, rmat11, rmat12, @@ -1159,7 +1009,7 @@ for (j = 0; j < baseA->vlen; ++j) { - if (cdsi->o[j] == 0.0) + if (cdsi->nu[j] == 0) { cdsi->x[j] = avex[j]*rmat00 + avey[j]*rmat01 + avez[j]*rmat02; cdsi->y[j] = avex[j]*rmat10 + avey[j]*rmat11 + avez[j]*rmat12; @@ -1174,43 +1024,6 @@ } -void -AveCdsNovec(CdsArray *cdsA) -{ - int i, j; - double xtmp, ytmp, ztmp, /* otmp, */btmp; - double *avex = cdsA->avecds->x, - *avey = cdsA->avecds->y, - *avez = cdsA->avecds->z, - *aveo = cdsA->avecds->o, - *aveb = cdsA->avecds->b; - const int cnum = cdsA->cnum; - const Cds **cds = (const Cds **) cdsA->cds; - Cds *cdsj; - double invcnum = 1.0 / (double) cnum; - - for (i = 0; i < cdsA->vlen; ++i) - { - xtmp = ytmp = ztmp = /* otmp = */ btmp = 0.0; - for (j = 0; j < cnum; ++j) - { - cdsj = (Cds *) cds[j]; - xtmp += cdsj->x[i]; - ytmp += cdsj->y[i]; - ztmp += cdsj->z[i]; - /* otmp += cdsj->o[i]; */ - btmp += cdsj->b[i]; - } - - avex[i] = xtmp * invcnum; - avey[i] = ytmp * invcnum; - avez[i] = ztmp * invcnum; - aveo[i] = 1.0; - aveb[i] = btmp * invcnum; - } -} - - /* Calculate the ML estimate of a hierarchical mean, where the atoms are normally distributed with hyper-mean zero */ /* See also below, which is probably more valid (only the weighted mean has zero centroid) */ @@ -1221,12 +1034,10 @@ int i, j; double *avex = cdsA->avecds->x, *avey = cdsA->avecds->y, - *avez = cdsA->avecds->z, - 
*aveo = cdsA->avecds->o, - *aveb = cdsA->avecds->b; + *avez = cdsA->avecds->z; const int cnum = cdsA->cnum, vlen = cdsA->vlen; - const Cds **cds = (const Cds **) cdsA->cds; - Cds *cdsj; + const Cds **cds = (const Cds **) cdsA->cds; + Cds *cdsj = NULL; double invcnum, psi; @@ -1238,38 +1049,34 @@ memset(avex, 0, vlen * sizeof(double)); memset(avey, 0, vlen * sizeof(double)); memset(avez, 0, vlen * sizeof(double)); - memset(aveb, 0, vlen * sizeof(double)); for (j = 0; j < cnum; ++j) { cdsj = (Cds *) cds[j]; - for (i = 0; i < vlen; ++i) - { - avex[i] += cdsj->x[i]; - avey[i] += cdsj->y[i]; - avez[i] += cdsj->z[i]; - aveb[i] += cdsj->b[i]; - } + for (i = 0; i < vlen; ++i) + { + avex[i] += cdsj->x[i]; + avey[i] += cdsj->y[i]; + avez[i] += cdsj->z[i]; + } } for (i = 0; i < vlen; ++i) { invcnum = 1.0 / ((double) cnum + cdsA->var[i] / psi); //printf("\ninvcnum = %e %e", invcnum, 1.0/cnum); - - avex[i] *= invcnum; - avey[i] *= invcnum; - avez[i] *= invcnum; - aveo[i] = 1.0; - aveb[i] *= invcnum; + + avex[i] *= invcnum; + avey[i] *= invcnum; + avez[i] *= invcnum; } - + return(psi); } -/* Calculate the ML estimate of a hierarchical mean, where the variance-weighted atoms +/* Calculate the ML estimate of a hierarchical mean, where the variance-weighted atoms are normally distributed with hyper-mean zero */ /* 2009-06-11 */ double @@ -1278,12 +1085,10 @@ int i, j; double *avex = cdsA->avecds->x, *avey = cdsA->avecds->y, - *avez = cdsA->avecds->z, - *aveo = cdsA->avecds->o, - *aveb = cdsA->avecds->b; + *avez = cdsA->avecds->z; const int cnum = cdsA->cnum, vlen = cdsA->vlen; - const Cds **cds = (const Cds **) cdsA->cds; - Cds *cdsj; + const Cds **cds = (const Cds **) cdsA->cds; + Cds *cdsj = NULL; double invcnum, psi, norm; @@ -1291,8 +1096,8 @@ for (i = 0; i < vlen; ++i) { norm += 1.0 / cdsA->var[i]; - psi += (avex[i]*avex[i]/cdsA->var[i] + - avey[i]*avey[i]/cdsA->var[i] + + psi += (avex[i]*avex[i]/cdsA->var[i] + + avey[i]*avey[i]/cdsA->var[i] + avez[i]*avez[i]/cdsA->var[i]); } 
psi /= (3.0 * norm); @@ -1300,33 +1105,29 @@ memset(avex, 0, vlen * sizeof(double)); memset(avey, 0, vlen * sizeof(double)); memset(avez, 0, vlen * sizeof(double)); - memset(aveb, 0, vlen * sizeof(double)); for (j = 0; j < cnum; ++j) { cdsj = (Cds *) cds[j]; - for (i = 0; i < vlen; ++i) - { - avex[i] += cdsj->x[i]; - avey[i] += cdsj->y[i]; - avez[i] += cdsj->z[i]; - aveb[i] += cdsj->b[i]; - } + for (i = 0; i < vlen; ++i) + { + avex[i] += cdsj->x[i]; + avey[i] += cdsj->y[i]; + avez[i] += cdsj->z[i]; + } } for (i = 0; i < vlen; ++i) { invcnum = 1.0 / ((double) cnum + 1.0 / psi); //printf("\ninvcnum = %e %e", invcnum, 1.0/cnum); - - avex[i] *= invcnum; - avey[i] *= invcnum; - avez[i] *= invcnum; - aveo[i] = 1.0; - aveb[i] *= invcnum; + + avex[i] *= invcnum; + avey[i] *= invcnum; + avez[i] *= invcnum; } - + return(psi); } @@ -1337,124 +1138,106 @@ int i, j; double *avex = cdsA->avecds->x, *avey = cdsA->avecds->y, - *avez = cdsA->avecds->z, - *aveo = cdsA->avecds->o, - *aveb = cdsA->avecds->b; + *avez = cdsA->avecds->z; const int cnum = cdsA->cnum, vlen = cdsA->vlen; - const Cds **cds = (const Cds **) cdsA->cds; - Cds *cdsj; + const Cds **cds = (const Cds **) cdsA->cds; + Cds *cdsj = NULL; double invcnum = 1.0 / (double) cnum; memset(avex, 0, vlen * sizeof(double)); memset(avey, 0, vlen * sizeof(double)); memset(avez, 0, vlen * sizeof(double)); - memset(aveb, 0, vlen * sizeof(double)); for (j = 0; j < cnum; ++j) { cdsj = (Cds *) cds[j]; - for (i = 0; i < vlen; ++i) - { - avex[i] += cdsj->x[i]; - avey[i] += cdsj->y[i]; - avez[i] += cdsj->z[i]; - aveb[i] += cdsj->b[i]; - } + for (i = 0; i < vlen; ++i) + { + avex[i] += cdsj->x[i]; + avey[i] += cdsj->y[i]; + avez[i] += cdsj->z[i]; + } } for (i = 0; i < vlen; ++i) { - avex[i] *= invcnum; - avey[i] *= invcnum; - avez[i] *= invcnum; - aveo[i] = 1.0; - aveb[i] *= invcnum; + avex[i] *= invcnum; + avey[i] *= invcnum; + avez[i] *= invcnum; } } + void AveCdsTB(CdsArray *cdsA, int omit) { int i, j; double *avex = cdsA->avecds->x, 
*avey = cdsA->avecds->y, - *avez = cdsA->avecds->z, - *aveo = cdsA->avecds->o, - *aveb = cdsA->avecds->b; + *avez = cdsA->avecds->z; const int cnum = cdsA->cnum, vlen = cdsA->vlen; - const Cds **cds = (const Cds **) cdsA->cds; - Cds *cdsj; + const Cds **cds = (const Cds **) cdsA->cds; + Cds *cdsj = NULL; double invcnum = 1.0 / (double) (cnum-1); memset(avex, 0, vlen * sizeof(double)); memset(avey, 0, vlen * sizeof(double)); memset(avez, 0, vlen * sizeof(double)); - memset(aveb, 0, vlen * sizeof(double)); for (j = 0; j < cnum; ++j) { - + if (j == omit) continue; cdsj = (Cds *) cds[j]; - for (i = 0; i < vlen; ++i) - { - avex[i] += cdsj->x[i]; - avey[i] += cdsj->y[i]; - avez[i] += cdsj->z[i]; - aveb[i] += cdsj->b[i]; - } + for (i = 0; i < vlen; ++i) + { + avex[i] += cdsj->x[i]; + avey[i] += cdsj->y[i]; + avez[i] += cdsj->z[i]; + } } for (i = 0; i < vlen; ++i) { - avex[i] *= invcnum; - avey[i] *= invcnum; - avez[i] *= invcnum; - aveo[i] = 1.0; - aveb[i] *= invcnum; + avex[i] *= invcnum; + avey[i] *= invcnum; + avez[i] *= invcnum; } } - static void *AveCdsPth(void *avedata_ptr) { AveData *avedata = (AveData *) avedata_ptr; int i, j; - double xtmp, ytmp, ztmp, /* otmp, */btmp; + double xtmp, ytmp, ztmp; double *avex = avedata->cdsA->avecds->x, *avey = avedata->cdsA->avecds->y, - *avez = avedata->cdsA->avecds->z, - *aveo = avedata->cdsA->avecds->o, - *aveb = avedata->cdsA->avecds->b; + *avez = avedata->cdsA->avecds->z; const int cnum = avedata->cnum; double invcnum = 1.0 / cnum; - const Cds **cds = (const Cds **) avedata->cdsA->cds; - Cds *cdsj; + const Cds **cds = (const Cds **) avedata->cdsA->cds; + Cds *cdsj = NULL; for (i = avedata->start; i < avedata->end; ++i) { - xtmp = ytmp = ztmp = /* otmp = */ btmp = 0.0; + xtmp = ytmp = ztmp = 0.0; for (j = 0; j < cnum; ++j) { cdsj = (Cds *) cds[j]; xtmp += cdsj->x[i]; ytmp += cdsj->y[i]; ztmp += cdsj->z[i]; - /* otmp += cdsj->o[i]; */ - btmp += cdsj->b[i]; } avex[i] = xtmp * invcnum; avey[i] = ytmp * invcnum; avez[i] = ztmp * 
invcnum; - aveo[i] = 1.0; - aveb[i] = btmp * invcnum; } pthread_exit((void *) 0); @@ -1470,8 +1253,8 @@ incr = vlen / thrdnum; - for (i = 0; i < thrdnum - 1; ++i) - { + for (i = 0; i < thrdnum - 1; ++i) + { avedata[i]->cdsA = cdsA; avedata[i]->start = i * incr; avedata[i]->end = i*incr + incr; @@ -1485,21 +1268,21 @@ printf("ERROR811: return code from pthread_create() %d is %d\n", i, rc); exit(EXIT_FAILURE); } - } + } + + avedata[thrdnum - 1]->cdsA = cdsA; + avedata[thrdnum - 1]->start = (thrdnum - 1) * incr; + avedata[thrdnum - 1]->end = vlen; + avedata[thrdnum - 1]->vlen = vlen; + avedata[thrdnum - 1]->cnum = cnum; + + rc = pthread_create(&callThd[thrdnum - 1], attr, AveCdsPth, (void *) avedata[thrdnum - 1]); - avedata[thrdnum - 1]->cdsA = cdsA; - avedata[thrdnum - 1]->start = (thrdnum - 1) * incr; - avedata[thrdnum - 1]->end = vlen; - avedata[thrdnum - 1]->vlen = vlen; - avedata[thrdnum - 1]->cnum = cnum; - - rc = pthread_create(&callThd[thrdnum - 1], attr, AveCdsPth, (void *) avedata[thrdnum - 1]); - - if (rc) - { - printf("ERROR811: return code from pthread_create() %d is %d\n", i, rc); - exit(EXIT_FAILURE); - } + if (rc) + { + printf("ERROR811: return code from pthread_create() %d is %d\n", i, rc); + exit(EXIT_FAILURE); + } for (i = 0; i < thrdnum; ++i) { @@ -1516,132 +1299,159 @@ } -/* void */ -/* AveCdsOcc(CdsArray *cdsA) */ -/* { */ -/* int i, j; */ -/* double xtmp, ytmp, ztmp, btmp, occ; */ -/* double *avex = cdsA->avecds->x, */ -/* *avey = cdsA->avecds->y, */ -/* *avez = cdsA->avecds->z, */ -/* *aveo = cdsA->avecds->o, */ -/* *aveb = cdsA->avecds->b; */ -/* const Cds **cds = (const Cds **) cdsA->cds; */ -/* Cds *cdsj; */ -/* const double cnum = cdsA->cnum; */ -/* double invdf, occsum; */ -/* */ -/* for (i = 0; i < cdsA->vlen; ++i) */ -/* { */ -/* xtmp = ytmp = ztmp = btmp = 0.0; */ -/* occsum = 0.0; */ -/* for (j = 0; j < cnum; ++j) */ -/* { */ -/* cdsj = (Cds *) cds[j]; */ -/* occ = cdsj->o[i]; */ -/* occsum += occ; */ -/* xtmp += occ * cdsj->x[i]; */ 
-/* ytmp += occ * cdsj->y[i]; */ -/* ztmp += occ * cdsj->z[i]; */ -/* btmp += occ * cdsj->b[i]; */ -/* } */ -/* */ -/* //invdf = 1.0 / cdsA->df[i]; */ -/* invdf = 1.0 / occsum; */ -/* */ -/* avex[i] = xtmp * invdf; */ -/* avey[i] = ytmp * invdf; */ -/* avez[i] = ztmp * invdf; */ -/* aveo[i] = 1.0; */ -/* aveb[i] = btmp * invdf; */ -/* } */ -/* } */ - - void -AveCdsOcc(CdsArray *cdsA) +AveCdsNu(CdsArray *cdsA) { int i, j; double *avex = cdsA->avecds->x, *avey = cdsA->avecds->y, - *avez = cdsA->avecds->z, - *aveo = cdsA->avecds->o, - *aveb = cdsA->avecds->b; + *avez = cdsA->avecds->z; const int cnum = cdsA->cnum, vlen = cdsA->vlen; - const Cds **cds = (const Cds **) cdsA->cds; - Cds *cdsj; - double occ, occsum, invocc; + const Cds **cds = (const Cds **) cdsA->cds; + Cds *cdsj = NULL; + double nu, nusum, invnu; memset(avex, 0, vlen * sizeof(double)); memset(avey, 0, vlen * sizeof(double)); memset(avez, 0, vlen * sizeof(double)); - memset(aveb, 0, vlen * sizeof(double)); for (i = 0; i < vlen; ++i) { - occsum = 0.0; + nusum = 0.0; for (j = 0; j < cnum; ++j) { cdsj = (Cds *) cds[j]; - occ = cdsj->o[i]; - occsum += occ; - avex[i] += occ * cdsj->x[i]; - avey[i] += occ * cdsj->y[i]; - avez[i] += occ * cdsj->z[i]; - aveb[i] += occ * cdsj->b[i]; + nu = cdsj->nu[i]; + nusum += nu; + avex[i] += nu * cdsj->x[i]; + avey[i] += nu * cdsj->y[i]; + avez[i] += nu * cdsj->z[i]; } - invocc = 1.0 / occsum; + invnu = 1.0 / nusum; - avex[i] *= invocc; - avey[i] *= invocc; - avez[i] *= invocc; - aveo[i] = 1.0; - aveb[i] *= invocc; + avex[i] *= invnu; + avey[i] *= invnu; + avez[i] *= invnu; } } +static void +CdsInnProd(Cds *cds) +{ + /* (i x k)(k x j) = (i x j) */ + /* (3 x N)(N x 3) = (3 x 3) */ + int k; + double **ip = NULL; + const double *x = (const double *) cds->x, + *y = (const double *) cds->y, + *z = (const double *) cds->z; + double xk, yk, zk; + + ip = cds->innerprod; + + memset(ip[0], 0, 9 * sizeof(double)); + + for (k = 0; k < cds->vlen; ++k) + { + xk = x[k]; + yk = y[k]; + zk = 
z[k]; + + ip[0][0] += (xk * xk); + ip[1][1] += (yk * yk); + ip[2][2] += (zk * zk); + ip[0][1] += (xk * yk); + ip[0][2] += (xk * zk); + ip[1][2] += (yk * zk); + } + + ip[1][0] = ip[0][1]; + ip[2][0] = ip[0][2]; + ip[2][1] = ip[1][2]; + + //printf("tr(X'X) = % e\n", ip[0][0] + ip[1][1] + ip[2][2]); + + /* Mat3Print(ip2); */ +} + + +static void +CdsInnProdWt(Cds *cds, const double *wts) +{ + /* (i x k)(k x j) = (i x j) */ + /* (3 x N)(N x 3) = (3 x 3) */ + int k; + double **ip = NULL; + const double *x = (const double *) cds->x, + *y = (const double *) cds->y, + *z = (const double *) cds->z; + double xk, yk, zk, wtsi; + + ip = cds->innerprod; + + memset(ip[0], 0, 9 * sizeof(double)); + + for (k = 0; k < cds->vlen; ++k) + { + wtsi = wts[k]; + + xk = x[k]; + yk = y[k]; + zk = z[k]; + + ip[0][0] += (xk * xk) * wtsi; + ip[1][1] += (yk * yk) * wtsi; + ip[2][2] += (zk * zk) * wtsi; + ip[0][1] += (xk * yk) * wtsi; + ip[0][2] += (xk * zk) * wtsi; + ip[1][2] += (yk * zk) * wtsi; + } + + ip[1][0] = ip[0][1]; + ip[2][0] = ip[0][2]; + ip[2][1] = ip[1][2]; + + //printf("tr(X'X) = % e\n", ip[0][0] + ip[1][1] + ip[2][2]); + + /* Mat3Print(ip2); */ +} + + void -CalcCdsPrincAxes(Cds *cds, double **rotmat) +CalcCdsPrincAxes(Cds *cds, double **r, double **u, double **vt, double *lambda, const double *wts) { - double *evals = (double *) malloc(3 * sizeof(double)); + int j; double det; - int i, j; - CdsInnerProd2(cds); - //eigensym((const double **) cds->innerprod2, evals, rotmat, 3); - //Mat3TransposeIp(rotmat); - //printf("\nCalcCdsPrincAxes B:"); - //Mat3Print(rotmat); - //CdsInnerProd2(cds); - jacobi3_cyc(cds->innerprod2, evals, rotmat, 1e-8); - //Mat3Print(rotmat); - //printf("\nCalcCdsPrincAxes A:"); - det = Mat3Det((const double **) rotmat); + if (algo->leastsquares) + CdsInnProd(cds); + else + CdsInnProdWt(cds, wts); + + CalcGSLSVD3(cds->innerprod, u, lambda, vt); + + det = Mat3Det((const double **) u); +// printf("\n * determinant of SVD UVt matrix = %f\n", det); if (det < 0) { - 
//printf("\nNEGATIVE DETERMINANT\n"); - for (i = 0; i < 3; ++i) - { - if (rotmat[i][i] < 0) - { - for (j = 0; j < 3; ++j) - rotmat[i][j] *= -1.0; - - break; - } - } +// printf("\nlambda: % f % f % f\n", lambda[0], lambda[1], lambda[2]); + printf("\nNEGATIVE DETERMINANT\n"); + lambda[2] = -lambda[2]; - //Mat3Print(rotmat); + for (j = 0; j < 3; ++j) + u[j][2] = -u[j][2]; } -/* Mat3Print(rotmat); */ -/* printf("\n evals %f %f %f det = %f %f", */ -/* evals[0], evals[1], evals[2], det, Mat3Det((const double **)rotmat)); */ + Mat3Cpy(r, (const double **) u); - free(evals); + if (VerifyRotMat(r, 1e-8) == 0) + { + printf("\nWARNING_772: BAD ROTATION MATRIX U\n\n"); + //exit(EXIT_FAILURE); + } } @@ -1650,7 +1460,7 @@ { int i, j; double xtmp, ytmp, ztmp, otmp, btmp; - const Cds **cds = (const Cds **) cdsA->cds; + const Cds **cds = (const Cds **) cdsA->cds; for (i = 0; i < cdsA->vlen; ++i) { @@ -1674,40 +1484,3 @@ } } } - - -/* multiplies first quaternion by second, puts result in second */ -/* when concatenating quats, you multiply the second rotation by the first, - in that noncommunative order */ -double -*ConcatQuatsIp(const double *quat1, double *quat2) -{ - double ww, xx, yy, zz, - wx, xw, yz, zy, - wy, xz, yw, zx, - wz, xy, yx, zw; - - ww = quat1[0] * quat2[0]; - xx = quat1[1] * quat2[1]; - yy = quat1[2] * quat2[2]; - zz = quat1[3] * quat2[3]; - wx = quat1[0] * quat2[1]; - xw = quat1[1] * quat2[0]; - yz = quat1[2] * quat2[3]; - zy = quat1[3] * quat2[2]; - wy = quat1[0] * quat2[2]; - xz = quat1[1] * quat2[3]; - yw = quat1[2] * quat2[0]; - zx = quat1[3] * quat2[1]; - wz = quat1[0] * quat2[3]; - xy = quat1[1] * quat2[2]; - yx = quat1[2] * quat2[1]; - zw = quat1[3] * quat2[0]; - - quat2[0] = ww - xx - yy - zz; - quat2[1] = wx + xw + yz - zy; - quat2[2] = wy - xz - yw - zx; - quat2[3] = wz + xy - yx + zw; - - return(quat2); -} diff -Nru theseus-2.0.6/pdbUtils.h theseus-3.0.0/pdbUtils.h --- theseus-2.0.6/pdbUtils.h 2013-06-05 17:20:03.000000000 +0000 +++ 
theseus-3.0.0/pdbUtils.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,28 +37,19 @@ CdsCopyAll(Cds *cds1, const Cds *cds2); void -CdsDelete(CdsArray *cdsA, int omit); - -void CdsArrayCopy(CdsArray *cdsA1, const CdsArray *cdsA2); void -AlgorithmCopy(Algorithm *algo1, const Algorithm *algo2); - -void -StatisticsCopy(Statistics *stats1, const Statistics *stats2); - -void CdsCopy(Cds *cds1, const Cds *cds2); void PDBCdsCopyAll(PDBCds *cds1, const PDBCds *cds2); void -CdsAdd(Cds *cds1, const Cds *cds2); +MatMultCdsMultMatDiag(Cds *outcds, const double **matK, const Cds *cds); void -RotMatAddIp(double **mat1, const double **mat2); +MatDiagMultCdsMultMatDiag(Cds *outcds, const double *wtK, const Cds *cds); void CopyCds2PDB(PDBCds *pdbcds, const Cds *cds); @@ -70,13 +61,16 @@ TransformPDBCdsIp(PDBCds *pdbcds); void -RotateCdsIp(Cds *cds, const double **U); +RotateCdsIp(Cds *cds, const double **rmat); + +void +RotateCdsIp2(double **c1, const int vlen, const double **rmat); void RotateCdsArrayIp(CdsArray *cdsA, const double **rmat); void -RotateCdsOp(const Cds *cds1, const double **U, Cds *cds2); +RotateCdsOp(double **c2, const double **c1, const double **rmat, const int vlen); void TransformCdsIp(Cds *cds); @@ -84,20 +78,17 @@ void ScaleCds(Cds *cds, const double scale); -double -NormalizeWeights(double *w, int vlen); - -double -NormalizeWeightsOcc(double *w, double *o, int vlen); +void +CenMass(Cds *cds); void -UnityWeightsCdsArray(CdsArray *cdsA); +CenMassWtOp(Cds *cds, const CdsArray *weights); void -CenMass(Cds *cds); +CenMass2(const double **cds, const int vlen, double *center); void -CenMassWtOp(Cds *cds, const CdsArray *weights); +CenMassWt2(const 
double **cds, const double *wts, const int vlen, double *center); void CenMassWtIp(Cds *cds, const double *weights); @@ -106,16 +97,28 @@ CenMassCovOp(Cds *cds, const CdsArray *weights); void +CalcCovCds(Cds *cds, const double **wtmat); + +void +CenMassCov(Cds *cds, const double **wtmat); + +void +CenMassCov2(const double **c, double **cc, const double **wtmat, const int vlen, double *center); + +void CenMassCov(Cds *cds, const double **wtmat); void CenMassWt(Cds *cds); void -CenMassOccVec(Cds *cds, double *cenmass); +CenMassNuVec(const double **c, const int *nu, double *cenmass, const int vlen); + +void +CenMassWtNu2(const double **cds, const double **ave, const int *nu, const double *wts, const int vlen, const double **rmat, double *center); void -CenMassWtIpOcc(Cds *cds, const double *wts); +CenMassWtIpNu(Cds *cds, const double *wts); void CenMassWtIpEM(Cds *cds, const Cds *avecds, const double *wts); @@ -133,10 +136,13 @@ ApplyCenterOp(Cds *cds1, const Cds *cds2); void -TransCdsIp(Cds *cds, const double *trans); +TranslateCdsOp2(double **cds2, const double **cds1, const int vlen, const double *center); void -NegTransCdsIp(Cds *cds, const double *trans); +TransCdsIp(double **c, const double *trans, const int vlen); + +void +NegTransCdsIp(double **c, const double *trans, const int vlen); void EM_MissingCds(CdsArray *cdsA); @@ -155,15 +161,12 @@ AveCdsTB(CdsArray *cdsA, int omit); void -AveCdsOcc(CdsArray *cdsA); +AveCdsNu(CdsArray *cdsA); void -CalcCdsPrincAxes(Cds *cds, double **rotmat); +CalcCdsPrincAxes(Cds *cds, double **r, double **u, double **vt, double *lambda, const double *wts); void SumCdsTB(CdsArray *cdsA, const int exclude); -double -*ConcatQuatsIp(const double *quat1, double *quat2); - #endif diff -Nru theseus-2.0.6/ProcGSLSVD.c theseus-3.0.0/ProcGSLSVD.c --- theseus-2.0.6/ProcGSLSVD.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/ProcGSLSVD.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning 
of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,16 +37,14 @@ static double -CalcE0Cov(const Cds *cds1, const Cds *cds2, - const double *axesw); +CalcE0Cov(const Cds *cds1, const Cds *cds2); static void CalcR(const Cds *cds1, const Cds *cds2, double **Rmat, const double *weights); static void -CalcRCov(const Cds *cds1, const Cds *cds2, double **Rmat, - const double **WtMat, const double *axesw); +CalcRCov(const Cds *cds1, const Cds *cds2, double **Rmat, const double **WtMat); static int CalcGSLSVD(double **Rmat, double **Umat, double *sigma, double **VTmat); @@ -56,15 +54,15 @@ static double -CalcMahFrobInnProd(const Cds *cds, const double *weights, const double *axesw) -{ +CalcMahFrobInnProd(const Cds *cds, const double *weights) +{ int i; double sum; const double *x = (const double *) cds->x, *y = (const double *) cds->y, *z = (const double *) cds->z; double xi, yi, zi; - const double axeswx = axesw[0], axeswy = axesw[1], axeswz = axesw[2]; + sum = 0.0; i = cds->vlen; @@ -74,8 +72,7 @@ yi = *y++; zi = *z++; - sum += *weights++ * - (axeswx * (xi * xi) + axeswy * (yi * yi) + axeswz * (zi * zi)); + sum += *weights++ * ((xi * xi) + (yi * yi) + (zi * zi)); } return(sum); @@ -84,7 +81,7 @@ static double CalcInnProdNorm(const Cds *cds) -{ +{ int i; double sum; const double *x = (const double *) cds->x, @@ -108,8 +105,33 @@ static double -CalcMahFrobInnProdRot(const Cds *cds, const double **rmat, const double *weights, const double *axesw) -{ +CalcInnProdNorm2(const double **cds, const int len) +{ + int i; + double sum; + const double *x = (const double *) cds[0], + *y = (const double *) cds[1], + *z = (const double *) cds[2]; + double xi, yi, zi; + + sum = 0.0; + i = len; + while(i-- > 0) + { + xi = *x++; + yi = *y++; + zi = *z++; + + sum += xi * xi + yi * 
yi + zi * zi; + } + + return(sum); +} + + +static double +CalcMahFrobInnProdRot(const Cds *cds, const double **rmat, const double *weights) +{ int i; double sum; const double *x = (const double *) cds->x, @@ -119,7 +141,7 @@ const double rmat00 = rmat[0][0], rmat01 = rmat[0][1], rmat02 = rmat[0][2], rmat10 = rmat[1][0], rmat11 = rmat[1][1], rmat12 = rmat[1][2], rmat20 = rmat[2][0], rmat21 = rmat[2][1], rmat22 = rmat[2][2]; - const double axeswx = axesw[0], axeswy = axesw[1], axeswz = axesw[2]; + sum = 0.0; i = cds->vlen; @@ -133,8 +155,7 @@ yir = xi * rmat01 + yi * rmat11 + zi * rmat21; zir = xi * rmat02 + yi * rmat12 + zi * rmat22; - sum += *weights++ * - (axeswx * (xir * xir) + axeswy * (yir * yir) + axeswz * (zir * zir)); + sum += *weights++ * ((xir * xir) + (yir * yir) + (zir * zir)); } return(sum); @@ -142,9 +163,8 @@ static double -CalcE0Cov(const Cds *cds1, const Cds *cds2, - const double *axesw) -{ +CalcE0Cov(const Cds *cds1, const Cds *cds2) +{ int i; double sum; const double *x2 = (const double *) cds2->x, @@ -161,7 +181,7 @@ *cz1 = (const double *) cds1->covz; double x1i, y1i, z1i, x2i, y2i, z2i, cx1i, cy1i, cz1i, cx2i, cy2i, cz2i; - const double axeswx = axesw[0], axeswy = axesw[1], axeswz = axesw[2]; + sum = 0.0; i = cds1->vlen; @@ -181,9 +201,9 @@ cy2i = *cy2++; cz2i = *cz2++; - sum += (axeswx * (cx1i * x1i + cx2i * x2i) + - axeswy * (cy1i * y1i + cy2i * y2i) + - axeswz * (cz1i * z1i + cz2i * z2i)); + sum += (cx1i * x1i + cx2i * x2i) + + (cy1i * y1i + cy2i * y2i) + + (cz1i * z1i + cz2i * z2i); } return(sum); @@ -225,11 +245,11 @@ Rmat00 += x2i * x1i; Rmat01 += x2i * y1i; Rmat02 += x2i * z1i; - + Rmat10 += y2i * x1i; Rmat11 += y2i * y1i; Rmat12 += y2i * z1i; - + Rmat20 += z2i * x1i; Rmat21 += z2i * y1i; Rmat22 += z2i * z1i; @@ -262,9 +282,9 @@ Rmat10, Rmat11, Rmat12, Rmat20, Rmat21, Rmat22; - /* printf("\n axes weights: %f %f %f ", axeswx, axeswy, axeswz); */ - Rmat00 = Rmat01 = Rmat02 = Rmat10 = Rmat11 = Rmat12 = + Rmat00 = Rmat01 = Rmat02 = + Rmat10 
= Rmat11 = Rmat12 = Rmat20 = Rmat21 = Rmat22 = 0.0; i = cds1->vlen; @@ -281,11 +301,11 @@ Rmat00 += x2i * x1i; Rmat01 += x2i * y1i; Rmat02 += x2i * z1i; - + Rmat10 += y2i * x1i; Rmat11 += y2i * y1i; Rmat12 += y2i * z1i; - + Rmat20 += z2i * x1i; Rmat21 += z2i * y1i; Rmat22 += z2i * z1i; @@ -304,8 +324,62 @@ static void -CalcRCov(const Cds *cds1, const Cds *cds2, double **Rmat, - const double **WtMat, const double *axesw) +CalcRvan2(const double **cds1, const double **cds2, const int len, double **Rmat) +{ + int i; + const double *x1 = cds1[0], + *y1 = cds1[1], + *z1 = cds1[2]; + const double *x2 = cds2[0], + *y2 = cds2[1], + *z2 = cds2[2]; + double x2i, y2i, z2i, x1i, y1i, z1i; + double Rmat00, Rmat01, Rmat02, + Rmat10, Rmat11, Rmat12, + Rmat20, Rmat21, Rmat22; + + Rmat00 = Rmat01 = Rmat02 = + Rmat10 = Rmat11 = Rmat12 = + Rmat20 = Rmat21 = Rmat22 = 0.0; + + i = len; + while(i-- > 0) + { + x1i = *x1++; + y1i = *y1++; + z1i = *z1++; + + x2i = *x2++; + y2i = *y2++; + z2i = *z2++; + + Rmat00 += x2i * x1i; + Rmat01 += x2i * y1i; + Rmat02 += x2i * z1i; + + Rmat10 += y2i * x1i; + Rmat11 += y2i * y1i; + Rmat12 += y2i * z1i; + + Rmat20 += z2i * x1i; + Rmat21 += z2i * y1i; + Rmat22 += z2i * z1i; + } + + Rmat[0][0] = Rmat00; + Rmat[0][1] = Rmat01; + Rmat[0][2] = Rmat02; + Rmat[1][0] = Rmat10; + Rmat[1][1] = Rmat11; + Rmat[1][2] = Rmat12; + Rmat[2][0] = Rmat20; + Rmat[2][1] = Rmat21; + Rmat[2][2] = Rmat22; +} + + +static void +CalcRCov(const Cds *cds1, const Cds *cds2, double **Rmat, const double **WtMat) { int i; const double *x2 = (const double *) cds2->covx, @@ -314,9 +388,6 @@ const double *x1 = (const double *) cds1->x, *y1 = (const double *) cds1->y, *z1 = (const double *) cds1->z; - const double axeswx = sqrt(axesw[0]), - axeswy = sqrt(axesw[1]), - axeswz = sqrt(axesw[2]); double x2i, y2i, z2i, x1i, y1i, z1i; double Rmat00, Rmat01, Rmat02, Rmat10, Rmat11, Rmat12, @@ -325,27 +396,25 @@ Rmat00 = Rmat01 = Rmat02 = Rmat10 = Rmat11 = Rmat12 = Rmat20 = Rmat21 = Rmat22 = 0.0; - 
/* printf("\n axes weights: %f %f %f ", axeswx, axeswy, axeswz); */ - i = cds1->vlen; while(i-- > 0) { - x2i = axeswx * *x2++; - y2i = axeswy * *y2++; - z2i = axeswz * *z2++; - - x1i = axeswx * *x1++; - y1i = axeswy * *y1++; - z1i = axeswz * *z1++; + x2i = *x2++; + y2i = *y2++; + z2i = *z2++; + + x1i = *x1++; + y1i = *y1++; + z1i = *z1++; Rmat00 += x2i * x1i; Rmat01 += x2i * y1i; Rmat02 += x2i * z1i; - + Rmat10 += y2i * x1i; Rmat11 += y2i * y1i; Rmat12 += y2i * z1i; - + Rmat20 += z2i * x1i; Rmat21 += z2i * y1i; Rmat22 += z2i * z1i; @@ -387,9 +456,11 @@ //return(dgesvd_opt_dest(Rmat, 3, 3, Umat, sigma, VTmat)); - svdGSLDest(Rmat, 3, sigma, VTmat); + // GSL says Jacobi SVD is more accurate the Golub + svdGSLJacobiDest(Rmat, 3, sigma, VTmat); Mat3TransposeIp(VTmat); Mat3Cpy(Umat, (const double **) Rmat); + return(1); /* printf("\n\n **********************************************************************:"); */ @@ -455,7 +526,7 @@ /* Takes U and V^t on input, calculates R = VU^t */ static int CalcRotMat(double **rotmat, double **Umat, double **Vtmat) -{ +{ int i, j, k; double det; @@ -496,7 +567,7 @@ rmsd = sqrt(E/atom_num) */ double ProcGSLSVDvan(const Cds *cds1, const Cds *cds2, double **rotmat, - double **Rmat, double **Umat, double **VTmat, double *sigma) + double **Rmat, double **Umat, double **VTmat, double *sigma) { double det, sumdev, term1, term2; @@ -531,6 +602,46 @@ } +/* returns sum of squared residuals, E + rmsd = sqrt(E/atom_num) */ +double +ProcGSLSVDvan2(const double **cds1, const double **cds2, const int len, double **rotmat, + double **Rmat, double **Umat, double **VTmat, double *sigma, + double *norm1, double *norm2, double *innprod) +{ + double det; + + *norm1 = CalcInnProdNorm2(cds1, len); + *norm2 = CalcInnProdNorm2(cds2, len); + CalcRvan2(cds1, cds2, len, Rmat); + CalcGSLSVD(Rmat, Umat, sigma, VTmat); + det = CalcRotMat(rotmat, Umat, VTmat); + +// VerifyRotMat(rotmat, 1e-5); +/* printf("\n*************** sumdev = %8.2f ", sumdev); */ +/* 
printf("\nrotmat:"); */ +/* write_C_mat((const double **)rotmat, 3, 8, 0); */ + + if (det < 0) + *innprod = sigma[0] + sigma[1] - sigma[2]; + else + *innprod = sigma[0] + sigma[1] + sigma[2]; + +/* printf("\nRmat:"); */ +/* write_C_mat((const double **)Rmat, 3, 8, 0); */ +/* printf("\nUmat:"); */ +/* write_C_mat((const double **)Umat, 3, 8, 0); */ +/* printf("\nVTmat:"); */ +/* write_C_mat((const double **)VTmat, 3, 8, 0); */ +/* int i; */ +/* for (i = 0; i < 3; ++i) */ +/* printf("\nsigma[%d] = %8.2f ", i, sigma[i]); */ + + return(*norm1 + *norm2 - 2.0 * *innprod); +} + + + static void CalcRFrag(const FragCds *cds1, const FragCds *cds2, double **Rmat) { @@ -546,7 +657,6 @@ Rmat10, Rmat11, Rmat12, Rmat20, Rmat21, Rmat22; - /* printf("\n axes weights: %f %f %f ", axeswx, axeswy, axeswz); */ Rmat00 = Rmat01 = Rmat02 = Rmat10 = Rmat11 = Rmat12 = Rmat20 = Rmat21 = Rmat22 = 0.0; @@ -565,11 +675,11 @@ Rmat00 += x2i * x1i; Rmat01 += x2i * y1i; Rmat02 += x2i * z1i; - + Rmat10 += y2i * x1i; Rmat11 += y2i * y1i; Rmat12 += y2i * z1i; - + Rmat20 += z2i * x1i; Rmat21 += z2i * y1i; Rmat22 += z2i * z1i; @@ -589,7 +699,7 @@ static double CalcInnProdNormFrag(const FragCds *cds) -{ +{ int i; double sum; const double *x = (const double *) cds->x, @@ -616,7 +726,7 @@ rmsd = sqrt(E/atom_num) */ double ProcGSLSVDFrag(const FragCds *frag1, const FragCds *frag2, double **rotmat, - double **Rmat, double **Umat, double **VTmat, double *sigma) + double **Rmat, double **Umat, double **VTmat, double *sigma) { double det, sumdev, term1, term2; @@ -639,16 +749,16 @@ double ProcGSLSVD(const Cds *cds1, const Cds *cds2, double **rotmat, - const double *weights, const double *axesw, - double **Rmat, double **Umat, double **VTmat, double *sigma) + const double *weights, + double **Rmat, double **Umat, double **VTmat, double *sigma) { double det, sumdev, term1, term2; - term1 = CalcMahFrobInnProd(cds2, weights, axesw); + term1 = CalcMahFrobInnProd(cds2, weights); CalcR(cds1, cds2, Rmat, weights); 
CalcGSLSVD(Rmat, Umat, sigma, VTmat); det = CalcRotMat(rotmat, Umat, VTmat); - term2 = CalcMahFrobInnProdRot(cds1, (const double **) rotmat, weights, axesw); + term2 = CalcMahFrobInnProdRot(cds1, (const double **) rotmat, weights); sumdev = term1 + term2; /* VerifyRotMat(rotmat, 1e-5); */ @@ -679,7 +789,7 @@ rmsd = sqrt(E/atom_num) */ double ProcGSLSVDCov(Cds *cds1, Cds *cds2, double **rotmat, - const double **WtMat, const double *axesw, double **Rmat, + const double **WtMat, double **Rmat, double **Umat, double **VTmat, double *sigma) { double det, sumdev = 0.0; @@ -687,8 +797,8 @@ CalcCovCds(cds1, WtMat); CalcCovCds(cds2, WtMat); - sumdev = CalcE0Cov(cds1, cds2, axesw); - CalcRCov(cds1, cds2, Rmat, WtMat, axesw); + sumdev = CalcE0Cov(cds1, cds2); + CalcRCov(cds1, cds2, Rmat, WtMat); CalcGSLSVD(Rmat, Umat, sigma, VTmat); det = CalcRotMat(rotmat, Umat, VTmat); diff -Nru theseus-2.0.6/ProcGSLSVD.h theseus-3.0.0/ProcGSLSVD.h --- theseus-2.0.6/ProcGSLSVD.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/ProcGSLSVD.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -33,18 +33,21 @@ double **Rmat, double **Umat, double **VTmat, double *sigma); double +ProcGSLSVDvan2(const double **cds1, const double **cds2, const int len, double **rotmat, + double **Rmat, double **Umat, double **VTmat, double *sigma, + double *norm1, double *norm2, double *innprod); + +double ProcGSLSVDFrag(const FragCds *frag1, const FragCds *frag2, double **rotmat, double **Rmat, double **Umat, double **VTmat, double *sigma); double -ProcGSLSVD(const Cds *cds1, const Cds *cds2, double **rotmat, - const double *weights, const double *axesw, - double **Rmat, double **Umat, double **VTmat, double *sigma); +ProcGSLSVD(const Cds *cds1, const Cds *cds2, double **rotmat, const double *weights, + double **Rmat, double **Umat, double **VTmat, double *sigma); double -ProcGSLSVDCov(Cds *cds1, Cds *cds2, double **rotmat, - const double **covmat, const double *axesw, - double **Rmat, double **Umat, double **VTmat, - double *sigma); +ProcGSLSVDCov(Cds *cds1, Cds *cds2, double **rotmat, const double **covmat, + double **Rmat, double **Umat, double **VTmat, + double *sigma); #endif diff -Nru theseus-2.0.6/ProcGSLSVDNu.c theseus-3.0.0/ProcGSLSVDNu.c --- theseus-2.0.6/ProcGSLSVDNu.c 1970-01-01 00:00:00.000000000 +0000 +++ theseus-3.0.0/ProcGSLSVDNu.c 2014-05-13 16:48:52.000000000 +0000 @@ -0,0 +1,599 @@ +/* + Theseus - maximum likelihood superpositioning of macromolecular structures + + Copyright (C) 2004-2014 Douglas L. Theobald + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: + + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA + + -/_|:|_|_\- +*/ + +#include +#include +#include +#include +#include +#include "pdbUtils.h" +#include "pdbStats.h" +#include "CovMat.h" +#include "DLTmath.h" +#include "ProcGSLSVDNu.h" + + +static double +CalcInnProdNormNu(const Cds *cds) +{ + int i; + double sum; + const double *x = (const double *) cds->x, + *y = (const double *) cds->y, + *z = (const double *) cds->z; + const int *nu = (const int *) cds->nu; + double xi, yi, zi; + + sum = 0.0; + i = cds->vlen; + while(i-- > 0) + { + xi = *x++; + yi = *y++; + zi = *z++; + + sum += *nu++ * (xi * xi + yi * yi + zi * zi); + } + + return(sum); +} + + +static double +CalcInnProdNormNu2(const double **cds, const int *nu, const int len) +{ + int i; + double sum; + const double *x = (const double *) cds[0], + *y = (const double *) cds[1], + *z = (const double *) cds[2]; + double xi, yi, zi; + + sum = 0.0; + i = len; + while(i-- > 0) + { + xi = *x++; + yi = *y++; + zi = *z++; + + sum += *nu++ * (xi * xi + yi * yi + zi * zi); + } + + return(sum); +} + + +static double +CalcE0Nu(const Cds *cds1, const Cds *cds2, + const double *weights) +{ + int i; + double sum; + const double *x1 = (const double *) cds1->x, + *y1 = (const double *) cds1->y, + *z1 = (const double *) cds1->z; + const double *x2 = (const double *) cds2->x, + *y2 = (const double *) cds2->y, + *z2 = (const double *) cds2->z; + const int *nu1 = (const int *) cds1->nu, + *nu2 = (const int *) cds2->nu; + double x1i, y1i, z1i, x2i, y2i, z2i, weight; + + sum = 0.0; + i = cds1->vlen; + while(i-- > 0) + { + x1i = 
*x1++; + y1i = *y1++; + z1i = *z1++; + x2i = *x2++; + y2i = *y2++; + z2i = *z2++; + weight = *weights++ * *nu1++ * *nu2++; + sum += weight * + ((x1i * x1i + x2i * x2i) + + (y1i * y1i + y2i * y2i) + + (z1i * z1i + z2i * z2i)); + } + + return(sum); +} + + +static double +CalcE0CovNu(const Cds *cds1, const Cds *cds2) +{ + int i; + double sum; + const double *x2 = (const double *) cds2->x, + *y2 = (const double *) cds2->y, + *z2 = (const double *) cds2->z; + const double *x1 = (const double *) cds1->x, + *y1 = (const double *) cds1->y, + *z1 = (const double *) cds1->z; + const double *cx2 = (const double *) cds2->covx, + *cy2 = (const double *) cds2->covy, + *cz2 = (const double *) cds2->covz; + const double *cx1 = (const double *) cds1->covx, + *cy1 = (const double *) cds1->covy, + *cz1 = (const double *) cds1->covz; + const int *nu1 = (const int *) cds1->nu, + *nu2 = (const int *) cds2->nu; + double x1i, y1i, z1i, x2i, y2i, z2i, + cx1i, cy1i, cz1i, cx2i, cy2i, cz2i; + + sum = 0.0; + i = cds1->vlen; + while(i-- > 0) + { + x1i = *x1++; + y1i = *y1++; + z1i = *z1++; + x2i = *x2++; + y2i = *y2++; + z2i = *z2++; + + cx1i = *cx1++; + cy1i = *cy1++; + cz1i = *cz1++; + cx2i = *cx2++; + cy2i = *cy2++; + cz2i = *cz2++; + + sum += *nu1++ * *nu2++ * + ((cx1i * x1i + cx2i * x2i) + + (cy1i * y1i + cy2i * y2i) + + (cz1i * z1i + cz2i * z2i)); + } + + return(sum); +} + + +static void +CalcRNu(const Cds *cds1, const Cds *cds2, double **Rmat, + const double *weights) +{ + int i; + double weight; + const double *x2 = (const double *) cds2->x, + *y2 = (const double *) cds2->y, + *z2 = (const double *) cds2->z; + const double *x1 = (const double *) cds1->x, + *y1 = (const double *) cds1->y, + *z1 = (const double *) cds1->z; + const int *nu1 = (const int *) cds1->nu, + *nu2 = (const int *) cds2->nu; + double x2i, y2i, z2i, x1i, y1i, z1i; + double Rmat00, Rmat01, Rmat02, + Rmat10, Rmat11, Rmat12, + Rmat20, Rmat21, Rmat22; + + Rmat00 = Rmat01 = Rmat02 = Rmat10 = Rmat11 = Rmat12 = + Rmat20 = 
Rmat21 = Rmat22 = 0.0; + + i = cds1->vlen; + while(i-- > 0) + { + weight = *weights++ * *nu1++ * *nu2++; + + x1i = *x1++; + y1i = *y1++; + z1i = *z1++; + + x2i = weight * *x2++; + y2i = weight * *y2++; + z2i = weight * *z2++; + + Rmat00 += x2i * x1i; + Rmat01 += x2i * y1i; + Rmat02 += x2i * z1i; + + Rmat10 += y2i * x1i; + Rmat11 += y2i * y1i; + Rmat12 += y2i * z1i; + + Rmat20 += z2i * x1i; + Rmat21 += z2i * y1i; + Rmat22 += z2i * z1i; + } + + Rmat[0][0] = Rmat00; + Rmat[0][1] = Rmat01; + Rmat[0][2] = Rmat02; + Rmat[1][0] = Rmat10; + Rmat[1][1] = Rmat11; + Rmat[1][2] = Rmat12; + Rmat[2][0] = Rmat20; + Rmat[2][1] = Rmat21; + Rmat[2][2] = Rmat22; +} + + +static void +CalcRvanNu(const Cds *cds1, const Cds *cds2, double **Rmat) +{ + int i; + double weight; + const double *x2 = (const double *) cds2->x, + *y2 = (const double *) cds2->y, + *z2 = (const double *) cds2->z; + const double *x1 = (const double *) cds1->x, + *y1 = (const double *) cds1->y, + *z1 = (const double *) cds1->z; + const int *nu1 = (const int *) cds1->nu, + *nu2 = (const int *) cds2->nu; + double x2i, y2i, z2i, x1i, y1i, z1i; + double Rmat00, Rmat01, Rmat02, + Rmat10, Rmat11, Rmat12, + Rmat20, Rmat21, Rmat22; + + Rmat00 = Rmat01 = Rmat02 = Rmat10 = Rmat11 = Rmat12 = + Rmat20 = Rmat21 = Rmat22 = 0.0; + + i = cds1->vlen; + while(i-- > 0) + { + weight = *nu1++ * *nu2++; + + x1i = *x1++; + y1i = *y1++; + z1i = *z1++; + + x2i = weight * *x2++; + y2i = weight * *y2++; + z2i = weight * *z2++; + + Rmat00 += x2i * x1i; + Rmat01 += x2i * y1i; + Rmat02 += x2i * z1i; + + Rmat10 += y2i * x1i; + Rmat11 += y2i * y1i; + Rmat12 += y2i * z1i; + + Rmat20 += z2i * x1i; + Rmat21 += z2i * y1i; + Rmat22 += z2i * z1i; + } + + Rmat[0][0] = Rmat00; + Rmat[0][1] = Rmat01; + Rmat[0][2] = Rmat02; + Rmat[1][0] = Rmat10; + Rmat[1][1] = Rmat11; + Rmat[1][2] = Rmat12; + Rmat[2][0] = Rmat20; + Rmat[2][1] = Rmat21; + Rmat[2][2] = Rmat22; +} + + +static void +CalcRvanNu2(const double **cds1, const double **cds2, const int *nu, const int 
len, double **Rmat) +{ + int i; + double weight; + const double *x1 = cds1[0], + *y1 = cds1[1], + *z1 = cds1[2]; + const double *x2 = cds2[0], + *y2 = cds2[1], + *z2 = cds2[2]; + double x2i, y2i, z2i, x1i, y1i, z1i; + double Rmat00, Rmat01, Rmat02, + Rmat10, Rmat11, Rmat12, + Rmat20, Rmat21, Rmat22; + + Rmat00 = Rmat01 = Rmat02 = + Rmat10 = Rmat11 = Rmat12 = + Rmat20 = Rmat21 = Rmat22 = 0.0; + + i = len; + while(i-- > 0) + { + //weight = *o1++ * *o2++; + weight = *nu++; + + x1i = *x1++; + y1i = *y1++; + z1i = *z1++; + + x2i = weight * *x2++; + y2i = weight * *y2++; + z2i = weight * *z2++; + + Rmat00 += x2i * x1i; + Rmat01 += x2i * y1i; + Rmat02 += x2i * z1i; + + Rmat10 += y2i * x1i; + Rmat11 += y2i * y1i; + Rmat12 += y2i * z1i; + + Rmat20 += z2i * x1i; + Rmat21 += z2i * y1i; + Rmat22 += z2i * z1i; + } + + Rmat[0][0] = Rmat00; + Rmat[0][1] = Rmat01; + Rmat[0][2] = Rmat02; + Rmat[1][0] = Rmat10; + Rmat[1][1] = Rmat11; + Rmat[1][2] = Rmat12; + Rmat[2][0] = Rmat20; + Rmat[2][1] = Rmat21; + Rmat[2][2] = Rmat22; +} + + +static void +CalcRCovNu(const Cds *cds1, const Cds *cds2, double **Rmat, + const double **WtMat) +{ + int i; + double weight; + const double *x2 = (const double *) cds2->covx, + *y2 = (const double *) cds2->covy, + *z2 = (const double *) cds2->covz; + const double *x1 = (const double *) cds1->x, + *y1 = (const double *) cds1->y, + *z1 = (const double *) cds1->z; + const int *nu1 = (const int *) cds1->nu, + *nu2 = (const int *) cds2->nu; + double x2i, y2i, z2i, x1i, y1i, z1i; + double Rmat00, Rmat01, Rmat02, + Rmat10, Rmat11, Rmat12, + Rmat20, Rmat21, Rmat22; + + Rmat00 = Rmat01 = Rmat02 = Rmat10 = Rmat11 = Rmat12 = + Rmat20 = Rmat21 = Rmat22 = 0.0; + + i = cds1->vlen; + while(i-- > 0) + { + weight = *nu1++ * *nu2++; + + x1i = *x1++; + y1i = *y1++; + z1i = *z1++; + + x2i = weight * *x2++; + y2i = weight * *y2++; + z2i = weight * *z2++; + + Rmat00 += x2i * x1i; + Rmat01 += x2i * y1i; + Rmat02 += x2i * z1i; + + Rmat10 += y2i * x1i; + Rmat11 += y2i * y1i; + 
Rmat12 += y2i * z1i; + + Rmat20 += weight * z2i * x1i; + Rmat21 += weight * z2i * y1i; + Rmat22 += weight * z2i * z1i; + } + + Rmat[0][0] = Rmat00; + Rmat[0][1] = Rmat01; + Rmat[0][2] = Rmat02; + Rmat[1][0] = Rmat10; + Rmat[1][1] = Rmat11; + Rmat[1][2] = Rmat12; + Rmat[2][0] = Rmat20; + Rmat[2][1] = Rmat21; + Rmat[2][2] = Rmat22; +} + + +static void +CalcGSLSVD(double **Rmat, double **Umat, double *sigma, double **VTmat) +{ + svdGSLDest(Rmat, 3, sigma, VTmat); + Mat3TransposeIp(VTmat); + Mat3Cpy(Umat, (const double **) Rmat); +} + + +/* Takes U and V^t on input, calculates R = VU^t */ +static int +CalcRotMat(double **rotmat, double **Umat, double **Vtmat) +{ + int i, j, k; + double det; + + memset(&rotmat[0][0], 0, 9 * sizeof(double)); + + det = Mat3Det((const double **)Umat) * Mat3Det((const double **)Vtmat); + + if (det > 0) + { + for (i = 0; i < 3; ++i) + for (j = 0; j < 3; ++j) + for (k = 0; k < 3; ++k) + rotmat[i][j] += (Vtmat[k][i] * Umat[j][k]); + + return(1); + } + else + { + for (i = 0; i < 3; ++i) + { + for (j = 0; j < 3; ++j) + { + for (k = 0; k < 2; ++k) + rotmat[i][j] += (Vtmat[k][i] * Umat[j][k]); + + rotmat[i][j] -= (Vtmat[2][i] * Umat[j][2]); + } + } + + return(-1); + } +} + + +/* returns sum of squared residuals, E + rmsd = sqrt(E/atom_num) */ +double +ProcGSLSVDvanNu(const Cds *cds1, const Cds *cds2, double **rotmat, + double **Rmat, double **Umat, double **VTmat, double *sigma, + double *norm1, double *norm2, double *innprod) +{ + double det; + + *norm1 = CalcInnProdNormNu(cds2); + *norm2 = CalcInnProdNormNu(cds1); + CalcRvanNu(cds1, cds2, Rmat); + CalcGSLSVD(Rmat, Umat, sigma, VTmat); + det = CalcRotMat(rotmat, Umat, VTmat); + +/* VerifyRotMat(rotmat, 1e-5); */ +/* printf("\n*************** sumdev = %8.2f ", sumdev); */ +/* printf("\nrotmat:"); */ +/* write_C_mat((const double **)rotmat, 3, 8, 0); */ + + if (det < 0) + *innprod = sigma[0] + sigma[1] - sigma[2]; + else + *innprod = sigma[0] + sigma[1] + sigma[2]; + +/* printf("\nRmat:"); */ +/* 
write_C_mat((const double **)Rmat, 3, 8, 0); */ +/* printf("\nUmat:"); */ +/* write_C_mat((const double **)Umat, 3, 8, 0); */ +/* printf("\nVTmat:"); */ +/* write_C_mat((const double **)VTmat, 3, 8, 0); */ +/* int i; */ +/* for (i = 0; i < 3; ++i) */ +/* printf("\nsigma[%d] = %8.2f ", i, sigma[i]); */ + + return(*norm1 + *norm2 - 2.0 * *innprod); +} + + +/* returns sum of squared residuals, E + rmsd = sqrt(E/atom_num) */ +double +ProcGSLSVDvanNu2(const double **cds1, const double **cds2, const int *nu, + const int len, double **rotmat, + double **Rmat, double **Umat, double **VTmat, double *sigma, + double *norm1, double *norm2, double *innprod) +{ + double det; + + *norm1 = CalcInnProdNormNu2(cds1, nu, len); + *norm2 = CalcInnProdNormNu2(cds2, nu, len); + CalcRvanNu2(cds1, cds2, nu, len, Rmat); + CalcGSLSVD(Rmat, Umat, sigma, VTmat); + det = CalcRotMat(rotmat, Umat, VTmat); + +/* VerifyRotMat(rotmat, 1e-5); */ +/* printf("\n*************** sumdev = %8.2f ", sumdev); */ +/* printf("\nrotmat:"); */ +/* write_C_mat((const double **)rotmat, 3, 8, 0); */ + + if (det < 0) + *innprod = sigma[0] + sigma[1] - sigma[2]; + else + *innprod = sigma[0] + sigma[1] + sigma[2]; + +/* printf("\nRmat:"); */ +/* write_C_mat((const double **)Rmat, 3, 8, 0); */ +/* printf("\nUmat:"); */ +/* write_C_mat((const double **)Umat, 3, 8, 0); */ +/* printf("\nVTmat:"); */ +/* write_C_mat((const double **)VTmat, 3, 8, 0); */ +/* int i; */ +/* for (i = 0; i < 3; ++i) */ +/* printf("\nsigma[%d] = %8.2f ", i, sigma[i]); */ + + return(*norm1 + *norm2 - 2.0 * *innprod); +} + + +double +ProcGSLSVDNu(const Cds *cds1, const Cds *cds2, double **rotmat, + const double *weights, + double **Rmat, double **Umat, double **VTmat, double *sigma) +{ + double det, sumdev; + + sumdev = CalcE0Nu(cds1, cds2, weights); + /* printf("\n # sumdev = %8.2f ", sumdev); */ + CalcRNu(cds1, cds2, Rmat, weights); + CalcGSLSVD(Rmat, Umat, sigma, VTmat); + det = CalcRotMat(rotmat, Umat, VTmat); + +/* VerifyRotMat(rotmat, 1e-5); 
*/ +/* printf("\n\n rotmat:"); */ +/* write_C_mat((const double **)rotmat, 3, 8, 0); */ + + if (det < 0) + sumdev -= 2.0 * (sigma[0] + sigma[1] - sigma[2]); + else + sumdev -= 2.0 * (sigma[0] + sigma[1] + sigma[2]); + +/* printf("\n\n Rmat:"); */ +/* write_C_mat((const double **)Rmat, 3, 8, 0); */ +/* printf("\n\n Umat:"); */ +/* write_C_mat((const double **)Umat, 3, 8, 0); */ +/* printf("\n\n VTmat:"); */ +/* write_C_mat((const double **)VTmat, 3, 8, 0); */ +/* int i; */ +/* for (i = 0; i < 3; ++i) */ +/* printf("\n sigma[%d] = %8.2f ", i, sigma[i]); */ + + return(sumdev); +} + + +/* returns sum of squared residuals, E + rmsd = sqrt(E/atom_num) */ +double +ProcGSLSVDCovNu(Cds *cds1, Cds *cds2, double **rotmat, + const double **WtMat, double **Rmat, + double **Umat, double **VTmat, double *sigma) +{ + double det, sumdev = 0.0; + + CalcCovCds(cds1, WtMat); + CalcCovCds(cds2, WtMat); + + sumdev = CalcE0CovNu(cds1, cds2); + CalcRCovNu(cds1, cds2, Rmat, WtMat); + CalcGSLSVD(Rmat, Umat, sigma, VTmat); + det = CalcRotMat(rotmat, Umat, VTmat); + + if (det < 0) + sumdev -= 2.0 * (sigma[0] + sigma[1] - sigma[2]); + else + sumdev -= 2.0 * (sigma[0] + sigma[1] + sigma[2]); + + return(sumdev); +} diff -Nru theseus-2.0.6/ProcGSLSVDNu.h theseus-3.0.0/ProcGSLSVDNu.h --- theseus-2.0.6/ProcGSLSVDNu.h 1970-01-01 00:00:00.000000000 +0000 +++ theseus-3.0.0/ProcGSLSVDNu.h 2014-05-13 16:48:52.000000000 +0000 @@ -0,0 +1,51 @@ +/* + Theseus - maximum likelihood superpositioning of macromolecular structures + + Copyright (C) 2004-2014 Douglas L. Theobald + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the: + + Free Software Foundation, Inc., + 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA + + -/_|:|_|_\- +*/ + +#ifndef PROCGSLSVDOCC_SEEN +#define PROCGSLSVDOCC_SEEN + +double +ProcGSLSVDvanNu(const Cds *cds1, const Cds *cds2, double **rotmat, + double **Rmat, double **Umat, double **VTmat, double *sigma, + double *norm1, double *norm2, double *innprod); + +double +ProcGSLSVDNu(const Cds *cds1, const Cds *cds2, double **rotmat, + const double *weights, + double **Rmat, double **Umat, double **VTmat, double *sigma); + +double +ProcGSLSVDCovNu(Cds *cds1, Cds *cds2, double **rotmat, + const double **covmat, + double **Rmat, double **Umat, double **VTmat, + double *sigma); + +double +ProcGSLSVDvanNu2(const double **cds1, const double **cds2, const int *nu, + const int len, double **rotmat, + double **Rmat, double **Umat, double **VTmat, double *sigma, + double *norm1, double *norm2, double *innprod); + +#endif diff -Nru theseus-2.0.6/ProcGSLSVDOcc.c theseus-3.0.0/ProcGSLSVDOcc.c --- theseus-2.0.6/ProcGSLSVDOcc.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/ProcGSLSVDOcc.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,477 +0,0 @@ -/* - Theseus - maximum likelihood superpositioning of macromolecular structures - - Copyright (C) 2004-2013 Douglas L. Theobald - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the: - - Free Software Foundation, Inc., - 59 Temple Place, Suite 330, - Boston, MA 02111-1307 USA - - -/_|:|_|_\- -*/ - -#include -#include -#include -#include -#include -#include "pdbUtils.h" -#include "pdbStats.h" -#include "CovMat.h" -#include "DLTmath.h" -#include "ProcGSLSVDOcc.h" - - -static double -CalcInnProdNormOcc(const Cds *cds) -{ - int i; - double sum; - const double *x = (const double *) cds->x, - *y = (const double *) cds->y, - *z = (const double *) cds->z; - const double *o = (const double *) cds->o; - double xi, yi, zi; - - sum = 0.0; - i = cds->vlen; - while(i-- > 0) - { - xi = *x++; - yi = *y++; - zi = *z++; - - sum += *o++ * (xi * xi + yi * yi + zi * zi); - } - - return(sum); -} - - -static double -CalcE0Occ(const Cds *cds1, const Cds *cds2, - const double *weights) -{ - int i; - double sum; - const double *x1 = (const double *) cds1->x, - *y1 = (const double *) cds1->y, - *z1 = (const double *) cds1->z; - const double *x2 = (const double *) cds2->x, - *y2 = (const double *) cds2->y, - *z2 = (const double *) cds2->z; - const double *o1 = (const double *) cds1->o, - *o2 = (const double *) cds2->o; - double x1i, y1i, z1i, x2i, y2i, z2i, weight; - - sum = 0.0; - i = cds1->vlen; - while(i-- > 0) - { - x1i = *x1++; - y1i = *y1++; - z1i = *z1++; - x2i = *x2++; - y2i = *y2++; - z2i = *z2++; - weight = *weights++ * *o1++ * *o2++; - sum += weight * - ((x1i * x1i + x2i * x2i) + - (y1i * y1i + y2i * y2i) + - (z1i * z1i + z2i * z2i)); - } - - return(sum); -} - - -static double -CalcE0CovOcc(const Cds *cds1, const Cds *cds2) -{ - int i; - double sum; - const double *x2 = (const double *) cds2->x, - *y2 = (const double *) cds2->y, - *z2 = (const double *) cds2->z; - const double *x1 = (const double *) cds1->x, - *y1 = (const double *) cds1->y, - *z1 = (const double *) cds1->z; - const double *cx2 = (const double *) cds2->covx, - *cy2 = (const 
double *) cds2->covy, - *cz2 = (const double *) cds2->covz; - const double *cx1 = (const double *) cds1->covx, - *cy1 = (const double *) cds1->covy, - *cz1 = (const double *) cds1->covz; - const double *o1 = (const double *) cds1->o, - *o2 = (const double *) cds2->o; - double x1i, y1i, z1i, x2i, y2i, z2i, - cx1i, cy1i, cz1i, cx2i, cy2i, cz2i; - - sum = 0.0; - i = cds1->vlen; - while(i-- > 0) - { - x1i = *x1++; - y1i = *y1++; - z1i = *z1++; - x2i = *x2++; - y2i = *y2++; - z2i = *z2++; - - cx1i = *cx1++; - cy1i = *cy1++; - cz1i = *cz1++; - cx2i = *cx2++; - cy2i = *cy2++; - cz2i = *cz2++; - - sum += *o1++ * *o2++ * - ((cx1i * x1i + cx2i * x2i) + - (cy1i * y1i + cy2i * y2i) + - (cz1i * z1i + cz2i * z2i)); - } - - return(sum); -} - - -static void -CalcROcc(const Cds *cds1, const Cds *cds2, double **Rmat, - const double *weights) -{ - int i; - double weight; - const double *x2 = (const double *) cds2->x, - *y2 = (const double *) cds2->y, - *z2 = (const double *) cds2->z; - const double *x1 = (const double *) cds1->x, - *y1 = (const double *) cds1->y, - *z1 = (const double *) cds1->z; - const double *o1 = (const double *) cds1->o, - *o2 = (const double *) cds2->o; - double x2i, y2i, z2i, x1i, y1i, z1i; - double Rmat00, Rmat01, Rmat02, - Rmat10, Rmat11, Rmat12, - Rmat20, Rmat21, Rmat22; - - Rmat00 = Rmat01 = Rmat02 = Rmat10 = Rmat11 = Rmat12 = - Rmat20 = Rmat21 = Rmat22 = 0.0; - - i = cds1->vlen; - while(i-- > 0) - { - weight = *weights++ * *o1++ * *o2++; - - x1i = *x1++; - y1i = *y1++; - z1i = *z1++; - - x2i = weight * *x2++; - y2i = weight * *y2++; - z2i = weight * *z2++; - - Rmat00 += x2i * x1i; - Rmat01 += x2i * y1i; - Rmat02 += x2i * z1i; - - Rmat10 += y2i * x1i; - Rmat11 += y2i * y1i; - Rmat12 += y2i * z1i; - - Rmat20 += z2i * x1i; - Rmat21 += z2i * y1i; - Rmat22 += z2i * z1i; - } - - Rmat[0][0] = Rmat00; - Rmat[0][1] = Rmat01; - Rmat[0][2] = Rmat02; - Rmat[1][0] = Rmat10; - Rmat[1][1] = Rmat11; - Rmat[1][2] = Rmat12; - Rmat[2][0] = Rmat20; - Rmat[2][1] = Rmat21; - 
Rmat[2][2] = Rmat22; -} - - -static void -CalcRvanOcc(const Cds *cds1, const Cds *cds2, double **Rmat) -{ - int i; - double weight; - const double *x2 = (const double *) cds2->x, - *y2 = (const double *) cds2->y, - *z2 = (const double *) cds2->z; - const double *x1 = (const double *) cds1->x, - *y1 = (const double *) cds1->y, - *z1 = (const double *) cds1->z; - const double *o1 = (const double *) cds1->o, - *o2 = (const double *) cds2->o; - double x2i, y2i, z2i, x1i, y1i, z1i; - double Rmat00, Rmat01, Rmat02, - Rmat10, Rmat11, Rmat12, - Rmat20, Rmat21, Rmat22; - - Rmat00 = Rmat01 = Rmat02 = Rmat10 = Rmat11 = Rmat12 = - Rmat20 = Rmat21 = Rmat22 = 0.0; - - i = cds1->vlen; - while(i-- > 0) - { - weight = *o1++ * *o2++; - - x1i = *x1++; - y1i = *y1++; - z1i = *z1++; - - x2i = weight * *x2++; - y2i = weight * *y2++; - z2i = weight * *z2++; - - Rmat00 += x2i * x1i; - Rmat01 += x2i * y1i; - Rmat02 += x2i * z1i; - - Rmat10 += y2i * x1i; - Rmat11 += y2i * y1i; - Rmat12 += y2i * z1i; - - Rmat20 += z2i * x1i; - Rmat21 += z2i * y1i; - Rmat22 += z2i * z1i; - } - - Rmat[0][0] = Rmat00; - Rmat[0][1] = Rmat01; - Rmat[0][2] = Rmat02; - Rmat[1][0] = Rmat10; - Rmat[1][1] = Rmat11; - Rmat[1][2] = Rmat12; - Rmat[2][0] = Rmat20; - Rmat[2][1] = Rmat21; - Rmat[2][2] = Rmat22; -} - - -static void -CalcRCovOcc(const Cds *cds1, const Cds *cds2, double **Rmat, - const double **WtMat) -{ - int i; - double weight; - const double *x2 = (const double *) cds2->covx, - *y2 = (const double *) cds2->covy, - *z2 = (const double *) cds2->covz; - const double *x1 = (const double *) cds1->x, - *y1 = (const double *) cds1->y, - *z1 = (const double *) cds1->z; - const double *o1 = (const double *) cds1->o, - *o2 = (const double *) cds2->o; - double x2i, y2i, z2i, x1i, y1i, z1i; - double Rmat00, Rmat01, Rmat02, - Rmat10, Rmat11, Rmat12, - Rmat20, Rmat21, Rmat22; - - Rmat00 = Rmat01 = Rmat02 = Rmat10 = Rmat11 = Rmat12 = - Rmat20 = Rmat21 = Rmat22 = 0.0; - - i = cds1->vlen; - while(i-- > 0) - { - weight = 
*o1++ * *o2++; - - x1i = *x1++; - y1i = *y1++; - z1i = *z1++; - - x2i = weight * *x2++; - y2i = weight * *y2++; - z2i = weight * *z2++; - - Rmat00 += x2i * x1i; - Rmat01 += x2i * y1i; - Rmat02 += x2i * z1i; - - Rmat10 += y2i * x1i; - Rmat11 += y2i * y1i; - Rmat12 += y2i * z1i; - - Rmat20 += weight * z2i * x1i; - Rmat21 += weight * z2i * y1i; - Rmat22 += weight * z2i * z1i; - } - - Rmat[0][0] = Rmat00; - Rmat[0][1] = Rmat01; - Rmat[0][2] = Rmat02; - Rmat[1][0] = Rmat10; - Rmat[1][1] = Rmat11; - Rmat[1][2] = Rmat12; - Rmat[2][0] = Rmat20; - Rmat[2][1] = Rmat21; - Rmat[2][2] = Rmat22; -} - - -static int -CalcGSLSVD(double **Rmat, double **Umat, double *sigma, double **VTmat) -{ - svdGSLDest(Rmat, 3, sigma, VTmat); - Mat3TransposeIp(VTmat); - Mat3Cpy(Umat, (const double **) Rmat); - return(1); -} - - -/* Takes U and V^t on input, calculates R = VU^t */ -static int -CalcRotMat(double **rotmat, double **Umat, double **Vtmat) -{ - int i, j, k; - double det; - - memset(&rotmat[0][0], 0, 9 * sizeof(double)); - - det = Mat3Det((const double **)Umat) * Mat3Det((const double **)Vtmat); - - if (det > 0) - { - for (i = 0; i < 3; ++i) - for (j = 0; j < 3; ++j) - for (k = 0; k < 3; ++k) - rotmat[i][j] += (Vtmat[k][i] * Umat[j][k]); - - return(1); - } - else - { - for (i = 0; i < 3; ++i) - { - for (j = 0; j < 3; ++j) - { - for (k = 0; k < 2; ++k) - rotmat[i][j] += (Vtmat[k][i] * Umat[j][k]); - - rotmat[i][j] -= (Vtmat[2][i] * Umat[j][2]); - } - } - - return(-1); - } -} - - -/* returns sum of squared residuals, E - rmsd = sqrt(E/atom_num) */ -double -ProcGSLSVDvanOcc(const Cds *cds1, const Cds *cds2, double **rotmat, - double **Rmat, double **Umat, double **VTmat, double *sigma, - double *norm1, double *norm2, double *innprod) -{ - double det; - - *norm1 = CalcInnProdNormOcc(cds2); - *norm2 = CalcInnProdNormOcc(cds1); - CalcRvanOcc(cds1, cds2, Rmat); - CalcGSLSVD(Rmat, Umat, sigma, VTmat); - det = CalcRotMat(rotmat, Umat, VTmat); - -/* VerifyRotMat(rotmat, 1e-5); */ -/* 
printf("\n*************** sumdev = %8.2f ", sumdev); */ -/* printf("\nrotmat:"); */ -/* write_C_mat((const double **)rotmat, 3, 8, 0); */ - - if (det < 0) - *innprod = sigma[0] + sigma[1] - sigma[2]; - else - *innprod = sigma[0] + sigma[1] + sigma[2]; - -/* printf("\nRmat:"); */ -/* write_C_mat((const double **)Rmat, 3, 8, 0); */ -/* printf("\nUmat:"); */ -/* write_C_mat((const double **)Umat, 3, 8, 0); */ -/* printf("\nVTmat:"); */ -/* write_C_mat((const double **)VTmat, 3, 8, 0); */ -/* int i; */ -/* for (i = 0; i < 3; ++i) */ -/* printf("\nsigma[%d] = %8.2f ", i, sigma[i]); */ - - return(*norm1 + *norm2 - 2.0 * *innprod); -} - - - -double -ProcGSLSVDOcc(const Cds *cds1, const Cds *cds2, double **rotmat, - const double *weights, - double **Rmat, double **Umat, double **VTmat, double *sigma) -{ - double det, sumdev; - - sumdev = CalcE0Occ(cds1, cds2, weights); - /* printf("\n # sumdev = %8.2f ", sumdev); */ - CalcROcc(cds1, cds2, Rmat, weights); - CalcGSLSVD(Rmat, Umat, sigma, VTmat); - det = CalcRotMat(rotmat, Umat, VTmat); - -/* VerifyRotMat(rotmat, 1e-5); */ -/* printf("\n\n rotmat:"); */ -/* write_C_mat((const double **)rotmat, 3, 8, 0); */ - - if (det < 0) - sumdev -= 2.0 * (sigma[0] + sigma[1] - sigma[2]); - else - sumdev -= 2.0 * (sigma[0] + sigma[1] + sigma[2]); - -/* printf("\n\n Rmat:"); */ -/* write_C_mat((const double **)Rmat, 3, 8, 0); */ -/* printf("\n\n Umat:"); */ -/* write_C_mat((const double **)Umat, 3, 8, 0); */ -/* printf("\n\n VTmat:"); */ -/* write_C_mat((const double **)VTmat, 3, 8, 0); */ -/* int i; */ -/* for (i = 0; i < 3; ++i) */ -/* printf("\n sigma[%d] = %8.2f ", i, sigma[i]); */ - - return(sumdev); -} - - -/* returns sum of squared residuals, E - rmsd = sqrt(E/atom_num) */ -double -ProcGSLSVDCovOcc(Cds *cds1, Cds *cds2, double **rotmat, - const double **WtMat, double **Rmat, - double **Umat, double **VTmat, double *sigma) -{ - double det, sumdev = 0.0; - - CalcCovCds(cds1, WtMat); - CalcCovCds(cds2, WtMat); - - sumdev = 
CalcE0CovOcc(cds1, cds2); - CalcRCovOcc(cds1, cds2, Rmat, WtMat); - CalcGSLSVD(Rmat, Umat, sigma, VTmat); - det = CalcRotMat(rotmat, Umat, VTmat); - - if (det < 0) - sumdev -= 2.0 * (sigma[0] + sigma[1] - sigma[2]); - else - sumdev -= 2.0 * (sigma[0] + sigma[1] + sigma[2]); - - return(sumdev); -} diff -Nru theseus-2.0.6/ProcGSLSVDOcc.h theseus-3.0.0/ProcGSLSVDOcc.h --- theseus-2.0.6/ProcGSLSVDOcc.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/ProcGSLSVDOcc.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,45 +0,0 @@ -/* - Theseus - maximum likelihood superpositioning of macromolecular structures - - Copyright (C) 2004-2013 Douglas L. Theobald - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the: - - Free Software Foundation, Inc., - 59 Temple Place, Suite 330, - Boston, MA 02111-1307 USA - - -/_|:|_|_\- -*/ - -#ifndef PROCGSLSVDOCC_SEEN -#define PROCGSLSVDOCC_SEEN - -double -ProcGSLSVDvanOcc(const Cds *cds1, const Cds *cds2, double **rotmat, - double **Rmat, double **Umat, double **VTmat, double *sigma, - double *norm1, double *norm2, double *innprod); - -double -ProcGSLSVDOcc(const Cds *cds1, const Cds *cds2, double **rotmat, - const double *weights, - double **Rmat, double **Umat, double **VTmat, double *sigma); - -double -ProcGSLSVDCovOcc(Cds *cds1, Cds *cds2, double **rotmat, - const double **covmat, - double **Rmat, double **Umat, double **VTmat, - double *sigma); - -#endif diff -Nru theseus-2.0.6/ProcJacobiSVD.c theseus-3.0.0/ProcJacobiSVD.c --- theseus-2.0.6/ProcJacobiSVD.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/ProcJacobiSVD.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,20 +35,16 @@ #include "ProcJacobiSVD.h" static double -CalcE0(const Cds *cds1, const Cds *cds2, - const double *weights, const double *axesw); +CalcE0(const Cds *cds1, const Cds *cds2, const double *weights); static double -CalcE0Cov(const Cds *cds1, const Cds *cds2, - const double *axesw); +CalcE0Cov(const Cds *cds1, const Cds *cds2); static void -CalcR(const Cds *cds1, const Cds *cds2, double **Rmat, - const double *weights, const double *axesw); +CalcR(const Cds *cds1, const Cds *cds2, double **Rmat, const double *weights); static void -CalcRCov(const Cds *cds1, const Cds *cds2, double **Rmat, - const double **covmat, const double *axesw); +CalcRCov(const Cds *cds1, const Cds *cds2, double **Rmat, const double **covmat); static int CalcJacobiSVD(double **a, double **U, double *z, double **V, double tol); @@ -57,9 +53,8 @@ CalcRotMat(double **rotmat, double **Umat, double **VTmat); static double -CalcE0(const Cds *cds1, const Cds *cds2, - const double *weights, const double *axesw) -{ +CalcE0(const Cds *cds1, const Cds *cds2, const double *weights) +{ int i; double sum; const double *x1 = (const double *) cds1->x, @@ -69,7 +64,6 @@ *y2 = (const double *) cds2->y, *z2 = (const double *) cds2->z; double x1i, y1i, z1i, x2i, y2i, z2i; - const double axeswx = axesw[0], axeswy = axesw[1], axeswz = axesw[2]; /* #include "pdbIO.h" */ /* PrintCds((Cds *)cds1); */ /* PrintCds((Cds *)cds2); */ @@ -85,9 +79,9 @@ y2i = *y2++; z2i = *z2++; sum += *weights++ * - (axeswx * (x1i * x1i + x2i * x2i) + - axeswy * (y1i * y1i + y2i * y2i) + - axeswz * (z1i * z1i + z2i * z2i)); + ((x1i * x1i + x2i * x2i) + + (y1i * y1i + y2i * y2i) + + (z1i * z1i + z2i * z2i)); /* printf("\nsum = %d %f %f", i, weight, sum); */ } /* exit(0); */ @@ -96,9 +90,8 @@ static double -CalcE0Cov(const Cds *cds1, const Cds *cds2, - const double *axesw) -{ 
+CalcE0Cov(const Cds *cds1, const Cds *cds2) +{ int i; double sum; const double *x2 = (const double *) cds2->x, @@ -115,7 +108,7 @@ *cz1 = (const double *) cds1->covz; double x1i, y1i, z1i, x2i, y2i, z2i, cx1i, cy1i, cz1i, cx2i, cy2i, cz2i; - const double axeswx = axesw[0], axeswy = axesw[1], axeswz = axesw[2]; + sum = 0.0; i = cds1->vlen; @@ -135,9 +128,9 @@ cy2i = *cy2++; cz2i = *cz2++; - sum += (axeswx * (cx1i * x1i + cx2i * x2i) + - axeswy * (cy1i * y1i + cy2i * y2i) + - axeswz * (cz1i * z1i + cz2i * z2i)); + sum += ((cx1i * x1i + cx2i * x2i) + + (cy1i * y1i + cy2i * y2i) + + (cz1i * z1i + cz2i * z2i)); } return(sum); @@ -147,8 +140,7 @@ /* This function assumes that the coordinates have been centered previously Use CenMass() and ApplyCenter() */ static void -CalcR(const Cds *cds1, const Cds *cds2, double **Rmat, - const double *weights, const double *axesw) +CalcR(const Cds *cds1, const Cds *cds2, double **Rmat, const double *weights) { int i; double weight; @@ -158,9 +150,6 @@ const double *x1 = (const double *) cds1->x, *y1 = (const double *) cds1->y, *z1 = (const double *) cds1->z; - const double axeswx = sqrt(axesw[0]), - axeswy = sqrt(axesw[1]), - axeswz = sqrt(axesw[2]); double x2i, y2i, z2i, x1i, y1i, z1i; double Rmat00, Rmat01, Rmat02, Rmat10, Rmat11, Rmat12, @@ -169,29 +158,27 @@ Rmat00 = Rmat01 = Rmat02 = Rmat10 = Rmat11 = Rmat12 = Rmat20 = Rmat21 = Rmat22 = 0.0; - /* printf("\n axes weights: %f %f %f ", axeswx, axeswy, axeswz); */ - i = cds1->vlen; while(i-- > 0) { weight = *weights++; - x2i = axeswx * *x2++; - y2i = axeswy * *y2++; - z2i = axeswz * *z2++; - - x1i = axeswx * *x1++; - y1i = axeswy * *y1++; - z1i = axeswz * *z1++; + x2i = *x2++; + y2i = *y2++; + z2i = *z2++; + + x1i = *x1++; + y1i = *y1++; + z1i = *z1++; Rmat00 += weight * x2i * x1i; Rmat01 += weight * x2i * y1i; Rmat02 += weight * x2i * z1i; - + Rmat10 += weight * y2i * x1i; Rmat11 += weight * y2i * y1i; Rmat12 += weight * y2i * z1i; - + Rmat20 += weight * z2i * x1i; Rmat21 += weight 
* z2i * y1i; Rmat22 += weight * z2i * z1i; @@ -224,7 +211,6 @@ Rmat10, Rmat11, Rmat12, Rmat20, Rmat21, Rmat22; - /* printf("\n axes weights: %f %f %f ", axeswx, axeswy, axeswz); */ Rmat00 = Rmat01 = Rmat02 = Rmat10 = Rmat11 = Rmat12 = Rmat20 = Rmat21 = Rmat22 = 0.0; @@ -243,11 +229,11 @@ Rmat00 += x2i * x1i; Rmat01 += x2i * y1i; Rmat02 += x2i * z1i; - + Rmat10 += y2i * x1i; Rmat11 += y2i * y1i; Rmat12 += y2i * z1i; - + Rmat20 += z2i * x1i; Rmat21 += z2i * y1i; Rmat22 += z2i * z1i; @@ -280,7 +266,6 @@ Rmat10, Rmat11, Rmat12, Rmat20, Rmat21, Rmat22; - /* printf("\n axes weights: %f %f %f ", axeswx, axeswy, axeswz); */ Rmat00 = Rmat01 = Rmat02 = Rmat10 = Rmat11 = Rmat12 = Rmat20 = Rmat21 = Rmat22 = 0.0; @@ -299,11 +284,11 @@ Rmat00 += x2i * x1i; Rmat01 += x2i * y1i; Rmat02 += x2i * z1i; - + Rmat10 += y2i * x1i; Rmat11 += y2i * y1i; Rmat12 += y2i * z1i; - + Rmat20 += z2i * x1i; Rmat21 += z2i * y1i; Rmat22 += z2i * z1i; @@ -322,8 +307,7 @@ static void -CalcRCov(const Cds *cds1, const Cds *cds2, double **Rmat, - const double **covmat, const double *axesw) +CalcRCov(const Cds *cds1, const Cds *cds2, double **Rmat, const double **covmat) { int i; const double *x2 = (const double *) cds2->covx, @@ -332,9 +316,6 @@ const double *x1 = (const double *) cds1->x, *y1 = (const double *) cds1->y, *z1 = (const double *) cds1->z; - const double axeswx = sqrt(axesw[0]), - axeswy = sqrt(axesw[1]), - axeswz = sqrt(axesw[2]); double x2i, y2i, z2i, x1i, y1i, z1i; double Rmat00, Rmat01, Rmat02, Rmat10, Rmat11, Rmat12, @@ -343,27 +324,25 @@ Rmat00 = Rmat01 = Rmat02 = Rmat10 = Rmat11 = Rmat12 = Rmat20 = Rmat21 = Rmat22 = 0.0; - /* printf("\n axes weights: %f %f %f ", axeswx, axeswy, axeswz); */ - i = cds1->vlen; while(i-- > 0) { - x2i = axeswx * *x2++; - y2i = axeswy * *y2++; - z2i = axeswz * *z2++; - - x1i = axeswx * *x1++; - y1i = axeswy * *y1++; - z1i = axeswz * *z1++; + x2i = *x2++; + y2i = *y2++; + z2i = *z2++; + + x1i = *x1++; + y1i = *y1++; + z1i = *z1++; Rmat00 += x2i * x1i; Rmat01 
+= x2i * y1i; Rmat02 += x2i * z1i; - + Rmat10 += y2i * x1i; Rmat11 += y2i * y1i; Rmat12 += y2i * z1i; - + Rmat20 += z2i * x1i; Rmat21 += z2i * y1i; Rmat22 += z2i * z1i; @@ -385,7 +364,7 @@ Calculates the SVD of a, a = UzV^t . This is pulled almost verbatim from - John Nash, _Compact Numerical Methods for Computers_, 1979, Wiley, NY + John Nash, _Compact Numerical Methods for Computers_, 1979, Wiley, NY Chapter 3, algorithm 1, pp 30-31. It is based on the method of Chartres: @@ -397,11 +376,11 @@ Decomposition and algebraic eigenproblem." Comput. J. 18:74-76. I have added a convergence criterion based on the sum of cross-products - after a sweep -- it should be 0 if the matrix is orthogonal. + after a sweep -- it should be 0 if the matrix is orthogonal. I also made some adjustments for C-style indexing. Unlike Nash's implementation, this function returns the transpose of V, same behavior as LAPACK DGESVD. - Singular values are ordered largest to smallest and stored in z. + Singular values are ordered largest to smallest and stored in z. 
The matrix A is preserved (unlike in Nash 1979 where it is overwritten with U) */ static int @@ -586,11 +565,11 @@ p = 0.0; for (i = 0; i < 3; ++i) p += Vt[ss][i] * Vt[ss][i]; - + q = 0.0; for (i = 0; i < 3; ++i) q += Vt[ss+1][i] * Vt[ss+1][i]; - + if (p < q) { for (i = 0; i < 3; ++i) @@ -619,12 +598,12 @@ q += g * g; r += h * h; } - + pp = p*p; qr = q*r; orth = pp / qr; /* orthsum += orth; */ - + if (r > eps && qr > eps && orth > eps) { if (q < r) @@ -640,7 +619,7 @@ c = sqrt(0.5 * (v + q) / v); s = p / (v * c); } - + /* step 9 - apply rotation to a */ for (i = 0; i < 3; ++i) { @@ -649,7 +628,7 @@ Vt[j][i] = (r * c) + (h * s); Vt[k][i] = -(r * s) + (h * c); } - + /* step 10 - apply rotation to V */ for (i = 0; i < 3; ++i) { @@ -680,7 +659,7 @@ q += g * g; r += h * h; } - + orthsum += (p*p) / (q*r); } } @@ -717,7 +696,7 @@ /* Takes U and V^t on input, calculates R = VU^t */ static int CalcRotMat(double **rotmat, double **Umat, double **Vtmat) -{ +{ int i, j, k; double det; @@ -758,14 +737,14 @@ rmsd = sqrt(E/atom_num) */ double ProcJacobiSVD(const Cds *cds1, const Cds *cds2, double **rotmat, - const double *weights, const double *axesw, + const double *weights, double **Rmat, double **Umat, double **VTmat, double *sigma) { double det, sumdev; - sumdev = CalcE0(cds1, cds2, weights, axesw); + sumdev = CalcE0(cds1, cds2, weights); /* printf("\n # sumdev = %8.2f ", sumdev); */ - CalcR(cds1, cds2, Rmat, weights, axesw); + CalcR(cds1, cds2, Rmat, weights); CalcJacobiSVD(Rmat, Umat, sigma, VTmat, 1e-8); det = CalcRotMat(rotmat, Umat, VTmat); @@ -794,14 +773,14 @@ // static double // CalcInnProdNorm(const Cds *cds) -// { +// { // int i; // double sum; // const double *x = (const double *) cds->x, // *y = (const double *) cds->y, // *z = (const double *) cds->z; // double xi, yi, zi; -// +// // sum = 0.0; // i = cds->vlen; // while(i-- > 0) @@ -809,17 +788,17 @@ // xi = *x++; // yi = *y++; // zi = *z++; -// +// // sum += xi * xi + yi * yi + zi * zi; // } -// +// // 
return(sum); // } -// -// +// +// // static double // CalcInnProdNormRot(const Cds *cds, const double **rmat) -// { +// { // int i; // double sum; // const double *x = (const double *) cds->x, @@ -829,7 +808,7 @@ // const double rmat00 = rmat[0][0], rmat01 = rmat[0][1], rmat02 = rmat[0][2], // rmat10 = rmat[1][0], rmat11 = rmat[1][1], rmat12 = rmat[1][2], // rmat20 = rmat[2][0], rmat21 = rmat[2][1], rmat22 = rmat[2][2]; -// +// // sum = 0.0; // i = cds->vlen; // while(i-- > 0) @@ -837,14 +816,14 @@ // xi = *x++; // yi = *y++; // zi = *z++; -// +// // xir = xi * rmat00 + yi * rmat10 + zi * rmat20; // yir = xi * rmat01 + yi * rmat11 + zi * rmat21; // zir = xi * rmat02 + yi * rmat12 + zi * rmat22; -// +// // sum += xir * xir + yir * yir + zir * zir; // } -// +// // return(sum); // } @@ -912,7 +891,7 @@ rmsd = sqrt(E/atom_num) */ double ProcJacobiSVDCov(Cds *cds1, Cds *cds2, double **rotmat, - const double **covmat, const double *axesw, double **Rmat, + const double **covmat, double **Rmat, double **Umat, double **VTmat, double *sigma) { double det, sumdev = 0.0; @@ -920,8 +899,8 @@ CalcCovCds(cds1, covmat); CalcCovCds(cds2, covmat); - sumdev = CalcE0Cov(cds1, cds2, axesw); - CalcRCov(cds1, cds2, Rmat, covmat, axesw); + sumdev = CalcE0Cov(cds1, cds2); + CalcRCov(cds1, cds2, Rmat, covmat); CalcJacobiSVD(Rmat, Umat, sigma, VTmat, DBL_EPSILON); det = CalcRotMat(rotmat, Umat, VTmat); diff -Nru theseus-2.0.6/ProcJacobiSVD.h theseus-3.0.0/ProcJacobiSVD.h --- theseus-2.0.6/ProcJacobiSVD.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/ProcJacobiSVD.h 2014-05-13 16:48:52.000000000 +0000 @@ -5,7 +5,7 @@ double ProcJacobiSVD(const Cds *cds1, const Cds *cds2, double **rotmat, - const double *weights, const double *axesw, + const double *weights, double **Rmat, double **Umat, double **VTmat, double *sigma); double @@ -14,7 +14,7 @@ double ProcJacobiSVDCov(Cds *cds1, Cds *cds2, double **rotmat, - const double **covmat, const double *axesw, + const double **covmat, double 
**Rmat, double **Umat, double **VTmat, double *sigma); diff -Nru theseus-2.0.6/qcprot.c theseus-3.0.0/qcprot.c --- theseus-2.0.6/qcprot.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/qcprot.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,11 +1,11 @@ /******************************************************************************* - * -/_|:|_|_\- + * -/_|:|_|_\- * * File: qcprot.c * Version: 1.3 * - * Function: Rapid calculation of the least-squares rotation using a - * quaternion-based characteristic polynomial and + * Function: Rapid calculation of the least-squares rotation using a + * quaternion-based characteristic polynomial and * a cofactor matrix * * Author(s): Douglas L. Theobald @@ -17,7 +17,7 @@ * USA * * dtheobald@brandeis.edu - * + * * Pu Liu * Johnson & Johnson Pharmaceutical Research and Development, L.L.C. * 665 Stockton Drive @@ -25,7 +25,7 @@ * USA * * pliu24@its.jnj.com - * + * * * If you use this QCP rotation calculation method in a publication, please * reference: @@ -36,24 +36,24 @@ * Acta Crystallographica A 61(4):478-480. * * Pu Liu, Dmitris K. Agrafiotis, and Douglas L. Theobald (2009) - * "Fast determination of the optimal rotational matrix for macromolecular + * "Fast determination of the optimal rotational matrix for macromolecular * superpositions." - * in press, Journal of Computational Chemistry + * in press, Journal of Computational Chemistry * * - * Copyright (c) 2009-2013 Pu Liu and Douglas L. Theobald + * Copyright (c) 2009-2014 Pu Liu and Douglas L. Theobald * All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without modification, are permitted + * Redistribution and use in source and binary forms, with or without modification, are permitted * provided that the following conditions are met: * - * * Redistributions of source code must retain the above copyright notice, this list of + * * Redistributions of source code must retain the above copyright notice, this list of * conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials + * * Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials * provided with the distribution. - * * Neither the name of the nor the names of its contributors may be used to - * endorse or promote products derived from this software without specific prior written + * * Neither the name of the nor the names of its contributors may be used to + * endorse or promote products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS @@ -66,7 +66,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Source: started anew. 
* @@ -81,7 +81,7 @@ * 2011/05/02 Finally changed CenterCoords declaration in qcprot.h * Also changed some functions to static * 2011/07/08 put in fabs() to fix taking sqrt of small neg numbers, fp error - * + * ******************************************************************************/ #include "qcprot.h" @@ -123,7 +123,7 @@ A[6] += (z1 * x2); A[7] += (z1 * y2); - A[8] += (z1 * z2); + A[8] += (z1 * z2); } } else @@ -152,7 +152,7 @@ A[6] += (z1 * x2); A[7] += (z1 * y2); - A[8] += (z1 * z2); + A[8] += (z1 * z2); } } @@ -170,15 +170,15 @@ SxzmSzx, SxymSyx, SxxpSyy, SxxmSyy; double C[4]; int i; - double mxEigenV; + double mxEigenV; double oldg = 0.0; double b, a, delta, rms, qsqr; double q1, q2, q3, q4, normq; double a11, a12, a13, a14, a21, a22, a23, a24; double a31, a32, a33, a34, a41, a42, a43, a44; - double a2, x2, y2, z2; - double xy, az, zx, ay, yz, ax; - double a3344_4334, a3244_4234, a3243_4233, a3143_4133,a3144_4134, a3142_4132; + double a2, x2, y2, z2; + double xy, az, zx, ay, yz, ax; + double a3344_4334, a3244_4234, a3243_4233, a3143_4133,a3144_4134, a3142_4132; double evecprec = 1e-6; double evalprec = 1e-11; @@ -221,6 +221,7 @@ + (+(SxypSyx)*(SyzpSzy)+(SxzpSzx)*(SxxmSyy+Szz)) * (-(SxymSyx)*(SyzmSzy)+(SxzpSzx)*(SxxpSyy+Szz)) + (+(SxypSyx)*(SyzmSzy)+(SxzmSzx)*(SxxmSyy-Szz)) * (-(SxymSyx)*(SyzpSzy)+(SxzmSzx)*(SxxpSyy-Szz)); + //printf("\n"); mxEigenV = E0; for (i = 0; i < 50; ++i) { @@ -230,22 +231,23 @@ a = b + C[1]; delta = ((a*mxEigenV + C[0])/(2.0*x2*mxEigenV + b + a)); mxEigenV -= delta; - printf("\n diff[%3d]: %16g %16g %16g", i, mxEigenV - oldg, evalprec*mxEigenV, mxEigenV); + //printf("\n diff[%3d]: %16g %16g %16g", i, mxEigenV - oldg, evalprec*mxEigenV, mxEigenV); if (fabs(mxEigenV - oldg) < fabs(evalprec*mxEigenV)) break; } + //printf("\n"); - if (i == 50) + if (i == 50) fprintf(stderr,"\nMore than %d iterations needed!\n", i); /* the fabs() is to guard against extremely small, but *negative* numbers due to floating point error */ rms = 
sqrt(fabs(2.0 * (E0 - mxEigenV)/len)); (*rmsd) = rms; - printf(" %16g %16g %16g ", rms, E0, 2.0 * (E0 - mxEigenV)/len); + //printf("\n %16g %16g %16g \n", rms, E0, 2.0 * (E0 - mxEigenV)/len); fflush(NULL); - if (minScore > 0) + if (minScore > 0) if (rms < minScore) - return (-1); // Don't bother with rotation. + return (-1); // Don't bother with rotation. a11 = SxxpSyy + Szz-mxEigenV; a12 = SyzmSzy; a13 = - SxzmSzx; a14 = SxymSyx; a21 = SyzmSzy; a22 = SxxmSyy - Szz-mxEigenV; a23 = SxypSyx; a24= SxzpSzx; @@ -261,10 +263,10 @@ qsqr = q1 * q1 + q2 * q2 + q3 * q3 + q4 * q4; -/* The following code tries to calculate another column in the adjoint matrix when the norm of the +/* The following code tries to calculate another column in the adjoint matrix when the norm of the current column is too small. Usually this commented block will never be activated. To be absolutely safe this should be - uncommented, but it is most likely unnecessary. + uncommented, but it is most likely unnecessary. */ if (qsqr < evecprec) { @@ -293,12 +295,13 @@ q3 = a31 * a1224_1422 - a32 * a1124_1421 + a34 * a1122_1221; q4 = -a31 * a1223_1322 + a32 * a1123_1321 - a33 * a1122_1221; qsqr = q1*q1 + q2 *q2 + q3*q3 + q4*q4; - + if (qsqr < evecprec) { /* if qsqr is still too small, return the identity matrix. */ rot[0] = rot[4] = rot[8] = 1.0; - rot[1] = rot[2] = rot[3] = rot[5] = rot[6] = rot[7] = 0.0; + rot[1] = rot[2] = rot[3] = + rot[5] = rot[6] = rot[7] = 0.0; return(0); } @@ -338,7 +341,7 @@ } -/* +/* static void CenterCoords(double **coords, const int len, const double *weight) { @@ -356,7 +359,7 @@ xsum += weight[i] * x[i]; ysum += weight[i] * y[i]; zsum += weight[i] * z[i]; - + wsum += weight[i]; } diff -Nru theseus-2.0.6/qcprot.h theseus-3.0.0/qcprot.h --- theseus-2.0.6/qcprot.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/qcprot.h 2014-05-13 16:48:52.000000000 +0000 @@ -40,7 +40,7 @@ * in press, Journal of Computational Chemistry * * - * Copyright (c) 2009, Pu Liu and Douglas L. 
Theobald + * Copyright (c) 2009-2014, Pu Liu and Douglas L. Theobald * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are permitted diff -Nru theseus-2.0.6/QuarticHornFrag.c theseus-3.0.0/QuarticHornFrag.c --- theseus-2.0.6/QuarticHornFrag.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/QuarticHornFrag.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -55,7 +55,7 @@ q1sqr = q[1] * q[1]; q2sqr = q[2] * q[2]; q3sqr = q[3] * q[3]; - + q12 = q[1] * q[2]; q03 = q[0] * q[3]; q13 = q[1] * q[3]; @@ -101,7 +101,7 @@ oldg = guess; /* guess -= (eval_horn_quart(coeff, guess) / eval_horn_quart_deriv(coeff, guess)); */ guess -= eval_horn_NR_corrxn(coeff, guess); - + if (fabs(guess - oldg) < fabs(delta*guess)) return(guess); } @@ -152,7 +152,7 @@ { int i; double x, xold; - + i = 0; x = guess; @@ -160,10 +160,10 @@ { xold = x; x -= (eval_horn_quart(coeff, x) / eval_horn_quart_deriv(coeff, x)); - + if (fabs(x - xold) < delta) return(x); - + ++i; } @@ -195,8 +195,8 @@ x2 = fx2[i]; y2 = fy2[i]; z2 = fz2[i]; - - + + Sxx += (x1 * x2); Sxy += (x1 * y2); Sxz += (x1 * z2); @@ -207,7 +207,7 @@ Szx += (z1 * x2); Szy += (z1 * y2); - Szz += (z1 * z2); + Szz += (z1 * z2); } Sxx2 = Sxx * Sxx; @@ -237,11 +237,10 @@ } - /* A lot of register variables, but this sort of thing scales very well with new and improved processors */ double -CalcQuarticCoeffsPu2(const FragCds *frag1, const FragCds *frag2, const int len, double *coeff, const double innerprod, double *quat) +CalcQuarticCoeffsPu2(const FragCds *frag1, const FragCds *frag2, const int len, double *coeff, const double outerprod, double *quat) { double Sxx, Sxy, Sxz, 
Syx, Syy, Syz, Szx, Szy, Szz; double Szz2, Syy2, Sxx2, Sxy2, Syz2, Sxz2, Syx2, Szy2, Szx2, @@ -264,7 +263,7 @@ x2 = fx2[i]; y2 = fy2[i]; z2 = fz2[i]; - + Sxx += (x1 * x2); Sxy += (x1 * y2); Sxz += (x1 * z2); @@ -275,7 +274,7 @@ Szx += (z1 * x2); Szy += (z1 * y2); - Szz += (z1 * z2); + Szz += (z1 * z2); } Sxx2 = Sxx * Sxx; @@ -315,27 +314,25 @@ + (+(SxypSyx)*(SyzpSzy)+(SxzpSzx)*(SxxmSyy+Szz)) * (-(SxymSyx)*(SyzmSzy)+(SxzpSzx)*(SxxpSyy+Szz)) + (+(SxypSyx)*(SyzmSzy)+(SxzmSzx)*(SxxmSyy-Szz)) * (-(SxymSyx)*(SyzpSzy)+(SxzmSzx)*(SxxpSyy-Szz)); - lambdamax = QCProot(coeff, 0.5 * innerprod, precision); + lambdamax = QCProot(coeff, 0.5 * outerprod, precision); /* Now calculate the optimal rotation from one row of the cofactor matrix */ - double a11, a12, a13, a14, a21, a22, a23, a24; + double a13, a14, a21, a22, a23, a24; double a31, a32, a33, a34, a41, a42, a43, a44; - a11 = SxxpSyy + Szz - lambdamax; - a12 = SyzmSzy; - a13 = -SxzmSzx; + a13 = -SxzmSzx; a14 = SxymSyx; - a21 = SyzmSzy; - a22 = SxxmSyy - Szz - lambdamax; - a23 = SxypSyx; + a21 = SyzmSzy; + a22 = SxxmSyy - Szz - lambdamax; + a23 = SxypSyx; a24 = SxzpSzx; - a31 = a13; - a32 = a23; + a31 = a13; + a32 = a23; a33 = Syy - Sxx - Szz - lambdamax; a34 = SyzpSzy; - a41 = a14; - a42 = a24; - a43 = a34; + a41 = a14; + a42 = a24; + a43 = a34; a44 = Szz - SxxpSyy - lambdamax; double a3344_4334 = a33 * a44 - a43 * a34; @@ -349,7 +346,7 @@ double q3 = a21*a3244_4234 - a22*a3144_4134 + a24*a3142_4132; double q4 = -a21*a3243_4233 + a22*a3143_4133 - a23*a3142_4132; double dq = sqrt(q1*q1 + q2*q2 + q3*q3 + q4*q4); - + quat[0] = q1/dq; quat[1] = q2/dq; quat[2] = q3/dq; @@ -362,10 +359,10 @@ /* A lot of register variables, but this sort of thing scales very well with new and improved processors */ /* This version has the conditional to test whether the row of the cofactor matrix is too small. 
- It goes through all four rows if necessary to find one that is large enough to avoid floating + It goes through all four rows if necessary to find one that is large enough to avoid floating point error. */ static double -CalcQuarticCoeffsPu(const FragCds *frag1, const FragCds *frag2, const int len, double *coeff, const double innerprod, double *quat) +CalcQuarticCoeffsPu(const FragCds *frag1, const FragCds *frag2, const int len, double *coeff, const double outerprod, double *quat) { double Sxx, Sxy, Sxz, Syx, Syy, Syz, Szx, Szy, Szz; double Szz2, Syy2, Sxx2, Sxy2, Syz2, Sxz2, Syx2, Szy2, Szx2, @@ -389,7 +386,7 @@ x2 = fx2[i]; y2 = fy2[i]; z2 = fz2[i]; - + Sxx += (x1 * x2); Sxy += (x1 * y2); Sxz += (x1 * z2); @@ -400,7 +397,7 @@ Szx += (z1 * x2); Szy += (z1 * y2); - Szz += (z1 * z2); + Szz += (z1 * z2); } Sxx2 = Sxx * Sxx; @@ -440,27 +437,27 @@ + (+(SxypSyx)*(SyzpSzy)+(SxzpSzx)*(SxxmSyy+Szz)) * (-(SxymSyx)*(SyzmSzy)+(SxzpSzx)*(SxxpSyy+Szz)) + (+(SxypSyx)*(SyzmSzy)+(SxzmSzx)*(SxxmSyy-Szz)) * (-(SxymSyx)*(SyzpSzy)+(SxzmSzx)*(SxxpSyy-Szz)); - lambdamax = QCProot(coeff, 0.5 * innerprod, precision); + lambdamax = QCProot(coeff, 0.5 * outerprod, precision); /* Now calculate the optimal rotation from one row of the cofactor matrix */ double a11, a12, a13, a14, a21, a22, a23, a24; double a31, a32, a33, a34, a41, a42, a43, a44; - a11 = SxxpSyy + Szz - lambdamax; - a12 = SyzmSzy; - a13 = -SxzmSzx; + a11 = SxxpSyy + Szz - lambdamax; + a12 = SyzmSzy; + a13 = -SxzmSzx; a14 = SxymSyx; - a21 = SyzmSzy; - a22 = SxxmSyy - Szz - lambdamax; - a23 = SxypSyx; + a21 = SyzmSzy; + a22 = SxxmSyy - Szz - lambdamax; + a23 = SxypSyx; a24 = SxzpSzx; - a31 = a13; - a32 = a23; + a31 = a13; + a32 = a23; a33 = Syy - Sxx - Szz - lambdamax; a34 = SyzpSzy; - a41 = a14; - a42 = a24; - a43 = a34; + a41 = a14; + a42 = a24; + a43 = a34; a44 = Szz - SxxpSyy - lambdamax; double a3344_4334 = a33 * a44 - a43 * a34; @@ -543,42 +540,42 @@ double trace, s, w, x, y, z; /* convert to quaternion */ - trace = 
rot[0][0] + rot[1][1] + rot[2][2] + 1.0; + trace = rot[0][0] + rot[1][1] + rot[2][2] + 1.0; if( trace > FLT_EPSILON ) { - s = 0.5 / sqrt(trace); - w = 0.25 / s; - x = ( rot[2][1] - rot[1][2] ) * s; - y = ( rot[0][2] - rot[2][0] ) * s; - z = ( rot[1][0] - rot[0][1] ) * s; + s = 0.5 / sqrt(trace); + w = 0.25 / s; + x = ( rot[2][1] - rot[1][2] ) * s; + y = ( rot[0][2] - rot[2][0] ) * s; + z = ( rot[1][0] - rot[0][1] ) * s; } else { - if (rot[0][0] > rot[1][1] && rot[0][0] > rot[2][2]) - { - s = 2.0 * sqrt( 1.0 + rot[0][0] - rot[1][1] - rot[2][2]); - x = 0.25 * s; - y = (rot[0][1] + rot[1][0] ) / s; - z = (rot[0][2] + rot[2][0] ) / s; - w = (rot[1][2] - rot[2][1] ) / s; - } - else if (rot[1][1] > rot[2][2]) - { - s = 2.0 * sqrt(1.0 + rot[1][1] - rot[0][0] - rot[2][2]); - x = (rot[0][1] + rot[1][0] ) / s; - y = 0.25 * s; - z = (rot[1][2] + rot[2][1] ) / s; - w = (rot[0][2] - rot[2][0] ) / s; - } - else - { - s = 2.0 * sqrt(1.0 + rot[2][2] - rot[0][0] - rot[1][1]); - x = (rot[0][2] + rot[2][0] ) / s; - y = (rot[1][2] + rot[2][1] ) / s; - z = 0.25 * s; - w = (rot[0][1] - rot[1][0] ) / s; - } + if (rot[0][0] > rot[1][1] && rot[0][0] > rot[2][2]) + { + s = 2.0 * sqrt( 1.0 + rot[0][0] - rot[1][1] - rot[2][2]); + x = 0.25 * s; + y = (rot[0][1] + rot[1][0] ) / s; + z = (rot[0][2] + rot[2][0] ) / s; + w = (rot[1][2] - rot[2][1] ) / s; + } + else if (rot[1][1] > rot[2][2]) + { + s = 2.0 * sqrt(1.0 + rot[1][1] - rot[0][0] - rot[2][2]); + x = (rot[0][1] + rot[1][0] ) / s; + y = 0.25 * s; + z = (rot[1][2] + rot[2][1] ) / s; + w = (rot[0][2] - rot[2][0] ) / s; + } + else + { + s = 2.0 * sqrt(1.0 + rot[2][2] - rot[0][0] - rot[1][1]); + x = (rot[0][2] + rot[2][0] ) / s; + y = (rot[1][2] + rot[2][1] ) / s; + z = 0.25 * s; + w = (rot[0][1] - rot[1][0] ) / s; + } } quat[0] = -w; @@ -608,7 +605,7 @@ printf("\n% -14.6e % -14.6e % -14.6e % -14.6e", quat[0], quat[1], quat[2], quat[3]); QuatToRotmat(rotmat, quat); Mat3Print(rotmat); - + double **Rmat = MatAlloc(3, 3); double **Umat = 
MatAlloc(3, 3); double **VTmat = MatAlloc(3, 3); @@ -652,22 +649,21 @@ double *coeff = NULL; double var; FILE *distfile = NULL, *distfile2 = NULL; - double biggest; double *array = NULL; - double **Rmat = MatAlloc(3, 3); - double **Umat = MatAlloc(3, 3); - double **VTmat = MatAlloc(3, 3); - double *sigma = malloc(3 * sizeof(double)); - double **rotmat2 = MatAlloc(3, 3); - - double *quat = malloc(4 * sizeof(double)); - double **rotmat = MatAlloc(3, 3); - - double frag1_radgyr; - double frag2_radgyr; - double rmsd2, lambdamax; -// int n = 1000; + double **Rmat = MatAlloc(3, 3); + double **Umat = MatAlloc(3, 3); + double **VTmat = MatAlloc(3, 3); + double *sigma = malloc(3 * sizeof(double)); + double **rotmat2 = MatAlloc(3, 3); + + double *quat = malloc(4 * sizeof(double)); + double **rotmat = MatAlloc(3, 3); + + double frag1_radgyr; + double frag2_radgyr; + double rmsd2, lambdamax; +// int n = 1000; clock_t start_time, end_time; // unsigned long seed = (unsigned long) time(NULL); @@ -686,7 +682,6 @@ start_time = clock(); - biggest = 0.0; count = 0; for (coord1 = 0; coord1 < cdsA->cnum; ++coord1) { @@ -701,13 +696,13 @@ frag1->x[k] = cdsA->cds[coord1]->x[tmp]; frag1->y[k] = cdsA->cds[coord1]->y[tmp]; frag1->z[k] = cdsA->cds[coord1]->z[tmp]; - + frag2->x[k] = cdsA->cds[coord2]->x[tmp]; frag2->y[k] = cdsA->cds[coord2]->y[tmp]; frag2->z[k] = cdsA->cds[coord2]->z[tmp]; } - if (center_ca == 1) + if (center_ca) { CenterFragCA(frag1); CenterFragCA(frag2); @@ -720,37 +715,37 @@ //var = QuarticHornFrag((const FragCds *) frag1, (const FragCds *) frag2, coeff); - if (pu == 1) + if (pu) + { + frag1_radgyr = RadGyrSqrFrag(frag1); + frag2_radgyr = RadGyrSqrFrag(frag2); + lambdamax = CalcQuarticCoeffsPu(frag1, frag2, frag1->fraglen, coeff, frag1_radgyr + frag2_radgyr, quat); + rmsd2 = (frag1_radgyr + frag2_radgyr) - (2.0 * lambdamax); + //var = KabschFrag(frag1, frag2, rotmat2, Rmat, Umat, VTmat, sigma); + var = ProcGSLSVDFrag(frag1, frag2, rotmat2, Rmat, Umat, VTmat, sigma); + 
printf("\nQCP rmsd = %e %e\n", sqrt(rmsd2/fraglen), sqrt(var/fraglen)); + QuatToRotmat(rotmat, quat); + } + else { - frag1_radgyr = RadGyrSqrFrag(frag1); - frag2_radgyr = RadGyrSqrFrag(frag2); - lambdamax = CalcQuarticCoeffsPu(frag1, frag2, frag1->fraglen, coeff, frag1_radgyr + frag2_radgyr, quat); - rmsd2 = (frag1_radgyr + frag2_radgyr) - (2.0 * lambdamax); - //var = KabschFrag(frag1, frag2, rotmat2, Rmat, Umat, VTmat, sigma); - var = ProcGSLSVDFrag(frag1, frag2, rotmat2, Rmat, Umat, VTmat, sigma); - printf("\nQCP rmsd = %e %e\n", sqrt(rmsd2/fraglen), sqrt(var/fraglen)); - QuatToRotmat(rotmat, quat); - } - else - { - // printf("\n% -14.6e % -14.6e % -14.6e % -14.6e", quat[0], quat[1], quat[2], quat[3]); - // Mat3Print(rotmat); - - var = ProcGSLSVDFrag(frag1, frag2, rotmat2, Rmat, Umat, VTmat, sigma); - //var = KabschFrag(frag1, frag2, rotmat2, Rmat, Umat, VTmat, sigma); - //var = ProcJacobiSVDFrag(frag1, frag2, rotmat2, Rmat, Umat, VTmat, sigma); - //RotMatToQuaternion((const double **) rotmat2, quat); - - // printf("\n% -14.6e % -14.6e % -14.6e % -14.6e", quat[0], quat[1], quat[2], quat[3]); - // Mat3Print(rotmat2); + // printf("\n% -14.6e % -14.6e % -14.6e % -14.6e", quat[0], quat[1], quat[2], quat[3]); + // Mat3Print(rotmat); + + var = ProcGSLSVDFrag(frag1, frag2, rotmat2, Rmat, Umat, VTmat, sigma); + //var = KabschFrag(frag1, frag2, rotmat2, Rmat, Umat, VTmat, sigma); + //var = ProcJacobiSVDFrag(frag1, frag2, rotmat2, Rmat, Umat, VTmat, sigma); + //RotMatToQuaternion((const double **) rotmat2, quat); + + // printf("\n% -14.6e % -14.6e % -14.6e % -14.6e", quat[0], quat[1], quat[2], quat[3]); + // Mat3Print(rotmat2); } -// if (Mat3Eq((const double **) rotmat, (const double **) rotmat2, 1e-8) == 0) -// { -// printf("\n Not equal\n"); -// Mat3Print(rotmat); -// Mat3Print(rotmat2); -// } +// if (Mat3Eq((const double **) rotmat, (const double **) rotmat2, 1e-8) == 0) +// { +// printf("\n Not equal\n"); +// Mat3Print(rotmat); +// Mat3Print(rotmat2); +// } // 
fprintf(distfile, "%-16.8e \n", var); // fprintf(distfile2, "%-16.8e \n", sqrt(var)); @@ -764,19 +759,19 @@ } } - end_time = clock(); - double milliseconds = (double) (end_time - start_time) / ((double) CLOCKS_PER_SEC * 0.001); - - printf( "\n milliseconds: %-16.8e -- iters = %d \n", milliseconds, count); - - MatDestroy(&rotmat); - MatDestroy(&rotmat2); - MatDestroy(&Umat); - MatDestroy(&Rmat); - MatDestroy(&VTmat); + end_time = clock(); + double milliseconds = (double) (end_time - start_time) / ((double) CLOCKS_PER_SEC * 0.001); + + printf( "\n milliseconds: %-16.8e -- iters = %d \n", milliseconds, count); - free(sigma); - free(quat); + MatDestroy(&rotmat); + MatDestroy(&rotmat2); + MatDestroy(&Umat); + MatDestroy(&Rmat); + MatDestroy(&VTmat); + + free(sigma); + free(quat); FragCdsFree(&frag1); FragCdsFree(&frag2); diff -Nru theseus-2.0.6/QuarticHornFrag.h theseus-3.0.0/QuarticHornFrag.h --- theseus-2.0.6/QuarticHornFrag.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/QuarticHornFrag.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/RandCds.c theseus-3.0.0/RandCds.c --- theseus-2.0.6/RandCds.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/RandCds.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -57,10 +57,10 @@ // { // while(x < 0.0) // x += 2.0*MY_PI; -// +// // while(x > 2.0*MY_PI) // x -= 2.0*MY_PI; -// +// // return(x); // } @@ -148,72 +148,6 @@ } -/*not work, not uniform*/ -void -myrand_rotation(const double *x, double **M) -{ - const double alph = x[0] * 2.0 * MY_PI; - const double beta = x[1] * 2.0 * MY_PI; - const double gamm = x[2] * 2.0 * MY_PI; - - const double calph = cos(alph); - const double cbeta = cos(beta); - const double cgamm = cos(gamm); - const double salph = sin(alph); - const double sbeta = sin(beta); - const double sgamm = sin(gamm); - - M[0][0] = calph*cbeta*cgamm - salph*sgamm; - M[0][1] = -calph*cbeta*sgamm - salph*cgamm; - M[0][2] = calph*sbeta; - - M[1][0] = salph*cbeta*cgamm + calph*sgamm; - M[1][1] = -salph*cbeta*sgamm + calph*cgamm; - M[1][2] = salph*sbeta; - - M[2][0] = -sbeta*cgamm; - M[2][1] = sbeta*sgamm; - M[2][2] = cbeta; - -/* VerifyRotMat(M, 1e-12); */ -} - - -/* does not work, not uniform */ -void -myrand_rotation_polar(const double *x, double **M) -{ - const double ome = x[0] * 2.0*MY_PI; - const double phi = x[1] * 1.0*MY_PI; - const double kap = x[2] * 1.0*MY_PI; - - const double come = cos(ome); - const double cphi = cos(phi); - const double ckap = cos(kap); - const double some = sin(ome); - const double sphi = sin(phi); - const double skap = sin(kap); - - const double l = some*cphi; - const double m = some*sphi; - const double n = come; - - M[0][0] = l*l + (m*m + n*n)*ckap; - M[0][1] = l*m*(1-ckap) - n*skap; - M[0][2] = n*l*(1-ckap) + m*skap; - - M[1][0] = l*m*(1-ckap) + n*skap; - M[1][1] = m*m + (n*n + l*l)*ckap; - M[1][2] = m*n*(1-ckap) - l*skap; - - M[2][0] = n*l*(1-ckap) - m*skap;; - M[2][1] = m*n*(1-ckap) + l*skap; - M[2][2] = n*n + (l*l + m*m) * ckap; - -/* VerifyRotMat(M, 1e-12); */ -} - - void rand_translation(double *x, double var, const gsl_rng *r2) { 
@@ -231,8 +165,6 @@ double **rotmat = MatAlloc(3, 3); int i; -/* #include "vonmises_dist.h" */ -/* #include "uniform_dist.h" */ for (i = 0; i < cdsA->cnum; ++i) { randx[0] = gsl_rng_uniform(r2); @@ -318,8 +250,8 @@ double *p = calloc(protlen, sizeof(double)); double **randv = MatAlloc(protlen, 3), **randn = MatAlloc(protlen, 3); double *diag = malloc(protlen * sizeof(double)); - Cds **cds = cdsA->cds; - Cds *mcds = cdsA->cds[0]; + Cds **cds = cdsA->cds; + Cds *mcds = cdsA->cds[0]; double scale = 1.0; // for (i = 0; i < protlen; ++i) @@ -337,7 +269,7 @@ for (i = 0; i < protlen; ++i) diag[i] = covmat[i][i]; - //diag[i] = invgamma_dev(cdsA->algo->param[0], cdsA->algo->param[1]); + //diag[i] = invgamma_dev(algo->param[0], algo->param[1]); CovMat2CorMat(covmat, protlen); @@ -345,9 +277,9 @@ /* printf("%-3d %f\n", i+1, covmat[i][i]); */ // for (i = 0; i < protlen; ++i) -// diag[i] = invgamma_dev(cdsA->algo->param[0], cdsA->algo->param[1]); +// diag[i] = invgamma_dev(algo->param[0], algo->param[1]); - PrintCovMatGnuPlot((const double **) covmat, protlen, mystrcat(cdsA->algo->rootname, "_cor.mat")); + PrintCovMatGnuPlot((const double **) covmat, protlen, mystrcat(algo->rootname, "_cor.mat")); for (i = 0; i < protlen; ++i) printf("%-3d %f\n", i+1, diag[i]); @@ -355,11 +287,11 @@ CenMass(mcds); ApplyCenterIp(mcds); - if (mcds->innerprod == NULL) - mcds->innerprod = MatAlloc(protlen, protlen); + if (mcds->outerprod == NULL) + mcds->outerprod = MatAlloc(protlen, protlen); CdsInnerProd(mcds); - PrintCovMatGnuPlot((const double **) mcds->innerprod, protlen, mystrcat(cdsA->algo->rootname, "_mean.mat")); + PrintCovMatGnuPlot((const double **) mcds->outerprod, protlen, mystrcat(algo->rootname, "_mean.mat")); /* LAPACK dpotrf calculates the Cholesky decomposition, here upper triangular */ //dpotrf_opt_dest(covmat, protlen); @@ -368,12 +300,12 @@ for (i = cdsA->cnum - 1; i >=0; --i) { - RandFillMat(randn, protlen, 3, cdsA->algo->random, 0, 0, 0, r2); + RandFillMat(randn, protlen, 3, 
algo->random, 0, 0, 0, r2); memset(&randv[0][0], 0, protlen * 3 * sizeof(double)); for (m = 0; m < 3; ++m) for (j = 0; j < protlen; ++j) - for (k = 0; k < protlen; ++k) /* because covmat is upper diagonal, lower is all zeros */ + for (k = 0; k < protlen; ++k) /* because covmat is upper diagonal, lower is all zeros */ randv[j][m] += covmat[k][j] * randn[k][m]; /* for (j = 0; j < protlen; ++j) */ @@ -381,7 +313,7 @@ for (j = 0; j < protlen; ++j) { - scale = RandScale(diag[j], cdsA->algo->random, 0, 0); + scale = RandScale(diag[j], algo->random, 0, 0); /* printf("\n%d %f", j, scale); */ cds[i]->x[j] = mcds->x[j] + randv[j][0] * scale; @@ -414,32 +346,11 @@ double *invdiag = malloc(protlen * sizeof(double)); double a, b, c, d, sum/* , smallest */; double B, /* C, */D; - Cds **cds = cdsA->cds; - Cds *mcds = cdsA->cds[0]; - double axes[3]; - double axes_sqrt[3]; + Cds **cds = cdsA->cds; + Cds *mcds = cdsA->cds[0]; /* FILE *fp; */ double x1 = 0.0, x2 = 0.0, scale = 1.0; - sum = 0.0; - for (i = 0; i < 3; ++i) - sum += cdsA->algo->raxes[i]; - sum /= 3.0; - - for (i = 0; i < 3; ++i) - cdsA->algo->raxes[i] /= sum; - - printf(" axes vars: %f %f %f\n", - cdsA->algo->raxes[0], - cdsA->algo->raxes[1], - cdsA->algo->raxes[2]); - - for (i = 0; i < 3; ++i) - axes_sqrt[i] = sqrt(cdsA->algo->raxes[i]); - - for (i = 0; i < 3; ++i) - axes[i] = cdsA->algo->raxes[i]; - /* B = 0.7480208079921; */ B = 0.2; a = 15.0; @@ -453,7 +364,7 @@ /* D = 0.03135645239684; */ b = B, /* C = c, */d = D; -/* CalcCovMat(cdsA); */ +/* CalcCovMat(cdsA); */ memcpy(covmat[0], internmat[0], protlen * protlen * sizeof(double)); @@ -506,7 +417,7 @@ // for (i = 0; i < protlen; ++i) // { // /* diag[i] = covmat[i][i] = invgamma_dev(3.468e-03, 3.779e-01); */ -// diag[i] = invgamma_dev(cdsA->algo->param[0], cdsA->algo->param[1]); +// diag[i] = invgamma_dev(algo->param[0], algo->param[1]); // } // CorMat2CovMat(covmat, diag, protlen); @@ -524,7 +435,7 @@ for (i = 0; i < protlen; ++i) diag[i] = covmat[i][i]; - 
PrintCovMatGnuPlot((const double **) covmat, protlen, mystrcat(cdsA->algo->rootname, "_cov.mat")); + PrintCovMatGnuPlot((const double **) covmat, protlen, mystrcat(algo->rootname, "_cov.mat")); for (i = 0; i < protlen; ++i) printf("%-3d %f\n", i+1, diag[i]); @@ -544,17 +455,17 @@ CenMass(mcds); ApplyCenterIp(mcds); - if (mcds->innerprod == NULL) - mcds->innerprod = MatAlloc(protlen, protlen); + if (mcds->outerprod == NULL) + mcds->outerprod = MatAlloc(protlen, protlen); CdsInnerProd(mcds); - PrintCovMatGnuPlot((const double **) mcds->innerprod, protlen, mystrcat(cdsA->algo->rootname, "_mean.mat")); + PrintCovMatGnuPlot((const double **) mcds->outerprod, protlen, mystrcat(algo->rootname, "_mean.mat")); sum = 0.0; for (i = 0; i < protlen; ++i) - sum += (1.0 / (diag[i]*axes[0]) + 1.0 / (diag[i]*axes[1]) + 1.0 / (diag[i]*axes[2])); + sum += 1.0 / diag[i]; - printf("\n\n-> sum: %f sigma^2: %f sigma: %f\n", sum/3, 3*protlen/sum, sqrt(3*protlen/sum)); + printf("\n\n-> sum: %f sigma^2: %f sigma: %f\n", sum, protlen/sum, sqrt(protlen/sum)); /* MatInvLAPACK(covmat, covmat, protlen); */ /* */ @@ -583,32 +494,21 @@ /* PrintCovMatGnuPlot((const double **) covmat, protlen, "corr.mat"); */ /* */ -/* FILE *fp2 = fopen("intdist.mat", "w"); */ -/* for (i = 0; i < protlen; ++i) */ -/* { */ -/* fprintf(fp2, "\n"); */ -/* for (j = 0; j < protlen; ++j) */ -/* fprintf(fp2, "% 12.8f\t", */ -/* exp(-0.1*CdsDist(cds[0], i, cds[0], j))); */ -/* } */ -/* fprintf(fp2, "\n"); */ -/* fclose(fp2); */ /* LAPACK dpotrf calculates the Cholesky decomposition, here upper triangular */ //dpotrf_opt_dest(covmat, protlen); CholeskyGSLDest(covmat, protlen); /* write_C_mat((const double **) covmat, protlen, 6, 10); */ - printf(" true axes sqrt: %f %f %f\n", axes_sqrt[0], axes_sqrt[1], axes_sqrt[2]); /* fp = fopen("rand.txt", "w"); */ for (i = cdsA->cnum - 1; i >=0; --i) { - RandFillMat(randn, protlen, 3, cdsA->algo->random, b, c, d, r2); + RandFillMat(randn, protlen, 3, algo->random, b, c, d, r2); 
memset(&randv[0][0], 0, protlen * 3 * sizeof(double)); for (m = 0; m < 3; ++m) for (j = 0; j < protlen; ++j) - for (k = 0; k < protlen; ++k) /* because covmat is upper diagonal, lower is all zeros */ + for (k = 0; k < protlen; ++k) /* because covmat is upper diagonal, lower is all zeros */ randv[j][m] += covmat[k][j] * randn[k][m]; /* for (j = 0; j < protlen; ++j) */ @@ -616,15 +516,15 @@ for (j = 0; j < protlen; ++j) { - scale = RandScale(diag[j], cdsA->algo->random, a, b); + scale = RandScale(diag[j], algo->random, a, b); /* printf("\n%d %f", j, scale); */ cds[i]->x[j] = mcds->x[j] + - randv[j][0] * scale * axes_sqrt[0]; + randv[j][0] * scale; cds[i]->y[j] = mcds->y[j] + - randv[j][1] * scale * axes_sqrt[1]; + randv[j][1] * scale; cds[i]->z[j] = mcds->z[j] + - randv[j][2] * scale * axes_sqrt[2]; + randv[j][2] * scale; /* if (j == 0) */ /* printf("\n%-3d % 8.3f % 8.3f % 8.3f", */ /* i, cds[i]->x[j], cds[i]->y[j], cds[i]->z[j]); */ @@ -665,9 +565,9 @@ CenMass(mcds); ApplyCenterIp(mcds); - CalcCdsPrincAxes(mcds, mcds->matrix); - Mat3TransposeIp(mcds->matrix); - RotateCdsIp(mcds, (const double **) mcds->matrix); +// CalcCdsPrincAxes(mcds, mcds->matrix); +// Mat3TransposeIp(mcds->matrix); +// RotateCdsIp(mcds, (const double **) mcds->matrix); for (i = cdsA->cnum - 1; i >=0; --i) { @@ -932,7 +832,7 @@ cds->x[i] = normal_dev3(0.0, radii[0]*radii[0], r2); /* 11 30-37 8 Real(8.3) x Orthogonal coordinates for X */ cds->y[i] = normal_dev3(0.0, radii[1]*radii[1], r2); /* 12 30-37 8 Real(8.3) y Orthogonal coordinates for Y */ cds->z[i] = normal_dev3(0.0, radii[2]*radii[2], r2); /* 13 30-37 8 Real(8.3) z Orthogonal coordinates for Z */ - cds->occupancy[i] = 1.0; /* 14 54-59 6 Real(6.2) occupancy Occupancy. */ + cds->occupancy[i] = 1.0; /* 14 54-59 6 Real(6.2) occupancy Occupancy. */ cds->tempFactor[i] = 10.0; /* 15 60-65 6 Real(6.2) tempFactor Temperature factor.
*/ } diff -Nru theseus-2.0.6/RandCds.h theseus-3.0.0/RandCds.h --- theseus-2.0.6/RandCds.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/RandCds.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff -Nru theseus-2.0.6/README theseus-3.0.0/README --- theseus-2.0.6/README 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/README 2014-05-13 16:48:52.000000000 +0000 @@ -1,4 +1,4 @@ -THESEUS(1) Likelihood Rocks THESEUS(1) +THESEUS(1) Likelihood (and Bayes) Rocks THESEUS(1) @@ -20,9 +20,9 @@ theseus -a0 -e2 -g1 -i200 -k-1 -p1e-7 -r theseus -v -P0 your.pdb DESCRIPTION - Theseus superpositions a set of macromolecular structures simultane- - ously using the method of maximum likelihood (ML), rather than the con- - ventional least-squares criterion. Theseus assumes that the structures + Theseus superposes a set of macromolecular structures simultaneously + using the method of maximum likelihood (ML), rather than the conven- + tional least-squares criterion. Theseus assumes that the structures are distributed according to a matrix Gaussian distribution and that the eigenvalues of the atomic covariance matrix are hierarchically dis- tributed according to an inverse gamma distribution. 
This ML superposi- @@ -619,4 +619,4 @@ -Brandeis University 11 October 2012 THESEUS(1) +Brandeis University 13 May 2014 THESEUS(1) diff -Nru theseus-2.0.6/statken.c theseus-3.0.0/statken.c --- theseus-2.0.6/statken.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/statken.c 2014-05-13 16:48:52.000000000 +0000 @@ -94,17 +94,16 @@ f function to be evaluated */ -double +double adaptive(n, a, b, eps, f) int n; - double a, b, eps, (*f) (); + double a , b, eps, (*f) (); { - double TOL[20], A[20], H[20], FA[20], FC[20], FB[20], S[20], - V[7]; - int L[20]; - double APP, FD, FE, S1, S2, dabs(); - int I, LEV; + double TOL [20], A[20], H[20], FA[20], FC[20], FB[20], S[20], V[7]; + int L [20]; + double APP , FD, FE, S1, S2, dabs(); + int I , LEV; APP = 0.0; @@ -123,60 +122,60 @@ while ((I > 0)) { - FD = f((A[I] + 0.5 * H[I])); - FE = f((A[I] + 1.5 * H[I])); + FD = f((A[I] + 0.5 * H[I])); + FE = f((A[I] + 1.5 * H[I])); - S1 = H[I] * (FA[I] + 4.0 * FD + FC[I]) / 6.0; - S2 = H[I] * (FC[I] + 4.0 * FE + FB[I]) / 6.0; + S1 = H[I] * (FA[I] + 4.0 * FD + FC[I]) / 6.0; + S2 = H[I] * (FC[I] + 4.0 * FE + FB[I]) / 6.0; - V[0] = A[I]; - V[1] = FA[I]; - V[2] = FC[I]; - V[3] = FB[I]; - V[4] = H[I]; - V[5] = TOL[I]; - V[6] = S[I]; - LEV = L[I]; - - I--; /* line 100 */ - - /* printf("S0 %7.4f S1 %7.4f S2 %7.4f \n", V[6], S1, S2) ; */ - - if (dabs(S1 + S2 - V[6]) < V[5]) - { - APP = APP + (S1 + S2); - /* printf("app %7.4f \n", APP) ; */ - } else - { - if (LEV >= n) - { - printf("Number of levels exceeded in adaptive quadrature\n"); - exit(1); - } else - { - I++; - A[I] = V[0] + V[4]; - FA[I] = V[2]; - FC[I] = FE; - FB[I] = V[3]; - H[I] = 0.5 * V[4]; - TOL[I] = 0.5 * V[5]; - S[I] = S2; - L[I] = LEV + 1; - - I++; - A[I] = V[0]; - FA[I] = V[1]; - FC[I] = FD; - FB[I] = V[2]; - H[I] = H[I - 1]; - TOL[I] = TOL[I - 1]; - S[I] = S1; - L[I] = L[I - 1]; - /* printf("level %d \n", L[I]) ; */ - } - } - /* printf("i %d\n", I) ; */ + V[0] = A[I]; + V[1] = FA[I]; + V[2] = FC[I]; + V[3] = FB[I]; + V[4] = 
H[I]; + V[5] = TOL[I]; + V[6] = S[I]; + LEV = L[I]; + + I--; /* line 100 */ + + /* printf("S0 %7.4f S1 %7.4f S2 %7.4f \n", V[6], S1, S2) ; */ + + if (dabs(S1 + S2 - V[6]) < V[5]) + { + APP = APP + (S1 + S2); + /* printf("app %7.4f \n", APP) ; */ + } else + { + if (LEV >= n) + { + printf("Number of levels exceeded in adaptive quadrature\n"); + exit(1); + } else + { + I++; + A[I] = V[0] + V[4]; + FA[I] = V[2]; + FC[I] = FE; + FB[I] = V[3]; + H[I] = 0.5 * V[4]; + TOL[I] = 0.5 * V[5]; + S[I] = S2; + L[I] = LEV + 1; + + I++; + A[I] = V[0]; + FA[I] = V[1]; + FC[I] = FD; + FB[I] = V[2]; + H[I] = H[I - 1]; + TOL[I] = TOL[I - 1]; + S[I] = S1; + L[I] = L[I - 1]; + /* printf("level %d \n", L[I]) ; */ + } + } + /* printf("i %d\n", I) ; */ } /* printf("just before return APP %7.4f\n", APP) ; */ @@ -189,23 +188,23 @@ /* parameters are L^T, e, z, and dim z */ /* ********************************************** */ -void +void backsubst(Lt, e, z, n) - double Lt[], e[], z[]; + double Lt [], e[], z[]; int n; { - int i, j, k; + int i , j, k; double tempsum; e[n - 1] = z[n - 1] / Lt[n * (n - 1) + n - 1]; for (i = n - 2; i >= 0; i--) { - tempsum = 0.0; - for (j = i + 1; j < n; j++) - tempsum += Lt[n * i + j] * e[j]; + tempsum = 0.0; + for (j = i + 1; j < n; j++) + tempsum += Lt[n * i + j] * e[j]; - e[i] = (z[i] - tempsum) / Lt[n * i + i]; + e[i] = (z[i] - tempsum) / Lt[n * i + i]; } } @@ -216,11 +215,11 @@ /* bivariate normal density */ -double +double bivnormd(x1, x2, mu1, mu2, sigma1sq, sigma2sq, sigma12) - double x1, x2, mu1, mu2, sigma1sq, sigma2sq, sigma12; + double x1 , x2, mu1, mu2, sigma1sq, sigma2sq, sigma12; { - double sqrt(), exp(), pow(), PI = 3.14159265359; + double sqrt (), exp(), pow(), PI = 3.14159265359; double sigma1, sigma2, rho, det, denom, inside, whole; sigma1 = sqrt(sigma1sq); @@ -229,11 +228,11 @@ det = sigma1sq * sigma2sq - sigma12 * sigma12; denom = 1.0 - rho * rho; inside = pow(((x1 - mu1) / sigma1), 2.0) - - 2.0 * rho * (x1 - mu1) * (x2 - mu2) / (sigma1 * 
sigma2) - + pow(((x2 - mu2) / sigma2), 2.0); + - 2.0 * rho * (x1 - mu1) * (x2 - mu2) / (sigma1 * sigma2) + + pow(((x2 - mu2) / sigma2), 2.0); whole = exp(-inside / (2.0 * denom)) / (2.0 * PI * sigma1 * sigma2 * - sqrt(denom)); + sqrt(denom)); return (whole); } @@ -244,37 +243,37 @@ /* parameters are (choleski decomp of A), b, and x, and dim x */ /* *********************************************************** */ -double +double cholleast(C, b, x, n) - double C[], b[], x[]; + double C [], b[], x[]; int n; { - int i, j, k; + int i , j, k; double *ty, tempsum; char *malloc(); - ty = (double *) malloc(n * sizeof(double)); + ty = (double *)malloc(n * sizeof(double)); ty[0] = b[0] / C[0]; for (i = 1; i < n; i++) { - tempsum = 0.0; - for (j = 0; j < i; j++) - tempsum += C[n * i + j] * ty[j]; - ty[i] = (b[i] - tempsum) / C[n * i + i]; + tempsum = 0.0; + for (j = 0; j < i; j++) + tempsum += C[n * i + j] * ty[j]; + ty[i] = (b[i] - tempsum) / C[n * i + i]; } x[n - 1] = ty[n - 1] / C[n * (n - 1) + n - 1]; for (i = n - 2; i >= 0; i--) { - tempsum = 0.0; - for (j = i + 1; j < n; j++) - tempsum += C[n * j + i] * x[j]; + tempsum = 0.0; + for (j = i + 1; j < n; j++) + tempsum += C[n * j + i] * x[j]; - x[i] = (ty[i] - tempsum) / C[n * i + i]; + x[i] = (ty[i] - tempsum) / C[n * i + i]; } free(ty); @@ -287,37 +286,37 @@ /* parameters are (upper tri left sq rt of A), b, and x, and dim x */ /* *********************************************************** */ -double +double choltleast(C, b, x, n) - double C[], b[], x[]; + double C [], b[], x[]; int n; { - int i, j, k; + int i , j, k; double *ty, tempsum; char *malloc(); - ty = (double *) malloc(n * sizeof(double)); + ty = (double *)malloc(n * sizeof(double)); ty[n - 1] = b[n - 1] / C[n * (n - 1) + n - 1]; for (i = n - 2; i >= 0; i--) { - tempsum = 0.0; - for (j = i + 1; j < n; j++) - tempsum += C[n * i + j] * ty[j]; + tempsum = 0.0; + for (j = i + 1; j < n; j++) + tempsum += C[n * i + j] * ty[j]; - ty[i] = (b[i] - tempsum) / C[n * i + i]; + 
ty[i] = (b[i] - tempsum) / C[n * i + i]; } x[0] = ty[0] / C[0]; for (i = 1; i < n; i++) { - tempsum = 0.0; - for (j = 0; j < i; j++) - tempsum += C[n * j + i] * x[j]; - x[i] = (ty[i] - tempsum) / C[n * i + i]; + tempsum = 0.0; + for (j = 0; j < i; j++) + tempsum += C[n * j + i] * x[j]; + x[i] = (ty[i] - tempsum) / C[n * i + i]; } free(ty); @@ -331,62 +330,62 @@ /* a is matrix to be factored; lower triangular factorization will be returned in l. n is dimension of a */ -void +void chol(a, l, n) - double a[], l[]; + double a [], l[]; long n; { - long i, j, k, nsq, errflag; - double pow(), sqrt(), sum; + long i , j, k, nsq, errflag; + double pow (), sqrt(), sum; double tol = .000000000000001; nsq = n * n; if (a[0] > tol) - l[0] = sqrt(a[0]); + l[0] = sqrt(a[0]); else { - printf("Error 1 in choleski decomposition.\n"); - exit(1); + printf("Error 1 in choleski decomposition.\n"); + exit(1); } for (j = 1; j < n; j++) - l[n * j] = a[n * j] / l[0]; + l[n * j] = a[n * j] / l[0]; for (i = 1; i < n - 1; i++) { - sum = 0.0; - for (k = 0; k < i; k++) - sum += l[n * i + k] * l[n * i + k]; - if (a[n * i + i] - sum > tol) - l[n * i + i] = sqrt(a[n * i + i] - sum); - else - { - printf("Error 2 in choleski decomposition.\n"); - exit(1); - } - for (j = i + 1; j < n; j++) - { - sum = 0.0; - for (k = 0; k < i; k++) - sum += l[n * j + k] * l[n * i + k]; - l[n * j + i] = (a[n * j + i] - sum) / l[n * i + i]; - } + sum = 0.0; + for (k = 0; k < i; k++) + sum += l[n * i + k] * l[n * i + k]; + if (a[n * i + i] - sum > tol) + l[n * i + i] = sqrt(a[n * i + i] - sum); + else + { + printf("Error 2 in choleski decomposition.\n"); + exit(1); + } + for (j = i + 1; j < n; j++) + { + sum = 0.0; + for (k = 0; k < i; k++) + sum += l[n * j + k] * l[n * i + k]; + l[n * j + i] = (a[n * j + i] - sum) / l[n * i + i]; + } } sum = 0.0; for (k = 0; k < n - 1; k++) - sum += l[n * (n - 1) + k] * l[n * (n - 1) + k]; + sum += l[n * (n - 1) + k] * l[n * (n - 1) + k]; if (a[n * (n - 1) + n - 1] - sum >= tol) - l[n * 
(n - 1) + n - 1] = sqrt(a[n * (n - 1) + n - 1] - sum); + l[n * (n - 1) + n - 1] = sqrt(a[n * (n - 1) + n - 1] - sum); else { - printf("a[%d][%d] %7.4f sum %7.4f \n", n - 1, n - 1, a[n * (n - 1) + n - 1], sum); - printf("Error 3 in choleski decomposition.\n"); - exit(1); + printf("a[%d][%d] %7.4f sum %7.4f \n", n - 1, n - 1, a[n * (n - 1) + n - 1], sum); + printf("Error 3 in choleski decomposition.\n"); + exit(1); } for (i = 0; i < n; i++) - for (j = i + 1; j < n; j++) - l[n * i + j] = 0.0; + for (j = i + 1; j < n; j++) + l[n * i + j] = 0.0; } @@ -399,72 +398,72 @@ /* d is vector of diagonal entries of second matrix */ /* Cholesky decomp is returned in l. n is dimension of orig mat */ -void +void cholsum(oldl, d, l, n) - double oldl[], d[], l[]; + double oldl [], d[], l[]; long n; { - long i, j, k, nsq, errflag; - double pow(), sqrt(), sum1, sum2, tol = 0.000000001; + long i , j, k, nsq, errflag; + double pow (), sqrt(), sum1, sum2, tol = 0.000000001; nsq = n * n; l[0] = sqrt(oldl[0] * oldl[0] + d[0]); if (l[0] < tol) { - printf("Error 1 in choleski decomposition.\n"); - exit(1); + printf("Error 1 in choleski decomposition.\n"); + exit(1); } for (j = 1; j < n; j++) - l[n * j] = oldl[0] * oldl[n * j] / l[0]; + l[n * j] = oldl[0] * oldl[n * j] / l[0]; for (i = 1; i < n - 1; i++) { - sum1 = sum2 = 0.0; - for (k = 0; k < i; k++) - { - sum1 += oldl[n * i + k] * oldl[n * i + k]; - sum2 += l[n * i + k] * l[n * i + k]; - } - sum1 += oldl[n * i + i] * oldl[n * i + i]; /* sum1 is orig - * mat[i][i] */ - l[n * i + i] = sqrt(sum1 + d[i] - sum2); - if (l[n * i + i] < tol) - { - printf("Error 2 in choleski decomposition.\n"); - exit(1); - } - for (j = i + 1; j < n; j++) - { - sum1 = sum2 = 0.0; - for (k = 0; k < i; k++) - { - sum1 += oldl[n * i + k] * oldl[n * j + k]; - sum2 += l[n * j + k] * l[n * i + k]; - } - sum1 += oldl[n * i + i] * oldl[n * j + i]; /* sum1 is orig - * mat[i][i] */ - l[n * j + i] = (sum1 - sum2) / l[n * i + i]; - } + sum1 = sum2 = 0.0; + for (k = 0; k < i; 
k++) + { + sum1 += oldl[n * i + k] * oldl[n * i + k]; + sum2 += l[n * i + k] * l[n * i + k]; + } + sum1 += oldl[n * i + i] * oldl[n * i + i]; /* sum1 is orig + * mat[i][i] */ + l[n * i + i] = sqrt(sum1 + d[i] - sum2); + if (l[n * i + i] < tol) + { + printf("Error 2 in choleski decomposition.\n"); + exit(1); + } + for (j = i + 1; j < n; j++) + { + sum1 = sum2 = 0.0; + for (k = 0; k < i; k++) + { + sum1 += oldl[n * i + k] * oldl[n * j + k]; + sum2 += l[n * j + k] * l[n * i + k]; + } + sum1 += oldl[n * i + i] * oldl[n * j + i]; /* sum1 is orig + * mat[i][i] */ + l[n * j + i] = (sum1 - sum2) / l[n * i + i]; + } } sum1 = sum2 = 0.0; for (k = 0; k < n - 1; k++) { - sum1 += oldl[n * (n - 1) + k] * oldl[n * (n - 1) + k]; - sum2 += l[n * (n - 1) + k] * l[n * (n - 1) + k]; + sum1 += oldl[n * (n - 1) + k] * oldl[n * (n - 1) + k]; + sum2 += l[n * (n - 1) + k] * l[n * (n - 1) + k]; } - sum1 += oldl[n * (n - 1) + i] * oldl[n * (n - 1) + i]; /* sum1 is orig - * mat[n-1][n-1] */ + sum1 += oldl[n * (n - 1) + i] * oldl[n * (n - 1) + i]; /* sum1 is orig + * mat[n-1][n-1] */ l[n * (n - 1) + n - 1] = sqrt(sum1 + d[n - 1] - sum2); if (l[n * (n - 1) + n - 1] < tol) { - printf("Error 3 in choleski decomposition.\n"); - exit(1); + printf("Error 3 in choleski decomposition.\n"); + exit(1); } for (i = 0; i < n; i++) - for (j = i + 1; j < n; j++) - l[n * i + j] = 0.0; + for (j = i + 1; j < n; j++) + l[n * i + j] = 0.0; } /* ******* */ @@ -476,14 +475,14 @@ /* d is vector of diagonal entries of second matrix */ /* result s returned in l. 
n is dimension of orig mat */ -void +void choltsum(oldl, d, l, n) - double oldl[], d[], l[]; + double oldl [], d[], l[]; long n; { - long i, j, k, nsq, errflag, index; - double pow(), sqrt(), sum1, sum2, tol = 0.000000000001; + long i , j, k, nsq, errflag, index; + double pow (), sqrt(), sum1, sum2, tol = 0.000000000001; nsq = n * n; @@ -491,62 +490,62 @@ l[index] = sqrt(oldl[index] * oldl[index] + d[n - 1]); if (l[index] < tol) { - printf("Error 1 in choleski decomposition.\n"); - exit(1); + printf("Error 1 in choleski decomposition.\n"); + exit(1); } for (j = n - 2; j >= 0; j--) - l[n * j + n - 1] = oldl[index] * oldl[n * j + n - 1] / l[index]; + l[n * j + n - 1] = oldl[index] * oldl[n * j + n - 1] / l[index]; for (i = n - 2; i > 0; i--) { - sum1 = sum2 = 0.0; - for (k = n - 1; k > i; k--) - { - sum1 += oldl[n * i + k] * oldl[n * i + k]; - sum2 += l[n * i + k] * l[n * i + k]; - } - sum1 += oldl[n * i + i] * oldl[n * i + i]; /* sum1 is orig - * mat[i][i] */ - l[n * i + i] = sqrt(sum1 + d[i] - sum2); - if (l[n * i + i] < tol) - { - printf("Error 2 in choleski decomposition.\n"); - exit(1); - } - for (j = i - 1; j >= 0; j--) - { - sum1 = sum2 = 0.0; - for (k = n - 1; k > i; k--) - { - sum1 += oldl[n * i + k] * oldl[n * j + k]; - sum2 += l[n * j + k] * l[n * i + k]; - } - sum1 += oldl[n * i + i] * oldl[n * j + i]; /* sum1 is orig - * mat[i][i] */ - l[n * j + i] = (sum1 - sum2) / l[n * i + i]; - } + sum1 = sum2 = 0.0; + for (k = n - 1; k > i; k--) + { + sum1 += oldl[n * i + k] * oldl[n * i + k]; + sum2 += l[n * i + k] * l[n * i + k]; + } + sum1 += oldl[n * i + i] * oldl[n * i + i]; /* sum1 is orig + * mat[i][i] */ + l[n * i + i] = sqrt(sum1 + d[i] - sum2); + if (l[n * i + i] < tol) + { + printf("Error 2 in choleski decomposition.\n"); + exit(1); + } + for (j = i - 1; j >= 0; j--) + { + sum1 = sum2 = 0.0; + for (k = n - 1; k > i; k--) + { + sum1 += oldl[n * i + k] * oldl[n * j + k]; + sum2 += l[n * j + k] * l[n * i + k]; + } + sum1 += oldl[n * i + i] * oldl[n * j + i]; 
/* sum1 is orig + * mat[i][i] */ + l[n * j + i] = (sum1 - sum2) / l[n * i + i]; + } } sum1 = sum2 = 0.0; for (k = n - 1; k > 0; k--) { - sum1 += oldl[0 + k] * oldl[0 + k]; - sum2 += l[0 + k] * l[0 + k]; + sum1 += oldl[0 + k] * oldl[0 + k]; + sum2 += l[0 + k] * l[0 + k]; } - sum1 += oldl[0 + i] * oldl[0 + i]; /* sum1 is orig mat[0][0] */ + sum1 += oldl[0 + i] * oldl[0 + i]; /* sum1 is orig mat[0][0] */ l[0] = sqrt(sum1 + d[0] - sum2); if (l[0] < tol) { - printf("Error 3 in choleski decomposition.\n"); - exit(1); + printf("Error 3 in choleski decomposition.\n"); + exit(1); } for (i = 0; i < n; i++) - for (j = 0; j < i; j++) - l[n * i + j] = 0.0; + for (j = 0; j < i; j++) + l[n * i + j] = 0.0; } /* Return double precision absolute value */ -double +double dabs(x) double x; { @@ -557,9 +556,9 @@ /* Return the maximum of 2 double-precision numbers */ -double +double dmax(x, y) - double x, y; + double x , y; { return ((x >= y) ? x : y); } @@ -567,31 +566,31 @@ /* Return the minimum of 2 double-precision numbers */ -double +double dmin(x, y) - double x, y; + double x , y; { return ((x <= y) ? 
x : y); } /* return double precision determinant of double prec 2x2 matrix */ -double +double det2(a) - double a[]; + double a []; { - return ((double) (a[0] * a[3] - a[1] * a[2])); + return ((double)(a[0] * a[3] - a[1] * a[2])); } /* return double precision determinant of double prec 3x3 matrix */ -double +double det3(a) - double a[]; + double a []; { return (a[0] * a[4] * a[8] + 2.0 * a[1] * a[2] * a[5] - a[1] * a[1] * a[8] - - a[0] * a[5] * a[5] - a[4] * a[2] * a[2]); + - a[0] * a[5] * a[5] - a[4] * a[2] * a[2]); } /***********************/ @@ -604,30 +603,30 @@ n is length of alpha retvect is vector of length n in which to return variate */ -double +double diricvar(alpha, n, retvect) double alpha[], retvect[]; long n; { double gammavar(), *y, sumy; - long i, j, k; + long i , j, k; char *malloc(); - y = (double *) malloc(n, sizeof(double)); + y = (double *)malloc(n, sizeof(double)); for (i = 0; i < n; i++) - y[i] = 0.0; + y[i] = 0.0; sumy = 0.0; for (i = 0; i < n; i++) { - y[i] = gammavar(alpha[i], 1.0); - sumy += y[i]; + y[i] = gammavar(alpha[i], 1.0); + sumy += y[i]; } for (i = 0; i < n; i++) - retvect[i] = y[i] / sumy; + retvect[i] = y[i] / sumy; free(y); @@ -636,43 +635,43 @@ /* extreme value cdf */ -double +double extremec(x) double x; { - double exp(); + double exp (); return (1.0 - exp(-exp(x))); } /* extreme value density */ -double +double extremed(x) double x; { - double exp(); + double exp (); return (exp(x - exp(x))); } /* return quantile of extreme value distribution */ -double +double extremeq(x) double x; { - double log(); + double log (); return (log(-log(1.0 - x))); } /* return random variate from extreme value distribution */ -double +double extrevar() { - double log(), u, drand48(); + double log (), u, drand48(); u = drand48(); return (log(-log(1.0 - u))); @@ -684,23 +683,23 @@ /* parameters are L, e, z, and dim z */ /* ********************************************** */ -void +void forsubst(L, e, z, n) - double L[], e[], z[]; + double L [], 
e[], z[]; int n; { - int i, j, k; + int i , j, k; double tempsum; e[0] = z[0] / L[0]; for (i = 1; i < n; i++) { - tempsum = 0.0; - for (j = 0; j < i; j++) - tempsum += L[n * i + j] * e[j]; + tempsum = 0.0; + for (j = 0; j < i; j++) + tempsum += L[n * i + j] * e[j]; - e[i] = (z[i] - tempsum) / L[n * i + i]; + e[i] = (z[i] - tempsum) / L[n * i + i]; } } @@ -714,90 +713,69 @@ beta -- scale parameter; double prec */ -double +double gammavar(alpha, beta) double alpha, beta; { - double u0, u1, u2, x, c1, c2, c3, c4, w; - double E, pow(), log(), drand48(), exp(), sqrt(); + double u0 , u1, u2, x, c1, c2, c3, c4, w; + double E , pow(), log(), drand48(), exp(), sqrt(); int done = 0, done2; /* if (alpha <= 0.0 || beta <= 0.0) */ if (alpha < 0.0 || beta < 0.0) { - printf("negative parm for gamma\n"); - exit(1); + printf("negative parm for gamma\n"); + exit(1); } E = exp(1.0); - if (alpha < 1.0) /* Ahrens and Dieter */ + if (alpha < 1.0) /* Ahrens and Dieter */ { - while (done == 0) - { - u0 = drand48(); - u1 = drand48(); - if (u0 > E / (alpha + E)) - { - x = -log((alpha + E) * (1.0 - u0) / (alpha * E)); - if (u1 <= pow(x, alpha - 1)) - done = 1; - } else - { - x = pow((alpha + E) * u0 / E, 1.0 / alpha); - if (u1 <= exp(-x)) - done = 1; - } - } - return (x / beta); - } else if (alpha > 1.0) /* Cheng and Feast */ - { - c1 = alpha - 1.0; - c2 = (alpha - 1.0 / (6.0 * alpha)) / c1; - c3 = 2.0 / c1; - c4 = c3 + 2.0; - while (done == 0) - { - do - { - u1 = drand48(); - u2 = drand48(); - if (alpha > 2.5) - u1 = u2 + (1.0 - 1.86 * u1) / sqrt(alpha); - } while (!(0.0 < u1 && u1 < 1.0)); - w = c2 * u2 / u1; - if ((c3 * u1 + w + 1.0 / w) <= c4) - done = 1; - else if (c3 * log(u1) - log(w) + w < 1.0) - done = 1; - } - return (c1 * w / beta); - } else /* alpha = 1.0 so exponential */ - /* - c1 = 0.0 ; - while (done==0) - { - u0 = drand48() ; - c2 = u0 ; - done2 = 0 ; - while (done2==0 && done==0 ) - { - u1 = drand48() ; - if (u0 <= u1) - { - x = c1 + c2 ; - done = 1 ; - done2 = 1 ; - } - 
else - { - u0 = drand48() ; - if (u0 >= u1) - done2 = 1 ; - } - } - c1++ ; - } /* while done==0 */ - return (x / beta); + while (done == 0) + { + u0 = drand48(); + u1 = drand48(); + if (u0 > E / (alpha + E)) + { + x = -log((alpha + E) * (1.0 - u0) / (alpha * E)); + if (u1 <= pow(x, alpha - 1)) + done = 1; + } else + { + x = pow((alpha + E) * u0 / E, 1.0 / alpha); + if (u1 <= exp(-x)) + done = 1; + } + } + return (x / beta); + } else if (alpha > 1.0) /* Cheng and Feast */ + { + c1 = alpha - 1.0; + c2 = (alpha - 1.0 / (6.0 * alpha)) / c1; + c3 = 2.0 / c1; + c4 = c3 + 2.0; + while (done == 0) + { + do + { + u1 = drand48(); + u2 = drand48(); + if (alpha > 2.5) + u1 = u2 + (1.0 - 1.86 * u1) / sqrt(alpha); + } while (!(0.0 < u1 && u1 < 1.0)); + w = c2 * u2 / u1; + if ((c3 * u1 + w + 1.0 / w) <= c4) + done = 1; + else if (c3 * log(u1) - log(w) + w < 1.0) + done = 1; + } + return (c1 * w / beta); + } else /* alpha = 1.0 so exponential */ + /* c1 = 0.0 ; while (done==0) { u0 = drand48() ; c2 = u0 ; done2 = 0 ; + * while (done2==0 && done==0 ) { u1 = drand48() ; if (u0 <= u1) { x = + * c1 + c2 ; done = 1 ; done2 = 1 ; } else { u0 = drand48() ; if (u0 >= + * u1) done2 = 1 ; } } c1++ ; } /* while done==0 */ + return (x / beta); } } @@ -811,198 +789,161 @@ retvar is double precision variable for variate to be returned in */ geweke(a, b, mu, sigma, la, lb, retvar) - double a, b, sigma, mu, *retvar; - int la, lb; + double a , b, sigma, mu, *retvar; + int la , lb; { int lflip; double eps = 2.0, t1 = 0.375, t2 = 2.18, t3 = 0.725, t4 = 0.45; - double f(), normvect[2], drand48(), c1, c2, c, x, cdel, f1, - f2, z, dabs(), y; + double f (), normvect[2], drand48(), c1, c2, c, x, cdel, f1, f2, + z , dabs(), y; double exprej(), f(), halfline(), halfnorm(), normrej(), unifrej(); void normvar(); - if (la & lb) /* both endpoints infinite; return untruncated - * normal */ + if (la & lb) /* both endpoints infinite; return untruncated normal */ { - normvar(normvect); - *retvar = normvect[0] * 
sigma + mu; - return; + normvar(normvect); + *retvar = normvect[0] * sigma + mu; + return; } - if (b <= a) /* 0-width interval */ + if (b <= a) /* 0-width interval */ { - *retvar = a; - return; + *retvar = a; + return; } a = (a - mu) / sigma; b = (b - mu) / sigma; - lflip = 0; /* haven't reversed signs */ + lflip = 0; /* haven't reversed signs */ - if (la | lb) /* one endpoint is infinite */ - x = halfline(a, b, &lflip, lb); + if (la | lb) /* one endpoint is infinite */ + x = halfline(a, b, &lflip, lb); else { - c1 = a; - c2 = b; - if ((c1 * c2) < 0.0) /* (a,b) includes 0 */ - { - if ((c1 > -t1) && (c2 < t1)) - x = unifrej(c1, c2, 1.0); - else - x = normrej(c1, c2); - } else - { - if (c1 < 0.0) - { - c = c1; - c1 = -c2; - c2 = -c; - lflip = 1; - } - f1 = f(c1); - f2 = f(c2); - if ((f2 < eps) || ((f1 / f2) > t2)) - { - if (c1 > t3) - x = exprej(c1, c2); - else - x = halfnorm(c1, c2); - } else - x = unifrej(c1, c2); - } + c1 = a; + c2 = b; + if ((c1 * c2) < 0.0) /* (a,b) includes 0 */ + { + if ((c1 > -t1) && (c2 < t1)) + x = unifrej(c1, c2, 1.0); + else + x = normrej(c1, c2); + } else + { + if (c1 < 0.0) + { + c = c1; + c1 = -c2; + c2 = -c; + lflip = 1; + } + f1 = f(c1); + f2 = f(c2); + if ((f2 < eps) || ((f1 / f2) > t2)) + { + if (c1 > t3) + x = exprej(c1, c2); + else + x = halfnorm(c1, c2); + } else + x = unifrej(c1, c2); + } } if (lflip) - x = -x; + x = -x; y = x * sigma + mu; *retvar = y; } -double +double exprej(c1, c2) - double c1, c2; + double c1 , c2; { - double x, drand48(), z, u, c, log(), f(); + double x , drand48(), z, u, c, log(), f(); c = c2 - c1; do { - z = -log(drand48()) / c1; + z = -log(drand48()) / c1; } while ((z > c) || (drand48() > f(z))); x = c1 + z; return (x); } -double +double f(x) double x; { - double exp(); + double exp (); return (exp(-0.5 * x * x)); } -double +double halfline(a, b, lflipptr, lb) - double a, b; + double a , b; int *lflipptr, lb; { - double c1, c2, t4 = 0.45, u, x, z, drand48(), normvect[2], f(), - log(); + double c1 , c2, 
t4 = 0.45, u, x, z, drand48(), normvect[2], f(), log(); void normvar(); c1 = a; if (lb == 0) { - c1 = -b; - *lflipptr = 1; + c1 = -b; + *lflipptr = 1; } - if (c1 > t4) /* a large; exponential importance sampling */ + if (c1 > t4) /* a large; exponential importance sampling */ { - do - { - z = -log(drand48()) / c1; - } while (drand48() > f(z)); - x = c1 + z; - } else /* a not large; full normal with rej */ - /* - do - { - normvar(normvect) ; - x = normvect[0]; - if (x < c1) - x = normvect[1] ; - - } while (x < c1) ; - } - return(x) ; - } - - double halfnorm(c1,c2) - double c1,c2 ; - { - double x, dabs(), normvect[2]; - void normvar() ; - - do - { - normvar(normvect) ; - x = dabs(normvect[0]) ; - if (x < c1 || x > c2) - x = dabs(normvect[1]) ; - } while (x < c1 || x > c2) ; - return(x) ; - } - - - double normrej(c1,c2) - double c1,c2 ; - { - double x, normvect[2]; - void normvar() ; - - do - { - normvar(normvect) ; - x = normvect[0] ; - if (x < c1 || x > c2) - x = normvect[1] ; - } while (x < c1 || x > c2) ; - return(x) ; - } - - double unifrej(c1,c2,f1) - double c1,c2,f1 ; - { - double cdel, u, x, drand48(), f() ; - - cdel = c2 - c1 ; - do - { - x = c1 + cdel * drand48() ; - } while (drand48() > (f(x)/f1) ) ; - return(x) ; - } - - /* ******** */ - /* inprod */ - /* ******** */ - /* generate inner product of two double precision vectors a[] and b[] - * are the vectors, n is their length */ - double inprod(a, b, n) - double a[], b[]; + do + { + z = -log(drand48()) / c1; + } while (drand48() > f(z)); + x = c1 + z; + } else /* a not large; full normal with rej */ + /* do { normvar(normvect) ; x = normvect[0]; if (x < c1) x = + * normvect[1] ; + * + * } while (x < c1) ; } return(x) ; } + * + * double halfnorm(c1,c2) double c1,c2 ; { double x, dabs(), normvect[2]; + * void normvar() ; + * + * do { normvar(normvect) ; x = dabs(normvect[0]) ; if (x < c1 || x > c2) + * x = dabs(normvect[1]) ; } while (x < c1 || x > c2) ; return(x) ; } + * + * + * double normrej(c1,c2) double 
c1,c2 ; { double x, normvect[2]; void + * normvar() ; + * + * do { normvar(normvect) ; x = normvect[0] ; if (x < c1 || x > c2) x = + * normvect[1] ; } while (x < c1 || x > c2) ; return(x) ; } + * + * double unifrej(c1,c2,f1) double c1,c2,f1 ; { double cdel, u, x, + * drand48(), f() ; + * + * cdel = c2 - c1 ; do { x = c1 + cdel * drand48() ; } while (drand48() > + * (f(x)/f1) ) ; return(x) ; } + * + /* ******** */ + /* inprod */ + /* ******** */ + /* generate inner product of two double precision vectors a[] and b[] + * are the vectors, n is their length */ + double inprod (a, b, n) + double a [], b[]; long n; { - int i; - double retval = 0.0; + int i; + double retval = 0.0; - for (i = 0; i < n; i++) - retval += a[i] * b[i]; + for (i = 0; i < n; i++) + retval += a[i] * b[i]; - return (retval); + return (retval); } @@ -1012,24 +953,24 @@ void invwish(parmmat, df, n, retmat) double parmmat[], retmat[]; - int df, n; + int df , n; { - double *temp; - void wishart(), matinvrt(), matprint(); + double *temp; + void wishart (), matinvrt(), matprint(); - char *malloc(); + char *malloc(); - temp = (double *) malloc(n * n * sizeof(double)); + temp = (double *)malloc(n * n * sizeof(double)); - wishart(parmmat, df, n, temp); - printf("temp \n"); - matprint(temp, n, n); + wishart(parmmat, df, n, temp); + printf("temp \n"); + matprint(temp, n, n); - matinvrt(temp, retmat, n); + matinvrt(temp, retmat, n); - free(temp); + free(temp); } @@ -1037,9 +978,9 @@ double logisvar() { - double u, drand48(), log(); - u = drand48(); - return (log(u / (1.0 - u))); + double u , drand48(), log(); + u = drand48 (); + return (log(u / (1.0 - u))); } /* evaluate standard logistic cdf */ @@ -1048,10 +989,10 @@ double x; { - double y, exp(); + double y , exp(); - y = exp(x); - return (y / (1.0 + y)); + y = exp(x); + return (y / (1.0 + y)); } /* evaluate standard logistic density */ @@ -1060,10 +1001,10 @@ double x; { - double y, exp(), pow(); + double y , exp(), pow(); - y = exp(x); - return (y / 
pow((1.0 + y), 2.0)); + y = exp(x); + return (y / pow((1.0 + y), 2.0)); } /* quantile of standard logistic */ @@ -1072,9 +1013,9 @@ double x; { - double log(); + double log (); - return (log(x / (1.0 - x))); + return (log(x / (1.0 - x))); } /* add one double prec matrix to another */ @@ -1086,35 +1027,35 @@ */ void mataccum(dest, src, rows, cols) - double dest[], src[]; - int rows, cols; + double dest [], src[]; + int rows , cols; { - int i, j, index; + int i , j, index; - for (i = 0; i < rows; i++) - for (j = 0; j < cols; j++) - { - index = cols * i + j; - dest[index] += src[index]; - } + for (i = 0; i < rows; i++) + for (j = 0; j < cols; j++) + { + index = cols * i + j; + dest[index] += src[index]; + } } /* Copy src matrix into dest matrix */ void matcopy(dest, src, rows, cols) - double dest[], src[]; - int rows, cols; + double dest [], src[]; + int rows , cols; { - int i, j, index; + int i , j, index; - for (i = 0; i < rows; i++) - for (j = 0; j < cols; j++) - { - index = cols * i + j; - dest[index] = src[index]; - } + for (i = 0; i < rows; i++) + for (j = 0; j < cols; j++) + { + index = cols * i + j; + dest[index] = src[index]; + } } /* Return double-prec inverse of square non-singular double-prec matrix */ @@ -1129,7 +1070,7 @@ /* *************************** */ void matinvrt(b, binv, n) - double b[], binv[]; + double b [], binv[]; long n; /* b is matrix to be inverted */ @@ -1137,137 +1078,137 @@ /* n is dimension of b and binv */ { - /* The dimensions of a depend on the value of n that is passed to the - * function. Therefore, we declare pointer to this array rather than - * dimensioning it directly */ + /* The dimensions of a depend on the value of n that is passed to the + * function. 
Therefore, we declare pointer to this array rather than + * dimensioning it directly */ /* char *calloc() ; */ - long i, j, k, nsq, twon, p, errflag; - double pow(), sqrt(), dabs(), sum, m, *a, *temp, tol; - char *malloc(); - - nsq = n * n; - a = (double *) malloc(2 * nsq * sizeof(double)); - temp = (double *) malloc(2 * n * sizeof(double)); - - for (i = 0; i < (2 * nsq); i++) - a[i] = 0; - for (i = 0; i < (2 * n); i++) - temp[i] = 0; - - tol = 0.00000000001; - /* changed from 0.00001 5/9/00 */ - twon = 2 * n; - - for (i = 0; i < n; i++) /* construct augmented matrix */ - { - for (j = 0; j < n; j++) - { - a[twon * i + j] = b[n * i + j]; - if (i == j) - a[twon * i + j + n] = 1.0; - else - a[twon * i + j + n] = 0.0; - } - } - - for (i = 0; i < n - 1; i++) - { - for (p = i; (dabs(a[twon * p + i]) < tol) && (p < n); p++); - if (p == n) - { - printf("Matrix inversion fails.\n"); - exit(1); - } else if (p != i) /* exchange rows */ - { - for (j = 0; j < twon; j++) - { - temp[j] = a[twon * p + j]; - a[twon * p + j] = a[twon * i + j]; - a[twon * i + j] = temp[j]; - } - } - for (j = i + 1; j < n; j++) - { - m = a[twon * j + i] / a[twon * i + i]; - for (k = 0; k < twon; k++) - a[twon * j + k] -= m * a[twon * i + k]; - } - } - - if (dabs(a[twon * (n - 1) + (n - 1)]) < tol) - { - printf("Matrix inversion fails.\n"); - exit(1); - } else - { - for (i = (n - 1); i >= 0; i--) - { - m = a[twon * (i) + i]; - for (j = 0; j < twon; j++) - a[twon * (i) + j] /= m; - for (j = i - 1; j >= 0; j--) - { - m = a[twon * j + i]; - for (k = 0; k < twon; k++) - a[twon * j + k] -= m * a[twon * i + k]; - } - } - for (i = 0; i < n; i++) - for (j = 0; j < n; j++) - binv[n * i + j] = a[twon * i + n + j]; - } + long i , j, k, nsq, twon, p, errflag; + double pow (), sqrt(), dabs(), sum, m, *a, *temp, tol; + char *malloc(); + + nsq = n * n; + a = (double *)malloc(2 * nsq * sizeof(double)); + temp = (double *)malloc(2 * n * sizeof(double)); + + for (i = 0; i < (2 * nsq); i++) + a[i] = 0; + for (i = 0; i < 
(2 * n); i++) + temp[i] = 0; + + tol = 0.00000000001; + /* changed from 0.00001 5/9/00 */ + twon = 2 * n; + + for (i = 0; i < n; i++) /* construct augmented matrix */ + { + for (j = 0; j < n; j++) + { + a[twon * i + j] = b[n * i + j]; + if (i == j) + a[twon * i + j + n] = 1.0; + else + a[twon * i + j + n] = 0.0; + } + } + + for (i = 0; i < n - 1; i++) + { + for (p = i; (dabs(a[twon * p + i]) < tol) && (p < n); p++); + if (p == n) + { + printf("Matrix inversion fails.\n"); + exit(1); + } else if (p != i) /* exchange rows */ + { + for (j = 0; j < twon; j++) + { + temp[j] = a[twon * p + j]; + a[twon * p + j] = a[twon * i + j]; + a[twon * i + j] = temp[j]; + } + } + for (j = i + 1; j < n; j++) + { + m = a[twon * j + i] / a[twon * i + i]; + for (k = 0; k < twon; k++) + a[twon * j + k] -= m * a[twon * i + k]; + } + } + + if (dabs(a[twon * (n - 1) + (n - 1)]) < tol) + { + printf("Matrix inversion fails.\n"); + exit(1); + } else + { + for (i = (n - 1); i >= 0; i--) + { + m = a[twon * (i) + i]; + for (j = 0; j < twon; j++) + a[twon * (i) + j] /= m; + for (j = i - 1; j >= 0; j--) + { + m = a[twon * j + i]; + for (k = 0; k < twon; k++) + a[twon * j + k] -= m * a[twon * i + k]; + } + } + for (i = 0; i < n; i++) + for (j = 0; j < n; j++) + binv[n * i + j] = a[twon * i + n + j]; + } - free(a); - free(temp); + free(a); + free(temp); } /* Return double-prec inverse of 2x2 non-singular double-prec matrix */ double matinvt2(a, b) - double a[], b[]; + double a [], b[]; { - double det2(), dabs(), d; + double det2 (), dabs(), d; - d = det2(a); - if (dabs(d) < .00000000001) - { - printf("Attempt to invert singular 2x2 matrix.\n"); - exit(1); - } else - { - b[0] = a[3] / d; - b[1] = -a[1] / d; - b[2] = -a[2] / d; - b[3] = a[0] / d; - } - return; + d = det2(a); + if (dabs(d) < .00000000001) + { + printf("Attempt to invert singular 2x2 matrix.\n"); + exit(1); + } else + { + b[0] = a[3] / d; + b[1] = -a[1] / d; + b[2] = -a[2] / d; + b[3] = a[0] / d; + } + return; } /* Return double-prec 
inverse of 3x3 SYMMETRIC non-sing double-prec matrix */ double matinvt3(a, b) - double a[], b[]; + double a [], b[]; { - double det3(), dabs(), d; + double det3 (), dabs(), d; - d = det3(a); - if (dabs(d) < .00001 || a[1] != a[3] || a[2] != a[6] || a[5] != a[7]) - { - printf("Attempt to invert singular or non-symmetric 3x3 matrix.\n"); - exit(1); - } else - { - b[0] = (a[4] * a[8] - a[5] * a[5]) / d; - b[1] = b[3] = (-a[1] * a[8] + a[2] * a[5]) / d; - b[2] = b[6] = (a[1] * a[5] - a[4] * a[2]) / d; - b[4] = (a[0] * a[8] - a[2] * a[2]) / d; - b[5] = b[7] = (-a[0] * a[5] + a[2] * a[1]) / d; - b[8] = (a[0] * a[4] - a[1] * a[1]) / d; - } - return; + d = det3(a); + if (dabs(d) < .00001 || a[1] != a[3] || a[2] != a[6] || a[5] != a[7]) + { + printf("Attempt to invert singular or non-symmetric 3x3 matrix.\n"); + exit(1); + } else + { + b[0] = (a[4] * a[8] - a[5] * a[5]) / d; + b[1] = b[3] = (-a[1] * a[8] + a[2] * a[5]) / d; + b[2] = b[6] = (a[1] * a[5] - a[4] * a[2]) / d; + b[4] = (a[0] * a[8] - a[2] * a[2]) / d; + b[5] = b[7] = (-a[0] * a[5] + a[2] * a[1]) / d; + b[8] = (a[0] * a[4] - a[1] * a[1]) / d; + } + return; } /* Return product of 2 double prec matrices */ @@ -1282,22 +1223,22 @@ void matmult(mat1, mat2, m1, n1, n2, retmat) - double mat1[], mat2[], retmat[]; - int m1, n1, n2; + double mat1 [], mat2[], retmat[]; + int m1 , n1, n2; { - int i, j, k; + int i , j, k; - for (i = 0; i < m1; i++) - { - for (j = 0; j < n2; j++) - { -/* printf("%g\n",mat1[n1 * i +j]); */ - retmat[n2 * i + j] = 0.0; - for (k = 0; k < n1; k++) - retmat[n2 * i + j] += mat1[n1 * i + k] * mat2[n2 * k + j]; - } - } + for (i = 0; i < m1; i++) + { + for (j = 0; j < n2; j++) + { +/* printf("%g\n",mat1[n1 * i +j]); */ + retmat[n2 * i + j] = 0.0; + for (k = 0; k < n1; k++) + retmat[n2 * i + j] += mat1[n1 * i + k] * mat2[n2 * k + j]; + } + } } @@ -1307,13 +1248,13 @@ int m1, n1, n2 ; { int i, j, k ; - for (i=0; i 0.0) - phix = (1 + erf(x / sqrt(2.0))) / 2.0; - else - { - if (x < 0) - phix = erfc((-x) / 
sqrt(2.0)) / 2.0; - else - phix = .50000; - } - return (phix); + if (x > 0.0) + phix = (1 + erf(x / sqrt(2.0))) / 2.0; + else + { + if (x < 0) + phix = erfc((-x) / sqrt(2.0)) / 2.0; + else + phix = .50000; + } + return (phix); } /* Return quantile of standard normal */ @@ -1579,33 +1520,33 @@ double p1; { - double p[5], q[5], pow(), log(), sqrt(), lim = pow(10.0, -20.0), - xp, pval, y; + double p [5], q[5], pow(), log(), sqrt(), lim = pow(10.0, -20.0), xp, + pval , y; - p[1] = -0.322232431088; - p[2] = -1.0; - p[3] = -0.342242088547; - p[4] = -0.0204231210245; - p[5] = -0.453642210148 * 0.0001; - q[1] = 0.0993484626060; - q[2] = 0.588581570495; - q[3] = 0.531103462366; - q[4] = 0.103537752850; - q[5] = 0.38560700634 * .01; - xp = 0.0; - if (p1 > 0.5) - pval = 1.0 - p1; - else - pval = p1; - if (pval >= lim && pval != 0.5) - { - y = sqrt(log(1.0 / pow(pval, 2.0))); - xp = y + ((((y * p[5] + p[4]) * y + p[3]) * y + p[2]) * y + p[1]) / - ((((y * q[5] + q[4]) * y + q[3]) * y + q[2]) * y + q[1]); - if (p1 < 0.5) - xp = -xp; - } - return (xp); + p[1] = -0.322232431088; + p[2] = -1.0; + p[3] = -0.342242088547; + p[4] = -0.0204231210245; + p[5] = -0.453642210148 * 0.0001; + q[1] = 0.0993484626060; + q[2] = 0.588581570495; + q[3] = 0.531103462366; + q[4] = 0.103537752850; + q[5] = 0.38560700634 * .01; + xp = 0.0; + if (p1 > 0.5) + pval = 1.0 - p1; + else + pval = p1; + if (pval >= lim && pval != 0.5) + { + y = sqrt(log(1.0 / pow(pval, 2.0))); + xp = y + ((((y * p[5] + p[4]) * y + p[3]) * y + p[2]) * y + p[1]) / + ((((y * q[5] + q[4]) * y + q[3]) * y + q[2]) * y + q[1]); + if (p1 < 0.5) + xp = -xp; + } + return (xp); } @@ -1617,20 +1558,20 @@ double *nrm; { - double drand48(); - double u1, u2, w, c, log(), sqrt(); + double drand48 (); + double u1 , u2, w, c, log(), sqrt(); - w = 3.0; - while (w > 1.0) - { - u1 = 2.0 * drand48() - 1.0; - u2 = 2.0 * drand48() - 1.0; - w = u1 * u1 + u2 * u2; - } - - c = sqrt(-2.0 / w * log(w)); - nrm[0] = c * u1; - nrm[1] = c * u2; + w = 3.0; + 
while (w > 1.0) + { + u1 = 2.0 * drand48() - 1.0; + u2 = 2.0 * drand48() - 1.0; + w = u1 * u1 + u2 * u2; + } + + c = sqrt(-2.0 / w * log(w)); + nrm[0] = c * u1; + nrm[1] = c * u2; } @@ -1638,39 +1579,39 @@ generate a random permutation of integers 0 to n - 1 */ randseq(intvect, n) - int intvect[], n; + int intvect[], n; { - double drand48(), *a, tempd; - int i, j, tempi; - char *malloc(); - - a = (double *) malloc(n * sizeof(double)); - - for (i = 0; i < n; i++) - { - a[i] = drand48(); - intvect[i] = i; - } - - /* bubble sort the random numbers and their indices */ - - for (i = 0; i < n; i++) - { - for (j = i + 1; j < n; j++) - if (a[j] < a[i]) - { - tempd = a[i]; - tempi = intvect[i]; - a[i] = a[j]; - intvect[i] = intvect[j]; - a[j] = tempd; - intvect[j] = tempi; - } - } + double drand48 (), *a, tempd; + int i , j, tempi; + char *malloc(); + + a = (double *)malloc(n * sizeof(double)); + + for (i = 0; i < n; i++) + { + a[i] = drand48(); + intvect[i] = i; + } + + /* bubble sort the random numbers and their indices */ + + for (i = 0; i < n; i++) + { + for (j = i + 1; j < n; j++) + if (a[j] < a[i]) + { + tempd = a[i]; + tempi = intvect[i]; + a[i] = a[j]; + intvect[i] = intvect[j]; + a[j] = tempd; + intvect[j] = tempi; + } + } - free(a); + free(a); } @@ -1679,18 +1620,18 @@ /* Initialize seed for random number generator */ /* */ /* ******************************************* - * + * * readrand() - * + * * { FILE *in ; int i, j, tempseed[3] ; unsigned short *seed48() ; - * - * + * + * * if ( (in = fopen( "/space/kcowles/libraries/seedfile","r" )) != NULL) { for * (i = 0; i < 3; i++) { fscanf( in, "%d", &tempseed[i] ) ; seed16v[i] = * tempseed[i] ; } } - * + * * else { printf("Seed file cannot be opened for reading.\n") ; exit(1) ; } - * + * * fclose(in) ; myptr = seed48(seed16v) ; } */ /* Composite Simpson's algorith for evaluating 1-dimensional definite integral*/ @@ -1700,64 +1641,64 @@ n -- even pos integer; number of subintervals */ double simpson(a, b, n, f) - 
double a, b, (*f) (); + double a , b, (*f) (); int n; { - double h, XI0, XI1, XI2, X, XI, fx; - int i; + double h , XI0, XI1, XI2, X, XI, fx; + int i; - h = (b - a) / n; - XI0 = (*f) (a) + (*f) (b); - XI1 = XI2 = 0.0; - - for (i = 1; i < n; i++) - { - X = a + (double) i *h; - - fx = (*f) (X); - if ((i % 2) == 0) - XI2 += fx; - else - XI1 += fx; - } - XI = h * (XI0 + 2.0 * XI2 + 4.0 * XI1) / 3.0; + h = (b - a) / n; + XI0 = (*f) (a) + (*f) (b); + XI1 = XI2 = 0.0; + + for (i = 1; i < n; i++) + { + X = a + (double)i *h; + + fx = (*f) (X); + if ((i % 2) == 0) + XI2 += fx; + else + XI1 += fx; + } + XI = h * (XI0 + 2.0 * XI2 + 4.0 * XI1) / 3.0; - return (XI); + return (XI); } /* Sort array; Shell-Metzner algorithm */ void sortarry(d, n) - double d[]; + double d []; int n; { - int p, k, j, i, l, flips; - double tmp; + int p , k, j, i, l, flips; + double tmp; - for (p = n / 2; p > 0; p /= 2) - { - k = n - p; - for (j = 1; j <= k; j++) - { - i = j; - do - { - flips = 0; - l = i + p; - if (d[i - 1] > d[l - 1]) - { - tmp = d[i - 1]; - d[i - 1] = d[l - 1]; - d[l - 1] = tmp; - i = i - p; - flips = 1; - } - } while (flips == 1 && i >= 1); - } - } + for (p = n / 2; p > 0; p /= 2) + { + k = n - p; + for (j = 1; j <= k; j++) + { + i = j; + do + { + flips = 0; + l = i + p; + if (d[i - 1] > d[l - 1]) + { + tmp = d[i - 1]; + d[i - 1] = d[l - 1]; + d[l - 1] = tmp; + i = i - p; + flips = 1; + } + } while (flips == 1 && i >= 1); + } + } } @@ -1767,21 +1708,21 @@ /* ****************************************** */ void stdnorm(hold, n) - double hold[]; + double hold []; int n; { - void normvar(); - double normvect[2]; - int i, j; - - for (i = 0; i < n; i += 2) - { - normvar(normvect); - hold[i] = normvect[0]; - if (i < n - 1) - hold[i + 1] = normvect[1]; - } + void normvar (); + double normvect[2]; + int i , j; + + for (i = 0; i < n; i += 2) + { + normvar(normvect); + hold[i] = normvect[0]; + if (i < n - 1) + hold[i + 1] = normvect[1]; + } } @@ -1793,28 +1734,28 @@ /* so inverse is 
overwritten into A */ double triinvrt(A, n, hold) - double A[], hold[]; + double A [], hold[]; int n; { - int i, j, k; - double aii; + int i , j, k; + double aii; - for (i = 0; i < n; i++) - { - A[n * i + i] = 1.0 / A[n * i + i]; - aii = -A[n * i + i]; - if (i > 0) - { - for (j = 0; j < i; j++) - { - hold[j] = 0; - for (k = j; k < i; k++) - hold[j] += A[n * j + k] * A[n * k + i]; - A[n * j + i] = aii * hold[j]; - } - } - } + for (i = 0; i < n; i++) + { + A[n * i + i] = 1.0 / A[n * i + i]; + aii = -A[n * i + i]; + if (i > 0) + { + for (j = 0; j < i; j++) + { + hold[j] = 0; + for (k = j; k < i; k++) + hold[j] += A[n * j + k] * A[n * k + i]; + A[n * j + i] = aii * hold[j]; + } + } + } } @@ -1824,33 +1765,33 @@ /* so inverse is overwritten into A */ double triinvrtl(A, n, hold) - double A[], hold[]; + double A [], hold[]; int n; { - int i, j, k; - double aii, tol = 0.0000000000001; + int i , j, k; + double aii , tol = 0.0000000000001; - for (i = 0; i < n; i++) - { - if (A[n * i + i] < tol) - { - printf("Error 1 in triinvrtl \n"); - exit(1); - } - A[n * i + i] = 1.0 / A[n * i + i]; - aii = -A[n * i + i]; - if (i > 0) - { - for (j = 0; j < i; j++) - { - hold[j] = 0; - for (k = j; k < i; k++) - hold[j] += A[n * i + k] * A[n * k + j]; - A[n * i + j] = aii * hold[j]; - } - } - } + for (i = 0; i < n; i++) + { + if (A[n * i + i] < tol) + { + printf("Error 1 in triinvrtl \n"); + exit(1); + } + A[n * i + i] = 1.0 / A[n * i + i]; + aii = -A[n * i + i]; + if (i > 0) + { + for (j = 0; j < i; j++) + { + hold[j] = 0; + for (k = j; k < i; k++) + hold[j] += A[n * i + k] * A[n * k + j]; + A[n * i + j] = aii * hold[j]; + } + } + } } @@ -1862,48 +1803,48 @@ /* Devroye's c.d.f. 
inversion algorithm */ double truncnrm(mu, sigma, a, b) - double mu, sigma, a, b; + double mu , sigma, a, b; { - double normalphi(), normalz(), drand48(), u, g, h, x; + double normalphi(), normalz(), drand48(), u, g, h, x; - u = drand48(); - h = normalphi((a - mu) / sigma); - printf("%10.7f %10.7f\n", h, normalphi((b - mu) / sigma)); - g = h + u * (normalphi((b - mu) / sigma) - h); - x = mu + sigma * normalz(g); - return (x); + u = drand48(); + h = normalphi((a - mu) / sigma); + printf("%10.7f %10.7f\n", h, normalphi((b - mu) / sigma)); + g = h + u * (normalphi((b - mu) / sigma) - h); + x = mu + sigma * normalz(g); + return (x); } /* Generate a Student's t variate with d.f. = nu1 */ /* Ratio of uniforms algorithm */ - double tvar(nu1) + double tvar (nu1) int nu1; { - double a, bmin, bplus, u, u1, u2, x, v, bdiff; - double sqrt(), pow(), drand48(); - int done; - - a = 1.0; - bplus = sqrt((2.0 * nu1) / (nu1 - 1.0) * pow(((nu1 + 1.0) / (nu1 - 1.0)), - (-(nu1 + 1.0) / 2.0))); - bmin = -1.0 * bplus; - bdiff = bplus - bmin; - done = 0; - while (done == 0) - { - u1 = drand48(); - u2 = drand48(); - u = a * u1; - v = bmin + bdiff * u2; - x = v / u; - if (u <= pow((1.0 + x * x / nu1), -(nu1 + 1.0) / 4.0)) - done = 1; - } - return (x); + double a , bmin, bplus, u, u1, u2, x, v, bdiff; + double sqrt (), pow(), drand48(); + int done; + + a = 1.0; + bplus = sqrt((2.0 * nu1) / (nu1 - 1.0) * pow(((nu1 + 1.0) / (nu1 - 1.0)), + (-(nu1 + 1.0) / 2.0))); + bmin = -1.0 * bplus; + bdiff = bplus - bmin; + done = 0; + while (done == 0) + { + u1 = drand48(); + u2 = drand48(); + u = a * u1; + v = bmin + bdiff * u2; + x = v / u; + if (u <= pow((1.0 + x * x / nu1), -(nu1 + 1.0) / 4.0)) + done = 1; + } + return (x); } @@ -1911,11 +1852,11 @@ /* updtseed */ /* Update seed file */ /* **************** - * + * * updtseed() - * + * * { FILE *out ; int i ; unsigned short *seed48() ; - * + * * myptr = seed48(seed16v) ; out = * fopen("/space/kcowles/libraries/seedfile","w") ; for (i = 0; i < 3; i++) * 
fprintf( out, "%10u", *(myptr + i) ) ; fprintf( out, "\n" ) ; @@ -1925,17 +1866,17 @@ /* Multiply a column vector times a row vector to return a matrix */ void vectmult(colv, rowv, collen, rowlen, retmat) - double colv[], rowv[], retmat[]; + double colv [], rowv[], retmat[]; int collen, rowlen; { - int i, j; + int i , j; - for (i = 0; i < collen; i++) - { - for (j = 0; j < rowlen; j++) - retmat[rowlen * i + j] = colv[i] * rowv[j]; - } + for (i = 0; i < collen; i++) + { + for (j = 0; j < rowlen; j++) + retmat[rowlen * i + j] = colv[i] * rowv[j]; + } } /* Return the l-infinity norm of a vector */ @@ -1945,15 +1886,15 @@ int n; { - double norm = 0.0; - int i; + double norm = 0.0; + int i; - for (i = 0; i < n; i++) - { - if (dabs(marray[i]) > norm) - norm = dabs(marray[i]); - } - return (norm); + for (i = 0; i < n; i++) + { + if (dabs(marray[i]) > norm) + norm = dabs(marray[i]); + } + return (norm); } @@ -1963,16 +1904,16 @@ double lambda, p; { - double y, u, temp, drand48(), pow(), log(); - long i, j, k; + double y , u, temp, drand48(), pow(), log(); + long i , j, k; - u = drand48(); + u = drand48(); - temp = -log(1.0 - u); + temp = -log(1.0 - u); - y = pow(temp, (1.0 / p)) / lambda; + y = pow(temp, (1.0 / p)) / lambda; - return (y); + return (y); } @@ -1980,95 +1921,94 @@ /* Generate a random Wishart matrix -- Odell and Feiveson algorithm */ void wishart(parmmat, df, n, retmat) - int df, n; + int df , n; double parmmat[], retmat[]; { - unsigned int i, j, k, l, nsq; - double gammavar(), normvect[2]; - void normvar(), mattrans(), chol(), matmult(); - double sqrt(); - long p = n; - char *calloc(); - double *chisqs, *normals, *cholparm, *choltran, *temp1, *temp2, - *work; - long *jpvt; - char *malloc(); - - nsq = n * n; - chisqs = (double *) malloc(n * sizeof(double)); - normals = (double *) malloc(nsq * sizeof(double)); - cholparm = (double *) malloc(nsq * sizeof(double)); - choltran = (double *) malloc(nsq * sizeof(double)); - temp1 = (double *) malloc(nsq * 
sizeof(double)); - temp2 = (double *) malloc(nsq * sizeof(double)); - work = (double *) malloc(n * sizeof(double)); - jpvt = (long *) malloc(n * sizeof(double)); - - for (i = 0; i < (n); i++) - chisqs[i] = 0; - for (i = 0; i < (nsq); i++) - normals[i] = 0; - for (i = 0; i < (nsq); i++) - cholparm[i] = 0; - for (i = 0; i < (nsq); i++) - choltran[i] = 0; - for (i = 0; i < (nsq); i++) - temp1[i] = 0; - for (i = 0; i < (nsq); i++) - temp2[i] = 0; - for (i = 0; i < (n); i++) - work[i] = 0; - for (i = 0; i < (n); i++) - jpvt[i] = 0; - - for (i = 0; i < n; i++) - { - chisqs[i] = gammavar((double) (df - i) / 2.0, 0.5); - for (j = 0; j < n; j += 2) - { - normvar(normvect); - normals[i * n + j] = normvect[0]; - if (j < n - 1) - normals[i * n + j + 1] = normvect[1]; - } - } - temp1[0] = chisqs[0]; - for (j = 1; j < n; j++) - { - temp1[(n + 1) * j] = chisqs[j]; - for (i = 0; i < j; i++) - { - l = n * i + j; - temp1[(n + 1) * j] += (normals[l] * normals[l]); - if (i != j) - { - temp1[l] = normals[l] * sqrt(chisqs[i]); - for (k = 0; k < i; k++) - temp1[l] += (normals[n * k + i] * normals[n * k + j]); - temp1[n * j + i] = temp1[l]; - } - } - } - - /* cholparm will be lower triangular choleski decomp of parmmat */ - - chol(parmmat, cholparm, n); - - mattrans(cholparm, choltran, n, n); /* choltran = cholparm - * transposed */ - - matmult(cholparm, temp1, n, n, n, temp2); - matmult(temp2, choltran, n, n, n, retmat); - - free(chisqs); - free(normals); - free(cholparm); - free(choltran); - free(temp1); - free(temp2); - free(work); - free(jpvt); + unsigned int i , j, k, l, nsq; + double gammavar(), normvect[2]; + void normvar (), mattrans(), chol(), matmult(); + double sqrt (); + long p = n; + char *calloc(); + double *chisqs, *normals, *cholparm, *choltran, *temp1, *temp2, *work; + long *jpvt; + char *malloc(); + + nsq = n * n; + chisqs = (double *)malloc(n * sizeof(double)); + normals = (double *)malloc(nsq * sizeof(double)); + cholparm = (double *)malloc(nsq * sizeof(double)); + 
choltran = (double *)malloc(nsq * sizeof(double)); + temp1 = (double *)malloc(nsq * sizeof(double)); + temp2 = (double *)malloc(nsq * sizeof(double)); + work = (double *)malloc(n * sizeof(double)); + jpvt = (long *)malloc(n * sizeof(double)); + + for (i = 0; i < (n); i++) + chisqs[i] = 0; + for (i = 0; i < (nsq); i++) + normals[i] = 0; + for (i = 0; i < (nsq); i++) + cholparm[i] = 0; + for (i = 0; i < (nsq); i++) + choltran[i] = 0; + for (i = 0; i < (nsq); i++) + temp1[i] = 0; + for (i = 0; i < (nsq); i++) + temp2[i] = 0; + for (i = 0; i < (n); i++) + work[i] = 0; + for (i = 0; i < (n); i++) + jpvt[i] = 0; + + for (i = 0; i < n; i++) + { + chisqs[i] = gammavar((double)(df - i) / 2.0, 0.5); + for (j = 0; j < n; j += 2) + { + normvar(normvect); + normals[i * n + j] = normvect[0]; + if (j < n - 1) + normals[i * n + j + 1] = normvect[1]; + } + } + temp1[0] = chisqs[0]; + for (j = 1; j < n; j++) + { + temp1[(n + 1) * j] = chisqs[j]; + for (i = 0; i < j; i++) + { + l = n * i + j; + temp1[(n + 1) * j] += (normals[l] * normals[l]); + if (i != j) + { + temp1[l] = normals[l] * sqrt(chisqs[i]); + for (k = 0; k < i; k++) + temp1[l] += (normals[n * k + i] * normals[n * k + j]); + temp1[n * j + i] = temp1[l]; + } + } + } + + /* cholparm will be lower triangular choleski decomp of parmmat */ + + chol(parmmat, cholparm, n); + + mattrans(cholparm, choltran, n, n); /* choltran = cholparm + * transposed */ + + matmult(cholparm, temp1, n, n, n, temp2); + matmult(temp2, choltran, n, n, n, retmat); + + free(chisqs); + free(normals); + free(cholparm); + free(choltran); + free(temp1); + free(temp2); + free(work); + free(jpvt); } @@ -2079,27 +2019,27 @@ double parmmat[], retmat[]; { - double gammavar(), normvect[2]; - void normvar(), mattrans(), chol(), matmult(); - double sqrt(), chisqs[2], normals, cholparm[2][2], choltran[2][2]; - double temp1[2][2], temp2[2][2]; - long i, j, k; - - - for (i = 0; i < 2; i++) - chisqs[i] = gammavar((double) (df - i) / 2.0, 0.5); - - normvar(normvect); - 
normals = normvect[0]; - - temp1[0][0] = chisqs[0]; - temp1[0][1] = temp1[1][0] = normals * sqrt(chisqs[0]); - temp1[1][1] = chisqs[1] + normals * normals; - - chol(parmmat, cholparm, 2); - mattrans(cholparm, choltran, 2, 2); - matmult(cholparm, temp1, 2, 2, 2, temp2); - matmult(temp2, choltran, 2, 2, 2, retmat); + double gammavar(), normvect[2]; + void normvar (), mattrans(), chol(), matmult(); + double sqrt (), chisqs[2], normals, cholparm[2][2], choltran[2][2]; + double temp1 [2][2], temp2[2][2]; + long i , j, k; + + + for (i = 0; i < 2; i++) + chisqs[i] = gammavar((double)(df - i) / 2.0, 0.5); + + normvar(normvect); + normals = normvect[0]; + + temp1[0][0] = chisqs[0]; + temp1[0][1] = temp1[1][0] = normals * sqrt(chisqs[0]); + temp1[1][1] = chisqs[1] + normals * normals; + + chol(parmmat, cholparm, 2); + mattrans(cholparm, choltran, 2, 2); + matmult(cholparm, temp1, 2, 2, 2, temp2); + matmult(temp2, choltran, 2, 2, 2, retmat); } @@ -2107,29 +2047,29 @@ double parmmat[], retmat[], df; { - double gammavar(), normvect[2]; - void normvar(), mattrans(), chol(), matmult(); - double sqrt(), chisqs[2], normals, cholparm[2][2], choltran[2][2]; - double temp1[2][2], temp2[2][2]; - long i, j, k; - - - for (i = 0; i < 2; i++) - { - chisqs[i] = gammavar((df - (double) i) / 2.0, 0.5); - printf("chisqs[%d] %7.4f \n", i, chisqs[i]); - } - - normvar(normvect); - normals = normvect[0]; - - temp1[0][0] = chisqs[0]; - temp1[0][1] = temp1[1][0] = normals * sqrt(chisqs[0]); - temp1[1][1] = chisqs[1] + normals * normals; - - chol(parmmat, cholparm, 2); - mattrans(cholparm, choltran, 2, 2); - matmult(cholparm, temp1, 2, 2, 2, temp2); - matmult(temp2, choltran, 2, 2, 2, retmat); + double gammavar(), normvect[2]; + void normvar (), mattrans(), chol(), matmult(); + double sqrt (), chisqs[2], normals, cholparm[2][2], choltran[2][2]; + double temp1 [2][2], temp2[2][2]; + long i , j, k; + + + for (i = 0; i < 2; i++) + { + chisqs[i] = gammavar((df - (double)i) / 2.0, 0.5); + 
printf("chisqs[%d] %7.4f \n", i, chisqs[i]); + } + + normvar(normvect); + normals = normvect[0]; + + temp1[0][0] = chisqs[0]; + temp1[0][1] = temp1[1][0] = normals * sqrt(chisqs[0]); + temp1[1][1] = chisqs[1] + normals * normals; + + chol(parmmat, cholparm, 2); + mattrans(cholparm, choltran, 2, 2); + matmult(cholparm, temp1, 2, 2, 2, temp2); + matmult(temp2, choltran, 2, 2, 2, retmat); } diff -Nru theseus-2.0.6/statken_old.c theseus-3.0.0/statken_old.c --- theseus-2.0.6/statken_old.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/statken_old.c 2014-05-13 16:48:52.000000000 +0000 @@ -121,28 +121,28 @@ while ((I > 0) ) { - FD = f((A[I] + 0.5 * H[I])); - FE = f((A[I] + 1.5 * H[I])); + FD = f((A[I] + 0.5 * H[I])); + FE = f((A[I] + 1.5 * H[I])); - S1 = H[I] * (FA[I] + 4.0 * FD + FC[I]) / 6.0; - S2 = H[I] * (FC[I] + 4.0 * FE + FB[I]) / 6.0; + S1 = H[I] * (FA[I] + 4.0 * FD + FC[I]) / 6.0; + S2 = H[I] * (FC[I] + 4.0 * FE + FB[I]) / 6.0; - V[0] = A[I]; - V[1] = FA[I]; - V[2] = FC[I]; - V[3] = FB[I]; - V[4] = H[I]; - V[5] = TOL[I]; - V[6] = S[I]; - LEV = L[I]; - - I--; /* line 100 */ - + V[0] = A[I]; + V[1] = FA[I]; + V[2] = FC[I]; + V[3] = FB[I]; + V[4] = H[I]; + V[5] = TOL[I]; + V[6] = S[I]; + LEV = L[I]; + + I--; /* line 100 */ + /* printf("S0 %7.4f S1 %7.4f S2 %7.4f \n", V[6], S1, S2) ; */ - if (dabs(S1 + S2 - V[6]) < V[5]) + if (dabs(S1 + S2 - V[6]) < V[5]) { - APP = APP + (S1 + S2); + APP = APP + (S1 + S2); /* printf("app %7.4f \n", APP) ; */ } else @@ -155,29 +155,29 @@ else { I++ ; - A[I] = V[0] + V[4]; - FA[I] = V[2]; - FC[I] = FE; - FB[I] = V[3]; - H[I] = 0.5 * V[4]; - TOL[I] = 0.5 * V[5]; - S[I] = S2; - L[I] = LEV + 1; - - I++; - A[I] = V[0]; - FA[I] = V[1]; - FC[I] = FD; - FB[I] = V[2]; - H[I] = H[I-1]; - TOL[I] = TOL[I-1]; - S[I] = S1; - L[I] = L[I-1]; + A[I] = V[0] + V[4]; + FA[I] = V[2]; + FC[I] = FE; + FB[I] = V[3]; + H[I] = 0.5 * V[4]; + TOL[I] = 0.5 * V[5]; + S[I] = S2; + L[I] = LEV + 1; + + I++; + A[I] = V[0]; + FA[I] = V[1]; + FC[I] = FD; + FB[I] = 
V[2]; + H[I] = H[I-1]; + TOL[I] = TOL[I-1]; + S[I] = S1; + L[I] = L[I-1]; /* printf("level %d \n", L[I]) ; */ } - } + } /* printf("i %d\n", I) ; */ - } + } /* printf("just before return APP %7.4f\n", APP) ; */ return(APP); @@ -695,7 +695,7 @@ if (alpha < 0.0 || beta < 0.0) { printf("negative parm for gamma\n") ; - exit(1); + exit(1); } E = exp(1.0) ; if (alpha <1.0) /* Ahrens and Dieter */ @@ -1254,7 +1254,7 @@ { for (j = 0; j < n2; j++) { -/* printf("%g\n",mat1[n1 * i +j]); */ +/* printf("%g\n",mat1[n1 * i +j]); */ retmat[n2 * i + j] = 0.0 ; for (k = 0; k < n1; k++) retmat[n2 * i + j] += mat1[n1 * i + k] * mat2[n2 * k + j]; @@ -1269,13 +1269,13 @@ int m1, n1, n2 ; { int i, j, k ; - for (i=0; i #ifdef _WIN32 - /* regular */ - const char tc_black[] = ""; - const char tc_red[] = ""; - const char tc_green[] = ""; - const char tc_yellow[] = ""; - const char tc_blue[] = ""; - const char tc_purple[] = ""; - const char tc_cyan[] = ""; - const char tc_white[] = ""; - - /* bold (xterm) or light (console) */ - const char tc_BLACK[] = ""; - const char tc_RED[] = ""; - const char tc_GREEN[] = ""; - const char tc_YELLOW[] = ""; - const char tc_BLUE[] = ""; - const char tc_PURPLE[] = ""; - const char tc_CYAN[] = ""; - const char tc_WHITE[] = ""; - - /* underline */ - const char tc_ublack[] = ""; - const char tc_ured[] = ""; - const char tc_ugreen[] = ""; - const char tc_uyellow[] = ""; - const char tc_ublue[] = ""; - const char tc_upurple[] = ""; - const char tc_ucyan[] = ""; - const char tc_uwhite[] = ""; - - /* blink */ - const char tc_bblack[] = ""; - const char tc_bred[] = ""; - const char tc_bgreen[] = ""; - const char tc_byellow[] = ""; - const char tc_bblue[] = ""; - const char tc_bpurple[] = ""; - const char tc_bcyan[] = ""; - const char tc_bwhite[] = ""; - - /* inverse */ - const char tc_iblack[] = ""; - const char tc_ired[] = ""; - const char tc_igreen[] = ""; - const char tc_iyellow[] = ""; - const char tc_iblue[] = ""; - const char tc_ipurple[] = ""; - const char 
tc_icyan[] = ""; - const char tc_iwhite[] = ""; - - /* concealed */ - const char tc_cblack[] = ""; - const char tc_cred[] = ""; - const char tc_cgreen[] = ""; - const char tc_cyellow[] = ""; - const char tc_cblue[] = ""; - const char tc_cpurple[] = ""; - const char tc_ccyan[] = ""; - const char tc_cwhite[] = ""; - - const char tc_NC[] = ""; + /* regular */ + const char tc_black[] = ""; + const char tc_red[] = ""; + const char tc_green[] = ""; + const char tc_yellow[] = ""; + const char tc_blue[] = ""; + const char tc_purple[] = ""; + const char tc_cyan[] = ""; + const char tc_white[] = ""; + + /* bold (xterm) or light (console) */ + const char tc_BLACK[] = ""; + const char tc_RED[] = ""; + const char tc_GREEN[] = ""; + const char tc_YELLOW[] = ""; + const char tc_BLUE[] = ""; + const char tc_PURPLE[] = ""; + const char tc_CYAN[] = ""; + const char tc_WHITE[] = ""; + + /* underline */ + const char tc_ublack[] = ""; + const char tc_ured[] = ""; + const char tc_ugreen[] = ""; + const char tc_uyellow[] = ""; + const char tc_ublue[] = ""; + const char tc_upurple[] = ""; + const char tc_ucyan[] = ""; + const char tc_uwhite[] = ""; + + /* blink */ + const char tc_bblack[] = ""; + const char tc_bred[] = ""; + const char tc_bgreen[] = ""; + const char tc_byellow[] = ""; + const char tc_bblue[] = ""; + const char tc_bpurple[] = ""; + const char tc_bcyan[] = ""; + const char tc_bwhite[] = ""; + + /* inverse */ + const char tc_iblack[] = ""; + const char tc_ired[] = ""; + const char tc_igreen[] = ""; + const char tc_iyellow[] = ""; + const char tc_iblue[] = ""; + const char tc_ipurple[] = ""; + const char tc_icyan[] = ""; + const char tc_iwhite[] = ""; + + /* concealed */ + const char tc_cblack[] = ""; + const char tc_cred[] = ""; + const char tc_cgreen[] = ""; + const char tc_cyellow[] = ""; + const char tc_cblue[] = ""; + const char tc_cpurple[] = ""; + const char tc_ccyan[] = ""; + const char tc_cwhite[] = ""; + + const char tc_NC[] = ""; #else - /* regular */ - const char 
tc_black[] = "\033[0;30m"; - const char tc_red[] = "\033[0;31m"; - const char tc_green[] = "\033[0;32m"; - const char tc_yellow[] = "\033[0;33m"; - const char tc_blue[] = "\033[0;34m"; - const char tc_purple[] = "\033[0;35m"; - const char tc_cyan[] = "\033[0;36m"; - const char tc_white[] = "\033[0;37m"; - - /* bold (xterm) or light (console) */ - const char tc_BLACK[] = "\033[1;30m"; - const char tc_RED[] = "\033[1;31m"; - const char tc_GREEN[] = "\033[1;32m"; - const char tc_YELLOW[] = "\033[1;33m"; - const char tc_BLUE[] = "\033[1;34m"; - const char tc_PURPLE[] = "\033[1;35m"; - const char tc_CYAN[] = "\033[1;36m"; - const char tc_WHITE[] = "\033[1;37m"; - - /* underline */ - const char tc_ublack[] = "\033[4;30m"; - const char tc_ured[] = "\033[4;31m"; - const char tc_ugreen[] = "\033[4;32m"; - const char tc_uyellow[] = "\033[4;33m"; - const char tc_ublue[] = "\033[4;34m"; - const char tc_upurple[] = "\033[4;35m"; - const char tc_ucyan[] = "\033[4;36m"; - const char tc_uwhite[] = "\033[4;37m"; - - /* blink */ - const char tc_bblack[] = "\033[5;30m"; - const char tc_bred[] = "\033[5;31m"; - const char tc_bgreen[] = "\033[5;32m"; - const char tc_byellow[] = "\033[5;33m"; - const char tc_bblue[] = "\033[5;34m"; - const char tc_bpurple[] = "\033[5;35m"; - const char tc_bcyan[] = "\033[5;36m"; - const char tc_bwhite[] = "\033[5;37m"; - - /* inverse */ - const char tc_iblack[] = "\033[7;30m"; - const char tc_ired[] = "\033[7;31m"; - const char tc_igreen[] = "\033[7;32m"; - const char tc_iyellow[] = "\033[7;33m"; - const char tc_iblue[] = "\033[7;34m"; - const char tc_ipurple[] = "\033[7;35m"; - const char tc_icyan[] = "\033[7;36m"; - const char tc_iwhite[] = "\033[7;37m"; - - /* concealed */ - const char tc_cblack[] = "\033[8;30m"; - const char tc_cred[] = "\033[8;31m"; - const char tc_cgreen[] = "\033[8;32m"; - const char tc_cyellow[] = "\033[8;33m"; - const char tc_cblue[] = "\033[8;34m"; - const char tc_cpurple[] = "\033[8;35m"; - const char tc_ccyan[] = 
"\033[8;36m"; - const char tc_cwhite[] = "\033[8;37m"; - - const char tc_NC[] = "\033[0m"; + /* regular */ + const char tc_black[] = "\033[0;30m"; + const char tc_red[] = "\033[0;31m"; + const char tc_green[] = "\033[0;32m"; + const char tc_yellow[] = "\033[0;33m"; + const char tc_blue[] = "\033[0;34m"; + const char tc_purple[] = "\033[0;35m"; + const char tc_cyan[] = "\033[0;36m"; + const char tc_white[] = "\033[0;37m"; + + /* bold (xterm) or light (console) */ + const char tc_BLACK[] = "\033[1;30m"; + const char tc_RED[] = "\033[1;31m"; + const char tc_GREEN[] = "\033[1;32m"; + const char tc_YELLOW[] = "\033[1;33m"; + const char tc_BLUE[] = "\033[1;34m"; + const char tc_PURPLE[] = "\033[1;35m"; + const char tc_CYAN[] = "\033[1;36m"; + const char tc_WHITE[] = "\033[1;37m"; + + /* underline */ + const char tc_ublack[] = "\033[4;30m"; + const char tc_ured[] = "\033[4;31m"; + const char tc_ugreen[] = "\033[4;32m"; + const char tc_uyellow[] = "\033[4;33m"; + const char tc_ublue[] = "\033[4;34m"; + const char tc_upurple[] = "\033[4;35m"; + const char tc_ucyan[] = "\033[4;36m"; + const char tc_uwhite[] = "\033[4;37m"; + + /* blink */ + const char tc_bblack[] = "\033[5;30m"; + const char tc_bred[] = "\033[5;31m"; + const char tc_bgreen[] = "\033[5;32m"; + const char tc_byellow[] = "\033[5;33m"; + const char tc_bblue[] = "\033[5;34m"; + const char tc_bpurple[] = "\033[5;35m"; + const char tc_bcyan[] = "\033[5;36m"; + const char tc_bwhite[] = "\033[5;37m"; + + /* inverse */ + const char tc_iblack[] = "\033[7;30m"; + const char tc_ired[] = "\033[7;31m"; + const char tc_igreen[] = "\033[7;32m"; + const char tc_iyellow[] = "\033[7;33m"; + const char tc_iblue[] = "\033[7;34m"; + const char tc_ipurple[] = "\033[7;35m"; + const char tc_icyan[] = "\033[7;36m"; + const char tc_iwhite[] = "\033[7;37m"; + + /* concealed */ + const char tc_cblack[] = "\033[8;30m"; + const char tc_cred[] = "\033[8;31m"; + const char tc_cgreen[] = "\033[8;32m"; + const char tc_cyellow[] = "\033[8;33m"; 
+ const char tc_cblue[] = "\033[8;34m"; + const char tc_cpurple[] = "\033[8;35m"; + const char tc_ccyan[] = "\033[8;36m"; + const char tc_cwhite[] = "\033[8;37m"; + + const char tc_NC[] = "\033[0m"; #endif diff -Nru theseus-2.0.6/termcol.h theseus-3.0.0/termcol.h --- theseus-2.0.6/termcol.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/termcol.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/._theseus.1 and /tmp/g2bOMTRwaC/theseus-3.0.0/._theseus.1 differ diff -Nru theseus-2.0.6/theseus.1 theseus-3.0.0/theseus.1 --- theseus-2.0.6/theseus.1 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/theseus.1 2014-05-13 16:48:52.000000000 +0000 @@ -1,6 +1,6 @@ .\" use 'man groff_man' to see the man page format macros .\" ---------------------------------------------------------------------------- -.TH THESEUS 1 "11 October 2012" "Brandeis University" "Likelihood Rocks" +.TH THESEUS 1 "13 May 2014" "Brandeis University" "Likelihood (and Bayes) Rocks" .\" ---------------------------------------------------------------------------- .SH NAME .\" ---- @@ -32,7 +32,7 @@ .\" ----------- .\" .B Theseus -superpositions a set of macromolecular structures simultaneously using the +superposes a set of macromolecular structures simultaneously using the method of maximum likelihood (ML), rather than the conventional least-squares criterion. 
.B Theseus diff -Nru theseus-2.0.6/theseus.c theseus-3.0.0/theseus.c --- theseus-2.0.6/theseus.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/theseus.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,91 +1,74 @@ /******************************************************************* - * -/_|:|_|_\- - * - * File: theseus.c - * - * Function: THESEUS: Maximum likelihood superpositioning of - * multiple macromolecular structures - * - * Author(s): Douglas L. Theobald - * Biochemistry Department - * Brandeis University - * MS 009 - * 415 South St - * Waltham, MA 02454-9110 - * - * dtheobald@gmail.com - * dtheobald@brandeis.edu - * - * Copyright: Copyright (c) 2004-2013 Douglas L. Theobald - * - * THESEUS is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * THESEUS is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with THESEUS in the file 'COPYING'; if not, write - * to the: - * - * Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, - * Boston, MA 02111-1307 USA - * - * Source: started anew. - * - * Notes: - * - * Change History: - * 9/6/04 3:24 PM Started source - * - ******************************************************************/ +* -/_|:|_|_\- +* +* File: theseus.c +* +* Function: THESEUS: Maximum likelihood superpositioning of +* multiple macromolecular structures +* +* Author(s): Douglas L. 
Theobald +* Biochemistry Department +* Brandeis University +* MS 009 +* 415 South St +* Waltham, MA 02454-9110 +* +* dtheobald@gmail.com +* dtheobald@brandeis.edu +* +* Copyright: Copyright (c) 2004-2014 Douglas L. Theobald +* +* THESEUS is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of +* the License, or (at your option) any later version. +* +* THESEUS is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public +* License along with THESEUS in the file 'COPYING'; if not, write +* to the: +* +* Free Software Foundation, Inc., +* 59 Temple Place, Suite 330, +* Boston, MA 02111-1307 USA +* +* Source: started anew. +* +* Notes: +* +* Change History: +* 9/6/04 3:24 PM Started source +* +* -/_|:|_|_\- +* +******************************************************************/ #include "theseus.h" #include "theseuslib.h" -const gsl_rng_type *T; -gsl_rng *r2; -#if 0 //defined(__APPLE__) - #include - #include - #include - #include - #include - #include - - static double start_time, end_time; -#endif - -/* global declarations (necessary for leave(), I think) */ -CdsArray *baseA = NULL; /* main array of selected pdb cds, never modified after read */ -PDBCdsArray *pdbA = NULL; /* array holding all of the pdb file coordinate info, - much of it unused in the actual calculations */ - -static void +void leave(int sig); -int -ConvertDryden(char *fp_name, const int dim, const int forms, const int lmarks); - -int -ConvertLele_freeform(char *fp_name, const int dim, const int forms, const int lmarks); - -int -ConvertLele(char *fp_name, const int dim, const int forms, const int lmarks); +/* global declarations (necessary for 
leave(), I think) */ +const gsl_rng_type *T = NULL; +gsl_rng *r2 = NULL; -void -WriteLeleModelFile(PDBCdsArray *pdbAr); +CdsArray *cdsA = NULL; /* main array of selected pdb cds, never modified */ +PDBCdsArray *pdbA = NULL; /* pdb file coordinate info, much of it unused */ +Algorithm *algo = NULL; +Statistics *stats = NULL; +/* void testcrush(CdsArray *cdsA) { int i, j, m, n; + double lndistsqr; for (i = 0; i < cdsA->cnum; ++i) { @@ -93,25 +76,148 @@ { for (m = 0; m < cdsA->cds[i]->vlen; ++m) for (n = 0; n < cdsA->cds[j]->vlen; ++n) - log(SqrCdsDist(cdsA->cds[i], m, cdsA->cds[j], n)); + lndistsqr = log(SqrCdsDist(cdsA->cds[i], m, cdsA->cds[j], n)); } } } +*/ + + +static void +RotPrincAxes(CdsArray *cdsA) +{ + int i; +// double **x90z90 = MatAlloc(3,3); + /* double x90[3][3] = {{ 1, 0, 0}, { 0, 0, 1}, { 0,-1, 0}}; */ + /* double z90[3][3] = {{ 0, 1, 0}, {-1, 0, 0}, { 0, 0, 1}}; */ + /* double x90z90[3][3] = {{ 0, 1, 0}, { 0, 0, 1}, { 1, 0, 0}}; */ + + /* this orients the least -> most variable axes along x, y, z respectively (??) */ + CalcCdsPrincAxes(cdsA->avecds, cdsA->avecds->matrix, cdsA->tmpmat3a, cdsA->tmpmat3b, cdsA->tmpvec3a, cdsA->w); + +// memset(&x90z90[0][0], 0, 9 * sizeof(double)); +// x90z90[0][1] = x90z90[1][2] = x90z90[2][0] = 1.0; +// +// /* Rotate the family 90deg along x and then along z. +// This puts the most variable axis horizontal, the second most variable +// axis vertical, and the least variable in/out of screen. 
*/ +// Mat3MultIp(cdsA->avecds->matrix, (const double **) x90z90); + + for (i = 0; i < cdsA->cnum; ++i) + Mat3MultIp(cdsA->cds[i]->matrix, (const double **) cdsA->avecds->matrix); + +// MatDestroy(&x90z90); +} + + +static double +SuperPoseCds(double **c1, double **c2, const int *nu, const int vlen, + double **rotmat, double *trans, + double *norm1, double *norm2, double *innprod) +{ + double **tmpmat1 = MatAlloc(3, 3); + double **tmpmat2 = MatAlloc(3, 3); + double **tmpmat3 = MatAlloc(3, 3); + double *tmpvec = malloc(3 * sizeof(double)); + double *newtrans = malloc(3 * sizeof(double)); + double *cen1 = calloc(3, sizeof(double)); + double *cen2 = calloc(3, sizeof(double)); + double sumdev; + int i; + + CenMassNuVec((const double **) c1, nu, cen1, vlen); + CenMassNuVec((const double **) c2, nu, cen2, vlen); + + NegTransCdsIp(c1, cen1, vlen); + NegTransCdsIp(c2, cen2, vlen); + + sumdev = ProcGSLSVDvanNu2((const double **) c1, (const double **) c2, nu, + vlen, rotmat, + tmpmat1, tmpmat2, tmpmat3, tmpvec, + norm1, norm2, innprod); + + TransCdsIp(c1, cen1, vlen); + TransCdsIp(c2, cen2, vlen); + + InvRotVec(newtrans, cen2, rotmat); + + for (i = 0; i < 3; ++i) + trans[i] = newtrans[i] - cen1[i]; + + MatDestroy(&tmpmat1); + MatDestroy(&tmpmat2); + MatDestroy(&tmpmat3); + free(tmpvec); + free(newtrans); + free(cen1); + free(cen2); + + return(sumdev); +} + + +static double +SuperPose2Anchor(CdsArray *cdsA, char *anchorf_name) +{ + double **anchormat = MatAlloc(3, 3); + double *anchortrans = malloc(3 * sizeof(double)); + double *tmpanchortrans = malloc(3 * sizeof(double)); + double *trans = malloc(3 * sizeof(double)); + double norm1, norm2, innprod, sumdev; + const int cnum = cdsA->cnum; + int i, j, anchor = 0; + + for (i = 0; i < cnum; ++i) + { + if (strncmp(anchorf_name, cdsA->cds[i]->filename, FILENAME_MAX - 1) == 0) + { + anchor = i; + break; + } + } + + sumdev = SuperPoseCds(cdsA->cds[anchor]->wc, cdsA->cds[anchor]->sc, + cdsA->cds[anchor]->nu, cdsA->vlen, + anchormat, 
anchortrans, + &norm1, &norm2, &innprod); + + for (i = 0; i < cnum; ++i) + { + InvRotVec(tmpanchortrans, anchortrans, cdsA->cds[i]->matrix); + + for (j = 0; j < 3; ++j) + cdsA->cds[i]->center[j] = cdsA->cds[i]->translation[j] = + cdsA->cds[i]->center[j] - tmpanchortrans[j]; + + Mat3MultIp(cdsA->cds[i]->matrix, (const double **) anchormat); + } + + for (j = 0; j < 3; ++j) + cdsA->avecds->center[j] = cdsA->avecds->translation[j] = + anchortrans[j]; + + Mat3Cpy(cdsA->avecds->matrix, (const double **) anchormat); + + free(trans); + free(anchortrans); + free(tmpanchortrans); + MatDestroy(&anchormat); + + return(sumdev); +} int main(int argc, char *argv[]) { int i = 0, j; - - Algorithm *algo = NULL; - Statistics *stats = NULL; char *sup_name = NULL, *ave_name = NULL, *transf_name = NULL, - *rand_transf_name = NULL, *mean_ip_name = NULL, *sup_var_name = NULL; - //#if !defined(__APPLE__) - clock_t start_time, end_time; - //#endif - //unsigned long seed = (unsigned long) time(NULL); + *rand_transf_name = NULL, *mean_ip_name = NULL, *sup_var_name = NULL, + *tps_sup_name = NULL, *tps_ave_name = NULL; + + int cnum; + + clock_t start_time, end_time; signal(SIGINT, leave); signal(SIGABRT, leave); @@ -127,10 +233,11 @@ // printf("\n WARNING2: could not set core limit size to 0. 
\n\n"); //#endif - baseA = CdsArrayInit(); - algo = baseA->algo; - stats = baseA->stats; - ParseCmdLine(argc, argv, baseA); + cdsA = CdsArrayInit(); + algo = AlgorithmInit(); + stats = StatsInit(); + + ParseCmdLine(argc, argv, cdsA); algo->infiles = &argv[optind]; algo->filenum = argc - optind; @@ -151,25 +258,30 @@ if (algo->convlele > 0) { - //ConvertDryden(algo->infiles[0], 2, algo->iterations /* # coordinates/forms */, algo->landmarks /* # of landmarks */); - ConvertLele_freeform(algo->infiles[0], 3, algo->iterations /* # coordinates/forms */, algo->landmarks /* # of landmarks */); +// ConvertDryden(algo->infiles[0], 2, +// algo->iterations, /* # coordinates/forms */ +// algo->landmarks /* # of landmarks */); + ConvertLele_freeform(algo->infiles[0], 3, + algo->iterations, /* # coordinates/forms */ + algo->landmarks /* # of landmarks */); PrintTheseusTag(); exit(EXIT_SUCCESS); } - if (/* algo->random > */ 0) /* for benchmarking, testing ML algorithm */ + // for benchmarking, testing ML algorithm + if (/* algo->random > */ 0) { - gsl_rng_env_setup(); - gsl_rng_default_seed = time(NULL); - T = gsl_rng_ranlxs2; - r2 = gsl_rng_alloc(T); + gsl_rng_env_setup(); + gsl_rng_default_seed = time(NULL) + getpid() + clock(); + T = gsl_rng_ranlxs2; + r2 = gsl_rng_alloc(T); if (algo->random != '0') { strcat(algo->rootname, "_rand"); //printf("\nfmodel\n = %d", algo->fmodel); pdbA = GetPDBCds(algo->infiles, algo->filenum, algo->fmodel, algo->amber, algo->atom_names); -// if (algo->fmodel == 1) +// if (algo->fmodel) // { // pdbA = MakeRandPDBCds(algo->iterations, /* # coordinates/forms */ // algo->landmarks, /* # of landmarks */ @@ -182,24 +294,26 @@ pdbA->vlen = pdbA->cds[0]->vlen; PDBCdsArrayAllocLen(pdbA, pdbA->vlen); - GetCdsSelection(baseA, pdbA); + cnum = pdbA->cnum; + GetCdsSelection(cdsA, pdbA); + //printf("HERE2\n\n"); fflush(NULL); - RandCds_2sdf(baseA, r2); - memcpy(baseA->avecds->resSeq, baseA->cds[0]->resSeq, baseA->vlen * sizeof(int)); - 
memcpy(baseA->avecds->chainID, baseA->cds[0]->chainID, baseA->vlen * sizeof(char)); - memcpy(baseA->avecds->resName_space, baseA->cds[0]->resName_space, baseA->vlen * 4 * sizeof(char)); - AveCds(baseA); + RandCds_2sdf(cdsA, r2); + memcpy(cdsA->avecds->resSeq, cdsA->cds[0]->resSeq, cdsA->vlen * sizeof(int)); + memcpy(cdsA->avecds->chainID, cdsA->cds[0]->chainID, cdsA->vlen * sizeof(char)); + memcpy(cdsA->avecds->resName_space, cdsA->cds[0]->resName_space, cdsA->vlen * 4 * sizeof(char)); + AveCds(cdsA); - for (i = 0; i < baseA->cnum; ++i) - CopyCds2PDB(pdbA->cds[i], baseA->cds[i]); + for (i = 0; i < cnum; ++i) + CopyCds2PDB(pdbA->cds[i], cdsA->cds[i]); printf(" Writing CA coordinates of random structures ... \n"); fflush(NULL); sup_name = mystrcat(algo->rootname, "_sup.pdb"); WriteTheseusModelFile(pdbA, algo, stats, sup_name); - CopyCds2PDB(pdbA->avecds, baseA->avecds); + CopyCds2PDB(pdbA->avecds, cdsA->avecds); printf(" Writing CA coordinates of average of random structures ... \n"); fflush(NULL); @@ -209,26 +323,26 @@ printf(" Calculating statistics of random structures ... \n"); fflush(NULL); - CalcPreStats(baseA); - PrintSuperposStats(baseA); + CalcPreStats(cdsA); + PrintSuperposStats(cdsA); } - RandRotCdsArray(baseA, r2); - /* RandTransCdsArray(baseA, 0.004); */ + RandRotCdsArray(cdsA, r2); + RandTransCdsArray(cdsA, 0.004, r2); printf(" Writing CA coordinates of transformed random structures ... 
\n"); fflush(NULL); - for (i = 0; i < baseA->cnum; ++i) - CopyCds2PDB(pdbA->cds[i], baseA->cds[i]); + for (i = 0; i < cnum; ++i) + CopyCds2PDB(pdbA->cds[i], cdsA->cds[i]); transf_name = mystrcat(algo->rootname, "_transf.pdb"); WriteTheseusModelFile(pdbA, algo, stats, transf_name); - WriteLeleModelFile(pdbA); + // WriteLeleModelFile(pdbA); PrintTheseusTag(); -/* CdsArrayDestroy(&baseA); */ -/* PDBCdsArrayDestroy(&pdbA); */ +// CdsArrayDestroy(&cdsA); +// PDBCdsArrayDestroy(&pdbA); exit(EXIT_SUCCESS); } @@ -241,9 +355,19 @@ pdbA = ReadBinPDBCdsArray(algo->infiles[0]); } + else if (algo->morphfile) + { + if (algo->filenum) + printf(" Reading tps file ... \n"); + else + printf(" Reading %d tps files ... \n", algo->filenum); + fflush(NULL); + + pdbA = GetTPSCds(algo->infiles, algo->filenum); + } else { - if (algo->filenum == 1) + if (algo->filenum) printf(" Reading pdb file ... \n"); else printf(" Reading %d pdb files ... \n", algo->filenum); @@ -253,31 +377,32 @@ /* PrintPDBCds(stdout, pdbA->cds[0]); */ } - if (algo->fasta == 1) + cnum = pdbA->cnum; + if (algo->fasta) { - if (pdbA->cnum < 1) + if (cnum < 1) { printf("\n -> Found no PDB cds. Could not determine a sequence. <- \n"); Usage(0); exit(EXIT_FAILURE); } - printf(" Writing FASTA format .fst files (%d) ... \n", pdbA->cnum); + printf(" Writing FASTA format .fst files (%d) ... \n", cnum); pdb2fst(pdbA); PrintTheseusTag(); exit(EXIT_SUCCESS); } - if (pdbA->cnum < 2) + if (cnum < 2) { printf("\n -> Found less than two PDB cds. Could not do superposition. <- \n"); Usage(0); exit(EXIT_FAILURE); } - printf(" Successfully read %d models and/or structures \n", pdbA->cnum); + printf(" Successfully read %d models and/or structures \n", cnum); - if (algo->binary == 1) + if (algo->binary) { printf(" Writing binary coordinates file ... 
\n"); fflush(NULL); @@ -286,13 +411,13 @@ PrintTheseusTag(); exit(EXIT_SUCCESS); } - else if (algo->alignment == 1) + else if (algo->alignment) { printf(" Reading multiple sequence alignment ... \n"); fflush(NULL); - Align2MSA(pdbA, baseA, baseA->msafile_name, baseA->mapfile_name); - PDBCdsArrayAllocLen(pdbA, baseA->vlen); + Align2MSA(pdbA, cdsA, cdsA->msafile_name, cdsA->mapfile_name); + PDBCdsArrayAllocLen(pdbA, cdsA->vlen); } else { @@ -301,39 +426,35 @@ pdbA->vlen = NMRCheckPDBCdsArray(pdbA); PDBCdsArrayAllocLen(pdbA, pdbA->vlen); - GetCdsSelection(baseA, pdbA); + GetCdsSelection(cdsA, pdbA); } - baseA->pdbA = pdbA; - pdbA->cdsA = baseA; + cdsA->pdbA = pdbA; + pdbA->cdsA = cdsA; if (algo->scalefactor > 1.0 || algo->scalefactor < 1.0) { - for (i = 0; i < baseA->cnum; ++i) - ScaleCds(baseA->cds[i], algo->scalefactor); + for (i = 0; i < cnum; ++i) + ScaleCds(cdsA->cds[i], algo->scalefactor); } - /* CalcPreStats(baseA); */ - - #if 0 // defined(__APPLE__) - start_time = seconds(); - #else - start_time = clock(); - #endif + /* CalcPreStats(cdsA); */ if (algo->random == '0') { - gsl_rng_env_setup(); - gsl_rng_default_seed = time(NULL); - T = gsl_rng_ranlxs2; - r2 = gsl_rng_alloc(T); + gsl_rng_env_setup(); + gsl_rng_default_seed = time(NULL) + getpid() + clock(); + T = gsl_rng_ranlxs2; + r2 = gsl_rng_alloc(T); - RandRotCdsArray(baseA, r2); - RandTransCdsArray(baseA, 30, r2); + RandRotCdsArray(cdsA, r2); + RandTransCdsArray(cdsA, 300, r2); printf(" Writing CA coordinates of transformed random structures ... \n"); fflush(NULL); - for (i = 0; i < baseA->cnum; ++i) - CopyCds2PDB(pdbA->cds[i], baseA->cds[i]); + + for (i = 0; i < cnum; ++i) + CopyCds2PDB(pdbA->cds[i], cdsA->cds[i]); + rand_transf_name = mystrcat(algo->rootname, "_rand_transf.pdb"); WriteTheseusModelFile(pdbA, algo, stats, rand_transf_name); } @@ -342,16 +463,23 @@ { printf(" FUN!!!!! 
%d \n", algo->FragDist); fflush(NULL); - FragDistPu(baseA, algo->FragDist, 2, algo->pu); + FragDistPu(cdsA, algo->FragDist, 2, algo->pu); } - else if (algo->info == 1) + else if (algo->info) { printf(" Calculating superposition statistics ... \n"); fflush(NULL); - memsetd(baseA->w, 1.0, baseA->vlen); + memsetd(cdsA->w, 1.0, cdsA->vlen); algo->rounds = 100; - CalcStats(baseA); + + if (algo->covweight) + { + SetupCovWeighting(cdsA); + memsetd(cdsA->evals, 1.0, cdsA->vlen); + } + + CalcStats(cdsA); } else if (algo->mixture > 1) { @@ -362,106 +490,194 @@ { printf(" Using %d threads ... \n", algo->threads); fflush(NULL); - Mixture_pth(baseA, pdbA); + Mixture_pth(cdsA, pdbA); } else { - Mixture(baseA, pdbA); + Mixture(cdsA, pdbA); } } + else if (algo->bayes > 0) + { + printf(" Calculating Gibbs-Metropolis Bayesian superposition ... \n"); + fflush(NULL); + + InitializeStates(cdsA); + + if (algo->domp) + MultiPose(cdsA); + + GibbsMet(cdsA); + } else { printf(" Calculating superposition transformations ... \n"); fflush(NULL); + start_time = clock(); + if (algo->threads > 0) { printf(" Using %d threads ... 
\n", algo->threads); fflush(NULL); - MultiPose_pth(baseA); - //MultiPoseLib(baseA); - //test_charmm(baseA); + //MultiPose_pth(cdsA); + InitializeStates(cdsA); + MultiPoseLib(cdsA); // DLT Broken, needs initialization + //test_charmm(cdsA); } else { - MultiPose(baseA); - //testcrush(baseA); + InitializeStates(cdsA); + MultiPose(cdsA); + //testcrush(cdsA); } - } - #if 0 //defined(__APPLE__) - end_time = seconds(); - algo->milliseconds = (double) (end_time - start_time) / 0.001; - #else end_time = clock(); algo->milliseconds = (double) (end_time - start_time) / ((double) CLOCKS_PER_SEC * 0.001); - #endif + } -/* if (algo->print_weight == 1) */ -/* { */ -/* fputc('\n', stdout); */ -/* for (i = 0; i < baseA->vlen; ++i) */ -/* printf(" atom[%4d] weight = %f, variance = %f, \n", */ -/* baseA->cds[0]->resSeq[i], baseA->w[i], baseA->var[i]); */ -/* } */ +// if (algo->scale > 0) +// { +// putchar('\n'); +// for (i = 0; i < cnum; ++i) +// printf("scale[%3d]: %20.8f\n", i, cds[i]->scale / cds[0]->scale); +// putchar('\n'); +// fflush(NULL); +// } + + printf(" Calculating statistics ... 
\n"); + fflush(NULL); - PrintSuperposStats(baseA); +/* + fp = fopen("distcor.txt", "w"); + if (CovMat == NULL) + CovMat = MatAlloc(vlen, vlen); - if (algo->write_file == 1) + CalcCovMat(cdsA); + DistMatsAlloc(cdsA); + + CalcMLDistMat(cdsA); + + for (i = 0; i < vlen; ++i) + for (j = 0; j < i; ++j) + fprintf(fp, "%6d % 10.3f % 8.3e\n", + i-j, + cdsA->Dij_matrix[i][j], + CovMat[i][j] / sqrt(CovMat[i][i] * CovMat[j][j])); + + fclose(fp); +*/ + +/* if (algo->weight == 200) */ +/* unremlvar(cdsA); */ + +/* #include "internmat.h" */ +/* if (algo->doave) */ +/* AveCds(cdsA); */ +/* CalcCovMat(cdsA); */ +/* PrintCovMatGnuPlot((const double **) CovMat, vlen, "cov.mat"); */ +/* for (i = 0; i < vlen; ++i) */ +/* for (j = 0; j < vlen; ++j) */ +/* CovMat[i][j] -= internmat[i][j]; */ +/* PrintCovMatGnuPlot((const double **) CovMat, vlen, "covdiff.mat"); */ + +/* CovMat2CorMat(CovMat, vlen); */ +/* PrintCovMatGnuPlot((const double **) CovMat, vlen, "corr.mat"); */ +/* memcpy(&CovMat[0][0], &internmat[0][0], vlen * vlen * sizeof(double)); */ +/* PrintCovMatGnuPlot((const double **) CovMat, vlen, "cov_true.mat"); */ +/* CovMat2CorMat(CovMat, vlen); */ +/* PrintCovMatGnuPlot((const double **) CovMat, vlen, "corr_true.mat"); */ + +/* CovMatsDestroy(cdsA); */ + + if (algo->covweight && (algo->write_file > 0 || algo->info)) + { + double *evals = malloc(cdsA->vlen * sizeof(double)); + char *mp_cov_name = NULL; + + EigenvalsGSL((const double **) cdsA->CovMat, cdsA->vlen, evals); + + /* VecPrint(evals, vlen); */ + mp_cov_name = mystrcat(algo->rootname, "_mp_cov.mat"); + PrintCovMatGnuPlot((const double **) cdsA->CovMat, cdsA->vlen, mp_cov_name); + free(mp_cov_name); +/* CovMat2CorMat(CovMat, vlen); */ +/* PrintCovMatGnuPlot((const double **) CovMat, vlen, mystrcat(algo->rootname, "_cor.mat")); */ + CalcPRMSD(cdsA); + WriteInstModelFile("_mp.pdb", cdsA); + free(evals); + } + + WriteDistMatTree(cdsA); + + CalcStats(cdsA); + + if (algo->ssm) + WriteEdgarSSM(cdsA); + + if (cdsA->anchorf_name 
!= NULL) /* orient to a user-specified structure */ + SuperPose2Anchor(cdsA, cdsA->anchorf_name); + else if (algo->princaxes) /* orient perpendicular to principal axes of mean cds */ + RotPrincAxes(cdsA); /* makes for nice viewing */ + + if (algo->olve && algo->write_file) + WriteOlveFiles(cdsA); + + if (algo->bayes == 0) + PrintSuperposStats(cdsA); + + if (algo->write_file && algo->bayes == 0) { printf(" Transforming coordinates ... \n"); fflush(NULL); if (algo->atoms == 2) /* 2 = all atoms */ { - for (j = 0; j < baseA->cnum; ++j) - memcpy(pdbA->cds[j]->tempFactor, baseA->avecds->b, baseA->vlen * sizeof(double)); + for (j = 0; j < cnum; ++j) + memcpy(pdbA->cds[j]->tempFactor, cdsA->avecds->b, cdsA->vlen * sizeof(double)); } - for (i = 0; i < pdbA->cnum; ++i) + for (i = 0; i < cnum; ++i) { - Mat3Cpy(pdbA->cds[i]->matrix, (const double **) baseA->cds[i]->matrix); - memcpy(pdbA->cds[i]->translation, baseA->cds[i]->translation, 3 * sizeof(double)); + Mat3Cpy(pdbA->cds[i]->matrix, (const double **) cdsA->cds[i]->matrix); + memcpy(pdbA->cds[i]->translation, cdsA->cds[i]->translation, 3 * sizeof(double)); + pdbA->cds[i]->scale = cdsA->cds[i]->scale; } - Mat3Cpy(pdbA->avecds->matrix, (const double **) baseA->avecds->matrix); - memcpy(pdbA->avecds->translation, baseA->avecds->translation, 3 * sizeof(double)); + Mat3Cpy(pdbA->avecds->matrix, (const double **) cdsA->avecds->matrix); + memcpy(pdbA->avecds->translation, cdsA->avecds->translation, 3 * sizeof(double)); - for (i = 0; i < pdbA->cnum; ++i) + for (i = 0; i < cnum; ++i) TransformPDBCdsIp(pdbA->cds[i]); - if (algo->fullpca == 1 && algo->alignment == 0) + transf_name = mystrcat(algo->rootname, "_transf.txt"); + WriteTransformations(cdsA, transf_name); + + if (algo->fullpca && algo->alignment == 0) { printf(" Writing anisotropic Principal Component coordinate files ... 
\n"); fflush(NULL); - if (algo->morph == 1) - WritePCAMorphFile(pdbA, baseA, algo->rootname); + if (algo->morph) + WritePCAMorphFile(pdbA, cdsA, algo->rootname); else - WritePCAProjections(pdbA, baseA, algo->rootname); + WritePCAProjections(pdbA, cdsA, algo->rootname); } else if (algo->pca > 0 && algo->alignment == 0) { printf(" Writing isotropic Principal Component coordinate files ... \n"); fflush(NULL); - WritePCAFile(pdbA, baseA, algo->rootname); + WritePCAFile(pdbA, cdsA, algo->rootname); } - if (algo->modelpca > 0) - { - printf(" Writing model Principal Component coordinate files ... \n"); - fflush(NULL); - WriteModelPCAFile(pdbA, baseA, algo->rootname); - } - - if (algo->alignment == 1) + if (algo->alignment) { Align2segID(pdbA); - + if (algo->olve > 0) { double olve; - for (i = 0; i < pdbA->cnum; ++i) + for (i = 0; i < cnum; ++i) { for (j = 0; j < pdbA->cds[i]->vlen; ++j) { @@ -478,7 +694,13 @@ sup_name = mystrcat(algo->rootname, "_sup.pdb"); WriteTheseusModelFile(pdbA, algo, stats, sup_name); - if (algo->alignment == 1) + if (algo->morphfile) + { + tps_sup_name = mystrcat(algo->rootname, "_sup.tps"); + WriteTheseusTPSModelFile(pdbA, tps_sup_name); + } + + if (algo->alignment) WriteTheseusPDBFiles(pdbA, algo, stats); if (algo->binary == 3 || algo->binary == 4) @@ -492,65 +714,49 @@ printf(" Writing average coordinate file ... 
\n"); fflush(NULL); - TransformCdsIp(baseA->avecds); - CopyCds2PDB(pdbA->avecds, baseA->avecds); + TransformCdsIp(cdsA->avecds); + CopyCds2PDB(pdbA->avecds, cdsA->avecds); ave_name = mystrcat(algo->rootname, "_ave.pdb"); WriteAvePDBCdsFile(pdbA, ave_name); - if (baseA->avecds->innerprod == NULL) - baseA->avecds->innerprod = MatAlloc(baseA->vlen, baseA->vlen); + if (algo->morphfile) + { + tps_ave_name = mystrcat(algo->rootname, "_ave.tps"); + WriteAveTPSCdsFile(pdbA, tps_ave_name); + } + + if (cdsA->avecds->outerprod == NULL) + cdsA->avecds->outerprod = MatAlloc(cdsA->vlen, cdsA->vlen); - if (algo->ipmat == 1) + if (algo->ipmat) { - printf(" Writing mean inner product file ... \n"); - fflush(NULL); + printf(" Writing mean inner product file ... \n"); + fflush(NULL); - CenMass(baseA->avecds); - ApplyCenterIp(baseA->avecds); - CdsInnerProd(baseA->avecds); + CenMass(cdsA->avecds); + ApplyCenterIp(cdsA->avecds); + CdsInnerProd(cdsA->avecds); mean_ip_name = mystrcat(algo->rootname, "_mean_ip.mat"); - PrintCovMatGnuPlot((const double **) baseA->avecds->innerprod, baseA->vlen, mean_ip_name); + PrintCovMatGnuPlot((const double **) cdsA->avecds->outerprod, cdsA->vlen, mean_ip_name); } - if (algo->alignment == 1) + if (algo->alignment) { sup_var_name = mystrcat(algo->rootname, "_sup_var.pdb"); - Align2Vars(pdbA, baseA); + Align2Vars(pdbA, cdsA); WriteTheseusModelFile(pdbA, algo, stats, sup_var_name); strcat(algo->rootname, "_var"); WriteTheseusPDBFiles(pdbA, algo, stats); } } - -/* FILE *fp = fopen("pdbdists.txt" ,"w"); */ -/* double xx,yy,zz; */ -/* double vv, vsum; */ -/* */ -/* vsum = 0.0; */ -/* for (j = 0; j < baseA->vlen; ++j) */ -/* vsum += 1.0/baseA->var[j];; */ -/* */ -/* for (i = 0; i < baseA->cnum; ++i) */ -/* {i=0; */ -/* for (j = 0; j < baseA->vlen; ++j) */ -/* { */ -/* xx = baseA->cds[i]->x[j]; */ -/* yy = baseA->cds[i]->y[j]; */ -/* zz = baseA->cds[i]->z[j]; */ -/* vv = vsum / baseA->var[j]; */ -/* vv=1.0; */ -/* //fprintf(fp, "%8.3e\n", xx*xx+yy*yy+zz*zz); */ 
-/* fprintf(fp, "%8.3e\n", xx/vv); */ -/* fprintf(fp, "%8.3e\n", yy/vv); */ -/* fprintf(fp, "%8.3e\n", zz/vv); */ -/* } */ -/* break; */ -/* } */ -/* fprintf(fp, "\n\n"); */ PrintTheseusTag(); - CdsArrayDestroy(&baseA); + AlgorithmDestroy(algo); + free(stats); + stats = NULL; + + CdsArrayDestroy(&cdsA); PDBCdsArrayDestroy(&pdbA); if (sup_name != NULL) @@ -566,9 +772,7 @@ if (mean_ip_name != NULL) free(mean_ip_name); -// pthread_exit(NULL); - - return (EXIT_SUCCESS); + exit(EXIT_SUCCESS); } @@ -577,9 +781,8 @@ { int option; extern char *optarg; - extern int optind, opterr, optopt; - Algorithm *algo = bseA->algo; - + extern int optind, optopt; + int option_index = 0; int i, cmdlinelen, argvlen; char space[] = " "; @@ -603,18 +806,14 @@ strncpy(algo->argv[i], argv[i], argvlen); } - /* */ - int option_index = 0; - char short_options[] = "a:A:b:B:cCd:D:e:EfFg:GhHi:Ij:Jk:K:lLm:M:nNo:Op:P:q:Q:r:R:s:S:tT:uUvVw:WxXyYz:Z0123:4:56789"; + char short_options[] = "a:A:b:B:cCd:D:e:EfFg:GhHi:Ij:Jk:K:lLM:nNo:Op:P:qQ:r:R:s:S:T:uUvVw:WxXyYz:Z0123:4:69"; struct option long_options[] = { - {"add", required_argument, 0, 0 }, - {"delete", required_argument, 0, 0 }, {"alignment", required_argument, 0, 'A'}, {"amber", no_argument, 0, 0 }, {"bayes", required_argument, 0, 'b'}, - {"covariance", no_argument, 0, 'c'}, // this one is aliased to the short_option 'c' + {"covariance", no_argument, 0, 'c'}, // aliased to the short_option 'c' {"fasta", no_argument, 0, 'F'}, {"help", no_argument, 0, 'h'}, {"info", no_argument, 0, 'I'}, @@ -622,11 +821,19 @@ {"iterations", required_argument, 0, 'i'}, {"leastsquares", no_argument, 0, 'l'}, {"mapfile", required_argument, 0, 'M'}, - {"notrans", no_argument, 0, 0 }, + {"morphfile", no_argument, 0, 'q'}, + {"noave" , no_argument, 0, 'y'}, + {"nomp" , no_argument, 0, 0 }, + {"notrans", no_argument, 0, '0'}, + {"norot", no_argument, 0, '1'}, + {"nohierarch", no_argument, 0, 0 }, + {"nocovars", no_argument, 0, 0 }, {"orient", required_argument, 0, 'o'}, 
{"pca", required_argument, 0, 'P'}, {"precision", required_argument, 0, 'p'}, + {"randgibbs", no_argument, 0, 0 }, {"rootname", required_argument, 0, 'r'}, + {"scaleanchor", required_argument, 0, 0 }, {"seed", required_argument, 0, 'X'}, {"selection", required_argument, 0, 's'}, {"verbose", optional_argument, 0, 'W'}, @@ -639,17 +846,21 @@ switch (option) /* See Algorithm structure in Cds.h for explanations of these flags/options */ { case 0: - if (strcmp(long_options[option_index].name, "add") == 0) + if (strcmp(long_options[option_index].name, "notrans") == 0) + { + algo->dotrans = 0; + } + else if (strcmp(long_options[option_index].name, "nohierarch") == 0) { - printf("add with arg %s\n", optarg); + algo->dohierarch = 0; } - else if (strcmp(long_options[option_index].name, "delete") == 0) + else if (strcmp(long_options[option_index].name, "nocovars") == 0) { - printf("delete with arg %s\n", optarg); + algo->docovars = 0; } - else if (strcmp(long_options[option_index].name, "notrans") == 0) + else if (strcmp(long_options[option_index].name, "nomp") == 0) { - algo->notrans = 1; + algo->domp = 0; } else if (strcmp(long_options[option_index].name, "amber") == 0) { @@ -659,7 +870,15 @@ { algo->bayes = (int) strtol(optarg, NULL, 10); } -/* + else if (strcmp(long_options[option_index].name, "scaleanchor") == 0) + { + algo->scaleanchor = (int) strtol(optarg, NULL, 10); + } + else if (strcmp(long_options[option_index].name, "randgibbs") == 0) + { + algo->randgibbs = 1; + } +/* else { printf("\n Bad option '--%s' \n", long_options[option_index].name); @@ -669,10 +888,10 @@ */ break; case '0': /* don't do translations */ - algo->notrans = 1; + algo->dotrans = 0; break; case '1': /* don't estimate rotations */ - algo->norot = 1; + algo->dorot = 0; break; case '2': /* convert a Lele formatted file to PDB */ algo->convlele = 1; @@ -689,24 +908,14 @@ &algo->radii[2]); algo->fmodel = 1; break; - case '5': - //algo->missing = 1; - algo->tenberge = 1; - break; case '6': algo->ssm 
= 1; break; - case '7': /* very specific fix for structured Lele 5x5 covariance matrix, testset data */ - algo->lele5 = 1; - break; - case '8': - algo->atom_names = 1; - break; case '9': /* write out the mean inner product matrix (Lele uses this) */ algo->ipmat = 1; break; case 'a': - if (algo->alignment == 1) + if (algo->alignment) { printf("\n\n -> Only alpha carbons can be selected <-"); printf("\n -> when superimposing to an alignment <-\n"); @@ -793,8 +1002,6 @@ case 'g': if (isdigit(optarg[0])) algo->hierarch = (int) strtol(optarg, NULL, 10); - if (algo->bfact > 0) - algo->hierarch = 3; break; case 'G': //algo->fullpca = 1; @@ -835,40 +1042,8 @@ case 'L': algo->instfile = 1; break; - case 'm': - if (isdigit(optarg[0])) - algo->method = (int) strtol(optarg, NULL, 10); - else - { - strtoupper(optarg); - if (strncmp(optarg, "KABSCH", 6) == 0) - algo->method = 0; - else if (strncmp(optarg, "KEARSLEY", 8) == 0) - algo->method = 1; - else if (strncmp(optarg, "HORN", 4) == 0) - algo->method = 2; - else if (strncmp(optarg, "LKEARSLEY", 9) == 0) - algo->method = 4; - else if (strncmp(optarg, "LKABSCH", 7) == 0) - algo->method = 5; - else if (strncmp(optarg, "FULLAX", 6) == 0) - algo->method = 6; - else if (strncmp(optarg, "SVD", 3) == 0) - algo->method = 7; - else if (strncmp(optarg, "JACOBI", 6) == 0) - algo->method = 8; - else if (strncmp(optarg, "JSVD", 4) == 0) - algo->method = 10; - else if (strncmp(optarg, "JCYC", 4) == 0) - algo->method = 11; - else - { - printf("\n Bad -m string '-%s' \n", optarg); - Usage(0); - exit(EXIT_FAILURE); - } - } - break; +// case 'm': +// break; case 'M': bseA->mapfile_name = (char *) malloc((strlen(optarg) + 2) * sizeof(char)); mystrncpy(bseA->mapfile_name, optarg, strlen(optarg) + 1); @@ -902,13 +1077,9 @@ algo->pca = (double) strtod(optarg, NULL); break; case 'q': - sscanf(optarg, "%lf:%lf:%lf", - &algo->raxes[0], - &algo->raxes[1], - &algo->raxes[2]); + algo->morphfile = 1; break; case 'Q': - //algo->modelpca = 1; 
algo->scalefactor = (double) strtod(optarg, NULL); break; case 'r': @@ -926,12 +1097,8 @@ mystrncpy(algo->selection, optarg, FILENAME_MAX - 1); algo->revsel = 1; break; - case 't': - if (isdigit(optarg[0])) - algo->bfact = (int) strtol(optarg, NULL, 10); - if (algo->bfact > 0) - algo->hierarch = 3; - break; +// case 't': +// break; case 'T': algo->threads = (int) strtol(optarg, NULL, 10); break; @@ -964,7 +1131,7 @@ algo->seed = 1; break; case 'y': - algo->noave = 1; + algo->doave = 0; break; case 'Y': algo->stats = 1; @@ -987,33 +1154,20 @@ } -void +static void PrintSuperposStats(CdsArray *cdsA) { int i, j; - /* double ave_wRMSD_from_mean; */ - int Ftest_M, df1, df2, newlen; - double Pftest, F_ratio, F1, F2; - double logLF; + int newlen; const int vlen = cdsA->vlen, cnum = cdsA->cnum; - Statistics *stats = cdsA->stats; - Algorithm *algo = cdsA->algo; - if (algo->stats == 1) + if (algo->stats) { - if (algo->verbose == 1) + if (algo->verbose) { for (i = 0; i < cnum; ++i) { printf(" -> radius of gyration, cds[%3d] = %8.3f \n", i+1, cdsA->cds[i]->radgyr); - - if (algo->method == 1 || algo->method == 2 || algo->method == 4) - { - printf(" quaternion rotation eigenvectors \n"); - printf(" [ w x y z ]\n"); - Mat4Print(cdsA->cds[i]->evecs); - } - for (j = 0; j < 4; ++j) printf(" -> eigenvalue[%d] = %10.3f \n", j, cdsA->cds[i]->evals[j]); } @@ -1023,26 +1177,27 @@ } printf(" %d models superimposed in %.1f ms \n", cnum, algo->milliseconds); + fflush(NULL); printf(" * Least-squares %10.5f\n", stats->stddev); + fflush(NULL); + printf(" * Classical LS pairwise %10.5f\n", stats->ave_paRMSD); - /* printf("\n * Weighted pairwise %10.5f %10.5f * ", stats->starting_pawRMSD, stats->ave_pawRMSD); */ - /* printf("\n * Weighted from the mean %10.5f %10.5f * <- ", stats->starting_ave_wRMSD_from_mean, ave_wRMSD_from_mean); */ printf(" * Maximum Likelihood %10.5f\n", stats->mlRMSD); - /* printf("\n * Multipose weighted from the mean %10.5f * ", stats->wRMSD_from_mean); */ - printf(" ~ Log 
Likelihood %11.2f\n", stats->logL); - printf(" ~ Marginal Log Likelihood %11.2f\n", stats->mglogl); + printf(" ~ Marginal Log Likelihood %11.2f\n", stats->mlogL); printf(" ~ AIC %11.2f\n", stats->AIC); printf(" ~ BIC %11.2f\n", stats->BIC); printf(" + Rotational, translational, covar chi^2 %11.2f (P:%3.2e)\n", stats->chi2, chisqr_sdf(stats->chi2 * vlen * cnum * 3, vlen * cnum * 3, 0)); + if (algo->hierarch > 0 && algo->hierarch <= 5) printf(" + Hierarchical minimum var (sigma) %3.2e (%3.2e)\n", 2.0*stats->hierarch_p1 / (3*cnum + 2.0*(1.0 + stats->hierarch_p2)), sqrt(2.0*stats->hierarch_p1 / (3*cnum + 2.0*(1.0 + stats->hierarch_p2)))); - if (algo->hierarch != 0) + + if (algo->hierarch) { - if (cdsA->algo->varweight != 0) + if (algo->varweight) { if (vlen - 3 < 3*cnum - 6) newlen = vlen - 3; @@ -1062,8 +1217,6 @@ (newlen * stats->hierarch_chi2 + vlen * cnum * 3 * stats->chi2) / (vlen * cnum * 3 + newlen), chisqr_sdf(newlen * stats->hierarch_chi2 + vlen * cnum * 3 * stats->chi2, (vlen * cnum * 3 + newlen), 0)); } - if (algo->htrans != 0) - printf(" * Translation normal chi^2 %10.5f\n", stats->htrans_chi2); printf(" < skewness %11.2f (P:%3.2e)\n", stats->skewness[3], 2.0 * normal_sdf(fabs(stats->skewness[3]/stats->SES), 0.0, 1.0)); @@ -1071,52 +1224,9 @@ printf(" < kurtosis %11.2f (P:%3.2e)\n", stats->kurtosis[3], 2.0 * normal_sdf(fabs(stats->kurtosis[3]/stats->SEK), 0.0, 1.0)); printf(" < kurtosis Z-value %11.2f\n", fabs(stats->kurtosis[3]/stats->SEK)); - /* for chi^2 test of normality with kurtosis & skewness, see: - Lynch, M. and B.Walsh (1998). - Genetics and Analysis of Quantitative Traits. 
*/ -/* normtest = (((n-2) * (n+1) * (n+3)) * mysquare(stats->skewness[3]) / (6.0 * n * (n-1))) + */ -/* (((n-3) * (n-2) * (n+3) * (n+5)) * mysquare(stats->kurtosis[3]) / (24.0 * n * (n-1) * (n-1))); */ -/* printf("\n -> kurtosis/skewness chi^2 normality test %7.3f x p = %3.2e ", */ -/* normtest, chisqr_sdf(normtest, 2.0, 0)); */ - - printf(" FP error in transformed coordinates: %3.2e\n", stats->fperr); - printf(" Minimum RMS error per atom: %3.2e\n", stats->minvar); printf(" Data pts = %d, Free params = %d, D/P = %-5.1f\n", (int) stats->ndata, (int) stats->nparams, (stats->ndata / stats->nparams)); - if (algo->reflection == 1) - { - CalcANOVAF(cdsA); - F1 = mysquare(stats->anova_RMSD); - F2 = mysquare(stats->mlRMSD); - F_ratio = F1/F2; - Ftest_M = (vlen - 2); - df1 = (cnum - 1) * 2 * Ftest_M; - df2 = (cnum - 1) * Ftest_M; - - printf(" * Reflected from the mean %10.6f\n", stats->anova_RMSD); - Pftest = gsl_sf_beta_inc((df2 / 2.0), (df1 / 2.0), (df2 / (df2 + df1 * F_ratio))); - printf(" * F(refl, %6d, %6d) = %8.4f p = %3.2e\n", df1, df2, F_ratio, Pftest); - - logLF = (df1 * log(1.0 + (df2 / (df1 * F_ratio)))) - + (df2 * log(1.0 + ((df1 * F_ratio) / df2))) - + (df1 * log(df1)) - + (df2 * log(df2)) - - ((df1 + df2) * log(df1 + df2)); - logLF *= 0.5; - printf(" * likelihood F(%8.4f) %3.2e\n", F_ratio, logLF); - printf(" * Ref Log Likelihood %11.3f\n", stats->anova_logL); - printf(" * Ref AIC %11.3f\n", stats->anova_AIC); - printf(" * Sign-test p = %3.2e\n", stats->signp); - printf(" * Wilcoxon Ranked Sign-test p = %3.2e\n", stats->wilcoxonp); - } - - if (algo->stats == 1) - { - printf(" * Durbin-Watson autocorrelation D %5.3f\n", stats->dw); - printf(" * Durbin-Watson correlation coeff %5.3f\n", 1.0 - stats->dw/2.0); - } - printf(" * Median structure = #%d\n", stats->median + 1); printf(" N(total) = %d, N(atoms) = %d, N(structures) = %d\n", (cnum * vlen), vlen, cnum); @@ -1135,12 +1245,12 @@ void leave(int sig) { - if (baseA->scratchA->algo->rounds > 0) + if 
(algo->rounds > 0) { - printf(" Aborting at iteration %d ....\n", - baseA->scratchA->algo->rounds+1); + printf(" Aborting at iteration %d ....\n", algo->rounds+1); fflush(NULL); - baseA->scratchA->algo->abort = 1; + + algo->abort = 1; signal(sig, SIG_IGN); } else @@ -1150,252 +1260,3 @@ } } - -int -ConvertLele_freeform(char *fp_name, const int dim, const int forms, const int lmarks) -{ - int i, j, k, lines, numscanned; - FILE *fp0 = NULL, *fp1 = NULL; - double *vals = calloc(dim, sizeof(double)); - - fp0 = fopen(fp_name, "r"); - fp1 = fopen("lele.pdb", "w"); - if (fp0 == NULL || fp1 == NULL) - { - fprintf(stderr, "\n ERROR6969: cannot open file \"%s\" \n", fp_name); - exit(EXIT_FAILURE); - } - - i = j = 0; - lines = 0; - for (i = 0; i < forms; ++i) - { - fprintf(fp1, "MODEL %8d\n", i+1); - - for (j = 0; j < lmarks; ++j) - { - for (k = 0; k < dim; ++k) - { - numscanned = fscanf(fp0, "%le ", &vals[k]); - //printf("\n**** %f", vals[k]); - //fflush(NULL); - - if (numscanned < 1 || numscanned == EOF) - { - fprintf(stderr, - "\n ERROR6968: %d number of coordinates on line %d \n", - numscanned, lines); - exit(EXIT_FAILURE); - } - } - - /* r s Hn ar xc r i x y z o tF sI e c */ - /* ATOM 1949 1HB ARG A 255 19.326 -3.835 -3.438 1.00 1.31 H */ - - fprintf(fp1, - /* r s H n aL rN x c rSiC x y z o tF sI e c */ - "%-6.6s%5u %3.3s %-3.3s %1c%4d %8.3f%8.3f%8.3f%6.2f%6.2f\n", - "ATOM ", i*lmarks + j, "CA ", "ALA", 'A', j+1, - vals[0], vals[1], vals[2], - 1.0, 10.0); - - ++lines; - } - - fprintf(fp1, "ENDMDL\n"); - } - - fprintf(fp1, "END\n"); - fclose(fp0); - fclose(fp1); - free(vals); - - return(1); -} - - -int -ConvertDryden(char *fp_name, const int dim, const int forms, const int lmarks) -{ - int i, j, lines, numscanned; - FILE *fp0 = NULL, *fp1 = NULL; - double vals[2]; - char line[512]; - - fp0 = fopen(fp_name, "r"); - fp1 = fopen("dryden.pdb", "w"); - if (fp0 == NULL || fp1 == NULL) - { - fprintf(stderr, "\n ERROR6969: cannot open file \"%s\" \n", fp_name); - 
exit(EXIT_FAILURE); - } - -/* *length = 0; */ -/* while(1) */ -/* { */ -/* ch = getc(fp); */ -/* */ -/* if (ch == EOF || ch == '\n') */ -/* ++(*length); */ -/* */ -/* if (ch == EOF) */ -/* break; */ -/* } */ -/* */ -/* array = calloc((*length + 1), sizeof(double)); */ - -/* rewind(fp); */ - - fgets(line, 512, fp0); - - i = j = 0; - lines = 0; - for (i = 0; i < forms; ++i) - { - fprintf(fp1, "MODEL %8d\n", i+1); - - fscanf(fp0, "%*s"); - - for (j = 0; j < lmarks; ++j) - { - numscanned = fscanf(fp0, "%le %le ", &vals[0], &vals[1]); - - if (numscanned < dim || numscanned == EOF) - { - fprintf(stderr, - "\n ERROR6968: %d number of coordinates on line %d \n", - numscanned, lines); - exit(EXIT_FAILURE); - } - - /* r s Hn ar xc r i x y z o tF sI e c */ - /* ATOM 1949 1HB ARG A 255 19.326 -3.835 -3.438 1.00 1.31 H */ - - fprintf(fp1, - /* r s H n aL rN x c rSiC x y z o tF sI e c */ - "%-6.6s%5u %3.3s %-3.3s %1c%4d %8.3f%8.3f%8.3f%6.2f%6.2f\n", - "ATOM ", i*lmarks + j, "CA ", "ALA", 'A', j+1, - vals[0], vals[1], 0.0, - 1.0, 10.0); - - ++lines; - } - - fprintf(fp1, "ENDMDL\n"); - } - - fprintf(fp1, "END\n"); - fclose(fp0); - fclose(fp1); - - return(1); -} - - -int -ConvertLele(char *fp_name, const int dim, const int forms, const int lmarks) -{ - int i, j, lines, numscanned; - FILE *fp0 = NULL, *fp1 = NULL; - double vals[3]; - char line[512]; - - fp0 = fopen(fp_name, "r"); - fp1 = fopen("lele.pdb", "w"); - if (fp0 == NULL || fp1 == NULL) - { - fprintf(stderr, "\n ERROR6969: cannot open file \"%s\" \n", fp_name); - exit(EXIT_FAILURE); - } - -/* *length = 0; */ -/* while(1) */ -/* { */ -/* ch = getc(fp); */ -/* */ -/* if (ch == EOF || ch == '\n') */ -/* ++(*length); */ -/* */ -/* if (ch == EOF) */ -/* break; */ -/* } */ -/* */ -/* array = calloc((*length + 1), sizeof(double)); */ - -/* rewind(fp); */ - - i = j = 0; - lines = 0; - for (i = 0; i < forms; ++i) - { - fprintf(fp1, "MODEL %8d\n", i+1); - - for (j = 0; j < lmarks; ++j) - { - fgets(line, 512, fp0); - numscanned = 
sscanf(line, "%le %le %le ", &vals[0], &vals[1], &vals[2]); - - if (numscanned < dim || numscanned == EOF) - { - fprintf(stderr, - "\n ERROR6968: %d number of coordinates on line %d \n", - numscanned, lines); - exit(EXIT_FAILURE); - } - - /* r s Hn ar xc r i x y z o tF sI e c */ - /* ATOM 1949 1HB ARG A 255 19.326 -3.835 -3.438 1.00 1.31 H */ - - fprintf(fp1, - /* r s H n aL rN x c rSiC x y z o tF sI e c */ - "%-6.6s%5u %3.3s %-3.3s %1c%4d %8.3f%8.3f%8.3f%6.2f%6.2f\n", - "ATOM ", i*lmarks + j, "CA ", "ALA", 'A', j+1, - vals[0], vals[1], vals[2], - 1.0, 10.0); - - ++lines; - } - - fprintf(fp1, "ENDMDL\n"); - } - - fprintf(fp1, "END\n"); - fclose(fp0); - fclose(fp1); - - return(1); -} - - -void -WriteLeleModelFile(PDBCdsArray *pdbAr) -{ - FILE *pdbfile = NULL; - int i, j; - char outfile_name[] = "lele.txt"; - - pdbfile = myfopen(outfile_name, "w"); - if (pdbfile == NULL) - { - perror("\n ERROR"); - fprintf(stderr, - "\n ERROR99: could not open file '%s' for writing. \n", outfile_name); - PrintTheseusTag(); - exit(EXIT_FAILURE); - } - - for (i = 0; i < pdbAr->cnum; ++i) - { - for (j = 0; j < pdbAr->vlen; ++j) - { - fprintf(pdbfile, "%.3f\t%.3f\t%.3f\n", - pdbAr->cds[i]->x[j], - pdbAr->cds[i]->y[j], - pdbAr->cds[i]->z[j]); - } - } - - fprintf(pdbfile, "\n"); - - fclose(pdbfile); -} diff -Nru theseus-2.0.6/theseus.h theseus-3.0.0/theseus.h --- theseus-2.0.6/theseus.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/theseus.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,24 +26,19 @@ #ifndef THESEUS_SEEN #define THESEUS_SEEN -/* #undef __APPLE__ */ #if __STDC__ != 1 #error NOT a Standard C environment #endif /* Temporary workaround for broken OSX system headers, shipped in - ÊÊXCode 1.5. They're declaring stuff 'static inline', and this is - ÊÊincompatible with the '-ansi' flag I pass to gcc. + XCode 1.5. They're declaring stuff 'static inline', and this is + incompatible with the '-ansi' flag I pass to gcc. See Apple bug #3805571. */ #if defined(__APPLE__) && !defined(inline) #define inline __inline__ #endif -#if defined(__APPLE__) - #include -#endif - #include #include #include @@ -79,26 +74,13 @@ #include "MultiPoseMix.h" #include "QuarticHornFrag.h" #include "GibbsMet.h" +#include "ProcGSLSVDNu.h" #include "termcol.h" - -#if defined(DEBUG) - #include - #if defined(__GNUC__) - #include - #endif - #define TRACE0 /* set to 0 to disable the trace */ - #if TRACE - #define START_TRACE() {int result = __mfspr(1023);} - #else - #define START_TRACE() - #endif -#endif - static void ParseCmdLine(int argc, char *argv[], CdsArray *baseA); -void +static void PrintSuperposStats(CdsArray *cdsA); #endif diff -Nru theseus-2.0.6/theseuslib.c theseus-3.0.0/theseuslib.c --- theseus-2.0.6/theseuslib.c 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/theseuslib.c 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. 
Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,7 +35,7 @@ #include "pdbIO.h" #include "pdbUtils.h" #include "ProcGSLSVD.h" -#include "ProcGSLSVDOcc.h" +#include "ProcGSLSVDNu.h" #include "CovMat.h" #include "MultiPose.h" #include "MultiPose2MSA.h" @@ -92,11 +92,11 @@ cdsA = CdsArrayInit(); - if (*ls == 1) + if (*ls) { - cdsA->algo->leastsquares = 1; - cdsA->algo->varweight = 0; - cdsA->algo->hierarch = 0; + algo->leastsquares = 1; + algo->varweight = 0; + algo->hierarch = 0; } CdsArrayAlloc(cdsA, cnum, vlen); @@ -104,7 +104,7 @@ /* DLT debug - shouldn't need to do this, I think prob is in SuperPose() */ for (j = 0; j < cnum; ++j) for (i = 0; i < vlen; ++i) - cdsA->cds[j]->o[i] = 1.0; + cdsA->cds[j]->nu[i] = 1; for (i = 0; i < cnum; ++i) { @@ -138,11 +138,11 @@ cdsA = CdsArrayInit(); - if (*ls == 1) + if (*ls) { - cdsA->algo->leastsquares = 1; - cdsA->algo->varweight = 0; - cdsA->algo->hierarch = 0; + algo->leastsquares = 1; + algo->varweight = 0; + algo->hierarch = 0; } CdsArrayAlloc(cdsA, cnum, vlen); @@ -150,14 +150,14 @@ /* DLT debug - shouldn't need to do this, I think prob is in SuperPose() */ for (j = 0; j < cnum; ++j) for (i = 0; i < vlen; ++i) - cdsA->cds[j]->o[i] = 1.0; + cdsA->cds[j]->nu[i] = 1; for (i = 0; i < cnum; ++i) { free(cdsA->cds[i]->x); free(cdsA->cds[i]->y); free(cdsA->cds[i]->z); - + cdsA->cds[i]->x = &xbuf[i*vlen]; cdsA->cds[i]->y = &ybuf[i*vlen]; cdsA->cds[i]->z = &zbuf[i*vlen]; @@ -179,7 +179,7 @@ void -CalcS2(CdsArray *cdsA, const int nsell, double *bxij, double *byij, double *bzij, +CalcS2(CdsArray *cdsA, const int nsell, double *bxij, double *byij, double *bzij, double *rij, double *s2, const int whoiam) { int i, j, k, m; @@ -231,7 +231,7 @@ rijk = rij[k]; // order parameter: - s2[k] = (1.5/(rijk*rijk*rijk*rijk)) * (sx2*sx2 + sy2*sy2 + sz2*sz2 + + s2[k] = (1.5/(rijk*rijk*rijk*rijk)) * (sx2*sx2 + sy2*sy2 + sz2*sz2 + 2.0*(sxy*sxy + 
sxz*sxz + syz*syz)) - 0.5; // components of force: @@ -267,11 +267,11 @@ cdsA = CdsArrayInit(); - if (*ls == 1) + if (*ls) { - cdsA->algo->leastsquares = 1; - cdsA->algo->varweight = 0; - cdsA->algo->hierarch = 0; + algo->leastsquares = 1; + algo->varweight = 0; + algo->hierarch = 0; } //printf(" ENSS2ML>: cnum=%d len=%d vlen=%d nsell=%d\n", cnum, len, vlen, *nsell); //fflush(NULL); @@ -280,7 +280,7 @@ /* DLT debug - shouldn't need to do this, I think prob is in SuperPose() */ for (j = 0; j < cnum; ++j) for (i = 0; i < vlen; ++i) - cdsA->cds[j]->o[i] = 1.0; + cdsA->cds[j]->nu[i] = 1; for (i = 0; i < cnum; ++i) { @@ -332,31 +332,22 @@ CdsArraySetup(cdsA); /* setup local aliases based on cdsA */ - algo = cdsA->algo; - stats = cdsA->stats; cds = cdsA->cds; avecds = cdsA->avecds; - if (algo->covweight == 1) + if (algo->covweight) SetupCovWeighting(cdsA); /* DLT debug */ stats->hierarch_p1 = 1.0; stats->hierarch_p2 = 1.0; - algo->constant = 0.001; /* randomly select a structure to use as the initial mean structure */ //slxn = (int) (genrand_real2() * cnum); slxn = gsl_rng_uniform_int(r2, cnum); - - CdsCopyAll(avecds, cdsA->cds[slxn]); - if (algo->bfact > 0) - { - for (i = 0; i < cnum; ++i) - Bfacts2PrVars(cdsA, i); - } + CdsCopyAll(avecds, cdsA->cds[slxn]); - if (algo->alignment == 1) + if (algo->alignment) CalcDf(cdsA); // WriteCdsFile(cdsA->cds[0], "charmm_inp0.pdb"); @@ -367,7 +358,7 @@ (1) First calculates the translations (2) Does inner loop -- calc rotations and average till convergence (3) Holding the superposition constant, calculates the covariance - matrices and corresponding weight matrices, looping till + matrices and corresponding weight matrices, looping till convergence when using a dimensional/axial covariance matrix */ round = 0; while(1) @@ -376,7 +367,7 @@ algo->rounds = round; /* Estimate Translations: Find weighted center and translate all cds */ - CalcTranslationsIp(cdsA, algo); + //CalcTranslationsIp(cdsA, algo); // DLT OP for (i = 0; i < cnum; 
++i) ApplyCenterIp(cds[i]); @@ -397,12 +388,12 @@ ++innerround; /* find the optimal rotation matrices */ - if (algo->alignment == 1) - CalcRotationsOcc(cdsA); + if (algo->alignment) + CalcRotationsNu(cdsA); else CalcRotations(cdsA); - if ((innerround == 1) && (CheckConvergenceOuter(cdsA, round, algo->precision) == 1)) + if ((innerround == 1) && CheckConvergenceOuter(cdsA, round, algo->precision)) goto outsidetheloops; /* rotate the scratch cds with new rotation matrix */ @@ -410,9 +401,9 @@ RotateCdsIp(cds[i], (const double **) cds[i]->matrix); /* find global rmsd and average cds (both held in structure) */ - if (algo->alignment == 1) + if (algo->alignment) { - AveCdsOcc(cdsA); + AveCdsNu(cdsA); EM_MissingCds(cdsA); } else @@ -425,6 +416,8 @@ /* Holding the superposition constant, calculate the covariance matrix and corresponding weight matrix, looping till convergence. */ CalcCovariances(cdsA); + if (algo->varweight || algo->covweight) + HierarchVars(cdsA); // VecPrint(cdsA->var, cdsA->vlen); @@ -465,14 +458,11 @@ CdsArraySetup(cdsA); /* setup local aliases based on cdsA */ - algo = cdsA->algo; - stats = cdsA->stats; cds = cdsA->cds; avecds = cdsA->avecds; stats->hierarch_p1 = 1.0; stats->hierarch_p2 = 1.0; - algo->constant = 0.001; /* randomly select a structure to use as the initial mean structure */ slxn = gsl_rng_uniform_int(r2, cnum); @@ -491,7 +481,7 @@ algo->rounds = round; /* Estimate Translations: Find weighted center and translate all cds */ - CalcTranslationsIp(cdsA, algo); + //CalcTranslationsIp(cdsA, algo); // DLT OP for (i = 0; i < cnum; ++i) ApplyCenterIp(cds[i]); @@ -514,7 +504,7 @@ /* find the optimal rotation matrices */ CalcRotations(cdsA); - if ((innerround == 1) && (CheckConvergenceOuter(cdsA, round, algo->precision) == 1)) + if ((innerround == 1) && (CheckConvergenceOuter(cdsA, round, algo->precision))) return(round); /* rotate the scratch cds with new rotation matrix */ @@ -529,6 +519,8 @@ /* Holding the superposition constant, calculate the 
covariance matrix and corresponding weight matrix, looping till convergence. */ CalcCovariances(cdsA); + if (algo->varweight || algo->covweight) + HierarchVars(cdsA); /* calculate the weights/weight matrices */ CalcWts(cdsA); diff -Nru theseus-2.0.6/theseuslib.h theseus-3.0.0/theseuslib.h --- theseus-2.0.6/theseuslib.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/theseuslib.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by Binary files /tmp/V7iqsWxQ7o/theseus-2.0.6/theseus_man.pdf and /tmp/g2bOMTRwaC/theseus-3.0.0/theseus_man.pdf differ diff -Nru theseus-2.0.6/Threads.h theseus-3.0.0/Threads.h --- theseus-2.0.6/Threads.h 2013-06-05 17:20:03.000000000 +0000 +++ theseus-3.0.0/Threads.h 2014-05-13 16:48:52.000000000 +0000 @@ -1,7 +1,7 @@ /* Theseus - maximum likelihood superpositioning of macromolecular structures - Copyright (C) 2004-2013 Douglas L. Theobald + Copyright (C) 2004-2014 Douglas L. Theobald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,8 +30,8 @@ typedef struct { - Cds **cds; - Cds *tcds; + Cds **cds; + Cds *tcds; /* double **tmpmat3a, **tmpmat3b, **tmpmat3c, *tmpvec3a; */ int vlen, start, end; } RotData; @@ -39,14 +39,14 @@ typedef struct { - CdsArray *cdsA; + CdsArray *cdsA; int vlen, cnum, start, end; } AveData; typedef struct { - CdsArray *cdsA; + CdsArray *cdsA; double *probs; double *vars; int rounds;