diff -Nru eigensoft-6.1.2+dfsg/bin/evec2pca-ped.perl eigensoft-6.1.3+dfsg/bin/evec2pca-ped.perl --- eigensoft-6.1.2+dfsg/bin/evec2pca-ped.perl 1970-01-01 00:00:00.000000000 +0000 +++ eigensoft-6.1.3+dfsg/bin/evec2pca-ped.perl 2016-07-29 14:26:24.000000000 +0000 @@ -0,0 +1,79 @@ +#!/usr/bin/perl + +### translate .evec file to .pca file expected by eigenstrat program +### Note: .evec file does not contain entries for outliers +### .pca file does contain entries (set to all 0.0) for outliers + +# ----- This is a new version for PLINK input files. It differs from the +# ----- original in two ways. (1) The indiv name is in the second column +# ----- of the .fam file, but the first of a .ind file. (2) If the +# ----- indiv names are not found in the .evec file, try the +# ----- familyname:indivname combination. + +$k = $ARGV[0]; +$evec = $ARGV[1]; +$ind = $ARGV[2]; +$pca = $ARGV[3]; +open(EVEC,$evec) || die("OOPS couldn't open file $evec for reading"); +open(PCA,">$pca") || die("OOPS couldn't open file $pca for writing"); + +print PCA ("$k\n"); # number of output eigenvectors/eigenvalues +$line = ; chomp($line); # eigvals line +my @array = split(/[\t ]+/,$line); +for($x=0; $x<$k; $x++) { printf PCA ("%.04f\n",$array[$x+2]); } # x-th eval +while($line = ) +{ + chomp($line); + $line = " " . $line; + my @array = split(/[\t ]+/,$line); + $l = @array; + unless($l == 3+$k) { die("OOPS #evec in $evec is different from $k"); } + $sample = $array[1]; + for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = $array[$x+2]; } + $found{$sample} = 1; +} + +# ----- Figure out which name convention to use +my $count1 = 0; +my $count2 = 0; +open(IND,$ind) || die("OOPS couldn't open indiv file $ind for reading"); +while ( my $line = ) { + chomp($line); + $line =~ s/^[\s]+//; # remove leading white-space + my @E = split(/[\s]+/,$line); + + my $s = $E[1]; + my $t = $E[0] . ":" . $E[1]; + + if ( exists $found{$s} ) { + $count1++; + } + if ( exists $found{$t} ) { + $count2++; + } +} +close(IND); + +open(IND,$ind) || die("OOPS couldn't open indiv file $ind for reading"); +while($line = ) +{ + chomp($line); + $line =~ s/^[\s]+//; + my @array = split(/[\s]+/,$line); + $sample = ($count1 >= $count2 ? $array[1] : $array[0] . ":" . $array[1]); + if($sample eq "") { $sample = $array[1]; } + unless($found{$sample}) + { + for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = 0.0; } + } + for($x=0; $x<$k; $x++) + { + printf PCA (" "); + if($evecarray{$sample}[$x] > 0) { printf PCA (" "); } + printf PCA ("%.04f",$evecarray{$sample}[$x]); + } + printf PCA ("\n"); +} + + + diff -Nru eigensoft-6.1.2+dfsg/bin/evec2pca-ped.pl eigensoft-6.1.3+dfsg/bin/evec2pca-ped.pl --- eigensoft-6.1.2+dfsg/bin/evec2pca-ped.pl 2016-07-11 14:18:38.000000000 +0000 +++ eigensoft-6.1.3+dfsg/bin/evec2pca-ped.pl 1970-01-01 00:00:00.000000000 +0000 @@ -1,79 +0,0 @@ -#!/usr/bin/perl - -### translate .evec file to .pca file expected by eigenstrat program -### Note: .evec file does not contain entries for outliers -### .pca file does contain entries (set to all 0.0) for outliers - -# ----- This is a new version for PLINK input files. It differs from the -# ----- original in two ways. (1) The indiv name is in the second column -# ----- of the .fam file, but the first of a .ind file. (2) If the -# ----- indiv names are not found in the .evec file, try the -# ----- familyname:indivname combination. - -$k = $ARGV[0]; -$evec = $ARGV[1]; -$ind = $ARGV[2]; -$pca = $ARGV[3]; -open(EVEC,$evec) || die("OOPS couldn't open file $evec for reading"); -open(PCA,">$pca") || die("OOPS couldn't open file $pca for writing"); - -print PCA ("$k\n"); # number of output eigenvectors/eigenvalues -$line = ; chomp($line); # eigvals line -my @array = split(/[\t ]+/,$line); -for($x=0; $x<$k; $x++) { printf PCA ("%.04f\n",$array[$x+2]); } # x-th eval -while($line = ) -{ - chomp($line); - $line = " " . $line; - my @array = split(/[\t ]+/,$line); - $l = @array; - unless($l == 3+$k) { die("OOPS #evec in $evec is different from $k"); } - $sample = $array[1]; - for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = $array[$x+2]; } - $found{$sample} = 1; -} - -# ----- Figure out which name convention to use -my $count1 = 0; -my $count2 = 0; -open(IND,$ind) || die("OOPS couldn't open indiv file $ind for reading"); -while ( my $line = ) { - chomp($line); - $line =~ s/^[\s]+//; # remove leading white-space - my @E = split(/[\s]+/,$line); - - my $s = $E[1]; - my $t = $E[0] . ":" . $E[1]; - - if ( exists $found{$s} ) { - $count1++; - } - if ( exists $found{$t} ) { - $count2++; - } -} -close(IND); - -open(IND,$ind) || die("OOPS couldn't open indiv file $ind for reading"); -while($line = ) -{ - chomp($line); - $line =~ s/^[\s]+//; - my @array = split(/[\s]+/,$line); - $sample = ($count1 >= $count2 ? $array[1] : $array[0] . ":" . $array[1]); - if($sample eq "") { $sample = $array[1]; } - unless($found{$sample}) - { - for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = 0.0; } - } - for($x=0; $x<$k; $x++) - { - printf PCA (" "); - if($evecarray{$sample}[$x] > 0) { printf PCA (" "); } - printf PCA ("%.04f",$evecarray{$sample}[$x]); - } - printf PCA ("\n"); -} - - - diff -Nru eigensoft-6.1.2+dfsg/bin/evec2pca.perl eigensoft-6.1.3+dfsg/bin/evec2pca.perl --- eigensoft-6.1.2+dfsg/bin/evec2pca.perl 1970-01-01 00:00:00.000000000 +0000 +++ eigensoft-6.1.3+dfsg/bin/evec2pca.perl 2016-07-29 14:26:24.000000000 +0000 @@ -0,0 +1,52 @@ +#!/usr/bin/perl + +### translate .evec file to .pca file expected by eigenstrat program +### Note: .evec file does not contain entries for outliers +### .pca file does contain entries (set to all 0.0) for outliers + +$k = $ARGV[0]; +$evec = $ARGV[1]; +$ind = $ARGV[2]; +$pca = $ARGV[3]; + +open(EVEC,$evec) || die("OOPS couldn't open file $evec for reading"); +open(IND,$ind) || die("OOPS couldn't open indiv file $ind for reading"); +open(PCA,">$pca") || die("OOPS couldn't open file $pca for writing"); + +print PCA ("$k\n"); # number of output eigenvectors/eigenvalues +$line = ; chomp($line); # eigvals line +my @array = split(/[\t ]+/,$line); +for($x=0; $x<$k; $x++) { printf PCA ("%.04f\n",$array[$x+2]); } # x-th eval +while($line = ) +{ + chomp($line); + $line = " " . $line; + my @array = split(/[\t ]+/,$line); + $l = @array; + unless($l == 3+$k) { die("OOPS #evec in $evec is different from $k"); } + $sample = $array[1]; + for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = $array[$x+2]; } + $found{$sample} = 1; +} + +while($line = ) +{ + chomp($line); + my @array = split(/[\t ]+/,$line); + $sample = $array[0]; + if($sample eq "") { $sample = $array[1]; } + unless($found{$sample}) + { + for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = 0.0; } + } + for($x=0; $x<$k; $x++) + { + printf PCA (" "); + if($evecarray{$sample}[$x] > 0) { printf PCA (" "); } + printf PCA ("%.04f",$evecarray{$sample}[$x]); + } + printf PCA ("\n"); +} + + + diff -Nru eigensoft-6.1.2+dfsg/bin/evec2pca.pl eigensoft-6.1.3+dfsg/bin/evec2pca.pl --- eigensoft-6.1.2+dfsg/bin/evec2pca.pl 2016-07-11 14:18:38.000000000 +0000 +++ eigensoft-6.1.3+dfsg/bin/evec2pca.pl 1970-01-01 00:00:00.000000000 +0000 @@ -1,52 +0,0 @@ -#!/usr/bin/perl - -### translate .evec file to .pca file expected by eigenstrat program -### Note: .evec file does not contain entries for outliers -### .pca file does contain entries (set to all 0.0) for outliers - -$k = $ARGV[0]; -$evec = $ARGV[1]; -$ind = $ARGV[2]; -$pca = $ARGV[3]; - -open(EVEC,$evec) || die("OOPS couldn't open file $evec for reading"); -open(IND,$ind) || die("OOPS couldn't open indiv file $ind for reading"); -open(PCA,">$pca") || die("OOPS couldn't open file $pca for writing"); - -print PCA ("$k\n"); # number of output eigenvectors/eigenvalues -$line = ; chomp($line); # eigvals line -my @array = split(/[\t ]+/,$line); -for($x=0; $x<$k; $x++) { printf PCA ("%.04f\n",$array[$x+2]); } # x-th eval -while($line = ) -{ - chomp($line); - $line = " " . $line; - my @array = split(/[\t ]+/,$line); - $l = @array; - unless($l == 3+$k) { die("OOPS #evec in $evec is different from $k"); } - $sample = $array[1]; - for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = $array[$x+2]; } - $found{$sample} = 1; -} - -while($line = ) -{ - chomp($line); - my @array = split(/[\t ]+/,$line); - $sample = $array[0]; - if($sample eq "") { $sample = $array[1]; } - unless($found{$sample}) - { - for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = 0.0; } - } - for($x=0; $x<$k; $x++) - { - printf PCA (" "); - if($evecarray{$sample}[$x] > 0) { printf PCA (" "); } - printf PCA ("%.04f",$evecarray{$sample}[$x]); - } - printf PCA ("\n"); -} - - - diff -Nru eigensoft-6.1.2+dfsg/bin/gc.perl eigensoft-6.1.3+dfsg/bin/gc.perl --- eigensoft-6.1.2+dfsg/bin/gc.perl 1970-01-01 00:00:00.000000000 +0000 +++ eigensoft-6.1.3+dfsg/bin/gc.perl 2016-07-29 14:26:24.000000000 +0000 @@ -0,0 +1,76 @@ +#!/usr/bin/perl + +$P = $ARGV[0]; +$out = $ARGV[1]; + +# get data +$m=0; +open(P,"$P") || die("COF"); +while($line =

) { if($line =~ /Chisq/) { last; } } # header lines +while($line =

) +{ + chomp($line); + if($line =~ /NA/) + { + $chisq1[$m] = -100; + $chisq2[$m] = -100; + $m++; + next; + } + my @array = split(/[\t ]+/,$line); + $chisq1[$m] = $array[0]; # Chisq + $chisq2[$m] = $array[1]; # EIGENSTRAT + $m++; + $mvalid++; +} +close(P); +$nSNP = $m; +$nSNPvalid = $mvalid; + +# compute $lambda1 (Chisq) +$CHISQTHRESH = 0.456; +$step = 0.25; +$oktoreducestep = 0; +for($iter=0; $iter<20; $iter++) +{ + $mm = 0; + for($m=0; $m<$nSNP; $m++) + { + if($chisq1[$m] > $CHISQTHRESH) { $mm++; } + } + $frac = $mm/$nSNPvalid; # frac of SNPs exceeding CHISQTHRESH + if($frac > 0.5) { $CHISQTHRESH += $step; } + else { $CHISQTHRESH -= $step; $oktoreducestep = 1; } + if($oktoreducestep) { $step *= 0.5; } +} +$lambda1 = $CHISQTHRESH/0.456; # 0.456 is median if no inflation +if($lambda1 < 1) { $lambda1 = 1; } # not allowed to be less than 1 + +# compute $lambda2 (EIGENSTRAT) +$CHISQTHRESH = 0.456; +$step = 0.25; +$oktoreducestep = 1; +for($iter=0; $iter<20; $iter++) +{ + $mm = 0; + for($m=0; $m<$nSNP; $m++) + { + if($chisq2[$m] > $CHISQTHRESH) { $mm++; } + } + $frac = $mm/$nSNPvalid; # frac of SNPs exceeding CHISQTHRESH + if($frac > 0.5) { $CHISQTHRESH += $step; } + else { $CHISQTHRESH -= $step; $oktoreducestep = 1; } + if($oktoreducestep) { $step *= 0.5; } +} +$lambda2 = $CHISQTHRESH/0.456; # 0.456 is median if no inflation +if($lambda2 < 1) { $lambda2 = 1; } # not allowed to be less than 1 + +# output +open(OUT,">$out") || die("COF"); +print OUT ("Chisq EIGENSTRAT\n"); +printf OUT ("lambda=%.03f lambda=%.03f\n",$lambda1,$lambda2); +for($m=0; $m<$nSNP; $m++) +{ + if($chisq1[$m] < 0) { print OUT ("NA NA\n"); next; } + printf OUT ("%.04f %.04f\n",$chisq1[$m]/$lambda1,$chisq2[$m]/$lambda2); +} diff -Nru eigensoft-6.1.2+dfsg/bin/gc.pl eigensoft-6.1.3+dfsg/bin/gc.pl --- eigensoft-6.1.2+dfsg/bin/gc.pl 2016-07-11 14:18:38.000000000 +0000 +++ eigensoft-6.1.3+dfsg/bin/gc.pl 1970-01-01 00:00:00.000000000 +0000 @@ -1,76 +0,0 @@ -#!/usr/bin/perl - -$P = $ARGV[0]; -$out = $ARGV[1]; - -# get data -$m=0; -open(P,"$P") || die("COF"); -while($line =

) { if($line =~ /Chisq/) { last; } } # header lines -while($line =

) -{ - chomp($line); - if($line =~ /NA/) - { - $chisq1[$m] = -100; - $chisq2[$m] = -100; - $m++; - next; - } - my @array = split(/[\t ]+/,$line); - $chisq1[$m] = $array[0]; # Chisq - $chisq2[$m] = $array[1]; # EIGENSTRAT - $m++; - $mvalid++; -} -close(P); -$nSNP = $m; -$nSNPvalid = $mvalid; - -# compute $lambda1 (Chisq) -$CHISQTHRESH = 0.456; -$step = 0.25; -$oktoreducestep = 0; -for($iter=0; $iter<20; $iter++) -{ - $mm = 0; - for($m=0; $m<$nSNP; $m++) - { - if($chisq1[$m] > $CHISQTHRESH) { $mm++; } - } - $frac = $mm/$nSNPvalid; # frac of SNPs exceeding CHISQTHRESH - if($frac > 0.5) { $CHISQTHRESH += $step; } - else { $CHISQTHRESH -= $step; $oktoreducestep = 1; } - if($oktoreducestep) { $step *= 0.5; } -} -$lambda1 = $CHISQTHRESH/0.456; # 0.456 is median if no inflation -if($lambda1 < 1) { $lambda1 = 1; } # not allowed to be less than 1 - -# compute $lambda2 (EIGENSTRAT) -$CHISQTHRESH = 0.456; -$step = 0.25; -$oktoreducestep = 1; -for($iter=0; $iter<20; $iter++) -{ - $mm = 0; - for($m=0; $m<$nSNP; $m++) - { - if($chisq2[$m] > $CHISQTHRESH) { $mm++; } - } - $frac = $mm/$nSNPvalid; # frac of SNPs exceeding CHISQTHRESH - if($frac > 0.5) { $CHISQTHRESH += $step; } - else { $CHISQTHRESH -= $step; $oktoreducestep = 1; } - if($oktoreducestep) { $step *= 0.5; } -} -$lambda2 = $CHISQTHRESH/0.456; # 0.456 is median if no inflation -if($lambda2 < 1) { $lambda2 = 1; } # not allowed to be less than 1 - -# output -open(OUT,">$out") || die("COF"); -print OUT ("Chisq EIGENSTRAT\n"); -printf OUT ("lambda=%.03f lambda=%.03f\n",$lambda1,$lambda2); -for($m=0; $m<$nSNP; $m++) -{ - if($chisq1[$m] < 0) { print OUT ("NA NA\n"); next; } - printf OUT ("%.04f %.04f\n",$chisq1[$m]/$lambda1,$chisq2[$m]/$lambda2); -} diff -Nru eigensoft-6.1.2+dfsg/bin/smarteigenstrat.perl eigensoft-6.1.3+dfsg/bin/smarteigenstrat.perl --- eigensoft-6.1.2+dfsg/bin/smarteigenstrat.perl 1970-01-01 00:00:00.000000000 +0000 +++ eigensoft-6.1.3+dfsg/bin/smarteigenstrat.perl 2016-07-29 14:26:24.000000000 +0000 @@ -0,0 +1,76 @@ +#!/usr/bin/perl + +# perl wrapper for smarteigenstrat program. Run smarteigenstrat.perl with no options for usage + +use Getopt::Std; + +my @flaglist = ("i","a","b","o","p","q","l","k"); +$x = @ARGV; +for($n=0;$n<$x;$n++) { + foreach $flag (@flaglist) { + if ($ARGV[$n] eq "-$flag") { + $specified{$flag} = 1; + } + } +} + +# check for mandatory options +foreach $flag ("i","a","b","p","o","l") { + unless ($specified{$flag}) { + usage(); + die("Error: -$flag not specified"); + } +} + +# get opts from hash +getopts('i:a:b:p:o:q:k:l:', \%opts); +$genofilename = $opts{"i"}; +$indfilename = $opts{"b"}; +$snpfilename = $opts{"a"}; +$pcafilename = $opts{"p"}; +$outfilename = $opts{"o"}; +$logfilename = $opts{"l"}; +$qtmode = "NO"; +if ( $specified{"q"} ) { + $qtmode = $opts{"q"}; +} +$k = 1; +if ( $specified{"k"} ) { + $k = $opts{"k"}; +} + +# write parameter file +open(PAR, ">$outfilename.par") || die("Error: unable to open $outfilename.par\n"); +print PAR "genotypename: $genofilename\n"; +print PAR "snpname: $snpfilename\n"; +print PAR "indivname: $indfilename\n"; +print PAR "pcaname: $pcafilename\n"; +print PAR "outputname: $outfilename\n"; +print PAR "numpc: $k\n"; +print PAR "qtmode: $qtmode\n"; +close(PAR); + +# run smarteigenstrat +$cmd = "smarteigenstrat -p $outfilename.par >$logfilename"; +print "$cmd\n"; +system($cmd); + +sub usage { + print "smarteigenstrat.perl -i -a -b -p -o "; + print " -l -k -q "; + print "\n"; + print "-i genotype file (PED, PACKEDPED, EIGENSTRAT, ANCESTRYMAP or PACKEDANCESTRYMAP format)"; + print "-o output file (chisq)\n"; + print "-l logfile (screen output,including error messages)\n"; + print "-q YES for quantitative phenotype or NO otherwise\n"; + print "\n"; + print "For quantitative phenotype, sixth column of .ped file or third column of EIGENSTRAT .ind file\n"; + print "should be real numbers. For non-quantitative phenotype, sixth column of .ped or third column\n"; + print "should be 'Case' or 'Control'\n"; +} + + + + + + diff -Nru eigensoft-6.1.2+dfsg/bin/smarteigenstrat.pl eigensoft-6.1.3+dfsg/bin/smarteigenstrat.pl --- eigensoft-6.1.2+dfsg/bin/smarteigenstrat.pl 2016-07-11 14:18:38.000000000 +0000 +++ eigensoft-6.1.3+dfsg/bin/smarteigenstrat.pl 1970-01-01 00:00:00.000000000 +0000 @@ -1,76 +0,0 @@ -#!/usr/bin/perl - -# perl wrapper for smarteigenstrat program. Run smarteigenstrat.perl with no options for usage - -use Getopt::Std; - -my @flaglist = ("i","a","b","o","p","q","l","k"); -$x = @ARGV; -for($n=0;$n<$x;$n++) { - foreach $flag (@flaglist) { - if ($ARGV[$n] eq "-$flag") { - $specified{$flag} = 1; - } - } -} - -# check for mandatory options -foreach $flag ("i","a","b","p","o","l") { - unless ($specified{$flag}) { - usage(); - die("Error: -$flag not specified"); - } -} - -# get opts from hash -getopts('i:a:b:p:o:q:k:l:', \%opts); -$genofilename = $opts{"i"}; -$indfilename = $opts{"b"}; -$snpfilename = $opts{"a"}; -$pcafilename = $opts{"p"}; -$outfilename = $opts{"o"}; -$logfilename = $opts{"l"}; -$qtmode = "NO"; -if ( $specified{"q"} ) { - $qtmode = $opts{"q"}; -} -$k = 1; -if ( $specified{"k"} ) { - $k = $opts{"k"}; -} - -# write parameter file -open(PAR, ">$outfilename.par") || die("Error: unable to open $outfilename.par\n"); -print PAR "genotypename: $genofilename\n"; -print PAR "snpname: $snpfilename\n"; -print PAR "indivname: $indfilename\n"; -print PAR "pcaname: $pcafilename\n"; -print PAR "outputname: $outfilename\n"; -print PAR "numpc: $k\n"; -print PAR "qtmode: $qtmode\n"; -close(PAR); - -# run smarteigenstrat -$cmd = "smarteigenstrat -p $outfilename.par >$logfilename"; -print "$cmd\n"; -system($cmd); - -sub usage { - print "smarteigenstrat.perl -i -a -b -p -o "; - print " -l -k -q "; - print "\n"; - print "-i genotype file (PED, PACKEDPED, EIGENSTRAT, ANCESTRYMAP or PACKEDANCESTRYMAP format)"; - print "-o output file (chisq)\n"; - print "-l logfile (screen output,including error messages)\n"; - print "-q YES for quantitative phenotype or NO otherwise\n"; - print "\n"; - print "For quantitative phenotype, sixth column of .ped file or third column of EIGENSTRAT .ind file\n"; - print "should be real numbers. For non-quantitative phenotype, sixth column of .ped or third column\n"; - print "should be 'Case' or 'Control'\n"; -} - - - - - - diff -Nru eigensoft-6.1.2+dfsg/bin/smartpca.perl eigensoft-6.1.3+dfsg/bin/smartpca.perl --- eigensoft-6.1.2+dfsg/bin/smartpca.perl 1970-01-01 00:00:00.000000000 +0000 +++ eigensoft-6.1.3+dfsg/bin/smartpca.perl 2016-07-29 14:26:24.000000000 +0000 @@ -0,0 +1,156 @@ +#!/usr/bin/perl + +use Getopt::Std ; +use File::Basename ; + +### process flags +# -w poplist is compute eigenvectors using populations in poplist, then project +# -y poplistplot is use populations in poplistplot for plot +# -z badsnpfile is use badsnpname: badsnpfile in call to smartpca +my @flaglist = ("i","a","b","k","o","p","e","l","m","q","t","s","w","y","z"); +$x = @ARGV; +for($n=0; $n<$x; $n++) +{ + foreach $flag (@flaglist) + { + if($ARGV[$n] eq "-$flag") { $specified{$flag} = 1; } + } +} +foreach $flag ("i","a","b","o","p","e","l") +{ + unless($specified{$flag}) { die("OOPS -$flag flag not specified"); } +} +getopts('i:a:b:k:o:p:e:l:m:t:s:w:y:z:q:',\%opts); +$i = $opts{"i"}; +$a = $opts{"a"}; +$b = $opts{"b"}; +$k = 10; if($specified{"k"}) { $k = $opts{"k"}; } +$o = $opts{"o"}; +$q = 0; if($specified{"q"}) { $q = $opts{"q"}; } +$p = $opts{"p"}; +$e = $opts{"e"}; +$l = $opts{"l"}; +$m = 5; if($specified{"m"}) { $m = $opts{"m"}; } +$t = 10; if($specified{"t"}) { $t = $opts{"t"}; } +$s = 6.0; if($specified{"s"}) { $s = $opts{"s"}; } +if($specified{"w"}) { $w = $opts{"w"}; } +if($specified{"y"}) { $y = $opts{"y"}; } +if($specified{"z"}) { $z = $opts{"z"}; } + +### run smartpca +$parfile = "$o.par"; +$evec = "$o.evec"; +open(PARFILE,">$parfile") || die("OOPS couldn't open file $parfile for writing"); +print PARFILE ("genotypename: $i\n"); +print PARFILE ("snpname: $a\n"); +print PARFILE ("indivname: $b\n"); +print PARFILE ("evecoutname: $evec\n"); +print PARFILE ("evaloutname: $e\n"); +print PARFILE ("altnormstyle: NO\n"); +print PARFILE ("numoutevec: $k\n"); +print PARFILE ("numoutlieriter: $m\n"); +print PARFILE ("numoutlierevec: $t\n"); +print PARFILE ("outliersigmathresh: $s\n"); +print PARFILE ("qtmode: $q\n"); +if($specified{"w"}) { print PARFILE ("poplistname: $w\n"); } +if($specified{"z"}) { print PARFILE ("badsnpname: $z\n"); } +close(PARFILE); +$command = "smartpca"; # MUST put bin directory in path +$command .= " -p $parfile >$l"; +print("$command\n"); +system("$command"); + +### make string of populations for ploteig +$popstring = ""; +open(EVEC,$evec) || die("OOPS couldn't open file $evec for reading"); +while($line = ) +{ + chomp($line); + my @array = split(/[\t ]+/,$line); + $x = @array; + if($array[1] =~ /eigvals/) { next; } # eigvals header line + $pop = $array[$x-1]; + if($popfound{$pop}) { next; } + $popstring = $popstring . "$pop:"; + $popfound{$pop} = 1; +} +close(EVEC); +chop($popstring); # remove last ":" + +if($specified{"y"}) +{ + ### make string of populations for ploteig based on -y flag input + $popstring = ""; + open(Y,$y) || die("COF"); + while($line = ) + { + chomp($line); + $popstring .= "$line:"; + } + chop($popstring); +} + +### cax ploteig +$command = "ploteig"; # MUST put bin directory in path +$command .= " -i $evec"; +$command .= " -c 1:2 "; +$command .= " -p $popstring "; +$command .= " -x "; +$command .= " -y "; +$command .= " -o $p.xtxt "; # must end in .xtxt +print("$command\n"); +system("$command"); + +### translate .evec file to .pca file expected by eigenstrat program +### Note: .evec file does not contain entries for outliers +### .pca file does contain entries (set to all 0.0) for outliers + +# ----- If this looks like a PLINK run, call the PLINK kludge +if ( $i =~ m/\.ped$/ || $i =~ m/\.PED/ ) { + $command = "evec2pca-ped.perl $k $evec $b $o"; +} +else { + $command = "evec2pca.perl $k $evec $b $o"; +} +print("$command\n"); +system("$command"); + +### If labels are Case and Control only, compute correlations between +### Case/Control status and each eigenvector. Append to logfile. +if(($popstring eq "Case:Control") || ($popstring eq "Control:Case")) +{ + open(LOG,">>$l") || die("OOPS couldn't open file $l for appending"); + print LOG ("\n"); + for($x=0; $x<$k; $x++) # compute correlation for evec $x + { + open(EVEC,$evec) || die("OOPS couldn't open file $evec for reading"); + $sum1=0; $sumx=0; $sumxx=0; $sumy=0; $sumyy=0; $sumxy=0; + $line = ; chomp($line); # eigvals line + while($line = ) + { + chomp($line); + my @array = split(/[\t ]+/,$line); + $this = $array[2+$x]; + $sumy += $this; + $sumyy += $this*$this; + $sum1 += 1; + if($line =~ /Case/) # Case is 1, Control is 0 + { + $sumx += 1; + $sumxx += 1; + $sumxy += $this; + } + } + close(EVEC); + $meanx = $sumx/$sum1; + $meany = $sumy/$sum1; + if($sum1 == 0) { next; } + $sdevx = sqrt($sumxx/$sum1 - $meanx*$meanx); + $sdevy = sqrt($sumyy/$sum1 - $meany*$meany); + if($sdevx * $sdevy == 0) { next; } + $corr = ($sumxy/$sum1) / ($sdevx*$sdevy); + $x1 = $x+1; + printf LOG ("Correlation between eigenvector $x1 (of $k) and Case/Control status is %.03f\n",$corr); + } + close(LOG); +} diff -Nru eigensoft-6.1.2+dfsg/bin/smartpca.pl eigensoft-6.1.3+dfsg/bin/smartpca.pl --- eigensoft-6.1.2+dfsg/bin/smartpca.pl 2016-07-11 14:18:38.000000000 +0000 +++ eigensoft-6.1.3+dfsg/bin/smartpca.pl 1970-01-01 00:00:00.000000000 +0000 @@ -1,156 +0,0 @@ -#!/usr/bin/perl - -use Getopt::Std ; -use File::Basename ; - -### process flags -# -w poplist is compute eigenvectors using populations in poplist, then project -# -y poplistplot is use populations in poplistplot for plot -# -z badsnpfile is use badsnpname: badsnpfile in call to smartpca -my @flaglist = ("i","a","b","k","o","p","e","l","m","q","t","s","w","y","z"); -$x = @ARGV; -for($n=0; $n<$x; $n++) -{ - foreach $flag (@flaglist) - { - if($ARGV[$n] eq "-$flag") { $specified{$flag} = 1; } - } -} -foreach $flag ("i","a","b","o","p","e","l") -{ - unless($specified{$flag}) { die("OOPS -$flag flag not specified"); } -} -getopts('i:a:b:k:o:p:e:l:m:t:s:w:y:z:q:',\%opts); -$i = $opts{"i"}; -$a = $opts{"a"}; -$b = $opts{"b"}; -$k = 10; if($specified{"k"}) { $k = $opts{"k"}; } -$o = $opts{"o"}; -$q = 0; if($specified{"q"}) { $q = $opts{"q"}; } -$p = $opts{"p"}; -$e = $opts{"e"}; -$l = $opts{"l"}; -$m = 5; if($specified{"m"}) { $m = $opts{"m"}; } -$t = 10; if($specified{"t"}) { $t = $opts{"t"}; } -$s = 6.0; if($specified{"s"}) { $s = $opts{"s"}; } -if($specified{"w"}) { $w = $opts{"w"}; } -if($specified{"y"}) { $y = $opts{"y"}; } -if($specified{"z"}) { $z = $opts{"z"}; } - -### run smartpca -$parfile = "$o.par"; -$evec = "$o.evec"; -open(PARFILE,">$parfile") || die("OOPS couldn't open file $parfile for writing"); -print PARFILE ("genotypename: $i\n"); -print PARFILE ("snpname: $a\n"); -print PARFILE ("indivname: $b\n"); -print PARFILE ("evecoutname: $evec\n"); -print PARFILE ("evaloutname: $e\n"); -print PARFILE ("altnormstyle: NO\n"); -print PARFILE ("numoutevec: $k\n"); -print PARFILE ("numoutlieriter: $m\n"); -print PARFILE ("numoutlierevec: $t\n"); -print PARFILE ("outliersigmathresh: $s\n"); -print PARFILE ("qtmode: $q\n"); -if($specified{"w"}) { print PARFILE ("poplistname: $w\n"); } -if($specified{"z"}) { print PARFILE ("badsnpname: $z\n"); } -close(PARFILE); -$command = "smartpca"; # MUST put bin directory in path -$command .= " -p $parfile >$l"; -print("$command\n"); -system("$command"); - -### make string of populations for ploteig -$popstring = ""; -open(EVEC,$evec) || die("OOPS couldn't open file $evec for reading"); -while($line = ) -{ - chomp($line); - my @array = split(/[\t ]+/,$line); - $x = @array; - if($array[1] =~ /eigvals/) { next; } # eigvals header line - $pop = $array[$x-1]; - if($popfound{$pop}) { next; } - $popstring = $popstring . "$pop:"; - $popfound{$pop} = 1; -} -close(EVEC); -chop($popstring); # remove last ":" - -if($specified{"y"}) -{ - ### make string of populations for ploteig based on -y flag input - $popstring = ""; - open(Y,$y) || die("COF"); - while($line = ) - { - chomp($line); - $popstring .= "$line:"; - } - chop($popstring); -} - -### cax ploteig -$command = "ploteig"; # MUST put bin directory in path -$command .= " -i $evec"; -$command .= " -c 1:2 "; -$command .= " -p $popstring "; -$command .= " -x "; -$command .= " -y "; -$command .= " -o $p.xtxt "; # must end in .xtxt -print("$command\n"); -system("$command"); - -### translate .evec file to .pca file expected by eigenstrat program -### Note: .evec file does not contain entries for outliers -### .pca file does contain entries (set to all 0.0) for outliers - -# ----- If this looks like a PLINK run, call the PLINK kludge -if ( $i =~ m/\.ped$/ || $i =~ m/\.PED/ ) { - $command = "evec2pca-ped.perl $k $evec $b $o"; -} -else { - $command = "evec2pca.perl $k $evec $b $o"; -} -print("$command\n"); -system("$command"); - -### If labels are Case and Control only, compute correlations between -### Case/Control status and each eigenvector. Append to logfile. -if(($popstring eq "Case:Control") || ($popstring eq "Control:Case")) -{ - open(LOG,">>$l") || die("OOPS couldn't open file $l for appending"); - print LOG ("\n"); - for($x=0; $x<$k; $x++) # compute correlation for evec $x - { - open(EVEC,$evec) || die("OOPS couldn't open file $evec for reading"); - $sum1=0; $sumx=0; $sumxx=0; $sumy=0; $sumyy=0; $sumxy=0; - $line = ; chomp($line); # eigvals line - while($line = ) - { - chomp($line); - my @array = split(/[\t ]+/,$line); - $this = $array[2+$x]; - $sumy += $this; - $sumyy += $this*$this; - $sum1 += 1; - if($line =~ /Case/) # Case is 1, Control is 0 - { - $sumx += 1; - $sumxx += 1; - $sumxy += $this; - } - } - close(EVEC); - $meanx = $sumx/$sum1; - $meany = $sumy/$sum1; - if($sum1 == 0) { next; } - $sdevx = sqrt($sumxx/$sum1 - $meanx*$meanx); - $sdevy = sqrt($sumyy/$sum1 - $meany*$meany); - if($sdevx * $sdevy == 0) { next; } - $corr = ($sumxy/$sum1) / ($sdevx*$sdevy); - $x1 = $x+1; - printf LOG ("Correlation between eigenvector $x1 (of $k) and Case/Control status is %.03f\n",$corr); - } - close(LOG); -} diff -Nru eigensoft-6.1.2+dfsg/debian/changelog eigensoft-6.1.3+dfsg/debian/changelog --- eigensoft-6.1.2+dfsg/debian/changelog 2016-07-21 06:49:12.000000000 +0000 +++ eigensoft-6.1.3+dfsg/debian/changelog 2016-08-05 21:40:38.000000000 +0000 @@ -1,3 +1,10 @@ +eigensoft (6.1.3+dfsg-1) unstable; urgency=medium + + * New upstream version + * Upstream has changed extensions from .pl to .perl + + -- Andreas Tille Fri, 05 Aug 2016 23:40:38 +0200 + eigensoft (6.1.2+dfsg-1) unstable; urgency=medium [ Andreas Tille ] diff -Nru eigensoft-6.1.2+dfsg/debian/install eigensoft-6.1.3+dfsg/debian/install --- eigensoft-6.1.2+dfsg/debian/install 2016-07-21 06:49:12.000000000 +0000 +++ eigensoft-6.1.3+dfsg/debian/install 2016-08-05 21:40:38.000000000 +0000 @@ -1,6 +1,6 @@ bin/smarteigenstrat usr/lib/eigensoft bin/smartpca usr/lib/eigensoft -bin/gc.pl usr/lib/debian-med/bin +bin/gc.perl usr/lib/debian-med/bin bin/* usr/bin CONVERTF usr/share/eigensoft EIGENSTRAT usr/share/eigensoft diff -Nru eigensoft-6.1.2+dfsg/debian/links eigensoft-6.1.3+dfsg/debian/links --- eigensoft-6.1.2+dfsg/debian/links 2016-07-21 06:49:12.000000000 +0000 +++ eigensoft-6.1.3+dfsg/debian/links 2016-08-05 21:40:38.000000000 +0000 @@ -1 +1 @@ -usr/lib/debian-med/bin/gc.pl usr/bin/gc-eigensoft +usr/lib/debian-med/bin/gc.perl usr/bin/gc-eigensoft diff -Nru eigensoft-6.1.2+dfsg/debian/rules eigensoft-6.1.3+dfsg/debian/rules --- eigensoft-6.1.2+dfsg/debian/rules 2016-07-21 06:49:12.000000000 +0000 +++ eigensoft-6.1.3+dfsg/debian/rules 2016-08-05 21:40:38.000000000 +0000 @@ -28,7 +28,7 @@ find debian/eigensoft/usr/share/ -type f -empty -delete # file consists only from single line with wrong interpreter find debian/eigensoft/usr/share/ -type f -name HGDP.X.perl -delete - # Fix wrappers and drop *.pl extension in /usr/bin while keeping + # Fix wrappers and drop *.perl extension in /usr/bin while keeping # the original name in /usr/lib/debian-med/bin mkdir -p debian/$(DEBPKGNAME)/usr/lib/debian-med/bin for pl in $(WRAPPERS) ; do \ @@ -38,11 +38,11 @@ -e "s?\(cmd[[:space:]]*=[[:space:]]*\"\)\($${pl}\)?\1/usr/lib/$(DEBPKGNAME)/\2?" \ -e "s?\(cmd[[:space:]]*=[[:space:]]*\"\)\(evec2pca.*\)\.perl?\1?" \ -e "s?\($${pl}\)\.perl?\1?" \ - debian/$(DEBPKGNAME)/usr/bin/$${pl}.pl ; \ - cp -a debian/$(DEBPKGNAME)/usr/bin/$${pl}.pl debian/$(DEBPKGNAME)/usr/bin/$${pl} ; \ - mv debian/$(DEBPKGNAME)/usr/bin/$${pl}.pl debian/$(DEBPKGNAME)/usr/lib/debian-med/bin ; \ + debian/$(DEBPKGNAME)/usr/bin/$${pl}.perl ; \ + cp -a debian/$(DEBPKGNAME)/usr/bin/$${pl}.perl debian/$(DEBPKGNAME)/usr/bin/$${pl} ; \ + mv debian/$(DEBPKGNAME)/usr/bin/$${pl}.perl debian/$(DEBPKGNAME)/usr/lib/debian-med/bin ; \ done - rm debian/$(DEBPKGNAME)/usr/bin/gc.pl + rm debian/$(DEBPKGNAME)/usr/bin/gc.perl override_dh_fixperms: dh_fixperms diff -Nru eigensoft-6.1.2+dfsg/.gitignore eigensoft-6.1.3+dfsg/.gitignore --- eigensoft-6.1.2+dfsg/.gitignore 1970-01-01 00:00:00.000000000 +0000 +++ eigensoft-6.1.3+dfsg/.gitignore 2016-07-29 14:26:24.000000000 +0000 @@ -0,0 +1,32 @@ +# Object files +*.o +*.ko +*.obj +*.elf + +# Precompiled Headers +*.gch +*.pch + +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +# Debug files +*.dSYM/ diff -Nru eigensoft-6.1.2+dfsg/README eigensoft-6.1.3+dfsg/README --- eigensoft-6.1.2+dfsg/README 2016-07-11 14:18:38.000000000 +0000 +++ eigensoft-6.1.3+dfsg/README 2016-07-29 14:26:24.000000000 +0000 @@ -1,10 +1,13 @@ -EIGENSOFT version 6.1.2, 6/27/16 (for Linux only) +EIGENSOFT version 6.1.3, 7/29/16 (for Linux only) The EIGENSOFT package implements methods from the following 3 papers: Patterson et al. 2006 PLoS Genet (population structure) Price et al. 2006 Nat Genet (EIGENSTRAT stratification correction) Galinsky et al. 2016 Am J Hum Genet (FastPCA and PC-based selection statistic) +NEW features of EIGENSOFT version 6.1.3 include: +-- Restored script file extensions to .perl instead of .pl + NEW features of EIGENSOFT version 6.1.2 include: -- Updated license info to be GPL compliant required by linking the GSL