--- muscle-3.8.31.orig/debian/muscle.manpages +++ muscle-3.8.31/debian/muscle.manpages @@ -0,0 +1 @@ +debian/muscle.1 --- muscle-3.8.31.orig/debian/muscle.install +++ muscle-3.8.31/debian/muscle.install @@ -0,0 +1 @@ +muscle /usr/bin --- muscle-3.8.31.orig/debian/dirs +++ muscle-3.8.31/debian/dirs @@ -0,0 +1 @@ +usr/bin --- muscle-3.8.31.orig/debian/watch +++ muscle-3.8.31/debian/watch @@ -0,0 +1,2 @@ +version=3 +http://www.drive5.com/muscle/downloads.htm http://www.drive5.com/muscle/downloads.+?/muscle(.*)_src\.tar\.gz --- muscle-3.8.31.orig/debian/README.source +++ muscle-3.8.31/debian/README.source @@ -0,0 +1,3 @@ +The original upstream archive have been repacked to remove the muscle21 +program, for which it was unsure if source was provided. To simplify building, +the contents of the src directory were moved to the root. --- muscle-3.8.31.orig/debian/muscle.docs +++ muscle-3.8.31/debian/muscle.docs @@ -0,0 +1 @@ +debian/muscle.html --- muscle-3.8.31.orig/debian/control +++ muscle-3.8.31/debian/control @@ -0,0 +1,26 @@ +Source: muscle +Section: science +Priority: optional +Maintainer: Debian Med Packaging Team +DM-Upload-Allowed: yes +Uploaders: Steffen Moeller , + Charles Plessy +Build-Depends: debhelper (>= 8), cdbs +Standards-Version: 3.9.2 +Vcs-Browser: http://svn.debian.org/wsvn/debian-med/trunk/packages/muscle +Vcs-Svn: svn://svn.debian.org/svn/debian-med/trunk/packages/muscle/trunk/ +Homepage: http://www.drive5.com/muscle/ + +Package: muscle +Architecture: any +Depends: ${shlibs:Depends}, ${misc:Depends} +Conflicts: muscle-doc +Replaces: muscle-doc +Provides: muscle-doc +Enhances: seaview, t-coffee +Description: Multiple alignment program of protein sequences + MUSCLE is a multiple alignment program for protein sequences. MUSCLE + stands for multiple sequence comparison by log-expectation. 
In the + authors tests, MUSCLE achieved the highest scores of all tested + programs on several alignment accuracy benchmarks, and is also one of + the fastest programs out there. --- muscle-3.8.31.orig/debian/changelog +++ muscle-3.8.31/debian/changelog @@ -0,0 +1,101 @@ +muscle (1:3.8.31-1) unstable; urgency=low + + [ Charles Plessy ] + * New upstream release (Closes: #643443). + * debian/control: Enhances: t-coffee. + * Changed the doc-base section according to the new policy. + * Updated my email address. + * Updated debian/watch to new version scheme and download location. + * Repack usptream archive and implemented a get-orig-source target + (debian/rules, debian/README.source, debian/copyright). + * Use Debhelper 8 (debian/control, debian/compat). + * Build directly from debian/rules targets. + * Corrected VCS URLs in debian/control. + * Conforms to Debian Policy 3.9.2 (debian/control, no changes needed). + + [ David Paleino ] + * removed myself from Uploaders (debian/control). + + -- Charles Plessy Sun, 13 Nov 2011 18:38:06 +0900 + +muscle (3.70+fix1-2) unstable; urgency=low + + * debian/control Conflicts: and Replaces: muscle-doc (Closes: #465607) + + -- Charles Plessy Thu, 14 Feb 2008 10:44:17 +0900 + +muscle (3.70+fix1-1) unstable; urgency=low + + [ Charles Plessy ] + * New upstream version, buildable with GCC 4.3 (Closes: #462707) + The version number was not increased upstream when the sources were + changed. We name this new version in Debian "3.70+fix1". + * Updated manual page. + * Converted the source package to CDBS, dropped Makefile patch. + * Fused muscle and muscle-doc. + + [ Nelson A. de Oliveira ] + * Fixed watch file (Closes: #462827) + + -- Charles Plessy Wed, 06 Feb 2008 12:04:31 +0900 + +muscle (3.70-1) unstable; urgency=low + + [ Charles Plessy ] + * New upstream release (bugfixes plus undocumented new features). + * debian/control: + - Add Subversion repository. + - Swiched to quilt. 
+ - Enhaces: seaview because SeaView can call muscle to re-align sequences. + - Moved the Homepage: field out from the package's description. + - Using debhelper 5. + - Removed [Biology] from package description as there are Debtags now. + - Checked that muscle conforms to Policy 3.7.3. + - Updated Steffen's email address. + * Handling nostrip build option (policy 10.1) (Closes: #437599). + * Updated manpage. + * debian/copyright made machine-readable. + + [ Nelson A. de Oliveira ] + * Added watch file. + + [ David Paleino ] + * debian/manpage.xml moved to debian/muscle.1.xml + * debian/muscle.1 added - statically built + * debian/manpages removed - passing arguments to dh_installman + directly + * debian/control: + - B-D updated (see above) + - added myself to Uploaders + - moved XS-Vcs-* to Vcs-* + * debian/rules: + - manpages statically built + - minor changes + + -- Charles Plessy Sat, 12 Jan 2008 16:55:48 +0900 + +muscle (3.60-1) unstable; urgency=low + + * New upstram release (Closes: Bug#361742). + * New maintainers email addresses. + + -- Charles Plessy Sat, 5 Aug 2006 09:57:27 +0900 + +muscle (3.52-2) unstable; urgency=low + + * Added missing build dependencies (Closes: Bug#287684). + + -- Steffen Moeller Wed, 29 Dec 2004 21:50:47 +0200 + +muscle (3.52-1) unstable; urgency=low + + * New upstream version. + * Fix build on arch other than Pentium (Closes: Bug#285000). + + -- Steffen Moeller Sun, 18 Dec 2004 00:06:00 +0200 + +muscle (3.51-1) unstable; urgency=low + + * Initial Release (Closes: Bug#280411). + + -- Steffen Moeller Sun, 19 Sep 2004 00:51:19 +0200 --- muscle-3.8.31.orig/debian/copyright +++ muscle-3.8.31/debian/copyright @@ -0,0 +1,24 @@ +Format: http://dep.debian.net/deps/dep5/ +Source: http://www.drive5.com/muscle/downloads3.8.31/muscle3.8.31_src.tar.gz +Comment: This release contains a potentially sourceless binary file, muscle21, that was removed. + +Files: * +Copyright: © Robert C. 
Edgar "Bob" +License: PD-dedication + MUSCLE is public domain software + The MUSCLE software, including object and source code, is hereby donated + to the public domain. + . + Disclaimer of warranty + THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER + EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + +Files: debian/* +Copyright: © 2004 Steffen Moeller + © 2007 Nelson A. de Oliveira + © 2007 David Paleino + © 2006-2008 Charles Plessy +License: PD-dedication + Please treat this work as if it were in public domain. + --- muscle-3.8.31.orig/debian/muscle.1 +++ muscle-3.8.31/debian/muscle.1 @@ -0,0 +1,135 @@ +.\" Title: MUSCLE +.\" Author: Robert Elgar +.\" Generator: DocBook XSL Stylesheets v1.73.2 +.\" Date: 02/06/2008 +.\" Manual: Muscle Manual +.\" Source: muscle 3.7 +.\" +.TH "MUSCLE" "1" "02/06/2008" "muscle 3.7" "Muscle Manual" +.\" disable hyphenation +.nh +.\" disable justification (adjust text to left margin only) +.ad l +.SH "NAME" +muscle - Multiple Protein Sequence Alignment +.SH "SYNOPSIS" +.HP 7 +\fBmuscle\fR \fB\-in\ \fR\fB\fIinput\ file\ (fasta)\fR\fR [\fB\-out\ \fR\fB\fIoutput\ file\ (default\ fasta)\fR\fR] [\fB\-diags\fR] [\fB\-log\ \fR\fB\fIlog\ file\fR\fR] [\fB\-maxiters\ \fR\fB\fIn\fR\fR] [\fB\-maxhours\ \fR\fB\fIn\fR\fR] [\fB\-maxmb\ \fR\fB\fIm\fR\fR] [\fB\-html\fR] [\fB\-msf\fR] [\fB\-clw\fR] [\fB\-clwstrict\fR] [\fB\-log[a]\ \fR\fB\fIlogfile\fR\fR] [\fB\-quiet\fR] [\fB\-stable\fR] [\fB\-group\fR] [\fB\-version\fR] +.SH "DESCRIPTION" +.PP +This manual page documents briefly the +\fBmuscle\fR +command\. +.PP +\fBmuscle\fR +aligns protein sequences and is considered superior and faster than Clustal\ W\. 
+.SH "OPTIONS" +.PP +\fB\-in \fR\fB\fIinput file\fR\fR +.RS 4 +Path to FASTA formatted input file +.RE +.PP +\fB\-out \fR\fB\fIoutput file\fR\fR +.RS 4 +Path to output file, FASTA formatted by default +.RE +.PP +\fB\-diags\fR +.RS 4 +Find diagonals (faster for similar sequences) +.RE +.PP +\fB\-maxiters \fR\fB\fIn\fR\fR +.RS 4 +Maximum number of iterations (integer, default 16) +.RE +.PP +\fB\-maxhours \fR\fB\fIn\fR\fR +.RS 4 +Maximum time to iterate in hours (default no limit) +.RE +.PP +\fB\-maxmb \fR\fB\fIm\fR\fR +.RS 4 +Maximum memory to allocate in Mb (default 80% of RAM) +.RE +.PP +\fB\-html\fR +.RS 4 +Write output in HTML format (default FASTA) +.RE +.PP +\fB\-msf\fR +.RS 4 +Write output in MSF format (default FASTA) +.RE +.PP +\fB\-clw\fR +.RS 4 +Write output in Clustal\ W format (default FASTA) +.RE +.PP +\fB\-clwstrict\fR +.RS 4 +As \-clw, with \'CLUSTAL W (1\.81)\' header +.RE +.PP +\fB\-log[a] \fR\fB\fIlogfile\fR\fR +.RS 4 +Log to file (append if \-loga, overwrite if \-log) +.RE +.PP +\fB\-quiet\fR +.RS 4 +Do not write progress messages to stderr +.RE +.PP +\fB\-stable\fR +.RS 4 +Output sequences in input order (default is \-group) +.RE +.PP +\fB\-group\fR +.RS 4 +Group sequences by similarity (this is the default) +.RE +.PP +\fB\-version\fR +.RS 4 +Display version information and exit +.RE +.SH "SEE ALSO" +.PP + +\fBclustalw\fR(1), +\fBseaview\fR(1), +\fBt_coffee\fR(1)\. +.SH "AUTHORS" +.PP +\fBRobert Elgar\fR +.sp -1n +.IP "" 4 +Wrote Muscle\. +.PP +\fBSteffen Moeller\fR <\&moeller@debian\.org\&> +.sp -1n +.IP "" 4 +Wrote this manpage\. +.PP +\fBCharles Plessy\fR <\&charles\-debian\-nospam@plessy\.org\&> +.sp -1n +.IP "" 4 +Updated this manpage\. +.SH "COPYRIGHT" +Copyright \(co 2003, 2004 Steffen Moeller (manpage) +.br +Copyright \(co 2007, 2008 Charles Plessy (manpage) +.br +.PP +Muscle is in the public domain, and therefore not subjected to copyright\. 
+.PP +This manual page was written by Steffen Moeller moeller@debian\.org for the +Debian(TM) +system (but may be used by others)\. Permission is granted to copy, distribute and/or modify this document as if it were in public domain\. +.sp --- muscle-3.8.31.orig/debian/muscle.html +++ muscle-3.8.31/debian/muscle.html @@ -0,0 +1,2042 @@ + + + + + +MUSCLE User Guide + + + + + + + +
+ +

 

+ +

 

+ +

 

+ +

 

+ +

MUSCLE User Guide

+ +

                                                                                                                                                                            

+ +

 

+ +

 

+ +

 

+ +

 

+ +

 

+ +

 

+ +

Multiple sequence comparison +by log-expectation

+ +

by Robert C. Edgar

+ +

 

+ +

Version 3.5

+ +

August 2004

+ +

 

+ +

 

+ +

http://www.drive5.com/muscle

+ +

email: muscle (at) drive5.com

+ +

 

+ +

MUSCLE is updated regularly. +Send me an e-mail if you would like to be notified of new releases.

+ +

 

+ +

 

+ +

Citation:

+ +

 

+ +

Edgar, +Robert C. (2004), MUSCLE: multiple sequence alignment with high accuracy and +high throughput, Nucleic Acids Research 32(5), 1792-97.

+ +
+
+ +

Table of +Contents

+ +

1 Introduction. 3

+ +

2 Quick Start 3

+ +

2.1 +Installation. 3

+ +

2.2 Making an +alignment 3

+ +

2.3 Large +alignments. 3

+ +

2.4 Fastest +speed. 3

+ +

2.5 Huge +alignments. 4

+ +

2.6 Accuracy: +caveat emptor 4

+ +

2.7 +Pipelining. 4

+ +

2.8 Refining +an existing alignment 4

+ +

2.9 +Profile-profile alignment 4

+ +

2.10 Sequence +clustering. 5

+ +

3 File +Formats. 5

+ +

3.1 Input +files. 5

+ +

3.1.1 Amino +acid sequences. 5

+ +

3.1.2 +Nucleotide sequences. 5

+ +

3.1.3 +Determining sequence type. 5

+ +

3.2 Output +files. 6

+ +

3.2.1 +Sequence grouping. 6

+ +

3.3 CLUSTALW +format 6

+ +

3.4 MSF format 6

+ +

3.5 HTML +format 6

+ +

4 Using +MUSCLE. 6

+ +

4.1 How the +algorithm works. 6

+ +

4.2 +Command-line options. 7

+ +

4.3 The +maxiters option. 7

+ +

4.4 The +maxtrees option. 8

+ +

4.5 The +maxhours option. 8

+ +

4.6 The +profile scoring function. 8

+ +

4.7 Diagonal +optimization. 8

+ +

4.8 Anchor +optimization. 8

+ +

4.9 Log file. 8

+ +

4.10 Progress +messages. 9

+ +

4.11 Running +out of memory. 9

+ +

4.12 +Troubleshooting. 9

+ +

4.13 +Technical support 10

+ +

5 Command +Line Reference. 10

+ +

 

+ +
+
+ +

1 Introduction

+ +

MUSCLE is a program for creating multiple alignments of +amino acid or nucleotide sequences. A range of options is provided that give +you the choice of optimizing accuracy, speed, or some compromise between the +two. Default parameters are those that give the best average accuracy in our +tests. Using versions current at the time of writing, my tests show that MUSCLE +can achieve both better average accuracy and better speed than CLUSTALW or T‑Coffee, +depending on the chosen options.

+ +

2 Quick +Start

+ +

The MUSCLE algorithm is delivered as a command-line program +called muscle. If you are running under Linux or Unix you will be +working at a shell prompt. If you are running under Windows, you should be in a +command window (nostalgically known to us older people as a DOS prompt). If you +don't know how to use command-line programs, you should get help from a local +guru.

+ +

2.1 Installation

+ +

Copy the muscle binary file to a directory that is +accessible from your computer. That's it—there are no configuration files, +libraries, environment variables or other settings to worry about. If you are +using Windows, then the binary file is named muscle.exe. From now on muscle +should be understood to mean "muscle if you are using Linux or Unix, +muscle.exe if you are using Windows".

+ +

2.2 Making an +alignment

+ +

Make a FASTA file containing some sequences. (If you are not +familiar with FASTA format, it is described in detail later in this Guide.) For +now, just to make things fast, limit the number of sequences in the file to no +more than 50 and the sequence length to be no more than 500. Call the input +file seqs.fa. (An example file named seqs.fa is distributed with +the standard MUSCLE package). Make sure the directory containing the muscle +binary is in your path. (If it isn't, you can run it by typing the full path +name, and the following example command lines must be changed accordingly). Now +type:

+ +

 

+ +

muscle -in seqs.fa -out seqs.afa

+ +

 

+ +

You should see some progress messages. If muscle +completes successfully, it will create a file seqs.afa containing the +alignment. By default, output is created in "aligned FASTA" format +(hence the .afa extension). This is just like regular FASTA except that +gaps are added in order to align the sequences. This is a nice format for +computers but not very readable for people, so to look at the alignment you +will want an alignment viewer such as Belvu, or a script that converts FASTA to +a more readable format. You can also use the –msf command-line option to +request output in MSF format, which is easier to understand for people. If muscle +gives an error message and you don't know how to fix it, please read the +Troubleshooting section.

+ +

 

+ +

The default settings are designed to give the best accuracy, +so this may be all you need to know.

+ +

2.3 Large +alignments

+ +

If you have a large number of sequences (a few thousand), or +they are very long, then the default settings may be too slow for practical +use. A good compromise between speed and accuracy is to run just the first two +iterations of the algorithm. On average, this gives accuracy equal to T-Coffee +and speeds much faster than CLUSTALW. This is done by the option –maxiters 2, +as in the following example.

+ +

 

+ +

muscle -in seqs.fa -out seqs.afa -maxiters 2

+ +

2.4 Fastest +speed

+ +

If you want the fastest possible speed, then the following +example shows the applicable options for proteins.

+ +

 

+ +

muscle -in seqs.fa -out seqs.afa -maxiters 1 -diags1 -sv -distance1 +kbit20_3

+ +

 

+ +

For nucleotides, use:

+ +

 

+ +

muscle -in seqs.fa -out seqs.afa -maxiters 1 -diags1

+ +

 

+ +

At the time of writing, muscle with these options is faster +than any other multiple sequence alignment program that I have tested. The +alignments are not bad, especially when the sequences are closely related. +However, as you might expect, this blazing speed comes at the cost of the +lowest average accuracy of the options that muscle provides.

+ +

2.5 Huge +alignments

+ +

If you have a very large number of sequences (several +thousand), or they are very long, then the kbit20_3 option may cause +problems because it needs a relatively large amount of memory. Better is to use +the default distance measure, which is roughly 2× or 3× slower but needs less +memory, like this:

+ +

 

+ +

muscle -in seqs.fa -out seqs.afa -maxiters 1 -diags1 -sv

+ +

2.6 Accuracy: +caveat emptor

+ +

Why do I keep using the clumsy phrase "average +accuracy" instead of just saying "accuracy"? That's because the +quality of alignments produced by MUSCLE varies, as do those produced by other programs +such as CLUSTALW and T-Coffee. The state of the art leaves plenty of room for +improvement. Sometimes the fastest speed options to muscle give +alignments that are better than T-Coffee, though the reverse will more often be +the case. With challenging sets of sequences, it is a good idea to make several +different alignments using different muscle options and to try other programs +too. Regions where different alignments agree are more believable than regions +where they disagree.

+ +

2.7 Pipelining

+ +

Input can be taken from standard input, and output can be +written to standard output. This is the default, so our first example would +also work like this:

+ +

 

+ +

muscle < seqs.fa > seqs.afa

+ +

2.8 Refining +an existing alignment

+ +

You can ask muscle to try to improve an existing alignment +by using the –refine option. The input file must then be a FASTA file +containing an alignment. All sequences must be of equal length, gaps can be +specified using dots "." or dashes "–". For example:

+ +

 

+ +

muscle -in seqs.afa -out refined.afa -refine

+ +

2.9 Profile-profile +alignment

+ +

A fundamental step in the MUSCLE algorithm is aligning two +multiple sequence alignments, each of which contains some of the input +sequences. This operation is sometimes called "profile-profile +alignment". If you have two existing alignments of related sequences you +can use the –profile option of MUSCLE to align those two alignments. +Typical usage is:

+ +

 

+ +

muscle -profile -in1 one.afa -in2 two.afa -out both.afa

+ +

 

+ +

The alignments in one.afa and two.afa, which +must be in aligned FASTA format, are aligned to each other, keeping input +columns intact and inserting columns of gaps where needed. Output is stored in both.afa.

+ +

 

+ +

MUSCLE does not compute a similarity measure or measure of +statistical significance (such as an E-value), so this option is not useful for +discriminating homologs from unrelated sequences. For this task, I recommend Sadreyev +& Grishin's COMPASS program.

+ +

2.10 Sequence +clustering

+ +

The first stage in MUSCLE is a fast clustering algorithm. +This may be of use in other applications. Typical usage is:

+ +

 

+ +

muscle -cluster -in seqs.fa -tree1 tree.phy

+ +

 

+ +

The sequences will be clustered, and a tree written to tree.phy. +Options –weight1, –distance1, –cluster1 and –root1 can +be applied if desired. Note that by default, UPGMA clustering is used. You can +use

+ +

 –neighborjoining if you prefer, but note that this +is substantially slower than UPGMA for large numbers of sequences.

+ +

3 File +Formats

+ +

MUSCLE uses FASTA format for both input and output. For +output only, it also offers CLUSTALW, MSF and HTML formats using the –clw, +–msf and –html command-line options.

+ +

3.1 Input +files

+ +

Input files must be in FASTA format. These are plain text +files (word processing files such as Word documents are not understood!). Unix, +Windows and DOS text files are supported (end-of-line may be NL or CR NL). There +is no explicit limit on the length of a sequence, however if you are running a +32-bit version of muscle then the maximum will be very roughly 10,000 letters +due to maximum addressable size of tables required in memory. Each sequence starts +with an annotation line, which is recognized by having a greater-than symbol +">" as its first character. There is no limit on the length of an +annotation line (this is new as of version 3.5), and there is no requirement +that the annotation be unique. The sequence itself follows on one or more +subsequent lines, and is terminated either by the next annotation line or by +the end of the file.

+ +

3.1.1 Amino +acid sequences

+ +

The standard single-letter amino acid alphabet is used. Upper +and lower case is allowed, the case is not significant. The special characters +X, B, Z and U are understood. X means "unknown amino acid", B is D or +N, Z is E or Q. U is understood to be the 21st amino acid Selenocysteine. White +space (spaces, tabs and the end-of-line characters CR and NL) is allowed inside +sequence data. Dots "." and dashes "–" in sequences are +allowed and are discarded unless the input is expected to be aligned (e.g. for +the –refine option).

+ +

3.1.2 Nucleotide +sequences

+ +

The usual letters A, G, C, T and U stand for nucleotides. +The letters T and U are equivalent as far as MUSCLE is concerned. N is the +wildcard meaning "unknown nucleotide". R means A or G, Y means C or +T/U. Other wildcards, such as those used by RFAM, are not understood in this +version and will be replaced by Ns. If you would like support for other DNA / +RNA alphabets, please let me know.

+ +

3.1.3 Determining +sequence type

+ +

By default, MUSCLE looks at the first 100 letters in the +input sequence data (excluding gaps). If 95% or more of those letters are valid +nucleotides (AGCTUN), then the file is treated as nucleotides, otherwise as +amino acids. This method almost always guesses correctly, but you can make sure +by specifying the sequence type on the command line. This is done using the –seqtype +option, which can take the following values:

+ +

 

+ +

        –­seqtype protein                          Amino +acid

+ +

        –seqtype nucleo                          Nucleotide

+ +

        –seqtype auto                               Automatic +detection (default).

+ +

3.2 Output +files

+ +

By default, output is also written in FASTA format. All +letters are upper-case and gaps are represented by dashes "–".

+ +

3.2.1 Sequence +grouping

+ +

By default, MUSCLE re-arranges sequences so that similar +sequences are adjacent in the output file. (This is done by ordering sequences +according to a prefix traversal of the guide tree). This makes the alignment +easier to evaluate by eye. If you want the sequences to be output in the +same order as the input file, you can use the –stable option.

+ +

3.3 CLUSTALW +format

+ +

You can request CLUSTALW output by using the –clw +option. This should be compatible with CLUSTALW, with the exception of the +program name in the file header. You can ask MUSCLE to impersonate CLUSTALW by +writing "CLUSTAL W (1.81)" as the program name by using –clwstrict. +If you have problems parsing MUSCLE output with scripts designed for CLUSTALW, +please let me know.

+ +

3.4 MSF format

+ +

MSF format (similar to CLUSTALW) is requested by using the –msf +option. As with CLUSTALW format, this is easier for people to read than FASTA.

+ +

3.5 HTML +format

+ +

I've added an experimental feature starting in version 3.4. To +get a Web page as output, use the –html option. The alignment is colored +using a color scheme from Eric Sonnhammer's Belvu editor, which is my personal +favorite. A drawback of this option is that the Web page typically contains a +very large number of HTML tags, which can be slow to display in the Internet +Explorer browser. The Netscape browser works much better. If you have any ideas +about good ways to make Web pages, please let me know.

+ +

4 Using +MUSCLE

+ +

In this section we give more details of the MUSCLE algorithm +and the more important options offered by the muscle implementation.

+ +

4.1 How the +algorithm works

+ +

We won't give a complete description of the MUSCLE algorithm +here—for that, you will have to read the paper. But hopefully a summary will +help explain what some of the command-line options do and how they might be +useful in your work.

+ +

 

+ +

The first step is to calculate a tree. In CLUSTALW, this is +done as follows. Each pair of input sequences is aligned, and used to compute +the pair-wise identity of the pair. Identities are converted to a measure of +distance. Finally, the distance matrix is converted to a tree using a +clustering method (CLUSTALW uses neighbor-joining). If you have 1,000 +sequences, there are (1,000 × 999)/2 = +499,500 pairs, so aligning every pair can take a while. MUSCLE uses a much +faster, but somewhat more approximate, method to compute distances: it counts +the number of short sub-sequences (known as k-mers, k-tuples or +words) that two sequences have in common, without constructing an alignment. +This is typically around 3,000 times faster than CLUSTALW's method, but the trees +will generally be less accurate. We call this step "k-mer +clustering".

+ +

 

+ +

The second step is to use the tree to construct what is +known as a progressive alignment. At each node of the binary tree, a pair-wise +alignment is constructed, progressing from the leaves towards the root. The +first alignment will be made from two sequences. Later alignments will be one +of the three following types: sequence-sequence, profile-sequence or +profile-profile, where "profile" means the multiple alignment of the sequences +under a given internal node of the tree. This is very similar to what CLUSTALW +does once it has built a tree.

+ +

 

+ +

Now we have a multiple +alignment, which has been built very quickly compared with conventional methods, +mainly because of the distance calculation using k-mers rather than +alignments. The quality of this alignment is typically pretty good—it will +often tie or beat a T-Coffee alignment on our tests. However, on average, we +find that it can be improved by proceeding through the following steps.

+ +

 

+ +

From the multiple alignment, +we can now compute the pair-wise identities of each pair of sequences. This +gives us a new distance matrix, from which we estimate a new tree. We compare +the old and new trees, and re-align subgroups where needed to produce a +progressive multiple alignment from the new tree. If the two trees are +identical, there is nothing to do; if there are no subtrees that agree (very unusual), +then the whole progressive alignment procedure must be repeated from scratch. +Typically we find that the tree is pretty stable near the leaves, but some +re-alignments are needed closer to the root. This procedure (compute pair-wise +identities, estimate new tree, compare trees, re-align) is iterated until the +tree stabilizes or until a specified maximum number of iterations has been +done. We call this process "tree refinement", although it also tends +to improve the alignment.

+ +

 

+ +

We now keep the tree fixed +and move to a new procedure which is designed to improve the multiple +alignment. The set of sequences is divided into two subsets (i.e., we make a +bipartition on the set of sequences). A profile is constructed for each of the +two subsets based on the current multiple alignment. These two profiles are +then re-aligned to each other using the same pair-wise alignment algorithm as +used in the progressive stage. If this improves an "objective score" +that measures the quality of the alignment, then the new multiple alignment is +kept, otherwise it is discarded. By default, the objective score is the classic +sum-of-pairs score that takes the (sequence weighted) average of the pair-wise +alignment score of every pair of sequences in the alignment. Bipartitions are +chosen by deleting an edge in the guide tree, each of the two resulting +subtrees defines a subset of sequences. This procedure is called "tree +dependent refinement". One iteration of tree dependent refinement tries +bipartitions produced by deleting every edge of the tree in depth order moving +from the leaves towards the center of the tree. Iterations continue until +convergence or up to a specified maximum.

+ +

 

+ +

For convenience, the major +steps in MUSCLE are described as "iterations", though the first three +iterations all do quite different things and may take very different lengths of +time to complete. The tree-dependent refinement iterations 3, 4 ... are true +iterations and will take similar lengths of time.

+ +

 

+ + + + + + + + + + + + + + + + + + +
+

Iteration

+
+

Actions

+
+

1

+
+

Distance matrix by k-mer + clustering, estimate tree, progressive alignment according to this tree.

+

 

+
+

2

+
+

Distance matrix by + pair-wise identities from current multiple alignment, estimate tree, + progressive alignment according to new tree, repeat until convergence or specified + maximum number of times.

+

 

+
+

3, 4 ...

+
+

Tree-dependent refinement. One + iteration visits every edge in the tree one time.

+
+ +

4.2 Command-line +options

+ +

There are two types of command-line options: value options +and flag options. Value options are followed by the value of the given +parameter, for example –in <filename>; flag options just stand for +themselves, such as –msf. All options are a dash (not two dashes!) +followed by a long name; there are no single-letter equivalents. Value options +must be separated from their values by white space in the command line. Thus, muscle +does not follow Unix, Linux or Posix standards, for which we apologize. The +order in which options are given is irrelevant unless two options contradict, +in which case the right-most option silently wins.

+ +

4.3 The +maxiters option

+ +

You can control the number of iterations that MUSCLE does by +specifying the –maxiters option. If you specify 1, 2 or 3, then this is +exactly the number of iterations that will be performed. If the value is +greater than 3, then muscle will continue up to the maximum you specify +or until convergence is reached, whichever happens sooner. The default is 16. +If you have a large number of sequences, refinement may be rather slow.

+ +

4.4 The maxtrees +option

+ +

This option controls the maximum number of new trees to +create in iteration 2. Our experience suggests that a point of diminishing +returns is typically reached after the first tree, so the default value is 1. +If a larger value is given, the process will repeat until convergence or until +this number of trees has been created, whichever comes first.

+ +

4.5 The maxhours +option

+ +

If you have a large alignment, muscle may take a long +time to complete. It is sometimes convenient to say "I want the best +alignment I can get in 24 hours" rather than specifying a set of options +that will take an unknown length of time. This is done by using –maxhours, +which specifies a floating-point number of hours. If this time is exceeded, muscle +will write out the current alignment and stop. For example,

+ +

 

+ +

muscle -in huge.fa -out huge.afa -maxiters 9999 -maxhours 24.0

+ +

 

+ +

Note that the actual time may exceed the specified limit by +a few minutes while muscle finishes up on a step. It is also possible +for no alignment to be produced if the time limit is too small.

+ +

4.6 The +profile scoring function

+ +

Three different protein profile scoring functions are +supported, the log-expectation score (–le option) and a sum of pairs +score using either the PAM200 matrix (–sp) or the VTML240 matrix (–sv). +The log-expectation score is the default as it gives better results on our +tests, but is typically somewhere between two or three times slower than the +sum-of-pairs score. For nucleotides, –spn is currently the only option +(which is of course the default for nucleotide data, so you don't need to +specify this option).

+ +

4.7 Diagonal +optimization

+ +

Creating a pair-wise alignment by dynamic programming +requires computing an L1 × +L2 matrix, where L1 and L2 +are the sequence lengths. A trick used in algorithms such as BLAST is to reduce +the size of this matrix by using fast methods to find "diagonals", +i.e. short regions of high similarity between the two sequences. This speeds up +the algorithm at the expense of some reduction in accuracy. MUSCLE uses a +technique called k-mer extension to find diagonals. It is disabled by +default because of the slight reduction in average accuracy and can be turned +on by specifying the –diags1 and –diags2 options.

+ +

4.8 Anchor +optimization

+ +

Tree-dependent refinement (iterations 3, 4 ... ) can be +speeded up by dividing the alignment vertically into blocks. Block boundaries +are found by identifying high-scoring columns (e.g., a perfectly conserved +column of Cs or Ws would be a candidate). Each vertical block is then refined +independently before reassembling the complete alignment, which is faster +because of the $L^2$ factor in dynamic programming (e.g., +suppose the alignment is split into two vertical blocks, then $2 \times 0.5^2 = 0.5$, so the dynamic +programming time is roughly halved). The –noanchors option is used to disable +this feature. This option has no effect if –maxiters 1 or –maxiters 2 +is specified. On benchmark tests, enabling anchors has little or no effect on +accuracy, but if you want to be very conservative and are striving for the best +possible accuracy then –noanchors is a reasonable choice.

+ +

4.9 Log file

+ +

You can specify a log file by using –log <filename> +or –loga <filename>. Using –log causes any existing file to +be deleted, –loga appends to any existing file. A message will be +written to the log file when muscle starts and stops. Error and warning +messages will also be written to the log. If –verbose is specified, then +more information will be written, including the command line used to invoke muscle, +the resulting internal parameter settings, and also progress messages. The +content and format of verbose log file output is subject to change in future +versions.

+ +

 

+ +

The use of a log file may seem contrary to Unix conventions for +using standard output and standard error. I like these conventions, but never +found a fully satisfactory way to use them. I like progress messages (see +below), but they mess up a file if you re-direct standard error and there are +errors or warning messages too. I could try to detect whether a standard file +handle is a tty device or a disk file and change behavior accordingly, +but I regard this as too complicated and too hard for the user to understand. On +Windows it can be hard to re-direct standard file handles, especially when +working in a GUI debugger. Maybe one day I will figure out a better solution +(suggestions welcomed).

+ +

 

+ +

I highly recommend using –verbose and –log[a], +especially when running muscle in batch mode. This enables you to +verify whether a particular alignment was completed and to review any errors or +warnings that occurred.

+ +

4.10 Progress +messages

+ +

By default, muscle writes progress messages to +standard error periodically so that you know it's doing something and get some +feedback about the time and memory requirements for the alignment. Here is a +typical progress message.

+ +

 

+ +

00:00:23     25 Mb  Iter   2  87.20%  Build guide tree

+ +

 

+ +

The fields are as follows.

+ +

 

+ + + + + + + + + + + + + + + + + + + + + + +
+

00:00:23

+
+

Elapsed time since muscle + started.

+
+

25 Mb

+
+

Peak memory use in megabytes + (i.e., not the current usage, but the maximum amount of memory used since muscle + started).

+
+

Iter 2

+
+

Iteration currently in + progress.

+
+

87.20%

+
+

How much of the current step + has been completed (percentage).

+
+

Build...

+
+

A brief description of the current step.

+
+ +

 

+ +

The –quiet command-line option disables writing +progress messages to standard error. If the –verbose command-line option +is specified, a progress message will be written to the log file when each +iteration completes. So –quiet and –verbose are not +contradictory.

+ +

4.11 Running +out of memory

+ +

The muscle code tries to deal gracefully with +low-memory conditions by using the following technique. A block of "emergency +reserve" memory is allocated when muscle starts. If a later request +to allocate memory fails, this reserve block is made available, and muscle +attempts to save the current alignment. With luck, the reserved memory will be +enough to allow muscle to save the alignment and exit gracefully with an +informative error message.

+ +

4.12 Troubleshooting

+ +

Here is some general advice on what to do if muscle +fails and you don't understand what happened. The code is designed to fail +gracefully with an informative error message when something goes wrong, but +there will no doubt be situations I haven't anticipated (not to mention bugs).

+ +

 

+ +

Check the MUSCLE web site for updates, bug reports and other +relevant information.

+ +

 

+ +

        http://www.drive5.com/muscle

+ +

 

+ +

Check the input file to make sure it is in valid FASTA format. +Try giving it to another sequence analysis program that can accept large FASTA +files (e.g., the NCBI formatdb utility) to see if you get an informative +error message. Try dividing the file into two halves and using each half +individually as input. If one half fails and the other does not, repeat until +the problem is localized as far as possible.

+ +

 

+ +

Use –log or –loga and –verbose and +check the log file to see if there are any messages that give you a hint about the +problem. Look at the peak memory requirements (reported in progress messages) +to see if you may be exceeding the physical or virtual memory capacity of your +computer.

+ +

 

+ +

If muscle crashes without giving an error message, or +hangs, then you may need to refer to the source code or use a debugger. A +"debug" version, muscled, may be provided. This is built from +the same source code but with the DEBUG macro defined and without compiler +optimizations. This version runs much more slowly (perhaps by a factor of three +or more), but does a lot more internal checking and may be able to catch +something that is going wrong in the code. The –core option specifies +that muscle should not catch exceptions. When –core is specified, +an exception may result in a debugger trap or a core dump, depending on the +execution environment. The –nocore option has the opposite effect. In muscle, +–nocore is the default; in muscled, –core is the default.

+ +

4.13 Technical +support

+ +

I am happy to provide support. But I am busy, and am +offering this program at no charge, so I ask you to make a reasonable effort to +figure things out for yourself before contacting me.

+ +

5 Command Line +Reference

+ +

 

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

Value option

+
+

Legal values

+
+

Default

+
+

Description

+
+

anchorspacing

+
+

Integer

+
+

32

+
+

Minimum spacing between + anchor columns.

+

 

+
+

center

+
+

Floating point

+
+

[1]

+
+

Center parameter. Should be + negative.

+

 

+
+

cluster1

+

cluster2

+
+

upgma

+

upgmb

+

neighborjoining

+
+

upgmb

+
+

Clustering method. cluster1 + is used in iteration 1 and 2, cluster2 in later iterations.

+

 

+
+

diaglength

+
+

Integer

+
+

24

+
+

Minimum length of diagonal.

+

 

+
+

diagmargin

+
+

Integer

+
+

5

+
+

Discard this many positions + at ends of diagonal.

+

 

+
+

distance1

+

 

+
+

kmer6_6

+

kmer20_3

+

kmer20_4

+

kbit20_3

+

kmer4_6

+

 

+
+

kmer6_6 + (amino) or kmer4_6 (nucleo)

+
+

Distance measure for iteration 1.

+
+

distance2

+

 

+
+

kmer6_6

+

kmer20_3

+

kmer20_4

+

kbit20_3

+

pctid_kimura

+

pctid_log

+

 

+
+

pctid_kimura

+
+

Distance measure for iterations 2, 3 ...

+

 

+

 

+

 

+

 

+
+

gapopen

+
+

Floating point

+
+

[1]

+
+

The gap open score. Must be negative.

+

 

+
+

hydro

+
+

Integer

+
+

5

+
+

Window size for determining whether a region is + hydrophobic.

+

 

+
+

hydrofactor

+
+

Floating point

+
+

1.2

+
+

Multiplier for gap open/close penalties in hydrophobic + regions.

+

 

+
+

in

+
+

Any file name

+
+

standard input

+
+

Where to find the input sequences.

+

 

+
+

log

+
+

File name

+
+

None.

+
+

Log file name (delete existing file).

+

 

+
+

loga

+
+

File name

+
+

None.

+
+

Log file name (append to existing file).

+

 

+
+

maxdiagbreak

+
+

Integer

+
+

1

+
+

Maximum distance between two diagonals that allows them to + merge into one diagonal.

+

 

+
+

maxhours

+
+

Floating point

+
+

None.

+
+

Maximum time to run in hours. The actual time may exceed + the requested limit by a few minutes. Decimals are allowed, so 1.5 means one + hour and 30 minutes.

+

 

+
+

maxiters

+
+

Integer 1, 2 ...

+
+

16

+
+

Maximum number of iterations.

+

 

+
+

maxtrees

+
+

Integer

+
+

1

+
+

Maximum number of new trees to build in iteration 2.

+

 

+
+

minbestcolscore

+
+

Floating point

+
+

[1]

+
+

Minimum score a column must + have to be an anchor.

+

 

+
+

minsmoothscore

+
+

Floating point

+
+

[1]

+
+

Minimum smoothed score a + column must have to be an anchor.

+

 

+
+

objscore

+
+

sp

+

ps

+

dp

+

xp

+

spf

+

spm

+
+

spm

+
+

Objective score used by tree dependent refinement.

+

sp=sum-of-pairs score.

+

spf=sum-of-pairs score (dimer approximation)

+

spm=sp for < 100 seqs, otherwise spf

+

dp=dynamic programming score.

+

ps=average profile-sequence score.

+

xp=cross profile score.

+

 

+
+

out

+
+

File name

+
+

standard output

+
+

Where to write the alignment.

+

 

+
+

root1

+

root2

+
+

pseudo

+

midlongestspan

+

minavgleafdist

+
+

pseudo

+
+

Method used to root tree; root1 is used in iteration 1 and + 2, root2 in later iterations.

+

 

+

 

+
+

seqtype

+
+

protein

+

nucleo

+

auto

+

 

+
+

auto

+
+

Sequence type.

+
+

smoothscoreceil

+
+

Floating point

+
+

[1]

+
+

Maximum value of column score for smoothing purposes.

+

 

+
+

smoothwindow

+
+

Integer

+
+

7

+
+

Window used for anchor column smoothing.

+

 

+
+

SUEFF

+
+

Floating point value between 0 and 1.

+

 

+
+

0.1

+
+

Constant used in UPGMB clustering. Determines the relative + fraction of average linkage (SUEFF) vs. nearest-neighbor linkage (1 – SUEFF).
+
+

+
+

tree1

+

tree2

+
+

File name

+
+

None

+
+

Save tree produced in first or second iteration to given + file in Newick (Phylip-compatible) format.

+

 

+
+

weight1

+

weight2

+
+

none

+

henikoff

+

henikoffpb

+

gsc

+

clustalw

+

threeway

+
+

clustalw

+

 

+
+

Sequence weighting scheme.

+

weight1 is used in + iterations 1 and 2.

+

weight2 is used for + tree-dependent refinement.

+

none=all sequences have + equal weight.

+

henikoff=Henikoff & + Henikoff weighting scheme.

+

henikoffpb=Modified + Henikoff scheme as used in PSI-BLAST.

+

clustalw=CLUSTALW method.

+

threeway=Gotoh three-way + method.

+

 

+
+ +

 

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

Flag option

+
+

Set by default?

+
+

Description

+
+

anchors

+
+

yes

+
+

Use anchor optimization in + tree dependent refinement iterations.

+

 

+
+

clw

+
+

no

+
+

Write output in CLUSTALW + format (default is FASTA).

+

 

+
+

clwstrict

+
+

no

+
+

Write output in CLUSTALW + format with the "CLUSTAL W (1.81)" header rather than the MUSCLE + version. This is useful when a post-processing step is picky about the file + header.

+

 

+
+

core

+
+

no in muscle,

+

yes in muscled.

+
+

Do not catch exceptions.

+

 

+

 

+
+

fasta

+
+

yes

+
+

Write output in FASTA + format. Alternatives include –clw,

+

–clwstrict, –msf and + –html.

+

 

+
+

group

+
+

yes

+
+

Group similar sequences + together in the output. This is the default. See also –stable.

+

 

+
+

html

+
+

no

+
+

Write output in HTML format + (default is FASTA).

+

 

+
+

le

+
+

maybe

+
+

Use log-expectation profile score (VTML240). Alternatives + are to use –sp or –sv. This is the default for amino acid + sequences.

+

 

+
+

msf

+
+

no

+
+

Write output in MSF format (default is FASTA).

+

 

+
+

noanchors

+
+

no

+
+

Disable anchor optimization. Default is –anchors.

+

 

+
+

nocore

+
+

yes in muscle,

+

no in muscled.

+
+

Catch exceptions and give an error message if possible.

+

 

+

 

+
+

quiet

+
+

no

+
+

Do not display progress messages.

+

 

+
+

refine

+
+

no

+
+

Input file is already aligned, skip first two iterations + and begin tree dependent refinement.

+

 

+
+

sp

+
+

no

+
+

Use sum-of-pairs protein profile score (PAM200). Default + is –le.

+

 

+
+

spn

+
+

maybe

+

 

+
+

Use sum-of-pairs nucleotide profile score (BLASTZ + parameters). This is the only option for nucleotides, and is therefore the + default.

+

 

+
+

stable

+
+

no

+
+

Preserve input order of sequences in output file. Default + is to group sequences by similarity (–group).

+

 

+
+

sv

+
+

no

+
+

Use sum-of-pairs profile score (VTML240). Default is –le.

+

 

+
+

termgapsfull

+
+

no

+
+

Terminal gaps penalized with full penalty.

+

[1] Not fully supported in this version.

+

 

+
+

termgapshalf

+
+

yes

+
+

Terminal gaps penalized with half penalty.

+

[1] Not fully supported in this version.

+

 

+
+

termgapshalflonger

+
+

no

+
+

Terminal gaps penalized with half penalty if gap relative + to

+

longer sequence, otherwise with full penalty.

+

[1] Not fully supported in this version.

+

 

+
+

verbose

+
+

no

+
+

Write parameter settings and progress messages to log + file.

+

 

+
+

version

+
+

no

+
+

Write version string to stdout and exit.

+
+ +

 

+ +

Notes

+ +

[1] Default depends on the profile scoring function. To +determine the default, use –verbose –log and check the log file.

+ +

 

+ +
+ + + + --- muscle-3.8.31.orig/debian/muscle.1.xml +++ muscle-3.8.31/debian/muscle.1.xml @@ -0,0 +1,270 @@ + + +.
will be generated. You may view the +manual page with: nroff -man .
| less'. A +typical entry in a Makefile or Makefile.am is: + +DB2MAN=/usr/share/sgml/docbook/stylesheet/xsl/nwalsh/\ +manpages/docbook.xsl +XP=xsltproc -''-nonet + +manpage.1: manpage.dbk + $(XP) $(DB2MAN) $< + +The xsltproc binary is found in the xsltproc package. The +XSL files are in docbook-xsl. Please remember that if you +create the nroff version in one of the debian/rules file +targets (such as build), you will need to include xsltproc +and docbook-xsl in your Build-Depends control field. + +--> + + + + 1"> + + + MUSCLE"> + + Debian"> + GNU"> + GPL"> + + +]> + + + + &dhtitle; + &dhpackage; + &dhrelease; + + + Robert + C. + Edgar + Wrote Muscle. + + + &dhfirstname; + &dhsurname; + Wrote this manpage. +
&dhemail;
+
+ + Charles + Plessy + Updated this manpage. +
charles-debian-nospam@plessy.org
+
+
+ + 2003 + 2004 + &dhusername; (manpage) + + + 2007 + 2008 + Charles Plessy (manpage) + + + + Muscle is in the public domain, and therefore not subjected to copyright. + + + + This manual page was written by &dhusername; &dhemail; for the &debian; system (but may be used by others). Permission is granted to copy, distribute and/or modify this document as if it were in public domain. + + +
+ + + &dhucpackage; + &dhsection; + + + + &dhpackage; + Multiple Protein Sequence Alignment + + + + + &dhpackage; + + + + + + + + + + + + + + + + + + + + + DESCRIPTION + + This manual page documents briefly the + &dhpackage; command. + + + &dhpackage; aligns protein sequences and is considered superior and faster than Clustal W. + + + + OPTIONS + + + + + + Path to FASTA formatted input file + + + + + + + + Path to output file, FASTA formatted by default + + + + + + + + Find diagonals (faster for similar sequences) + + + + + + + + Maximum number of iterations (integer, default 16) + + + + + + + + Maximum time to iterate in hours (default no limit) + + + + + + + + Maximum memory to allocate in Mb (default 80% of RAM) + + + + + + + + Write output in HTML format (default FASTA) + + + + + + + Write output in MSF format (default FASTA) + + + + + + + + Write output in Clustal W format (default FASTA) + + + + + + + As -clw, with 'CLUSTAL W (1.81)' header + + + + + + + + Log to file (append if -loga, overwrite if -log) + + + + + + + Do not write progress messages to stderr + + + + + + + Output sequences in input order (default is -group) + + + + + + + Group sequences by similarity (this is the default) + + + + + + + Display version information and exit + + + + + + SEE ALSO + + + clustalw + 1 + , + + + seaview + 1 + , + + + t_coffee + 1 + . + + +
--- muscle-3.8.31.orig/debian/README.Debian +++ muscle-3.8.31/debian/README.Debian @@ -0,0 +1,23 @@ +muscle for Debian +----------------- + +Please cite + + Edgar, Robert C. (2004), MUSCLE: multiple sequence alignment with + high accuracy and high throughput, Nucleic Acids Research 32(5), 1792-97. + +when publishing results achieved with this software. + +Steffen + + -- Steffen Moeller , Sun, 19 Sep 2004 00:51:19 +0200 + + + +There have been two different upstream releases with the same name, +muscle3.7_src.tar.gz, the second one being corrected for building with GCC 4.3. +The Debian package 3.70-1 contained the first release (md5sum: +55c9fe99b9c0bccd41e3ed18f8b9d0d99). The Debian package 3.70+fix1-1 contains the +corrected release (md5sum: 45930141f334b89d927b3cfee6fc7857). + + -- Charles Plessy Wed, 06 Feb 2008 12:04:31 +0900 --- muscle-3.8.31.orig/debian/muscle.doc-base +++ muscle-3.8.31/debian/muscle.doc-base @@ -0,0 +1,15 @@ +Document: muscle-manual +Title: MUSCLE User Guide 3.5 +Author: Robert C. Edgar +Abstract: MUSCLE is a program for creating multiple alignments of amino acid or + nucleotide sequences. A range of options is provided that give you the choice + of optimizing accuracy, speed, or some compromise between the two. Default + parameters are those that give the best average accuracy in our tests. Using + versions current at the time of writing, my tests show that MUSCLE can achieve + both better average accuracy and better speed than Clustal W or T‑Coffee, + depending on the chosen options. 
+Section: Science/Biology
+
+Format: HTML
+Index: /usr/share/doc/muscle/muscle.html
+Files: /usr/share/doc/muscle/muscle.html
--- muscle-3.8.31.orig/debian/rules
+++ muscle-3.8.31/debian/rules
@@ -0,0 +1,59 @@
+#!/usr/bin/make -f
+# debian/rules for muscle: CDBS/debhelper packaging that compiles and links
+# the upstream C++ sources with the rules below.
+
+include /usr/share/cdbs/1/rules/debhelper.mk
+include /usr/share/cdbs/1/class/makefile-vars.mk
+
+# No make target is named here; the objects and binary are built by this file.
+DEB_MAKE_BUILD_TARGET :=
+
+# Base names of the C++ sources; each NAME is compiled from NAME.cpp.
+CPPNames := aligngivenpath aligngivenpathsw aligntwomsas aligntwoprofs aln alpha anchors bittraceback blosum62 blosumla clust cluster clwwt color cons diaglist diffobjscore diffpaths difftrees difftreese distcalc distfunc distpwkimura domuscle dosp dpreglist drawtree edgelist enumopts enumtostr estring fasta fasta2 fastclust fastdist fastdistjones fastdistkbit fastdistkmer fastdistmafft fastdistnuc fastscorepath2 finddiags finddiagsn glbalign glbalign352 glbaligndiag glbalignle glbalignsimple glbalignsp glbalignspn glbalignss glbalndimer globals globalslinux globalsosx globalsother globalswin32 gonnet henikoffweight henikoffweightpb html hydro intmath local main makerootmsa makerootmsab maketree mhack mpam200 msa msa2 msadistkimura msf muscle muscleout nucmx nwdasimple nwdasimple2 nwdasmall nwrec nwsmall objscore objscore2 objscoreda onexception options outweights pam200mafft params phy phy2 phy3 phy4 phyfromclust phyfromfile physeq phytofile posgap ppscore profdb profile profilefrommsa progalign progress progressivealign pwpath readmx realigndiffs realigndiffse refine refinehoriz refinesubfams refinetree refinetreee refinevert refinew savebest scoredist scoregaps scorehistory scorepp seq seqvect setblosumweights setgscweights setnewhandler spfast sptest stabilize subfam subfams sw termgaps textfile threewaywt tomhydro traceback tracebackopt tracebacksw treefrommsa typetostr upgma2 usage validateids vtml2 writescorefile
+
+# Object files, derived from CPPNames so the two lists cannot drift apart.
+ObjNames := $(addsuffix .o,$(CPPNames))
+
+# Compile one object per source file.  The prerequisite pattern must be
+# %.cpp: the former "$.cpp" expanded the unset variable "$." and left the
+# literal prerequisite "cpp", so this rule could never match and the objects
+# were presumably built by make's built-in rule, without the -D flags below.
+$(ObjNames): %.o: %.cpp
+	$(CXX) -c -D_FILE_OFFSET_BITS=64 -DNDEBUG=1 $(CPPFLAGS) $(CXXFLAGS) -o $@ $<
+
+# Link the muscle executable in the top-level directory.  Libraries are
+# listed after the objects so that --as-needed linkers do not discard them.
+build/muscle:: $(ObjNames)
+	$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) -o muscle $(ObjNames) $(LDLIBS)
+
+clean::
+	$(RM) *.o make.err make.out muscle
+
+# Scratch directory for get-orig-source.  It is created only when that goal
+# is requested: an unconditional $(shell mktemp ...) runs every time this
+# file is parsed and leaves an empty muscle-build.* directory behind.
+ifneq (,$(filter get-orig-source,$(MAKECMDGOALS)))
+SRC_TMP := $(shell mktemp --tmpdir --directory muscle-build.XXXXXXXXXX)
+ifeq (,$(strip $(SRC_TMP)))
+$(error mktemp failed: refusing to run get-orig-source without a scratch directory)
+endif
+UP_TARBALL := $(SRC_TMP)/muscle$(DEB_UPSTREAM_VERSION)_src.tar.gz
+endif
+REPACKED := muscle-$(DEB_UPSTREAM_VERSION).orig
+TARBALLS := $(CURDIR)/../tarballs
+
+# Download the upstream tarball, delete muscle21, move src/ to the root of
+# the repacked tree and write ../tarballs/muscle_<version>.orig.tar.gz.
+# NOTE(review): the download directory hard-codes 3.8.31 while the file name
+# follows DEB_UPSTREAM_VERSION -- confirm both when packaging a new release.
+.PHONY: get-orig-source
+get-orig-source:
+	rm -rf $(SRC_TMP)/$(REPACKED)
+	[ -f $(UP_TARBALL) ] || wget -q -O $(UP_TARBALL) http://www.drive5.com/muscle/downloads3.8.31/muscle$(DEB_UPSTREAM_VERSION)_src.tar.gz
+	cd $(SRC_TMP) && \
+	tar xvf $(UP_TARBALL) && \
+	find . -name "muscle21" -delete && \
+	mv $(SRC_TMP)/muscle$(DEB_UPSTREAM_VERSION)/src $(SRC_TMP)/$(REPACKED)
+	mkdir -p $(TARBALLS)
+	cd $(SRC_TMP) && GZIP="--best --no-name" tar -czf $(TARBALLS)/muscle_$(DEB_UPSTREAM_VERSION).orig.tar.gz $(REPACKED)
+	rm -rf $(SRC_TMP)
--- muscle-3.8.31.orig/debian/compat
+++ muscle-3.8.31/debian/compat
@@ -0,0 +1 @@
+8