diff -Nru python-cogent-2020.12.21a+dfsg/ChangeLog python-cogent-2021.10.12a1+dfsg/ChangeLog --- python-cogent-2020.12.21a+dfsg/ChangeLog 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/ChangeLog 1970-01-01 00:00:00.000000000 +0000 @@ -1,8 +0,0 @@ -Change log -========== - -The porting from PyCogent 1.9 to 3.0 has involved a massive number of changes! - -Please see the `wiki pages `_ for a summary of the API changes. - -But the best way to see them is all to use mercurial. \ No newline at end of file diff -Nru python-cogent-2020.12.21a+dfsg/changelog.md python-cogent-2021.10.12a1+dfsg/changelog.md --- python-cogent-2020.12.21a+dfsg/changelog.md 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/changelog.md 2021-10-12 00:17:34.000000000 +0000 @@ -0,0 +1,59 @@ +# Since release 2021.5.7a1 + +## Contributors + +- GavinHuttley + +## DEV + +- added missing `dev` requires-extras to pyproject.toml for installing all packages required for development + +# Since release 2021.5.7a + +## Contributors + +- GavinHuttley +- jamesmartini +- KatherineCaley + +## API + +- ValueError if any tips missing in TreeNode.lowest_common_ancestor() +- added index_name argument to Table.to_categorical(), allows specifying the category column and getting the categorical table in one statement. 
+ +## BUG + +- DataStore.write() requires identifiers end with indicated suffix +- cogent3.app.tree.quicktree() now works for 2 sequences +- Alignment.degap() now preserves sequence names +- cogent3.app.io.load_aligned() handles paml format +- fast_slow_dist results can now be saved by write_tabular, a DistanceMatrix.source attribute is created on-the-fly by the fast_slow_dist calculator, enabling it to be written +- Alignment.variable_positions(), always report a position as variable if > 1 non-gap characters are present +- SequenceCollection.dotplot() method defaults handle single sequence +- app.evo.bootstrap() can now be composed + +## DEV + +- change to using flit for package management. This change requires you `python -m pip install flit`. If you clone this repository and want to do a developer install, you should first remove your existing one + + ````bash + $ python -m pip uninstall cogent3 + ```` + + then + + ```bash + $ flit install -s --python `which python` + ``` + +## DEP + +- removed WritableZippedDataStore, the zip archive format is inefficient for incremental inclusion of files. Use a tinydb instead. +- replaced interleave_len argument with wrap in sequence format writers +- removed Table.to_rich_html() method, use Table.to_html() instead + +## ENH + +- More robust alignment to reference algorithm. Builds a multiple sequence alignment from a series of pairwise alignments to a reference sequence. cogent3.app.align.align_to_ref() now retains gaps in the reference. This will be modestly slower than previously, but avoids losing information if the choice of reference sequence is a bad one. 
+- cogent3.app.composable.appify decorator class, simplifies converting a user defined function into a cogent3 composable app +- JSD calculation now uses more accurate math.fsum() \ No newline at end of file diff -Nru python-cogent-2020.12.21a+dfsg/debian/changelog python-cogent-2021.10.12a1+dfsg/debian/changelog --- python-cogent-2020.12.21a+dfsg/debian/changelog 2021-02-09 13:42:13.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/debian/changelog 2021-11-04 08:29:04.000000000 +0000 @@ -1,3 +1,38 @@ +python-cogent (2021.10.12a1+dfsg-1) unstable; urgency=medium + + * New upstream version + * Upstream switched to flit build system + + -- Andreas Tille Thu, 04 Nov 2021 09:29:04 +0100 + +python-cogent (2021.5.7a+dfsg-3) unstable; urgency=medium + + [ Andreas Tille ] + * Deactivate sphinx bibtex which is not used + Closes: #997772 + + [ Nilesh Patra ] + * Add patch to fix FTBFS with new setuptools + + -- Andreas Tille Wed, 03 Nov 2021 11:28:50 +0100 + +python-cogent (2021.5.7a+dfsg-2) unstable; urgency=medium + + * Add missing Depends: python3-numba + * Disable reprotest + + -- Andreas Tille Thu, 09 Sep 2021 08:12:26 +0200 + +python-cogent (2021.5.7a+dfsg-1) unstable; urgency=medium + + * New upstream version + * Fix watchfile to detect new versions on github (routine-update) + * Standards-Version: 4.6.0 (routine-update) + * Apply multi-arch hints. + + python-cogent-doc: Add Multi-Arch: foreign. + + -- Andreas Tille Wed, 01 Sep 2021 16:35:41 +0200 + python-cogent (2020.12.21a+dfsg-4) unstable; urgency=high * Team upload. 
diff -Nru python-cogent-2020.12.21a+dfsg/debian/control python-cogent-2021.10.12a1+dfsg/debian/control --- python-cogent-2020.12.21a+dfsg/debian/control 2021-02-07 21:13:43.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/debian/control 2021-11-04 08:29:04.000000000 +0000 @@ -7,6 +7,7 @@ Priority: optional Build-Depends: debhelper-compat (= 13), dh-python, + flit, python3-all-dev, python3-matplotlib, python3-numba, @@ -15,17 +16,15 @@ python3-scitrack, python3-tqdm, python3-tinydb, - python3-setuptools, python3-sphinx, python3-numpydoc, python3-nbsphinx, python3-sphinx-bootstrap-theme, python3-sphinx-gallery, - python3-sphinxcontrib.bibtex, python3-pytest , cython3, pandoc -Standards-Version: 4.5.1 +Standards-Version: 4.6.0 Vcs-Browser: https://salsa.debian.org/med-team/python-cogent Vcs-Git: https://salsa.debian.org/med-team/python-cogent.git Homepage: https://github.com/cogent3/cogent3 @@ -36,6 +35,7 @@ Depends: ${shlibs:Depends}, ${misc:Depends}, ${python3:Depends}, + python3-numba, python3-matplotlib, python3-pandas, python3-plotly, @@ -67,6 +67,7 @@ libjs-jquery, libjs-underscore Suggests: python3-cogent3 (= ${binary:Version}) +Multi-Arch: foreign Description: docs for python3-cogent3 PyCogent is a software library for genomic biology. . 
diff -Nru python-cogent-2020.12.21a+dfsg/debian/patches/fix_interpreter.patch python-cogent-2021.10.12a1+dfsg/debian/patches/fix_interpreter.patch --- python-cogent-2020.12.21a+dfsg/debian/patches/fix_interpreter.patch 2020-11-30 09:40:38.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/debian/patches/fix_interpreter.patch 2021-11-04 08:29:04.000000000 +0000 @@ -3,14 +3,6 @@ Description: Fix broken interpreter lines Forwarded: not-needed ---- a/src/cogent3/phylo/maximum_likelihood.py -+++ b/src/cogent3/phylo/maximum_likelihood.py -@@ -1,4 +1,4 @@ --#!/usr/bin/env python' -+#!/usr/bin/python3 - from math import exp - - from .least_squares import WLS --- a/src/cogent3/recalculation/__init__.py +++ b/src/cogent3/recalculation/__init__.py @@ -1,4 +1,4 @@ @@ -27,14 +19,6 @@ """Instances of these classes are assigned to different parameter/scopes by a parameter controller""" ---- a/src/cogent3/align/dp_calculation.py -+++ b/src/cogent3/align/dp_calculation.py -@@ -1,4 +1,4 @@ --#!/usr/bin/env/python -+#!/usr/bin/python3 - - import numpy - --- a/src/cogent3/data/molecular_weight.py +++ b/src/cogent3/data/molecular_weight.py @@ -1,4 +1,4 @@ diff -Nru python-cogent-2020.12.21a+dfsg/debian/patches/py39_union_dict python-cogent-2021.10.12a1+dfsg/debian/patches/py39_union_dict --- python-cogent-2020.12.21a+dfsg/debian/patches/py39_union_dict 2021-02-07 21:25:02.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/debian/patches/py39_union_dict 1970-01-01 00:00:00.000000000 +0000 @@ -1,29 +0,0 @@ -From: Michael R. Crusoe -Subject: Add Python 3.9 compatibility -Forwarded: https://github.com/cogent3/cogent3/pull/850 ---- python-cogent.orig/src/cogent3/util/union_dict.py -+++ python-cogent/src/cogent3/util/union_dict.py -@@ -2,7 +2,7 @@ - """UnionDict extension of dict. 
- """ - from cogent3.util.misc import extend_docstring_from -- -+import sys - - __author__ = "Thomas La" - __copyright__ = "Copyright 2007-2020, The Cogent Project" -@@ -64,6 +64,14 @@ - self.union(other) - return self - -+ if sys.version_info[:2] == (3, 9): -+ def __ror__(self, other): -+ other.union(self) -+ return other -+ -+ def __ior__(self, other): -+ return self.__or__(other) -+ - def union(self, other): - """returns the union of self with other - diff -Nru python-cogent-2020.12.21a+dfsg/debian/patches/series python-cogent-2021.10.12a1+dfsg/debian/patches/series --- python-cogent-2020.12.21a+dfsg/debian/patches/series 2021-02-07 16:23:21.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/debian/patches/series 2021-11-04 08:29:04.000000000 +0000 @@ -1,3 +1,2 @@ sphinx.patch fix_interpreter.patch -py39_union_dict diff -Nru python-cogent-2020.12.21a+dfsg/debian/patches/sphinx.patch python-cogent-2021.10.12a1+dfsg/debian/patches/sphinx.patch --- python-cogent-2020.12.21a+dfsg/debian/patches/sphinx.patch 2021-02-07 21:13:43.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/debian/patches/sphinx.patch 2021-11-04 08:29:04.000000000 +0000 @@ -3,9 +3,9 @@ Description: Work around failure in sphinx doc processing Forwarded: not-needed ---- python-cogent.orig/doc/conf.py -+++ python-cogent/doc/conf.py -@@ -19,18 +19,19 @@ +--- a/doc/conf.py ++++ b/doc/conf.py +@@ -20,18 +20,18 @@ add_module_names = False # don't includ numpydoc_class_members_toctree = False extensions = [ @@ -22,10 +22,20 @@ "sphinx.ext.todo", - "sphinx_gallery.gen_gallery", - "sphinx_panels", +- "sphinxcontrib.bibtex", +# "sphinx_gallery.gen_gallery", +# "sphinx_panels", - "sphinxcontrib.bibtex", + "sphinx.ext.napoleon", # "sphinxcontrib.spelling", ] +--- a/doc/rtd-environment.yml ++++ b/doc/rtd-environment.yml +@@ -20,7 +20,6 @@ dependencies: + - nbsphinx + - nbformat + - nbconvert!=5.4 +- - sphinxcontrib-bibtex + - sphinx-gallery + - sphinx_bootstrap_theme + - ../ diff -Nru 
python-cogent-2020.12.21a+dfsg/debian/rules python-cogent-2021.10.12a1+dfsg/debian/rules --- python-cogent-2020.12.21a+dfsg/debian/rules 2021-02-09 13:08:00.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/debian/rules 2021-11-04 08:29:04.000000000 +0000 @@ -16,6 +16,8 @@ include /usr/share/dpkg/default.mk +export PYBUILD_SYSTEM=flit + %: dh $@ --with python3 --buildsystem=pybuild diff -Nru python-cogent-2020.12.21a+dfsg/debian/salsa-ci.yml python-cogent-2021.10.12a1+dfsg/debian/salsa-ci.yml --- python-cogent-2020.12.21a+dfsg/debian/salsa-ci.yml 2020-11-30 09:40:38.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/debian/salsa-ci.yml 2021-11-04 08:29:04.000000000 +0000 @@ -2,3 +2,8 @@ include: - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/salsa-ci.yml - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/pipeline-jobs.yml +# R creates .rdb files and .rds with some randomness. +# https://tests.reproducible-builds.org/debian/issues/unstable/randomness_in_r_rdb_rds_databases_issue.html +# Thus reprotest is disabled here +variables: + SALSA_CI_DISABLE_REPROTEST: 1 diff -Nru python-cogent-2020.12.21a+dfsg/debian/watch python-cogent-2021.10.12a1+dfsg/debian/watch --- python-cogent-2020.12.21a+dfsg/debian/watch 2020-11-30 09:40:38.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/debian/watch 2021-11-04 08:29:04.000000000 +0000 @@ -1,10 +1,4 @@ version=4 -opts="repacksuffix=+dfsg,dversionmangle=auto,repack,compression=xz" \ - https://github.com/cogent3/cogent3/releases .*/archive/@ANY_VERSION@@ARCHIVE_EXT@ - -# Project moved to Github -# https://bitbucket.org/pycogent3/cogent3/downloads/?tab=tags .*/cogent3/get/@ANY_VERSION@\.tar\.gz - -# This is the old Python2 version -# https://github.com/pycogent/pycogent/releases .*/archive/(\d[\d.-]+)\.(?:tar(?:\.gz|\.bz2)?|tgz) +opts="repacksuffix=+dfsg,dversionmangle=auto,repack,compression=xz,filenamemangle=s%(?:.*?)?v?(\d[\d.]*)\.tar\.gz%@PACKAGE@-$1.tar.gz%" \ + 
https://github.com/cogent3/cogent3/tags (?:.*?/)?v?@ANY_VERSION@@ARCHIVE_EXT@ diff -Nru python-cogent-2020.12.21a+dfsg/doc/api/index.rst python-cogent-2021.10.12a1+dfsg/doc/api/index.rst --- python-cogent-2020.12.21a+dfsg/doc/api/index.rst 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/doc/api/index.rst 2021-10-12 00:17:34.000000000 +0000 @@ -20,6 +20,7 @@ __init__/cogent3.__init__.load_unaligned_seqs __init__/cogent3.__init__.load_aligned_seqs + __init__/cogent3.__init__.load_delimited __init__/cogent3.__init__.load_table __init__/cogent3.__init__.load_tree @@ -55,6 +56,7 @@ __init__/cogent3.__init__.get_code __init__/cogent3.__init__.get_moltype + __init__/cogent3.__init__.get_model Displaying cogent3 builtins =========================== diff -Nru python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.get_model.rst python-cogent-2021.10.12a1+dfsg/doc/api/__init__/cogent3.__init__.get_model.rst --- python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.get_model.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/doc/api/__init__/cogent3.__init__.get_model.rst 2021-10-12 00:17:34.000000000 +0000 @@ -0,0 +1,6 @@ +get_model +========= + +.. currentmodule:: cogent3.__init__ + +.. autofunction:: get_model diff -Nru python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.load_delimited.rst python-cogent-2021.10.12a1+dfsg/doc/api/__init__/cogent3.__init__.load_delimited.rst --- python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.load_delimited.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/doc/api/__init__/cogent3.__init__.load_delimited.rst 2021-10-12 00:17:34.000000000 +0000 @@ -0,0 +1,6 @@ +load_delimited +============== + +.. currentmodule:: cogent3.__init__ + +.. 
autofunction:: load_delimited diff -Nru python-cogent-2020.12.21a+dfsg/doc/api/table/classes/cogent3.parse.table.FilteringParser.rst python-cogent-2021.10.12a1+dfsg/doc/api/table/classes/cogent3.parse.table.FilteringParser.rst --- python-cogent-2020.12.21a+dfsg/doc/api/table/classes/cogent3.parse.table.FilteringParser.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/doc/api/table/classes/cogent3.parse.table.FilteringParser.rst 2021-10-12 00:17:34.000000000 +0000 @@ -0,0 +1,9 @@ +FilteringParser +=============== + +.. currentmodule:: cogent3.parse.table + +.. autoclass:: FilteringParser + :members: + :undoc-members: + :inherited-members: \ No newline at end of file diff -Nru python-cogent-2020.12.21a+dfsg/doc/api/table/table.rst python-cogent-2021.10.12a1+dfsg/doc/api/table/table.rst --- python-cogent-2020.12.21a+dfsg/doc/api/table/table.rst 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/doc/api/table/table.rst 2021-10-12 00:17:34.000000000 +0000 @@ -9,3 +9,11 @@ Table Columns + +.. currentmodule:: cogent3.parse.table + +.. autosummary:: + :toctree: classes + :template: class.rst + + FilteringParser diff -Nru python-cogent-2020.12.21a+dfsg/doc/app/app-overview.rst python-cogent-2021.10.12a1+dfsg/doc/app/app-overview.rst --- python-cogent-2020.12.21a+dfsg/doc/app/app-overview.rst 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/doc/app/app-overview.rst 2021-10-12 00:17:34.000000000 +0000 @@ -1,3 +1,5 @@ +.. 
_apps: + ********************** Overview of using apps ********************** @@ -15,7 +17,7 @@ reader = io.load_aligned(format="fasta", moltype="dna") cpos3 = sample.take_codon_positions(3) - writer = io.write_seqs("path/to/write/thirdpos.zip", format="fasta") + writer = io.write_seqs("path/to/write/thirdpos", format="fasta") Using apps like functions ------------------------- @@ -26,7 +28,7 @@ just3rd = cpos3(data) m = writer(just3rd, identifier="3rdpos_data.fasta") -In the above, ``m`` is a ``DataStoreMember``. The result will be written into the zip archive specified in constructing the ``writer``. +In the above, ``m`` is a ``DataStoreMember``. The result will be written into the directory specified in constructing the ``writer``. Composing a multi-step process from several apps ------------------------------------------------ @@ -50,7 +52,6 @@ Here ``r`` is a list of all the ``DataStoreMember`` instances. - Other important features ------------------------ @@ -68,7 +69,7 @@ process.apply_to(dstore, parallel=True) -By default, this will use all available processors on your machine. If you are running in an mpi environment, you can add the argument ``par_kw=dict(use_mpi=True)``. +By default, this will use all available processors on your machine. If you are running in an mpi environment, you can add the argument ``par_kw=dict(use_mpi=True)``. For more details, see :ref:`parallel`. You can log the settings and data analysed ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff -Nru python-cogent-2020.12.21a+dfsg/doc/app/dstore.rst python-cogent-2021.10.12a1+dfsg/doc/app/dstore.rst --- python-cogent-2020.12.21a+dfsg/doc/app/dstore.rst 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/doc/app/dstore.rst 2021-10-12 00:17:34.000000000 +0000 @@ -71,6 +71,8 @@ The creation of a writeable data store is handled for you by the different writers we provide under ``cogent3.app.io``. +.. warning:: The ``WritableZippedDataStore`` is deprecated. 
+ TinyDB data stores are special ------------------------------ diff -Nru python-cogent-2020.12.21a+dfsg/doc/app/user_function.rst python-cogent-2021.10.12a1+dfsg/doc/app/user_function.rst --- python-cogent-2020.12.21a+dfsg/doc/app/user_function.rst 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/doc/app/user_function.rst 2021-10-12 00:17:34.000000000 +0000 @@ -1,12 +1,12 @@ -Custom composable apps -====================== +Turn your functions into composable apps +======================================== -You can make a simple customised app using the ``user_function`` app. This is a wrapper class that takes a reference to your function and the input, output and data types. The resulting app can then become part of a composed function. +This is super easy -- just use the ``appify`` decorator! This generates a ``user_function`` wrapper class that takes a reference to your function and the input, output and data types. The resulting app can then become part of a composed function. -Defining a ``user_function`` requires you consider four things. +You need four things. ``func`` - A function you have written. This is required. + A function to decorate ... duh! ``input_types`` A type, or collection of type that your function can handle. This setting dictates what other apps have an output that is a compatable input for your function. @@ -20,42 +20,40 @@ A simple example ---------------- -We make a very simple function ``first4``, that returns the first 4 elements of an alignment. +Let's make an app that returns the elements of an alignment up to a specified index, with the index being a keyword argument. We now define a decorated function ``up_to()`` .. jupyter-execute:: - def first4(val): - return val[:4] + from cogent3.app.composable import ALIGNED_TYPE, appify -Now we define a ``user_function`` instance that takes and returns an ``ALIGNED_TYPE``. 
+ @appify(ALIGNED_TYPE, ALIGNED_TYPE, data_types="Alignment") + def up_to(val, index=4): + return val[:index] + +Now we define a ``user_function`` instance that takes and returns an ``ALIGNED_TYPE``. +The ``repr()`` of your ``user_function`` instance indicates the wrapped function and the module it's in. .. jupyter-execute:: - from cogent3.app.composable import user_function, ALIGNED_TYPE + up_to - just4 = user_function( - first4, - input_types=ALIGNED_TYPE, - output_types=ALIGNED_TYPE, - data_types="Alignment", - ) - -The ``repr()`` of your ``user_function`` instance indicates the wrapped function and the module it's in. +We create an app instance for a specific value of ``index`` .. jupyter-execute:: - just4 + first4 = up_to(index=4) + first4 -You use it like all composable apps which we demonstrate using a small sample alignment. +You use ``first4()`` like all composable apps, e.g. .. jupyter-execute:: from cogent3 import make_aligned_seqs aln = make_aligned_seqs( - data=dict(a="GCAAGCGTTTAT", b="GCTTTTGTCAAT"), array_align=False + data=dict(a="GCAAGCGTTTAT", b="GCTTTTGTCAAT"), array_align=False, moltype="dna" ) - result = just4(aln) + result = first4(aln) result Renaming sequences @@ -68,46 +66,40 @@ .. 
jupyter-execute:: from cogent3.app.composable import ( - user_function, ALIGNED_TYPE, SEQUENCE_TYPE, SERIALISABLE_TYPE, + appify, ) - def renamer(aln): + @appify((ALIGNED_TYPE, SEQUENCE_TYPE), SERIALISABLE_TYPE) + def rename_seqs(aln): """upper case names""" return aln.rename_seqs(lambda x: x.upper()) - rename_seqs = user_function( - renamer, - input_types=(ALIGNED_TYPE, SEQUENCE_TYPE), - output_types=SERIALISABLE_TYPE, - data_types=("SequenceCollection", "Alignment", "ArrayAlignment"), - ) - result = rename_seqs(aln) - result.names + renamer = rename_seqs() + result = renamer(aln) + result -A user function for with a different output type ------------------------------------------------- +A user app with a different output type +--------------------------------------- In this example, we make an function that returns ``DistanceMatrix`` of an alignment. .. jupyter-execute:: from cogent3.app.composable import ( - user_function, ALIGNED_TYPE, PAIRWISE_DISTANCE_TYPE, + appify, ) - def _get_dist(aln): - return aln.distance_matrix(calc="hamming", show_progress=False) + @appify(ALIGNED_TYPE, PAIRWISE_DISTANCE_TYPE) + def get_dists(aln, calc="hamming"): + return aln.distance_matrix(calc=calc, show_progress=False) - get_dist = user_function( - _get_dist, - input_types=ALIGNED_TYPE, - output_types=PAIRWISE_DISTANCE_TYPE, - data_types=("Alignment", "ArrayAlignment"), - ) - result = get_dist(aln) - result \ No newline at end of file + percent_dist = get_dists(calc="percent") + result = percent_dist(aln) + result + +.. note:: We omitted the ``data_types`` argument just for demonstration purposes. 
diff -Nru python-cogent-2020.12.21a+dfsg/doc/conf.py python-cogent-2021.10.12a1+dfsg/doc/conf.py --- python-cogent-2020.12.21a+dfsg/doc/conf.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/doc/conf.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,3 +1,4 @@ +import datetime import os import shutil import sys @@ -63,10 +64,12 @@ master_doc = "index" # General information about the project. +today = datetime.date.today() +year = today.strftime("%Y") project = "cogent3" -copyright = "2020, cogent3 Team" +copyright = f"2020-{year}, cogent3 Team" -release = "2020.12.21a" +release = "2021.10.12a1" version = "" @@ -84,7 +87,10 @@ html_theme_options = { "navbar_title": "Docs", "navbar_site_name": "Sections", - "navbar_links": [("Install", "install"), ("Gallery", "draw/index.html", True),], + "navbar_links": [ + ("Install", "install"), + ("Gallery", "draw/index.html", True), + ], "navbar_class": "navbar navbar-inverse", "navbar_fixed_top": "true", "source_link_position": "skipped", diff -Nru python-cogent-2020.12.21a+dfsg/doc/cookbook/loading_tabular.rst python-cogent-2021.10.12a1+dfsg/doc/cookbook/loading_tabular.rst --- python-cogent-2020.12.21a+dfsg/doc/cookbook/loading_tabular.rst 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/doc/cookbook/loading_tabular.rst 2021-10-12 00:17:34.000000000 +0000 @@ -31,6 +31,34 @@ table = load_table("data/stats.tsv", sep="\t") table +Loading delimited data without a header line +============================================ + +To create a table from the following examples, you specify your header and use ``make_table()``. + +Using ``load_delimited()`` +-------------------------- + +This is just a standard parsing function which does not do any filtering or converting elements to non-string types. + +.. 
jupyter-execute:: + + from cogent3.parse.table import load_delimited + + header, rows, title, legend = load_delimited("data/CerebellumDukeDNaseSeq.pk", header=False, sep="\t") + rows[:4] + +Using ``FilteringParser`` +------------------------- + +.. jupyter-execute:: + + from cogent3.parse.table import FilteringParser + + reader = FilteringParser(with_header=False, sep="\t") + rows = list(reader("data/CerebellumDukeDNaseSeq.pk")) + rows[:4] + Selectively loading parts of a big file ======================================= @@ -163,7 +191,7 @@ table["NP_055852", "Region"] -.. note:: The ``index`` argument also applies when using ``make_table()``. +.. note:: The ``index_name`` argument also applies when using ``make_table()``. Create a table from a ``pandas.DataFrame`` ========================================== diff -Nru python-cogent-2020.12.21a+dfsg/doc/cookbook/tables.rst python-cogent-2021.10.12a1+dfsg/doc/cookbook/tables.rst --- python-cogent-2020.12.21a+dfsg/doc/cookbook/tables.rst 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/doc/cookbook/tables.rst 2021-10-12 00:17:34.000000000 +0000 @@ -352,7 +352,7 @@ Get a table of counts as a contingency table ============================================ -If our table consists of counts data, the ``Table`` can convert it into a ``CategoryCount`` instance that can be used for performing basic contingency table statistical tests, e.g. chisquare, G-test of independence, etc.. To do this, we must specify which column contains the row names using the ``index`` argument. +If our table consists of counts data, the ``Table`` can convert it into a ``CategoryCount`` instance that can be used for performing basic contingency table statistical tests, e.g. chisquare, G-test of independence, etc.. To do this, we must specify which column contains the row names using the ``index_name`` argument. .. 
jupyter-execute:: @@ -369,6 +369,13 @@ g_test = contingency.G_independence() g_test +Alternatively, you could also specify the ``index_name`` of the category column as + +.. jupyter-execute:: + + table = make_table(data={"Ts": [31, 58], "Tv": [36, 138], "": ["syn", "nsyn"]}) + contingency = table.to_categorical(["Ts", "Tv"], index_name="") + Appending tables ================ @@ -770,39 +777,7 @@ from cogent3 import load_table table = load_table("data/stats.tsv") - straight_html = table.to_rich_html(compact=True) - -We can provide customised formatting via a callback function. - -.. jupyter-execute:: - - def format_cell(value, row_num, col_num): - style = 'style="background: rgba(176, 245, 102, 0.25);"' if value else "" - return f"{value}" - - rich_html = table.to_rich_html(row_cell_func=format_cell, compact=False) - -Which produces the following... - -.. jupyter-execute:: - :hide-code: - - from IPython.core.display import HTML - HTML(rich_html) - -We could also use control html element format. - -.. jupyter-execute:: - - element_format = dict(thead=f'') - rich_html = table.to_rich_html(element_formatters=element_format) - -Which produces the following... - -.. jupyter-execute:: - :hide-code: - - HTML(rich_html) + straight_html = table.to_html() What formats can be written? ============================ diff -Nru python-cogent-2020.12.21a+dfsg/doc/data/CerebellumDukeDNaseSeq.pk python-cogent-2021.10.12a1+dfsg/doc/data/CerebellumDukeDNaseSeq.pk --- python-cogent-2020.12.21a+dfsg/doc/data/CerebellumDukeDNaseSeq.pk 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/doc/data/CerebellumDukeDNaseSeq.pk 2021-10-12 00:17:34.000000000 +0000 @@ -0,0 +1,21 @@ +chr1 29214 29566 chr1.1 626 . 0.0724 3.9 -1 159 +chr1 89933 90118 chr1.2 511 . 0.0313 1.59 -1 94 +chr1 545979 546193 chr1.3 543 . 0.0428 2.23 -1 100 +chr1 713797 714639 chr1.4 1000 . 0.3215 16.0 -1 380 +chr1 730348 730656 chr1.5 536 . 0.0401 2.08 -1 146 +chr1 739926 740065 chr1.6 499 . 
0.0271 1.36 -1 56 +chr1 752669 753301 chr1.7 587 . 0.0584 3.11 -1 190 +chr1 754039 754513 chr1.8 525 . 0.0361 1.85 -1 130 +chr1 762083 763578 chr1.9 1000 . 0.3678 16.0 -1 849 +chr1 793849 794496 chr1.10 738 . 0.1124 6.2 -1 344 +chr1 805098 805560 chr1.11 760 . 0.1201 6.64 -1 221 +chr1 839130 840574 chr1.12 1000 . 0.3903 16.0 -1 927 +chr1 840756 840987 chr1.13 501 . 0.0276 1.38 -1 107 +chr1 841364 841765 chr1.14 506 . 0.0296 1.49 -1 153 +chr1 856340 856826 chr1.15 690 . 0.0953 5.21 -1 261 +chr1 858947 860832 chr1.16 767 . 0.1228 6.79 -1 306 +chr1 870733 871885 chr1.17 561 . 0.0491 2.58 -1 634 +chr1 872594 872749 chr1.18 498 . 0.0267 1.33 -1 63 +chr1 875353 878884 chr1.19 706 . 0.1011 5.55 -1 375 +chr1 879713 880143 chr1.20 514 . 0.0323 1.64 -1 126 +chr1 887447 887661 chr1.21 500 . 0.0272 1.36 -1 77 diff -Nru python-cogent-2020.12.21a+dfsg/doc/data_file_links.rst python-cogent-2021.10.12a1+dfsg/doc/data_file_links.rst --- python-cogent-2020.12.21a+dfsg/doc/data_file_links.rst 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/doc/data_file_links.rst 2021-10-12 00:17:34.000000000 +0000 @@ -54,3 +54,5 @@ :download:`tbp.jaspar ` :download:`stats.tsv ` + +:download:`CerebellumDukeDNaseSeq.pk ` diff -Nru python-cogent-2020.12.21a+dfsg/doc/doctest2script.py python-cogent-2021.10.12a1+dfsg/doc/doctest2script.py --- python-cogent-2020.12.21a+dfsg/doc/doctest2script.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/doc/doctest2script.py 2021-10-12 00:17:34.000000000 +0000 @@ -8,7 +8,7 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __contributors__ = ["Gavin Huttley", "Peter Maxwell"] __license__ = "BSD-3" __version__ = "2020.2.7a" diff -Nru python-cogent-2020.12.21a+dfsg/doc/doctest_rsts.py python-cogent-2021.10.12a1+dfsg/doc/doctest_rsts.py --- python-cogent-2020.12.21a+dfsg/doc/doctest_rsts.py 2020-12-20 23:35:03.000000000 
+0000 +++ python-cogent-2021.10.12a1+dfsg/doc/doctest_rsts.py 2021-10-12 00:17:34.000000000 +0000 @@ -13,8 +13,9 @@ from cogent3.util.misc import atomic_write + __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" __version__ = "2020.2.7a" diff -Nru python-cogent-2020.12.21a+dfsg/doc/examples/demo-mpi-parallel.py python-cogent-2021.10.12a1+dfsg/doc/examples/demo-mpi-parallel.py --- python-cogent-2020.12.21a+dfsg/doc/examples/demo-mpi-parallel.py 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/doc/examples/demo-mpi-parallel.py 2021-10-12 00:17:34.000000000 +0000 @@ -0,0 +1,64 @@ +import math +import os +import time + +from cogent3.util import parallel + + +# the following environment variable is created by PBS on job execution +PBS_NCPUS = os.environ.get("PBS_NCPUS", None) +if PBS_NCPUS is None: + raise RuntimeError("did not get cpu number from environment") + +PBS_NCPUS = int(PBS_NCPUS) + + +def is_prime(n): + # Postprocess the processor MPI rank to check your job got the resources + # you requested + r = parallel.get_rank() + print(f"MPI Rank: {r}") + + if n % 2 == 0: + return False + + sqrt_n = int(math.floor(math.sqrt(n))) + for i in range(3, sqrt_n + 1, 2): + if n % i == 0: + return False + + return True + + +def main(): + # Each worker will evaluate 20 prime numbers. This is just to slow the + # script down! 
+ PRIMES = ( + [ + 112272535095293, + 112582705942171, + 112272535095293, + 115280095190773, + 115797848077099, + 117450548693743, + 993960000099397, + ] + * PBS_NCPUS + * 20 + ) + + print(f"MPI World size: {parallel.size}\n") + start = time.time() + + result = parallel.map(is_prime, PRIMES, use_mpi=True, max_workers=PBS_NCPUS) + if result != [True] * len(PRIMES): + print(" failed\n") + else: + print(f"{time.time() - start:.2f} seconds\n") + + +if __name__ == "__main__": + # This block is crucial! See + # https://mpi4py.readthedocs.io/en/stable/mpi4py.futures.html + # for why it needs to be done + main() diff -Nru python-cogent-2020.12.21a+dfsg/doc/examples/demo-multiprocess-parallel.py python-cogent-2021.10.12a1+dfsg/doc/examples/demo-multiprocess-parallel.py --- python-cogent-2020.12.21a+dfsg/doc/examples/demo-multiprocess-parallel.py 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/doc/examples/demo-multiprocess-parallel.py 2021-10-12 00:17:34.000000000 +0000 @@ -0,0 +1,39 @@ +import math +import os +import time + +from cogent3.util import parallel + + +def is_prime(n): + r = parallel.get_rank() + print(f"Rank: {r}") + + if n % 2 == 0: + return False + + sqrt_n = int(math.floor(math.sqrt(n))) + for i in range(3, sqrt_n + 1, 2): + if n % i == 0: + return False + + return True + + +PRIMES = [ + 112272535095293, + 112582705942171, + 112272535095293, + 115280095190773, + 115797848077099, + 117450548693743, + 993960000099397, +] * 4 # multiplying just to increase the amount of data to calculate + + +print(f"World size: {parallel.size}\n") + +start = time.time() +result = parallel.map(is_prime, PRIMES, max_workers=4) + +print(f"{time.time() - start:.2f} seconds\n") diff -Nru python-cogent-2020.12.21a+dfsg/doc/examples/index.rst python-cogent-2021.10.12a1+dfsg/doc/examples/index.rst --- python-cogent-2020.12.21a+dfsg/doc/examples/index.rst 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/doc/examples/index.rst 2021-10-12 
00:17:34.000000000 +0000 @@ -57,3 +57,11 @@ calculate_UPGMA_cluster phylo_by_ls +********* +Utilities +********* + +.. toctree:: + :maxdepth: 1 + + parallel diff -Nru python-cogent-2020.12.21a+dfsg/doc/examples/parallel.rst python-cogent-2021.10.12a1+dfsg/doc/examples/parallel.rst --- python-cogent-2020.12.21a+dfsg/doc/examples/parallel.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/doc/examples/parallel.rst 2021-10-12 00:17:34.000000000 +0000 @@ -0,0 +1,93 @@ +.. _parallel: + +Parallel computations +===================== + +``cogent3`` supports parallel computation explicitly for the case where the same calculations need to be performed on many different data sets. As an example, consider the case of aligning all the one-to-one orthologs of protein coding genes sampled from 100 vertebrate species where the data for each gene is stored in a separate text file. These files are used as input for an alignment algorithm that will produce a corresponding output file. In other words, applying the alignment algorithm to ``"homologs1.fasta"`` produces ``"aligned-homologs1.fasta"``. + +We could perform the alignments in serial, one after the other, on one CPU core of a single computer. But what if we have 18,000 such files? If we had 18,000 CPUs then we could assign one alignment task to each file and be done in the same time as aligning a single file! This case is an example of "data parallelism" or "data level parallelism". + +There are multiple algorithmic approaches to solving parallel computation problems. The approach ``cogent3`` adopts is that of a master process and helper (or worker) processes. The master process splits the work up amongst the available CPU cores. Using our alignment example, the master process assigns sets of files to each worker CPU core. Each worker then performs the alignment step on its designated files and returns each alignment to the master process. + +.. 
warning:: + + It is not always faster to split tasks between processes. You should see a performance gain if the calculation time per task of the worker is significantly greater than the time it will take the master process to deal with the result -- in our example, the time it takes to write the alignment to file. + + While the alignment problem indicated above stipulated writing all results to separate files, this is not always a good idea. It can prove very inefficient if the individual alignment files are small. In such a case, storing the result in a single file (e.g. as a ``tinydb`` database) is better. + +Parallel computation on a single computer +----------------------------------------- + +This is the simplest case to implement, requires no additional software installs and will work with standalone scripts or within Jupyter notebooks. For this use case, ``cogent3.util.parallel`` uses the Python standard library ``concurrent.futures`` module. + +Using ``app.apply_to()`` +^^^^^^^^^^^^^^^^^^^^^^^^ + +If you are using ``cogent3`` :ref:`composable apps `, then the simplest approach is to use the ``apply_to()`` method. The conditions of parallel execution are controlled using the keyword arguments ``parallel`` and ``par_kw``. The former indicates parallel execution is to be undertaken. The latter is how additional arguments are provided to ``parallel.map()``. For instance, using 4 workers would be specified as: + +.. code-block:: python + + results = app.apply_to(data, parallel=True, par_kw=dict(max_workers=4)) + +.. note:: If you are using mpi, set ``par_kw=dict(max_workers=4, use_mpi=True)``. + +Directly using ``cogent3.util.parallel.map()`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This function behaves like the standard python builtin ``map()`` except it enables distribution of calculations across CPUs. The demo script shown below calculates a small number of prime numbers by splitting chunks of numbers across the provided cores. 
The key line is + +.. code-block:: python + + result = parallel.map(is_prime, PRIMES, max_workers=4) + +The first argument, ``is_prime``, is the function to be called with values from the data, ``PRIMES``. The ``max_workers`` argument indicates how many worker processes to use. The elements of ``PRIMES`` will be broken into ``max_workers`` number of equal sized chunks. Each such chunk is applied to ``is_prime`` on a separate CPU. In this case, the returned results will be a series of ``bool`` values. + +.. note:: If you don't specify ``max_workers``, all available CPUs will be used. + +.. literalinclude:: demo-multiprocess-parallel.py + +Parallel computation on multiple computers +------------------------------------------ + +On systems consisting of multiple separate computers, we use the mpi4py_ bindings to the message passing interface (MPI) standard. Specifically, ``cogent3.util.parallel.map(..., use_mpi=True, ...)`` uses the `mpi4py futures`_ module of mpi4py_. This module is modelled after that of ``concurrent.futures`` but using it has some important differences. + +First, you must install additional software. You will need to install a tool implementing the MPI system (e.g. `openmpi `_) and the MPI python bindings library ``mpi4py``. To install ``openmpi``, you can use conda, homebrew or your preferred package manager. You can just pip install ``mpi4py``. + +Second, as described in the documentation on `mpi4py futures`_, you need to write your code slightly differently. We provide an example that runs on a supercomputer. To execute a program on this facility, we submit a "job" to a "queuing system" (e.g. `PBS `_) which controls the scheduling of our job with the computing resources we requested (how many CPUs, how much RAM, etc..). There are many such job control systems and the specifics of how to select the resources your job needs can vary between them. In general, however, our experience is the user writes two scripts. + +1. 
a script performing the computations you actually care about +2. a bash script for the queuing system setting out the job parameters and invoking (1) + +The example code presented below is based on the ``mpi4py`` demo script for computing prime numbers. In addition to validating the prime numbers, it also prints out the "MPI rank" of the processor [1]_. The script relies on the environment variable, ``PBS_NCPUS`` [2]_, to establish the number of CPUs that are available. It prints the rank of each processor to stdout [3]_. + +To execute this script as part of a PBS job script you need to use the following command:: + +$ mpiexec -n $PBS_NCPUS python3 -m mpi4py.futures demo-mpi-parallel.py + +.. note:: + + To execute it directly with 4 CPUs do:: + + $ PBS_NCPUS=4 mpiexec -n 4 python3 -m mpi4py.futures demo-mpi-parallel.py + +The ``-n`` argument tells ``mpiexec`` to use this number of CPUs. + +In the ``demo-mpi-parallel.py`` script, the key line is + +.. code-block:: python + + result = parallel.map(is_prime, PRIMES, use_mpi=True, max_workers=PBS_NCPUS) + +The ``use_mpi`` argument invokes the correct back end, otherwise the interface is the same as described above. + +.. note:: You can use ``mpi`` for parallel execution on a single computer. This can be useful for checking your code prior to migrating to a larger system. + +.. literalinclude:: demo-mpi-parallel.py + +.. _mpi4py futures: https://mpi4py.readthedocs.io/en/stable/mpi4py.futures.html +.. _mpi4py: https://mpi4py.readthedocs.io/ + +.. [1] On MPI, the main process has rank 0, all others have rank > 0. +.. [2] This environment variable is created by the PBS system on executing the job script. +.. [3] You can check your execution of the script is correct by validating you get all the ranks up to one minus the number of CPUs you requested.
+ diff -Nru python-cogent-2020.12.21a+dfsg/doc/set_working_directory.py python-cogent-2021.10.12a1+dfsg/doc/set_working_directory.py --- python-cogent-2020.12.21a+dfsg/doc/set_working_directory.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/doc/set_working_directory.py 2021-10-12 00:17:34.000000000 +0000 @@ -6,7 +6,7 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" __version__ = "2020.2.7a" diff -Nru python-cogent-2020.12.21a+dfsg/.github/workflows/testing_develop.yml python-cogent-2021.10.12a1+dfsg/.github/workflows/testing_develop.yml --- python-cogent-2020.12.21a+dfsg/.github/workflows/testing_develop.yml 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/.github/workflows/testing_develop.yml 2021-10-12 00:17:34.000000000 +0000 @@ -14,74 +14,32 @@ strategy: matrix: os: [macos-latest, ubuntu-latest, windows-latest] - python-version: [3.6, 3.7, 3.8] + python-version: [3.7, 3.8, 3.9] steps: - uses: "actions/checkout@v2" - - # caching - - uses: actions/cache@v2 - if: startsWith(runner.os, 'Linux') - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} - ${{ runner.os }}-pip- - - - uses: actions/cache@v2 - if: startsWith(runner.os, 'macOS') - with: - path: ~/Library/Caches/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} - ${{ runner.os }}-pip- - - - uses: actions/cache@v2 - if: startsWith(runner.os, 'Windows') with: - path: ~\AppData\Local\pip\Cache - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} - ${{ runner.os }}-pip- + fetch-depth: 0 - - 
uses: "actions/setup-python@v1" + # Setup env + - uses: "actions/setup-python@v2" with: - python-version: "${{ matrix.python-version }}" + python-version: "${{ matrix.python-version }}" - # Setup virtualenv - - name: "Setup virtualenv for ${{ matrix.python-version }} on Linux or MacOS" - if: startsWith(runner.os, 'macOS') || startsWith(runner.os, 'Linux') + - name: "Installs for ${{ matrix.python-version }}" run: | python --version - python -m venv .venv - source .venv/bin/activate - python -m pip install --upgrade pip - python -m pip install -r requirements.txt - - - name: "Run tox targets for ${{ matrix.python-version }} on Linux or MacOS" - if: startsWith(runner.os, 'macOS') || startsWith(runner.os, 'Linux') - shell: bash -l {0} - run: | - source .venv/bin/activate - python -m tox - ls - ls tests - - - name: "Install requirements and run tox targets for ${{ matrix.python-version }} on Windows" - if: startsWith(runner.os, 'Windows') - shell: bash -l {0} - run: | - python -m pip install --upgrade pip + python -m pip install --upgrade pip wheel setuptools flit + python -m pip install --upgrade tox tox-gh-actions python -m pip install -r requirements.txt - python -m tox - ls - ls tests + - name: "Run tox targets for ${{ matrix.python-version }}" + run: tox + env: + PLATFORM: ${{ matrix.platform }} + - name: Upload coverage to Codecov uses: codecov/codecov-action@v1 with: file: ./tests/junit-*.xml - fail_ci_if_error: true \ No newline at end of file + fail_ci_if_error: true diff -Nru python-cogent-2020.12.21a+dfsg/.gitignore python-cogent-2021.10.12a1+dfsg/.gitignore --- python-cogent-2020.12.21a+dfsg/.gitignore 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/.gitignore 2021-10-12 00:17:34.000000000 +0000 @@ -17,14 +17,16 @@ .installed.cfg lib lib64 +doc/draw # Installer logs pip-log.txt # Unit test / coverage reports -.coverage +.coverage* .tox coverage.xml +junit-*.xml nosetests.xml tests/draw_results @@ -38,6 +40,8 @@ .idea/* .DS_Store 
__pycache__ +*.code-workspace +*.wpu # vi .*.swp diff -Nru python-cogent-2020.12.21a+dfsg/.hgignore python-cogent-2021.10.12a1+dfsg/.hgignore --- python-cogent-2020.12.21a+dfsg/.hgignore 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/.hgignore 2021-10-12 00:17:34.000000000 +0000 @@ -15,7 +15,7 @@ *htmlcov* *.idea draw_results -*.coverage +*.coverage* *egg-info* *.wpu .cache* @@ -28,4 +28,8 @@ *.vscode *.code-workspace coverage.xml -__pycache__ \ No newline at end of file +__pycache__ +junit-*.xml +doc/draw* +dist/* +working/* \ No newline at end of file diff -Nru python-cogent-2020.12.21a+dfsg/.hgtags python-cogent-2021.10.12a1+dfsg/.hgtags --- python-cogent-2020.12.21a+dfsg/.hgtags 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/.hgtags 2021-10-12 00:17:34.000000000 +0000 @@ -16,3 +16,7 @@ 6f2c23976dd39e605fc1bd82d3092013a9ebd99c 2019.12.6a 453a7de7f08b2e18ac73924492fbdb344188a048 2020.6.30a0 743ea31a2fdef619b87165267e90a48523e8fceb 2020.12.14a +31dd23a514abd6fc973dfa575387c9cbad2bc27e 2020.12.21a +b306abbf306afd5b63932244b816862210bdc371 2021.04.20a +9b3797a262b6c880803b32f7633ae3b9ff831f6c 2021.5.7a +a206748ce4632ae95320494f55612def65d2055c 2021.10.12a diff -Nru python-cogent-2020.12.21a+dfsg/LICENSE python-cogent-2021.10.12a1+dfsg/LICENSE --- python-cogent-2020.12.21a+dfsg/LICENSE 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/LICENSE 2021-10-12 00:17:34.000000000 +0000 @@ -1,4 +1,4 @@ -Copyright 2019-2020 Gavin Huttley +Copyright 2019-2021 Gavin Huttley Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff -Nru python-cogent-2020.12.21a+dfsg/MANIFEST.in python-cogent-2021.10.12a1+dfsg/MANIFEST.in --- python-cogent-2020.12.21a+dfsg/MANIFEST.in 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/MANIFEST.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,3 +0,0 @@ -include 
MANIFEST.in LICENSE -include requirements.txt -recursive-include src * diff -Nru python-cogent-2020.12.21a+dfsg/pyproject.toml python-cogent-2021.10.12a1+dfsg/pyproject.toml --- python-cogent-2020.12.21a+dfsg/pyproject.toml 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/pyproject.toml 2021-10-12 00:17:34.000000000 +0000 @@ -1,10 +1,120 @@ [build-system] -requires = ["setuptools", "wheel"] -build-backend = "setuptools.build_meta" +requires = ["flit_core >=2,<4"] +build-backend = "flit_core.buildapi" + +[tool.flit.metadata] +module = "cogent3" +author = "Gavin Huttley" +author-email = "Gavin.Huttley@anu.edu.au" +home-page = "https://cogent3.org" +keywords = "biology, genomics, statistics, phylogeny, evolution, bioinformatics" +description-file = "README.md" +license = "BSD-3" +requires-python = ">=3.7" +requires = ["chardet", + "numpy", + "numba>0.48.0;python_version<'3.9'", + "numba>0.53; python_version>='3.9'", + "scitrack", + "tqdm", + "tinydb"] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Topic :: Software Development :: Libraries :: Python Modules", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + ] + +[tool.flit.metadata.urls] +Documentation = "https://www.cogent3.org/" +"Bug Tracker" = "https://github.com/cogent3/cogent3/issues" +"Source Code" = "https://github.com/cogent3/cogent3" +Changelog = "https://github.com/cogent3/cogent3/blob/develop/changelog.md" + +[tool.flit.sdist] +include = ["doc/", "requirements.txt", "src/*", "pyproject.toml"] +exclude = ["doc/*.html"] + +[tool.flit.metadata.requires-extra] +test = [ + "black", + "isort", + "pytest", + "pytest-cov", + "tox"] +doc = ["click", + "ipykernel", + "ipython", + "ipywidgets", + 
"jupyter-sphinx", + "jupyter_client", + "jupyterlab", + "jupytext", + "kaleido", + "matplotlib", + "nbconvert", + "nbconvert!=5.4", + "nbformat", + "nbsphinx", + "numpydoc", + "pillow", + "plotly", + "psutil", + "sphinx", + "sphinx-autobuild", + "sphinx-gallery==0.7", + "sphinx>=1.6", + "sphinx_bootstrap_theme", + "sphinx_panels", + "sphinxcontrib-bibtex<2.0.0"] +extra = ["pandas", "plotly", "psutil", "kaleido"] +dev = ["black", + "click", + "flit", + "ipykernel", + "ipython", + "ipywidgets", + "isort", + "jupyter-sphinx", + "jupyter_client", + "jupyterlab", + "jupytext", + "kaleido", + "matplotlib", + "nbconvert", + "nbconvert!=5.4", + "nbformat", + "nbsphinx", + "numpydoc", + "pandas", + "pillow", + "plotly", + "psutil", + "pytest", + "pytest-cov", + "sphinx", + "sphinx-autobuild", + "sphinx-gallery==0.7", + "sphinx>=1.6", + "sphinx_bootstrap_theme", + "sphinx_panels", + "sphinxcontrib-bibtex", + "sphinxcontrib-bibtex<2.0.0", + "tox"] + +[tool.pytest.ini_options] +testpaths = [ + "tests", +] [tool.black] line-length = 88 -target-version = ['py36', 'py37', 'py38'] +target-version = ['py37', 'py38', 'py39'] exclude = ''' /( \.eggs diff -Nru python-cogent-2020.12.21a+dfsg/README.md python-cogent-2021.10.12a1+dfsg/README.md --- python-cogent-2020.12.21a+dfsg/README.md 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/README.md 2021-10-12 00:17:34.000000000 +0000 @@ -1,8 +1,8 @@ [![Build Status](https://github.com/cogent3/cogent3/workflows/CI/badge.svg?branch=develop)](https://github.com/cogent3/cogent3/actions?workflow=CI) [![codecov](https://codecov.io/gh/cogent3/cogent3/branch/master/graph/badge.svg)](https://codecov.io/gh/cogent3/cogent3) ![Using Black Formatting](https://img.shields.io/badge/code%20style-black-000000.svg) -[![Python 3.6+](https://img.shields.io/badge/python-3.6+-blue.svg)](https://www.python.org/downloads/release/python-360/) - +[![Python 
3.7+](https://img.shields.io/badge/python-3.7+-blue.svg)](https://www.python.org/downloads/release/python-370/) +[![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/cogent3/cogent3.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/cogent3/cogent3/context:python) ## `cogent3` diff -Nru python-cogent-2020.12.21a+dfsg/requirements.txt python-cogent-2021.10.12a1+dfsg/requirements.txt --- python-cogent-2020.12.21a+dfsg/requirements.txt 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/requirements.txt 2021-10-12 00:17:34.000000000 +0000 @@ -1,6 +1,8 @@ -pillow==8.0.1 -psutil==5.7.3 -scipy==1.5.4 -tox==3.20.1 -tox-gh-actions==2.2.0 -.[dev] \ No newline at end of file +chardet==4.0.0 +pillow>=8.3.2 +psutil==5.8.0 +scipy==1.5.4; python_version < '3.7' +scipy==1.6.1; python_version > '3.6' +tox==3.23.1 +tox-gh-actions==2.5.0 +.[dev] diff -Nru python-cogent-2020.12.21a+dfsg/setup.py python-cogent-2021.10.12a1+dfsg/setup.py --- python-cogent-2020.12.21a+dfsg/setup.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/setup.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,117 +0,0 @@ -#!/usr/bin/env python -import pathlib -import sys - -from setuptools import find_packages, setup - - -__author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" -__contributors__ = [ - "Peter Maxwell", - "Gavin Huttley", - "Matthew Wakefield", - "Greg Caporaso", - "Daniel McDonald", -] -__license__ = "BSD-3" -__version__ = "2020.12.21a" -__maintainer__ = "Gavin Huttley" -__email__ = "Gavin.Huttley@anu.edu.au" -__status__ = "Production" - -# Check Python version, no point installing if unsupported version inplace -min_version = (3, 6) -if sys.version_info < min_version: - py_version = ".".join([str(n) for n in sys.version_info]) - msg = ( - f"Python-{'.'.join(min_version)} or greater is required, " - f"Python-{py_version} used." 
- ) - raise RuntimeError(msg) - - -# On windows with no commandline probably means we want to build an installer. -if sys.platform == "win32" and len(sys.argv) < 2: - sys.argv[1:] = ["bdist_wininst"] - - -short_description = "COmparative GENomics Toolkit 3" - -readme_path = pathlib.Path(__file__).parent / "README.md" - -long_description = readme_path.read_text() - -PACKAGE_DIR = "src" - -PROJECT_URLS = { - "Documentation": "https://www.cogent3.org/", - "Bug Tracker": "https://github.com/cogent3/cogent3/issues", - "Source Code": "https://github.com/cogent3/cogent3", -} - -setup( - name="cogent3", - version=__version__, - url="https://github.com/cogent3/cogent3", - author="Gavin Huttley", - author_email="gavin.huttley@anu.edu.au", - description=short_description, - long_description=long_description, - long_description_content_type="text/markdown", - platforms=["any"], - license=["BSD"], - keywords=[ - "biology", - "genomics", - "statistics", - "phylogeny", - "evolution", - "bioinformatics", - ], - classifiers=[ - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: BSD License", - "Topic :: Scientific/Engineering :: Bio-Informatics", - "Topic :: Software Development :: Libraries :: Python Modules", - "Operating System :: OS Independent", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - ], - packages=find_packages(where="src"), - package_dir={"": PACKAGE_DIR}, - install_requires=["numba>0.48.0", "numpy", "scitrack", "tqdm", "tinydb"], - extras_require={ - "dev": [ - "black", - "click", - "ipykernel", - "ipywidgets", - "isort", - "jupyter-sphinx", - "jupyter_client", - "jupyterlab", - "jupytext", - "kaleido", - "nbconvert", - "nbformat", - "nbsphinx", - "numpydoc", - "pandas", - "plotly", - "psutil", - "pytest", - "pytest-cov", - "pytest>=4.3.0", - "sphinx", - "sphinx-autobuild", - "sphinxcontrib-bibtex", - 
"sphinx_panels", - "tox", - ], - "extra": ["pandas", "plotly", "psutil", "kaleido"], - }, - project_urls=PROJECT_URLS, -) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/align/align.py python-cogent-2021.10.12a1+dfsg/src/cogent3/align/align.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/align/align.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/align/align.py 2021-10-12 00:17:34.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/align/compare_numba.py python-cogent-2021.10.12a1+dfsg/src/cogent3/align/compare_numba.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/align/compare_numba.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/align/compare_numba.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,27 +1,16 @@ -import numba import numpy as np -from numba import int64, njit -from numba.core.types import List -from numba.core.types.containers import Tuple - __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" -# @njit( -# List(dtype=Tuple(types=(int64, int64)))( -# numba.typeof(b"seq"), numba.typeof(b"seq"), int64, int64, int64, int64 -# ), -# cache=True, -# ) def segments_from_diagonal(seq1, seq2, window, threshold, min_gap_length, diagonal): assert window < 
100 scores = np.zeros(window) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/align/dp_calculation.py python-cogent-2021.10.12a1+dfsg/src/cogent3/align/dp_calculation.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/align/dp_calculation.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/align/dp_calculation.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,7 +1,3 @@ -#!/usr/bin/env/python - -import numpy - from cogent3.align import indel_model, pairwise from cogent3.maths.markov import SiteClassTransitionMatrix from cogent3.recalculation.definition import ( @@ -9,16 +5,15 @@ CalculationDefn, NonParamDefn, PartitionDefn, - PositiveParamDefn, ProbabilityParamDefn, ) __author__ = "Gavin Huttley and Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttleuy" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/align/indel_model.py python-cogent-2021.10.12a1+dfsg/src/cogent3/align/indel_model.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/align/indel_model.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/align/indel_model.py 2021-10-12 00:17:34.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/align/indel_positions.py python-cogent-2021.10.12a1+dfsg/src/cogent3/align/indel_positions.py --- 
python-cogent-2020.12.21a+dfsg/src/cogent3/align/indel_positions.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/align/indel_positions.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,10 +1,8 @@ -#!/usr/bin/env python - __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -25,15 +23,13 @@ for dim in [0, 1]: align_builder.add_skipped(dim, upto[dim], len(pogs[dim])) - result_pog = align_builder.get_pog() - - return result_pog + return align_builder.get_pog() class POGBuilder(object): def __init__(self, children): self.children = children - self.remap = [{} for child in children] + self.remap = [{} for _ in children] self.started = [False, False] self.last = [None, None] self.result = [[]] diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/align/__init__.py python-cogent-2021.10.12a1+dfsg/src/cogent3/align/__init__.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/align/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/align/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -22,10 +22,10 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Jeremy Widmann", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/align/pairwise_pogs_numba.py python-cogent-2021.10.12a1+dfsg/src/cogent3/align/pairwise_pogs_numba.py --- 
python-cogent-2020.12.21a+dfsg/src/cogent3/align/pairwise_pogs_numba.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/align/pairwise_pogs_numba.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,15 +1,14 @@ -import numba import numpy as np -from numba import boolean, float64, int32, int64, njit, optional, types, uint8 +from numba import boolean, float64, int64, njit, optional, uint8 from numba.core.types.containers import Tuple __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -83,21 +82,16 @@ source_row_index_cache = np.zeros(256) N = max(T.shape[0], T.shape[1]) - row_count = plan.shape[0] dest_states = max(0, state_directions.shape[0]) - d4 = max(4, state_directions.shape[1]) row_count = x_index.shape[0] row_length = y_index.shape[0] - bin_count = match_scores.shape[0] max_x = match_scores.shape[1] max_y = match_scores.shape[2] - bin_count = max(xgap_scores.shape[0], bin_count) max_x = max(xgap_scores.shape[1], max_x) - bin_count = max(ygap_scores.shape[0], bin_count) max_y = max(ygap_scores.shape[1], max_y) for i in range(row_count): @@ -108,20 +102,10 @@ assert j_low >= 0 and j_high > j_low and j_high <= row_length - j_link_count = j_sources.shape[0] - row_length1 = row_length + 1 - row_length1 = max(row_length1, j_sources_offsets.shape[0]) - - i_link_count = i_sources.shape[0] - row_count1 = row_count + 1 - row_count1 = max(row_count1, i_sources_offsets.shape[0]) - - tmp_rows = mantissas.shape[0] row_length = max(mantissas.shape[1], row_length) N = max(mantissas.shape[2], N) if use_scaling: - tmp_rows = max(exponents.shape[0], tmp_rows) row_length = max(exponents.shape[1], row_length) N = 
max(exponents.shape[2], N) @@ -131,8 +115,6 @@ impossible = 0.0 if viterbi and track is not None and track_enc is not None: - row_count = max(track.shape[0], row_count) - row_length = max(track.shape[1], row_length) N = max(track.shape[2], N) (tcode_x, tcode_y, tcode_s) = track_enc else: diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/align/pairwise.py python-cogent-2021.10.12a1+dfsg/src/cogent3/align/pairwise.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/align/pairwise.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/align/pairwise.py 2021-10-12 00:17:34.000000000 +0000 @@ -29,10 +29,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -687,7 +687,7 @@ to_end = numpy.array([(len(T) - 1, 0, 0, 0)]) for (state, (i, j)) in cells: if i > last_i: - rr = pair.calc_rows( + _ = pair.calc_rows( last_i + 1, i + 1, 0, @@ -816,8 +816,6 @@ memory = problem_size * encoder.bytes / 10 ** 6 if dp_options.local: msg = "Local alignment" - elif cells is not None: - msg = "Posterior probs" elif ( self.pair.size[0] - 2 >= 3 and not backward diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/align/pairwise_seqs_numba.py python-cogent-2021.10.12a1+dfsg/src/cogent3/align/pairwise_seqs_numba.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/align/pairwise_seqs_numba.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/align/pairwise_seqs_numba.py 2021-10-12 00:17:34.000000000 +0000 @@ -5,10 +5,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", 
"Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -81,21 +81,16 @@ MIN_FLOAT_VALUE = 1.0 / SCALE_STEP N = max(T.shape[0], T.shape[1]) - row_count = plan.shape[0] dest_states = max(0, state_directions.shape[0]) - d4 = max(4, state_directions.shape[1]) row_count = x_index.shape[0] row_length = y_index.shape[0] - bin_count = match_scores.shape[0] max_x = match_scores.shape[1] max_y = match_scores.shape[2] - bin_count = max(xgap_scores.shape[0], bin_count) max_x = max(xgap_scores.shape[1], max_x) - bin_count = max(ygap_scores.shape[0], bin_count) max_y = max(ygap_scores.shape[1], max_y) for i in range(row_count): @@ -106,12 +101,10 @@ assert j_low >= 0 and j_high > j_low and j_high <= row_length - tmp_rows = mantissas.shape[0] row_length = max(mantissas.shape[1], row_length) N = max(mantissas.shape[2], N) if use_scaling: - tmp_rows = max(exponents.shape[0], tmp_rows) row_length = max(exponents.shape[1], row_length) N = max(exponents.shape[2], N) @@ -121,8 +114,6 @@ impossible = 0.0 if viterbi and track is not None and track_enc is not None: - row_count = max(track.shape[0], row_count) - row_length = max(track.shape[1], row_length) N = max(track.shape[2], N) (tcode_x, tcode_y, tcode_s) = track_enc else: diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/align/progressive.py python-cogent-2021.10.12a1+dfsg/src/cogent3/align/progressive.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/align/progressive.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/align/progressive.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,18 +1,14 @@ -#!/usr/bin/env python - - from cogent3 import make_tree -from cogent3.core.info import Info from cogent3.evolve.distance import EstimateDistances from cogent3.phylo import nj as NJ from cogent3.util import progress_display as UI __author__ = "Peter 
Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/align/pycompare.py python-cogent-2021.10.12a1+dfsg/src/cogent3/align/pycompare.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/align/pycompare.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/align/pycompare.py 2021-10-12 00:17:34.000000000 +0000 @@ -8,10 +8,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/align/traceback.py python-cogent-2021.10.12a1+dfsg/src/cogent3/align/traceback.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/align/traceback.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/align/traceback.py 2021-10-12 00:17:34.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Rob Knight", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/app/align.py python-cogent-2021.10.12a1+dfsg/src/cogent3/app/align.py --- 
python-cogent-2020.12.21a+dfsg/src/cogent3/app/align.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/app/align.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,5 +1,7 @@ import warnings +from bisect import bisect_left + from cogent3 import make_tree from cogent3.align import ( global_pairwise, @@ -8,6 +10,8 @@ ) from cogent3.align.progressive import TreeAlign from cogent3.app import dist +from cogent3.core.alignment import Aligned, Alignment +from cogent3.core.location import gap_coords_to_map from cogent3.core.moltype import get_moltype from cogent3.evolve.models import get_model @@ -22,36 +26,315 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" -class _GapInRef: - """assumes first element of series is reference, returns True if that matches - gap_state""" - - def __init__(self, moltype, gap): - self.gap_state = moltype.alphabet.to_indices(gap)[0] - self.func = self._ref_gap if gap == "-" else self._array_ref_gap +class _GapOffset: + """computes sum of gap lengths preceding a position. Acts like a dict + for getting the offset for an integer key with the __getitem__ returning + the offset. + If your coordinate is an alignment position, set invert=True. 
+ Examples + -------- + From sequence coordinate to an alignment coordinate + + >>> seq2aln = _GapOffset({1:3, 7:1}) + >>> seq_pos = 2 + >>> aln_pos = seq_pos + seq2aln[seq_pos] + >>> aln_pos + 5 + + From alignment coordinate to a sequence coordinate + + >>> aln2seq = _GapOffset({1:3, 7:1}, invert=True) + >>> seq_pos = aln_pos - aln2seq[seq_pos] + >>> seq_pos + 2 + """ - def _ref_gap(self, x): - return x[0] != self.gap_state + def __init__(self, gaps_lengths, invert=False): + """ + Parameters + ---------- + gaps_lengths : dict + {pos: length, ...} where pos is a gap insert position and length + how long the gap is. + invert : bool + if True, query keys are taken as being in alignment coordinates + """ + offset = 0 + min_val = None + result = {} + k = -1 + for k, l in sorted(gaps_lengths.items()): + if invert: + result[k + offset + l] = offset + l + result[k + offset] = offset + else: + result[k] = offset + + offset += l + if min_val is None: + min_val = k + + self._store = result + self.min_pos = min_val + self.max_pos = k + offset if invert else k + self.total = offset + self._ordered = None + self._invert = invert + + def __repr__(self): + return repr(self._store) + + def __str__(self): + return str(self._store) + + def __getitem__(self, k): + if not self._store: + return 0 + + if k in self._store: + return self._store[k] + + if k < self.min_pos: + return 0 + + if k > self.max_pos: + return self.total + + if self._ordered is None: + self._ordered = sorted(self._store) + + # k is definitely bounded by min and max positions here + i = bisect_left(self._ordered, k) + pos = self._ordered[i] + if self._invert: + pos = pos if pos in [k, 0] else self._ordered[i - 1] + return self._store[pos] + + +def _gap_union(seqs) -> dict: + """returns the union of all gaps in seqs""" + seq_name = None + all_gaps = {} + for seq in seqs: + if not isinstance(seq, Aligned): + raise TypeError(f"must be Aligned instances, not {type(seq)}") + if seq_name is None: + seq_name = seq.name + if 
seq.name != seq_name: + raise ValueError("all sequences must have the same name") + + gaps_lengths = dict(seq.map.get_gap_coordinates()) + all_gaps = _merged_gaps(all_gaps, gaps_lengths) + return all_gaps + + +def _gap_difference(seq_gaps: dict, union_gaps: dict) -> tuple: + """ + + Parameters + ---------- + seq_gaps + {gap pos: length, } of a sequence used to generate the gap union + union_gaps + {gap pos: maximum length, ...} derived from the same seq aligned + to different sequences + + Returns + ------- + gaps missing from seq_gaps, seq_gaps that overlap with union gaps + """ + missing = {} + overlapping = {} + for position, length in union_gaps.items(): + if position not in seq_gaps: + missing[position] = length + elif seq_gaps[position] != length: + overlapping[position] = length - seq_gaps[position] + return missing, overlapping + + +def _merged_gaps(a_gaps: dict, b_gaps: dict) -> dict: + """merges gaps that occupy same position + + Parameters + ---------- + a_gaps, b_gaps + [(gap position, length),...] + + Returns + ------- + Merged places as {gap position: length, ...} + + Notes + ----- + If a_gaps and b_gaps are from the same underlying sequence, set + function to 'max'. Use 'sum' when the gaps derive from different + sequences. 
+ """ + + if not a_gaps: + return b_gaps + + if not b_gaps: + return a_gaps + + places = set(a_gaps) | set(b_gaps) + return { + place: max( + a_gaps.get(place, 0), + b_gaps.get(place, 0), + ) + for place in places + } - def _array_ref_gap(self, x): - return x.flatten()[0] != self.gap_state - def __call__(self, x): - return self.func(x) +def _subset_gaps_to_align_coords( + subset_gaps: dict, orig_gaps: dict, seq_2_aln: _GapOffset +) -> dict: + """compute alignment coords of subset gaps + + Parameters + ---------- + subset_gaps : dict + {position: length delta} lengths are the adjusted gap lengths + orig_gaps : dict + {position: orig length} the original gap lengths are from a pairwise + alignment + seq_2_aln : dict + {seq position: alignment position, ...} + + Returns + ------- + dict + {alignment position + orig length: length delta, ...} + + Notes + ----- + """ + result = {} + for p in subset_gaps: + offset = seq_2_aln[p] + result[offset + p + orig_gaps[p]] = subset_gaps[p] + + return result + + +def _combined_refseq_gaps(seq_gaps: dict, union_gaps: dict) -> dict: + # takes union gaps and refseq gaps, converts into diffs and + # subset diffs + seq2aln = _GapOffset(seq_gaps) + diff_gaps, subset_gaps = _gap_difference(seq_gaps, union_gaps) + align_coord_gaps = _subset_gaps_to_align_coords(subset_gaps, seq_gaps, seq2aln) + align_coord_gaps.update({p + seq2aln[p]: diff_gaps[p] for p in diff_gaps}) + return align_coord_gaps + + +def _gaps_for_injection(other_seq_gaps: dict, refseq_gaps: dict, seqlen: int) -> dict: + """projects refseq aligned gaps into otherseq + + Parameters + ---------- + other_seq_gaps : dict + {gap in other seq position: gap length} + refseq_gaps : dict + {gap as alignment position: gap length} + seqlen : int + length of sequence being injected into + + Returns + ------- + dict + {gap in other seq position: gap length} + """ + aln2seq = _GapOffset(other_seq_gaps, invert=True) + # to inject a gap means to convert it from alignment coordinates into + 
# sequence coordinates + # we probably need to include the refseq gap union because we need to + # establish whether a refseq gap overlaps with a gap in other seq + # and + all_gaps = {} + all_gaps.update(other_seq_gaps) + for gap_pos, gap_length in sorted(refseq_gaps.items()): + offset = aln2seq[gap_pos] + gap_pos -= offset + gap_pos = min(seqlen, gap_pos) + if gap_pos < 0: + raise ValueError( + f"computed gap_pos {gap_pos} < 0, correct reference sequence?" + ) + if gap_pos in all_gaps: + gap_length += all_gaps[gap_pos] + + all_gaps[gap_pos] = gap_length + + return all_gaps + + +def pairwise_to_multiple(pwise, ref_seq, moltype, info=None): + """ + turns pairwise alignments to a reference into a multiple alignment + + Parameters + ---------- + pwise + Series of pairwise alignments to ref_seq as + [(non-refseq name, aligned pair), ...] + ref_seq + The sequence common in all pairwise alignments + moltype + molecular type for the returned alignment + info + info object + + Returns + ------- + ArrayAlign + """ + if not hasattr(ref_seq, "name"): + raise TypeError(f"ref_seq must be a cogent3 sequence, not {type(ref_seq)}") + + refseqs = [s for _, aln in pwise for s in aln.seqs if s.name == ref_seq.name] + ref_gaps = _gap_union(refseqs) + + m = gap_coords_to_map(ref_gaps, len(ref_seq)) + aligned = [Aligned(m, ref_seq)] + for other_name, aln in pwise: + curr_ref = aln.named_seqs[ref_seq.name] + curr_ref_gaps = dict(curr_ref.map.get_gap_coordinates()) + other_seq = aln.named_seqs[other_name] + other_gaps = dict(other_seq.map.get_gap_coordinates()) + diff_gaps = _combined_refseq_gaps(curr_ref_gaps, ref_gaps) + inject = _gaps_for_injection(other_gaps, diff_gaps, len(other_seq.data)) + if inject: + m = gap_coords_to_map(inject, len(other_seq.data)) + other_seq = Aligned(m, other_seq.data) + + aligned.append(other_seq) + # default to ArrayAlign + return Alignment(aligned, moltype=moltype, info=info).to_type( + array_align=True, moltype=moltype + ) class 
align_to_ref(ComposableSeq): - """Aligns to a reference seq, no gaps in the reference. - Returns an Alignment object.""" + """Aligns sequences to a nominated reference in the unaligned collection. + This is much faster, and requires much less memory, than progressive_align + but the quality will likely be lower. Alignment quality will be strongly + affected by choice of reference. + + Returns + ------- + ArrayAlignment. + """ _input_types = SEQUENCE_TYPE _output_types = (ALIGNED_TYPE, SERIALISABLE_TYPE) @@ -122,27 +405,16 @@ seqs = seqs.to_moltype(self._moltype) ref_seq = seqs.get_seq(self._ref_name) - aligned = None kwargs = self._kwargs.copy() - no_ref_gap = None - - for i in range(seqs.num_seqs): - seq = seqs.seqs[i] + pwise = [] + for seq in seqs.seqs: if seq.name == self._ref_name: continue - result = global_pairwise(ref_seq, seq, **kwargs) - if no_ref_gap is None: - no_ref_gap = _GapInRef(result.moltype, seqs.moltype.gap) - - # as we're going to be using a pairwise distance that excludes gaps - # eliminating positions with deletions in the reference - result = result.filtered(no_ref_gap) - aligned = result if aligned is None else aligned.add_from_ref_aln(result) - - # default to ArrayAlign - new = aligned.to_type(array_align=True, moltype=self._moltype) - return new + aln = global_pairwise(ref_seq, seq, **kwargs).to_type(array_align=False) + pwise.append(((seq.name, aln))) + + return pairwise_to_multiple(pwise, ref_seq, self._moltype, info=seqs.info) class progressive_align(ComposableSeq): diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/app/composable.py python-cogent-2021.10.12a1+dfsg/src/cogent3/app/composable.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/app/composable.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/app/composable.py 2021-10-12 00:17:34.000000000 +0000 @@ -7,12 +7,18 @@ import time import traceback +from copy import deepcopy + import scitrack from cogent3 import make_aligned_seqs, 
make_unaligned_seqs from cogent3.core.alignment import SequenceCollection from cogent3.util import progress_display as UI -from cogent3.util.misc import get_object_provenance, open_ +from cogent3.util.misc import ( + extend_docstring_from, + get_object_provenance, + open_, +) from .data_store import ( IGNORE, @@ -22,15 +28,14 @@ DataStoreMember, SingleReadDataStore, WritableDirectoryDataStore, - WritableZippedDataStore, ) __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -39,9 +44,7 @@ def _make_logfile_name(process): text = str(process) text = re.split(r"\s+\+\s+", text) - parts = [] - for part in text: - parts.append(part[: part.find("(")]) + parts = [part[: part.find("(")] for part in text] result = "-".join(parts) pid = os.getpid() result = f"{result}-pid{pid}.log" @@ -50,15 +53,16 @@ def _get_source(source): if isinstance(source, str): - result = str(source) - else: + return str(source) + + # todo maybe a dict? 
see about getting keys + try: + result = source.source + except AttributeError: try: - result = source.source + result = source.info.source except AttributeError: - try: - result = source.info.source - except AttributeError: - result = None + result = None return result @@ -115,14 +119,13 @@ def to_rich_dict(self): """returns components for to_json""" - data = { + return { "type": get_object_provenance(self), "not_completed_construction": dict( args=self._persistent[0], kwargs=self._persistent[1] ), "version": __version__, } - return data def to_json(self): """returns json string""" @@ -194,7 +197,7 @@ class Composable(ComposableType): def __init__(self, **kwargs): super(Composable, self).__init__(**kwargs) - self.func = None # over-ride in subclass + # self.func = None # over-ride in subclass self._in = None # input rules self._out = None # rules receiving output # rules operating on result but not part of a chain @@ -207,7 +210,9 @@ if txt: txt += " + " txt += "%s(%s)" % (self.__class__.__name__, ", ".join(self._formatted)) - txt = textwrap.fill(txt, width=80, break_long_words=False) + txt = textwrap.fill( + txt, width=80, break_long_words=False, break_on_hyphens=False + ) return txt def __repr__(self): @@ -405,6 +410,7 @@ Returns ------- Result of the process as a list + Notes ----- If run in parallel, this instance serves as the master object and @@ -414,33 +420,34 @@ dstore = [dstore] dstore = [e for e in dstore if e] - if len(dstore) == 0: + if not dstore: raise ValueError("dstore is empty") start = time.time() loggable = hasattr(self, "data_store") - if not loggable: + if ( + not loggable + or type(logger) != scitrack.CachingLogger + and type(logger) != str + and logger != True + ): LOGGER = None elif type(logger) == scitrack.CachingLogger: LOGGER = logger elif type(logger) == str: LOGGER = scitrack.CachingLogger LOGGER.log_file_path = logger - elif logger == True: + else: log_file_path = pathlib.Path(_make_logfile_name(self)) src = 
pathlib.Path(self.data_store.source) log_file_path = src.parent / log_file_path LOGGER = scitrack.CachingLogger() LOGGER.log_file_path = str(log_file_path) - else: - LOGGER = None - if LOGGER: LOGGER.log_message(str(self), label="composable function") LOGGER.log_versions(["cogent3"]) results = [] - i = 0 - process = self.input if self.input else self + process = self.input or self if self.input: # As we will be explicitly calling the input object, we disconnect # the two-way interaction between input and self. This means self @@ -452,8 +459,10 @@ # with a tinydb dstore, this also excludes data that failed to complete todo = [m for m in dstore if not self.job_done(m)] - for result in ui.imap( - process, todo, parallel=parallel, par_kw=par_kw, mininterval=mininterval + for i, result in enumerate( + ui.imap( + process, todo, parallel=parallel, par_kw=par_kw, mininterval=mininterval + ) ): outcome = result if process is self else self(result) results.append(outcome) @@ -488,8 +497,6 @@ f"{outcome.origin} : {outcome.message}", label=outcome.type ) - i += 1 - finish = time.time() taken = finish - start if LOGGER: @@ -627,14 +634,7 @@ ), "invalid value for if_exists" self._if_exists = if_exists - if writer_class: - klass = writer_class - else: - klass = ( - WritableZippedDataStore - if data_path.endswith(".zip") - else WritableDirectoryDataStore - ) + klass = writer_class or WritableDirectoryDataStore self.data_store = klass( data_path, suffix=suffix, create=create, if_exists=if_exists ) @@ -650,8 +650,7 @@ if self._callback: data = self._callback(data) - identifier = self.data_store.make_absolute_identifier(data) - return identifier + return self.data_store.make_absolute_identifier(data) def job_done(self, data): identifier = self._make_output_identifier(data) @@ -674,19 +673,112 @@ _type = "function" - def __init__(self, func, input_types, output_types, data_types=None): + @extend_docstring_from(ComposableType.__init__, pre=False) + def __init__( + self, func, 
input_types, output_types, *args, data_types=None, **kwargs + ): + """ + func : callable + user specified function + *args + positional arguments to append to incoming values prior to calling + func + **kwargs + keyword arguments to include when calling func + + Notes + ----- + Known types are defined as constants in ``cogent3.app.composable``, e.g. + ALIGNED_TYPE, SERIALISABLE_TYPE, RESULT_TYPE. + + If you create a function ``foo(arg1, arg2, kwarg1=False)``. You can + turn this into a user function, e.g. + + >>> ufunc = user_function(foo, in_types, out_types, arg1val, kwarg1=True) + + Then + + >>> ufunc(arg2val) == foo(arg1val, arg2val, kwarg1=True) + """ super(user_function, self).__init__( - input_types=input_types, output_types=output_types + input_types=input_types, output_types=output_types, data_types=data_types ) - self.func = func + self._user_func = func + self._args = args + self._kwargs = kwargs def func(self, *args, **kwargs): - self._func(self, *args, **kwargs) + """ + Parameters + ---------- + args + self._args + args are passed to the user function + kwargs + a deep copy of self._kwargs is updated by kwargs and passed to the + user function + + Returns + ------- + the result of the user function + """ + args = self._args + args + kwargs_ = deepcopy(self._kwargs) + kwargs_.update(kwargs) + return self._user_func(*args, **kwargs_) def __str__(self): - name = self.func.__name__ - module = self.func.__module__ - return f"user_function(name='{name}', module='{module}')" + txt = "" if not self.input else str(self.input) + if txt: + txt += " + " + txt += f"user_function(name='{self._user_func.__name__}', module='{self._user_func.__module__}')" + txt = textwrap.fill( + txt, width=80, break_long_words=False, break_on_hyphens=False + ) + return txt def __repr__(self): return str(self) + + +class appify: + """function decorator for generating user apps. Simplifies creation of + user_function() instancese, e.g. 
+ + >>> @appify(SEQUENCE_TYPE, SEQUENCE_TYPE, data_types="SequenceCollection") + ... def omit_seqs(seqs, quantile=None, gap_fraction=1, moltype="dna"): + ... return seqs.omit_bad_seqs(quantile=quantile, gap_fraction=gap_fraction, moltype="dna") + ... + + `omit_seqs()` is now an app factory, allowing creating variants of the app. + + >>> omit_bad = omit_seqs(quantile=0.95) + + omit_bad is now a composable user_function app. Calling with different + args/kwargs values returns a variant app, as per the behaviour of builtin + apps. + """ + + @extend_docstring_from(ComposableType.__init__) + def __init__(self, input_types, output_types, data_types=None) -> None: + self._it = input_types + self._ot = output_types + self._dt = data_types + self._func = None + + def __call__(self, func): + # executed on use as decorator + self._func = func + # makes the returned reference have the name, docs etc. + # of original function + self._make_app.__func__.__doc__ = f"appify: {func.__doc__}" + self._make_app.__func__.__repr__ = lambda x: repr(func) + self._make_app.__func__.__name__ = func.__name__ + self._make_app.__func__.__module__ = func.__module__ + + return self._make_app + + def _make_app(self, *args, **kwargs): + # construct the user_function app + return user_function( + self._func, self._it, self._ot, *args, data_types=self._dt, **kwargs + ) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/app/data_store.py python-cogent-2021.10.12a1+dfsg/src/cogent3/app/data_store.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/app/data_store.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/app/data_store.py 2021-10-12 00:17:34.000000000 +0000 @@ -27,15 +27,16 @@ get_format_suffixes, open_, ) +from cogent3.util.parallel import is_master_process from cogent3.util.table import Table from cogent3.util.union_dict import UnionDict __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 
2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -106,7 +107,6 @@ class ReadOnlyDataStoreBase: - """a read only data store""" store_suffix = None @@ -151,8 +151,7 @@ self._checksums = {} def __getstate__(self): - data = self._persistent.copy() - return data + return self._persistent.copy() def __setstate__(self, data): new = self.__class__(**data) @@ -168,8 +167,7 @@ num = len(self) name = self.__class__.__name__ - txt = f"{num}x member {name}(source='{self.source}', members={sample})" - return txt + return f"{num}x member {name}(source='{self.source}', members={sample})" def __str__(self): return str(list(self)) @@ -328,8 +326,7 @@ if not os.path.exists(identifier): raise ValueError(f"path '{identifier}' does not exist") - infile = open_(identifier) - return infile + return open_(identifier) class SingleReadDataStore(ReadOnlyDirectoryDataStore): @@ -389,13 +386,13 @@ class WritableDataStoreBase: + """a writeable data store""" + def __init__(self, if_exists=RAISE, create=False): """ - Parameters - ---------- if_exists : str behaviour when the destination already exists. 
Valid constants are - defined in this file as OVERWRITE, SKIP, RAISE, IGNORE (they + defined in this file as OVERWRITE, RAISE, IGNORE (they correspond to lower case version of the same word) create : bool if True, the destination is created @@ -410,26 +407,21 @@ self._members = [] if_exists = if_exists.lower() assert if_exists in (OVERWRITE, SKIP, RAISE, IGNORE) - if create is False and if_exists == OVERWRITE: - warn(f"'{OVERWRITE}' reset to '{IGNORE}' and create=True", UserWarning) - create = True self._source_create_delete(if_exists, create) def make_relative_identifier(self, data): """returns identifier for a new member relative to source""" + from cogent3.app.composable import _get_source + if isinstance(data, DataStoreMember): data = data.name elif type(data) != str: - try: - data = data.info.source - except AttributeError: - try: - data = data.source - except AttributeError: - raise ValueError( - "objects for storage require either a " - "source or info.source string attribute" - ) + data = _get_source(data) + if data is None: + raise ValueError( + "objects for storage require either a " + "source or info.source string attribute" + ) basename = os.path.basename(data) suffix, comp = get_format_suffixes(basename) if suffix and comp: @@ -448,8 +440,7 @@ def make_absolute_identifier(self, data): """returns a absolute identifier for a new member, includes source""" basename = self.make_relative_identifier(data) - identifier = self.get_absolute_identifier(basename, from_relative=True) - return identifier + return self.get_absolute_identifier(basename, from_relative=True) def add_file(self, path, make_unique=True, keep_suffix=True, cleanup=False): """ @@ -516,26 +507,37 @@ record = json.dumps(record) self.write(identifier, record) - def write(self, *args, **kwargs): + def write(self, identifier, data, *args, **kwargs): """ Parameters ---------- identifier : str - identifier that data wil be saved under + identifier that data will be saved under. 
Must have a suffix matching + self.suffix or ``.log``. data - data to be saved + data to be saved. If a tinydb, must be an object that can be + converted to json, or has a to_json() method. Otherwise, it must be a string. Returns ------- DataStoreMember instance """ - raise NotImplementedError + if not isinstance(data, str): + raise TypeError(f"data must be a string type, not {type(data)}") + + id_suffix = identifier.split(".")[-1] + if id_suffix not in (self.suffix, "log"): + raise ValueError( + f"identifier does not end with required suffix {self.suffix}" + ) def close(self): pass class WritableDirectoryDataStore(ReadOnlyDirectoryDataStore, WritableDataStoreBase): + @extend_docstring_from(ReadOnlyDirectoryDataStore.__init__, pre=False) + @extend_docstring_from(WritableDataStoreBase.__init__, pre=False) def __init__( self, source, @@ -547,22 +549,10 @@ **kwargs, ): """ - Parameters - ---------- - source - path to directory / zip file - suffix - only members whose name matches the suffix are considered included - mode : str - file opening mode, defaults to write - if_exists : str - behaviour when the destination already exists. 
Valid constants are - defined in this file as OVERWRITE, SKIP, RAISE, IGNORE (they - correspond to lower case version of the same word) - create : bool - if True, the destination is created md5 : bool record md5 hexadecimal checksum of data when possible + mode : str + file opening mode, defaults to write """ assert "w" in mode or "a" in mode ReadOnlyDirectoryDataStore.__init__(self, source=source, suffix=suffix, md5=md5) @@ -581,10 +571,13 @@ return False def _source_create_delete(self, if_exists, create): - exists = os.path.exists(self.source) - if exists and if_exists == RAISE: - raise RuntimeError(f"'{self.source}' exists") - elif exists and if_exists == OVERWRITE: + if not is_master_process(): + return + + path = Path(self.source) + if path.exists() and if_exists == RAISE: + raise FileExistsError(f"'{self.source}' exists") + elif path.exists() and if_exists == OVERWRITE: if self._has_other_suffixes(self.source, self.suffix): raise RuntimeError( f"Unsafe to delete {self.source} as it contains ", @@ -595,101 +588,28 @@ shutil.rmtree(self.source) except NotADirectoryError: os.remove(self.source) - elif not exists and not create: - raise RuntimeError(f"'{self.source}' does not exist") + elif not path.exists() and not create: + raise FileNotFoundError(f"'{self.source}' does not exist") if create: - os.makedirs(self.source, exist_ok=True) + path.mkdir(parents=True, exist_ok=True) @extend_docstring_from(WritableDataStoreBase.write) def write(self, identifier, data): - relative_id = self.get_relative_identifier(identifier) - absolute_id = self.get_absolute_identifier(relative_id, from_relative=True) - - if self._md5: - self._checksums[absolute_id] = get_text_hexdigest(data) - - with atomic_write(str(absolute_id), in_zip=False) as out: - out.write(data) - - member = DataStoreMember(relative_id, self) - if relative_id not in self and relative_id.endswith(self.suffix): - self._members.append(member) - - return member - - -class 
WritableZippedDataStore(ReadOnlyZippedDataStore, WritableDataStoreBase): - def __init__( - self, - source, - suffix, - mode="a", - if_exists=RAISE, - create=False, - md5=True, - **kwargs, - ): - """ - Parameters - ---------- - source - path to directory / zip file - suffix - only members whose name matches the suffix are considered included - mode : str - file opening mode, defaults to append - if_exists : str - behaviour when the destination already exists. Valid constants are - defined in this file as OVERWRITE, SKIP, RAISE, IGNORE (they - correspond to lower case version of the same word) - create : bool - if True, the destination is created - md5 : bool - record md5 hexadecimal checksum of data when possible - """ - ReadOnlyZippedDataStore.__init__(self, source=source, suffix=suffix, md5=md5) - WritableDataStoreBase.__init__(self, if_exists=if_exists, create=create) - - d = locals() - self._persistent = {k: v for k, v in d.items() if k != "self"} - self.mode = "a" or mode - - def _has_other_suffixes(self, path, suffix): - allowed = {str(suffix), "log"} - for f in zipfile.ZipFile(path).namelist(): - if get_format_suffixes(str(f))[0] not in allowed: - return True - return False - - def _source_create_delete(self, if_exists, create): - exists = os.path.exists(self.source) - dirname = os.path.dirname(self.source) - if exists and if_exists == RAISE: - raise RuntimeError(f"'{self.source}' exists") - elif exists and if_exists == OVERWRITE: - if self._has_other_suffixes(self.source, self.suffix): - raise RuntimeError( - f"Unsafe to delete {self.source} as it contains ", - f"files other than .{self.suffix} or .log files." 
- " You will need to remove this directly yourself.", - ) - os.remove(self.source) - elif dirname and not os.path.exists(dirname) and not create: - raise RuntimeError(f"'{dirname}' does not exist") - - if create and dirname: - os.makedirs(dirname, exist_ok=True) + super().write(identifier, data) + id_suffix = identifier.split(".")[-1] + if id_suffix not in (self.suffix, "log"): + raise ValueError( + f"identifier does not end with required suffix {self.suffix}" + ) - @extend_docstring_from(WritableDataStoreBase.write) - def write(self, identifier, data): relative_id = self.get_relative_identifier(identifier) absolute_id = self.get_absolute_identifier(relative_id, from_relative=True) if self._md5: self._checksums[absolute_id] = get_text_hexdigest(data) - with atomic_write(str(relative_id), in_zip=self.source) as out: + with atomic_write(str(absolute_id), in_zip=False) as out: out.write(data) member = DataStoreMember(relative_id, self) @@ -703,11 +623,12 @@ """returns value for pid in LOCK record or None""" if not os.path.exists(path): return None - db = TinyDB(path) - query = Query().identifier.matches("LOCK") - got = db.get(query) - lockid = None if not got else got["pid"] - db.close() + + with TinyDB(path) as db: + query = Query().identifier.matches("LOCK") + got = db.get(query) + lockid = None if not got else got["pid"] + return lockid @@ -716,6 +637,7 @@ store_suffix = "tinydb" + @extend_docstring_from(ReadOnlyDirectoryDataStore.__init__) def __init__(self, *args, **kwargs): kwargs["suffix"] = "json" super(ReadOnlyTinyDbDataStore, self).__init__(*args, **kwargs) @@ -846,8 +768,7 @@ ] rows.append(row) - table = Table(header=header, data=rows, title="incomplete records") - return table + return Table(header=header, data=rows, title="incomplete records") @property def members(self): @@ -952,12 +873,11 @@ ] ) rows.append(row) - table = Table( + return Table( header=["time", "name", "python version", "who", "command", "composable"], data=rows, title="summary of log 
files", ) - return table @property def describe(self): @@ -972,7 +892,7 @@ num_incomplete = len(self.incomplete) num_complete = len(self.members) num_logs = len(self.logs) - summary = Table( + return Table( header=["record type", "number"], data=[ ["completed", num_complete], @@ -981,38 +901,67 @@ ], title=title, ) - return summary class WritableTinyDbDataStore(ReadOnlyTinyDbDataStore, WritableDataStoreBase): + @extend_docstring_from(WritableDirectoryDataStore.__init__) def __init__(self, *args, **kwargs): + """ + + Notes + ----- + A TinyDb file can be locked. In which case, ``if_exists=OVERWRITE`` + will be converted to RAISE. + """ if_exists = kwargs.pop("if_exists", RAISE) create = kwargs.pop("create", True) ReadOnlyTinyDbDataStore.__init__(self, *args, **kwargs) WritableDataStoreBase.__init__(self, if_exists=if_exists, create=create) def _source_create_delete(self, if_exists, create): - if _db_lockid(self.source): + if not is_master_process(): return - exists = os.path.exists(self.source) - dirname = os.path.dirname(self.source) - if exists and if_exists == RAISE: - raise RuntimeError(f"'{self.source}' exists") - elif exists and if_exists == OVERWRITE: + path = Path(self.source) + if if_exists == OVERWRITE and path.exists(): try: - os.remove(self.source) + path.unlink() except PermissionError: # probably user accidentally created a directory - shutil.rmtree(self.source) - elif dirname and not os.path.exists(dirname) and not create: - raise RuntimeError(f"'{dirname}' does not exist") + shutil.rmtree(path) + return + + locked_id = _db_lockid(self.source) + pid = os.getpid() + if path.exists() and if_exists == RAISE: + msg = f"'{path}' exists" + if locked_id is not None: + msg = ( + f"{msg}, and is locked by process pid {locked_id}." 
+ f" Current pid is {pid}" + ) + raise FileExistsError(msg) + + if if_exists == IGNORE and locked_id is not None: + warn(f"'{self.source}' is locked to {locked_id}, current pid is {pid}.") + return - if create and dirname: - os.makedirs(dirname, exist_ok=True) + if path.parent and not path.parent.exists() and not create: + raise FileNotFoundError(f"'{path.parent}' does not exist, set create=True") + + path.parent.mkdir(parents=True, exist_ok=True) @extend_docstring_from(WritableDataStoreBase.write) def write(self, identifier, data): + # writing into a tinydb has its own logic for conversion to json + # so we don't validate data is a string for this case + super().write(identifier, "") + id_suffix = identifier.split(".")[-1] + if id_suffix not in (self.suffix, "log"): + raise ValueError( + f"identifier does not end with required suffix {self.suffix}" + ) + matches = self.filtered(identifier) if matches: return matches[0] @@ -1039,9 +988,7 @@ record = make_record_for_json(relative_id, not_completed, False) doc_id = self.db.insert(record) - member = DataStoreMember(relative_id, self, id=doc_id) - - return member + return DataStoreMember(relative_id, self, id=doc_id) def add_file(self, path, make_unique=True, keep_suffix=True, cleanup=False): """ diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/app/dist.py python-cogent-2021.10.12a1+dfsg/src/cogent3/app/dist.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/app/dist.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/app/dist.py 2021-10-12 00:17:34.000000000 +0000 @@ -11,15 +11,16 @@ ALIGNED_TYPE, PAIRWISE_DISTANCE_TYPE, SERIALISABLE_TYPE, + TABULAR_TYPE, ComposableDistance, ) __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = 
"Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -33,7 +34,7 @@ """ _input_types = ALIGNED_TYPE - _output_types = (PAIRWISE_DISTANCE_TYPE, SERIALISABLE_TYPE) + _output_types = (PAIRWISE_DISTANCE_TYPE, TABULAR_TYPE, SERIALISABLE_TYPE) _data_types = ("ArrayAlignment", "Alignment") def __init__(self, distance=None, moltype=None, fast_calc=None, slow_calc=None): @@ -121,6 +122,7 @@ else: empty = {p: 0 for p in itertools.product(aln.names, aln.names)} dists = DistanceMatrix(empty) + dists.source = aln.info.source if self._sm: for a in dists.template.names[0]: for b in dists.template.names[1]: diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/app/evo.py python-cogent-2021.10.12a1+dfsg/src/cogent3/app/evo.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/app/evo.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/app/evo.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,6 +1,4 @@ -import os - -from tqdm import tqdm +import warnings from cogent3 import load_tree, make_tree from cogent3.core.tree import TreeNode @@ -23,16 +21,17 @@ from .result import ( bootstrap_result, hypothesis_result, + model_collection_result, model_result, tabular_result, ) __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -50,6 +49,7 @@ self, sm, tree=None, + unique_trees=False, name=None, sm_args=None, lf_args=None, @@ -69,12 +69,15 @@ if None, assumes a star phylogeny (only valid for 3 taxa). Can be a newick formatted tree, a path to a file containing one, or a Tree instance. + unique_trees: bool + whether to specify a unique tree per alignment. Only applies if + number of sequences equals 3. 
name name of the model - sm_args + sm_args : dict arguments to be passed to the substitution model constructor, e.g. dict(optimise_motif_probs=True) - lf_args + lf_args : dict arguments to be passed to the likelihood function constructor time_het 'max' or a list of dicts corresponding to edge_sets, e.g. @@ -84,7 +87,7 @@ param_rules other parameter rules, passed to the likelihood function set_param_rule() method - opt_args + opt_args : dict arguments for the numerical optimiser, e.g. dict(max_restarts=5, tolerance=1e-6, max_evaluations=1000, limit_action='ignore') @@ -109,6 +112,10 @@ ) self._verbose = verbose self._formatted_params() + assert not ( + tree and unique_trees + ), "cannot provide a tree when unique_trees is True" + self._unique_trees = unique_trees sm_args = sm_args or {} if type(sm) == str: sm = get_model(sm, **sm_args) @@ -175,7 +182,7 @@ lf.apply_param_rules([rule]) if initialise: - initialise(lf, identifier) + lf = initialise(lf, identifier) self._lf = lf @@ -212,8 +219,8 @@ return NotCompleted("ERROR", self, msg, source=aln) evaluation_limit = opt_args.get("max_evaluations", None) - if self._tree is None: - assert len(aln.names) == 3 + if self._tree is None or self._unique_trees: + assert len(aln.names) == 3, "to model more than 3, you must provide a tree" self._tree = make_tree(tip_names=aln.names) result = model_result( @@ -251,15 +258,32 @@ return result -class hypothesis(ComposableHypothesis): - """Specify a hypothesis through defining two models. Returns a - hypothesis_result.""" +class _InitFrom: + """holds a likelihood function that will be used to initialise others""" + + def __init__(self, nested): + """nested: a model_result or a likelihood function""" + if hasattr(nested, "lf"): + nested = nested.lf + self.nested = nested + + def __call__(self, other, *args, **kwargs): + try: + other.initialise_from_nested(self.nested) + except: + pass + return other + + +class model_collection(ComposableHypothesis): + """Fits a collection of models. 
Returns a + model_collection_result.""" _input_types = (ALIGNED_TYPE, SERIALISABLE_TYPE) _output_types = (RESULT_TYPE, HYPOTHESIS_RESULT_TYPE, SERIALISABLE_TYPE) _data_types = ("ArrayAlignment", "Alignment") - def __init__(self, null, *alternates, init_alt=None): + def __init__(self, null, *alternates, sequential=True, init_alt=None): """ Parameters ---------- @@ -267,18 +291,31 @@ The null model instance alternates : model or series of models The alternate model or a series of them + sequential : bool + initialise each likelihood function from the preceding model fit. + If False, and init_alt is not specified, each function is optimised + from default values. init_alt : callable A callback function for initialising the alternate model - likelihood function prior to optimisation. Defaults to using - MLEs from the null model. + likelihood function prior to optimisation. It must take 2 input + arguments and return the modified alternate likelihood function. + Default is to use MLEs from the null model. + + Notes + ----- + To stop the null MLEs from being used, provide a lambda function that + just returns the likelihood function, e.g. init_alt=lambda lf, identifier: lf """ - # todo document! 
init_alt needs to be able to take null, alt and *args - super(hypothesis, self).__init__( + super(model_collection, self).__init__( input_types=self._input_types, output_types=self._output_types, data_types=self._data_types, ) self._formatted_params() + if sequential and init_alt: + warnings.warn("init_alt is specified, ignoring sequential") + sequential = False + self.null = null names = {a.name for a in alternates} names.add(null.name) @@ -289,30 +326,30 @@ self._alts = alternates self.func = self.test_hypothesis self._init_alt = init_alt + self._sequential = sequential - def _initialised_alt_from_null(self, null, aln): - def init(alt, *args, **kwargs): - try: - alt.initialise_from_nested(null.lf) - except: - pass - return alt + def _make_result(self, aln): + return model_collection_result(source=aln.info) + def _initialised_alt(self, null, aln): if callable(self._init_alt): - init_func = self._init_alt(null) - else: - init_func = init + init_func = self._init_alt + elif not self._sequential: + init_func = None results = [] for alt in self._alts: + if self._sequential: + init_func = _InitFrom(null) result = alt(aln, initialise=init_func) results.append(result) + null = result return results def test_hypothesis(self, aln): try: null = self.null(aln) - except ValueError as err: + except ValueError: msg = f"Hypothesis null had bounds error {aln.info.source}" return NotCompleted("ERROR", self, msg, source=aln) @@ -320,8 +357,8 @@ return null try: - alts = [alt for alt in self._initialised_alt_from_null(null, aln)] - except ValueError as err: + alts = [alt for alt in self._initialised_alt(null, aln)] + except ValueError: msg = f"Hypothesis alt had bounds error {aln.info.source}" return NotCompleted("ERROR", self, msg, source=aln) @@ -333,15 +370,23 @@ results = {alt.name: alt for alt in alts} results.update({null.name: null}) - result = hypothesis_result(name_of_null=null.name, source=aln.info.source) + result = self._make_result(aln) result.update(results) return 
result +class hypothesis(model_collection): + """Specify a hypothesis through defining two models. Returns a + hypothesis_result.""" + + def _make_result(self, aln): + return hypothesis_result(name_of_null=self.null.name, source=aln.info) + + class bootstrap(ComposableHypothesis): """Parametric bootstrap for a provided hypothesis. Returns a bootstrap_result.""" - _input_types = ALIGNED_TYPE + _input_types = (ALIGNED_TYPE, SERIALISABLE_TYPE) _output_types = (RESULT_TYPE, BOOTSTRAP_RESULT_TYPE, SERIALISABLE_TYPE) _data_types = ("ArrayAlignment", "Alignment") @@ -751,8 +796,7 @@ ) ) alt_args["param_rules"] = rules - alt = model(**alt_args) - return alt + return model(**alt_args) def test_hypothesis(self, aln, *args, **kwargs): null_result = self.null(aln) @@ -911,8 +955,7 @@ ) ) alt_args["param_rules"] = rules - alt = model(**alt_args) - return alt + return model(**alt_args) def test_hypothesis(self, aln, *args, **kwargs): null_result = self.null(aln) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/app/__init__.py python-cogent-2021.10.12a1+dfsg/src/cogent3/app/__init__.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/app/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/app/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,14 +1,12 @@ import importlib import inspect -from warnings import filterwarnings - __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -39,7 +37,7 @@ def available_apps(): - """returns table of all available apps""" + """returns Table listing the available apps""" from cogent3.util.table import Table from .composable import Composable, user_function @@ -64,5 +62,4 @@ ) ) header = ["module", 
"name", "composable", "doc", "inputs", "outputs", "data type"] - table = Table(header, rows) - return table + return Table(header, rows) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/app/io.py python-cogent-2021.10.12a1+dfsg/src/cogent3/app/io.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/app/io.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/app/io.py 2021-10-12 00:17:34.000000000 +0000 @@ -21,6 +21,7 @@ from .composable import ( ALIGNED_TYPE, IDENTIFIER_TYPE, + PAIRWISE_DISTANCE_TYPE, SEQUENCE_TYPE, SERIALISABLE_TYPE, TABULAR_RESULT_TYPE, @@ -31,6 +32,7 @@ ComposableTabular, NotCompleted, _checkpointable, + _get_source, ) from .data_store import ( IGNORE, @@ -48,10 +50,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -145,7 +147,7 @@ return seqs -class load_aligned(_seq_loader, ComposableAligned): +class load_aligned(ComposableAligned, _seq_loader): """Loads aligned sequences. 
Returns an Alignment object.""" klass = ArrayAlignment @@ -163,7 +165,7 @@ format : str sequence file format """ - super(ComposableAligned, self).__init__( + super(load_aligned, self).__init__( input_types=self._input_types, output_types=self._output_types, data_types=self._data_types, @@ -194,7 +196,7 @@ format : str sequence file format """ - super(ComposableSeq, self).__init__( + super(load_unaligned, self).__init__( input_types=self._input_types, output_types=self._output_types, data_types=self._data_types, @@ -238,7 +240,7 @@ strict all rows MUST have the same number of records """ - super(ComposableTabular, self).__init__( + super(load_tabular, self).__init__( input_types=self._input_types, output_types=self._output_types, data_types=self._data_types, @@ -306,7 +308,7 @@ try: header, data, title = self._parse(path) except Exception as err: - result = NotCompleted("ERROR", self, err.args[0], source=str(path)) + return NotCompleted("ERROR", self, err.args[0], source=str(path)) if self.as_type == "table": return Table(header=header, data=data, title=title) @@ -330,7 +332,7 @@ class write_tabular(_checkpointable, ComposableTabular): """writes tabular data""" - _input_types = (TABULAR_RESULT_TYPE, TABULAR_TYPE) + _input_types = (TABULAR_RESULT_TYPE, TABULAR_TYPE, PAIRWISE_DISTANCE_TYPE) _output_types = IDENTIFIER_TYPE _data_types = ("Table", "DictArray", "DistanceMatrix") @@ -579,6 +581,25 @@ self._load_checkpoint = self def write(self, data, identifier=None): + """ + + Parameters + ---------- + data + object that has a `to_json()` method, or can be json serialised + identifier : str + if not provided, taken from data.source or data.info.source + + Returns + ------- + identifier + """ + data_source = _get_source(data) + if (data_source and identifier is not None) and str(data_source) != str( + identifier + ): + raise ValueError(f"identifier {identifier} != data source {data_source}") + if identifier is None: identifier = self._make_output_identifier(data) # todo 
revisit this when we establish immutability behaviour of database diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/app/result.py python-cogent-2021.10.12a1+dfsg/src/cogent3/app/result.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/app/result.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/app/result.py 2021-10-12 00:17:34.000000000 +0000 @@ -12,10 +12,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -25,6 +25,7 @@ """A dict style container for storing results.""" _type = "generic_result" + _item_types = () def __init__(self, source): self._store = {} @@ -32,6 +33,23 @@ self.source = source def __setitem__(self, key, val): + if isinstance(val, dict): + type_name = val.get("type", None) + type_name = type_name or "" + else: + type_name = val.__class__.__name__ + + for item_type in self._item_types: + if item_type in type_name: + break + else: + if self._item_types: + msg = f"{type_name} not in supported types {self._item_types}" + raise TypeError(msg) + + if not hasattr(val, "to_json"): + json.dumps(val) + self._store[key] = val def __getitem__(self, key): @@ -49,7 +67,8 @@ def __repr__(self): name = self.__class__.__name__ num = len(self) - types = [f"{repr(k)}: {self[k].__class__.__name__}" for k in self.keys()[:4]] + types = [f"{repr(k)}: {self[k].__class__.__name__}" for k in self] + types = types[:3] + ["..."] if num > 5 else types types = ", ".join(types) return f"{num}x {name}({types})" @@ -100,6 +119,7 @@ _type = "model_result" _stat_attrs = ("lnL", "nfp", "DLC", "unique_Q") + _item_types = ("AlignmentLikelihoodFunction",) def __init__( self, @@ -138,6 +158,9 @@ self._unique_Q = unique_Q def 
_get_repr_data_(self): + if len(self) == 0: + return f"{self.__class__.__name__}(name={self.name}, source={self.source})" + self.deserialised_values() # making sure we're fully reloaded attrs = list(self._stat_attrs) header = ["key"] + attrs[:] @@ -162,16 +185,6 @@ return repr(table) def __setitem__(self, key, lf): - if isinstance(lf, dict): - type_name = lf.get("type", None) - type_name = type_name or "" - else: - type_name = lf.__class__.__name__ - - if "AlignmentLikelihoodFunction" not in type_name: - msg = f"{type_name} not a supported type" - raise TypeError(msg) - super(self.__class__, self).__setitem__(key, lf) self._init_stats() @@ -377,6 +390,7 @@ """Storage of a collection of model_result.""" _type = "model_collection_result" + _item_types = ("model_result",) def __init__(self, name=None, source=None): """ @@ -407,6 +421,9 @@ return table._repr_html_() def __repr__(self): + if len(self) == 0: + return f"{self.__class__.__name__}(name={self.name}, source={self.source})" + table = self._get_repr_data_() return str(table._get_repr_()) @@ -475,12 +492,28 @@ return selected[0] + def get_hypothesis_result(self, name_null, name_alt): + """returns a hypothesis result with two models + + Parameters + ---------- + name_null : str + name of the null model + name_alt : str + name of the alternate model + """ + result = hypothesis_result(name_of_null=name_null, source=self.source) + result[name_null] = self[name_null] + result[name_alt] = self[name_alt] + return result + class hypothesis_result(model_collection_result): """Storage of a collection of model_result instances that are hierarchically related.""" _type = "hypothesis_result" + _item_types = ("model_result",) @extend_docstring_from(model_collection_result.__init__, pre=True) def __init__(self, name_of_null, name=None, source=None): @@ -509,9 +542,13 @@ table = table.sorted(columns="nfp") table.set_repr_policy(show_shape=False) stats = [[self.LR, self.df, self.pvalue]] - col_templates = { - "pvalue": "%.4f" if 
self.pvalue > 1e-3 else "%.2e", - } + col_templates = ( + None + if self.pvalue is None + else { + "pvalue": "%.4f" if self.pvalue > 1e-3 else "%.2e", + } + ) stats = Table( header=["LR", "df", "pvalue"], data=stats, @@ -527,6 +564,9 @@ return "\n".join(result) def __repr__(self): + if len(self) == 0: + return f"{self.__class__.__name__}(name={self.name}, source={self.source})" + stats, table = self._get_repr_data_() result = [] for t in (stats, table): @@ -572,6 +612,7 @@ class bootstrap_result(generic_result): _type = "bootstrap_result" + _item_types = ("hypothesis_result", "model_collection_result") def __init__(self, source=None): super(bootstrap_result, self).__init__(source) @@ -598,10 +639,18 @@ class tabular_result(generic_result): - """stores one or multiple tabular data sets, keyed by a title""" + """stores one or multiple cogent3 Tables, DictArray""" _type = "tabular_result" _stat_attrs = ("header", "rows") + _item_types = ( + "Table", + "DictArray", + "MotifCounts", + "MotifFreqs", + "PSSM", + "DistanceMatrix", + ) def __init__(self, source=None): super(tabular_result, self).__init__(source) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/app/sample.py python-cogent-2021.10.12a1+dfsg/src/cogent3/app/sample.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/app/sample.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/app/sample.py 2021-10-12 00:17:34.000000000 +0000 @@ -19,10 +19,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -35,8 +35,7 @@ def intersection(groups): """returns the intersection of all groups""" common = set(groups.pop()) - intersect = common.intersection(*map(set, groups)) - return 
intersect + return common.intersection(*map(set, groups)) def union(groups): @@ -75,10 +74,25 @@ data series of alignment instances """ - names = self._name_callback(list(aln.names for aln in data)) + if len(data) == 0: + raise ValueError("no data") + + names = [] + for aln in data: + if not (isinstance(aln, ArrayAlignment) or isinstance(aln, Alignment)): + raise TypeError(f"{type(aln)} invalid for concat") + names.append(aln.names) + + names = self._name_callback(names) collated = defaultdict(list) + if self._moltype is None: + self._moltype = aln.moltype + for aln in data: - assert isinstance(aln, ArrayAlignment) or isinstance(aln, Alignment) + if self._moltype and aln.moltype != self._moltype: + # try converting + aln = aln.to_moltype(self.moltype) + if self._intersect: seqs = aln.take_seqs(names).to_dict() else: @@ -331,6 +345,76 @@ except KeyError: missing = set(self._names) - set(data.names) msg = f"named seq(s) {missing} not in {data.names}" + data = NotCompleted("FALSE", self, msg, source=data) + return data + + +class take_n_seqs(ComposableSeq): + """Selects n sequences from a collection. Chooses first n sequences, or selects randomly if specified. + + Returns original object type with the selected sequences, NotCompleted object otherwise. + """ + + _input_types = (SEQUENCE_TYPE, ALIGNED_TYPE, SERIALISABLE_TYPE) + _output_types = (SEQUENCE_TYPE, ALIGNED_TYPE, SERIALISABLE_TYPE) + _data_types = ("ArrayAlignment", "Alignment", "SequenceCollection") + + def __init__(self, number, random=False, seed=None, fixed_choice=True): + """ + selects n sequences from a collection + + Parameters + ---------- + number: int + number of sequences to sample. If number of sequences in a collectionis < n, returns NotCompleted + indicating a FAIL. + random: bool + Whether to choose the sequences randomly. + seed: int + Seed for the numpy random number generator. + fixed_choice: bool + sequence names selected from the first alignment are used for all others. 
+ + Returns + ------- + A new sequence collection, or NotCompleted if not insufficient sequences are in the collection. + """ + super(take_n_seqs, self).__init__( + input_types=self._input_types, + output_types=self._output_types, + data_types=self._data_types, + ) + self._formatted_params() + + if seed: + np_random.seed(seed) + + self._names = None + self._number = number + self._random = random + self._fixed_choice = fixed_choice + self.func = self.take_seqs + + def _set_names(self, data): + """set the names attribute""" + if not self._random: + self._names = data.names[: self._number] + return + + self._names = np_random.choice(data.names, self._number, replace=False) + + def take_seqs(self, data): + if len(data.names) < self._number: + return NotCompleted("FALSE", self.take_seqs, "not enough sequences") + + if self._names is None or not self._fixed_choice: + self._set_names(data) + + try: + data = data.take_seqs(self._names) + except KeyError: + missing = set(self._names) - set(data.names) + msg = f"named seq(s) {missing} not in {data.names}" data = NotCompleted("FALSE", self, msg, source=data) return data diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/app/translate.py python-cogent-2021.10.12a1+dfsg/src/cogent3/app/translate.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/app/translate.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/app/translate.py 2021-10-12 00:17:34.000000000 +0000 @@ -13,10 +13,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -286,5 +286,4 @@ if self._trim_terminal_stop: seqs = seqs.trim_stop_codons(gc=self._gc) - aa = seqs.get_translation(gc=self._gc) - return aa + return 
seqs.get_translation(gc=self._gc) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/app/tree.py python-cogent-2021.10.12a1+dfsg/src/cogent3/app/tree.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/app/tree.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/app/tree.py 2021-10-12 00:17:34.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -147,18 +147,14 @@ size = dists.shape[0] dists = dists.drop_invalid() if self._drop_invalid else dists if dists is None or (dists.shape[0] != size and not self._drop_invalid): - msg = ( - f"some pairwise distances could not be computed with" - " {self._distance}, pick a different distance" - ) - raise ValueError(msg) + raise ValueError("invalid pairwise distances") # how many species do we have - species = dists.keys() - if len(species) == 2: - dist = list(dists.values())[0] / 2.0 - treestring = "(%s:%.4f,%s:%.4f)" % (species[0], dist, species[1], dist) - tree = make_tree(treestring=treestring, underscore_unmunge=True) + if size == 2: + dist = dists.array[0, 1] / 2.0 + newick = ",".join(f"{sp}:{dist:.4f}" for sp in dists.names) + newick = f"({newick})" + tree = make_tree(treestring=newick, underscore_unmunge=True) else: (result,) = gnj(dists.to_dict(), keep=1, show_progress=False) (score, tree) = result diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/cluster/__init__.py python-cogent-2021.10.12a1+dfsg/src/cogent3/cluster/__init__.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/cluster/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/cluster/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -5,10 +5,10 
@@ __all__ = ["UPGMA"] __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Catherine Lozuopone", "Rob Knight", "Peter Maxwell", "Justin Kuczynski"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Catherine Lozupone" __email__ = "lozupone@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/cluster/UPGMA.py python-cogent-2021.10.12a1+dfsg/src/cogent3/cluster/UPGMA.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/cluster/UPGMA.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/cluster/UPGMA.py 2021-10-12 00:17:34.000000000 +0000 @@ -12,17 +12,17 @@ import numpy -from numpy import argmin, array, average, diag, ma, ravel, sum, take +from numpy import argmin, average, diag, ravel, take from cogent3.core.tree import PhyloNode from cogent3.util.dict_array import DictArray __author__ = "Catherine Lozupone" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Catherine Lozuopone", "Rob Knight", "Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Catherine Lozupone" __email__ = "lozupone@colorado.edu" __status__ = "Production" @@ -150,7 +150,7 @@ if index1 == index2: matrix[diag([True] * len(matrix))] = large_number smallest_index = find_smallest_index(matrix) - row_order = condense_node_order(matrix, smallest_index, node_order) + _ = condense_node_order(matrix, smallest_index, node_order) matrix = condense_matrix(matrix, smallest_index, large_number) tree = node_order[smallest_index[0]] return tree diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/core/alignment.py python-cogent-2021.10.12a1+dfsg/src/cogent3/core/alignment.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/core/alignment.py 2020-12-20 
23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/core/alignment.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Code for handling multiple sequence alignments. In particular: - SequenceCollection handles both aligned and unaligned sequences. @@ -79,7 +78,7 @@ __author__ = "Peter Maxwell and Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Peter Maxwell", "Rob Knight", @@ -92,7 +91,7 @@ "Jan Kosinski", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -211,7 +210,7 @@ def gap_run_ok(self, seq): """runs of gaps <= allowed_run""" - curr_run = max_run = 0 + curr_run = 0 is_gap = self.gap_chars.__contains__ result = True for i in seq: @@ -236,22 +235,6 @@ return ["%s_%s" % (base_name, i) for i in range(start_at, start_at + num_seqs)] -class SeqLabeler(object): - """Allows flexible seq labeling in to_fasta().""" - - def __init__(self, aln, label_f=assign_sequential_names, **kwargs): - """Initializes a new seq labeler.""" - self._aln = aln - self._label_f = label_f - self._map = { - orig: new for orig, new in zip(aln.names, label_f(len(aln.names, **kwargs))) - } - - def __call__(self, s): - """Returns seq name from seq id""" - return self._map[s.name] - - def coerce_to_string(s): """Converts an arbitrary sequence into a string.""" if isinstance(s, str): # if it's a string, OK as is @@ -472,6 +455,7 @@ data = list(data) # set the name self.name = name + names = list(names) if names is not None else names # figure out alphabet and moltype self.alphabet, self.moltype = self._get_alphabet_and_moltype( alphabet, moltype, data @@ -528,8 +512,7 @@ """Returns self in FASTA-format, respecting name order.""" from cogent3.format.alignment import FORMATTERS - fasta = 
FORMATTERS["fasta"](self.to_dict()) - return fasta + return FORMATTERS["fasta"](self.to_dict()) def _make_named_seqs(self, names, seqs): """Returns named_seqs: dict of name:seq.""" @@ -555,8 +538,7 @@ def copy(self): """Returns deep copy of self.""" - result = self.__class__(self, moltype=self.moltype, info=self.info) - return result + return self.__class__(self, moltype=self.moltype, info=self.info) def deepcopy(self, sliced=True): """Returns deep copy of self.""" @@ -675,10 +657,9 @@ # if we got names from the sequences, but otherwise assign the # names to successive sequences in order if (names is None) or (None in names): - per_seq_names = name_order = name_order + per_seq_names = name_order else: # got names from seqs, so assume name_order is in Names per_seq_names = names - name_order = name_order # check for duplicate names duplicates, fixed_names, fixed_seqs = self._strip_duplicates( @@ -906,13 +887,12 @@ # TODO handle case of not strict by building mask of degen positions # per seq if mask_degen and not hasattr(self.moltype, "alphabets"): - UserWarning( - "in get_identical_sets, strict has no effect as moltype " - "has no degenerate characters" + warnings.warn( + "in get_identical_sets, mask_degen has no effect as moltype " + f"{self.moltype.label!r} has no degenerate characters", + UserWarning, ) mask_degen = False - elif mask_degen: - degens = list(self.moltype.degenerates) + [self.moltype.gap] def reduced(seq, indices): s = "".join(seq[i] for i in range(len(seq)) if i not in indices) @@ -1103,7 +1083,7 @@ """ return alignment_to_fasta(self.to_dict()) - def to_nexus(self, seq_type, wrap=50, interleave_len=None): + def to_nexus(self, seq_type, wrap=50): """ Return alignment in NEXUS format and mapping to sequence ids @@ -1115,12 +1095,6 @@ Raises exception if invalid alignment """ - if interleave_len is not None: - cogent3.util.warning.deprecated( - "argument", "interleave_len", "wrap", "2021.6" - ) - wrap = interleave_len - return 
nexus_from_alignment(self, seq_type, wrap=wrap) @property @@ -1427,8 +1401,7 @@ counts = self.counts_per_seq( motif_length=1, include_ambiguity=include_ambiguity, allow_gap=allow_gap ) - lengths = counts.row_sum() - return lengths + return counts.row_sum() def counts_per_seq( self, @@ -1613,8 +1586,9 @@ raise ValueError(f"unknown moltype '{moltype}'") data = [s.to_moltype(moltype) for s in self.seqs] - new = self.__class__(data=data, moltype=moltype, name=self.name, info=self.info) - return new + return self.__class__( + data=data, moltype=moltype, name=self.name, info=self.info + ) def to_dna(self): """returns copy of self as an alignment of DNA moltype seqs""" @@ -1704,11 +1678,7 @@ def strand_symmetry(self, motif_length=1): """returns dict of strand symmetry test results per seq""" - result = { - s.name: s.strand_symmetry(motif_length=motif_length) for s in self.seqs - } - - return result + return {s.name: s.strand_symmetry(motif_length=motif_length) for s in self.seqs} def dotplot( self, @@ -1752,7 +1722,9 @@ from cogent3.draw.dotplot import Dotplot from cogent3.draw.drawable import AnnotatedDrawable - if name1 is None and name2 is None: + if len(self.names) == 1: + name1 = name2 = self.names[0] + elif name1 is None and name2 is None: name1, name2 = list(choice(self.names, size=2, replace=False)) elif not (name1 and name2): names = list(set(self.names + [None]) ^ set([name1, name2])) @@ -2200,8 +2172,7 @@ return self.__class__(map=self.map, data=data) def remapped_to(self, map): - result = Aligned(map[self.map.inverse()].inverse(), self.data) - return result + return Aligned(map[self.map.inverse()].inverse(), self.data) def get_annotations_matching(self, alignment, annotation_type="*", **kwargs): for annot in self.data.get_annotations_matching( @@ -2533,8 +2504,7 @@ chars = list(map(alpha.index, chars)) predicate = AllowedCharacters(chars, is_array=is_array) - new = self.filtered(predicate, motif_length=motif_length) - return new + return 
self.filtered(predicate, motif_length=motif_length) def omit_gap_pos(self, allowed_gap_frac=1 - eps, motif_length=1): """Returns new alignment where all cols (motifs) have <= allowed_gap_frac gaps. @@ -2810,7 +2780,6 @@ def to_html( self, name_order=None, - interleave_len=None, wrap=60, limit=None, ref_name="longest", @@ -2824,10 +2793,8 @@ ---------- name_order order of names for display. - interleave_len - will be replaced by wrap in version 2021.6 wrap - number of alignment columns per row, old name is interleave_len + number of alignment columns per row limit truncate alignment to this length ref_name @@ -2848,12 +2815,6 @@ >>> from IPython.core.display import HTML >>> HTML(aln.to_html()) """ - if interleave_len is not None: - cogent3.util.warning.deprecated( - "argument", "interleave_len", "wrap", "2021.6" - ) - wrap = interleave_len if wrap == 60 else wrap - css, styles = self.moltype.get_css_style( colors=colors, font_size=font_size, font_family=font_family ) @@ -2975,24 +2936,16 @@ ] return "\n".join(text) - def to_pretty(self, name_order=None, wrap=None, interleave_len=None): + def to_pretty(self, name_order=None, wrap=None): """returns a string representation of the alignment in pretty print format Parameters ---------- name_order order of names for display. - interleave_len - will be replaced by wrap in version 2021.6 wrap - maximum number of printed bases, old name is interleave_len + maximum number of printed bases """ - if interleave_len is not None: - cogent3.util.warning.deprecated( - "argument", "interleave_len", "wrap", "2021.6" - ) - wrap = interleave_len - names, output = self._get_raw_pretty(name_order=name_order) label_width = max(list(map(len, names))) name_template = "{:>%d}" % label_width @@ -3143,19 +3096,13 @@ column are ignored. 
""" - seqs = [str(self.named_seqs[n]) for n in self.names] - seq1 = seqs[0] - positions = list(zip(*seqs[1:])) + gaps = set(self.moltype.gaps) result = [] - for (position, (motif1, column)) in enumerate(zip(seq1, positions)): - for motif in column: - if motif != motif1: - if include_gap_motif: - result.append(position) - break - elif motif != "-" and motif1 != "-": - result.append(position) - break + for position, column in enumerate(self.iter_positions()): + column = set(column) + num_states = len(column) if include_gap_motif else len(column - gaps) + if num_states > 1: + result.append(position) return result @@ -3189,8 +3136,7 @@ moltype = ArrayAlignment.moltype if array_align else Alignment.moltype else: moltype = self.moltype - new = klass(data=data, moltype=moltype, info=self.info, names=self.names) - return new + return klass(data=data, moltype=moltype, info=self.info, names=self.names) def distance_matrix(self, calc="percent", show_progress=False, drop_invalid=False): """Returns pairwise distances between sequences. @@ -3271,7 +3217,7 @@ else: subaln = self - for i in ui.series(range(bootstrap), count=bootstrap, noun="bootstrap"): + for _ in ui.series(range(bootstrap), count=bootstrap, noun="bootstrap"): b = subaln.sample(with_replacement=True) try: bdist = b.distance_matrix( @@ -3619,7 +3565,7 @@ WARNING: Data type of return array is not guaranteed -- check in caller! """ - result = array(list(map(alphabet.to_indices, data))) + result = array([alphabet.to_indices(v) for v in data], dtype=object).astype(int) names = [] for d in data: if hasattr(d, "name"): @@ -3915,8 +3861,6 @@ Should be able to handle joint alphabets, e.g. codons. 
""" result = [] - names = list(map(str, self.names)) - max_label_length = max(list(map(len, names))) + 1 seq2str = self.alphabet.from_indices for l, s in zip(self.names, self.array_seqs): result.append(">" + str(l) + "\n" + "".join(seq2str(s))) @@ -3999,14 +3943,13 @@ wrapped_locations = locations.reshape((n, motif_length)) wrapped_locations += arange(motif_length) positions = take(self.array_positions, locations, 0) - result = self.__class__( + return self.__class__( positions.T, moltype=self.moltype, force_same_data=True, info=self.info, names=self.names, ) - return result def filtered(self, predicate, motif_length=1, drop_remainder=True, **kwargs): """The alignment positions where predicate(column) is true. @@ -4048,14 +3991,13 @@ return None positions = self.array_seqs.take(indices, axis=1) - result = self.__class__( + return self.__class__( positions, force_same_data=True, moltype=self.moltype, info=self.info, names=self.names, ) - return result def get_gapped_seq(self, seq_name, recode_gaps=False, moltype=None): """Return a gapped Sequence object for the specified seqname. @@ -4218,8 +4160,7 @@ new = new.add_from_ref_aln( ref_aln, before_name=before_name, after_name=after_name ) - result = new.to_type(array_align=True, moltype=self.moltype) - return result + return new.to_type(array_align=True, moltype=self.moltype) def replace_seqs(self, seqs, aa_to_codon=True): """Returns new alignment with same shape but with data taken from seqs. 
@@ -4294,12 +4235,12 @@ ---------- mask_degen if True, degenerate characters are ignored - """ if mask_degen and not hasattr(self.moltype, "alphabets"): - UserWarning( - "in get_identical_sets, strict has no effect as moltype " - "has no degenerate characters" + warnings.warn( + "in get_identical_sets, mask_degen has no effect as moltype " + f"{self.moltype.label!r} has no degenerate characters", + UserWarning, ) mask_degen = False @@ -4404,17 +4345,13 @@ # check if gap runs bad if ( b"\x01" * gap_run - in logical_and(seq_gaps, logical_not(template_gaps)) - .astype(uint8) - .tostring() + in logical_and(seq_gaps, logical_not(template_gaps)).astype(uint8).tobytes() ): return False # check if insertion runs bad elif ( b"\x01" * gap_run - in logical_and(template_gaps, logical_not(seq_gaps)) - .astype(uint8) - .tostring() + in logical_and(template_gaps, logical_not(seq_gaps)).astype(uint8).tobytes() ): return False return True @@ -4451,7 +4388,7 @@ aligned_seqs.append(s) else: aligned_seqs.append(self._seq_to_aligned(s, n)) - self.named_seqs = self.named_seqs = dict(list(zip(names, aligned_seqs))) + self.named_seqs = dict(list(zip(names, aligned_seqs))) self.seq_data = self._seqs = aligned_seqs def _coerce_seqs(self, seqs, is_array): @@ -4770,9 +4707,7 @@ self.__class__({new_seq.name: str(new_seq)}) ) - aln = self.add_seqs(temp_aln, before_name, after_name) - - return aln + return self.add_seqs(temp_aln, before_name, after_name) def replace_seqs(self, seqs, aa_to_codon=True): """Returns new alignment with same shape but with data taken from seqs. 
diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/core/alphabet.py python-cogent-2021.10.12a1+dfsg/src/cogent3/core/alphabet.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/core/alphabet.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/core/alphabet.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,4 +1,3 @@ -#!/usr/bin/env python """ alphabet.py @@ -18,8 +17,6 @@ """ import json -import re -import string from itertools import product @@ -32,7 +29,6 @@ frombuffer, newaxis, ravel, - remainder, sum, take, transpose, @@ -42,17 +38,17 @@ zeros, ) -from cogent3.util.misc import bytes_to_string, get_object_provenance +from cogent3.util.misc import get_object_provenance Float = numpy.core.numerictypes.sctype2char(float) Int = numpy.core.numerictypes.sctype2char(int) __author__ = "Peter Maxwell, Gavin Huttley and Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Rob Knight", "Andrew Butterfield"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -182,7 +178,6 @@ accidentally use negative numbers as indices (this is very bad when doing indexed lookups). """ - data = data or [] self.moltype = moltype # check if motif lengths are homogeneous -- if so, set length @@ -239,8 +234,7 @@ would produce the result [1,1,2,0], returning the index of each element in the input. """ - result = [self._obj_to_index[e] for e in data] - return result + return [self._obj_to_index[e] for e in data] def is_valid(self, seq): """Returns True if seq contains only items in self.""" @@ -264,18 +258,7 @@ """ # if it's a normal Python type, map will work - try: - return list(map(self.__getitem__, data)) - # otherwise, it's probably an array object. 
- except TypeError: - try: - data = list(map(int, data)) - except (TypeError, ValueError): # might be char array? - print("DATA", data) - print("FIRST MAP:", list(map(str, data))) - print("SECOND MAP:", list(map(ord, list(map(str, data))))) - data = list(map(ord, list(map(str, data)))) - return list(map(self.__getitem__, data)) + return [self[index] for index in data] def __pow__(self, num): """Returns JointEnumeration with num copies of self. @@ -595,11 +578,20 @@ def from_seq_to_array(self, sequence): """Returns an array of indices corresponding to items in sequence. + Parameters + ---------- + sequence: Sequence + A cogent3 sequence object + + Returns + ------- + ndarray + + Notes + ----- Unlike to_indices() in superclass, this method returns a numpy array object. It also breaks the seqeunce into items in the current alphabet (e.g. breaking a raw DNA sequence into codons), which to_indices() does - not do. It also requires the sequence to be a Sequence object rather - than an arbitrary string, tuple, etc. """ sequence = sequence.get_in_motif_size(self._motiflen) return array(list(map(self.index, sequence))) @@ -607,11 +599,22 @@ def from_ordinals_to_seq(self, data): """Returns a Sequence object corresponding to indices in data. - Unlike from_indices() in superclass, this method uses the MolType to - coerce the result into a sequence of the correct class. Note that if - the MolType is not set, this method will raise an AttributeError. + Parameters + ---------- + data: series + series of int + + Returns + ------- + Sequence with self.moltype + + Notes + ----- + Unlike from_indices(), this method uses the MolType to + coerce the result into a sequence of the correct class. + + Raises an AttributeError if MolType is not set. 
""" - result = "" return self.moltype.make_seq("".join(self[i] for i in data)) def get_matched_array(self, motifs, dtype=Float): @@ -829,9 +832,9 @@ return "" elif len(s) == 1: val = self.to_chars(data) - val = val.tostring().decode("utf-8") + val = val.tobytes().decode("utf-8") return val else: return delimiter.join( - [i.tostring().decode("utf-8") for i in self.to_chars(data)] + [i.tobytes().decode("utf-8") for i in self.to_chars(data)] ) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/core/annotation.py python-cogent-2021.10.12a1+dfsg/src/cogent3/core/annotation.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/core/annotation.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/core/annotation.py 2021-10-12 00:17:34.000000000 +0000 @@ -12,10 +12,10 @@ __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -80,7 +80,6 @@ ) for base in containers: feature = feature.remapped_to(base, base.map) - index = map else: map = as_map(index, len(self)) return map @@ -290,8 +289,12 @@ return json.dumps(self.to_rich_dict()) +# https://pythonspeed.com/products/filmemoryprofiler/ + + class _Feature(_Annotatable, _Serialisable): qualifier_names = ["type", "name"] + __slots__ = ["parent", "map", "original", "_serialisable", "base", "base_map"] def __init__(self, parent, map, original=None, **kw): d = locals() @@ -328,8 +331,7 @@ """returns plotly trace""" from cogent3.draw.drawable import make_shape - result = make_shape(type_=self) - return result + return make_shape(type_=self) def attach(self): self.parent.attach_annotations([self]) @@ -496,8 +498,7 @@ keep = self.map.nongap() indices = 
numpy.concatenate([list(span) for span in keep.spans]) data = numpy.asarray(self.data)[indices] - new = self.__class__(self.parent, self.map[keep], data=data, original=self) - return new + return self.__class__(self.parent, self.map[keep], data=data, original=self) def SimpleVariable(parent, type, name, data): diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/core/genetic_code.py python-cogent-2021.10.12a1+dfsg/src/cogent3/core/genetic_code.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/core/genetic_code.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/core/genetic_code.py 2021-10-12 00:17:34.000000000 +0000 @@ -13,10 +13,10 @@ __author__ = "Greg Caporaso and Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Greg Caporaso", "Rob Knight", "Peter Maxwell", "Thomas La"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Greg Caporaso" __email__ = "caporaso@colorado.edu" __status__ = "Production" @@ -497,11 +497,10 @@ all_keys = sorted({int(k) for k in GeneticCodes if str(k).isdigit()}) rows = [(k, GeneticCodes[k].name) for k in all_keys] header = ["Code ID", "Name"] - table = Table( + return Table( header=header, data=rows, index_name="Code ID", title="Specify a genetic code using either 'Name' or " "Code ID (as an integer or string)", ) - return table diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/core/info.py python-cogent-2021.10.12a1+dfsg/src/cogent3/core/info.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/core/info.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/core/info.py 2021-10-12 00:17:34.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" 
-__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Prototype" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/core/__init__.py python-cogent-2021.10.12a1+dfsg/src/cogent3/core/__init__.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/core/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/core/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -14,7 +14,7 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Jeremy Widmann", "Gavin Huttley", @@ -25,7 +25,7 @@ "Greg Caporaso", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/core/location.py python-cogent-2021.10.12a1+dfsg/src/cogent3/core/location.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/core/location.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/core/location.py 2021-10-12 00:17:34.000000000 +0000 @@ -53,10 +53,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell", "Matthew Wakefield", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Prototype" @@ -141,7 +141,7 @@ """Returns length of self.""" raise NotImplementedError - def __lt__(self): + def __lt__(self, other): """Compares indices of self with indices of other.""" raise NotImplementedError @@ -551,7 +551,7 @@ class Map(object): - """A map holds a list of spans. 
""" + """A map holds a list of spans.""" def __init__( self, @@ -713,6 +713,18 @@ spans = [s.reversed_relative_to(self.parent_length) for s in self.spans] return Map(spans=spans, parent_length=self.parent_length) + def get_gap_coordinates(self): + """returns [(gap pos, gap length), ...]""" + gap_pos = [] + for i, span in enumerate(self.spans): + if not span.lost: + continue + + pos = self.spans[i - 1].end if i else 0 + gap_pos.append((pos, len(span))) + + return gap_pos + def gaps(self): """The gaps (lost spans) in this map""" locations = [] @@ -1046,3 +1058,38 @@ else: result.spans.append(Span(int(p))) return result + + +def gap_coords_to_map(gaps_lengths: dict, seq_length: int) -> Map: + """ + Parameters + ---------- + gaps_lengths + {gap insertion pos: gap length, ...} + seq_length : int + length of unaligned sequence + + Returns + ------- + Map + """ + + if not gaps_lengths: + return Map([(0, seq_length)], parent_length=seq_length) + + spans = [] + last = pos = 0 + for pos in sorted(gaps_lengths): + if pos > seq_length: + raise ValueError( + f"cannot have gap at position {pos} beyond seq_length= {seq_length}" + ) + + gap = LostSpan(length=gaps_lengths[pos]) + spans.extend([gap] if pos == 0 else [Span(last, pos), gap]) + last = pos + + if pos < seq_length: + spans.append(Span(last, seq_length)) + + return Map(spans=spans, parent_length=seq_length) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/core/moltype.py python-cogent-2021.10.12a1+dfsg/src/cogent3/core/moltype.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/core/moltype.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/core/moltype.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,4 +1,3 @@ -#!/usr/bin/env python """ moltype.py @@ -13,10 +12,10 @@ """ __author__ = "Peter Maxwell, Gavin Huttley and Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin 
Huttley", "Rob Knight", "Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -31,21 +30,6 @@ import numpy -from numpy import ( - arange, - array, - asarray, - newaxis, - ravel, - remainder, - take, - transpose, - uint8, - uint16, - uint32, - zeros, -) - from cogent3.core.alignment import ( Alignment, ArrayAlignment, @@ -132,9 +116,6 @@ IUPAC_DNA_complements = {"A": "T", "C": "G", "G": "C", "T": "A", "-": "-"} -IUPAC_DNA_complements = {"A": "T", "C": "G", "G": "C", "T": "A", "-": "-"} - - # note change in standard order from DNA IUPAC_RNA_chars = ["U", "C", "A", "G"] IUPAC_RNA_ambiguities = { @@ -490,12 +471,16 @@ degens = "".join(degens) else: constructor = Alphabet # assume multi-char - self.base = constructor(chars, moltype=moltype) - self.degen = constructor(chars + degens, moltype=moltype) - self.gapped = constructor(chars + gap, gap, moltype=moltype) - self.degen_gapped = constructor( - chars + gap + degens + missing, gap, moltype=moltype + + super(AlphabetGroup, self).__init__( + base=constructor(chars, moltype=moltype), + degen=constructor(chars + degens, moltype=moltype), + gapped=constructor(chars + gap, gap, moltype=moltype), + degen_gapped=constructor( + chars + gap + degens + missing, gap, moltype=moltype + ), ) + self._items = [self.base, self.degen, self.gapped, self.degen_gapped] self._set_relationships() # set complements if MolType was specified @@ -874,7 +859,6 @@ """ all = {} for i in self.alphabet: - curr = str(i) all[i] = i for key, val in list(self.degenerates.items()): all[key] = val @@ -1112,8 +1096,7 @@ if include_gap: degen.append(self.gap) - pos = [i for i, c in enumerate(sequence) if c in degen] - return pos + return [i for i, c in enumerate(sequence) if c in degen] def count_degenerate(self, sequence): """Counts the degenerate bases in the specified sequence.""" @@ -1491,7 +1474,7 @@ def 
available_moltypes(): - """returns Table listing available moltypes""" + """returns Table listing the available moltypes""" from cogent3.util.table import Table rows = [] diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/core/profile.py python-cogent-2021.10.12a1+dfsg/src/cogent3/core/profile.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/core/profile.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/core/profile.py 2021-10-12 00:17:34.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -420,8 +420,7 @@ ylim=ylim, colours=colours, ) - xtick_vals = [j for j in range(0, segment.shape[0], 20)] - xtick_text = [f"{i + j}" for j in range(0, segment.shape[0], 20)] + sublogo.layout[f"x{axis}"].showticklabels = False sublogo.layout[f"x{axis}"].domain = [0, segment.shape[0] / wrap] diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/core/sequence.py python-cogent-2021.10.12a1+dfsg/src/cogent3/core/sequence.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/core/sequence.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/core/sequence.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Contains classes that represent biological sequence data. These provide generic biological sequence manipulation functions, plus functions that are critical for the EVOLVE calculations. @@ -12,7 +11,6 @@ creation. 
""" -import copy import json import re import warnings @@ -35,13 +33,11 @@ ) from numpy.random import permutation -import cogent3 - from cogent3.core.alphabet import AlphabetError from cogent3.core.genetic_code import get_code from cogent3.core.info import Info as InfoClass from cogent3.format.fasta import alignment_to_fasta -from cogent3.maths.stats.contingency import CategoryCounts, TestResult +from cogent3.maths.stats.contingency import CategoryCounts from cogent3.maths.stats.number import CategoryCounter from cogent3.parse import gff from cogent3.util.dict_array import DictArrayTemplate @@ -51,18 +47,13 @@ get_object_provenance, get_setting_from_environ, ) -from cogent3.util.transform import ( - KeepChars, - for_seq, - per_longest, - per_shortest, -) +from cogent3.util.transform import for_seq, per_shortest from .annotation import Map, _Annotatable __author__ = "Rob Knight, Gavin Huttley, and Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Rob Knight", "Peter Maxwell", @@ -71,7 +62,7 @@ "Daniel McDonald", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -342,7 +333,7 @@ def degap(self): """Deletes all gap characters from sequence.""" - return self.__class__(self.moltype.degap(self), info=self.info) + return self.__class__(self.moltype.degap(self), name=self.name, info=self.info) def gap_indices(self): """Returns list of indices of all gaps in the sequence, or [].""" @@ -641,7 +632,7 @@ last_nongap = i missing = self.moltype.missing if first_nongap is None: # sequence was all gaps - result = self.__class__([missing for i in len(self)], info=self.info) + result = self.__class__([missing for _ in len(self)], info=self.info) else: prefix = missing * first_nongap mid = str(self[first_nongap : last_nongap + 1]) @@ -672,17 +663,14 @@ 
colors=None, font_size=12, font_family="Lucida Console", - interleave_len=None, ): """returns html with embedded styles for sequence colouring Parameters ---------- - interleave_len - replaced by wrap in version 2021.6 wrap maximum number of printed bases, defaults to - alignment length, old name is interleave_len + alignment length limit truncate alignment to this length colors @@ -699,13 +687,6 @@ >>> from IPython.core.display import HTML >>> HTML(aln.to_html()) """ - if interleave_len is not None: - cogent3.util.warning.deprecated( - "argument", "interleave_len", "wrap", "2021.6" - ) - wrap = interleave_len if wrap == 60 else wrap - - # todo refactor interleave_len to be wrap css, styles = self.moltype.get_css_style( colors=colors, font_size=font_size, font_family=font_family ) @@ -1031,8 +1012,7 @@ def _mapped(self, map): # Called by generic __getitem__ segments = self.gapped_by_map_segment_iter(map, allow_gaps=False) - new = self.__class__("".join(segments), self.name, info=self.info) - return new + return self.__class__("".join(segments), self.name, info=self.info) def __add__(self, other): """Adds two sequences (other can be a string as well).""" @@ -1197,15 +1177,13 @@ return [] num_match = len(pos) if allow_multiple else 1 - annot = [ + return [ self.add_feature( annot_type, f"{name}:{i}" if allow_multiple else name, [pos[i]] ) for i in range(num_match) ] - return annot - class ProteinSequence(Sequence): """Holds the standard Protein sequence.""" @@ -1379,8 +1357,7 @@ template = DictArrayTemplate(motifs, ["+", "-"]) obs = template.wrap(obs) cat = CategoryCounts(obs) - result = cat.G_fit() - return result + return cat.G_fit() class DnaSequence(NucleicAcidSequence): @@ -1872,7 +1849,7 @@ """Returns new ArrayRnaSequence, converting T -> U""" if hasattr(data, "upper"): data = data.upper().replace("T", "U") - return super(ArrayNucleicAcidSequence, self).__init__(data, *args, **kwargs) + return super(ArrayRnaSequence, self).__init__(data, *args, **kwargs) class 
ArrayDnaSequence(ArrayNucleicAcidSequence): @@ -1883,7 +1860,7 @@ """Returns new ArrayRnaSequence, converting U -> T""" if hasattr(data, "upper"): data = data.upper().replace("U", "T") - return super(ArrayNucleicAcidSequence, self).__init__(data, *args, **kwargs) + return super(ArrayDnaSequence, self).__init__(data, *args, **kwargs) class ArrayCodonSequence(ArraySequence): diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/core/tree.py python-cogent-2021.10.12a1+dfsg/src/cogent3/core/tree.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/core/tree.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/core/tree.py 2021-10-12 00:17:34.000000000 +0000 @@ -45,7 +45,7 @@ __author__ = "Gavin Huttley, Peter Maxwell and Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Gavin Huttley", "Peter Maxwell", @@ -59,7 +59,7 @@ "Justin Kuczynski", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -634,8 +634,9 @@ tipnames = set(tipnames) tips = [tip for tip in self.tips() if tip.name in tipnames] - if len(tips) == 0: - return None + if len(tips) != len(tipnames): + missing = tipnames - set(self.get_tip_names()) + raise ValueError(f"tipnames {missing} not present in self") # scrub tree if hasattr(self, "black"): @@ -1778,8 +1779,7 @@ if style not in types: raise ValueError(f"{style} not in supported types {types}") - dnd = Dendrogram(self, style=style, **kwargs) - return dnd + return Dendrogram(self, style=style, **kwargs) class PhyloNode(TreeNode): @@ -1865,8 +1865,6 @@ to_process = [(self, 0.0)] tips_to_save = [] - curr_node, curr_dist = to_process[0] - seen = set([id(self)]) while to_process: curr_node, curr_dist = to_process.pop(0) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/data/energy_params.py 
python-cogent-2021.10.12a1+dfsg/src/cogent3/data/energy_params.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/data/energy_params.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/data/energy_params.py 2021-10-12 00:17:34.000000000 +0000 @@ -15,10 +15,10 @@ """ __author__ = "Amanda Birmingham" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Amanda Birmingham", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Amanda Birmingham" __email__ = "amanda.birmingham@thermofisher.com" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/data/__init__.py python-cogent-2021.10.12a1+dfsg/src/cogent3/data/__init__.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/data/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/data/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -3,10 +3,10 @@ __all__ = ["energy_params", "molecular_weight"] __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Amanda Birmingham"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/data/molecular_weight.py python-cogent-2021.10.12a1+dfsg/src/cogent3/data/molecular_weight.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/data/molecular_weight.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/data/molecular_weight.py 2021-10-12 00:17:34.000000000 +0000 @@ -2,10 +2,10 @@ """Data for molecular weight calculations on proteins and nucleotides.""" __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" 
+__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/draw/dendrogram.py python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/dendrogram.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/draw/dendrogram.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/dendrogram.py 2021-10-12 00:17:34.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Rahul Ghangas, Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Rahul Ghangas"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" @@ -245,7 +245,7 @@ return None x = self.x - data = UnionDict( + return UnionDict( x=x, y=self.y, xshift=xshift, @@ -255,7 +255,6 @@ text=f"{val:.2f}", xanchor="center", ) - return data class SquareTreeGeometry(TreeGeometryBase): @@ -303,12 +302,10 @@ value = self.params.get(attr, None) if value is None: value = getattr(self, attr, None) - data = UnionDict( + return UnionDict( x=x, y=y, textangle=self.theta, showarrow=False, text=value, xanchor="left" ) - return data - class _AngularGeometry: """directly connects child to parents""" @@ -379,7 +376,7 @@ @property def x(self): if self._x is None: - y = self.y # triggers populating values + _ = self.y # triggers populating values return self._x @property @@ -424,7 +421,7 @@ radius = np.sqrt(self.x ** 2 + self.y ** 2) + padding x, y = polar_2_cartesian(self.theta, radius) - data = UnionDict( + return UnionDict( x=x, y=y, textangle=textangle, @@ -433,7 +430,6 @@ xanchor="center", 
yanchor="middle", ) - return data @extend_docstring_from(TreeGeometryBase.support_text_coord) def support_text_coord(self, xshift, yshift, threshold=1, max_attr_length=4): @@ -465,7 +461,7 @@ new_yshift = float(d.T[1]) x = self.x - data = UnionDict( + return UnionDict( x=x, y=self.y, xshift=new_xshift, @@ -475,7 +471,6 @@ text=f"{val:.2f}", xanchor="center", ) - return data def get_segment_to_child(self, child): """returns coordinates connecting a child to self and descendants""" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/draw/dotplot.py python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/dotplot.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/draw/dotplot.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/dotplot.py 2021-10-12 00:17:34.000000000 +0000 @@ -5,10 +5,10 @@ __author__ = "Rahul Ghangas, Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Peter Maxwell", "Rahul Ghangas"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" @@ -84,7 +84,6 @@ acceptable_noise = min(len1, len2) / window threshold = suitable_threshold(window, acceptable_noise / universe) - key = (window, threshold, min_gap) fwd = dotplot( str(seq1), str(seq2), @@ -195,7 +194,6 @@ show_progress : bool displays progress bar """ - from cogent3.core.alignment import Aligned # we ensure sequences have gaps parsed and the calculate aspect ratio if hasattr(seq1, "moltype"): diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/draw/drawable.py python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/drawable.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/draw/drawable.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/drawable.py 2021-10-12 00:17:34.000000000 +0000 
@@ -5,14 +5,13 @@ from cogent3.util.misc import extend_docstring_from from cogent3.util.union_dict import UnionDict -from cogent3.util.warning import deprecated __author__ = "Rahul Ghangas and Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rahul Ghangas", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" @@ -174,7 +173,7 @@ else: try: self._traces = [UnionDict(trace) for trace in traces] - except ValueError as msg: + except ValueError: raise TypeError(f"expected a series of dicts, got {traces}") title = title if title is None else dict(text=title) self._default_layout = UnionDict( @@ -574,9 +573,7 @@ else: func = self._build_fig - result = func() - - return result + return func() def remove_track(self, left_track=False, bottom_track=False): """ @@ -659,7 +656,7 @@ def as_trace(self, name=None): """returns component for plotly display""" name = name or self.name - data = UnionDict( + return UnionDict( type="scatter", x=self.x, y=self.y, @@ -673,7 +670,6 @@ showlegend=self._showlegend, hoverinfo="text", ) - return data class Rectangle(Shape): diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/draw/__init__.py python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/__init__.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/draw/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -2,7 +2,7 @@ __all__ = ["dendrogram", "dotplot", "drawable", "letter", "logo"] -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __contributors__ = [ "Peter Maxwell", "Gavin Huttley", @@ -14,5 +14,5 @@ "Sheng Han Moses Koh", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = 
"2021.10.12a1" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/draw/letter.py python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/letter.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/draw/letter.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/letter.py 2021-10-12 00:17:34.000000000 +0000 @@ -15,19 +15,16 @@ SVG path formatting and then into an array format for inclusion here. """ -import os - import numpy -from cogent3.util.misc import extend_docstring_from from cogent3.util.union_dict import UnionDict __author__ = "Sheng Han Moses Koh" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Sheng Han Moses Koh", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/draw/logo.py python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/logo.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/draw/logo.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/logo.py 2021-10-12 00:17:34.000000000 +0000 @@ -5,10 +5,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/best_likelihood.py python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/best_likelihood.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/best_likelihood.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/best_likelihood.py 2021-10-12 
00:17:34.000000000 +0000 @@ -13,11 +13,11 @@ __author__ = "Helen Lindsay, Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Helen Lindsay", "Gavin Huttley", "Daniel McDonald"] cite = "Goldman, N. (1993). Statistical tests of models of DNA substitution. J Mol Evol, 36: 182-98" __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/bootstrap.py python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/bootstrap.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/bootstrap.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/bootstrap.py 2021-10-12 00:17:34.000000000 +0000 @@ -24,12 +24,11 @@ import random -from cogent3.util import parallel from cogent3.util import progress_display as UI __author__ = "Gavin Huttley, Andrew Butterfield and Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Gavin Huttley", "Andrew Butterfield", @@ -38,7 +37,7 @@ "Peter Maxwell", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/coevolution.py python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/coevolution.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/coevolution.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/coevolution.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,7 +1,3 @@ -#!/usr/bin/env python -# Authors: Greg Caporaso (gregcaporaso@gmail.com), Brett Easton, Gavin Huttley -# coevolution.py - """ Description File created on 03 
May 2007. @@ -47,44 +43,31 @@ """ -from optparse import make_option -from os.path import basename, exists, splitext +from os.path import basename from pickle import Pickler, Unpickler from random import shuffle -from sys import exit from numpy import ( - abs, array, e, - exp, float, - float64, greater_equal, isnan, less_equal, log, - mean, nan, nonzero, ones, - put, ravel, - reshape, - sqrt, - take, - transpose, - tril, zeros, ) from numpy.linalg import norm from cogent3 import PROTEIN, make_aligned_seqs -from cogent3.core.alignment import ArrayAlignment, seqs_from_fasta -from cogent3.core.alphabet import Alphabet, CharAlphabet +from cogent3.core.alignment import ArrayAlignment +from cogent3.core.alphabet import CharAlphabet from cogent3.core.moltype import IUPAC_gap, IUPAC_missing from cogent3.core.sequence import Sequence -from cogent3.core.tree import TreeError from cogent3.evolve.substitution_model import ( EmpiricalProteinMatrix, Parametric, @@ -92,19 +75,10 @@ from cogent3.maths.stats.distribution import binomial_exact from cogent3.maths.stats.number import CategoryCounter, CategoryFreqs from cogent3.maths.stats.special import ROUND_ERROR -from cogent3.parse.newick import TreeParseError -from cogent3.parse.record import FileFormatError, RecordError -from cogent3.util.recode_alignment import ( - alphabets, - recode_counts_and_freqs, - recode_dense_alignment, - recode_freq_vector, - square_matrix_to_dict, -) __author__ = "Greg Caporaso" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Greg Caporaso", "Gavin Huttley", @@ -113,7 +87,7 @@ "Rob Knight", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Greg Caporaso" __email__ = "gregcaporaso@gmail.com" __status__ = "Beta" @@ -142,7 +116,7 @@ def normalized_mi(h1, h2, joint_h): - """ MI normalized by joint entropy, as described in Martin 2005 """ + """MI normalized by joint 
entropy, as described in Martin 2005""" return mi(h1, h2, joint_h) / joint_h @@ -165,7 +139,7 @@ def joint_entropy(pos1, pos2): - """ Calculate the joint entroy of a pair of positions """ + """Calculate the joint entroy of a pair of positions""" return CategoryCounter(join_positions(pos1, pos2)).entropy @@ -226,7 +200,7 @@ for exclude in excludes: if exclude in states: try: - col = exclude_handler(col, excludes) + _ = exclude_handler(col, excludes) break except TypeError: return null_value @@ -641,7 +615,7 @@ # pos_freq as a float could be greater than scaled_aln_size. # In this case I cast it to an int. I don't like this alignment # scaling stuff though. - except ValueError as e: + except ValueError: results.append(binomial_exact(int(pos_freq), scaled_aln_size, natural_prob)) return array(results) @@ -787,8 +761,6 @@ sca_pair(aln,10,20,0.85) != \ sca_pair(aln.take_positions([10,20]),0,1,0.85) """ - num_positions = len(alignment) - num_seqs = alignment.num_seqs # Calculate frequency distributions natural_probs = probs_from_dict(background_freqs, alphabet) @@ -919,7 +891,6 @@ or a string. """ - num_seqs = alignment.num_seqs natural_probs = probs_from_dict(background_freqs, alphabet) aln_freqs = freqs_from_aln(alignment, alphabet, scaled_aln_size) aln_probs = get_positional_probabilities(aln_freqs, natural_probs, scaled_aln_size) @@ -1008,7 +979,6 @@ or a string. 
""" - num_seqs = alignment.num_seqs natural_probs = probs_from_dict(background_freqs, alphabet) aln_freqs = freqs_from_aln(alignment, alphabet, scaled_aln_size) aln_probs = get_positional_probabilities(aln_freqs, natural_probs, scaled_aln_size) @@ -1171,7 +1141,7 @@ for exclude in excludes: if exclude in states: try: - col = exclude_handler(col, excludes) + _ = exclude_handler(col, excludes) break except TypeError: return null_value @@ -1307,7 +1277,7 @@ def ancestral_state_pair( aln, tree, pos1, pos2, ancestral_seqs=None, null_value=DEFAULT_NULL_VALUE ): - """""" + """ """ ancestral_seqs = ancestral_seqs or get_ancestral_seqs(aln, tree) ancestral_names_to_seqs = dict( list(zip(ancestral_seqs.names, ancestral_seqs.array_seqs)) @@ -1404,7 +1374,7 @@ def sca_input_validation(alignment, **kwargs): - """SCA specific validations steps """ + """SCA specific validations steps""" # check that all required parameters are present in kwargs required_parameters = ["cutoff"] @@ -1449,7 +1419,7 @@ def ancestral_states_input_validation(alignment, **kwargs): - """Ancestral States (AS) specific validations steps """ + """Ancestral States (AS) specific validations steps""" # check that all required parameters are present in kwargs required_parameters = ["tree"] for rp in required_parameters: @@ -1497,7 +1467,7 @@ def validate_position(alignment, position): - """ValueError if position is outside the range of the alignment """ + """ValueError if position is outside the range of the alignment""" if not 0 <= position < len(alignment): raise ValueError( "Position is outside the range of the alignment: " + str(position) @@ -1635,7 +1605,7 @@ result[merged_name] = Sequence( alignment1.get_gapped_seq(orig_name) ) + Sequence(alignment2.get_gapped_seq(aln2_name_map[merged_name])) - except KeyError as e: + except KeyError: raise KeyError( "A sequence identifier is in alignment2 " + "but not alignment1 -- did you filter out sequences identifiers" @@ -2056,9 +2026,9 @@ pass ignored = None if not 
ignored else list(set(ignored) & set(column_freqs.keys())) if ignored: - for e in ignored: + for e_ in ignored: try: - del column_freqs[e] + del column_freqs[e_] except KeyError: pass @@ -2324,7 +2294,7 @@ null_value=DEFAULT_NULL_VALUE, intermolecular_data_only=False, ): - """wrapper function for aln_position_pairs_cmp_threshold """ + """wrapper function for aln_position_pairs_cmp_threshold""" return aln_position_pairs_cmp_threshold( coevolution_matrix, threshold, @@ -2340,7 +2310,7 @@ null_value=DEFAULT_NULL_VALUE, intermolecular_data_only=False, ): - """wrapper function for aln_position_pairs_cmp_threshold """ + """wrapper function for aln_position_pairs_cmp_threshold""" return aln_position_pairs_cmp_threshold( coevolution_matrix, threshold, less_equal, null_value, intermolecular_data_only ) @@ -2403,7 +2373,7 @@ def count_ge_threshold( m, threshold, null_value=DEFAULT_NULL_VALUE, symmetric=False, ignore_diagonal=False ): - """wrapper function for count_cmp_threshold """ + """wrapper function for count_cmp_threshold""" return count_cmp_threshold( m, threshold, greater_equal, null_value, symmetric, ignore_diagonal ) @@ -2412,14 +2382,14 @@ def count_le_threshold( m, threshold, null_value=DEFAULT_NULL_VALUE, symmetric=False, ignore_diagonal=False ): - """wrapper function for count_cmp_threshold """ + """wrapper function for count_cmp_threshold""" return count_cmp_threshold( m, threshold, less_equal, null_value, symmetric, ignore_diagonal ) def ltm_to_symmetric(m): - """ Copies values from lower triangle to upper triangle""" + """Copies values from lower triangle to upper triangle""" assert ( m.shape[0] == m.shape[1] ), "Making matrices symmetric only supported for square matrices" @@ -2517,7 +2487,7 @@ alignment_id = fields[0] alphabet_id = fields[1] method_id = fields[2] - extension = fields[3] + _ = fields[3] # extension except IndexError: raise ValueError( "output filepath not in parsable format: %s. See doc string for format definition." 
diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/discrete_markov.py python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/discrete_markov.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/discrete_markov.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/discrete_markov.py 2021-10-12 00:17:34.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -54,7 +54,6 @@ PartitionDefn.check_value_is_valid(self, part, is_constant) def make_cells(self, input_soup=None, variable=None): - input_soup = input_soup or {} uniq_cells = [] all_cells = [] for (i, v) in enumerate(self.uniq): diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/distance.py python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/distance.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/distance.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/distance.py 2021-10-12 00:17:34.000000000 +0000 @@ -12,10 +12,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Peter Maxwell", "Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -290,8 +290,7 @@ dists = self.get_pairwise_param( "length", summary_function=summary_function, **kwargs ) - result = None if not dists else DistanceMatrix(dists) - return result + return None if not dists else DistanceMatrix(dists) def get_param_values(self, param, 
**kwargs): """Returns a Numbers object with all estimated values of param. @@ -343,13 +342,12 @@ except KeyError: row.append(d[(s2, s1)]) twoD.append(row) - T = table.Table( + return table.Table( [r"Seq1 \ Seq2"] + self._seqnames, twoD, index_name=r"Seq1 \ Seq2", missing_data="*", ) - return T def get_newick_trees(self): """Returns a list of Newick format trees for supertree methods.""" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/fast_distance.py python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/fast_distance.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/fast_distance.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/fast_distance.py 2021-10-12 00:17:34.000000000 +0000 @@ -4,7 +4,7 @@ import numpy from numpy import array, diag, dot, eye, float64, int32, log, sqrt, zeros -from numpy.linalg import LinAlgError, det, inv, norm +from numpy.linalg import det, inv from cogent3 import DNA, RNA, get_moltype from cogent3.util.dict_array import DictArray @@ -15,10 +15,10 @@ __author__ = "Gavin Huttley, Yicheng Zhu and Ben Kaehler" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Yicheng Zhu", "Ben Kaehler"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" # pending addition of protein distance metrics @@ -75,8 +75,7 @@ def seq_to_indices(seq, char_to_index): """returns an array with sequence characters replaced by their index""" ords = list(map(ord, seq)) - indices = char_to_index.take(ords) - return indices + return char_to_index.take(ords) def _fill_diversity_matrix(matrix, seq1, seq2): @@ -302,10 +301,9 @@ for i in range(len(names)): rows[i].insert(0, names[i]) - table = Table( + return Table( header=header, data=rows, index_name=r"Seq1 \ Seq2", missing_data="*", **kwargs ) - 
return table class _PairwiseDistance(object): @@ -449,8 +447,7 @@ if include_duplicates: dists = self._expand(dists) - result = DistanceMatrix(dists) - return result + return DistanceMatrix(dists) def _expand(self, pwise): """returns a pwise statistic dict that includes duplicates""" @@ -491,8 +488,7 @@ stats = {k: sqrt(self._dists[k].variance) for k in self._dists} stats = self._expand(stats) kwargs = dict(title="Standard Error of Pairwise Distances", digits=4) - t = _make_stat_table(stats, self.names, **kwargs) - return t + return _make_stat_table(stats, self.names, **kwargs) @property def variances(self): @@ -516,8 +512,7 @@ stats = {k: self._dists[k].fraction_variable for k in self._dists} stats = self._expand(stats) kwargs = dict(title="Proportion variable sites", digits=4) - t = _make_stat_table(stats, self.names, **kwargs) - return t + return _make_stat_table(stats, self.names, **kwargs) @property def lengths(self): @@ -527,8 +522,7 @@ stats = {k: self._dists[k].length for k in self._dists} stats = self._expand(stats) kwargs = dict(title="Pairwise Aligned Lengths", digits=0) - t = _make_stat_table(stats, self.names, **kwargs) - return t + return _make_stat_table(stats, self.names, **kwargs) class HammingPair(_PairwiseDistance): @@ -568,8 +562,7 @@ if include_duplicates: dists = self._expand(dists) - result = DistanceMatrix(dists) - return result + return DistanceMatrix(dists) class _NucleicSeqPair(_PairwiseDistance): @@ -743,8 +736,7 @@ column = self.array[:, i] data[name] = column header = ["names"] + list(self.names) - table = Table(header=header, data=data, index_name="names") - return table + return Table(header=header, data=data, index_name="names") def to_dict(self, **kwargs): """Returns a flattened dict with diagonal elements removed""" @@ -758,13 +750,12 @@ # a list of tuples dists = self.to_dict() json_safe = [(k[0], k[1], dists[k]) for k in dists] - data = dict( + return dict( dists=json_safe, invalid=self._invalid, 
type=get_object_provenance(self), version=__version__, ) - return data def take_dists(self, names, negate=False): """ @@ -817,8 +808,7 @@ exclude += names[rows != 0].tolist() exclude = set(exclude) keep = set(names) ^ exclude - result = self.take_dists(keep) - return result + return self.take_dists(keep) def quick_tree(self, show_progress=False): """returns a neighbour joining tree diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/__init__.py python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/__init__.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -17,7 +17,7 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Gavin Huttley", "Peter Maxwell", @@ -29,7 +29,7 @@ "Greg Caporaso", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/likelihood_calculation.py python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/likelihood_calculation.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/likelihood_calculation.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/likelihood_calculation.py 2021-10-12 00:17:34.000000000 +0000 @@ -20,7 +20,6 @@ NonParamDefn, ProbabilityParamDefn, SumDefn, - _FuncDefn, ) @@ -28,10 +27,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = 
"Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/likelihood_function.py python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/likelihood_function.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/likelihood_function.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/likelihood_function.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,5 +1,3 @@ -#!/usr/bin/env python - import json import random @@ -24,7 +22,7 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Gavin Huttley", "Andrew Butterfield", @@ -36,7 +34,7 @@ "Ananias Iliadis", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -292,7 +290,18 @@ return result def get_psub_for_edge(self, name, **kw): - """returns the substitution probability matrix for the named edge""" + """returns the substitution probability matrix for the named edge + + Parameters + ---------- + name : str + name of the edge + + Returns + ------- + DictArray + """ + # todo handle case of multiple loci try: # For PartialyDiscretePsubsDefn array = self.get_param_value("dpsubs", edge=name, **kw) @@ -306,12 +315,19 @@ Parameters ---------- calibrated : bool - scales the rate matrix by branch length for each edge. If a rate - heterogeneity model, then the matrix is further scaled by rate - for a bin + If True, the rate matrix is scaled such that + ``sum(pi_i * Qii) == 1``. If False, the calibrated matrix is + multiplied by the length parameter (and the rate parameter for a + bin if it is a rate heterogeneity model). + Returns ------- - If a single rate matrix, the key is an empty tuple + {scope: DictArray, ...} + + Notes + ----- + If a single rate matrix (e.g. it's a time-homogeneous model), the key + is an empty tuple. 
""" defn = self.defn_for["Q"] @@ -359,8 +375,22 @@ def get_rate_matrix_for_edge(self, name, calibrated=True, **kw): """returns the rate matrix (Q) for the named edge - If calibrated=False, expm(Q) will give the same result as - get_psub_for_edge(name)""" + Parameters + ---------- + name : str + name of the edge + calibrated : bool + If True, the rate matrix is scaled such that + ``sum(pi_i * Qii) == 1``. If False, the calibrated matrix is + multiplied by the length parameter (and the rate parameter for a + bin if it is a rate heterogeneity model). + + Notes + ----- + If ``calibrated=False``, ``expm(Q)`` will give the same result as + ``self.get_psub_for_edge(name)`` + """ + # todo handle case of multiple loci try: array = self.get_param_value("Q", edge=name, **kw) array = array.copy() @@ -399,14 +429,21 @@ return root_lht.calc_G_statistic(root_lh, return_table) def reconstruct_ancestral_seqs(self, locus=None): - """returns a dict of DictArray objects containing probabilities - of each alphabet state for each node in the tree. + """computes the conditional probabilities of each state for each node + in the tree. Parameters ---------- locus a named locus + Returns + ------- + {node_name: DictArray, ...} + + Notes + ----- + Alignment columns are rows in the DictArray. """ result = {} array_template = None @@ -443,7 +480,7 @@ ) return result - def likely_ancestral_seqs(self, locus=None): + def likely_ancestral_seqs(self, locus=None) -> ArrayAlignment: """Returns the most likely reconstructed ancestral sequences as an alignment. 
@@ -451,7 +488,6 @@ ---------- locus a named locus - """ prob_array = self.reconstruct_ancestral_seqs(locus=locus) seqs = [] @@ -500,16 +536,18 @@ def _for_display(self): """processes statistics tables for display""" - title = self.name if self.name else "Likelihood function statistics" + title = self.name or "Likelihood function statistics" result = [] result += self.get_statistics(with_motif_probs=True, with_titles=True) - for i, table in enumerate(result): + for i, table_ in enumerate(result): if ( - "motif" in table.title and table.shape[1] == 2 and table.shape[0] >= 60 + "motif" in table_.title + and table_.shape[1] == 2 + and table_.shape[0] >= 60 ): # just sort codon motif probs, then truncate - table = table.sorted(columns="motif") - table.set_repr_policy(head=5, tail=5, show_shape=False) - result[i] = table + table_ = table_.sorted(columns="motif") + table_.set_repr_policy(head=5, tail=5, show_shape=False) + result[i] = table_ return title, result def _repr_html_(self): @@ -522,10 +560,10 @@ nfp = "

number of free parameters = %d

" % self.get_num_free_params() title, results = self._for_display() - for i, table in enumerate(results): - table.title = table.title.capitalize() - table.set_repr_policy(show_shape=False) - results[i] = table._repr_html_() + for i, table_ in enumerate(results): + table_.title = table_.title.capitalize() + table_.set_repr_policy(show_shape=False) + results[i] = table_._repr_html_() results = ["

%s

" % title, lnL, nfp] + results return "\n".join(results) @@ -542,14 +580,10 @@ lnL = None nfp = "number of free parameters = %d" % self.get_num_free_params() - for table in results: - table.title = "" - - if lnL: - results = [title, lnL, nfp] + results - else: - results = [title, nfp] + results + for table_ in results: + table_.title = "" + results = [title, lnL, nfp] + results if lnL else [title, nfp] + results return "\n".join(map(str, results)) def get_annotated_tree(self, length_as=None): @@ -1024,20 +1058,27 @@ a random number generator. exclude_internal if True, only sequences for tips are returned. + locus + if fit to multiple alignments, select the values corresponding to + locus for generating data + seed + seed value for the random number generator root_sequence - a sequence from which all others evolve. - + a sequence from which all others evolve """ - + orig_ambig = {} if sequence_length is None: lht = self.get_param_value("lht", locus=locus) - sequence_length = len(lht.index) + try: + sequence_length = len(lht.index) + except AttributeError: + raise ValueError( + "Must provide sequence_length since no alignment set on self" + ) + leaves = self.get_param_value("leaf_likelihoods", locus=locus) - orig_ambig = {} for (seq_name, leaf) in list(leaves.items()): orig_ambig[seq_name] = leaf.get_ambiguous_positions() - else: - orig_ambig = {} if random_series is None: random_series = random.Random() @@ -1096,7 +1137,7 @@ return True def initialise_from_nested(self, nested_lf): - from cogent3.evolve.substitution_model import TimeReversible + from cogent3.evolve.substitution_model import Stationary assert ( self.get_num_free_params() > nested_lf.get_num_free_params() @@ -1104,11 +1145,11 @@ compatible_likelihood_functions(self, nested_lf) same = ( - isinstance(self.model, TimeReversible) - and isinstance(nested_lf.model, TimeReversible) + isinstance(self.model, Stationary) + and isinstance(nested_lf.model, Stationary) ) or ( - not isinstance(self.model, 
TimeReversible) - and not isinstance(nested_lf.model, TimeReversible) + not isinstance(self.model, Stationary) + and not isinstance(nested_lf.model, Stationary) ) mprobs = nested_lf.get_motif_probs() diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/likelihood_tree_numba.py python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/likelihood_tree_numba.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/likelihood_tree_numba.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/likelihood_tree_numba.py 2021-10-12 00:17:34.000000000 +0000 @@ -7,7 +7,7 @@ __copyright__ = "Copyright 2007-2019, The Cogent Project" __credits__ = ["Peter Maxwell", "Rob Knight", "Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/likelihood_tree.py python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/likelihood_tree.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/likelihood_tree.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/likelihood_tree.py 2021-10-12 00:17:34.000000000 +0000 @@ -13,10 +13,10 @@ numerictypes = numpy.core.numerictypes.sctype2char __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -274,7 +274,7 @@ def backward(self): index = numpy.array(self.index[::-1, ...]) - result = self.__class__( + return self.__class__( self.uniq, self.input_likelihoods, self.counts, @@ -283,7 +283,6 @@ self.alphabet, None, ) - return result def __len__(self): return 
len(self.index) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/models.py python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/models.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/models.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/models.py 2021-10-12 00:17:34.000000000 +0000 @@ -22,10 +22,10 @@ __author__ = "Matthew Wakefield" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Matthew Wakefield", "Peter Maxwell", "Gavin Huttley", "James Kondilios"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" @@ -113,8 +113,7 @@ kw["optimise_motif_probs"] = optimise_motif_probs kw["mprob_model"] = "tuple" kw["name"] = kw.get("name", f"DT-{motif_length}") - sm = ns_substitution_model.DiscreteSubstitutionModel(alpha, **kw) - return sm + return ns_substitution_model.DiscreteSubstitutionModel(alpha, **kw) def GN(optimise_motif_probs=True, **kw): @@ -127,8 +126,7 @@ kwargs = dict(recode_gaps=True, model_gaps=False) kwargs.update(kw) kwargs.update(required) - sm = ns_substitution_model.NonReversibleNucleotide(**kwargs) - return sm + return ns_substitution_model.NonReversibleNucleotide(**kwargs) def ssGN(optimise_motif_probs=True, **kw): @@ -144,7 +142,7 @@ def K80(**kw): """Kimura 1980""" - required = dict(equal_motif_probs=True, optimise_motif_probs=False) + required = dict(name="K80", equal_motif_probs=True, optimise_motif_probs=False) kwargs = {} kwargs.update(kw) kwargs.update(required) @@ -153,7 +151,7 @@ def JC69(**kw): """Jukes and Cantor's 1969 model""" - required = dict(equal_motif_probs=True, optimise_motif_probs=False) + required = dict(name="JC69", equal_motif_probs=True, optimise_motif_probs=False) kwargs = {} kwargs.update(kw) kwargs.update(required) @@ -244,15 +242,23 @@ """Goldman and 
Yang 1994 codon substitution model. N Goldman and Z Yang, 1994, Mol Biol Evol, 11(5):725-36.""" - return Y98(**kw) + required = dict(name="GY94") + kwargs = {} + kwargs.update(kw) + kwargs.update(required) + return Y98(**kwargs) def Y98(**kw): """Yang's 1998 substitution model, a derivative of the GY94. Z Yang, 1998, Mol Biol Evol, 15(5):568-73""" + required = dict( - name="Y98", predicates=[_kappa, _omega], mprob_model="tuple", model_gaps=False + predicates=[_kappa, _omega], + mprob_model="tuple", + model_gaps=False, + name=kw.get("name", "Y98"), ) kwargs = dict(recode_gaps=True, motif_probs=None) kwargs.update(kw) @@ -2680,12 +2686,10 @@ Atlas of protein sequence and structure, Vol 5, Suppl. 3. National Biomedical Research Foundation, Washington D. C Matrix imported from PAML dayhoff.dat file""" - sm = substitution_model.EmpiricalProteinMatrix( + return substitution_model.EmpiricalProteinMatrix( DSO78_matrix, DSO78_freqs, name="DSO78", **kw ) - return sm - def JTT92(**kw): """Jones, Taylor and Thornton 1992 empirical protein model @@ -2693,12 +2697,10 @@ The rapid generation of mutation data matrices from protein sequences. Comput Appl Biosci. 1992 Jun;8(3):275-82. Matrix imported from PAML jones.dat file""" - sm = substitution_model.EmpiricalProteinMatrix( + return substitution_model.EmpiricalProteinMatrix( JTT92_matrix, JTT92_freqs, name="JTT92", **kw ) - return sm - def AH96(**kw): """Adachi and Hasegawa 1996 empirical model for mitochondrial proteins. @@ -2706,17 +2708,28 @@ Model of amino acid substitution in proteins encoded by mitochondrial DNA. J Mol Evol. 1996 Apr;42(4):459-68. Matrix imported from PAML mtREV24.dat file""" - sm = substitution_model.EmpiricalProteinMatrix( + return substitution_model.EmpiricalProteinMatrix( AH96_matrix, AH96_freqs, name="AH96_mtREV24", **kw ) - return sm - def get_model(name, **kw): """returns an instance of the named model - name is case sensitive and must be in the models attribute""" + name is case sensitive. 
+ + Parameters + ---------- + optimise_motif_probs: bool + Treat like other free parameters. + recode_gaps: bool + Whether gaps in an alignment should be treated as an ambiguous state + instead. + + Notes + ----- + See available_models() for the full list. + """ if isinstance(name, _SubstitutionModel): # already a substitution model return name @@ -2739,12 +2752,10 @@ Model of amino acid substitution in proteins encoded by mitochondrial DNA. J Mol Evol. 1996 Apr;42(4):459-68. Matrix imported from PAML mtmam.dat file""" - sm = substitution_model.EmpiricalProteinMatrix( + return substitution_model.EmpiricalProteinMatrix( AH96_mtmammals_matrix, AH96_mtmammals_freqs, name="AH96_mtmammals", **kw ) - return sm - def mtmam(**kw): return AH96_mtmammals(**kw) @@ -2757,16 +2768,13 @@ families using a maximum-likelihood approach. Mol Biol Evol. 2001 May;18(5):691-9. Matrix imported from PAML wag.dat file""" - sm = substitution_model.EmpiricalProteinMatrix( + return substitution_model.EmpiricalProteinMatrix( WG01_matrix, WG01_freqs, name="WG01", **kw ) - return sm - def available_models(model_types=None): - """This function returns a cogent3 Table instance with header - ['Model Type', 'Abbreviation', 'Description'].""" + """returns Table listing the pre-defined substitution models""" column_headings = ["Model Type", "Abbreviation", "Description"] _model_types = { "nucleotide": nucleotide_models, @@ -2787,9 +2795,8 @@ description = "" rows.append([mod_type, abbreviation, description]) - t = Table( + return Table( header=column_headings, data=rows, title="Specify a model using 'Abbreviation' (case sensitive).", ) - return t diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/motif_prob_model.py python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/motif_prob_model.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/motif_prob_model.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/motif_prob_model.py 2021-10-12 
00:17:34.000000000 +0000 @@ -5,15 +5,14 @@ import numpy from cogent3.evolve.likelihood_tree import make_likelihood_tree_leaf - -from . import substitution_calculation +from cogent3.recalculation.definition import CalcDefn, PartitionDefn __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -47,7 +46,7 @@ def make_motif_probs_defn(self): """Makes the first part of a parameter controller definition for this model, the calculation of motif probabilities""" - return substitution_calculation.PartitionDefn( + return PartitionDefn( name="mprobs", default=None, dimensions=("locus", "edge"), @@ -185,17 +184,14 @@ return result def calc_word_weight_matrix(self, monomer_probs): - result = monomer_probs.take(self.mutant_motif) * self.mask - return result + return monomer_probs.take(self.mutant_motif) * self.mask def make_motif_word_prob_defns(self): monomer_probs = self.make_motif_probs_defn() - word_probs = substitution_calculation.CalcDefn( - self.calc_word_probs, name="wprobs" - )(monomer_probs) - mprobs_matrix = substitution_calculation.CalcDefn( - self.calc_word_weight_matrix, name="mprobs_matrix" - )(monomer_probs) + word_probs = CalcDefn(self.calc_word_probs, name="wprobs")(monomer_probs) + mprobs_matrix = CalcDefn(self.calc_word_weight_matrix, name="mprobs_matrix")( + monomer_probs + ) return (monomer_probs, word_probs, mprobs_matrix) def adapt_motif_probs(self, motif_probs, auto=False): @@ -232,21 +228,15 @@ return result def calc_word_weight_matrix(self, monomer_probs): - positions = list(range(self.word_length)) monomer_probs = numpy.array(monomer_probs) # so [posn, motif] size = monomer_probs.shape[-1] # should be constant extended_indices = self.mutated_posn * 
size + self.mutant_motif - # print size, self.word_length - # for a in [extended_indices, self.mutated_posn, self.mutant_motif, - # monomer_probs]: - # print a.shape, a.max() - result = monomer_probs.take(extended_indices) * self.mask return result def make_motif_word_prob_defns(self): - monomer_probs = substitution_calculation.PartitionDefn( + monomer_probs = PartitionDefn( name="psmprobs", default=None, dimensions=("locus", "position", "edge"), @@ -255,15 +245,13 @@ monomer_probs3 = monomer_probs.across_dimension( "position", [str(i) for i in range(self.word_length)] ) - monomer_probs3 = substitution_calculation.CalcDefn( - lambda *x: numpy.array(x), name="mprobs" - )(*monomer_probs3) - word_probs = substitution_calculation.CalcDefn( - self.calc_word_probs, name="wprobs" - )(monomer_probs3) - mprobs_matrix = substitution_calculation.CalcDefn( - self.calc_word_weight_matrix, name="mprobs_matrix" - )(monomer_probs3) + monomer_probs3 = CalcDefn(lambda *x: numpy.array(x), name="mprobs")( + *monomer_probs3 + ) + word_probs = CalcDefn(self.calc_word_probs, name="wprobs")(monomer_probs3) + mprobs_matrix = CalcDefn(self.calc_word_weight_matrix, name="mprobs_matrix")( + monomer_probs3 + ) return (monomer_probs, word_probs, mprobs_matrix) def set_param_controller_motif_probs(self, pc, motif_probs, **kw): @@ -292,12 +280,11 @@ def calc_word_weight_matrix(self, motif_probs): context_probs = numpy.dot(motif_probs, self.w2c) context_probs[context_probs == 0.0] = numpy.inf - result = motif_probs / context_probs.take(self.context_indices) - return result + return motif_probs / context_probs.take(self.context_indices) def make_motif_word_prob_defns(self): mprobs = self.make_motif_probs_defn() - mprobs_matrix = substitution_calculation.CalcDefn( - self.calc_word_weight_matrix, name="mprobs_matrix" - )(mprobs) + mprobs_matrix = CalcDefn(self.calc_word_weight_matrix, name="mprobs_matrix")( + mprobs + ) return (mprobs, mprobs, mprobs_matrix) diff -Nru 
python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/ns_substitution_model.py python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/ns_substitution_model.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/ns_substitution_model.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/ns_substitution_model.py 2021-10-12 00:17:34.000000000 +0000 @@ -4,22 +4,21 @@ from cogent3.evolve.discrete_markov import PsubMatrixDefn from cogent3.evolve.predicate import MotifChange from cogent3.maths.optimisers import ParameterOutOfBoundsError +from cogent3.util.misc import extend_docstring_from from .substitution_model import ( Parametric, Stationary, - TimeReversibleNucleotide, _Codon, - _ContinuousSubstitutionModel, _SubstitutionModel, ) __author__ = "Peter Maxwell, Gavin Huttley and Andrew Butterfield" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __contributors__ = ["Gavin Huttley", "Peter Maxwell", "Ben Kaeheler", "Ananias Iliadis"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -61,7 +60,7 @@ y = alphabet[j] self.parameter_order.append("%s/%s" % (x, y)) self.param_pick[i, j] = len(self.parameter_order) - const_param = self.parameter_order.pop() + _ = self.parameter_order.pop() self.symmetric = False self.check_params_exist() @@ -74,30 +73,45 @@ every possible instantaneous substitution, except the last in each column. As general as can be while still having stationary motif probabilities""" - # @extend_docstring_from(_ContinuousSubstitutionModel) def __init__(self, alphabet, **kw): Stationary.__init__(self, alphabet, **kw) alphabet = self.get_alphabet() # as may be altered by recode_gaps etc. 
mask = self._instantaneous_mask N = len(alphabet) - self.param_pick = numpy.zeros([N, N], int) - self.parameter_order = [] - self.last_in_column = [] - for (d, (row, col)) in enumerate(zip(mask, mask.T)): + param_pick = numpy.zeros([N, N], int) + predicates = [] + last_in_column = [] + for d, (row, col) in enumerate(zip(mask, mask.T)): row = list(numpy.flatnonzero(row[d:]) + d) col = list(numpy.flatnonzero(col[d:]) + d) if col: - self.last_in_column.append((col.pop(), d)) + last_in_column.append((col.pop(), d)) else: assert not row + inst = [(d, j) for j in row] + [(i, d) for i in col] for (i, j) in inst: (x, y) = [alphabet[k] for k in [i, j]] - self.parameter_order.append("%s/%s" % (x, y)) - self.param_pick[i, j] = len(self.parameter_order) - const_param = self.parameter_order.pop() + predicates.append(MotifChange(x, y, forward_only=True)) + param_pick[i, j] = len(predicates) + + self.param_pick = param_pick + self.last_in_column = last_in_column + + predicate_masks, predicate_order = self._adapt_predicates(predicates) + self.predicate_masks = predicate_masks + self.parameter_order = [] + self.predicate_indices = [] + + for pred in predicate_order: + mask = predicate_masks[pred] + indices = numpy.nonzero(mask) + assert numpy.alltrue(mask[indices] == 1) + self.parameter_order.append(pred) + self.predicate_indices.append(indices) + self.symmetric = False self.check_params_exist() @@ -139,27 +153,33 @@ class NonReversibleNucleotide(Parametric): - """A nucleotide substitution model.""" + """Base non-reversible nucleotide substitution model.""" + @extend_docstring_from(Parametric.__init__) def __init__(self, *args, **kw): Parametric.__init__(self, moltype.DNA.alphabet, *args, **kw) class NonReversibleDinucleotide(Parametric): - """A dinucleotide substitution model.""" + """Base non-reversible dinucleotide substitution model.""" + @extend_docstring_from(Parametric.__init__) def __init__(self, *args, **kw): Parametric.__init__(self, moltype.DNA.alphabet, motif_length=2, 
*args, **kw) class NonReversibleTrinucleotide(Parametric): - """A trinucleotide substitution model.""" + """Base non-reversible trinucleotide substitution model.""" + @extend_docstring_from(Parametric.__init__) def __init__(self, *args, **kw): Parametric.__init__(self, moltype.DNA.alphabet, motif_length=3, *args, **kw) class NonReversibleCodon(_Codon, Parametric): + """Base non-reversible codon substitution model.""" + + @extend_docstring_from(Parametric.__init__) def __init__(self, alphabet=None, gc=None, **kw): if gc is not None: alphabet = moltype.CodonAlphabet(gc=gc) @@ -177,7 +197,7 @@ class NonReversibleProtein(Parametric): - """base protein substitution model.""" + """Base non-reversible protein substitution model.""" def __init__(self, with_selenocysteine=False, *args, **kw): alph = moltype.PROTEIN.alphabet diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/pairwise_distance_numba.py python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/pairwise_distance_numba.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/pairwise_distance_numba.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/pairwise_distance_numba.py 2021-10-12 00:17:34.000000000 +0000 @@ -5,7 +5,7 @@ __copyright__ = "Copyright 2007-2019, The Cogent Project" __credits__ = ["Gavin Huttley", "Yicheng Zhu", "Ben Kaehler", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/parameter_controller.py python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/parameter_controller.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/parameter_controller.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/parameter_controller.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,4 +1,3 @@ -#!/usr/bin/env python """ This file defines 
a class for controlling the scope and heterogeneity of parameters involved in a maximum-likelihood based tree analysis. @@ -21,10 +20,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Andrew Butterfield", "Peter Maxwell", "Gavin Huttley", "Helen Lindsay"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.ed.au" __status__ = "Production" @@ -85,7 +84,7 @@ self.motifs = self._motifs = model.get_motifs() self._mprob_motifs = list(model.get_mprob_alphabet()) defn = self.make_likelihood_defn(**kw) - super(_LF, self).__init__(defn) + super(_LikelihoodParameterController, self).__init__(defn) self.set_default_param_rules() self.set_default_tree_parameter_rules() self.mprobs_from_alignment = motif_probs_from_align @@ -201,7 +200,7 @@ self.set_param_rule("expm", is_constant=True, value=expm) def make_calculator(self, **kw): - return super(_LF, self).make_calculator(**kw) + return super(_LikelihoodParameterController, self).make_calculator(**kw) def _process_scope_info( self, diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/predicate.py python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/predicate.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/predicate.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/predicate.py 2021-10-12 00:17:34.000000000 +0000 @@ -15,10 +15,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -99,8 +99,7 @@ return self.name def interpret(self, model): - subpred 
= self.subpredicate.interpret(model) - return subpred + return self.subpredicate.interpret(model) class _UnaryPredicate(predicate): @@ -221,7 +220,7 @@ return positions def test_motifs(self, from_motifs, to_motifs, x, y): - """"positions where both motifs patterns are found""" + """ "positions where both motifs patterns are found""" pre = self.test_motif(from_motifs, x) post = self.test_motif(to_motifs, y) return pre & post diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/simulate.py python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/simulate.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/simulate.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/simulate.py 2021-10-12 00:17:34.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/solved_models_numba.py python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/solved_models_numba.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/solved_models_numba.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/solved_models_numba.py 2021-10-12 00:17:34.000000000 +0000 @@ -9,7 +9,7 @@ __copyright__ = "Copyright 2007-2019, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/solved_models.py python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/solved_models.py --- 
python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/solved_models.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/solved_models.py 2021-10-12 00:17:34.000000000 +0000 @@ -17,10 +17,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -64,13 +64,13 @@ assert_allclose(P1, P2) -def _solved_nucleotide(name, predicates, rate_matrix_required=True, **kw): +def _solved_nucleotide(predicates, rate_matrix_required=True, **kw): if _solved_models is not None and not rate_matrix_required: klass = PredefinedNucleotide else: klass = TimeReversibleNucleotide kw["model_gaps"] = False - return klass(name=name, predicates=predicates, **kw) + return klass(predicates=predicates, **kw) kappa_y = MotifChange("T", "C").aliased("kappa_y") @@ -81,16 +81,21 @@ def TN93(**kw): """Tamura and Nei 1993 model""" kw["recode_gaps"] = True - return _solved_nucleotide("TN93", [kappa_y, kappa_r], **kw) + kw["name"] = "TN93" + return _solved_nucleotide([kappa_y, kappa_r], **kw) def HKY85(**kw): """Hasegawa, Kishino and Yano 1985 model""" kw["recode_gaps"] = True - return _solved_nucleotide("HKY85", [kappa], **kw) + # this function called by others, so we don't overwrite name if it exists + kw["name"] = kw.get("name", "HKY85") + return _solved_nucleotide([kappa], **kw) def F81(**kw): """Felsenstein's 1981 model""" kw["recode_gaps"] = True - return _solved_nucleotide("F81", [], **kw) + # this function called by others, so we don't overwrite name if it exists + kw["name"] = kw.get("name", "F81") + return _solved_nucleotide([], **kw) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/substitution_calculation.py 
python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/substitution_calculation.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/substitution_calculation.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/substitution_calculation.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,36 +1,25 @@ #!/usr/bin/env python import warnings -import numpy +from numpy.linalg import LinAlgError from cogent3.maths.matrix_exponentiation import ( CheckedExponentiator, FastExponentiator, - LinAlgError, PadeExponentiator, ) from cogent3.recalculation.definition import ( - CalcDefn, CalculationDefn, - CallDefn, - ConstDefn, - GammaDefn, - MonotonicDefn, - NonParamDefn, - PartitionDefn, PositiveParamDefn, - ProductDefn, RatioParamDefn, - SelectForDimension, - WeightedPartitionDefn, ) __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/substitution_model.py python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/substitution_model.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/substitution_model.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/substitution_model.py 2021-10-12 00:17:34.000000000 +0000 @@ -30,7 +30,6 @@ >>> parameter_controller = model.make_likelihood_function(tree) """ -import inspect import json import warnings @@ -42,33 +41,32 @@ from cogent3.core import moltype from cogent3.evolve import motif_prob_model, parameter_controller, predicate -from cogent3.evolve.discrete_markov import PsubMatrixDefn from cogent3.evolve.likelihood_tree import make_likelihood_tree_leaf from cogent3.evolve.substitution_calculation 
import ( AlignmentAdaptDefn, + ExpDefn, + LengthDefn, +) +from cogent3.evolve.substitution_calculation import ( + SubstitutionParameterDefn as ParamDefn, +) +from cogent3.recalculation.definition import ( CalcDefn, CallDefn, ConstDefn, - ExpDefn, GammaDefn, - LengthDefn, MonotonicDefn, NonParamDefn, PartitionDefn, ProductDefn, - RateDefn, SelectForDimension, + WeightedPartitionDefn, ) -from cogent3.evolve.substitution_calculation import ( - SubstitutionParameterDefn as ParamDefn, -) -from cogent3.evolve.substitution_calculation import WeightedPartitionDefn -from cogent3.maths.optimisers import ParameterOutOfBoundsError from cogent3.util.misc import extend_docstring_from, get_object_provenance __author__ = "Peter Maxwell, Gavin Huttley and Andrew Butterfield" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __contributors__ = [ "Gavin Huttley", "Andrew Butterfield", @@ -79,7 +77,7 @@ "Von Bing Yap", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -144,31 +142,37 @@ name="", motifs=None, ): - # subclasses can extend this incomplete docstring """ - - alphabet: - - alphabet - An Alphabet object - - motif_length: Use a tuple alphabet based on 'alphabet'. - - motifs: Use a subalphabet that only contains those motifs. - - model_gaps: Whether the gap motif should be included as a state. - - recode_gaps: Whether gaps in an alignment should be treated as an - ambiguous state instead. - - Motif Probability: - - motif_probs: Dictionary of probabilities. - - equal_motif_probs: Flag to set alignment motif probs equal. - - motif_probs_alignment: An alignment from which motif probs are set. - - If none of these options are set then motif probs will be derived - from the data: ie the particular alignment provided later. - - - optimise_motif_probs: Treat like other free parameters. 
Any values - set by the other motif_prob options will be used as initial values. - - - mprob_model: 'tuple', 'conditional', 'monomer' or 'monomers' to specify how + Parameters + ---------- + alphabet + An Alphabet object + motif_probs + Dictionary of probabilities. + optimise_motif_probs: bool + Treat like other free parameters. Any values set by the other + motif_prob options will be used as initial values. + equal_motif_probs: bool + Flag to set alignment motif probs equal. + motif_probs_from_data: bool + Get motif probabilities from data provided to likelihood function. + motif_probs_alignment + An alignment from which motif probs are set. + mprob_model: str + 'tuple', 'conditional', 'monomer' or 'monomers' to specify how tuple-alphabet (including codon) motif probs are used. - + model_gaps: bool + Whether the gap motif should be included as a state. + recode_gaps: bool + Whether gaps in an alignment should be treated as an ambiguous + state instead. + motif_length: int + Based on 'alphabet', uses a tuple alphabet where individual words + have motif_length number of characters. + name: str + Name of this model + motifs + Use a subalphabet that only contains those motifs. """ d = locals() exclude = ("self", "__class__") @@ -426,8 +430,7 @@ def non_zero_coords(matrix): dim = matrix.shape[0] - coords = [(i, j) for i in range(dim) for j in range(dim) if matrix[i, j] != 0] - return coords + return [(i, j) for i in range(dim) for j in range(dim) if matrix[i, j] != 0] class _ContinuousSubstitutionModel(_SubstitutionModel): @@ -462,11 +465,15 @@ **kw, ): """ - - with_rate: Add a 'rate' parameter which varies by bin. - - ordered_param: name of a single parameter which distinguishes any bins. - - distribution: choices of 'free' or 'gamma' or an instance of some - distribution. Could probably just deprecate free - - partitioned_params: names of params to be partitioned across bins + with_rate: bool + Add a 'rate' parameter which varies by bin. 
+ ordered_param: str + name of a single parameter which distinguishes any bins. + distribution: str + choices of 'free' or 'gamma' or an instance of some distribution + partitioned_params + names of params to be partitioned across bins + kw """ _SubstitutionModel.__init__(self, alphabet, **kw) @@ -572,13 +579,11 @@ for m in self.predicate_masks.values(): mats += m ref_mask = self._instantaneous_mask - mats - ref_cells = set(non_zero_coords(ref_mask)) - return ref_cells + return set(non_zero_coords(ref_mask)) def get_param_matrix_coords(self, include_ref_cell=False): """returncoordinates for every predicate""" dim = len(self.alphabet) - mats = numpy.zeros((dim, dim), dtype=int) param_coords = {} for key, m in self.predicate_masks.items(): coords = [(i, j) for i in range(dim) for j in range(dim) if m[i, j] != 0] @@ -594,8 +599,7 @@ Q = CalcDefn(self.calcQ, name="Q")(word_probs, mprobs_matrix, *rate_params) expm = NonParamDefn("expm") exp = ExpDefn(expm) - Qd = CallDefn(exp, Q, name="Qd") - return Qd + return CallDefn(exp, Q, name="Qd") def _make_bin_param_defn(self, edge_par_name, bin_par_name, bprob_defn): # if no ordered param defined, behaves as old, everything indexed by @@ -638,10 +642,9 @@ def make_psubs_defn(self, bprobs, word_probs, mprobs_matrix, rate_params): distance = self.make_distance_defn(bprobs) - P = self.make_continuous_psub_defn( + return self.make_continuous_psub_defn( word_probs, mprobs_matrix, distance, rate_params ) - return P def make_distance_defn(self, bprobs): length = LengthDefn() @@ -656,8 +659,7 @@ self, word_probs, mprobs_matrix, distance, rate_params ): Qd = self.make_Qd_defn(word_probs, mprobs_matrix, rate_params) - P = CallDefn(Qd, distance, name="psubs") - return P + return CallDefn(Qd, distance, name="psubs") class StationaryQ: @@ -709,8 +711,11 @@ @extend_docstring_from(_ContinuousSubstitutionModel.__init__) def __init__(self, alphabet, predicates=None, scales=None, **kw): """ - - predicates: a dict of {name:predicate}. 
See cogent3.evolve.predicate - - scales: scale rules, dict with predicates + predicates: dict + a dict of {name:predicate}. See cogent3.evolve.predicate + scales: dict + scale rules, dict with predicates + kw """ self._canned_predicates = None _ContinuousSubstitutionModel.__init__(self, alphabet, **kw) @@ -893,13 +898,16 @@ class Stationary(StationaryQ, Parametric): + @extend_docstring_from(Parametric.__init__) def __init__(self, *args, **kw): + """ """ Parametric.__init__(self, *args, **kw) class TimeReversible(Stationary): + @extend_docstring_from(Stationary.__init__) def __init__(self, *args, **kw): - """""" + """ """ Stationary.__init__(self, *args, **kw) if not self.symmetric: raise ValueError( @@ -1015,6 +1023,7 @@ class TimeReversibleCodon(_Codon, _TimeReversibleNucleotide): """Core substitution model for codons""" + @extend_docstring_from(_TimeReversibleNucleotide.__init__) def __init__(self, alphabet=None, gc=None, **kw): if gc is not None: alphabet = moltype.CodonAlphabet(gc=gc) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/format/alignment.py python-cogent-2021.10.12a1+dfsg/src/cogent3/format/alignment.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/format/alignment.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/format/alignment.py 2021-10-12 00:17:34.000000000 +0000 @@ -11,10 +11,10 @@ __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/format/bedgraph.py python-cogent-2021.10.12a1+dfsg/src/cogent3/format/bedgraph.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/format/bedgraph.py 2020-12-20 
23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/format/bedgraph.py 2021-10-12 00:17:34.000000000 +0000 @@ -2,10 +2,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "alpha" @@ -57,7 +57,7 @@ def raise_invalid_vals(key, val): - """raises RuntimeError on invalid values for keys """ + """raises RuntimeError on invalid values for keys""" if key not in valid_values: return True if not str(val) in valid_values[key]: diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/format/clustal.py python-cogent-2021.10.12a1+dfsg/src/cogent3/format/clustal.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/format/clustal.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/format/clustal.py 2021-10-12 00:17:34.000000000 +0000 @@ -8,16 +8,16 @@ __author__ = "Jeremy Widmann" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Jeremy Widmann"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Jeremy Widmann" __email__ = "jeremy.widmann@colorado.edu" __status__ = "Development" -def clustal_from_alignment(aln, interleave_len=None, wrap=None): +def clustal_from_alignment(aln, wrap=None): """ Parameters ---------- @@ -31,12 +31,6 @@ ------- Returns a string in Clustal format """ - if interleave_len is not None: - from cogent3.util.warning import deprecated - - deprecated("argument", "interleave_len", "wrap", "2021.6") - wrap = interleave_len if wrap == 60 else wrap - if not aln: return "" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/format/fasta.py 
python-cogent-2021.10.12a1+dfsg/src/cogent3/format/fasta.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/format/fasta.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/format/fasta.py 2021-10-12 00:17:34.000000000 +0000 @@ -6,10 +6,10 @@ __author__ = "Jeremy Widmann" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Jeremy Widmann", "Rob Knight", "Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Jeremy Widmann" __email__ = "jeremy.widmann@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/format/gde.py python-cogent-2021.10.12a1+dfsg/src/cogent3/format/gde.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/format/gde.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/format/gde.py 2021-10-12 00:17:34.000000000 +0000 @@ -6,10 +6,10 @@ __author__ = "Thomas La" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Jeremy Widmann", "Rob Knight", "Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Thomas La" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/format/__init__.py python-cogent-2021.10.12a1+dfsg/src/cogent3/format/__init__.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/format/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/format/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -18,7 +18,7 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Jeremy Widmann", "Gavin Huttley", @@ -29,7 +29,7 @@ "Marcin Cieslik", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ 
= "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/format/nexus.py python-cogent-2021.10.12a1+dfsg/src/cogent3/format/nexus.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/format/nexus.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/format/nexus.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,16 +1,14 @@ -#!/usr/bin/env python - __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" -def nexus_from_alignment(aln, seq_type, wrap=50, interleave_len=None): +def nexus_from_alignment(aln, seq_type, wrap=50): """returns a nexus formatted string Parameters @@ -19,14 +17,7 @@ dna, rna, or protein wrap the line width - interleave_len - will be replaced by wrap in version 2021.6 - """ - if interleave_len is not None: - cogent3.util.warning.deprecated("argument", "interleave_len", "wrap", "2021.6") - wrap = interleave_len if wrap == 50 else wrap - if aln.is_ragged(): raise ValueError( "Sequences in alignment are not all the same " diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/format/paml.py python-cogent-2021.10.12a1+dfsg/src/cogent3/format/paml.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/format/paml.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/format/paml.py 2021-10-12 00:17:34.000000000 +0000 @@ -6,10 +6,10 @@ __author__ = "Thomas La" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Jeremy Widmann", "Rob Knight", "Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = 
"2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Thomas La" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/format/phylip.py python-cogent-2021.10.12a1+dfsg/src/cogent3/format/phylip.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/format/phylip.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/format/phylip.py 2021-10-12 00:17:34.000000000 +0000 @@ -6,10 +6,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/format/table.py python-cogent-2021.10.12a1+dfsg/src/cogent3/format/table.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/format/table.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/format/table.py 2021-10-12 00:17:34.000000000 +0000 @@ -15,10 +15,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Peter Maxwell", "Matthew Wakefield", "Jeremy Widmann"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -60,7 +60,7 @@ def _merged_cell_text_wrap(text, max_line_length, space): - """ left justify wraps text into multiple rows""" + """left justify wraps text into multiple rows""" max_line_width = max_line_length - (2 * space) if len(text) < max_line_length: return [text] @@ -157,7 +157,6 @@ data += [thead] + row + [""] formatted_rows = [] - td = formatted("td", "") for ridx, row in enumerate(rows): new = [formatted("tr", "")] for cidx, 
cell in row_iterator(row): diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/format/util.py python-cogent-2021.10.12a1+dfsg/src/cogent3/format/util.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/format/util.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/format/util.py 2021-10-12 00:17:34.000000000 +0000 @@ -3,10 +3,10 @@ """ __author__ = "Thomas La" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Jeremy Widmann", "Rob Knight", "Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/__init__.py python-cogent-2021.10.12a1+dfsg/src/cogent3/__init__.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -44,10 +44,11 @@ from cogent3.util.misc import get_format_suffixes, open_ from cogent3.util.table import Table as _Table from cogent3.util.table import cast_str_to_array +from cogent3.util.warning import deprecated __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Gavin Huttley", "Rob Knight", @@ -71,13 +72,11 @@ "Daniel McDonald", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" -from cogent3.util.warning import deprecated, discontinued - if sys.version_info < (3, 6): PY_VERSION = ".".join([str(n) for n in sys.version_info]) @@ -324,7 +323,6 @@ legend="", missing_data="", column_templates=None, - dtype=None, data_frame=None, format="simple", **kwargs, @@ -368,17 +366,13 
@@ output format when using str(Table) """ - if any([isinstance(a, str) for a in (header, data)]): - raise TypeError(f"str type invalid, if its a path use load_table()") + if any(isinstance(a, str) for a in (header, data)): + raise TypeError("str type invalid, if it's a path use load_table()") if "index" in kwargs: deprecated("argument", "index", "index_name", "2021.11") index_name = kwargs.pop("index", index_name) - if "dtype" in kwargs: - kwargs.pop("dtype") - discontinued("argument", "dtype", "2021.04") - data = kwargs.get("rows", data) if data_frame is not None: from pandas import DataFrame @@ -388,13 +382,12 @@ data = {c: data_frame[c].to_numpy() for c in data_frame} - table = _Table( + return _Table( header=header, data=data, digits=digits, row_order=row_order, title=title, - dtype=dtype, column_templates=column_templates, space=space, missing_data=missing_data, @@ -405,8 +398,6 @@ format=format, ) - return table - def load_table( filename, @@ -479,17 +470,12 @@ deprecated("argument", "index", "index_name", "2021.11") index_name = kwargs.pop("index", index_name) - if "dtype" in kwargs: - kwargs.pop("dtype") - discontinued("argument", "dtype", "2021.04") - sep = sep or kwargs.pop("delimiter", None) file_format, compress_format = get_format_suffixes(filename) if file_format == "json": return load_from_json(filename, (_Table,)) - - if file_format in ("pickle", "pkl"): + elif file_format in ("pickle", "pkl"): f = open_(filename, mode="rb") loaded_table = pickle.load(f) f.close() @@ -497,14 +483,19 @@ r.__setstate__(loaded_table) return r - if not reader: + if reader: + with open_(filename, newline=None) as f: + data = [row for row in reader(f)] + header = data[0] + data = {column[0]: column[1:] for column in zip(*data)} + else: if file_format == "csv": sep = sep or "," elif file_format == "tsv": sep = sep or "\t" header, rows, loaded_title, legend = load_delimited( - filename, delimiter=sep, limit=limit, **kwargs + filename, sep=sep, limit=limit, **kwargs ) if 
skip_inconsistent: num_fields = len(header) @@ -517,12 +508,6 @@ title = title or loaded_title data = {column[0]: column[1:] for column in zip(header, *rows)} - else: - f = open_(filename, newline=None) - data = [row for row in reader(f)] - header = data[0] - data = {column[0]: column[1:] for column in zip(*data)} - f.close() for key, value in data.items(): data[key] = cast_str_to_array(value, static_type=static_column_types) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/distance_transform.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/distance_transform.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/distance_transform.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/distance_transform.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,4 +1,3 @@ -#!/usr/bin/env python """ matrix based distance metrics, and related coordinate transforms functions to compute distance matrices row by row from abundance matrices, @@ -62,44 +61,21 @@ argsort, array, asarray, - geterr, isfinite, isnan, logical_and, - logical_not, - logical_or, - logical_xor, mean, min, multiply, nan_to_num, - nonzero, - ravel, - seterr, - shape, - sqrt, - square, - std, - sum, - take, - where, - zeros, ) +from numpy import ndim as rank +from numpy import ravel, seterr, shape, sqrt, square, sum, take, where, zeros from numpy.linalg import norm -try: - from numpy import ndim as rank -except ImportError: - from numpy import rank - -# any, all from numpy override built in any, all, preventing: -# ValueError: The truth value of an array with more than one element is -# ambiguous. 
Use a.any() or a.all() - - __author__ = "Justin Kuczynski" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Rob Knight", "Micah Hamady", @@ -110,7 +86,7 @@ "Greg Caporaso", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Justin Kuczynski" __email__ = "justinak@gmail.com" __status__ = "Prototype" @@ -151,8 +127,7 @@ """ m = array(m) row_norms = sqrt(sum(square(m), axis=1, keepdims=True)) - result = m / row_norms - return result + return m / row_norms def trans_chisq(m): @@ -185,8 +160,7 @@ """ m = array(m) row_sums = sum(m, axis=1, keepdims=True) - result = m / row_sums - return result + return m / row_sums def trans_hellinger(m): @@ -200,8 +174,7 @@ """ m = array(m) row_sums = sum(m, axis=1, keepdims=True) - result = sqrt(m / row_sums) - return result + return sqrt(m / row_sums) def dist_bray_curtis(datamtx, strict=True): @@ -360,7 +333,6 @@ r1 = datamtx[i] for j in range(i): r2 = datamtx[j] - dist = 0.0 net = abs(r1 - r2) / (r1 + r2) num_nonzeros = 0 for k in range(r1.size): @@ -577,7 +549,6 @@ r1 = datamtx[i] for j in range(i): r2 = datamtx[j] - rowdiff = r2 - r1 dist = sum(abs(r1 - r2) / coldiffs) dists[i, j] = dists[j, i] = dist @@ -926,10 +897,7 @@ return zeros((0, 0), "d") rowmeans = mean(datamtx, axis=1) - rowstds = std(datamtx, axis=1) - dists = zeros((numrows, numrows), "d") - n = float(numrows) for i in range(numrows): r1 = datamtx[i, :] @@ -1487,7 +1455,7 @@ if __name__ == "__main__": - """ just a test run""" + """just a test run""" matrix1 = array( [ [10, 8, 4, 1], diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/geometry.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/geometry.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/geometry.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/geometry.py 2021-10-12 00:17:34.000000000 +0000 @@ -2,37 +2,29 @@ 
"""Code for geometric operations, e.g. distances and center of mass.""" from numpy import ( - allclose, any, append, array, - c_, cos, delete, - dot, exp, - identity, insert, linalg, log, mean, min, - minimum, newaxis, nonzero, - ones, pi, sin, sqrt, sum, take, - vstack, ) -from numpy.linalg import norm __author__ = "Sandra Smit" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Sandra Smit", "Gavin Huttley", @@ -42,7 +34,7 @@ "Helmut Simon", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Sandra Smit" __email__ = "sandra.smit@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/__init__.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/__init__.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -17,7 +17,7 @@ __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Gavin Huttley", "Peter Maxwell", @@ -29,7 +29,7 @@ "Ben Kaehler", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/markov.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/markov.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/markov.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/markov.py 2021-10-12 00:17:34.000000000 +0000 @@ -8,10 +8,10 @@ Float = numpy.core.numerictypes.sctype2char(float) __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent 
Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/matrix_exponential_integration.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/matrix_exponential_integration.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/matrix_exponential_integration.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/matrix_exponential_integration.py 2021-10-12 00:17:34.000000000 +0000 @@ -19,7 +19,7 @@ __copyright__ = "Copyright 2007-2014, The Cogent Project" __credits__ = ["Ben Kaehler", "Von Bing Yap", "Gavin Huttley", "Ananias Iliadis"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Ben Kaehler" __email__ = "benjamin.kaehler@anu.edu.au" __status__ = "Production" @@ -48,7 +48,7 @@ exponentiator -- Exponentiator used in Van Loan method. Defaults to RobustEstimator. """ - self.Q = Q + super(VanLoanIntegratingExponentiator, self).__init__(Q) Qdim = len(Q) if R is None: self.R = identity(Qdim) @@ -76,7 +76,7 @@ """ Q -- a diagonisable matrix. 
""" - self.Q = Q + super(VonBingIntegratingExponentiator, self).__init__(Q) self.roots, self.evT = eig(Q) self.evI = inv(self.evT.T) # Remove following check if performance is a concern diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/matrix_exponentiation.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/matrix_exponentiation.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/matrix_exponentiation.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/matrix_exponentiation.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,4 +1,3 @@ -#!/usr/bin/env python # 4 implementations of P = exp(Q*t) # APIs along the lines of: # exponentiator = WhateverExponenentiator(Q or Q derivative(s)) @@ -14,14 +13,14 @@ import numpy -from numpy.linalg import LinAlgError, eig, inv, solve +from numpy.linalg import eig, inv, solve __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Zongzhi Liu"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/matrix_logarithm.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/matrix_logarithm.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/matrix_logarithm.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/matrix_logarithm.py 2021-10-12 00:17:34.000000000 +0000 @@ -8,7 +8,7 @@ from numpy import allclose, argmin, array, diag, dot, exp, eye from numpy import inner as innerproduct -from numpy import isclose, log, ones, pi, transpose, zeros +from numpy import isclose, log, ones, pi, zeros from numpy.linalg import eig as eigenvectors from numpy.linalg import inv as inverse from numpy.linalg import norm @@ -18,7 +18,7 @@ 
__copyright__ = "Copyright 2007-2014, The Cogent Project" __credits__ = ["Rob Knight", "Gavin Huttley", "Von Bing Yap", "Ben Kaehler"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/measure.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/measure.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/measure.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/measure.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,4 +1,6 @@ -from numpy import array, diag, diagonal, dot, eye, log, sqrt +from math import fsum + +from numpy import array, diag, diagonal, dot, eye, isclose, log, sqrt from numpy.linalg import slogdet from numpy.testing import assert_allclose, assert_equal @@ -6,14 +8,12 @@ from cogent3.maths.util import safe_p_log_p, validate_freqs_array -from .util import safe_log - __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -115,9 +115,17 @@ except ValueError as err: raise AssertionError("freqs not valid") from err - H_mn = safe_p_log_p(freqs1 / 2 + freqs2 / 2).sum() - mn_H = sum([sum(i) for i in map(safe_p_log_p, [freqs1, freqs2])]) / 2 - return H_mn - mn_H + H_mn = fsum(safe_p_log_p(freqs1 / 2 + freqs2 / 2)) + mn_H = fsum([fsum(i) for i in map(safe_p_log_p, [freqs1, freqs2])]) / 2 + jsd_ = H_mn - mn_H + if jsd_ < 0 and isclose(jsd_, 0, atol=1e-10): + jsd_ = 0 + elif jsd_ < 0: + raise ArithmeticError( + f"{jsd_} is negative and below defined precision threshold" + ) + + return jsd_ @cogent3.util.misc.extend_docstring_from(jsd) diff 
-Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/optimisers.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/optimisers.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/optimisers.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/optimisers.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,6 +1,3 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - """Local or Global-then-local optimisation with progress display """ @@ -18,10 +15,10 @@ LocalOptimiser = Powell __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Andrew Butterfield", "Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -109,7 +106,7 @@ if not acceptable_inf(result): warnings.warn("Non-finite f %s from %s" % (result, x)) raise ParameterOutOfBoundsError - except (ArithmeticError, ParameterOutOfBoundsError) as detail: + except (ArithmeticError, ParameterOutOfBoundsError): result = out_of_bounds_value return result @@ -184,17 +181,7 @@ try: # Global optimisation if do_global: - if 0 and not do_local: - warnings.warn( - "local=False causes the post-global optimisation local " - '"polishing" optimisation to be skipped entirely, which seems ' - "pointless, so its meaning may change to a simple boolean " - "flag: local or global." - ) - # It also needlessly complicates this function. 
- gend = 1.0 - else: - gend = 0.9 + gend = 0.9 callback = unsteadyProgressIndicator(ui.display, "Global", 0.0, gend) gtol = [tolerance, global_tolerance][do_local] opt = GlobalOptimiser(filename=filename, interval=interval) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/period_numba.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/period_numba.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/period_numba.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/period_numba.py 2021-10-12 00:17:34.000000000 +0000 @@ -8,7 +8,7 @@ __copyright__ = "Copyright 2007-2019, The Cogent Project" __credits__ = ["Julien Epps", "Hua Ying", "Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/period.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/period.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/period.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/period.py 2021-10-12 00:17:34.000000000 +0000 @@ -8,9 +8,7 @@ multiply, pi, polyval, - power, sqrt, - sum, zeros, ) @@ -18,10 +16,10 @@ __author__ = "Hua Ying, Julien Epps and Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Julien Epps", "Hua Ying", "Gavin Huttley", "Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -35,8 +33,7 @@ s = x[n] + coeff * s_prev - s_prev2 s_prev2 = s_prev s_prev = s - pwr = sqrt(s_prev2 ** 2 + s_prev ** 2 - coeff * s_prev2 * s_prev) - return pwr + return sqrt(s_prev2 ** 2 + s_prev ** 2 - coeff * s_prev2 * s_prev) def _ipdft_inner(x, 
X, W, ulim, N): # naive python @@ -217,8 +214,7 @@ def getNumStats(self): """the number of stats computed by this calculator""" - num = [1, 3][self._return_all] - return num + return [1, 3][self._return_all] def evaluate(self, x): if self.period is None: diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/scipy_optimisers.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/scipy_optimisers.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/scipy_optimisers.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/scipy_optimisers.py 2021-10-12 00:17:34.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -69,7 +69,7 @@ if tolerance is None: tolerance = 1e-6 - fval_last = fval = numpy.inf + fval_last = numpy.inf if len(xopt) == 0: return function(xopt), xopt diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/scipy_optimize.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/scipy_optimize.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/scipy_optimize.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/scipy_optimize.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,4 +1,3 @@ -#!/usr/bin/env python # We don't want to depend on the monolithic, fortranish, # Num-overlapping, mac-unfriendly SciPy. But this # module is too good to pass up. 
It has been lightly customised for @@ -23,14 +22,9 @@ import numpy -from numpy import absolute, asarray, atleast_1d, eye, isinf, sqrt, squeeze +from numpy import absolute, asarray, atleast_1d, eye, sqrt, squeeze -try: - import linesearch # from SciPy -except ImportError: - linesearch = None - # These have been copied from Numeric's MLab.py # I don't think they made the transition to scipy_core @@ -56,7 +50,7 @@ pymin = builtins.min pymax = builtins.max -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" _epsilon = sqrt(numpy.finfo(float).eps) @@ -154,8 +148,6 @@ tol2 = 2.0 * tol1 xmid = 0.5 * (a + b) if abs(x - xmid) < (tol2 - 0.5 * (b - a)): # check for convergence - xmin = x - fval = fx break infinities_present = [f for f in [fw, fv, fx] if numpy.isposinf(f)] if infinities_present or (abs(deltax) <= tol1): diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/simannealingoptimiser.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/simannealingoptimiser.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/simannealingoptimiser.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/simannealingoptimiser.py 2021-10-12 00:17:34.000000000 +0000 @@ -19,10 +19,10 @@ __author__ = "Andrew Butterfield and Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Andrew Butterfield", "Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/solve.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/solve.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/solve.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/solve.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,10 +1,10 @@ 
#!/usr/bin/env python __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/contingency.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/contingency.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/contingency.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/contingency.py 2021-10-12 00:17:34.000000000 +0000 @@ -2,17 +2,16 @@ from numpy.random import shuffle from numpy.testing import assert_allclose -from cogent3.format.table import formatted_cells, rich_html, simple_format from cogent3.maths.stats import chisqprob -from cogent3.maths.stats.test import G_fit, G_ind -from cogent3.util.dict_array import DictArray, DictArrayTemplate +from cogent3.maths.stats.test import G_fit +from cogent3.util.dict_array import DictArray __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -280,7 +279,7 @@ else: pval = estimate_pval(self.observed.array, calc_chisq, num_reps=shuffled) title = "Chisq-test for independence" - result = TestResult( + return TestResult( self.observed, self.expected, self.residuals, @@ -290,7 +289,6 @@ pval, test_name=title, ) - return result def G_independence(self, pseudo_count=0, williams=True, shuffled=0): """performs the independence G test @@ -334,7 +332,7 @@ if amendments: title = f"{title} (with {amendments})" 
- result = TestResult( + return TestResult( obs, exp, self.residuals, @@ -344,7 +342,6 @@ pval, test_name=title, ) - return result def G_fit(self, williams=True): """performs the goodness-of-fit G test @@ -360,7 +357,7 @@ if williams: title = f"{title} (with Williams correction)" - result = TestResult( + return TestResult( self.observed, self.expected, self.residuals, @@ -370,15 +367,13 @@ pval, test_name=title, ) - return result def to_dict(self): - result = dict( + return dict( observed=self.observed.to_dict(), expected=self.expected.to_dict(), residuals=self.residuals.to_dict(), ) - return result class TestResult: diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/distribution.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/distribution.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/distribution.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/distribution.py 2021-10-12 00:17:34.000000000 +0000 @@ -30,10 +30,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Sandra Smit", "Gavin Huttley", "Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/information_criteria.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/information_criteria.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/information_criteria.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/information_criteria.py 2021-10-12 00:17:34.000000000 +0000 @@ -2,10 +2,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" 
__credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/__init__.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/__init__.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -20,7 +20,7 @@ __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Gavin Huttley", "Rob Knight", @@ -29,7 +29,7 @@ "Micah Hamady", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/jackknife.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/jackknife.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/jackknife.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/jackknife.py 2021-10-12 00:17:34.000000000 +0000 @@ -4,16 +4,38 @@ __author__ = "Anuj Pahwa, Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Anuj Pahwa", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" def index_gen(length): + """returns a callable + + Parameters + ---------- + length : int + length of series of integers to generate + + Returns + ------- + callable + + Notes + ----- + When invoked with an int, returns all indices except that 
provided. + The result can be used with a numpy.take(data, indices). + + >>> gen_series = index_gen(4) + >>> gen_series(0) + [1, 2, 3] + >>> gen_series(1) + [0, 2, 3] + """ data = tuple(range(length)) def gen(i): diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/kendall.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/kendall.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/kendall.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/kendall.py 2021-10-12 00:17:34.000000000 +0000 @@ -13,10 +13,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -83,7 +83,6 @@ """ ranked = as_paired_ranks(x, y) n = len(ranked) - denom = n * (n - 1) / 2 con = 0 discor = 0 x_tied = 0 diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/ks.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/ks.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/ks.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/ks.py 2021-10-12 00:17:34.000000000 +0000 @@ -25,10 +25,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -74,7 +74,6 @@ m = 2 * k - 1 h = k - n * statistic H = ones(m ** 2, "d") - Q = zeros(m ** 2, "d") for i in range(m): for j in range(m): if i - j + 1 < 0: diff -Nru 
python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/number.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/number.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/number.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/number.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,5 +1,5 @@ from collections import Counter, defaultdict -from collections.abc import Mapping, MutableMapping +from collections.abc import MutableMapping import numpy @@ -7,10 +7,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -92,20 +92,17 @@ return result def expanded_values(self): - values = list(self.values()) - return values + return list(self.values()) def copy(self): data = self.to_dict().copy() - new = self.__class__(data) - return new + return self.__class__(data) def __setitem__(self, key, val): self.__dict__[key] = val def __getitem__(self, key): - val = 0 if key not in self.__dict__ else self.__dict__[key] - return val + return 0 if key not in self.__dict__ else self.__dict__[key] def __delitem__(self, key): del self.__dict__[key] @@ -127,7 +124,16 @@ return self def __repr__(self): - return repr(self.__dict__) + return f"{self.__class__.__name__}({repr(self.__dict__)})" + + def keys(self): + return list(self) + + def values(self): + return [self[k] for k in self] + + def items(self): + return [(k, self[k]) for k in self] def to_dict(self): return dict(self) @@ -136,8 +142,7 @@ """return values for these keys as a list""" if keys is None: keys = list(self) - result = [self[key] for key in keys] - return result + return [self[key] for key in keys] def to_array(self, keys=None): """return values for these 
keys as an array""" @@ -177,7 +182,7 @@ darr = DictArrayTemplate(*categories).wrap(numpy.zeros(shape, dtype=int)) for comb in product(*categories): indices = [[categories[i].index(c)] for i, c in enumerate(comb)] - darr.array[indices] = self[comb] + darr.array[tuple(indices)] = self[comb] return darr @@ -249,8 +254,7 @@ def to_freqs(self): """returns dict of {key: val/total, ..}""" - result = CategoryFreqs(self, total=self.sum) - return result + return CategoryFreqs(self, total=self.sum) def count(self, indices): """ @@ -277,8 +281,7 @@ raise IndexError(msg) counts[sub_key] += self[key] - result = self.__class__(data=counts) - return result + return self.__class__(data=counts) class CategoryFreqs(MutableMapping, SummaryStatBase): @@ -308,20 +311,17 @@ assert_allclose(self.sum, 1) def expanded_values(self): - values = list(self.values()) - return values + return list(self.values()) def copy(self): data = self.to_dict().copy() - new = self.__class__(data=data) - return new + return self.__class__(data=data) def __setitem__(self, key, val): self.__dict__[key] = val def __getitem__(self, key): - val = 0 if key not in self.__dict__ else self.__dict__[key] - return val + return 0 if key not in self.__dict__ else self.__dict__[key] def __delitem__(self, key): del self.__dict__[key] @@ -333,7 +333,16 @@ return iter(self.__dict__) def __repr__(self): - return repr(self.__dict__) + return f"{self.__class__.__name__}({repr(self.__dict__)})" + + def keys(self): + return list(self) + + def values(self): + return [self[k] for k in self] + + def items(self): + return [(k, self[k]) for k in self] def to_dict(self): return dict(self) @@ -342,8 +351,7 @@ """return values for these keys as a list""" if keys is None: keys = list(self) - result = [self[key] for key in keys] - return result + return [self[key] for key in keys] def to_array(self, keys=None): """return just these keys as an array""" @@ -358,8 +366,7 @@ def to_normalized(self): """returns rescaled self so sum is 1""" - 
result = CategoryFreqs(self, total=self.sum, assert_unity=True) - return result + return CategoryFreqs(self, total=self.sum, assert_unity=True) class NumberCounter(CategoryCounter): diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/period.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/period.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/period.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/period.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,4 +1,4 @@ -from random import choice, random, shuffle +from random import choice import numpy @@ -14,10 +14,10 @@ __author__ = "Hua Ying, Julien Epps and Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Julien Epps", "Hua Ying", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/special.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/special.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/special.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/special.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Translations of functions from Release 2.3 of the Cephes Math Library, (c) Stephen L. Moshier 1984, 1995. 
""" @@ -7,10 +6,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Rob Knight", "Sandra Smit", "Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -458,18 +457,13 @@ def lgam(x): """Natural log of the gamma fuction: see Cephes docs for details""" - sgngam = 1 if x < -34: q = -x w = lgam(q) p = floor(q) if p == q: raise OverflowError("lgam returned infinity.") - i = p - if i & 1 == 0: - sgngam = -1 - else: - sgngam = 1 + z = q - p if z > 0.5: p += 1 @@ -479,6 +473,7 @@ raise OverflowError("lgam returned infinity.") z = LOGPI - log(z) - w return z + if x < 13: z = 1 p = 0 @@ -494,10 +489,7 @@ p += 1 u = x + p if z < 0: - sgngam = -1 z = -z - else: - sgngam = 1 if u == 2: return log(z) p -= 2 @@ -1121,11 +1113,11 @@ def incbi(aa, bb, yy0): """Incomplete beta inverse function. See Cephes for docs.""" # handle easy cases first - i = 0 if yy0 <= 0: return 0.0 elif yy0 >= 1.0: return 1.0 + # define inscrutable parameters x0 = 0.0 yl = 0.0 diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/test.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/test.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/test.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/test.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Provides standard statistical tests. Tests produce statistic and P-value. 
""" @@ -56,7 +55,7 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Gavin Huttley", "Rob Knight", @@ -69,7 +68,7 @@ "Michael Dwan", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -604,7 +603,7 @@ AttributeError, TypeError, FloatingPointError, - ) as e: + ): # invalidate if the sample sizes are wrong, the values aren't numeric or # aren't present, etc. result = (None, None) @@ -1172,7 +1171,6 @@ Sx = npsum(x) Sy = npsum(y) Sxx = npsum(x * x) - Syy = npsum(y * y) Sxy = npsum(x * y) det = Sxx * N - Sx * Sx return (Sxy * N - Sy * Sx) / det, (Sxx * Sy - Sx * Sxy) / det @@ -1376,7 +1374,7 @@ """ try: return chi_high(-2 * npsum(list(map(log, probs))), 2 * len(probs)) - except OverflowError as e: + except OverflowError: return 0.0 @@ -1660,17 +1658,14 @@ combined.sort(order="stat") prev = None start = None - ties = False T = 0.0 for index in range(combined.shape[0]): value = combined["stat"][index] - sample = combined["sample"][index] if value == prev and start is None: start = index continue if value != prev and start is not None: - ties = True ave_rank = _average_rank(start, index) num_tied = index - start + 1 T += num_tied ** 3 - num_tied diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/maths/util.py python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/util.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/maths/util.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/util.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,44 +1,8 @@ -#!/usr/bin/env python """Provides small utility functions for numpy arrays. 
""" -from operator import __getitem__ as getitem -from operator import mul - import numpy -from numpy import ( - arange, - argmin, - argsort, - array, - clip, - compress, - concatenate, - cumsum, - identity, - less, - log, - logical_not, - maximum, - min, - newaxis, - nonzero, - pi, - product, - put, - ravel, - repeat, - reshape, - searchsorted, - sort, - sqrt, - sum, - take, - trace, - where, - zeros, -) -from numpy.random import normal, randint +from numpy import array, clip, cumsum, searchsorted, sort, sum numerictypes = numpy.core.numerictypes.sctype2char @@ -47,10 +11,10 @@ err = numpy.seterr(divide="raise") __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Sandra Smit", "Thomas La"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Development" @@ -222,3 +186,89 @@ result = data.sum(axis=axis) if not numpy.allclose(result[numpy.isnan(result) == False], 1): raise ValueError("invalid frequencies, sum(axis=1) is not equal to 1") + + +def ratios_to_proportions(total, params) -> list: + """Produces a list of N proportions from N-1 ratios and a total + + A recursive function that is the inverse of proportions_to_ratios. 
+ + Paramters + --------- + total: int + The sum of `values` the array put into proportions_to_ratios + + params: sequence + The sequence output by proportions_to_ratios, with int values + between 0 and infinity + + Returns + ------- + sequence: The `values` , the array that was put into + proportions_to_ratios to get `params` + + Examples + -------- + >>> ratios_to_proportions(1.0, [3, 1, 1]) + [0.125, 0.125, 0.375, 0.375] + """ + + if len(params) == 0: + return [total] + + assert params[0] > 0, f"Ratios must be positive: {params[0]}" + half = (len(params) + 1) // 2 + part = 1.0 / (params[0] + 1.0) # ratio -> proportion + return ratios_to_proportions(total * part, params[1:half]) + ratios_to_proportions( + total * (1.0 - part), params[half:] + ) + + +def proportions_to_ratios(values) -> list: + """Produces a list of N-1 ratios from N proportions + + An invertible map that takes `values` an array of N numbers > 0 + whose sum is total and converts to an array of N-1 numbers between 0 and + infinity. + + Parameters + ---------- + values: sequence + A sequence of N ints, where the ints + have values between 0 and 1 (non inclusive). + + Raises + ------ + AssertionError Exception + Raises if there will be a negative or 0 value in the output array. + + Returns + ------- + list: returns a list of size N-1 ints, where the unts take values + between 0 and infinity. + + Notes: + ------ + The function recursively halves the list into left side and right + side (in that order) and for each halving divides the sum of the right half by the + sum of the left half and adds the resulting value to the returned list. 
+ + Examples + ------- + >>> proportions_to_ratios([0.125, 0.125, 0.375, 0.375]) + [3, 1, 1] + + >>> proportions_to_ratios([0.1, 0.2, 0.9, 0]) + AssertionError + """ + if len(values) == 1: + return [] + half = len(values) // 2 + (num, denom) = (sum(values[half:]), sum(values[:half])) + assert num > 0 and denom > 0 + ratio = num / denom + return ( + [ratio] + + proportions_to_ratios(values[:half]) + + proportions_to_ratios(values[half:]) + ) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/blast.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/blast.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/blast.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/blast.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,7 +1,5 @@ -#!/usr/bin/env python """Parsers for blast, psi-blast and blat. """ -from cogent3.parse.record import RecordError from cogent3.parse.record_finder import ( DelimitedRecordFinder, LabeledRecordFinder, @@ -10,10 +8,10 @@ __author__ = "Micah Hamady" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Micah Hamady", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Micah Hamady" __email__ = "hamady@colorado.edu" __status__ = "Prototype" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/blast_xml.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/blast_xml.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/blast_xml.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/blast_xml.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,22 +1,19 @@ -#!/usr/bin/env python """Parsers for XML output of blast, psi-blast and blat. 
""" __author__ = "Kristian Rother" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __contributors__ = ["Micah Hamady"] __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Kristian Rother" __email__ = "krother@rubor.de" __status__ = "Prototype" import xml.dom.minidom -from operator import eq as _eq from operator import gt as _gt -from operator import le as _le from operator import lt as _lt from cogent3.parse.blast import MinimalBlastParser9, MinimalPsiBlastParser9 @@ -261,19 +258,14 @@ # code below copied from BlastResult, unchanged. mp = parser(data, True) - for props, rec_data in mp: - - iteration = 1 - if self.ITERATION in props: - iteration = int(props[self.ITERATION]) - + for _, rec_data in mp: hits = [] # check if found any hits if len(rec_data) > 1: for h in rec_data[1:]: hits.append(dict(list(zip(rec_data[0], h)))) else: - hits.append(dict(list(zip(rec_data[0], ["" for x in rec_data[0]])))) + hits.append(dict(list(zip(rec_data[0], ["" for _ in rec_data[0]])))) # get blast version of query id query_id = hits[0][self.QUERY_ID] diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/cigar.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/cigar.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/cigar.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/cigar.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Parsers for the cigar format Cigar stands for Compact Idiosyncratic gapped Alignment Report and defines the sequence @@ -19,14 +18,14 @@ import re from cogent3 import DNA, make_aligned_seqs -from cogent3.core.location import LostSpan, Map, Span, _LostSpan +from cogent3.core.location import LostSpan, Map, Span __author__ = "Hua Ying" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The 
Cogent Project" __credits__ = ["Hua Ying"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Hua Ying" __email__ = "hua.ying@anu.edu.au" __status__ = "Production" @@ -75,8 +74,7 @@ if isinstance(seq, str): seq = moltype.make_seq(seq) map = cigar_to_map(cigar_text) - aligned_seq = seq.gapped_by_map(map) - return aligned_seq + return seq.gapped_by_map(map) def _slice_by_aln(map, left, right): @@ -165,5 +163,4 @@ data[seqname] = seq[seq_loc[0] : seq_loc[1]].gapped_by_map(m) else: data[seqname] = DNA.make_seq("-" * (aln_loc[1] - aln_loc[0])) - aln = make_aligned_seqs(data) - return aln + return make_aligned_seqs(data) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/cisbp.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/cisbp.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/cisbp.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/cisbp.py 2021-10-12 00:17:34.000000000 +0000 @@ -8,7 +8,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/clustal.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/clustal.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/clustal.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/clustal.py 2021-10-12 00:17:34.000000000 +0000 @@ -22,10 +22,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Sandra Smit", "Gavin Huttley", "Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = 
"Gavin.Huttley@anu.edu.au" __status__ = "Development" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/cogent3_json.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/cogent3_json.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/cogent3_json.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/cogent3_json.py 2021-10-12 00:17:34.000000000 +0000 @@ -8,10 +8,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Development" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/dialign.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/dialign.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/dialign.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/dialign.py 2021-10-12 00:17:34.000000000 +0000 @@ -6,10 +6,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/ebi.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/ebi.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/ebi.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/ebi.py 2021-10-12 00:17:34.000000000 +0000 @@ -3,7 +3,7 @@ """ import sys -from pprint import pformat, pprint +from pprint import pprint from cogent3.core.sequence import Sequence from cogent3.parse.record import 
FieldError, RecordError @@ -11,13 +11,12 @@ DelimitedRecordFinder, LabeledRecordFinder, TailedRecordFinder, - is_empty, ) -from cogent3.util.misc import NestedSplitter, curry, identity, list_flatten +from cogent3.util.misc import NestedSplitter, curry, list_flatten __author__ = "Zongzhi Liu and Sandra Smit" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Zongzhi Liu", "Sandra Smit", @@ -26,7 +25,7 @@ "Daniel McDonald", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Zongzhi Liu" __email__ = "zongzhi.liu@gmail.com" __status__ = "Development" @@ -254,7 +253,7 @@ try: dict(pairs) # catch error for any not splitted pair. - except ValueError as e: # dictionary update sequence element #1 has length 1; + except ValueError: # dictionary update sequence element #1 has length 1; if strict: raise ValueError("e\nFailed to get a dict from pairs: %s" % pairs) else: @@ -270,8 +269,7 @@ if len(v) > 1: pairs[i][1] = v - result = pairs_to_dict(pairs, dict_mode) - return result + return pairs_to_dict(pairs, dict_mode) def mapping_parser(line, fields, delimiters=None, flatten=list_flatten): @@ -530,8 +528,7 @@ """ lines = labeloff(lines) keyvalues = list(map(dr_itemparser, period_tail_finder(lines))) - result = pairs_to_dict(keyvalues, "always_multi_value") - return result + return pairs_to_dict(keyvalues, "always_multi_value") def dr_itemparser(lines): @@ -822,7 +819,7 @@ desc = description.rstrip(" )") try: mutation, comment = desc.split(mutation_comment_delimiter, 1) - except ValueError as e: # too many values to unpack + except ValueError: # too many values to unpack mutation, comment = desc, "" # split mutation into mut_from, mut_to @@ -830,7 +827,7 @@ mutation_delimiter = "->" try: mut_from, mut_to = list(map(strip, mutation.split(mutation_delimiter, 1))) - except ValueError as e: # too many values to unpack + except ValueError: # too many 
values to unpack mut_from, mut_to = mutation, "" # replace desc in fields with mut_from, mut_to and comment to get the @@ -863,8 +860,7 @@ """ desc_id_dict = ft_id_parser(description) desc = desc_id_dict.pop("Description") - result = dict(desc_id_dict, **ft_mutation_parser(desc)) - return result + return dict(desc_id_dict, **ft_mutation_parser(desc)) ft_description_parsers = { @@ -1504,7 +1500,6 @@ "KW": kw_parser, "FT": ft_parser, "SQ": sq_parser, - "XX": None, "PR": pr_parser, "XX": None, "FH": None, @@ -1545,7 +1540,7 @@ sequence = seq_constructor(sequence) try: header = header_constructor(header_dict, strict=strict) - except (RecordError, FieldError, ValueError) as e: + except (RecordError, FieldError, ValueError): if strict: #!! just raise is better than raise RecordError raise # RecordError, str(e) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/fasta.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/fasta.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/fasta.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/fasta.py 2021-10-12 00:17:34.000000000 +0000 @@ -14,10 +14,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Development" @@ -104,8 +104,7 @@ (lo, hi) = (hi, lo) else: assert strand == "+" - name = "%s:%s:%s-%s" % (sp, contig, lo, hi) - return name + return "%s:%s:%s-%s" % (sp, contig, lo, hi) def is_xmfa_blank_or_comment(x): @@ -155,7 +154,7 @@ try: name, info = info_maker(label) # will raise exception if bad yield name, seq_maker(seq, name=name, info=info) - except Exception as e: + except Exception: raise RecordError( "Sequence construction failed on record with 
label %s" % label ) @@ -164,7 +163,7 @@ try: name, info = info_maker(label) yield (name, seq_maker(seq, name=name, info=info)) - except Exception as e: + except Exception: continue diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/gbseq.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/gbseq.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/gbseq.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/gbseq.py 2021-10-12 00:17:34.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Matthew Wakefield" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Matthew Wakefield", "Peter Maxwell", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/gcg.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/gcg.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/gcg.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/gcg.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,10 +1,8 @@ -#!/usr/bin/env python - __author__ = "Matthew Wakefield" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Matthew Wakefield", "Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" @@ -14,11 +12,10 @@ def MsfParser(f): """Read sequences from a msf format file""" - alignmentdict = {} # parse optional header # parse optional text information # file header and sequence header are seperated by a line ending in '..' 
- line = f.readline().strip() + _ = f.readline() for line in f: line = line.strip() if line.endswith(".."): @@ -36,23 +33,23 @@ sequences = {} for line in f: line = line.strip().split() - if line and line[0] in sequences: - sequences[line[0]] += "".join(line[1:]) - elif line and line[0] in seqinfo: - sequences[line[0]] = "".join(line[1:]) + if line: + if line[0] in sequences: + sequences[line[0]] += "".join(line[1:]) + elif line[0] in seqinfo: + sequences[line[0]] = "".join(line[1:]) # consistency check if len(sequences) != len(seqinfo): warnings.warn( "Number of loaded seqs[%s] not same as " "expected[%s]." % (len(sequences), len(seqinfo)) ) - for name in sequences: - if len(sequences[name]) != seqinfo[name]: + for name, value_ in sequences.items(): + if len(value_) != seqinfo[name]: warnings.warn( "Length of loaded seqs [%s] is [%s] not " "[%s] as expected." % (name, len(sequences[name]), seqinfo[name]) ) # yield sequences - for name in sequences: - yield (name, sequences[name]) + yield from sequences.items() diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/genbank.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/genbank.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/genbank.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/genbank.py 2021-10-12 00:17:34.000000000 +0000 @@ -11,10 +11,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell", "Matthew Wakefield", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/gff.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/gff.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/gff.py 2020-12-20 23:35:03.000000000 
+0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/gff.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,7 +1,7 @@ #!/usr/bin/env python __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Peter Maxwell", "Matthew Wakefield", @@ -9,7 +9,7 @@ "Christopher Bradley", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -107,8 +107,7 @@ name = attributes[attributes.find('"') + 1 :] if '"' in name: name = name[: name.find('"')] - attr_dict = {"ID": name, "Info": attributes} - return attr_dict + return {"ID": name, "Info": attributes} def parse_attributes_gff3(attributes, span): diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/greengenes.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/greengenes.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/greengenes.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/greengenes.py 2021-10-12 00:17:34.000000000 +0000 @@ -11,10 +11,10 @@ __author__ = "Daniel McDonald" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Daniel McDonald" __email__ = "daniel.mcdonald@colorado.edu" __status__ = "Prototype" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/__init__.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/__init__.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -29,7 +29,7 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = 
"Copyright 2007-2021, The Cogent Project" __credits__ = [ "Gavin Huttley", "Peter Maxwell", @@ -48,7 +48,7 @@ "Marcin Cieslik", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/jaspar.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/jaspar.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/jaspar.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/jaspar.py 2021-10-12 00:17:34.000000000 +0000 @@ -10,7 +10,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/locuslink.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/locuslink.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/locuslink.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/locuslink.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Parsers for the LL_tmpl file from LocusLink. 
Notes: @@ -53,10 +52,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Development" @@ -373,12 +372,3 @@ for record in LLFinder(lines): curr = LinesToLocusLink(record) yield curr - - -if __name__ == "__main__": - from sys import argv, stdout - - filename = argv[1] - count = 0 - for record in LocusLinkParser(open(filename)): - print(record) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/ncbi_taxonomy.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/ncbi_taxonomy.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/ncbi_taxonomy.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/ncbi_taxonomy.py 2021-10-12 00:17:34.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Jason Carnes" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Jason Carnes", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Jason Carnes" __email__ = "jason.carnes@sbri.org" __status__ = "Development" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/newick.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/newick.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/newick.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/newick.py 2021-10-12 00:17:34.000000000 +0000 @@ -22,10 +22,10 @@ EOT = None __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Andrew Butterfield", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = 
"2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/nexus.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/nexus.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/nexus.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/nexus.py 2021-10-12 00:17:34.000000000 +0000 @@ -13,10 +13,10 @@ __author__ = "Catherine Lozupone" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Catherine Lozuopone", "Rob Knight", "Micah Hamady", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Catherine Lozupone" __email__ = "lozupone@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/paml_matrix.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/paml_matrix.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/paml_matrix.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/paml_matrix.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,18 +1,14 @@ -#!/usr/bin/env python - import numpy -from cogent3.evolve import substitution_model - Float = numpy.core.numerictypes.sctype2char(float) __author__ = "Matthew Wakefield" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Matthew Wakefield", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/paml.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/paml.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/paml.py 2020-12-20 23:35:03.000000000 
+0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/paml.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,32 +1,43 @@ #!/usr/bin/env python +from io import TextIOWrapper + __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" -def PamlParser(f): - d = f.readline().split() - numseqs, seqlen = int(d[0]), int(d[1]) - for i in range(numseqs): - seqname = f.readline().strip() - if not seqname: - raise ValueError("Sequence name missing") - currseq = [] - length = 0 - while length < seqlen: - seq_line = f.readline() - if not seq_line: - raise ValueError( - 'Sequence "%s" is short: %s < %s' % (seqname, length, seqlen) - ) - seq_line = seq_line.strip() - length += len(seq_line) - currseq.append(seq_line) +def PamlParser(data): + if isinstance(data, TextIOWrapper): + data = data.read().splitlines() + num_seqs, seq_len = [int(v) for v in data[0].split()] + curr_seq = [] + curr_length = 0 + seqname = None + n = 0 + for line in data[1:]: + line = line.strip() + if not line: + continue + + if seqname is None: + seqname = line + continue + + curr_length += len(line) + curr_seq.append(line) + if curr_length == seq_len: + yield seqname, "".join(curr_seq) + + seqname = None + curr_seq = [] + curr_length = 0 + n += 1 - yield (seqname, "".join(currseq)) + if n != num_seqs: + raise ValueError(f"read {n} seqs, expected {num_seqs}") diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/phylip.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/phylip.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/phylip.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/phylip.py 2021-10-12 00:17:34.000000000 +0000 @@ -4,10 +4,10 @@ 
__author__ = "Micah Hamady" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Micah Hamady", "Peter Maxwell", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Micah Hamady" __email__ = "hamady@colorado.edu" __status__ = "Prototype" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/psl.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/psl.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/psl.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/psl.py 2021-10-12 00:17:34.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Gavin Huttley, Anuj Pahwa" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell", "Gavin Huttley", "Anuj Pahwa"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Development" @@ -77,5 +77,4 @@ version = next(parser) header = next(parser) rows = [row for row in parser] - table = Table(header=header, data=rows, title=version) - return table + return Table(header=header, data=rows, title=version) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/rdb.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/rdb.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/rdb.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/rdb.py 2021-10-12 00:17:34.000000000 +0000 @@ -6,16 +6,16 @@ from cogent3.core.alphabet import AlphabetError from cogent3.core.info import Info -from cogent3.core.sequence import RnaSequence, Sequence +from cogent3.core.sequence import RnaSequence from cogent3.parse.record import RecordError from cogent3.parse.record_finder import DelimitedRecordFinder 
__author__ = "Sandra Smit" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Sandra Smit", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Sandra Smit" __email__ = "sandra.smit@colorado.edu" __status__ = "Development" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/record_finder.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/record_finder.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/record_finder.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/record_finder.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Provides some classes for treating files as sequences of records. Typically more useful as subclasses. Covers the three main types of records: @@ -13,14 +12,14 @@ DelimitedRecordFinder except that it accept a is_tail function instead of a str. Note that its default constuctor is rstrip instead of strip. 
""" -from cogent3.parse.record import FieldError, RecordError +from cogent3.parse.record import RecordError __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Gavin Huttley", "Zongzhi Liu"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/record.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/record.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/record.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/record.py 2021-10-12 00:17:34.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Development" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/sequence.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/sequence.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/sequence.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/sequence.py 2021-10-12 00:17:34.000000000 +0000 @@ -21,7 +21,7 @@ __author__ = "Cath Lawrence" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Cath Lawrence", "Gavin Huttley", @@ -30,7 +30,7 @@ "Rob Knight", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru 
python-cogent-2020.12.21a+dfsg/src/cogent3/parse/table.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/table.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/table.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/table.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,21 +1,17 @@ -#!/usr/bin/env python - import csv import pathlib -from collections.abc import Callable - from cogent3.util.misc import open_ -from cogent3.util.warning import discontinued +from cogent3.util.warning import deprecated from .record_finder import is_empty __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -85,9 +81,22 @@ self.columns = indices def __call__(self, lines): + """a generator that yields individual lines processed according to the + provided conditions + + Parameters + ---------- + lines: path or iterable + If file path, handles file open and close. Will expand user + component (i.e. '~/') of path. 
+ + Notes + ----- + Elements within a row are strings + """ input_from_path = False if isinstance(lines, str) or isinstance(lines, pathlib.Path): - path = pathlib.Path(lines) + path = pathlib.Path(lines).expanduser() input_from_path = path.exists() if input_from_path: @@ -129,16 +138,47 @@ def load_delimited( filename, header=True, - delimiter=",", + sep=",", + delimiter=None, with_title=False, with_legend=False, limit=None, ): - if limit is not None: + """ + basic processing of tabular data + + Parameters + ---------- + filename: Path + path to delimited file (can begin with ~) + header: bool + whether the first line of the file (after the title, if present) is a header + sep: str + the character separating columns + with_title: bool + whether the first line of the file is a title + with_legend: bool + whether the last line of the file is a legend + limit: int + maximum number of lines to read from the file + + Returns + ------- + header, rows, title, legend + + Notes + ----- + All row values remain as strings. 
+ """ + if delimiter: + sep = delimiter + deprecated("argument", "delimiter", "sep", "2022.1") + + if limit is not None and header: limit += 1 # don't count header line with open_(filename) as f: - reader = csv.reader(f, dialect="excel", delimiter=delimiter) + reader = csv.reader(f, dialect="excel", delimiter=sep) title = "".join(next(reader)) if with_title else "" rows = [] num_lines = 0 diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/tinyseq.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/tinyseq.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/tinyseq.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/tinyseq.py 2021-10-12 00:17:34.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Matthew Wakefield" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Matthew Wakefield", "Peter Maxwell", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/tree.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/tree.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/tree.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/tree.py 2021-10-12 00:17:34.000000000 +0000 @@ -22,10 +22,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Catherine Lozupone", "Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Development" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/tree_xml.py 
python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/tree_xml.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/tree_xml.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/tree_xml.py 2021-10-12 00:17:34.000000000 +0000 @@ -32,10 +32,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/parse/unigene.py python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/unigene.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/parse/unigene.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/unigene.py 2021-10-12 00:17:34.000000000 +0000 @@ -14,10 +14,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Development" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/consensus.py python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/consensus.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/consensus.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/consensus.py 2021-10-12 00:17:34.000000000 +0000 @@ -11,10 +11,10 @@ __author__ = "Matthew Wakefield" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Matthew Wakefield", "Peter Maxwell", "Gavin Huttley", "Ben Kaehler"] __license__ = "BSD-3" -__version__ = 
"2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/__init__.py python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/__init__.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,8 +1,5 @@ -#!/usr/bin/env python - __all__ = [ "consensus", - "distance", "least_squares", "maximum_likelihood", "nj", @@ -11,10 +8,10 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Peter Maxwell", "Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/least_squares.py python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/least_squares.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/least_squares.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/least_squares.py 2021-10-12 00:17:34.000000000 +0000 @@ -12,10 +12,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/maximum_likelihood.py python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/maximum_likelihood.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/maximum_likelihood.py 2020-12-20 
23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/maximum_likelihood.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,17 +1,13 @@ -#!/usr/bin/env python' -from math import exp - from .least_squares import WLS -from .tree_collection import make_trees # only for back compat. from .tree_collection import LogLikelihoodScoredTreeCollection from .tree_space import TreeEvaluator, ancestry2tree __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/nj.py python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/nj.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/nj.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/nj.py 2021-10-12 00:17:34.000000000 +0000 @@ -21,10 +21,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/tree_collection.py python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/tree_collection.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/tree_collection.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/tree_collection.py 2021-10-12 00:17:34.000000000 +0000 @@ -6,10 +6,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, 
The Cogent Project" __credits__ = ["Peter Maxwell", "Ben Kaehler"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/tree_space.py python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/tree_space.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/tree_space.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/tree_space.py 2021-10-12 00:17:34.000000000 +0000 @@ -11,10 +11,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -156,10 +156,9 @@ assert set(ordered_names).issubset(all_names) else: ordered_names = self.names - names = list(fixed_names) + [ + return list(fixed_names) + [ n for n in ordered_names if n not in fixed_names_set ] - return names @UI.display_wrap def trex( diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/util.py python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/util.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/util.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/util.py 2021-10-12 00:17:34.000000000 +0000 @@ -9,10 +9,10 @@ # reconstruction algorithms. 
__author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "pm67nz@gmail.com" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/recalculation/calculation.py python-cogent-2021.10.12a1+dfsg/src/cogent3/recalculation/calculation.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/recalculation/calculation.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/recalculation/calculation.py 2021-10-12 00:17:34.000000000 +0000 @@ -2,7 +2,6 @@ import os import time -import warnings import numpy @@ -17,10 +16,10 @@ TRACE_SCALE = 100000 __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -213,8 +212,8 @@ other_cells.append(cell) self._cells = self.opt_pars + other_cells data_sets = [[0], [0, 1]][self.with_undo] - self.cell_values = [[None] * len(self._cells) for switch in data_sets] - self.arg_ranks = [[] for cell in self._cells] + self.cell_values = [[None] * len(self._cells) for _ in data_sets] + self.arg_ranks = [[] for _ in self._cells] for (i, cell) in enumerate(self._cells): cell.rank = i cell.consequences = {} @@ -236,7 +235,7 @@ cell.prime(self.cell_values) except KeyboardInterrupt: raise - except Exception as detail: + except Exception: print(("Failed initial calculation of %s" % cell.name)) raise else: @@ -352,11 +351,10 @@ def get_value_array(self): """This being a caching function, you can ask it for its 
current input! Handy for initialising the optimiser.""" - values = [ + return [ p.transform_to_optimiser(self._get_current_cell_value(p)) for p in self.opt_pars ] - return values # get_bounds_vectors and testoptparvector make up the old LikelihoodFunction # interface expected by the optimiser. @@ -532,9 +530,9 @@ elapsed[cell.rank] = t1 - t0 tds = [] - for ((name, cells), width) in self._cellsGroupedForDisplay: - text = "".join([" +"[cell.rank in elapsed] for cell in cells]) - elap = sum([elapsed.get(cell.rank, 0) for cell in cells]) + for ((_, cells), width) in self._cellsGroupedForDisplay: + text = "".join(" +"[cell.rank in elapsed] for cell in cells) + elap = sum(elapsed.get(cell.rank, 0) for cell in cells) if len(text) > width - 4: edge_width = min(len(text), (width - 4 - 3)) // 2 elipsis = [" ", "..."][not not text.strip()] diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/recalculation/definition.py python-cogent-2021.10.12a1+dfsg/src/cogent3/recalculation/definition.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/recalculation/definition.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/recalculation/definition.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,4 +1,3 @@ -#!/usr/bin/env python """A recalculation engine, something like a spreadsheet. Goals: @@ -64,13 +63,12 @@ the one to be used next for recycling. 
""" -import warnings from collections import defaultdict import numpy -from cogent3.maths.stats.distribution import chdtri +from cogent3.maths.util import proportions_to_ratios, ratios_to_proportions from cogent3.util.dict_array import DictArrayTemplate from .calculation import ConstCell, EvaluatedCell, LogOptPar, OptPar @@ -85,10 +83,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -144,10 +142,9 @@ def make_cell(self, *args): calc = self.make_calc_function() - cell = EvaluatedCell( + return EvaluatedCell( self.name, calc, args, recycling=self.recycling, default=self.default ) - return cell def make_cells(self, input_soup, variable=None): # input soups contains all necessary values for calc on self. 
@@ -192,11 +189,13 @@ """Uses a PartitionDefn (ie: N-1 optimiser parameters) to make an array of floats with weighted average of 1.0""" - def __init__(self, weights, name): + def __init__(self, weights, name=None): N = len(weights.bin_names) partition = PartitionDefn(size=N, name=name + "_partition") partition.user_param = False - CalculationDefn.__init__(self, weights, partition, name=name + "_distrib") + super(WeightedPartitionDefn, self).__init__( + weights, partition, name=name + "_distrib" + ) def calc(self, weights, values): scale = numpy.sum(weights * values) @@ -417,37 +416,8 @@ pass -def _proportions(total, params): - """List of N proportions from N-1 ratios - - >>> _proportions(1.0, [3, 1, 1]) - [0.125, 0.125, 0.375, 0.375]""" - if len(params) == 0: - return [total] - half = (len(params) + 1) // 2 - part = 1.0 / (params[0] + 1.0) # ratio -> proportion - return _proportions(total * part, params[1:half]) + _proportions( - total * (1.0 - part), params[half:] - ) - - -def _unpack_proportions(values): - """List of N-1 ratios from N proportions""" - if len(values) == 1: - return [] - half = len(values) // 2 - (num, denom) = (sum(values[half:]), sum(values[:half])) - assert num > 0 and denom > 0 - ratio = num / denom - return ( - [ratio] - + _unpack_proportions(values[:half]) - + _unpack_proportions(values[half:]) - ) - - def _ratio_to_proportion(*ratios): - return numpy.asarray(_proportions(1.0, ratios)) + return numpy.asarray(ratios_to_proportions(1.0, ratios)) class PartitionDefn(_InputDefn): @@ -527,9 +497,8 @@ # This was originally put in its own function so as to provide a # closure containing the value of sum(value), which is no longer # required since it is now always 1.0. 
- N = len(value) assert abs(sum(value) - 1.0) < 0.00001 - ratios = _unpack_proportions(value) + ratios = proportions_to_ratios(value) ratios = [LogOptPar(name + "_ratio", scope, (1e-6, r, 1e6)) for r in ratios] partition = EvaluatedCell(name, _ratio_to_proportion, tuple(ratios)) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/recalculation/__init__.py python-cogent-2021.10.12a1+dfsg/src/cogent3/recalculation/__init__.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/recalculation/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/recalculation/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -2,10 +2,10 @@ __all__ = ["calculation", "definition", "scope", "setting"] __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/recalculation/scope.py python-cogent-2021.10.12a1+dfsg/src/cogent3/recalculation/scope.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/recalculation/scope.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/recalculation/scope.py 2021-10-12 00:17:34.000000000 +0000 @@ -14,10 +14,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -452,7 +452,7 @@ class _NonLeafDefn(_Defn): def __init__(self, *args, **kw): - _Defn.__init__(self) + super(_NonLeafDefn, self).__init__() valid_dimensions = [] 
for arg in args: assert isinstance(arg, _Defn), type(arg) @@ -508,7 +508,7 @@ def __init__( self, name=None, extra_label=None, dimensions=None, independent_by_default=None ): - _Defn.__init__(self) + super(_LeafDefn, self).__init__() if dimensions is not None: assert type(dimensions) in [list, tuple], type(dimensions) self.valid_dimensions = tuple(dimensions) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/recalculation/setting.py python-cogent-2021.10.12a1+dfsg/src/cogent3/recalculation/setting.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/recalculation/setting.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/recalculation/setting.py 2021-10-12 00:17:34.000000000 +0000 @@ -6,10 +6,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/util/checkpointing.py python-cogent-2021.10.12a1+dfsg/src/cogent3/util/checkpointing.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/util/checkpointing.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/util/checkpointing.py 2021-10-12 00:17:34.000000000 +0000 @@ -5,10 +5,10 @@ __author__ = ["Peter Maxwell", "Gavin Huttley"] -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/util/deserialise.py 
python-cogent-2021.10.12a1+dfsg/src/cogent3/util/deserialise.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/util/deserialise.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/util/deserialise.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,6 +1,5 @@ #!/usr/bin/env python import json -import os from importlib import import_module @@ -13,10 +12,10 @@ __author__ = ["Gavin Huttley"] -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -69,8 +68,7 @@ init = data.pop("not_completed_construction") args = init.pop("args") kwargs = init.pop("kwargs") - result = klass(*args, **kwargs) - return result + return klass(*args, **kwargs) def deserialise_map_spans(map_element): @@ -84,8 +82,7 @@ spans.append(instance) map_element["spans"] = spans - map_instance = map_klass(**map_element) - return map_instance + return map_klass(**map_element) def deserialise_annotation(data, parent): @@ -178,8 +175,7 @@ data["moltype"] = get_moltype(data.pop("moltype")) annotations = data.pop("annotations", None) make_seq = data["moltype"].make_seq - type_ = data.pop("type") - klass = _get_class(type_) + _ = data.pop("type") if "-" in data["seq"]: aligned = True diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/util/dict_array.py python-cogent-2021.10.12a1+dfsg/src/cogent3/util/dict_array.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/util/dict_array.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/util/dict_array.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,4 +1,3 @@ -#!/usr/bin/env python """Wrapper for numpy arrays so that they can be indexed by name >>> a = numpy.identity(3, int) @@ -31,10 +30,10 @@ __author__ = "Peter Maxwell" 
-__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Ben Kaehler"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -249,8 +248,7 @@ """a distinct numerical type for use as a DictArray key""" def __new__(cls, val): - result = int.__new__(cls, val) - return result + return int.__new__(cls, val) class DictArrayTemplate(object): @@ -390,6 +388,17 @@ def to_array(self): return self.array + def __add__(self, other): + if not isinstance(other, type(self)): + raise TypeError(f"Incompatible types: {type(self)} and {type(other)}") + + if other.template.names != self.template.names: + raise ValueError( + f"unequal dimension names {self.template.names} != {other.template.names}" + ) + + return self.template.wrap(self.array + other.array) + def __array__(self, dtype=None): array = self.array if dtype is not None: @@ -408,15 +417,20 @@ shape = self.shape result = {} if len(names) == 1: - result = {names[0][i]: self.array[i] for i in range(len(names[0]))} + result = { + names[0][i]: v.item() if hasattr(v, "item") else v + for i, v in enumerate(self.array) + } elif flatten: for indices in product(*[range(n) for n in shape]): value = self.array[indices] + value = value.item() if hasattr(value, "item") else value coord = tuple(n[i] for n, i in zip(names, indices)) result[coord] = value else: for indices in product(*[range(n) for n in shape]): value = self.array[indices] + value = value.item() if hasattr(value, "item") else value coord = tuple(n[i] for n, i in zip(names, indices)) current = result nested = coord[0] @@ -428,13 +442,12 @@ def to_rich_dict(self): data = self.array.tolist() - result = { + return { "type": get_object_provenance(self.template), "array": data, "names": self.template.names, "version": __version__, } - return result def 
to_json(self): return json.dumps(self.to_rich_dict()) @@ -532,7 +545,7 @@ def row_sum(self): """returns DictArray summed across rows""" axis = 1 if len(self.shape) == 2 else 0 - result = self.array.sum(axis=1) + result = self.array.sum(axis=axis) template = DictArrayTemplate(self.template.names[0]) return template.wrap(result) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/util/__init__.py python-cogent-2021.10.12a1+dfsg/src/cogent3/util/__init__.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/util/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/util/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -2,18 +2,19 @@ __all__ = [ "checkpointing", "deserialise", + "dict_array", "misc", - "modules", "parallel", + "progress_display", + "recode_alignment", "table", "transform", "union_dict", "warning", - "recode_alignment", ] __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Gavin Huttley", "Rob Knight", @@ -27,7 +28,7 @@ "Thomas La", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/util/misc.py python-cogent-2021.10.12a1+dfsg/src/cogent3/util/misc.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/util/misc.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/util/misc.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,9 +1,9 @@ -#!/usr/bin/env python """Generally useful utility classes and methods. 
""" import os -import pathlib import re +import shutil +import uuid import warnings import zipfile @@ -13,18 +13,19 @@ from os import path as os_path from os import remove from pathlib import Path -from random import choice, randint -from tempfile import NamedTemporaryFile, gettempdir +from random import randint +from tempfile import mkdtemp from warnings import warn from zipfile import ZipFile import numpy -from numpy import array, ceil, finfo, float64, floor, log10, logical_not, sum +from chardet import detect +from numpy import array, finfo, float64 __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Rob Knight", "Peter Maxwell", @@ -36,7 +37,7 @@ "Marcin Cieslik", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -144,6 +145,10 @@ If mode="w", returns an atomic_write() instance. 
""" + binary_mode = "b" in mode + mode = mode[:1] + + encoding = kwargs.pop("encoding") if "encoding" in kwargs else "latin-1" if mode.startswith("w"): return atomic_write(filename, mode=mode, in_zip=True) @@ -151,52 +156,135 @@ with ZipFile(filename) as zf: if len(zf.namelist()) != 1: raise ValueError("Archive is supposed to have only one record.") + opened = zf.open(zf.namelist()[0], mode=mode, **kwargs) - return TextIOWrapper(opened, encoding="latin-1") + + if binary_mode: + return opened + + return TextIOWrapper(opened, encoding=encoding) def open_(filename, mode="rt", **kwargs): """open that handles different compression""" + filename = Path(filename).expanduser().absolute() op = {".gz": gzip_open, ".bz2": bzip_open, ".zip": open_zip}.get( filename.suffix, open ) - return op(filename, mode, **kwargs) + + encoding = kwargs.pop("encoding", None) + need_encoding = mode.startswith("r") and "b" not in mode + if need_encoding: + if "encoding" not in kwargs: + with op(filename, mode="rb") as infile: + data = infile.read(100) + + encoding = detect(data) + encoding = encoding["encoding"] + + return op(filename, mode, encoding=encoding, **kwargs) + + +def _path_relative_to_zip_parent(zip_path, member_path): + """returns member_path relative to zip_path + + Parameters + ---------- + zip_path: Path + member_path: Path + + Notes + ----- + with zip_path = "parentdir/named.zip", then member_path="named/member.tsv" + or path="member.tsv" will return "named/member.tsv" + """ + zip_name = zip_path.name.replace(".zip", "") + if zip_name not in member_path.parts: + return Path(zip_name) / member_path + + return Path(*member_path.parts[member_path.parts.index(zip_name) :]) class atomic_write: """performs atomic write operations, cleans up if fails""" - def __init__(self, path, tmpdir=None, in_zip=None, mode="w"): - path = pathlib.Path(path).expanduser() + def __init__(self, path, tmpdir=None, in_zip=None, mode="w", encoding=None): + """ + + Parameters + ---------- + path + path to 
file, or relative to directory specified by in_zip + tmpdir + directory where temporary file will be created + in_zip + path to the zip archive containing path, + e.g. if in_zip="path/to/data.zip", then path="data/seqs.tsv" + Decompressing the archive will produce the "data/seqs.tsv" + mode + file writing mode + encoding + text encoding + """ + path = Path(path).expanduser() + in_zip = Path(in_zip) if isinstance(in_zip, str) else in_zip _, cmp = get_format_suffixes(path) if in_zip and cmp == "zip": in_zip = path if isinstance(in_zip, bool) else in_zip - path = pathlib.Path(str(path)[: str(path).rfind(".zip")]) + path = Path(str(path)[: str(path).rfind(".zip")]) + + if in_zip: + path = _path_relative_to_zip_parent(in_zip, path) self._path = path + self._cmp = cmp self._mode = mode self._file = None + self._encoding = encoding self._in_zip = in_zip + self._tmppath = self._make_tmppath(tmpdir) + self.succeeded = None self._close_func = ( self._close_rename_zip if in_zip else self._close_rename_standard ) - if tmpdir is None: - tmpdir = self._get_tmp_dir() - self._tmpdir = tmpdir - - def _get_tmp_dir(self): - """returns parent of destination file""" - parent = Path(self._in_zip).parent if self._in_zip else Path(self._path).parent - if not parent.exists(): - raise FileNotFoundError(f"{parent} directory does not exist") - return parent + + def _make_tmppath(self, tmpdir): + """returns path of temporary file + + Parameters + ---------- + tmpdir: Path + to directory + + Returns + ------- + full path to a temporary file + + Notes + ----- + Uses a random uuid as the file name, adds suffixes from path + """ + suffixes = ( + "".join(self._path.suffixes) + if not self._in_zip + else "".join(self._path.suffixes[:-1]) + ) + parent = self._in_zip.parent if self._in_zip else self._path.parent + name = f"{uuid.uuid4()}{suffixes}" + tmpdir = Path(mkdtemp(dir=parent)) if tmpdir is None else Path(tmpdir) + + if not tmpdir.exists(): + raise FileNotFoundError(f"{tmpdir} directory does 
not exist") + + tmp_path = tmpdir / name + return tmp_path def _get_fileobj(self): """returns file to be written to""" if self._file is None: - self._file = NamedTemporaryFile(self._mode, delete=False, dir=self._tmpdir) + self._file = open_(self._tmppath, self._mode, encoding=self._encoding) return self._file @@ -212,21 +300,22 @@ finally: src.rename(dest) + shutil.rmtree(src.parent) + def _close_rename_zip(self, src): with zipfile.ZipFile(self._in_zip, "a") as out: out.write(str(src), arcname=self._path) - src.unlink() + shutil.rmtree(src.parent) def __exit__(self, exc_type, exc_val, exc_tb): self._file.close() - tmpfile_name = Path(self._file.name) if exc_type is None: - self._close_func(tmpfile_name) + self._close_func(self._tmppath) self.succeeded = True else: self.succeeded = False - tmpfile_name.unlink() + shutil.rmtree(self._tmppath.parent) def write(self, text): """writes text to file""" @@ -314,7 +403,7 @@ """return True if obj is iterable""" try: iter(obj) - except TypeError as e: + except TypeError: return False else: return True @@ -1068,7 +1157,6 @@ starts, ends, vals = list(zip(*spans_value)) indices_distinct_vals = get_run_start_indices(vals, digits=digits) data = [] - i = 0 for index, val in indices_distinct_vals: start = starts[index] end = ends[index] diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/util/parallel.py python-cogent-2021.10.12a1+dfsg/src/cogent3/util/parallel.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/util/parallel.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/util/parallel.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,25 +1,21 @@ -#!/usr/bin/env python - import concurrent.futures as concurrentfutures -import math import multiprocessing import os -import random import sys -import threading -import time import warnings -import numpy - from cogent3.util.misc import extend_docstring_from +multiprocessing.set_start_method( + "fork" if sys.platform == "darwin" else "spawn", force=True +) 
+ __author__ = "Sheng Han Moses Koh" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Sheng Han Moses Koh", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" @@ -108,7 +104,7 @@ max_workers : int or None maximum number of workers. Defaults to 1-maximum available. use_mpi : bool - use MPI for parallel execution + use MPI for parallel execution. if_serial : str action to take if conditions will result in serial execution. Valid values are 'raise', 'ignore', 'warn'. Defaults to 'raise'. @@ -120,6 +116,14 @@ ------- imap is a generator yielding result of f(s[i]), map returns the result series + + Notes + ----- + To use MPI, you must have openmpi (use conda or your preferred package manager) + and mpi4py (use pip or conda) installed. In addition, your initial script must + have a ``if __name__ == '__main__':`` block. You then invoke your program using + + $ mpiexec -n python3 -m mpi4py.futures """ if_serial = if_serial.lower() @@ -133,7 +137,7 @@ err_msg = ( "Execution in serial. 
For parallel MPI execution, use:\n" - " $ mpirun -n 1 " + " $ mpiexec -n python3 -m mpi4py.futures " ) if COMM.Get_attr(MPI.UNIVERSE_SIZE) == 1 and if_serial == "raise": @@ -141,7 +145,7 @@ elif COMM.Get_attr(MPI.UNIVERSE_SIZE) == 1 and if_serial == "warn": warnings.warn(err_msg, UserWarning) - max_workers = max_workers or 0 + max_workers = max_workers or 1 if max_workers > COMM.Get_attr(MPI.UNIVERSE_SIZE): warnings.warn( @@ -149,7 +153,6 @@ ) max_workers = min(max_workers, COMM.Get_attr(MPI.UNIVERSE_SIZE) - 1) - if not chunksize: chunksize = set_default_chunksize(s, max_workers) diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/util/progress_display.py python-cogent-2021.10.12a1+dfsg/src/cogent3/util/progress_display.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/util/progress_display.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/util/progress_display.py 2021-10-12 00:17:34.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Sheng Han Moses Koh" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Sheng Han Moses Koh"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/util/recode_alignment.py python-cogent-2021.10.12a1+dfsg/src/cogent3/util/recode_alignment.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/util/recode_alignment.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/util/recode_alignment.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,7 +1,3 @@ -#!/usr/bin/env python -# Author: Greg Caporaso (gregcaporaso@gmail.com) -# recode_alignment.py - """This file contains functions for recoding alignment objects with reduced-state alphabets, and also defines some reduced-state alphabets. 
@@ -38,27 +34,25 @@ """ -from optparse import OptionParser from numpy import array, take, zeros -from cogent3 import PROTEIN -from cogent3.core.alignment import Alignment, ArrayAlignment +from cogent3.core.alignment import ArrayAlignment from cogent3.evolve.models import DSO78_freqs, DSO78_matrix __author__ = "Greg Caporaso" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Greg Caporaso"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Greg Caporaso" __email__ = "gregcaporaso@gmail.com" __status__ = "Beta" class RecodeError(Exception): - """ A generic error to be raised when errors occur in recoding """ + """A generic error to be raised when errors occur in recoding""" pass diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/util/table.py python-cogent-2021.10.12a1+dfsg/src/cogent3/util/table.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/util/table.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/util/table.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,4 +1,3 @@ -#!/usr/bin/env python """ A light-weight Table class for manipulating 2D data and representing it as text, or writing to file for import into other packages. 
@@ -12,6 +11,7 @@ import csv import json +import pathlib import pickle import re @@ -41,10 +41,10 @@ display = lambda x: print(repr(x)) __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Felix Schill", "Sheng Koh"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -175,8 +175,7 @@ key = list(data.keys())[0] row_order = list(data[key]) - result = {c: [data[c][r] for r in row_order] for c in data} - return result + return {c: [data[c][r] for r in row_order] for c in data} def cast_to_1d_dict(data, row_order=None): @@ -283,7 +282,7 @@ return len(self._order) def __setitem__(self, key, val): - key = str(key) + key = str(key).strip() if isinstance(val, str): val = [val] try: @@ -339,8 +338,7 @@ v = d[:5] if num > 5: v.append(f"... + {num - 5} more") - txt = f"{self.__class__.__name__}({', '.join(v)})" - return txt + return f"{self.__class__.__name__}({', '.join(v)})" def __str__(self): return repr(self) @@ -430,8 +428,7 @@ def to_dict(self): """returns column based dict""" - result = {c: self[c].tolist() for c in self} - return result + return {c: self[c].tolist() for c in self} def to_rich_dict(self): data = self.__getstate__() @@ -536,7 +533,7 @@ f"only str type supported for index_name, not {type(index_name)}" ) - if data: + if len(data) if hasattr(data, "__len__") else 0: row_order = kwargs.get("row_order", None) data = cast_to_1d_dict(data, row_order=row_order) if has_index: @@ -668,12 +665,11 @@ if not self._repr_policy["show_shape"]: shape_info = "" - result = ( + return ( "\n".join([str(table), shape_info, unset_columns]) if unset_columns else "\n".join([str(table), shape_info]) ) - return result def __str__(self): if self.shape == (0, 0): @@ -1734,7 +1730,6 @@ formatted_table = 
bedgraph.bedgraph(self.sorted().array.tolist(), **kwargs) return formatted_table - missing_data = "0.0000" if format.lower() == "phylip" else self._missing_data if format.lower() in ("tsv", "csv"): sep = sep or {"tsv": "\t", "csv": ","}[format.lower()] format = "" @@ -1758,7 +1753,7 @@ return self.to_latex(concat_title_legend=concat_title_legend, **kwargs) if format == "html": - return self.to_rich_html(**kwargs) + return self.to_html(**kwargs) if format == "phylip": # need to eliminate row identifiers @@ -1815,90 +1810,6 @@ header, formatted_table, title=title, legend=legend, sep="\t" ) - def to_rich_html( - self, - row_cell_func=None, - header_cell_func=None, - element_formatters=None, - merge_identical=False, - compact=False, - ): # pragma: no cover - """returns just the table as html. - - Parameters - ---------- - row_cell_func - callback function that formats the row values. Must - take the row value and coordinates (row index, column index). - header_cell_func - callback function that formats the column headings - must take the header label value and coordinate - element_formatters - a dictionary of specific callback funcs for - formatting individual html table elements. - e.g. {'table': lambda x: ''} - merge_identical - cells within a row are merged to one span. 
- - """ - deprecated("method", "to_rich_html", "to_html", "2021.10") - - element_formatters = element_formatters or {} - formatted_table = self.array.tolist() - header, formatted_table = table_format.formatted_cells( - formatted_table, - self.header, - digits=self._digits, - column_templates=self._column_templates, - missing_data=self._missing_data, - ) - subtables = table_format.get_continuation_tables( - header, - formatted_table, - identifiers=self.index_name, - max_width=self._max_width, - ) - tables = [] - title = self.title if self.title else "" - if title: - title = escape(title) - legend = self.legend if self.legend else "" - if legend: - legend = escape(legend) - - for i, (h, t) in enumerate(subtables): - # but we strip the cell spacing - sh = [v.strip() for v in h] - t = [[c.strip() for c in r] for r in t] - - if title and i == 0: - st = element_formatters.get( - "caption", f'{title}' - ) - elif title: - st = element_formatters.get( - "caption", f'continuation' - ) - else: - st = None - - if legend and i == 0: - title = f"{st} {legend}" if st else legend - - caption = st if st else None - subtable = table_format.rich_html( - t, - row_cell_func=row_cell_func, - header=sh, - header_cell_func=header_cell_func, - element_formatters=element_formatters, - merge_identical=merge_identical, - compact=compact, - caption=caption, - ) - tables.append(subtable) - return "\n".join(tables) - def to_html(self, column_alignment=None): """construct html table @@ -1964,11 +1875,11 @@ for v in cols[c] ] - title = self.title if self.title else "" + title = self.title or "" if title and not table_format.is_html_markup(title): title = escape(title) - legend = self.legend if self.legend else "" + legend = self.legend or "" if legend and not table_format.is_html_markup(legend): legend = escape(legend) @@ -1988,7 +1899,7 @@ caption = str(HtmlElement(st, "caption", newline=True)) if st else "" rows = [] for i, row in enumerate(zip(*[cols[c] for c in header])): - txt = 
HtmlElement("".join([str(e) for e in row]), "tr") + txt = HtmlElement("".join(str(e) for e in row), "tr") rows.append(str(txt)) rows = str(HtmlElement("\n".join(rows), "tbody", newline=True)) @@ -1999,7 +1910,7 @@ for c in header ] - header = "".join([str(HtmlElement(c, "th")) for c in header]) + header = "".join(str(HtmlElement(c, "th")) for c in header) header = str( HtmlElement(header, "thead", css_classes=["head_cell"], newline=True) ) @@ -2120,7 +2031,7 @@ draw.layout |= default_layout return draw - def to_categorical(self, columns=None): + def to_categorical(self, columns=None, index_name=None): """construct object that can be used for statistical tests Parameters @@ -2143,8 +2054,9 @@ from cogent3.maths.stats.contingency import CategoryCounts from cogent3.util.dict_array import DictArrayTemplate + self.index_name = index_name if index_name is not None else self.index_name if self.index_name is None: - raise ValueError(f"requires index_name be set") + raise ValueError("requires index_name be set") columns = list(self.header) if columns is None else columns @@ -2236,6 +2148,7 @@ Unformatted numerical values are written to file in order to preserve numerical accuracy. 
""" + filename = pathlib.Path(filename) file_suffix, compress_suffix = get_format_suffixes(filename) format = format or file_suffix compress = compress or compress_suffix is not None @@ -2248,18 +2161,13 @@ return if compress: - if not filename.endswith(".gz"): - filename = "%s.gz" % filename + if ".gz" not in filename.suffixes: + filename = pathlib.Path(f"{filename}.gz") mode = "wt" outfile = atomic_write(filename, mode=mode) - if format is None: - # try guessing from filename suffix - index = -2 if compress else -1 - suffix = filename.split(".") - if len(suffix) > 1: - format = suffix[index] + format = format if format else file_suffix if format == "csv": sep = sep or "," diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/util/transform.py python-cogent-2021.10.12a1+dfsg/src/cogent3/util/transform.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/util/transform.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/util/transform.py 2021-10-12 00:17:34.000000000 +0000 @@ -14,10 +14,10 @@ """ __author__ = "Sandra Smit" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Sandra Smit", "Rob Knight", "Zongzhi Liu"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Sandra Smit" __email__ = "sandra.smit@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/util/union_dict.py python-cogent-2021.10.12a1+dfsg/src/cogent3/util/union_dict.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/util/union_dict.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/util/union_dict.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,14 +1,12 @@ -#!/usr/bin/env python """UnionDict extension of dict. 
""" -from cogent3.util.misc import extend_docstring_from __author__ = "Thomas La" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -43,8 +41,9 @@ def __getattr__(self, item): if item in self: return self.get(item) + try: - result = super().__getattr__(item) + return super().__getattr__(item) except AttributeError: raise AttributeError(f"'{item}' not a key or attribute") @@ -61,6 +60,11 @@ self.update({key: value}) def __or__(self, other): + result = self.__class__(self) + result.union(other) + return result + + def __ior__(self, other): self.union(other) return self diff -Nru python-cogent-2020.12.21a+dfsg/src/cogent3/util/warning.py python-cogent-2021.10.12a1+dfsg/src/cogent3/util/warning.py --- python-cogent-2020.12.21a+dfsg/src/cogent3/util/warning.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/src/cogent3/util/warning.py 2021-10-12 00:17:34.000000000 +0000 @@ -4,16 +4,16 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Jai Ram Rideout"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" -def deprecated(_type, old, new, version, stack_level=2): +def deprecated(_type, old, new, version, reason=None, stack_level=2): """a convenience function for deprecating classes, functions, arguments. 
Parameters @@ -25,25 +25,24 @@ version the version by which support for the old name will be discontinued + reason + why, and what choices users have stack_level as per warnings.warn """ - msg = "use %s %s instead of %s, support discontinued in version %s" % ( - _type, - new, - old, - version, + msg = ( + f"use {_type} {new} instead of {old}, support discontinued in version {version}" ) + if reason is not None: + msg = f"{msg}\n{reason}" - # DeprecationWarnings are ignored by default in python 2.7, so temporarily - # force them to be handled. with catch_warnings(): simplefilter("always") _warn(msg, DeprecationWarning, stack_level) -def discontinued(_type, name, version, stack_level=2): +def discontinued(_type, name, version, reason=None, stack_level=2): """convenience func to warn about discontinued attributes Parameters @@ -55,15 +54,17 @@ version the version by which support for the old name will be discontinued + reason + why, and what choices users have stack_level as per warnings.warn """ - msg = "%s %s is discontinued, support will be stopped in version %s" % ( - _type, - name, - version, + msg = ( + f"{_type} {name} is discontinued, support will be stopped in version {version}" ) + if reason is not None: + msg = f"{msg}\n{reason}" with catch_warnings(): simplefilter("always") diff -Nru python-cogent-2020.12.21a+dfsg/tests/benchmark_aligning.py python-cogent-2021.10.12a1+dfsg/tests/benchmark_aligning.py --- python-cogent-2020.12.21a+dfsg/tests/benchmark_aligning.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/benchmark_aligning.py 2021-10-12 00:17:34.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" 
diff -Nru python-cogent-2020.12.21a+dfsg/tests/benchmark.py python-cogent-2021.10.12a1+dfsg/tests/benchmark.py --- python-cogent-2020.12.21a+dfsg/tests/benchmark.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/benchmark.py 2021-10-12 00:17:34.000000000 +0000 @@ -13,10 +13,10 @@ __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -104,9 +104,9 @@ (pc, aln) = quiet(makePC, *args) speed2 = measure_evals_per_sec(pc, aln) if speed1 < speed2: - speed = "+%2.1f" % (speed2 / speed1) + speed = f"+{speed2 / speed1:2.1f}" else: - speed = "-%2.1f" % (speed1 / speed2) + speed = f"-{speed1 / speed2:2.1f}" if speed in ["+1.0", "-1.0"]: speed = "" return speed diff -Nru python-cogent-2020.12.21a+dfsg/tests/__init__.py python-cogent-2021.10.12a1+dfsg/tests/__init__.py --- python-cogent-2020.12.21a+dfsg/tests/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,12 +1,17 @@ #!/usr/bin/env python +import os +import pathlib + + +os.chdir(pathlib.Path(__file__).parent) sub_modules = ["test_draw", "test_phylo"] for sub_module in sub_modules: - exec("from %s import %s" % (__name__, sub_module)) + exec(f"from {__name__} import {sub_module}") __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Peter Maxwell", "Gavin Huttley", @@ -16,7 +21,7 @@ "Edward Lang", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = 
"Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_align/__init__.py python-cogent-2021.10.12a1+dfsg/tests/test_align/__init__.py --- python-cogent-2020.12.21a+dfsg/tests/test_align/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_align/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -2,10 +2,10 @@ __all__ = ["test_align"] __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Jeremy Widmann", "Peter Maxwell", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Jeremy Widmann" __email__ = "jeremy.widmann@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_align/test_align.py python-cogent-2021.10.12a1+dfsg/tests/test_align/test_align.py --- python-cogent-2020.12.21a+dfsg/tests/test_align/test_align.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_align/test_align.py 2021-10-12 00:17:34.000000000 +0000 @@ -22,10 +22,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_app/test_align.py python-cogent-2021.10.12a1+dfsg/tests/test_app/test_align.py --- python-cogent-2020.12.21a+dfsg/tests/test_app/test_align.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_app/test_align.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,16 +1,33 @@ from unittest import TestCase, main -from cogent3 import DNA, get_moltype, make_tree, make_unaligned_seqs +from cogent3 import ( + 
DNA, + get_moltype, + make_aligned_seqs, + make_tree, + make_unaligned_seqs, +) from cogent3.align.align import make_generic_scoring_dict from cogent3.app import align as align_app +from cogent3.app.align import ( + _combined_refseq_gaps, + _gap_difference, + _gap_union, + _GapOffset, + _gaps_for_injection, + _merged_gaps, + pairwise_to_multiple, +) from cogent3.app.composable import NotCompleted +from cogent3.core.alignment import Aligned +from cogent3.core.location import gap_coords_to_map __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -48,9 +65,29 @@ ] +def make_pairwise(data, refseq_name, moltype="dna", array_align=False): + """returns series of refseq, [(n, pwise aln),..]. All alignments are to ref_seq""" + aln = make_aligned_seqs( + data, + array_align=array_align, + moltype=moltype, + ) + refseq = aln.get_seq(refseq_name) + pwise = [ + (n, aln.take_seqs([refseq_name, n]).omit_gap_pos()) + for n in aln.names + if n != refseq_name + ] + return refseq, pwise + + +def make_aligned(gaps_lengths, seq, name="seq1"): + seq = seq.moltype.make_seq(seq, name=name) + return Aligned(gap_coords_to_map(gaps_lengths, len(seq)), seq) + + class RefalignmentTests(TestCase): seqs = make_unaligned_seqs(_seqs, moltype=DNA) - treestring = "(Bandicoot:0.4,FlyingFox:0.05,(Rhesus:0.06," "Human:0.0):0.04);" def test_align_to_ref(self): """correctly aligns to a reference""" @@ -81,6 +118,210 @@ got = aligner(self.seqs) self.assertEqual(got.moltype.label, "dna") + def test_merged_gaps(self): + """correctly merges gaps""" + a = dict([(2, 3), (4, 9)]) + b = dict([(2, 6), (8, 5)]) + # omitting one just returns the other + self.assertIs(_merged_gaps(a, {}), a) + 
self.assertIs(_merged_gaps({}, b), b) + got = _merged_gaps(a, b) + self.assertEqual(got, [(2, 6), (4, 9), (8, 5)]) + + def test_aln_to_ref_known(self): + """correctly recapitulates known case""" + orig = make_aligned_seqs( + { + "Ref": "CAG---GAGAACAGAAACCCAT--TACTCACT", + "Qu1": "CAG---GAGAACAG---CCCGTGTTACTCACT", + "Qu2": "CAGCATGAGAACAGAAACCCGT--TA---ACT", + "Qu3": "CAGCATGAGAACAGAAACCCGT----CTCACT", + "Qu4": "CAGCATGAGAACAGAAACCCGTGTTACTCACT", + "Qu5": "CAG---GAGAACAG---CCCAT--TACTCACT", + "Qu6": "CAG---GA-AACAG---CCCAT--TACTCACT", + "Qu7": "CAG---GA--ACAGA--CCCGT--TA---ACT", + }, + moltype="dna", + ) + expect = orig.to_dict() + aligner = align_app.align_to_ref(ref_seq="Ref") + aln = aligner(orig.degap()) + self.assertEqual(aln.to_dict(), expect) + + def test_gap_union(self): + """correctly identifies the union of all gaps""" + # fails if not all sequences same + seq = DNA.make_seq("AACCCGTT") + all_gaps = dict([(0, 3), (2, 1), (5, 3), (6, 3)]) + final_seq = make_aligned(all_gaps, seq) + gap_sets = [ + dict([(5, 1), (6, 3)]), + dict([(2, 1), (5, 3)]), + dict([(2, 1), (5, 1), (6, 2)]), + dict([(0, 3)]), + ] + seqs = [make_aligned(gaps, seq) for gaps in gap_sets] + got = _gap_union(seqs) + self.assertEqual(got, dict(all_gaps)) + + # must all be Aligned instances + with self.assertRaises(TypeError): + _gap_union(seqs + ["GGGGGGGG"]) + + # must all have the same name + with self.assertRaises(ValueError): + _gap_union(seqs + [make_aligned({}, seq, name="blah")]) + + def test_gap_difference(self): + """correctly identifies the difference in gaps""" + seq = DNA.make_seq("AACCCGTT") + all_gaps = dict([(0, 3), (2, 1), (5, 3), (6, 3)]) + gap_sets = [ + dict([(5, 1), (6, 3)]), + dict([(2, 1), (5, 3)]), + dict([(2, 1), (5, 1), (6, 2)]), + dict([(0, 3)]), + ] + seqs = [make_aligned(gaps, seq) for gaps in gap_sets] + union = _gap_union(seqs) + expects = [ + [dict([(0, 3), (2, 1)]), dict([(5, 2)])], + [dict([(0, 3), (6, 3)]), {}], + [dict([(0, 3)]), dict([(5, 2), (6, 1)])], + 
[dict([(2, 1), (5, 3), (6, 3)]), {}], + ] + for seq, (plain, overlap) in zip(seqs, expects): + seq_gaps = dict(seq.map.get_gap_coordinates()) + got_plain, got_overlap = _gap_difference(seq_gaps, union) + self.assertEqual(got_plain, dict(plain)) + self.assertEqual(got_overlap, dict(overlap)) + + def test_merged_gaps(self): + """correctly handles gap values""" + a_gaps = {0: 2} + b_gaps = {2: 2} + self.assertEqual(_merged_gaps(a_gaps, {}), a_gaps) + self.assertEqual(_merged_gaps({}, b_gaps), b_gaps) + + def test_combined_refseq_gaps(self): + union = dict([(0, 3), (2, 1), (5, 3), (6, 3)]) + gap_sets = [ + [(5, 1), (6, 3)], + [(2, 1), (5, 3)], + [(2, 1), (5, 1), (6, 2)], + [(0, 3)], + ] + # for subset gaps, their alignment position is the + # offset + their position + their gap length + expects = [ + dict([(6, 2), (0, 3), (2, 1)]), + dict([(0, 3), (10, 3)]), + dict([(0, 3), (5 + 1 + 1, 2), (6 + 2 + 2, 1)]), + dict([(2 + 3, 1), (5 + 3, 3), (6 + 3, 3)]), + ] + for i, gap_set in enumerate(gap_sets): + got = _combined_refseq_gaps(dict(gap_set), union) + self.assertEqual(got, expects[i]) + + # if union gaps equals ref gaps + got = _combined_refseq_gaps({2: 2}, {2: 2}) + self.assertEqual(got, {}) + + def test_gaps_for_injection(self): + # for gaps before any otherseq gaps, alignment coord is otherseq coord + oseq_gaps = {2: 1, 6: 2} + rseq_gaps = {0: 3} + expect = {0: 3, 2: 1, 6: 2} + seqlen = 50 + got = _gaps_for_injection(oseq_gaps, rseq_gaps, seqlen) + self.assertEqual(got, expect) + # for gaps after otherseq gaps seq coord is align coord minus gap + # length totals + got = _gaps_for_injection(oseq_gaps, {4: 3}, seqlen) + expect = {2: 1, 3: 3, 6: 2} + self.assertEqual(got, expect) + got = _gaps_for_injection(oseq_gaps, {11: 3}, seqlen) + expect = {2: 1, 6: 2, 8: 3} + self.assertEqual(got, expect) + # gaps beyond sequence length added to end of sequence + got = _gaps_for_injection({2: 1, 6: 2}, {11: 3, 8: 3}, 7) + expect = {2: 1, 6: 2, 7: 6} + self.assertEqual(got, expect) 
+ + def test_pairwise_to_multiple(self): + """the standalone function constructs a multiple alignment""" + expect = { + "Ref": "CAG---GAGAACAGAAACCCAT--TACTCACT", + "Qu1": "CAG---GAGAACAG---CCCGTGTTACTCACT", + "Qu2": "CAGCATGAGAACAGAAACCCGT--TA---ACT", + "Qu3": "CAGCATGAGAACAGAAACCCGT----CTCACT", + "Qu7": "CAG---GA--ACAGA--CCCGT--TA---ACT", + "Qu4": "CAGCATGAGAACAGAAACCCGTGTTACTCACT", + "Qu5": "CAG---GAGAACAG---CCCAT--TACTCACT", + "Qu6": "CAG---GA-AACAG---CCCAT--TACTCACT", + } + aln = make_aligned_seqs(expect, moltype="dna").omit_gap_pos() + expect = aln.to_dict() + for refseq_name in ["Qu3"]: + refseq, pwise = make_pairwise(expect, refseq_name) + got = pairwise_to_multiple(pwise, ref_seq=refseq, moltype=refseq.moltype) + self.assertEqual(len(got), len(aln)) + orig = dict(pwise) + _, pwise = make_pairwise(got.to_dict(), refseq_name) + got = dict(pwise) + # should be able to recover the original pairwise alignments + for key, value in got.items(): + self.assertEqual(value.to_dict(), orig[key].to_dict(), msg=refseq_name) + + with self.assertRaises(TypeError): + pairwise_to_multiple(pwise, "ACGG", DNA) + + def test_pairwise_to_multiple_2(self): + """correctly handle alignments with gaps beyond end of query""" + # cogent3.core.alignment.DataError: Not all sequences are the same length: + # max is 425, min is 419 + def make_pwise(data, ref_name): + result = [] + for n, seqs in data.items(): + result.append( + [n, make_aligned_seqs(data=seqs, moltype="dna", array_align=False)] + ) + ref_seq = result[0][1].get_seq(ref_name) + return result, ref_seq + + pwise = { + "Platypus": { + "Opossum": 
"-----------------GTGC------GAT-------------------------------CCAAAAACCTGTGTC--ACCGT--------GCC----CAGAGCCTCC----CTCAGGCCGCTCGGGGAG---TG-------GCCCCCCG--GC-GGAGGGCAGGGATGGGGAGT-AGGGGTGGCAGTC----GGAACTGGAAGAGCTT-TACAAACC---------GA--------------------GGCT-AGAGGGTC-TGCTTAC-------TTTTTACCTTGG------------GTTTG-CCAGGAGGTAG----------AGGATGA-----------------CTAC--ATCAAG----AGC------------TGGG-------------", + "Platypus": "CAGGATGACTACATCAAGAGCTGGGAAGATAACCAGCAAGGAGATGAAGCTCTGGACACTACCAAAGACCCCTGCCAGAACGTGAAGTGCAGCCGACACAAGGTCTGCATCGCTCAGGGCTACCAGAGAGCCATGTGTATCAGCCGCAAGAAGCTGGAGCACAGGATCAAGCAGCCAGCCCTGAAACTCCATGGAAACAGAGAGAGCTTCTGCAAGCCTTGTCACATGACCCAGCTGGCCTCTGTCTGCGGCTCGGACGGACACACTTACAGCTCCGTGTGCAAACTGGAGCAGCAGGCCTGTCTGACCAGCAAGCAGCTGACAGTCAAGTGTGAAGGCCAGTGCCCGTGCCCCACCGATCATGTTCCAGCCTCCACCGCTGATGGAAAACAAGAGACCT", + }, + "Wombat": { + "Opossum": "GTGCGATCCAAAAACCTGTGTCACCGTGCCCAGAGCCTCCCTCAGGCCGCTCGG-GGAGTGGCCCCCCGGCGGAGGGCAGGGATGGGGAGTAGGGGTGGCAGTCGGAACTGGAAGAGCTTTACAAACCGAGGCTAGAGGGTCTGCTTACTTTTTACCTTGG------GTTT--GC-CAGGA---GGT----AGAGGATGACTACATCAAGAGCTGGG---------------------------", + "Wombat": "--------CA----------TCACCGC-CCCTGCACC---------CGGCTCGGCGGAGGGGGATTCTAA-GGGGGTCAAGGATGGCGAG-ACCCCTGGCAATTTCA--TGGAGGA------CGAGCAATGGCT-----GTC-GTCCATCTCCCAGTATAGCGGCAAGATCAAGCACTGGAACCGCTTCCGAGACGATGACTACATCAAGAGCTGGGAGGACAGTCAGCAAGGAGATGAAGCGC", + }, + } + pwise, ref_seq = make_pwise(pwise, "Opossum") + aln = pairwise_to_multiple(pwise, ref_seq, ref_seq.moltype) + self.assertNotIsInstance(aln, NotCompleted) + + pwise = { + "Platypus": { + "Opossum": "-----------------GTGC------GAT-------------------------------CCAAAAACCTGTGTC", + "Platypus": "CAGGATGACTACATCAAGAGCTGGGAAGATAACCAGCAAGGAGATGAAGCTCTGGACACTACCAAAGACCCCTGCC", + }, + "Wombat": { + "Opossum": "GTGCGATCCAAAAACCTGTGTC", + "Wombat": "--------CA----------TC", + }, + } + pwise, ref_seq = make_pwise(pwise, "Opossum") + aln = pairwise_to_multiple(pwise, ref_seq, ref_seq.moltype) + self.assertNotIsInstance(aln, 
NotCompleted) + + +class ProgressiveAlignment(TestCase): + seqs = make_unaligned_seqs(_seqs, moltype=DNA) + treestring = "(Bandicoot:0.4,FlyingFox:0.05,(Rhesus:0.06," "Human:0.0):0.04);" + def test_progressive_align_protein_moltype(self): """tests guide_tree is None and moltype is protein""" from cogent3 import load_aligned_seqs @@ -204,5 +445,45 @@ self.assertEqual(len(aln), 14) +class GapOffsetTests(TestCase): + def test_empty(self): + """create an empty offset""" + goff = _GapOffset({}) + for i in range(4): + self.assertEqual(goff[i], 0) + + goff = _GapOffset({}, invert=True) + for i in range(4): + self.assertEqual(goff[i], 0) + + def test_repr_str(self): + """repr and str work""" + goff = _GapOffset({}, invert=True) + for func in (str, repr): + self.assertEqual(func(goff), "{}") + + def test_gap_offset(self): + goff = _GapOffset({1: 2, 3: 4}) + self.assertEqual(goff.min_pos, 1) + self.assertEqual(goff.max_pos, 3) + self.assertEqual(goff.total, 6) + self.assertEqual(goff[0], 0) + self.assertEqual(goff[1], 0) + self.assertEqual(goff[2], 2) + self.assertEqual(goff[3], 2) + self.assertEqual(goff[4], 6) + + def test_gap_offset_invert(self): + aln2seq = _GapOffset({2: 1, 5: 2, 7: 2}, invert=True) + self.assertEqual(aln2seq._store, {3: 1, 2: 0, 8: 3, 6: 1, 12: 5, 10: 3}) + self.assertEqual(aln2seq.max_pos, 12) + self.assertEqual(aln2seq.min_pos, 2) + self.assertEqual(aln2seq[11], 3) + seq2aln = _GapOffset({2: 1, 5: 2, 7: 2}) + for seq_pos in range(20): + aln_pos = seq_pos + seq2aln[seq_pos] + self.assertEqual(aln_pos - aln2seq[aln_pos], seq_pos) + + if __name__ == "__main__": main() diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_app/test_app_mpi.py python-cogent-2021.10.12a1+dfsg/tests/test_app/test_app_mpi.py --- python-cogent-2020.12.21a+dfsg/tests/test_app/test_app_mpi.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_app/test_app_mpi.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,3 +1,5 @@ +from pathlib import Path +from 
tempfile import TemporaryDirectory from unittest import TestCase, main, skipUnless from cogent3.app import align as align_app @@ -7,10 +9,10 @@ __author__ = "Sheng Han Moses Koh" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Sheng Han Moses Koh"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -24,27 +26,29 @@ """writing with overwrite in MPI should reset db""" dstore = io_app.get_data_store("data", suffix="fasta") members = dstore.filtered(callback=lambda x: "brca1.fasta" not in x.split("/")) - reader = io_app.load_unaligned() - aligner = align_app.align_to_ref() - writer = write_db("delme.tinydb", create=True, if_exists="overwrite") - process = reader + aligner + writer - - r = process.apply_to( - members, - logger=False, - show_progress=False, - parallel=True, - par_kw=dict(use_mpi=True), - ) - - expect = [str(m) for m in process.data_store] - process.data_store.close() - - # now get read only and check what's in there - result = io_app.get_data_store("delme.tinydb") - got = [str(m) for m in result] + with TemporaryDirectory(dir=".") as dirname: + path = Path(dirname) / "delme.tinydb" + reader = io_app.load_unaligned() + aligner = align_app.align_to_ref() + writer = write_db(path, create=True, if_exists="overwrite") + process = reader + aligner + writer + + r = process.apply_to( + members, + logger=False, + show_progress=False, + parallel=True, + par_kw=dict(use_mpi=True), + ) + + expect = [str(m) for m in process.data_store] + process.data_store.close() + + # now get read only and check what's in there + result = io_app.get_data_store(path) + got = [str(m) for m in result] - assert got == expect + assert got == expect if __name__ == "__main__": diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_app/test_composable.py 
python-cogent-2021.10.12a1+dfsg/tests/test_app/test_composable.py --- python-cogent-2020.12.21a+dfsg/tests/test_app/test_composable.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_app/test_composable.py 2021-10-12 00:17:34.000000000 +0000 @@ -7,7 +7,13 @@ from cogent3.app import io as io_app from cogent3.app import sample as sample_app -from cogent3.app.composable import ComposableSeq, NotCompleted, user_function +from cogent3.app.composable import ( + SERIALISABLE_TYPE, + ComposableSeq, + NotCompleted, + appify, + user_function, +) from cogent3.app.sample import min_length, omit_degenerates from cogent3.app.translate import select_translatable from cogent3.app.tree import quick_tree @@ -15,10 +21,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -194,6 +200,7 @@ writer.data_store.close() dstore = io_app.get_data_store(outpath) self.assertEqual(len(dstore), num_records) + dstore.close() class TestNotCompletedResult(TestCase): @@ -362,6 +369,23 @@ self.assertEqual(got_1.to_dict(), {"a": "GCAA", "b": "GCTT"}) self.assertEqual(got_2, {("s1", "s2"): 2.0, ("s2", "s1"): 2.0}) + def test_appify(self): + """acts like a decorator should!""" + + @appify(SERIALISABLE_TYPE, SERIALISABLE_TYPE) + def slicer(val, index=2): + """my docstring""" + return val[:index] + + self.assertEqual(slicer.__doc__, "appify: my docstring") + self.assertEqual(slicer.__name__, "slicer") + app = slicer() + self.assertTrue(SERIALISABLE_TYPE in app._input_types) + self.assertTrue(SERIALISABLE_TYPE in app._output_types) + self.assertEqual(app(list(range(4))), [0, 1]) + app2 = slicer(index=3) + self.assertEqual(app2(list(range(4))), [0, 1, 2]) + def 
test_user_function_repr(self): u_function_1 = user_function(self.foo, "aligned", "aligned") u_function_2 = user_function(self.bar, "aligned", "pairwise_distances") @@ -381,6 +405,54 @@ self.assertEqual( str(u_function_2), "user_function(name='bar', module='test_composable')" ) + # added into a composable func + loader = io_app.load_aligned() + proc = loader + u_function_1 + got = str(proc) + self.assertTrue(got.startswith("load_aligned")) + + def test_user_function_with_args_kwargs(self): + """correctly handles definition with args, kwargs""" + from math import log + + def product(val, multiplier, take_log=False): + result = val * multiplier + if take_log: + result = log(result) + + return result + + # without defining any args, kwargs + ufunc = user_function( + product, + SERIALISABLE_TYPE, + SERIALISABLE_TYPE, + ) + self.assertEqual(ufunc(2, 2), 4) + self.assertEqual(ufunc(2, 2, take_log=True), log(4)) + + # defining default arg2 + ufunc = user_function( + product, + SERIALISABLE_TYPE, + SERIALISABLE_TYPE, + 2, + ) + self.assertEqual(ufunc(2), 4) + self.assertEqual(ufunc(2, take_log=True), log(4)) + + # defining default kwarg only + ufunc = user_function( + product, SERIALISABLE_TYPE, SERIALISABLE_TYPE, take_log=True + ) + self.assertEqual(ufunc(2, 2), log(4)) + self.assertEqual(ufunc(2, 2, take_log=False), 4) + + # defining default arg and kwarg + ufunc = user_function( + product, SERIALISABLE_TYPE, SERIALISABLE_TYPE, 2, take_log=True + ) + self.assertEqual(ufunc(2), log(4)) if __name__ == "__main__": diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_app/test_data_store.py python-cogent-2021.10.12a1+dfsg/tests/test_app/test_data_store.py --- python-cogent-2020.12.21a+dfsg/tests/test_app/test_data_store.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_app/test_data_store.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,14 +1,18 @@ import json import os +import pathlib import shutil import sys -import zipfile +from pathlib import 
Path from tempfile import TemporaryDirectory from unittest import TestCase, main, skipIf +from cogent3 import load_aligned_seqs from cogent3.app.data_store import ( + IGNORE, OVERWRITE, + RAISE, DataStoreMember, ReadOnlyDirectoryDataStore, ReadOnlyTinyDbDataStore, @@ -16,23 +20,22 @@ SingleReadDataStore, WritableDirectoryDataStore, WritableTinyDbDataStore, - WritableZippedDataStore, load_record_from_json, ) from cogent3.parse.fasta import MinimalFastaParser __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" -class DataStoreBaseTests: +class DataStoreBaseReadTests: basedir = "data" ReadClass = None WriteClass = None @@ -94,29 +97,6 @@ dstore = self.ReadClass(self.basedir, suffix=".fasta") self.assertEqual(len(dstore), len(dstore.members)) - def test_make_identifier(self): - """correctly construct an identifier for a new member""" - with TemporaryDirectory(dir=".") as dirname: - if dirname.startswith("." 
+ os.sep): - dirname = dirname[2:] - - path = os.path.join(dirname, self.basedir) - base_path = path.replace(".zip", "") - dstore = self.WriteClass(path, suffix=".json", create=True) - name = "brca1.fasta" - got = dstore.make_absolute_identifier(name) - expect = os.path.join(base_path, name.replace("fasta", "json")) - self.assertEqual(got, expect) - - # now using a DataStoreMember - member = DataStoreMember( - os.path.join("blah" + os.sep + "blah", f"2-{name}"), None - ) - got = dstore.make_absolute_identifier(member) - expect = os.path.join(base_path, member.name.replace("fasta", "json")) - self.assertEqual(got, expect) - dstore.close() - def test_read(self): """correctly read content""" with open("data" + os.sep + "brca1.fasta") as infile: @@ -143,6 +123,42 @@ member = dstore.get_member(identifier) self.assertEqual(member.md5, md5) + def test_filter(self): + """filter method should return correctly matching members""" + dstore = self.ReadClass(self.basedir, suffix="*") + got = [m.name for m in dstore.filtered(callback=lambda x: "brca1" in str(x))] + self.assertTrue(len(set(got)), 2) + got = dstore.filtered(pattern="*brca1*") + expect = [ + path + for path in os.listdir(self.basedir.replace(".zip", "")) + if "brca1" in path + ] + self.assertEqual(len(got), len(expect)) + + def test_pickleable_roundtrip(self): + """pickling of data stores should be reversible""" + from pickle import dumps, loads + + dstore = self.ReadClass(self.basedir, suffix="*") + re_dstore = loads(dumps(dstore)) + got = re_dstore[0].read() + self.assertEqual(str(dstore), str(re_dstore)) + self.assertEqual(dstore[0].read(), re_dstore[0].read()) + + def test_pickleable_member_roundtrip(self): + """pickling of data store members should be reversible""" + from pickle import dumps, loads + + dstore = self.ReadClass(self.basedir, suffix="*") + re_member = loads(dumps(dstore[0])) + data = re_member.read() + self.assertTrue(len(data) > 0) + + +class DataStoreBaseWriteTests: + WriteClass = None + def 
test_write(self): """correctly write content""" with open("data" + os.sep + "brca1.fasta") as infile: @@ -157,6 +173,20 @@ self.assertEqual(got, expect) dstore.close() + def test_write_wout_suffix(self): + """appends suffix expected to records""" + with TemporaryDirectory(dir=".") as dirname: + dirname = Path(dirname) + path = dirname / f"{self.basedir}.tinydb" + dstore = self.WriteClass(path, suffix="fasta", create=True) + with self.assertRaises(ValueError): + dstore.write("1", str(dict(a=24, b="some text"))) + + dstore.write("1.fasta", str(dict(a=24, b="some text"))) + dstore.close() + dstore = self.ReadClass(path, suffix="fasta") + self.assertEqual(len(dstore), 1) + @skipIf(sys.platform.lower() != "darwin", "broken on linux") def test_md5_write(self): """tracks md5 sums of written data""" @@ -210,38 +240,6 @@ self.assertEqual(got_b, expect_b) dstore.close() - def test_filter(self): - """filter method should return correctly matching members""" - dstore = self.ReadClass(self.basedir, suffix="*") - got = [m.name for m in dstore.filtered(callback=lambda x: "brca1" in str(x))] - self.assertTrue(len(set(got)), 2) - got = dstore.filtered(pattern="*brca1*") - expect = [ - path - for path in os.listdir(self.basedir.replace(".zip", "")) - if "brca1" in path - ] - self.assertEqual(len(got), len(expect)) - - def test_pickleable_roundtrip(self): - """pickling of data stores should be reversible""" - from pickle import dumps, loads - - dstore = self.ReadClass(self.basedir, suffix="*") - re_dstore = loads(dumps(dstore)) - got = re_dstore[0].read() - self.assertEqual(str(dstore), str(re_dstore)) - self.assertEqual(dstore[0].read(), re_dstore[0].read()) - - def test_pickleable_member_roundtrip(self): - """pickling of data store members should be reversible""" - from pickle import dumps, loads - - dstore = self.ReadClass(self.basedir, suffix="*") - re_member = loads(dumps(dstore[0])) - data = re_member.read() - self.assertTrue(len(data) > 0) - def test_add_file(self): 
"""correctly add an arbitrarily named file""" with open("data" + os.sep + "brca1.fasta") as infile: @@ -273,12 +271,62 @@ self.assertFalse(os.path.exists(log_path)) dstore.close() + def test_make_identifier(self): + """correctly construct an identifier for a new member""" + with TemporaryDirectory(dir=".") as dirname: + if dirname.startswith("." + os.sep): + dirname = dirname[2:] -class DirectoryDataStoreTests(TestCase, DataStoreBaseTests): + path = os.path.join(dirname, self.basedir) + base_path = path.replace(".zip", "") + dstore = self.WriteClass(path, suffix=".json", create=True) + name = "brca1.fasta" + got = dstore.make_absolute_identifier(name) + expect = os.path.join(base_path, name.replace("fasta", "json")) + self.assertEqual(got, expect) + + # now using a DataStoreMember + member = DataStoreMember( + os.path.join("blah" + os.sep + "blah", f"2-{name}"), None + ) + got = dstore.make_absolute_identifier(member) + expect = os.path.join(base_path, member.name.replace("fasta", "json")) + self.assertEqual(got, expect) + dstore.close() + + +class DirectoryDataStoreReadTests( + TestCase, DataStoreBaseReadTests, DataStoreBaseWriteTests +): basedir = "data" ReadClass = ReadOnlyDirectoryDataStore WriteClass = WritableDirectoryDataStore + def setUp(self): + dstore = ReadOnlyDirectoryDataStore("data", suffix="fasta") + data = {m.name: m.read() for m in dstore} + self.data = data + + def test_identifier_write_str_data(self): + """data must be string type""" + data = load_aligned_seqs("data/brca1_5.paml") + with TemporaryDirectory(dir=".") as dirname: + path = pathlib.Path(dirname) / "delme" + dstore = self.WriteClass( + path, suffix=".fasta", if_exists=OVERWRITE, create=True + ) + # fails with not string + with self.assertRaises(TypeError): + dstore.write(data.info.source, data) + + # even bytes + with self.assertRaises(TypeError): + dstore.write(f"{data.info.source}-2.fasta", str(data).encode("utf-8")) + + # but works if data is str + 
dstore.write(f"{data.info.source}-1.fasta", str(data)) + dstore.close() + def test_write_class_source_create_delete(self): with TemporaryDirectory(dir=".") as dirname: # tests the case when the directory has the file with the same suffix to self.suffix @@ -334,11 +382,79 @@ ) self.assertEqual(len(dstore), 0) + def test_data_store_creation(self): + """overwrite, raise, ignore conditions""" + + def create_data_store(path): + if path.exists(): + shutil.rmtree(path, ignore_errors=True) + + dstore = self.WriteClass(path, suffix=".json", create=True) + for k in self.data: + id_ = dstore.make_relative_identifier(k) + dstore.write(id_, self.data[k]) + + dstore.close() + dstore._members = [] + return dstore + + with TemporaryDirectory(dir=".") as dirname: + dirname = Path(dirname) + path = dirname / self.basedir + _ = create_data_store(path) + + # if_exists=OVERWRITE, correctly overwrite existing directory + # data_store + dstore = self.WriteClass( + path, suffix=".json", create=True, if_exists=OVERWRITE + ) + self.assertEqual(len(dstore), 0) + dstore.write("id.json", "some data") + self.assertEqual(len(dstore), 1) + self.assertTrue(path.exists()) + dstore.close() + + # if_exists=RAISE, correctly raises exception + created = create_data_store(path) + # created._members = [] + with self.assertRaises(FileExistsError): + self.WriteClass(path, suffix=".json", create=True, if_exists=RAISE) + + dstore = self.ReadClass(path, suffix=".json") + dstore._members = [] + self.assertEqual( + len(dstore), len(created), msg=f"got {dstore}, original is {created}" + ) -class ZippedDataStoreTests(TestCase, DataStoreBaseTests): + # if_exists=IGNORE, works + created = create_data_store(path) + # created._members = [] + dstore = self.WriteClass( + path, suffix=".json", create=True, if_exists=IGNORE + ) + self.assertEqual( + len(dstore), len(created), msg=f"got {dstore}, original is {created}" + ) + dstore.write("id.json", "some data") + self.assertEqual(len(dstore), len(created) + 1) + 
dstore.close() + + def test_data_store_creation2(self): + """handles create path argument""" + with TemporaryDirectory(dir=".") as dirname: + path = Path(dirname) / "subdir" + # raises FileNotFoundError when create is False and full path does + # not exist + with self.assertRaises(FileNotFoundError): + self.WriteClass(path, suffix=".json", create=False) + + # correctly creates tinydb when full path does not exist + _ = self.WriteClass(path, suffix=".json", create=True) + + +class ZippedDataStoreReadTests(TestCase, DataStoreBaseReadTests): basedir = "data.zip" ReadClass = ReadOnlyZippedDataStore - WriteClass = WritableZippedDataStore def setUp(self): basedir = self.basedir.split(".")[0] @@ -349,10 +465,6 @@ def tearDown(self): os.remove(self.basedir) - def test_write_no_parent(self): - """zipped data store handles archive with no parent dir""" - self.WriteClass("delme.zip", create=True, suffix="fa") - def test_store_suffix(self): """data store adds file suffix if not provided""" source = self.basedir.split(".")[0] @@ -360,83 +472,15 @@ self.assertEqual(dstore.source, self.basedir) self.assertTrue(len(dstore) > 1) - def test_write_class_source_create_delete(self): - with TemporaryDirectory(dir=".") as dirname: - path = os.path.join(dirname, "delme_dir") - os.mkdir(path) - - # tests the case when the ZippedDataStore only contains files with the same suffix as self.suffix - test_case1_zip = "delme1.zip" - with zipfile.ZipFile(os.path.join(path, test_case1_zip), "w") as myzip: - test_path = os.path.join(path, "dummyPrefix_.json") - with open(test_path, "w"): - pass - myzip.write(test_path) - dstore = self.WriteClass( - os.path.join(path, test_case1_zip), - suffix=".json", - if_exists=OVERWRITE, - create=True, - ) - self.assertEqual(len(dstore), 0) - - # tests the case when the ZippedDataStore contains both files with the same suffix as self.suffix and log files - test_case2_zip = "delme2.zip" - with zipfile.ZipFile(os.path.join(path, test_case2_zip), "w") as myzip: - 
test_path = os.path.join(path, "dummyPrefix_.json") - with open(test_path, "w"): - pass - myzip.write(test_path) - test_path = os.path.join(path, "dummyPrefix_.log") - with open(test_path, "w"): - pass - myzip.write(test_path) - dstore = self.WriteClass( - os.path.join(path, test_case2_zip), - suffix=".json", - if_exists=OVERWRITE, - create=True, - ) - self.assertEqual(len(dstore), 0) - - # tests the case when the ZippedDataStore contains files with the different suffixes to self.suffix - test_case3_zip = "delme3.zip" - with zipfile.ZipFile(os.path.join(path, test_case3_zip), "w") as myzip: - test_path = os.path.join(path, "dummyPrefix_.dummySuffix") - with open(test_path, "w"): - pass - myzip.write(test_path) - with self.assertRaises(RuntimeError): - dstore = self.WriteClass( - os.path.join(path, test_case3_zip), - suffix=".json", - if_exists=OVERWRITE, - create=True, - ) - - # tests the case when the ZippedDataStore contains only log files - test_case4_zip = "delme4.zip" - with zipfile.ZipFile(os.path.join(path, test_case4_zip), "w") as myzip: - test_path = os.path.join(path, "dummyPrefix_.log") - with open(test_path, "w"): - pass - myzip.write(test_path) - dstore = self.WriteClass( - os.path.join(path, test_case4_zip), - suffix=".json", - if_exists=OVERWRITE, - create=True, - ) - self.assertEqual(len(dstore), 0) - class TinyDBDataStoreTests(TestCase): basedir = "data" ReadClass = ReadOnlyTinyDbDataStore WriteClass = WritableTinyDbDataStore + suffix = ".json" def setUp(self): - dstore = ReadOnlyDirectoryDataStore(self.basedir, suffix="fasta") + dstore = ReadOnlyDirectoryDataStore("data", suffix="fasta") data = {m.name: m.read() for m in dstore} self.data = data @@ -645,29 +689,30 @@ def test_dblock(self): """locking/unlocking of db""" - from pathlib import Path from cogent3.app.data_store import _db_lockid keys = list(self.data) with TemporaryDirectory(dir=".") as dirname: path = os.path.join(dirname, self.basedir) + # creation automatically locks db to creating 
process id (pid) dstore = self.WriteClass(path, if_exists="overwrite") for k in keys: id_ = dstore.make_relative_identifier(k) dstore.write(id_, self.data[k]) self.assertTrue(dstore.locked) + + # unlocking dstore.unlock(force=True) - # now introduce an artificial lock, making sure lock flushed to disk + self.assertFalse(dstore.locked) + + # introduce an artificial lock, making sure lock flushed to disk dstore.db.insert({"identifier": "LOCK", "pid": 123}) dstore.db.storage.flush() self.assertTrue(dstore.locked) + # validate the PID of the lock self.assertEqual(_db_lockid(dstore.source), 123) - # now calling _source_create_delete with overwrite should have no - # effect - dstore._source_create_delete("overwrite", False) path = Path(dstore.source) - self.assertTrue(path.exists()) # unlocking with wrong pid has no effect dstore.unlock() self.assertTrue(dstore.locked) @@ -675,9 +720,90 @@ dstore.unlock(force=True) self.assertFalse(dstore.locked) dstore.close() - # and now a call to _source_create_delete will delete - dstore._source_create_delete("overwrite", False) - self.assertFalse(path.exists()) + + def test_db_creation(self): + """overwrite, raise, ignore conditions""" + + def create_tinydb(path, create, locked=False): + if path.exists(): + path.unlink() + + dstore = self.WriteClass(path, create=create) + for k in keys: + id_ = dstore.make_relative_identifier(k) + dstore.write(id_, self.data[k]) + + num_members = len(dstore) + + if locked: + dstore.db.insert({"identifier": "LOCK", "pid": 123}) + dstore.db.storage.flush() + dstore.db.storage.close() + else: + dstore.close() + + return num_members + + keys = list(self.data) + with TemporaryDirectory(dir=".") as dirname: + dirname = Path(dirname) + path = dirname / f"{self.basedir}.tinydb" + # correctly overwrite a tinydb irrespective of lock status + for locked in (False, True): + create_tinydb(path, create=True, locked=locked) + dstore = self.WriteClass(path, create=True, if_exists=OVERWRITE) + 
self.assertEqual(len(dstore), 0) + self.assertTrue(dstore.locked) + dstore.write("id.json", "some data") + dstore.close() + self.assertTrue(path.exists()) + + # correctly raises exception when RAISE irrespective of lock status + for locked in (False, True): + create_tinydb(path, create=True, locked=locked) + with self.assertRaises(FileExistsError): + self.WriteClass(path, create=True, if_exists=RAISE) + + # correctly warns if IGNORE, irrespective of lock status + for locked in (False, True): + num_members = create_tinydb(path, create=True, locked=locked) + dstore = self.WriteClass(path, create=True, if_exists=IGNORE) + self.assertEqual(len(dstore), num_members) + self.assertTrue(dstore.locked) + dstore.write("id.json", "some data") + self.assertEqual(len(dstore), num_members + 1) + dstore.close() + self.assertTrue(path.exists()) + + def test_db_creation2(self): + """handles create path argument""" + + with TemporaryDirectory(dir=".") as dirname: + dirname = Path(dirname) / "subdir" + path = dirname / f"{self.basedir}.tinydb" + + # raises FileNotFoundError when create is False and full path does + # not exist + with self.assertRaises(FileNotFoundError): + self.WriteClass(path, create=False) + + # correctly creates tinydb when full path does not exist + dstore = self.WriteClass(path, create=True) + dstore.close() + + def test_write_wout_suffix(self): + """appends suffix expected to records""" + with TemporaryDirectory(dir=".") as dirname: + dirname = Path(dirname) + path = dirname / f"{self.basedir}.tinydb" + dstore = self.WriteClass(path, create=True) + with self.assertRaises(ValueError): + dstore.write("1", dict(a=24, b="some text")) + + dstore.write("1.json", dict(a=24, b="some text")) + dstore.close() + dstore = self.ReadClass(path) + self.assertEqual(len(dstore), 1) dstore.close() diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_app/test_dist.py python-cogent-2021.10.12a1+dfsg/tests/test_app/test_dist.py --- 
python-cogent-2020.12.21a+dfsg/tests/test_app/test_dist.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_app/test_dist.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,5 +1,6 @@ import os +from tempfile import TemporaryDirectory from unittest import TestCase, main from numpy.testing import assert_allclose @@ -12,10 +13,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -205,6 +206,27 @@ aligner = align.progressive_align(model="WG01", guide_tree=treestring) _ = aligner(self.seqs5) + def test_composes_with_write_tabular(self): + """correctly links to tabular""" + with TemporaryDirectory(dir=".") as dirname: + writer = io.write_tabular(dirname) + dist_calc = dist_app.fast_slow_dist(distance="hamming", moltype="protein") + _ = dist_calc + writer + + def test_functions_as_composable(self): + """works as a composable app""" + from pathlib import Path + + loader = io.load_aligned(moltype="dna", format="paml") + dist = dist_app.fast_slow_dist("hamming", moltype="dna") + with TemporaryDirectory(dir=".") as dirname: + dirname = Path(dirname) + writer = io.write_tabular(dirname) + proc = loader + dist + writer + _ = proc("data/brca1_5.250.paml") + output = dirname / "brca1_5.250.tsv" + self.assertTrue(output.exists()) + if __name__ == "__main__": main() diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_app/test_evo.py python-cogent-2021.10.12a1+dfsg/tests/test_app/test_evo.py --- python-cogent-2020.12.21a+dfsg/tests/test_app/test_evo.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_app/test_evo.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,4 +1,5 @@ from os.path import dirname, join 
+from tempfile import TemporaryDirectory from unittest import TestCase, main from unittest.mock import MagicMock @@ -6,16 +7,22 @@ from cogent3 import load_aligned_seqs, make_aligned_seqs, make_tree from cogent3.app import evo as evo_app -from cogent3.app.result import hypothesis_result +from cogent3.app import io +from cogent3.app.composable import NotCompleted +from cogent3.app.result import ( + hypothesis_result, + model_collection_result, + model_result, +) from cogent3.evolve.models import get_model from cogent3.util.deserialise import deserialise_object __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -29,16 +36,17 @@ def test_model_str(self): """correct str representation""" model = evo_app.model("HKY85", time_het="max") - got = str(model) + got = " ".join(str(model).splitlines()) + expect = ( + "model(type='model', sm='HKY85', tree=None, unique_trees=False, " + "name=None, sm_args=None, lf_args=None, " + "time_het='max', param_rules=None, " + "opt_args=None, split_codons=False, " + "show_progress=False, verbose=False)" + ) self.assertEqual( got, - ( - "model(type='model', sm='HKY85', tree=None, " - "name=None, sm_args=None,\nlf_args=None, " - "time_het='max', param_rules=None, " - "opt_args=None,\nsplit_codons=False, " - "show_progress=False, verbose=False)" - ), + expect, ) def test_model_tree(self): @@ -56,6 +64,81 @@ with self.assertRaises(ValueError): hyp = evo_app.hypothesis(model1, model2) + def test_hyp_init(self): + """uses user specified init_alt function, or not""" + opt_args = dict(max_evaluations=25, limit_action="ignore") + model1 = evo_app.model("F81", opt_args=opt_args) + model2 = evo_app.model("HKY85", opt_args=opt_args) + # defaults to 
using null for init + hyp = evo_app.hypothesis(model1, model2) + _data = { + "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG", + "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG", + "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG", + } + aln = make_aligned_seqs(data=_data, moltype="dna") + result = hyp(aln) + self.assertEqual(result.df, 1) + + # user specified function + hyp = evo_app.hypothesis(model1, model2, init_alt=lambda x, y: x) + result = hyp(aln) + self.assertEqual(result.df, 1) + + def test_hyp_init_sequential(self): + """uses preceding model to initialise function""" + opt_args = dict(max_evaluations=15, limit_action="ignore") + model1 = evo_app.model("F81", opt_args=opt_args) + model2 = evo_app.model("HKY85", opt_args=opt_args) + model3 = evo_app.model("GTR", opt_args=opt_args) + # defaults to initialise model3 from model 2 from model1 + hyp = evo_app.hypothesis(model1, model2, model3, sequential=True) + _data = { + "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG", + "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG", + "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG", + } + aln = make_aligned_seqs(data=_data, moltype="dna") + result = hyp(aln) + self.assertTrue( + result["F81"].lf.lnL < result["HKY85"].lf.lnL < result["GTR"].lf.lnL + ) + + # can be set to False, in which case all models start at defaults + hyp = evo_app.hypothesis(model1, model2, model3, sequential=False) + result = hyp(aln) + self.assertFalse( + result["F81"].lf.lnL < result["HKY85"].lf.lnL < result["GTR"].lf.lnL + ) + + def test_model_collection_init_sequential(self): + """modelc collection uses preceding model to initialise function""" + opt_args = dict(max_evaluations=15, limit_action="ignore") + model1 = evo_app.model("F81", opt_args=opt_args) + model2 = evo_app.model("HKY85", opt_args=opt_args) + model3 = evo_app.model("GTR", opt_args=opt_args) + # defaults to initialise model3 from model 2 from model1 + mod_coll = evo_app.model_collection(model1, model2, model3, sequential=True) + _data = { + "Human": 
"ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG", + "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG", + "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG", + } + aln = make_aligned_seqs(data=_data, moltype="dna") + result = mod_coll(aln) + self.assertTrue( + result["F81"].lf.lnL < result["HKY85"].lf.lnL < result["GTR"].lf.lnL + ) + + # can be set to False, in which case all models start at defaults + mod_coll = evo_app.hypothesis(model1, model2, model3, sequential=False) + result = mod_coll(aln) + self.assertFalse( + result["F81"].lf.lnL < result["HKY85"].lf.lnL < result["GTR"].lf.lnL + ) + + self.assertIsInstance(result, model_collection_result) + def test_model_time_het(self): """support lf time-het argument edge_sets""" _data = { @@ -146,14 +229,14 @@ model1 = evo_app.model("HKY85") model2 = evo_app.model("HKY85", name="hky85-max-het", time_het="max") hyp = evo_app.hypothesis(model1, model2) - got = str(hyp) + got = " ".join(str(hyp).splitlines()) expect = ( "hypothesis(type='hypothesis', null='HKY85', " - "alternates=(model(type='model',\nsm='HKY85', tree=None, " - "name='hky85-max-het', sm_args=None, lf_args=None,\n" + "alternates=(model(type='model', sm='HKY85', tree=None, unique_trees=False, " + "name='hky85-max-het', sm_args=None, lf_args=None, " "time_het='max', param_rules=None, opt_args=None," - " split_codons=False,\nshow_progress=False, verbose=False),)," - " init_alt=None)" + " split_codons=False, show_progress=False, verbose=False),)," + " sequential=True, init_alt=None)" ) self.assertEqual(got, expect) @@ -261,6 +344,42 @@ got = result.total_length(length_as="ENS") assert_allclose(got, expect) + def test_model_tree_unique_trees(self): + """handles case of using unique trees for each alignment""" + with self.assertRaises(AssertionError): + model1 = evo_app.model("GN", tree="(a,b,c)", unique_trees=True) + _data1 = { + "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG", + "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG", + "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG", + } + _data2 = { + "Dog": 
"ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG", + "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG", + "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG", + } + + aln1 = make_aligned_seqs(data=_data1, moltype="dna") + aln2 = make_aligned_seqs(data=_data2, moltype="dna") + model = evo_app.model( + "GN", + unique_trees=True, + opt_args=dict(max_evaluations=2, limit_action="ignore"), + ) + for aln in (aln1, aln2): + result = model(aln) + self.assertIsInstance(result, model_result) + + # but the second one fails if unique_trees=False + model = evo_app.model( + "GN", + unique_trees=False, + opt_args=dict(max_evaluations=2, limit_action="ignore"), + ) + for aln, expect_type in ((aln1, model_result), (aln2, NotCompleted)): + result = model(aln) + self.assertIsInstance(result, expect_type) + def _make_getter(val): def call(**kwargs): @@ -270,24 +389,25 @@ def _make_hyp(aic1, aic2, aic3, nfp1, nfp2, nfp3): - null = MagicMock() - null.name = "unrooted" - null.lf.get_aic = _make_getter(aic1) - null.nfp = nfp1 - alt1 = MagicMock() - alt1.name = "alt1" - alt1.lf.get_aic = _make_getter(aic2) - alt1.nfp = nfp2 - alt2 = MagicMock() - alt2.name = "alt2" - alt2.lf.get_aic = _make_getter(aic3) - alt2.nfp = nfp3 + null = _make_mock_result("unrooted", aic1, nfp1) + alt1 = _make_mock_result("alt1", aic2, nfp2) + alt2 = _make_mock_result("alt2", aic3, nfp3) + # this is a really ugly hack to address type validation on result setitem! 
+ hypothesis_result._item_types = ("model_result", "MagicMock") hyp = hypothesis_result("unrooted", source="something") for m in (null, alt1, alt2): hyp[m.name] = m return hyp +def _make_mock_result(arg0, arg1, arg2): + result = MagicMock() + result.name = arg0 + result.lf.get_aic = _make_getter(arg1) + result.nfp = arg2 + return result + + class TestHypothesisResult(TestCase): def test_get_best_model(self): """should correctly identify the best model""" @@ -378,7 +498,6 @@ tree = "(Mouse,Human,Opossum)" m1 = evo_app.model("JTT92", tree=tree) r = m1(aln) - print(r) self.assertEqual(r.origin, "model") @@ -683,6 +802,15 @@ result = strapper(aln) self.assertIsInstance(result, evo_app.bootstrap_result) + def test_bootstrap_composability(self): + """can be composed with load_db and write_db""" + m1 = evo_app.model("F81") + m2 = evo_app.model("HKY85") + hyp = evo_app.hypothesis(m1, m2) + with TemporaryDirectory(dir=".") as dirname: + path = join(dirname, "delme.tinydb") + _ = io.load_db() + evo_app.bootstrap(hyp, num_reps=2) + io.write_db(path) + if __name__ == "__main__": main() diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_app/test_init.py python-cogent-2021.10.12a1+dfsg/tests/test_app/test_init.py --- python-cogent-2020.12.21a+dfsg/tests/test_app/test_init.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_app/test_init.py 2021-10-12 00:17:34.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -43,6 +43,7 @@ sample.omit_duplicated(), sample.take_codon_positions(1), sample.take_named_seqs(), + sample.take_n_seqs(2), sample.trim_stop_codons(gc=1), translate.select_translatable(), tree.quick_tree(), 
@@ -52,7 +53,7 @@ return applications -class TestAvalableApps(TestCase): +class TestAvailableApps(TestCase): def test_available_apps(self): """available_apps returns a table""" from cogent3.util.table import Table diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_app/test_io.py python-cogent-2021.10.12a1+dfsg/tests/test_app/test_io.py --- python-cogent-2020.12.21a+dfsg/tests/test_app/test_io.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_app/test_io.py 2021-10-12 00:17:34.000000000 +0000 @@ -4,10 +4,9 @@ import shutil import zipfile -from os.path import basename, join +from os.path import join from tempfile import TemporaryDirectory from unittest import TestCase, main -from unittest.mock import Mock, patch import numpy @@ -17,25 +16,33 @@ from cogent3.app import align as align_app from cogent3.app import io as io_app from cogent3.app.composable import NotCompleted -from cogent3.app.data_store import WritableZippedDataStore from cogent3.app.io import write_db +from cogent3.app.result import generic_result from cogent3.core.alignment import ArrayAlignment, SequenceCollection from cogent3.core.profile import PSSM, MotifCountsArray, MotifFreqsArray from cogent3.evolve.fast_distance import DistanceMatrix from cogent3.maths.util import safe_log from cogent3.util.table import Table +from cogent3.util.union_dict import UnionDict __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" +def _get_generic_result(source): + """creates a generic result with a DNA moltype as the single value""" + gr = generic_result(source=source) + gr["dna"] = DNA + return gr + + class TestIo(TestCase): basedir = "data" @@ -114,6 +121,14 @@ for result in results: 
self.assertIsInstance(result, ArrayAlignment) + def test_load_aligned_paml(self): + """should handle paml too""" + paml_paths = io_app.get_data_store(self.basedir, suffix="paml") + loader = io_app.load_aligned(format="paml") + results = [loader(m) for m in paml_paths] + for result in results: + self.assertIsInstance(result, ArrayAlignment) + def test_load_aligned_from_zip(self): """correctly loads aligned seqs from a zip archive""" @@ -199,17 +214,16 @@ with TemporaryDirectory(dir=".") as dirname: outpath = join(dirname, "delme") writer = write_db(outpath, create=True, if_exists="ignore") - mock = patch("data.source", autospec=True) - mock.to_json = DNA.to_json - mock.source = join("blah", "delme.json") - got = writer(mock) + gr = _get_generic_result(join("blah", "delme.json")) + got = writer(gr) writer.data_store.db.close() dstore = io_app.get_data_store(f"{outpath}.tinydb", suffix="json") reader = io_app.load_db() got = reader(dstore[0]) dstore.close() - self.assertIsInstance(got, DNA.__class__) - self.assertEqual(got, DNA) + got.deserialised_values() + self.assertIsInstance(got["dna"], DNA.__class__) + self.assertEqual(got["dna"], DNA) def test_write_db_load_db2(self): """correctly write/load built-in python from tinydb""" @@ -225,6 +239,16 @@ dstore.close() self.assertEqual(got, data) + def test_write_db_invalid(self): + """value error if identifier does not match data.info.source""" + with TemporaryDirectory(dir=".") as dirname: + outpath = join(dirname, "delme") + writer = write_db(outpath, create=True, if_exists="ignore") + data = UnionDict(a=[1, 2], b="string", source="delme2.json") + got = writer(data, identifier=join("blah", "delme.json")) + self.assertTrue("ValueError" in got.message) + writer.data_store.db.close() + def test_load_db_failure_json_file(self): """informative load_db error message when given a json file path""" # todo this test has a trapped exception about being unable to delete @@ -232,10 +256,8 @@ with TemporaryDirectory(dir=".") as 
dirname: outpath = join(dirname, "delme") writer = write_db(outpath, create=True, if_exists="ignore") - mock = patch("data.source", autospec=True) - mock.to_json = DNA.to_json - mock.source = join("blah", "delme.json") - got = writer(mock) + gr = _get_generic_result(join("blah", "delme.json")) + got = writer(gr) writer.data_store.db.close() dstore = io_app.get_data_store(f"{outpath}.tinydb", suffix="json") reader = io_app.load_db() @@ -268,14 +290,6 @@ result = load_table(outpath) self.assertIsInstance(result, NotCompleted) - with TemporaryDirectory(dir=".") as dirname: - outpath = join(dirname, "delme.zip") - dstore = WritableZippedDataStore(outpath, suffix="tsv", create=True) - dstore.write("sample1.tsv", table.to_string("tsv")) - new = load_table(dstore[0]) - self.assertEqual(type(new[0, "B"]), type(table[0, "B"])) - self.assertEqual(type(new[0, "A"]), type(table[0, "A"])) - def test_write_tabular_motif_counts_array(self): """correctly writes tabular data for MotifCountsArray""" @@ -456,34 +470,19 @@ """correctly writes an object with info attribute from json""" # create a mock object that pretends like it's been derived from # something - from cogent3.util.union_dict import UnionDict + from cogent3.app.result import generic_result with TemporaryDirectory(dir=".") as dirname: outdir = join(dirname, "delme") - mock = Mock() - mock.to_rich_dict = DNA.to_rich_dict - mock.info = UnionDict(source=join("blah", "delme.json")) - writer = io_app.write_json(outdir, create=True) - _ = writer(mock) - reader = io_app.load_json() - got = reader(join(outdir, "delme.json")) - self.assertEqual(got, DNA) - # now with a zipped archive - with TemporaryDirectory(dir=".") as dirname: - outdir = join(dirname, "delme.zip") - mock = Mock() - mock.to_rich_dict = DNA.to_rich_dict - mock.info = UnionDict(source=join("blah", "delme.json")) + obj = generic_result(source=join("blah", "delme.json")) + obj["dna"] = DNA writer = io_app.write_json(outdir, create=True) - identifier = writer(mock) 
+ _ = writer(obj) reader = io_app.load_json() - got = reader(writer.data_store[0]) - self.assertEqual(got, DNA) - expect = join(outdir.replace(".zip", ""), "delme.json") - if expect.startswith("." + os.sep): - expect = expect[2:] - self.assertEqual(identifier, expect) + got = reader(join(outdir, "delme.json")) + got.deserialised_values() + self.assertEqual(got["dna"], DNA) def test_write_json_no_info(self): """correctly writes an object with out an info attribute from json""" @@ -491,31 +490,13 @@ # something with TemporaryDirectory(dir=".") as dirname: outdir = join(dirname, "delme") - mock = patch("data.source", autospec=True) - mock.to_rich_dict = DNA.to_rich_dict - mock.source = join("blah", "delme.json") + gr = _get_generic_result(join("blah", "delme.json")) writer = io_app.write_json(outdir, create=True) - _ = writer(mock) + _ = writer(gr) reader = io_app.load_json() got = reader(writer.data_store[0]) - self.assertEqual(got, DNA) - - # now with a zipped archive - with TemporaryDirectory(dir=".") as dirname: - outdir = join(dirname, "delme.zip") - mock = patch("data.source", autospec=True) - mock.to_rich_dict = DNA.to_rich_dict - mock.source = join("blah", "delme.json") - writer = io_app.write_json(outdir, create=True) - identifier = writer(mock) - reader = io_app.load_json() - # checking loadable from a data store member too - got = reader(writer.data_store[0]) - self.assertEqual(got, DNA) - expect = join(outdir.replace(".zip", ""), "delme.json") - if expect.startswith("." 
+ os.sep): - expect = expect[2:] - self.assertEqual(identifier, expect) + got.deserialised_values() + self.assertEqual(got["dna"], DNA) def test_restricted_usage_of_tinydb_suffix(self): """can only use tinydb in a load_db, write_db context""" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_app/test_result.py python-cogent-2021.10.12a1+dfsg/tests/test_app/test_result.py --- python-cogent-2020.12.21a+dfsg/tests/test_app/test_result.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_app/test_result.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,24 +1,28 @@ from unittest import TestCase, main -from cogent3 import make_aligned_seqs +from cogent3 import make_aligned_seqs, make_table from cogent3.app import evo as evo_app from cogent3.app.result import ( generic_result, + hypothesis_result, model_collection_result, model_result, + tabular_result, ) from cogent3.util.deserialise import deserialise_object __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" +from cogent3.util.dict_array import DictArray + class TestGenericResult(TestCase): def test_deserialised_values(self): @@ -62,8 +66,32 @@ keys = result.keys() self.assertEqual(keys, ["key"]) + def test_invalid_setitem(self): + """generic_result raise TypeError if trying to set invalid item type for json""" + gr = generic_result("null") + with self.assertRaises(TypeError): + gr["null"] = {0, 23} + class TestModelResult(TestCase): + def test_repr(self): + """does not fail""" + _data = { + "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG", + "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG", + "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG", + } + aln = make_aligned_seqs(data=_data, moltype="dna") + mod = 
evo_app.model( + "F81", + show_progress=False, + opt_args=dict(max_evaluations=1, limit_action="ignore"), + ) + result = mod(aln) + self.assertIsInstance(repr(result), str) + # no values set + self.assertIsInstance(repr(model_result(source="blah")), str) + def test_model_result_alignment(self): """returns alignment from lf""" _data = { @@ -97,7 +125,6 @@ ) result = mod(aln) self.assertEqual(result.name, result.lf.name) - print(result) def test_model_result_alignment_split_pos_model(self): """returns alignment from lf with split codon positions""" @@ -153,9 +180,7 @@ result = mod(aln) self.assertTrue(len(result.tree), 3) # check the trees are different by summing lengths - lengths = set() - for i, t in result.tree.items(): - lengths.add(t.total_length()) + lengths = {t.total_length() for _, t in result.tree.items()} self.assertTrue(len(lengths) > 1) def test_model_result_simulate_alignment(self): @@ -214,6 +239,17 @@ with self.assertRaises(TypeError): r["name"] = aln + def test_repr_str(self): + """it works even when no values""" + mr = model_result(source="blah") + self.assertIsInstance(repr(mr), str) + + def test_model_result_invalid_setitem(self): + """model_result raise TypeError if trying to set incorrect item type""" + mr = model_result() + with self.assertRaises(TypeError): + mr["null"] = 23 + class TestModelCollectionResult(TestCase): _model_results = {} @@ -235,10 +271,15 @@ model2 = evo_app.model( "HKY85", opt_args=dict(max_evaluations=25, limit_action="ignore") ) + model3 = evo_app.model( + "GTR", opt_args=dict(max_evaluations=25, limit_action="ignore") + ) mr1 = model1(aln) mr2 = model2(aln) + mr3 = model3(aln) self._model_results[mr1.name] = mr1 self._model_results[mr2.name] = mr2 + self._model_results[mr3.name] = mr3 def test_get_best_model(self): """should correctly identify the best model""" @@ -286,8 +327,32 @@ m = got.select_models() self.assertIsInstance(m[0], model_result) + def test_to_hypothesis(self): + """creates a hypothesis_result from two 
model results""" + mr = model_collection_result(source="blah") + mr.update(self._model_results) + hyp = mr.get_hypothesis_result("F81", "HKY85") + self.assertIsInstance(hyp, hypothesis_result) + self.assertEqual(hyp.null.name, "F81") + + def test_repr_str(self): + """it works even when no values""" + mr = model_collection_result(source="blah") + self.assertIsInstance(repr(mr), str) + + def test_model_collection_result_invalid_setitem(self): + """model_collection_result raise TypeError if trying to set incorrect item type""" + mcr = model_collection_result() + with self.assertRaises(TypeError): + mcr["null"] = 23 + class TestHypothesisResult(TestCase): + def test_repr_str(self): + """it works even when no values""" + hr = hypothesis_result(name_of_null="null", source="blah") + self.assertIsInstance(repr(hr), str) + def test_pvalue(self): """hypothesis test p-value property""" _data = { @@ -306,6 +371,29 @@ result = hyp(aln) self.assertTrue(0 <= result.pvalue <= 1) + def test_invalid_setitem(self): + """hypothesis_result raise TypeError if trying to set incorrect item type""" + hr = hypothesis_result("null") + with self.assertRaises(TypeError): + hr["null"] = {0, 23} + + +class TestTabularResult(TestCase): + def test_valid_setitem(self): + """tabular_result works when set correct item type""" + tr = tabular_result("null") + tr["result"] = make_table(data={"A": [0, 1]}) + darr = DictArray({"A": [0, 1]}) + tr["result2"] = darr + js = tr.to_json() + self.assertIsInstance(js, str) + + def test_invalid_setitem(self): + """tabular_result raise TypeError if trying to set incorrect item type""" + tr = tabular_result("null") + with self.assertRaises(TypeError): + tr["null"] = {0, 23} + if __name__ == "__main__": main() diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_app/test_sample.py python-cogent-2021.10.12a1+dfsg/tests/test_app/test_sample.py --- python-cogent-2020.12.21a+dfsg/tests/test_app/test_sample.py 2020-12-20 23:35:03.000000000 +0000 +++ 
python-cogent-2021.10.12a1+dfsg/tests/test_app/test_sample.py 2021-10-12 00:17:34.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -415,6 +415,42 @@ }, ) + def test_concat_handles_moltype(self): + """coerces to type""" + alns = [ + make_aligned_seqs(data=d, moltype=DNA) + for d in [ + {"seq1": "AAA", "seq2": "AAA", "seq3": "AAA"}, + {"seq1": "TTT", "seq2": "TTT", "seq3": "TTT", "seq4": "TTT"}, + {"seq1": "CC", "seq2": "CC", "seq3": "CC"}, + ] + ] + ccat = sample.concat() + got = ccat(alns) + self.assertIsInstance(got.moltype, type(DNA)) + + def test_concat_validates_type(self): + """raises TypeError if not known alignment type""" + data = [ + {"seq1": "AAA", "seq2": "AAA", "seq3": "AAA"}, + make_aligned_seqs( + data={"seq1": "TTT", "seq2": "TTT", "seq3": "TTT", "seq4": "TTT"}, + moltype=DNA, + ), + ] + ccat = sample.concat() + # triggered by first record + with self.assertRaises(TypeError): + ccat(data) + + # triggered by second record + with self.assertRaises(TypeError): + ccat(data[::-1]) + + # triggered by no data + with self.assertRaises(ValueError): + ccat([]) + def test_trim_stop_codons(self): """trims stop codons using the specified genetic code""" trimmer = sample.trim_stop_codons() # defaults to standard code @@ -449,6 +485,70 @@ expect = {"seq1": "AAATTTCCC", "seq2": "AAATTT"} self.assertEqual(got.to_dict(), expect) + def test_take_n_seqs(self): + """select specified number of sequences from a collection""" + seqs1 = make_unaligned_seqs( + data={ + "a": "ACGT", + "b": "ACG-", + "c": "ACGN", + "d": "ACGG", + "e": "ACGG", + "k": "ACGG", + "f": "RAAA", + "g": "YAAA", + "h": "GGGG", + } + ) + seqs2 = seqs1.take_seqs(["a", 
"c", "e", "g", "h"]) + + # by order, fixed + take = sample.take_n_seqs(3, fixed_choice=True) + got = take(seqs1) + self.assertEqual(len(got.names), 3) + # this should return NotCompleted because it applies the names present in 1 to the next one + got = take(seqs2) + self.assertIsInstance(got, NotCompleted) + + take = sample.take_n_seqs(30) + # this should fail because too few seqs + got = take(seqs1) + self.assertIsInstance(got, NotCompleted) + + # by order, not fixed + take = sample.take_n_seqs(3, fixed_choice=False) + got1 = take(seqs1) + got2 = take(seqs2) + self.assertNotEqual(set(got1.names), set(got2.names)) + + # random choice, fixed + take = sample.take_n_seqs(3, random=True, fixed_choice=True) + self.assertEqual(take._fixed_choice, True) + + got1 = take(seqs2) + got2 = take(seqs1) + self.assertEqual(got1.names, got2.names) + + # random choice, not fixed + take = sample.take_n_seqs(2, random=True, fixed_choice=False) + self.assertEqual(take._fixed_choice, False) + # testing this is hard, we simply expect the labels to differ on subsequent call + # the probability of drawing a specific pair of names on one call is 1/(9 choose 2) = 1/36 + # at n = 11, the probability all the pairs will be identical is ~=0 + first_call = take(seqs1) + for _ in range(11): + got = take(seqs1) + different = first_call.names != got.names + if different: + break + + self.assertTrue(different, msg="failed to generate different random sample") + + # try setting the seed + take = sample.take_n_seqs(2, random=True, seed=123) + got = take(seqs1) + self.assertNotIsInstance(got, NotCompleted) + if __name__ == "__main__": main() diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_app/test_translate.py python-cogent-2021.10.12a1+dfsg/tests/test_app/test_translate.py --- python-cogent-2020.12.21a+dfsg/tests/test_app/test_translate.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_app/test_translate.py 2021-10-12 00:17:34.000000000 +0000 @@ -13,10 +13,10 @@ 
__author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_app/test_tree.py python-cogent-2021.10.12a1+dfsg/tests/test_app/test_tree.py --- python-cogent-2020.12.21a+dfsg/tests/test_app/test_tree.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_app/test_tree.py 2021-10-12 00:17:34.000000000 +0000 @@ -20,10 +20,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -106,7 +106,6 @@ # tests when distances contain None data = dict( seq1="AGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT", - seq2="TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC", ) aln2 = make_aligned_seqs(data=data, moltype=DNA) tree2 = proc(aln2) @@ -129,7 +128,7 @@ self.assertIsInstance(tree, PhyloNode) self.assertIsNotNone(tree.children) self.assertEqual( - set(tree.get_tip_names()), set.union(*(set(tup) for tup in data.keys())) + set(tree.get_tip_names()), set.union(*(set(tup) for tup in data)) ) data = { @@ -159,7 +158,7 @@ self.assertIsInstance(tree, PhyloNode) self.assertIsNotNone(tree.children) self.assertEqual( - set(tree.get_tip_names()), set.union(*(set(tup) for tup in data.keys())) + set(tree.get_tip_names()), set.union(*(set(tup) for tup in data)) ) data = { @@ -181,7 +180,7 @@ self.assertIsInstance(tree, PhyloNode) self.assertIsNotNone(tree.children) self.assertEqual( - 
set(tree.get_tip_names()), set.union(*(set(tup) for tup in data.keys())) + set(tree.get_tip_names()), set.union(*(set(tup) for tup in data)) ) data = { @@ -234,7 +233,14 @@ self.assertIsInstance(tree, PhyloNode) self.assertIsNotNone(tree.children) self.assertEqual( - set(tree.get_tip_names()), set.union(*(set(tup) for tup in data.keys())) + set(tree.get_tip_names()), set.union(*(set(tup) for tup in data)) + ) + + data = {"a": {"b": 0.1, "a": 0.0}, "b": {"a": 0.1, "b": 0.0}} + darr = DistanceMatrix(data) + tree = quick_tree.quick_tree(darr) + self.assertEqual( + set(tree.get_tip_names()), set.union(*(set(tup) for tup in data)) ) def test_uniformize_tree(self): diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_cluster/__init__.py python-cogent-2021.10.12a1+dfsg/tests/test_cluster/__init__.py --- python-cogent-2020.12.21a+dfsg/tests/test_cluster/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_cluster/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -2,10 +2,10 @@ __all__ = ["test_UPGMA"] __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Catherine Lozuopone", "Peter Maxwell", "Rob Knight", "Justin Kuczynski"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_cluster/test_UPGMA.py python-cogent-2021.10.12a1+dfsg/tests/test_cluster/test_UPGMA.py --- python-cogent-2020.12.21a+dfsg/tests/test_cluster/test_UPGMA.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_cluster/test_UPGMA.py 2021-10-12 00:17:34.000000000 +0000 @@ -24,10 +24,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", 
"Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_core/__init__.py python-cogent-2021.10.12a1+dfsg/tests/test_core/__init__.py --- python-cogent-2020.12.21a+dfsg/tests/test_core/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_core/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -17,7 +17,7 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Catherine Lozupone", "Peter Maxwell", @@ -29,7 +29,7 @@ "Justin Kuczynski", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_core/test_alignment.py python-cogent-2021.10.12a1+dfsg/tests/test_core/test_alignment.py --- python-cogent-2020.12.21a+dfsg/tests/test_core/test_alignment.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_core/test_alignment.py 2021-10-12 00:17:34.000000000 +0000 @@ -63,7 +63,7 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Jeremy Widmann", "Catherine Lozuopone", @@ -73,7 +73,7 @@ "Jan Kosinski", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -278,7 +278,7 @@ self.assertEqual(git([]), "empty") def test_init_aln(self): - """ SequenceCollection should init from existing alignments""" + """SequenceCollection should init from existing alignments""" exp = self.Class(["AAA", "AAA"]) x = self.Class(self.a) 
y = self.Class(self.b) @@ -1663,7 +1663,7 @@ ) def test_filter_drop_remainder(self): - """filter allows dropping """ + """filter allows dropping""" raw = {"a": "ACGACGACG", "b": "CCC---CCC", "c": "AAAA--AAA"} aln = self.Class(raw) func = _make_filter_func(aln) @@ -2115,16 +2115,6 @@ ] self.assertEqual(got, "\n".join(expect)) - def test_to_pretty_deprecation_warning(self): - """produce correct pretty print formatted text""" - seqs = {"seq1": "ACGAANGA", "seq2": "-CGAACGA", "seq3": "ATGAACGA"} - expect = ["seq1 ACGAANGA", "seq2 -....C..", "seq3 .T...C.."] - - aln = self.Class(data=seqs, moltype=DNA) - # should raise warning here - with self.assertWarns(DeprecationWarning): - aln.to_pretty(name_order=["seq1", "seq2", "seq3"], interleave_len=4) - def test_to_html(self): """produce correct html formatted text""" seqs = {"seq1": "ACG", "seq2": "-CT"} @@ -2169,21 +2159,16 @@ # order now changes self.assertTrue(got.find(ref_row) < got.find(other_row)) - def test_to_html_deprecation_warning(self): - """ should raise warning using wrap and not interleave_len""" - seqs = {"seq1": "ACG", "seq2": "-CT"} - - aln = self.Class(data=seqs, moltype=DNA) - # specify interleave_len in 2 cases, wrap specified and not specified - # both should raise warnings - with self.assertWarns(DeprecationWarning): - aln.to_html(ref_name="seq2", interleave_len=40) - def test_variable_positions(self): """correctly identify variable positions""" - new_seqs = {"seq1": "ACGTACGT", "seq2": "ACCGACGT", "seq3": "ACGTACGT"} + new_seqs = {"A": "-CG-C", "B": "ACAA?", "C": "GCGAC"} + aln = self.Class(data=new_seqs, moltype=DNA) + self.assertEqual(aln.variable_positions(include_gap_motif=True), [0, 2, 3, 4]) + self.assertEqual(aln.variable_positions(include_gap_motif=False), [0, 2]) + new_seqs = {"A": "GCGAC", "B": "GCGAC", "C": "GCGAC"} aln = self.Class(data=new_seqs, moltype=DNA) - self.assertEqual(aln.variable_positions(), [2, 3]) + self.assertEqual(aln.variable_positions(include_gap_motif=True), []) + 
self.assertEqual(aln.variable_positions(include_gap_motif=False), []) def test_to_type(self): """correctly interconvert between alignment types""" @@ -2982,7 +2967,7 @@ self.assertEqual(len(new_seq.data.annotations), 2) def test_deepcopy2(self): - """"Aligned.deepcopy correctly handles gapped sequences""" + """ "Aligned.deepcopy correctly handles gapped sequences""" seqs = self.Class( data={ "a": "CAGATTTGGCAGTT-", diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_core/test_alphabet.py python-cogent-2021.10.12a1+dfsg/tests/test_core/test_alphabet.py --- python-cogent-2020.12.21a+dfsg/tests/test_core/test_alphabet.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_core/test_alphabet.py 2021-10-12 00:17:34.000000000 +0000 @@ -21,7 +21,7 @@ uint16, uint32, ) -from cogent3.core.moltype import RNA +from cogent3.core.moltype import RNA, get_moltype DnaBases = CharAlphabet("TCAG") @@ -29,10 +29,10 @@ AminoAcids = CharAlphabet("ACDEFGHIKLMNPQRSTVWY") __author__ = "Rob Knight, Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Rob Knight", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -221,7 +221,7 @@ """CharAlphabet init should make correct translation tables""" r = CharAlphabet("UCAG") i2c, c2i = r._indices_nums_to_chars, r._chars_to_indices - s = array([0, 0, 1, 0, 3, 2], "b").tostring() + s = array([0, 0, 1, 0, 3, 2], "b").tobytes() self.assertEqual(s.translate(i2c), b"UUCUGA") self.assertEqual("UUCUGA".translate(c2i), "\000\000\001\000\003\002") @@ -288,6 +288,20 @@ rt2 = r.Triples self.assertIs(rt, rt2) + def test_from_seq_to_array(self): + """convert a sequence into indices""" + dna = get_moltype("dna") + seq = dna.make_seq("ACGG") + got = 
dna.alphabet.from_seq_to_array(seq) + assert_equal(got, array([dna.alphabet.index(b) for b in seq])) + + def test_from_ordinals_to_seq(self): + """check indices convert to a sequence""" + indices = [2, 1, 3, 3] + dna = get_moltype("dna") + got = dna.alphabet.from_ordinals_to_seq(indices) + self.assertEqual(got, dna.make_seq("ACGG")) + class JointEnumerationTests(TestCase): """Tests of JointEnumerations.""" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_core/test_annotation.py python-cogent-2021.10.12a1+dfsg/tests/test_core/test_annotation.py --- python-cogent-2020.12.21a+dfsg/tests/test_core/test_annotation.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_core/test_annotation.py 2021-10-12 00:17:34.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_core/test_core_standalone.py python-cogent-2021.10.12a1+dfsg/tests/test_core/test_core_standalone.py --- python-cogent-2020.12.21a+dfsg/tests/test_core/test_core_standalone.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_core/test_core_standalone.py 2021-10-12 00:17:34.000000000 +0000 @@ -29,10 +29,10 @@ __author__ = "Peter Maxwell, Gavin Huttley and Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -215,11 +215,7 @@ aln = 
load_aligned_seqs(filename, **kw) if test_write: suffix, cmpr = get_format_suffixes(filename) - if not cmpr: - cmpr = "" - else: - cmpr = f".{cmpr}" - + cmpr = "" if not cmpr else f".{cmpr}" fn = tempfile.mktemp(suffix="." + suffix + cmpr) aln.write(filename=fn) os.remove(fn) @@ -326,14 +322,14 @@ sub_align = self.alignment.take_seqs(subset) new = sub_align.names new.sort() - assert new == subset, "included subset didn't work %s, %s" % (new, subset) + assert new == subset, f"included subset didn't work {new}, {subset}" # testing exclusion of one to_exclude = ["NineBande"] sub_align = self.alignment.take_seqs(to_exclude, negate=True) new = sub_align.names new.sort() - assert new == subset, "excluded subset didn't work %s, %s" % (new, subset) + assert new == subset, f"excluded subset didn't work {new}, {subset}" # testing exclusion of two subset = ["DogFaced", "HowlerMon", "NineBande"] @@ -342,7 +338,7 @@ sub_align = self.alignment.take_seqs(to_exclude, negate=True) new = sub_align.names new.sort() - assert new == subset, "excluded subset didn't work %s, %s" % (new, subset) + assert new == subset, f"excluded subset didn't work {new}, {subset}" def test_slice_align(self): """test slicing of sequences""" @@ -380,9 +376,7 @@ alignment = make_aligned_seqs( data={"seq1": "ACGTACGT", "seq2": "ACGTACGT", "seq3": "ACGTACGT"} ) - result = [] - for bit in alignment.sliding_windows(5, 2): - result += [bit] + result = [bit for bit in alignment.sliding_windows(5, 2)] self.assertEqual( result[0].to_dict(), {"seq3": "ACGTA", "seq2": "ACGTA", "seq1": "ACGTA"} ) diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_core/test_features.py python-cogent-2021.10.12a1+dfsg/tests/test_core/test_features.py --- python-cogent-2020.12.21a+dfsg/tests/test_core/test_features.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_core/test_features.py 2021-10-12 00:17:34.000000000 +0000 @@ -7,17 +7,17 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 
2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" class FeaturesTest(TestCase): - """ Tests of features in core """ + """Tests of features in core""" def setUp(self): # A Sequence with a couple of exons on it. @@ -176,7 +176,7 @@ self.assertEqual(len(self.s.annotations), 2) def test_feature_reverse(self): - """ reverse complement of features""" + """reverse complement of features""" # When dealing with sequences that can be reverse complemented # (e.g. DnaSequence) features are **not** reversed. @@ -196,7 +196,7 @@ self.assertEqual(str(minus_cds.get_slice()), "GGGGCCCCCTTTTTTTTTT") def test_feature_from_alignment(self): - """ seq features obtained from the alignment""" + """seq features obtained from the alignment""" # Sequence features can be accessed via a containing Alignment: @@ -553,7 +553,7 @@ ) def test_constructor_equivalence(self): - """""" + """ """ # These different constructions should generate the same output. 
data = [["human", "CGAAACGTTT"], ["mouse", "CTAAACGTCG"]] diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_core/test_genetic_code.py python-cogent-2021.10.12a1+dfsg/tests/test_core/test_genetic_code.py --- python-cogent-2020.12.21a+dfsg/tests/test_core/test_genetic_code.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_core/test_genetic_code.py 2021-10-12 00:17:34.000000000 +0000 @@ -16,10 +16,10 @@ __author__ = "Greg Caporaso" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Greg Caporaso", "Rob Knight", "Peter Maxwell", "Thomas La"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Greg Caporaso" __email__ = "caporaso@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_core/test_info.py python-cogent-2021.10.12a1+dfsg/tests/test_core/test_info.py --- python-cogent-2020.12.21a+dfsg/tests/test_core/test_info.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_core/test_info.py 2021-10-12 00:17:34.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_core/test_location.py python-cogent-2021.10.12a1+dfsg/tests/test_core/test_location.py --- python-cogent-2020.12.21a+dfsg/tests/test_core/test_location.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_core/test_location.py 2021-10-12 00:17:34.000000000 +0000 @@ -4,14 +4,21 @@ """ from unittest import TestCase, main -from cogent3.core.location import Map, Range, 
RangeFromString, Span +from cogent3 import DNA +from cogent3.core.location import ( + Map, + Range, + RangeFromString, + Span, + gap_coords_to_map, +) __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -528,6 +535,38 @@ coords = map.get_coordinates() self.assertEqual(coords, spans) + def test_get_gap_coords(self): + """returns gap start and lengths""" + m, seq = DNA.make_seq("-AC--GT-TTA--").parse_out_gaps() + got = m.get_gap_coordinates() + self.assertEqual(dict(got), {0: 1, 2: 2, 4: 1, 7: 2}) + + def test_gap_coords_to_map(self): + """construct a Map from coordinates of gap alone""" + m, seq = DNA.make_seq("-AC--GT-TTA--").parse_out_gaps() + gap_coords = {0: 1, 2: 2, 4: 1, 7: 2} + seqlen = 70 + got = gap_coords_to_map(gap_coords, seqlen) + self.assertEqual(len(got), seqlen + sum(gap_coords.values())) + + gap_coords = {5: 2, 17: 3, 10: 2} + seqlen = 20 + got = gap_coords_to_map(gap_coords, seqlen) + self.assertEqual(len(got), sum(gap_coords.values()) + seqlen) + + # roundtrip from Map.get_gap_coordinates() + self.assertEqual(dict(got.get_gap_coordinates()), gap_coords) + + # and no gaps + m, seq = DNA.make_seq("ACGTTTA").parse_out_gaps() + got = gap_coords_to_map({}, len(seq)) + self.assertEqual(len(got), len(m)) + self.assertEqual(got.get_coordinates(), m.get_coordinates()) + + # and gaps outside sequence + with self.assertRaises(ValueError): + got = gap_coords_to_map({20: 1}, len(seq)) + # run the following if invoked from command-line if __name__ == "__main__": diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_core/test_maps.py python-cogent-2021.10.12a1+dfsg/tests/test_core/test_maps.py --- python-cogent-2020.12.21a+dfsg/tests/test_core/test_maps.py 2020-12-20 
23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_core/test_maps.py 2021-10-12 00:17:34.000000000 +0000 @@ -8,10 +8,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Rob Knight", "Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_core/test_moltype.py python-cogent-2021.10.12a1+dfsg/tests/test_core/test_moltype.py --- python-cogent-2020.12.21a+dfsg/tests/test_core/test_moltype.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_core/test_moltype.py 2021-10-12 00:17:34.000000000 +0000 @@ -31,10 +31,10 @@ __author__ = "Gavin Huttley, Peter Maxwell, and Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Gavin Huttley", "Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_core/test_profile.py python-cogent-2021.10.12a1+dfsg/tests/test_core/test_profile.py --- python-cogent-2020.12.21a+dfsg/tests/test_core/test_profile.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_core/test_profile.py 2021-10-12 00:17:34.000000000 +0000 @@ -11,10 +11,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Sandra Smit", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = 
"Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_core/test_seq_aln_integration.py python-cogent-2021.10.12a1+dfsg/tests/test_core/test_seq_aln_integration.py --- python-cogent-2020.12.21a+dfsg/tests/test_core/test_seq_aln_integration.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_core/test_seq_aln_integration.py 2021-10-12 00:17:34.000000000 +0000 @@ -11,10 +11,10 @@ __author__ = "Sandra Smit" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Sandra Smit", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Sandra Smit" __email__ = "sandra.smit@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_core/test_sequence.py python-cogent-2021.10.12a1+dfsg/tests/test_core/test_sequence.py --- python-cogent-2020.12.21a+dfsg/tests/test_core/test_sequence.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_core/test_sequence.py 2021-10-12 00:17:34.000000000 +0000 @@ -40,10 +40,10 @@ __author__ = "Rob Knight, Gavin Huttley and Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Gavin Huttley", "Peter Maxwell", "Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -975,12 +975,6 @@ self.assertTrue(seq_row in got) - def test_to_html_deprecation_warning(self): - """produce correct html formatted text""" - seq = DnaSequence("ACGGTGGGGGGGGG") - with self.assertWarns(DeprecationWarning): - seq.to_html(interleave_len=50) - def test_repr_html(self): """correctly uses set_repr and the 
environment variable settings""" token = 'class="label"' @@ -1108,6 +1102,14 @@ sc = self.SequenceClass self.assertEqual(str(sc("TC").regap(sc("A---A-"))), "T---C-") + def test_degap_name(self): + """degap preserves name attribute""" + # todo this should work for any seq class, but is not + seq = DNA.make_seq("ACG---T", "blah") + got = seq.degap() + self.assertEqual(str(got), "ACGT") + self.assertEqual(got.name, "blah") + class SequenceIntegrationTests(TestCase): """Should be able to convert regular to model sequences, and back""" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_core/test_tree.py python-cogent-2021.10.12a1+dfsg/tests/test_core/test_tree.py --- python-cogent-2020.12.21a+dfsg/tests/test_core/test_tree.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_core/test_tree.py 2021-10-12 00:17:34.000000000 +0000 @@ -3,14 +3,13 @@ """ import json import os -import sys -import unittest from copy import copy, deepcopy from tempfile import TemporaryDirectory from unittest import TestCase, main -from numpy import arange, array +from numpy import array +from numpy.testing import assert_allclose, assert_equal from cogent3 import load_tree, make_tree from cogent3.core.tree import PhyloNode, TreeError, TreeNode @@ -20,7 +19,7 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Rob Knight", "Catherine Lozupone", @@ -34,13 +33,11 @@ "Jose Carlos Clemente Litran", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" -from numpy.testing import assert_allclose, assert_equal - base_path = os.path.dirname(os.path.dirname(__file__)) data_path = os.path.join(base_path, "data") @@ -1070,8 +1067,19 @@ self.assertEqual(obs_1, exp_1) self.assertEqual(obs_2, exp_2) + def 
test_lowest_common_ancestor_invalid_tips(self): + """fail if tips not present""" + t = DndParser("((a,(b,c)d)e,f,(g,h)i)j;") + # no tips present in tree should raise exception + with self.assertRaises(ValueError): + t.lowest_common_ancestor(["m", "n"]) + + # not all tips present in tree should raise exception + with self.assertRaises(ValueError): + t.lowest_common_ancestor(["a", "n"]) + def test_last_common_ancestor(self): - """TreeNode LastCommonAncestor should provide last common ancestor""" + """TreeNode last_common_ancestor should provide last common ancestor""" nodes, tree = self.TreeNode, self.TreeRoot a = nodes["a"] b = nodes["b"] @@ -1475,20 +1483,16 @@ def test_tip_to_tip_distances_endpoints(self): """Test getting specifc tip distances with tip_to_tip_distances""" - nodes = [ + exp_nodes = [ self.t.get_node_matching_name("H"), self.t.get_node_matching_name("G"), self.t.get_node_matching_name("M"), ] names = ["H", "G", "M"] - exp = (array([[0, 2.0, 6.7], [2.0, 0, 6.7], [6.7, 6.7, 0.0]]), nodes) - obs = self.t.tip_to_tip_distances(endpoints=names) - assert_equal(obs[0], exp[0]) - assert_equal(obs[1], exp[1]) - - obs = self.t.tip_to_tip_distances(endpoints=nodes) - assert_equal(obs[0], exp[0]) - assert_equal(obs[1], exp[1]) + exp_dists = array([[0, 2.0, 6.7], [2.0, 0, 6.7], [6.7, 6.7, 0.0]]) + got_dists, got_nodes = self.t.tip_to_tip_distances(endpoints=names) + assert_equal(got_dists, exp_dists) + self.assertEqual(got_nodes, exp_nodes) def test_prune(self): """prune should reconstruct correct topology and Lengths of tree.""" @@ -1582,7 +1586,7 @@ self.assertEqual(result.get_distances(), tree.get_distances()) def test_root_at_midpoint3(self): - """ midpoint between nodes should behave correctly""" + """midpoint between nodes should behave correctly""" tree = DndParser("(a:1,((c:1,d:2.5)n3:1,b:1)n2:1)rt;") tmid = tree.root_at_midpoint() self.assertEqual(tmid.get_distances(), tree.get_distances()) diff -Nru 
python-cogent-2020.12.21a+dfsg/tests/test_data/__init__.py python-cogent-2021.10.12a1+dfsg/tests/test_data/__init__.py --- python-cogent-2020.12.21a+dfsg/tests/test_data/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_data/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -2,10 +2,10 @@ __all__ = ["test_molecular_weight"] __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_data/test_molecular_weight.py python-cogent-2021.10.12a1+dfsg/tests/test_data/test_molecular_weight.py --- python-cogent-2020.12.21a+dfsg/tests/test_data/test_molecular_weight.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_data/test_molecular_weight.py 2021-10-12 00:17:34.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_draw/__init__.py python-cogent-2021.10.12a1+dfsg/tests/test_draw/__init__.py --- python-cogent-2020.12.21a+dfsg/tests/test_draw/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_draw/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -7,10 +7,10 @@ ] __author__ = "Gavin Huttley and Rahul Ghangas" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin 
Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_draw/test_dendrogram.py python-cogent-2021.10.12a1+dfsg/tests/test_draw/test_dendrogram.py --- python-cogent-2020.12.21a+dfsg/tests/test_draw/test_dendrogram.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_draw/test_dendrogram.py 2021-10-12 00:17:34.000000000 +0000 @@ -16,7 +16,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley", "Rahul Ghangas"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_draw/test_dotplot.py python-cogent-2021.10.12a1+dfsg/tests/test_draw/test_dotplot.py --- python-cogent-2020.12.21a+dfsg/tests/test_draw/test_dotplot.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_draw/test_dotplot.py 2021-10-12 00:17:34.000000000 +0000 @@ -16,7 +16,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" @@ -105,7 +105,7 @@ aln = ArrayAlignment( {"seq1": "ACGG", "seq2": "CGCA", "seq3": "CCG-"}, moltype="dna" ) - aln_plot = aln.dotplot("seq1", "seq2") + aln_plot = aln.dotplot("seq1") self.assertNotEqual(aln_plot._aligned_coords, None) def test_dotplot_seqcoll(self): @@ -118,6 +118,24 @@ self.assertEqual(len(dp.seq1), 4) self.assertEqual(len(dp.seq2), 3) + def test_dotplot_single(self): + """dotplot with single sequence should not fail""" + seqs = make_unaligned_seqs({"seq1": "ACGG"}, moltype="dna") + dp = seqs.dotplot() + 
self.assertEqual(dp.seq1, dp.seq2) + + def test_dotplot_missing(self): + """fail if a sequence name not present""" + seqs = make_unaligned_seqs( + {"seq1": "ACGG", "seq2": "CGCA", "seq3": "CCG-"}, moltype="dna" + ) + with self.assertRaises(ValueError): + _ = seqs.dotplot("seq1", "seq5") + with self.assertRaises(ValueError): + _ = seqs.dotplot("seq5", "seq1") + with self.assertRaises(ValueError): + _ = seqs.dotplot("seq5", "seq6") + def test_dotplot_title(self): """setting empty string title works""" seqs = make_unaligned_seqs( diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_draw/test_draw_integration.py python-cogent-2021.10.12a1+dfsg/tests/test_draw/test_draw_integration.py --- python-cogent-2020.12.21a+dfsg/tests/test_draw/test_draw_integration.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_draw/test_draw_integration.py 2021-10-12 00:17:34.000000000 +0000 @@ -13,7 +13,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_draw/test_logo.py python-cogent-2021.10.12a1+dfsg/tests/test_draw/test_logo.py --- python-cogent-2020.12.21a+dfsg/tests/test_draw/test_logo.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_draw/test_logo.py 2021-10-12 00:17:34.000000000 +0000 @@ -9,7 +9,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_draw/test_shapes.py python-cogent-2021.10.12a1+dfsg/tests/test_draw/test_shapes.py --- 
python-cogent-2020.12.21a+dfsg/tests/test_draw/test_shapes.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_draw/test_shapes.py 2021-10-12 00:17:34.000000000 +0000 @@ -9,7 +9,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley", "Rahul Ghangas"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_evolve/__init__.py python-cogent-2021.10.12a1+dfsg/tests/test_evolve/__init__.py --- python-cogent-2020.12.21a+dfsg/tests/test_evolve/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_evolve/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -14,10 +14,10 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell", "Greg Caporaso"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_best_likelihood.py python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_best_likelihood.py --- python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_best_likelihood.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_best_likelihood.py 2021-10-12 00:17:34.000000000 +0000 @@ -17,10 +17,10 @@ __author__ = "Helen Lindsay" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Helen Lindsay"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Helen Lindsay" __email__ = "helen.lindsay@anu.edu.au" __status__ = 
"Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_bootstrap.py python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_bootstrap.py --- python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_bootstrap.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_bootstrap.py 2021-10-12 00:17:34.000000000 +0000 @@ -8,7 +8,7 @@ __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Peter Maxwell", "Gavin Huttley", @@ -17,7 +17,7 @@ "Andrew Butterfield", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_coevolution.py python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_coevolution.py --- python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_coevolution.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_coevolution.py 2021-10-12 00:17:34.000000000 +0000 @@ -94,10 +94,10 @@ __author__ = "Greg Caporaso" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Greg Caporaso"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Greg Caporaso" __email__ = "gregcaporaso@gmail.com" __status__ = "Beta" @@ -106,10 +106,10 @@ class CoevolutionTests(TestCase): - """ Tests of coevolution.py """ + """Tests of coevolution.py""" def setUp(self): - """Set up variables for us in tests """ + """Set up variables for us in tests""" self.run_slow_tests = int(environ.get("TEST_SLOW_APPC", 0)) # Data used in SCA tests self.dna_aln = ArrayAlignment( @@ -212,7 +212,7 @@ ) def test_alignment_analyses_moltype_protein(self): - """ alignment methods 
work with moltype = PROTEIN """ + """alignment methods work with moltype = PROTEIN""" r = mi_alignment(self.protein_aln4) self.assertEqual(r.shape, (4, 4)) @@ -225,7 +225,7 @@ self.assertEqual(r.shape, (4, 4)) def test_alignment_analyses_moltype_rna(self): - """ alignment methods work with moltype = RNA """ + """alignment methods work with moltype = RNA""" r = mi_alignment(self.rna_aln4) self.assertEqual(r.shape, (4, 4)) @@ -243,7 +243,7 @@ self.assertEqual(r.shape, (4, 4)) def test_alignment_analyses_moltype_dna(self): - """ alignment methods work with moltype = DNA """ + """alignment methods work with moltype = DNA""" r = mi_alignment(self.dna_aln4) self.assertEqual(r.shape, (4, 4)) @@ -261,7 +261,7 @@ self.assertEqual(r.shape, (4, 4)) def test_join_positions(self): - """ join_positions functions as expected """ + """join_positions functions as expected""" self.assertEqual( join_positions(list("ABCD"), list("WXYZ")), ["AW", "BX", "CY", "DZ"] ) @@ -269,19 +269,19 @@ self.assertEqual(join_positions([], []), []) def test_mi(self): - """ mi calculations function as expected with valid data""" + """mi calculations function as expected with valid data""" assert_allclose(mi(1.0, 1.0, 1.0), 1.0) assert_allclose(mi(1.0, 1.0, 2.0), 0.0) assert_allclose(mi(1.0, 1.0, 1.5), 0.5) def test_normalized_mi(self): - """ normalized mi calculations function as expected with valid data""" + """normalized mi calculations function as expected with valid data""" assert_allclose(normalized_mi(1.0, 1.0, 1.0), 1.0) assert_allclose(normalized_mi(1.0, 1.0, 2.0), 0.0) assert_allclose(normalized_mi(1.0, 1.0, 1.5), 0.3333, 3) def test_mi_pair(self): - """ mi_pair calculates mi from a pair of columns """ + """mi_pair calculates mi from a pair of columns""" aln = ArrayAlignment(data={"1": "AB", "2": "AB"}, moltype=PROTEIN) assert_allclose(mi_pair(aln, pos1=0, pos2=1), 0.0) aln = ArrayAlignment(data={"1": "AB", "2": "BA"}, moltype=PROTEIN) @@ -302,7 +302,7 @@ self.assertRaises(ValueError, 
coevolve_alignments, sca_alignment, aln, aln) def test_coevolve_pair(self): - """coevolve_pair: returns same as pair methods called directly """ + """coevolve_pair: returns same as pair methods called directly""" aln = ArrayAlignment(data={"1": "AC", "2": "AC"}, moltype=PROTEIN) t = make_tree(treestring="(1:0.5,2:0.5);") cutoff = 0.50 @@ -448,7 +448,7 @@ ) def test_coevolve_alignments_validation_min_num_seqs(self): - """coevolve_alignments_validation: ValueError on fewer than min_num_seqs """ + """coevolve_alignments_validation: ValueError on fewer than min_num_seqs""" method = mi_alignment # too few sequences -> ValueError aln1 = ArrayAlignment(data={"1": "AC", "2": "AD"}, moltype=PROTEIN) @@ -534,7 +534,7 @@ ) def test_coevolve_alignments_watches_min_num_seqs(self): - """ coevolve_alignments: error on too few sequences """ + """coevolve_alignments: error on too few sequences""" aln1 = ArrayAlignment(data={"1": "AC", "2": "AD"}, moltype=PROTEIN) aln2 = ArrayAlignment(data={"1": "EFW", "2": "EGY"}, moltype=PROTEIN) @@ -550,7 +550,7 @@ ) def test_coevolve_alignments_watches_max_num_seqs(self): - """ coevolve_alignments: filtering or error on too many sequences """ + """coevolve_alignments: filtering or error on too many sequences""" aln1 = ArrayAlignment(data={"1": "AC", "2": "AD", "3": "YP"}, moltype=PROTEIN) aln2 = ArrayAlignment( data={"1": "ACP", "2": "EAD", "3": "PYP"}, moltype=PROTEIN @@ -588,7 +588,7 @@ remove(tmp_filepath) def test_coevolve_alignments_different_MolType(self): - """ coevolve_alignments: different MolTypes supported """ + """coevolve_alignments: different MolTypes supported""" aln1 = ArrayAlignment(data={"1": "AC", "2": "AU"}, moltype=RNA) aln2 = ArrayAlignment(data={"1": "EFW", "2": "EGY"}, moltype=PROTEIN) combined_aln = ArrayAlignment(data={"1": "ACEFW", "2": "AUEGY"}) @@ -608,7 +608,7 @@ assert_allclose(coevolve_alignments(nmi_alignment, aln1, aln2), expected) def test_mi_pair_cols_default_exclude_handling(self): - """ mi_pair returns 
null_value on excluded by default """ + """mi_pair returns null_value on excluded by default""" aln = ArrayAlignment(data={"1": "AB", "2": "-B"}, moltype=PROTEIN) assert_allclose(mi_pair(aln, pos1=0, pos2=1), DEFAULT_NULL_VALUE) aln = ArrayAlignment(data={"1": "-B", "2": "-B"}, moltype=PROTEIN) @@ -619,7 +619,7 @@ assert_allclose(mi_pair(aln, pos1=0, pos2=1, excludes="P"), DEFAULT_NULL_VALUE) def test_mi_pair_cols_non_default_exclude_handling(self): - """ mi_pair uses non-default exclude_handler when provided""" + """mi_pair uses non-default exclude_handler when provided""" aln = ArrayAlignment(data={"1": "A-", "2": "A-"}, moltype=PROTEIN) assert_allclose(mi_pair(aln, pos1=0, pos2=1), DEFAULT_NULL_VALUE) assert_allclose( @@ -639,7 +639,7 @@ assert_allclose(mi_pair(aln, pos1=0, pos2=1, h1=1.0, h2=1.0), 2.0) def test_mi_pair_alt_calculator(self): - """ mi_pair uses alternate mi_calculator when provided """ + """mi_pair uses alternate mi_calculator when provided""" aln = ArrayAlignment(data={"1": "AB", "2": "AB"}, moltype=PROTEIN) assert_allclose(mi_pair(aln, pos1=0, pos2=1), 0.0) assert_allclose( @@ -648,7 +648,7 @@ ) def test_mi_position_valid_input(self): - """ mi_position functions with varied valid input """ + """mi_position functions with varied valid input""" aln = ArrayAlignment(data={"1": "ACG", "2": "GAC"}, moltype=PROTEIN) assert_allclose(mi_position(aln, 0), array([1.0, 1.0, 1.0])) aln = ArrayAlignment(data={"1": "ACG", "2": "ACG"}, moltype=PROTEIN) @@ -657,7 +657,7 @@ assert_allclose(mi_position(aln, 2), array([0.0, 0.0, 0.0])) def test_mi_position_from_alignment_nmi(self): - """mi_position functions w/ alternate mi_calculator """ + """mi_position functions w/ alternate mi_calculator""" aln = ArrayAlignment(data={"1": "ACG", "2": "ACG"}, moltype=PROTEIN) assert_allclose(mi_position(aln, 0), array([0.0, 0.0, 0.0])) aln = ArrayAlignment(data={"1": "ACG", "2": "ACG"}, moltype=PROTEIN) @@ -667,7 +667,7 @@ ) def 
test_mi_position_from_alignment_default_exclude_handling(self): - """ mi_position handles excludes by setting to null_value""" + """mi_position handles excludes by setting to null_value""" aln = ArrayAlignment(data={"1": "ACG", "2": "G-C"}, moltype=PROTEIN) assert_allclose(mi_position(aln, 0), array([1.0, DEFAULT_NULL_VALUE, 1.0])) aln = ArrayAlignment(data={"1": "ACG", "2": "GPC"}, moltype=PROTEIN) @@ -676,14 +676,14 @@ ) def test_mi_position_from_alignment_non_default_exclude_handling(self): - """ mi_position handles excludes w/ non-default method""" + """mi_position handles excludes w/ non-default method""" aln = ArrayAlignment(data={"1": "ACG", "2": "G-C"}, moltype=PROTEIN) assert_allclose( mi_position(aln, 0, exclude_handler=ignore_excludes), array([1.0, 1.0, 1.0]) ) def test_mi_alignment_excludes(self): - """ mi_alignment handles excludes properly """ + """mi_alignment handles excludes properly""" expected = array( [ [0.0, DEFAULT_NULL_VALUE, 0.0], @@ -711,7 +711,7 @@ assert_allclose(mi_alignment(aln), expected) def test_mi_alignment_high(self): - """ mi_alignment detected perfectly correlated columns """ + """mi_alignment detected perfectly correlated columns""" expected = [[1.0, 1.0], [1.0, 1.0]] aln = ArrayAlignment(data={"1": "AG", "2": "GA"}, moltype=PROTEIN) assert_allclose(mi_alignment(aln), expected) @@ -723,7 +723,7 @@ assert_allclose(mi_alignment(aln), expected) def test_resampled_mi_alignment(self): - """ resampled_mi_alignment returns without error """ + """resampled_mi_alignment returns without error""" aln = ArrayAlignment( data={"1": "ACDEF", "2": "ACFEF", "3": "ACGEF"}, moltype=PROTEIN ) @@ -734,7 +734,7 @@ resampled_mi_alignment(aln) def test_coevolve_alignment(self): - """ coevolve_alignment functions as expected with varied input """ + """coevolve_alignment functions as expected with varied input""" aln1 = ArrayAlignment( data={"1": "ACDEF", "2": "ACFEF", "3": "ACGEF"}, moltype=PROTEIN ) @@ -749,7 +749,7 @@ ) def 
test_build_coevolution_matrix_filepath(self): - """ build_coevolution_matrix_filepath functions w/ varied input """ + """build_coevolution_matrix_filepath functions w/ varied input""" self.assertEqual(build_coevolution_matrix_filepath("./blah.fasta"), "./blah") self.assertEqual(build_coevolution_matrix_filepath("blah.fasta"), "./blah") self.assertEqual(build_coevolution_matrix_filepath("blah"), "./blah") @@ -858,7 +858,7 @@ remove(filepath) def test_parse_coevolution_matrix_filepath(self): - """Parsing matrix filepaths works as expected. """ + """Parsing matrix filepaths works as expected.""" expected = ("myosin_995", "a1_4", "nmi") self.assertEqual( parse_coevolution_matrix_filepath("pkls/myosin_995.a1_4.nmi.pkl"), expected @@ -871,7 +871,7 @@ self.assertEqual(parse_coevolution_matrix_filepath("p53.orig.mi.csv"), expected) def test_parse_coevolution_matrix_filepath_error(self): - """Parsing matrix file paths handles invalid filepaths """ + """Parsing matrix file paths handles invalid filepaths""" self.assertRaises( ValueError, parse_coevolution_matrix_filepath, "pkls/myosin_995.nmi.pkl" ) @@ -884,7 +884,7 @@ self.assertRaises(ValueError, parse_coevolution_matrix_filepath, "") def test_identify_aln_positions_above_threshold(self): - """Extracting scores above threshold works as expected """ + """Extracting scores above threshold works as expected""" m = array( [ [ @@ -920,7 +920,7 @@ ) def test_count_ge_threshold(self): - """count_ge_threshold works as expected """ + """count_ge_threshold works as expected""" m = array([[DEFAULT_NULL_VALUE] * 3] * 3) self.assertEqual(count_ge_threshold(m, 1.0), (0, 0)) self.assertEqual( @@ -945,7 +945,7 @@ self.assertEqual(count_ge_threshold(m, 9), (0, 6)) def test_count_le_threshold(self): - """count_le_threshold works as expected """ + """count_le_threshold works as expected""" m = array([[DEFAULT_NULL_VALUE] * 3] * 3) self.assertEqual(count_le_threshold(m, 1.0), (0, 0)) self.assertEqual( @@ -1599,7 +1599,7 @@ assert_allclose(m, 
expected) def test_filter_threshold_based_multiple_interdependency_intermolecular(self): - "multiple interdependency filter functions with intermolecular data " + "multiple interdependency filter functions with intermolecular data" ## cmp_function = ge # lower boundary null = DEFAULT_NULL_VALUE @@ -1683,7 +1683,7 @@ assert_allclose(actual, expected) def test_filter_threshold_based_multiple_interdependency_intramolecular(self): - "multiple interdependency filter functions with intramolecular data " + "multiple interdependency filter functions with intramolecular data" null = DEFAULT_NULL_VALUE ## cmp_function = ge # lower bound, everything filtered @@ -1953,14 +1953,14 @@ validate_alphabet([1, 42, 8], {1: 0.5, 42: 0.25, 8: 0.25}) def test_validate_position_invalid(self): - """validate_position: raises error on invalid position """ + """validate_position: raises error on invalid position""" self.assertRaises(ValueError, validate_position, self.dna_aln, 4) self.assertRaises(ValueError, validate_position, self.dna_aln, 42) self.assertRaises(ValueError, validate_position, self.dna_aln, -1) self.assertRaises(ValueError, validate_position, self.dna_aln, -199) def test_validate_position_valid(self): - """validate_position: does nothing on valid position """ + """validate_position: does nothing on valid position""" validate_position(self.dna_aln, 0) validate_position(self.dna_aln, 1) validate_position(self.dna_aln, 2) @@ -2021,7 +2021,7 @@ assert_allclose(get_dg(p, a), expected) def test_get_dgg(self): - """get_dgg: returns delta_delta_g value given two delta_g vectors """ + """get_dgg: returns delta_delta_g value given two delta_g vectors""" v1 = array([0.05, 0.5, 0.1]) v2 = array([0.03, 0.05, 0.1]) expected = sqrt(sum((v1 - v2) * (v1 - v2))) / 100 * e @@ -2061,7 +2061,7 @@ self.assertRaises(ValueError, get_positional_probabilities, freqs, probs, 50) def test_sca_input_validation(self): - """sca_input_validation: handles sca-specific validation steps """ + 
"""sca_input_validation: handles sca-specific validation steps""" # moltype != PROTEIN makes background freqs required self.assertRaises(ValueError, sca_input_validation, self.dna_aln, cutoff=0.4) self.assertRaises(ValueError, sca_input_validation, self.rna_aln, cutoff=0.4) @@ -2113,7 +2113,7 @@ # it's tested on it's own. def test_sca_pair_no_error(self): - """sca_pair: returns w/o error """ + """sca_pair: returns w/o error""" r = sca_pair( self.dna_aln, 1, @@ -2198,7 +2198,7 @@ assert_allclose(actual[1], expected[1]) def test_sca_pair_error(self): - """sca_pair:returns w/ error when appropriate """ + """sca_pair:returns w/ error when appropriate""" a = "ACGT" # pos1 out of range self.assertRaises( @@ -2274,7 +2274,7 @@ ) def test_sca_position_no_error(self): - """sca_position: returns w/o error """ + """sca_position: returns w/o error""" r = sca_position( self.dna_aln, 1, 0.50, alphabet="ACGT", background_freqs=self.dna_base_freqs ) @@ -2312,7 +2312,7 @@ assert_allclose(r[1], 3.387, 0.01) def test_sca_position_error(self): - """sca_position: returns w/ error when appropriate """ + """sca_position: returns w/ error when appropriate""" a = "ACGT" # position out of range self.assertRaises( @@ -2359,7 +2359,7 @@ ) def test_sca_position_returns_same_as_sca_pair(self): - """sca_position: returns same as sca_pair called on each pos """ + """sca_position: returns same as sca_pair called on each pos""" expected = [] for i in range(len(self.dna_aln)): expected.append( @@ -2404,7 +2404,7 @@ assert_allclose(actual, expected) def test_sca_alignment_no_error(self): - """sca_alignment: returns w/o error """ + """sca_alignment: returns w/o error""" r = sca_alignment( self.dna_aln, 0.50, alphabet="ACGT", background_freqs=self.dna_base_freqs ) @@ -2423,7 +2423,7 @@ assert_allclose(r[0][0], 2.32222608171) def test_sca_alignment_error(self): - """sca_alignment: returns w/ error when appropriate """ + """sca_alignment: returns w/ error when appropriate""" a = "ACGT" # incompatible 
alphabet and background freqs self.assertRaises( @@ -2517,7 +2517,7 @@ self.assertRaises(AssertionError, ltm_to_symmetric, arange(10).reshape(2, 5)) def test_merge_alignments(self): - """ merging alignments of same moltype functions as expected""" + """merging alignments of same moltype functions as expected""" # PROTEIN aln1 = ArrayAlignment(data={"1": "AC", "2": "AD"}, moltype=PROTEIN) aln2 = ArrayAlignment(data={"1": "EF", "2": "EG"}, moltype=PROTEIN) @@ -2541,7 +2541,7 @@ self.assertEqual(actual.moltype, DNA) def test_merge_alignments_ignores_id_following_plus(self): - """ merge_alignments ignores all seq id characters after '+' """ + """merge_alignments ignores all seq id characters after '+'""" aln1 = ArrayAlignment(data={"1+a": "AC", "2+b": "AD"}, moltype=PROTEIN) aln2 = ArrayAlignment(data={"1 + c": "EFW", "2 + d": "EGY"}, moltype=PROTEIN) combined_aln = ArrayAlignment( @@ -2557,7 +2557,7 @@ self.assertEqual(merge_alignments(aln1, aln2), combined_aln) def test_merge_alignments_different_moltype(self): - """ merging alignments of different moltype functions as expected""" + """merging alignments of different moltype functions as expected""" aln1 = ArrayAlignment(data={"1": "AC", "2": "AU"}, moltype=RNA) aln2 = ArrayAlignment(data={"1": "EF", "2": "EG"}, moltype=PROTEIN) combined_aln = ArrayAlignment(data={"1": "ACEF", "2": "AUEG"}) @@ -2609,7 +2609,7 @@ class AncestorCoevolve(TestCase): - """ Tests of the ancestral state method for detecting coevolution """ + """Tests of the ancestral state method for detecting coevolution""" def setUp(self): """ """ @@ -2798,7 +2798,7 @@ ) def test_validate_tree_valid(self): - """validate_tree: does nothing on compatible tree and aln """ + """validate_tree: does nothing on compatible tree and aln""" t = make_tree(treestring="((A:0.5,B:0.5):0.5,(C:0.5,D:0.5):0.5);") aln = ArrayAlignment( data={"A": "AC", "B": "CA", "C": "CC", "D": "DD"}, moltype=PROTEIN @@ -2809,7 +2809,7 @@ validate_tree(aln, t) def 
test_validate_tree_invalid(self): - """validate_tree: raises ValueError on incompatible tree and aln """ + """validate_tree: raises ValueError on incompatible tree and aln""" # different scale tree and aln t = make_tree(treestring="((A:0.5,B:0.5):0.5,C:0.5);") aln = ArrayAlignment( @@ -2827,7 +2827,7 @@ self.assertRaises(ValueError, validate_tree, aln, t) def test_get_ancestral_seqs(self): - """get_ancestral_seqs: returns valid collection of ancestral seqs """ + """get_ancestral_seqs: returns valid collection of ancestral seqs""" t = make_tree(treestring="((A:0.5,B:0.5):0.5,C:0.5);") aln = ArrayAlignment(data={"A": "AA", "B": "AA", "C": "AC"}, moltype=PROTEIN) expected = ArrayAlignment(data={"root": "AA", "edge.0": "AA"}, moltype=PROTEIN) @@ -2852,7 +2852,7 @@ self.assertEqual(get_ancestral_seqs(aln, t, optimise=False), expected) def test_get_ancestral_seqs_handles_gaps(self): - """get_ancestral_seqs: handles gaps """ + """get_ancestral_seqs: handles gaps""" # gaps handled OK t = make_tree(treestring="(A:0.5,B:0.5,C:0.5);") aln = ArrayAlignment(data={"A": "A-", "B": "AA", "C": "AA"}, moltype=PROTEIN) @@ -2860,7 +2860,7 @@ self.assertEqual(get_ancestral_seqs(aln, t, optimise=False), expected) def test_get_ancestral_seqs_handles_ambiguous_residues(self): - """get_ancestral_seqs: handles ambiguous residues """ + """get_ancestral_seqs: handles ambiguous residues""" # Non-canonical residues handled OK t = make_tree(treestring="(A:0.5,B:0.5,C:0.5);") aln = ArrayAlignment(data={"A": "AX", "B": "Z-", "C": "BC"}, moltype=PROTEIN) @@ -2895,12 +2895,12 @@ self.assertEqual(ancestral_state_pair(aln, t, 1, 0), 0) def test_ancestral_state_alignment_no_error_on_gap(self): - """ancestral_state_alignment: return w/o error with gapped seqs """ + """ancestral_state_alignment: return w/o error with gapped seqs""" ancestral_state_alignment(self.aln1_w_gaps, self.t1, self.ancestral_states1) ancestral_state_alignment(self.aln1_1, self.t1, self.ancestral_states1_w_gaps) def 
test_ancestral_state_methods_handle_bad_ancestor_aln(self): - """ancestral state methods raise error on bad ancestor alignment """ + """ancestral state methods raise error on bad ancestor alignment""" # bad length and seq names self.assertRaises( ValueError, @@ -3030,7 +3030,7 @@ ) def test_ancestral_state_alignment_bifurcating_tree(self): - """ancestral_state_alignment: handles bifurcating tree correctly """ + """ancestral_state_alignment: handles bifurcating tree correctly""" assert_allclose( ancestral_state_alignment(self.aln1_5, self.t1, self.ancestral_states1), [[5, 5, 5], [5, 11.6, 11.6], [5, 11.6, 11.6]], @@ -3235,7 +3235,7 @@ assert_allclose(actual, expected) def test_ancestral_state_pair_aln_difference(self): - """acestral_state_pair: different aln -> different result """ + """acestral_state_pair: different aln -> different result""" assert_allclose( ancestral_state_pair(self.aln1_1, self.t1, 0, 0, self.ancestral_states1), 0 ) @@ -3259,7 +3259,7 @@ ) def test_ancestral_state_pair_symmetry(self): - """ancestral_state_pair: value[i,j] == value[j,i] """ + """ancestral_state_pair: value[i,j] == value[j,i]""" assert_allclose( ancestral_state_pair(self.aln1_5, self.t1, 0, 1, self.ancestral_states1), ancestral_state_pair(self.aln1_5, self.t1, 1, 0, self.ancestral_states1), @@ -3274,7 +3274,7 @@ ) def est_ancestral_state_methods_handle_alt_null_value(self): - """ancetral state methods handle non-default null value """ + """ancetral state methods handle non-default null value""" # need to rewrite a test of this -- right now there's no way to get # null values into the ancestral states result, but that will change # when I fix the exclude handling diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_distance.py python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_distance.py --- python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_distance.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_distance.py 
2021-10-12 00:17:34.000000000 +0000 @@ -49,10 +49,10 @@ __author__ = "Gavin Huttley, Yicheng Zhu and Ben Kaehler" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Yicheng Zhu", "Ben Kaehler"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_likelihood_function.py python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_likelihood_function.py --- python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_likelihood_function.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_likelihood_function.py 2021-10-12 00:17:34.000000000 +0000 @@ -40,6 +40,7 @@ get_model, ssGN, ) +from cogent3.evolve.ns_substitution_model import GeneralStationary from cogent3.maths.matrix_exponentiation import PadeExponentiator as expm from cogent3.maths.stats.information_criteria import aic, bic @@ -51,7 +52,7 @@ MotifChange = predicate.MotifChange __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Peter Maxwell", "Gavin Huttley", @@ -61,7 +62,7 @@ "Ananias Iliadis", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -99,7 +100,7 @@ def numdiffs_position(motif1, motif2): assert len(motif1) == len( motif2 - ), "motif1[%s] & motif2[%s] have inconsistent length" % (motif1, motif2) + ), f"motif1[{motif1}] & motif2[{motif2}] have inconsistent length" ndiffs, position = 0, -1 for i in range(len(motif1)): @@ -179,7 +180,7 @@ aln["root"] = one aln = make_aligned_seqs(data=aln) submod = get_model("TN93") - tree = 
make_tree("%s" % str(tuple(aln.names))) + tree = make_tree(f"{str(tuple(aln.names))}") lf = submod.make_likelihood_function(tree) try: lf.set_alignment(aln) @@ -188,7 +189,7 @@ collection = aln.degap().named_seqs collection.pop("Human") - tree = make_tree("%s" % str(tuple(collection.keys()))) + tree = make_tree(f"{str(tuple(collection.keys()))}") lf = submod.make_likelihood_function(tree, aligned=False) try: lf.set_sequences(collection) @@ -499,6 +500,20 @@ lf.set_param_rule("beta", bin="high", value=10.0) simulated_alignment = lf.simulate_alignment(100) + def test_simulate_alignment1(self): + "Simulate alignment when no alignment set" + al = make_aligned_seqs(data={"a": "ggaatt", "c": "cctaat"}) + t = make_tree("(a,c);") + sm = get_model("F81") + lf = sm.make_likelihood_function(t) + # no provided alignment raises an exception + with self.assertRaises(ValueError): + lf.simulate_alignment() + + # unless you provide length + sim_aln = lf.simulate_alignment(sequence_length=10) + self.assertEqual(len(sim_aln), 10) + def test_simulate_alignment2(self): "Simulate alignment with dinucleotide model" al = make_aligned_seqs(data={"a": "ggaatt", "c": "cctaat"}) @@ -990,6 +1005,24 @@ edge_sets = [dict(edges=("Human", "Mouse"))] null.set_time_heterogeneity(edge_sets=edge_sets, is_independent=False) + def test_init_from_nested_genstat(self): + """initialising a general stationary model from a nested time-reversible model works""" + tree = make_tree(tip_names=["Human", "Mouse", "Opossum"]) + gtr = get_model("GTR") + gs = GeneralStationary(gtr.alphabet) + gtr_lf = gtr.make_likelihood_function(tree) + gtr_lf.set_alignment(_aln) + mprobs = dict(A=0.1, T=0.2, C=0.3, G=0.4) + gtr_lf.set_motif_probs(mprobs) + rate_params = {"A/C": 0.75, "A/G": 3, "A/T": 1.5, "C/G": 0.2, "C/T": 6} + for par_name, val in rate_params.items(): + gtr_lf.set_param_rule(par_name, init=val) + + gs_lf = gs.make_likelihood_function(tree) + gs_lf.set_alignment(_aln) + gs_lf.initialise_from_nested(gtr_lf) + 
assert_allclose(gs_lf.lnL, gtr_lf.lnL) + def test_set_time_heterogeneity(self): """correctly apply time heterogeneity of rate terms""" lf = self.submodel.make_likelihood_function(self.tree) @@ -1102,6 +1135,33 @@ glf.initialise_from_nested(slf) assert_allclose(glf.get_log_likelihood(), slf.get_log_likelihood()) + def test_initialise_from_nested_diff_stat(self): + """non-reversible stationary initialised from nested time-reversible""" + mprobs = {b: p for b, p in zip(DNA, [0.1, 0.2, 0.3, 0.4])} + rate_params = {"A/C": 2.0, "A/G": 3.0, "A/T": 4.0, "C/G": 5.0, "C/T": 6.0} + + simple = GTR() + tree = make_tree(tip_names=["Human", "Mouse", "Opossum"]) + slf = simple.make_likelihood_function(tree, digits=2) + slf.set_alignment(_aln) + slf.set_name("GTR") + slf.set_motif_probs(mprobs) + for param, val in rate_params.items(): + slf.set_param_rule(param, init=val) + lengths = {e: v for e, v in zip(tree.get_tip_names(), (0.2, 0.4, 0.1))} + for e, val in lengths.items(): + slf.set_param_rule("length", edge=e, init=val) + + # set mprobs and then set the rate terms + from cogent3.evolve.ns_substitution_model import GeneralStationary + + rich = GeneralStationary(DNA.alphabet) + glf = rich.make_likelihood_function(tree, digits=2) + glf.set_alignment(_aln) + glf.set_name("GSN") + glf.initialise_from_nested(slf) + assert_allclose(glf.get_log_likelihood(), slf.get_log_likelihood()) + def test_initialise_from_nested_same_type_tr(self): """time-reversible likelihood initialised from nested, non-scoped, time-reversible""" mprobs = {b: p for b, p in zip(DNA, [0.1, 0.2, 0.3, 0.4])} diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_models.py python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_models.py --- python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_models.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_models.py 2021-10-12 00:17:34.000000000 +0000 @@ -21,10 +21,10 @@ __author__ = "Gavin Huttley" -__copyright__ = 
"Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -100,6 +100,15 @@ # unknown model raises exception _ = get_model("blah") + def test_model_names(self): + """name attribute matches model name""" + for model_name in models: + model = get_model(model_name) + self.assertTrue( + model.name.startswith(model_name), + msg=f"{model.name} does not start with {model_name}", + ) + def get_sample_model_types(mod_type=None): opts = dict( diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_motifchange.py python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_motifchange.py --- python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_motifchange.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_motifchange.py 2021-10-12 00:17:34.000000000 +0000 @@ -7,7 +7,7 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Peter Maxwell", "Gavin Huttley", @@ -16,7 +16,7 @@ "Brett Easton", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_newq.py python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_newq.py --- python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_newq.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_newq.py 2021-10-12 00:17:34.000000000 +0000 @@ -31,10 +31,10 @@ __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The 
Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_ns_substitution_model.py python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_ns_substitution_model.py --- python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_ns_substitution_model.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_ns_substitution_model.py 2021-10-12 00:17:34.000000000 +0000 @@ -7,7 +7,7 @@ from numpy import array, dot, empty, ones from numpy.testing import assert_allclose -from cogent3 import DNA, make_aligned_seqs, make_tree +from cogent3 import DNA, get_model, make_aligned_seqs, make_tree from cogent3.evolve.ns_substitution_model import ( DiscreteSubstitutionModel, General, @@ -28,10 +28,10 @@ __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Ananias Iliadis"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -140,14 +140,14 @@ self.results["discrete"] = dis_lf def __call__(self, obj_name, **kwargs): - funcs = dict( - general=self.fit_general, - gen_stat=self.fit_gen_stat, - discrete=self.fit_discrete, - constructed_gen=self.fit_constructed_gen, - ) - if obj_name not in self.results: + funcs = dict( + general=self.fit_general, + gen_stat=self.fit_gen_stat, + discrete=self.fit_discrete, + constructed_gen=self.fit_constructed_gen, + ) + funcs[obj_name](results=self.results, **kwargs) return self.results[obj_name] @@ -184,7 +184,6 @@ def test_paralinear_consistent_discrete_continuous(self): """paralinear masure should be consistent 
between the two classes""" gen_lf = self.make_cached("general", max_evaluations=2) - gen_lnL = gen_lf.get_log_likelihood() dis_lf = self._setup_discrete_from_general(gen_lf) ct_para = gen_lf.get_paralinear_metric() dt_para = dis_lf.get_paralinear_metric() @@ -209,6 +208,12 @@ gen_lnL = gen_lf.get_log_likelihood() self.assertLess(gen_stat_lnL, gen_lnL) + def test_general_stationary_param_list(self): + """general stationary returns parameter list""" + gs = GeneralStationary(DNA.alphabet) + params = gs.get_param_list() + self.assertTrue(params != []) + def test_general_stationary_is_stationary(self): """should be stationary""" gen_stat_lf = self.make_cached("gen_stat") diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_parameter_controller.py python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_parameter_controller.py --- python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_parameter_controller.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_parameter_controller.py 2021-10-12 00:17:34.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_scale_rules.py python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_scale_rules.py --- python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_scale_rules.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_scale_rules.py 2021-10-12 00:17:34.000000000 +0000 @@ -12,10 +12,10 @@ __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" 
+__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -157,8 +157,8 @@ dN = length * a["dN"] / (3.0 * b["dN"]) dS = length * a["dS"] / (3.0 * b["dS"]) # following are results from PAML - self.assertEqual("%.4f" % dN, "0.0325") - self.assertEqual("%.4f" % dS, "0.0514") + self.assertEqual(f"{dN:.4f}", "0.0325") + self.assertEqual(f"{dS:.4f}", "0.0514") if __name__ == "__main__": diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_simulation.py python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_simulation.py --- python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_simulation.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_simulation.py 2021-10-12 00:17:34.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_substitution_model.py python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_substitution_model.py --- python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_substitution_model.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_substitution_model.py 2021-10-12 00:17:34.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin 
Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_format/__init__.py python-cogent-2021.10.12a1+dfsg/tests/test_format/__init__.py --- python-cogent-2020.12.21a+dfsg/tests/test_format/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_format/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -2,7 +2,7 @@ __all__ = ["test_bedgraph", "test_clustal", "test_fasta"] __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Rob Knight", "Gavin Huttley", @@ -11,7 +11,7 @@ "Jeremy Widmann", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_format/test_bedgraph.py python-cogent-2021.10.12a1+dfsg/tests/test_format/test_bedgraph.py --- python-cogent-2020.12.21a+dfsg/tests/test_format/test_bedgraph.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_format/test_bedgraph.py 2021-10-12 00:17:34.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_format/test_clustal.py python-cogent-2021.10.12a1+dfsg/tests/test_format/test_clustal.py --- python-cogent-2020.12.21a+dfsg/tests/test_format/test_clustal.py 2020-12-20 23:35:03.000000000 +0000 +++ 
python-cogent-2021.10.12a1+dfsg/tests/test_format/test_clustal.py 2021-10-12 00:17:34.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Jeremy Widmann" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Jeremy Widmann"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Jeremy Widmann" __email__ = "jeremy.widmann@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_format/test_fasta.py python-cogent-2021.10.12a1+dfsg/tests/test_format/test_fasta.py --- python-cogent-2020.12.21a+dfsg/tests/test_format/test_fasta.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_format/test_fasta.py 2021-10-12 00:17:34.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Jeremy Widmann" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Jeremy Widmann", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Jeremy Widmann" __email__ = "jeremy.widmann@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_maths/__init__.py python-cogent-2021.10.12a1+dfsg/tests/test_maths/__init__.py --- python-cogent-2020.12.21a+dfsg/tests/test_maths/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_maths/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -9,7 +9,7 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Rob Knight", "Peter Maxwell", @@ -20,7 +20,7 @@ "Antonio Gonzalez Pena", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" 
diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_maths/test_distance_transform.py python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_distance_transform.py --- python-cogent-2020.12.21a+dfsg/tests/test_maths/test_distance_transform.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_distance_transform.py 2021-10-12 00:17:34.000000000 +0000 @@ -44,10 +44,10 @@ __author__ = "Justin Kuczynski" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __contributors__ = ["Justin Kuczynski", "Zongzhi Liu", "Greg Caporaso"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Justin Kuczynski" __email__ = "justinak@gmail.com" __status__ = "Prototype" @@ -292,7 +292,7 @@ # now binary fns def test_binary_dist_otu_gain(self): - """ binary OTU gain functions as expected """ + """binary OTU gain functions as expected""" actual = binary_dist_otu_gain(self.input_binary_dist_otu_gain1) expected = array([[0, 1, 2, 2], [1, 0, 2, 1], [1, 1, 0, 1], [1, 0, 1, 0]]) assert_equal(actual, expected) @@ -518,12 +518,12 @@ assert_allclose(dist, exp) def test_dist_bray_curtis_magurran1(self): - """ zero values should return zero dist, or 1 with nonzero samples""" + """zero values should return zero dist, or 1 with nonzero samples""" res = dist_bray_curtis_magurran(numpy.array([[0, 0, 0], [0, 0, 0], [1, 1, 1]])) assert_allclose(res, numpy.array([[0, 0, 1], [0, 0, 1], [1, 1, 0]])) def test_dist_bray_curtis_magurran2(self): - """ should match hand-calculated values""" + """should match hand-calculated values""" res = dist_bray_curtis_magurran(numpy.array([[1, 4, 3], [1, 3, 5], [0, 2, 0]])) assert_allclose( res, diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_maths/test_geometry.py python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_geometry.py --- python-cogent-2020.12.21a+dfsg/tests/test_maths/test_geometry.py 2020-12-20 
23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_geometry.py 2021-10-12 00:17:34.000000000 +0000 @@ -35,10 +35,10 @@ __author__ = "Sandra Smit" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Sandra Smit", "Rob Knight", "Helmut Simon"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Sandra Smit" __email__ = "sandra.smit@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_maths/test_matrix_exponential_integration.py python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_matrix_exponential_integration.py --- python-cogent-2020.12.21a+dfsg/tests/test_maths/test_matrix_exponential_integration.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_matrix_exponential_integration.py 2021-10-12 00:17:34.000000000 +0000 @@ -13,7 +13,7 @@ __copyright__ = "Copyright 2007-2014, The Cogent Project" __credits__ = ["Ben Kaehler", "Ananias Iliadis", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Ben Kaehler" __email__ = "benjamin.kaehler@anu.edu.au" __status__ = "Production" @@ -22,14 +22,8 @@ class TestIntegratingExponentiator(TestCase): - def test_van_loan_integrating_exponentiator(self): - """VanLoanIntegratingExponentiator should reproduce Felsenstein - analytic result, should throw if we pass it a defected matrix and ask - it to use CheckedExponentiator, will work with a defective matrix (that - we can integrate by hand) if we use the default RobustExponentiator, - and should work for different choices of R and exponentiatior.""" - # Result from Von Bing's R code - result = 0.7295333 + def setUp(self) -> None: + self.result = 0.7295333 q = array([[0.5, 0.2, 0.1, 0.2]] * 4) for i in range(4): q[i, i] = 0.0 @@ -38,16 +32,24 @@ i, ] ) - p0 = array([0.2, 0.3, 0.3, 
0.2]) + self.q = q + self.p0 = array([0.2, 0.3, 0.3, 0.2]) - I = expm.VanLoanIntegratingExponentiator(q, -diag(q))(1.0) - assert_allclose(dot(p0, I), result) + def test_van_loan_integrating_exponentiator(self): + """VanLoanIntegratingExponentiator should reproduce Felsenstein + analytic result, should throw if we pass it a defected matrix and ask + it to use CheckedExponentiator, will work with a defective matrix (that + we can integrate by hand) if we use the default RobustExponentiator, + and should work for different choices of R and exponentiatior.""" + # Result from Von Bing's R code + I = expm.VanLoanIntegratingExponentiator(self.q, -diag(self.q))(1.0) + assert_allclose(dot(self.p0, I), self.result) self.assertRaises( ArithmeticError, expm.VanLoanIntegratingExponentiator, - q, - -diag(q), + self.q, + -diag(self.q), cmme.CheckedExponentiator, ) @@ -73,19 +75,8 @@ should match results obtained from VanLoanIntegratingExponentiator for a diagonisable matrix.""" # Result from Von Bing's R code. 
- result = 0.7295333 - q = array([[0.5, 0.2, 0.1, 0.2]] * 4) - for i in range(4): - q[i, i] = 0.0 - q[i, i] = -sum( - q[ - i, - ] - ) - p0 = array([0.2, 0.3, 0.3, 0.2]) - - I = expm.VonBingIntegratingExponentiator(q)(1.0) - assert_allclose(dot(dot(p0, I), -diag(q)), result) + I = expm.VonBingIntegratingExponentiator(self.q)(1.0) + assert_allclose(dot(dot(self.p0, I), -diag(self.q)), self.result) self.assertRaises( ArithmeticError, @@ -115,6 +106,16 @@ )(2.0), ) + def test_repr(self): + """repr() works for the integrating exponentiators""" + for klass in ( + expm.VanLoanIntegratingExponentiator, + expm.VonBingIntegratingExponentiator, + ): + i = klass(self.q) + g = repr(i) + self.assertIsInstance(g, str) + def test_calc_number_subs(self): """correctly compute ENS""" mprobs = diag([0.1, 0.2, 0.3, 0.4]) diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_maths/test_matrix_logarithm.py python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_matrix_logarithm.py --- python-cogent-2020.12.21a+dfsg/tests/test_maths/test_matrix_logarithm.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_matrix_logarithm.py 2021-10-12 00:17:34.000000000 +0000 @@ -12,10 +12,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Gavin Huttley", "Ben Kaehler"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_maths/test_measure.py python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_measure.py --- python-cogent-2020.12.21a+dfsg/tests/test_maths/test_measure.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_measure.py 2021-10-12 00:17:34.000000000 +0000 @@ -15,10 +15,10 @@ __author__ = "Gavin Huttley" 
-__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Stephen Ka-Wah Ma"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -214,6 +214,30 @@ atol=self.atol, ) + assert_allclose( + jsd(case4[0], case4[0], validate=True), + 0.0, + err_msg="Testing case4 for jsd failed", + atol=self.atol, + ) + + def test_jsd_precision(self): + """handle case where the difference is incredibly small""" + pi_0 = [ + 0.4398948756903677, + 0.1623791467423164, + 0.31844113569205656, + 0.07928484187525932, + ] + pi_1 = [ + 0.43989487569036767, + 0.16237914674231643, + 0.3184411356920566, + 0.07928484187525933, + ] + result = jsd(pi_0, pi_1) + self.assertTrue(result >= 0) + def test_jsm(self): """evaluate jsm between identical, and non-identical distributions""" case1 = [ diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_maths/test_optimisers.py python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_optimisers.py --- python-cogent-2020.12.21a+dfsg/tests/test_maths/test_optimisers.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_optimisers.py 2021-10-12 00:17:34.000000000 +0000 @@ -13,10 +13,10 @@ __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_maths/test_period.py python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_period.py --- python-cogent-2020.12.21a+dfsg/tests/test_maths/test_period.py 2020-12-20 23:35:03.000000000 +0000 +++ 
python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_period.py 2021-10-12 00:17:34.000000000 +0000 @@ -12,10 +12,10 @@ __author__ = "Hua Ying, Julien Epps and Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Julien Epps", "Hua Ying", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/__init__.py python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/__init__.py --- python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -2,10 +2,10 @@ __all__ = ["test_distribution", "test_special", "test_ks", "test_test"] __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Catherine Lozupone", "Gavin Huttley", "Sandra Smit"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_contingency.py python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_contingency.py --- python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_contingency.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_contingency.py 2021-10-12 00:17:34.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] 
__license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -98,8 +98,7 @@ got = table.G_fit() assert_allclose(got.G, 9.849234) table.expected = None - got = table.G_fit() - print(got) + _ = table.G_fit() def test_zero_observeds(self): """raises ValueError""" @@ -157,8 +156,8 @@ exp = [5.2] * 5 keys = ["Marl", "Chalk", "Sandstone", "Clay", "Limestone"] table = CategoryCounts(dict(zip(keys, obs)), expected=dict(zip(keys, exp))) - got = table._get_repr_() - got = table._get_repr_(html=True) + _ = table._get_repr_() + _ = table._get_repr_(html=True) def test_accessing_elements(self): """successfully access elements""" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_distribution.py python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_distribution.py --- python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_distribution.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_distribution.py 2021-10-12 00:17:34.000000000 +0000 @@ -45,10 +45,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Rob Knight", "Sandra Smit"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_information_criteria.py python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_information_criteria.py --- python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_information_criteria.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_information_criteria.py 2021-10-12 00:17:34.000000000 +0000 
@@ -5,10 +5,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_jackknife.py python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_jackknife.py --- python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_jackknife.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_jackknife.py 2021-10-12 00:17:34.000000000 +0000 @@ -6,10 +6,10 @@ __author__ = "Anuj Pahwa, Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Anuj Pahwa", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_ks.py python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_ks.py --- python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_ks.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_ks.py 2021-10-12 00:17:34.000000000 +0000 @@ -11,10 +11,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru 
python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_number.py python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_number.py --- python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_number.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_number.py 2021-10-12 00:17:34.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -125,6 +125,33 @@ self.assertEqual(nums.mode, 4) self.assertEqual(len(nums), 6) + def test_keys_values_items(self): + """return a list of these elements""" + data = [0, 0, 2, 4, 4, 4] + nums = number.CategoryCounter(data) + self.assertEqual(nums.keys(), [0, 2, 4]) + self.assertEqual(nums.values(), [2, 1, 3]) + self.assertEqual(nums.items(), [(0, 2), (2, 1), (4, 3)]) + + freqs = nums.to_freqs() + self.assertEqual(freqs.keys(), [0, 2, 4]) + assert_allclose(freqs.values(), [0.3333333333333333, 0.16666666666666666, 0.5]) + self.assertEqual(len(freqs.items()), 3) + self.assertEqual(freqs.items()[-1], (4, 0.5)) + + def test_repr(self): + """should precede with class name""" + data = [0, 0, 2, 4, 4, 4] + nums = number.CategoryCounter(data) + got = repr(nums) + self.assertTrue(got.startswith(nums.__class__.__name__)) + freqs = nums.to_freqs() + got = repr(freqs) + self.assertTrue(got.startswith(freqs.__class__.__name__)) + nums = number.NumberCounter(data) + got = repr(nums) + self.assertTrue(got.startswith(nums.__class__.__name__)) + def test_category_counter_stats(self): """stats from CategoryCounter correct""" data = "TCTTTAGAGAACAGTTTATTATACACTAAA" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_period.py 
python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_period.py --- python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_period.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_period.py 2021-10-12 00:17:34.000000000 +0000 @@ -15,10 +15,10 @@ __author__ = "Hua Ying, Julien Epps and Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Julien Epps", "Hua Ying", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -186,8 +186,8 @@ def test_chi_square(self): D, cs_p_val = chi_square(self.x, 10) - self.assertEqual("%.4f" % D, "0.4786") - self.assertEqual("%.4f" % cs_p_val, "0.4891") + self.assertEqual(f"{D:.4f}", "0.4786") + self.assertEqual(f"{cs_p_val:.4f}", "0.4891") def test_factorial(self): self.assertEqual(factorial(1), 1) diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_special.py python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_special.py --- python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_special.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_special.py 2021-10-12 00:17:34.000000000 +0000 @@ -23,10 +23,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Rob Knight", "Sandra Smit"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_test.py 
python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_test.py --- python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_test.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_test.py 2021-10-12 00:17:34.000000000 +0000 @@ -82,7 +82,7 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Rob Knight", "Catherine Lozupone", @@ -93,7 +93,7 @@ "Michael Dwan", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -2117,7 +2117,7 @@ ) def test_distance_matrix_permutation_test_return_scores(self): - """ return_scores=True functions as expected """ + """return_scores=True functions as expected""" # use alt statistical test to make results simple def fake_stat_test(a, b, tails=None): return 42.0, 42.0 diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_maths/test_util.py python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_util.py --- python-cogent-2020.12.21a+dfsg/tests/test_maths/test_util.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_util.py 2021-10-12 00:17:34.000000000 +0000 @@ -38,10 +38,10 @@ Float = numpy.core.numerictypes.sctype2char(float) __author__ = "Rob Knight and Jeremy Widmann" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Jeremy Widmann", "Rob Knight", "Sandra Smit"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -162,5 +162,36 @@ assert_equal(column_degeneracy(array([[]])), []) +class TestUtils(TestCase): + def test_proportions_and_ratios(self): + """interconverts 
proportions and ratios""" + from cogent3.maths.util import ( + proportions_to_ratios, + ratios_to_proportions, + ) + + probs = array([0.3, 0.1, 0.1, 0.5]) + ratios = proportions_to_ratios(probs) + assert_allclose(ratios, [0.6 / 0.4, 0.1 / 0.3, 0.5 / 0.1]) + + probs = array([0.3, 0.1, 0.6]) + ratios = proportions_to_ratios(probs) + assert_allclose(ratios, [0.7 / 0.3, 0.6 / 0.1]) + + got = ratios_to_proportions(1, ratios) + assert_allclose(got, probs) + + probs = array([0.3, 0.1, -0.1, 0.5]) + with self.assertRaises(AssertionError): + proportions_to_ratios(probs) + + probs = array([0.3, 0.1, 0.0, 0.5]) + with self.assertRaises(AssertionError): + proportions_to_ratios(probs) + + with self.assertRaises(AssertionError): + ratios_to_proportions(1.0, [2.3, 1.1, -0.3]) + + if __name__ == "__main__": main() diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/__init__.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/__init__.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -19,7 +19,7 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Jeremy Widmann", "Catherine Lozuopone", @@ -36,7 +36,7 @@ "Marcin Cieslik", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_blast.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_blast.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_blast.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_blast.py 2021-10-12 00:17:34.000000000 +0000 @@ -25,7 +25,7 @@ __copyright__ = "Copyright 2007-2016, The Cogent Project" __credits__ = 
["Micah Hamady", "Rob Knight"] __license__ = "GPL" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Micah Hamady" __email__ = "hamady@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_blast_xml.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_blast_xml.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_blast_xml.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_blast_xml.py 2021-10-12 00:17:34.000000000 +0000 @@ -4,11 +4,11 @@ # __author__ = "Kristian Rother" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __contributors__ = ["Micah Hamady"] __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Kristian Rother" __email__ = "krother@rubor.de" __status__ = "Prototype" @@ -230,7 +230,7 @@ self.assertEqual(best_hit["ALIGNMENT_LENGTH"], "14") def test_best_hits_unique(self): - """The result should never contain identical hits """ + """The result should never contain identical hits""" records = [h for _, h in self.result.best_hits_by_query(n=5)][0] self.assertEqual(len(records), 3) values = {tuple(h.values()) for h in records} diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_cigar.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_cigar.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_cigar.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_cigar.py 2021-10-12 00:17:34.000000000 +0000 @@ -14,10 +14,10 @@ __author__ = "Hua Ying" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Hua Ying", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Hua Ying" __email__ = 
"hua.ying@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_clustal.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_clustal.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_clustal.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_clustal.py 2021-10-12 00:17:34.000000000 +0000 @@ -15,10 +15,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Sandra Smit"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_dialign.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_dialign.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_dialign.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_dialign.py 2021-10-12 00:17:34.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_ebi.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_ebi.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_ebi.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_ebi.py 2021-10-12 00:17:34.000000000 +0000 @@ -63,10 +63,10 @@ __author__ = "Zongzhi Liu" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent 
Project" __credits__ = ["Zongzhi Liu", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Zongzhi Liu" __email__ = "zongzhi.liu@gmail.com" __status__ = "Development" @@ -79,10 +79,10 @@ class EbiTests(TestCase): - """ Tests ebi parsers and generic parsers and general functions """ + """Tests ebi parsers and generic parsers and general functions""" def setUp(self): - """ Construct some fake data for testing purposes """ + """Construct some fake data for testing purposes""" pass def test_item_empty_filter(self): @@ -270,7 +270,7 @@ self.assertEqual(mapping_parser("", fields), {}) def test_linecode_merging_maker(self): - """linecode_merging_maker: """ + """linecode_merging_maker:""" f = linecode_merging_maker lines = ["ID id.", "RN rn.", "RR invalid", "RN rn."] labels = ["ID", "REF", "RR", "RN rn."] @@ -281,7 +281,7 @@ pass def test_MinimalEbiParser_valid(self): - """MinimalEbiParser: integrity of output """ + """MinimalEbiParser: integrity of output""" f = curry(MinimalEbiParser, strict=False) # test valid result: sequence, number of records, keys of a header @@ -337,7 +337,7 @@ self.assertRaises(RecordError, list, f(fake_records_valid, strict=True)) def test_EbiParser(self): - """EbiParser: """ + """EbiParser:""" f = curry(EbiParser, strict=False) first_valid = fake_records_valid[:-5] @@ -517,7 +517,7 @@ class FT_Tests(TestCase): - """Tests for FT parsers. """ + """Tests for FT parsers.""" def test_ft_basic_itemparser(self): """ft_basic_itemparser: known values""" @@ -625,7 +625,7 @@ class CC_Tests(TestCase): - """tests for cc_parsers. 
""" + """tests for cc_parsers.""" def test_cc_itemfinder_valid(self): """cc_itemfinder: yield each expected block.""" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_fasta.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_fasta.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_fasta.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_fasta.py 2021-10-12 00:17:34.000000000 +0000 @@ -22,10 +22,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -454,7 +454,7 @@ got = group.to_dict() want = expected[count] self.assertEqual(got, want) - self.assertEqual(group.info.Group, "group%s" % (count + 1)) + self.assertEqual(group.info.Group, f"group{count + 1}") count += 1 # check we don't return a done group diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_gbseq.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_gbseq.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_gbseq.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_gbseq.py 2021-10-12 00:17:34.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Matthew Wakefield" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_genbank.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_genbank.py --- 
python-cogent-2020.12.21a+dfsg/tests/test_parse/test_genbank.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_genbank.py 2021-10-12 00:17:34.000000000 +0000 @@ -23,10 +23,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_gff.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_gff.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_gff.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_gff.py 2021-10-12 00:17:34.000000000 +0000 @@ -11,10 +11,10 @@ __author__ = "Matthew Wakefield" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_greengenes.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_greengenes.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_greengenes.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_greengenes.py 2021-10-12 00:17:34.000000000 +0000 @@ -11,11 +11,11 @@ __author__ = "Daniel McDonald" -__copyright__ = "Copyright 2007-2020, The Cogent Project" # consider project name +__copyright__ = "Copyright 2007-2021, The Cogent Project" # consider project name # remember to add yourself if you make changes __credits__ = ["Daniel McDonald"] __license__ = "BSD-3" 
-__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Daniel McDonald" __email__ = "daniel.mcdonald@colorado.edu" __status__ = "Prototype" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_locuslink.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_locuslink.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_locuslink.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_locuslink.py 2021-10-12 00:17:34.000000000 +0000 @@ -27,10 +27,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_ncbi_taxonomy.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_ncbi_taxonomy.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_ncbi_taxonomy.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_ncbi_taxonomy.py 2021-10-12 00:17:34.000000000 +0000 @@ -17,10 +17,10 @@ __author__ = "Jason Carnes" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Jason Carnes", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_nexus.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_nexus.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_nexus.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_nexus.py 2021-10-12 00:17:34.000000000 +0000 @@ 
-19,10 +19,10 @@ __author__ = "Catherine Lozupone" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Catherine Lozupone", "Rob Knight", "Micah Hamady"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Catherine Lozupone" __email__ = "lozupone@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_pamlmatrix.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_pamlmatrix.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_pamlmatrix.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_pamlmatrix.py 2021-10-12 00:17:34.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Matthew Wakefield" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_phylip.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_phylip.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_phylip.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_phylip.py 2021-10-12 00:17:34.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Micah Hamady" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Micah Hamady", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Micah Hamady" __email__ = "hamady@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_psl.py 
python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_psl.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_psl.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_psl.py 2021-10-12 00:17:34.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Gavin Huttley, Anuj Pahwa" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell", "Gavin Huttley", "Anuj Pahwa"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Development" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_pwm_parsers.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_pwm_parsers.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_pwm_parsers.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_pwm_parsers.py 2021-10-12 00:17:34.000000000 +0000 @@ -11,7 +11,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_rdb.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_rdb.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_rdb.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_rdb.py 2021-10-12 00:17:34.000000000 +0000 @@ -17,10 +17,10 @@ __author__ = "Sandra Smit" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Sandra Smit", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Sandra Smit" 
__email__ = "sandra.smit@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_record_finder.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_record_finder.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_record_finder.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_record_finder.py 2021-10-12 00:17:34.000000000 +0000 @@ -14,10 +14,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Zongzhi Liu"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_record.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_record.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_record.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_record.py 2021-10-12 00:17:34.000000000 +0000 @@ -27,10 +27,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_tinyseq.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_tinyseq.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_tinyseq.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_tinyseq.py 2021-10-12 00:17:34.000000000 +0000 @@ -8,10 +8,10 @@ __author__ = "Matthew Wakefield" -__copyright__ = "Copyright 2007-2020, The Cogent Project" 
+__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_tree.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_tree.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_tree.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_tree.py 2021-10-12 00:17:34.000000000 +0000 @@ -16,10 +16,10 @@ # return parse_string(data, constructor) __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell", "Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_parse/test_unigene.py python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_unigene.py --- python-cogent-2020.12.21a+dfsg/tests/test_parse/test_unigene.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_unigene.py 2021-10-12 00:17:34.000000000 +0000 @@ -15,10 +15,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_phylo.py python-cogent-2021.10.12a1+dfsg/tests/test_phylo.py --- python-cogent-2020.12.21a+dfsg/tests/test_phylo.py 2020-12-20 23:35:03.000000000 +0000 +++ 
python-cogent-2021.10.12a1+dfsg/tests/test_phylo.py 2021-10-12 00:17:34.000000000 +0000 @@ -26,7 +26,7 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Peter Maxwell", "Gavin Huttley", @@ -35,7 +35,7 @@ "Ben Kaehler", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_recalculation.py python-cogent-2021.10.12a1+dfsg/tests/test_recalculation.py --- python-cogent-2020.12.21a+dfsg/tests/test_recalculation.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_recalculation.py 2021-10-12 00:17:34.000000000 +0000 @@ -8,10 +8,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_util/__init__.py python-cogent-2021.10.12a1+dfsg/tests/test_util/__init__.py --- python-cogent-2020.12.21a+dfsg/tests/test_util/__init__.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_util/__init__.py 2021-10-12 00:17:34.000000000 +0000 @@ -9,7 +9,7 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Jeremy Widmann", "Sandra Smit", @@ -20,7 +20,7 @@ "Greg Caporaso", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru 
python-cogent-2020.12.21a+dfsg/tests/test_util/test_deserialise.py python-cogent-2021.10.12a1+dfsg/tests/test_util/test_deserialise.py --- python-cogent-2020.12.21a+dfsg/tests/test_util/test_deserialise.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_util/test_deserialise.py 2021-10-12 00:17:34.000000000 +0000 @@ -25,10 +25,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_util/test_dictarray.py python-cogent-2021.10.12a1+dfsg/tests/test_util/test_dictarray.py --- python-cogent-2020.12.21a+dfsg/tests/test_util/test_dictarray.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_util/test_dictarray.py 2021-10-12 00:17:34.000000000 +0000 @@ -1,3 +1,4 @@ +import json import os from tempfile import TemporaryDirectory @@ -21,10 +22,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -231,6 +232,48 @@ c = DictArrayTemplate("de", "DE").wrap([[b, b], [b, b]]) self.assertTrue(isinstance(c.to_dict()["d"], dict)) + def test_to_dict_values(self): + """values from to_dict should be python types""" + keys = "a", "b", "c", "d" + for data, _type in [ + ([0, 35, 45, 3], int), + (["abc", "def", "jkl;", "aa"], str), + ([0.1, 0.2, 0.3, 0.4], float), + ]: + darr = DictArrayTemplate(keys).wrap(data) + got = {type(v) for v in darr.to_dict().values()} + 
self.assertEqual(got, {_type}) + + for data, _type in [ + ([0, 35, 45, 3], int), + (["abc", "def", "jkl;", "aa"], str), + ([0.1, 0.2, 0.3, 0.4], float), + ]: + darr = DictArrayTemplate(keys[:2], keys[2:]).wrap([data[:2], data[2:]]) + got = {type(v) for d in darr.to_dict().values() for v in d.values()} + self.assertEqual(got, {_type}) + + def test_to_dict_json(self): + """should be able to json.dumps result of to_dict""" + keys = "a", "b", "c", "d" + for data in [ + [0, 35, 45, 3], + ["abc", "def", "jkl;", "aa"], + [0.1, 0.2, 0.3, 0.4], + ]: + darr = DictArrayTemplate(keys).wrap(data) + got = json.dumps(darr.to_dict()) + self.assertIsInstance(got, str) + + for data in [ + [0, 35, 45, 3], + ["abc", "def", "jkl;", "aa"], + [0.1, 0.2, 0.3, 0.4], + ]: + darr = DictArrayTemplate(keys[:2], keys[2:]).wrap([data[:2], data[2:]]) + got = json.dumps(darr.to_dict()) + self.assertIsInstance(got, str) + def test_to_dict_roundtrip(self): """roundtrip of DictArray.to_dict() should produce same order.""" d1 = dict(a=dict(k=1, l=2, m=3), b=dict(k=4, l=5, m=6)) @@ -448,6 +491,22 @@ got = darr[[1, 2], [1, 2]] assert_allclose(got.array, numpy.array([[0.7, 0.1], [0.2, 0.6]])) + def test_add(self): + """can add compatible dict arrays""" + data = numpy.array([[7, 1], [1, 7]]) + darr1 = DictArrayTemplate(list("AB"), list("CD")).wrap(data) + darr2 = DictArrayTemplate(list("AB"), list("CD")).wrap(data) + darr3 = darr1 + darr2 + assert_allclose(darr3.array, 2 * data) + self.assertEqual(darr3.template.names, darr1.template.names) + # must be correct type + with self.assertRaises(TypeError): + darr1 + data + + # must be equal dimensions + with self.assertRaises(ValueError): + darr1 + DictArrayTemplate(list("CD"), list("AB")).wrap(data) + if __name__ == "__main__": main() diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_util/test_misc.py python-cogent-2021.10.12a1+dfsg/tests/test_util/test_misc.py --- python-cogent-2020.12.21a+dfsg/tests/test_util/test_misc.py 2020-12-20 23:35:03.000000000 
+0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_util/test_misc.py 2021-10-12 00:17:34.000000000 +0000 @@ -2,6 +2,8 @@ """Unit tests for utility functions and classes. """ +import bz2 +import gzip import os import pathlib import tempfile @@ -27,6 +29,7 @@ MappedDict, MappedList, NestedSplitter, + _path_relative_to_zip_parent, add_lowercase, adjusted_gt_minprob, adjusted_within_bounds, @@ -48,6 +51,7 @@ list_flatten, not_list_tuple, open_, + open_zip, path_exists, recursive_flatten, remove_files, @@ -55,7 +59,7 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = [ "Rob Knight", "Amanda Birmingham", @@ -65,7 +69,7 @@ "Daniel McDonald", ] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -178,7 +182,7 @@ self.assertEqual(not_list_tuple("34"), True) def test_list_flatten(self): - """list_flatten should remove all nesting, str is untouched """ + """list_flatten should remove all nesting, str is untouched""" self.assertEqual( list_flatten(["aa", [8, "cc", "dd"], ["ee", ["ff", "gg"]]]), ["aa", 8, "cc", "dd", "ee", "ff", "gg"], @@ -281,7 +285,7 @@ self.assertRaises(KeyError, d, "b", 3) def test_remove_files(self): - """Remove files functions as expected """ + """Remove files functions as expected""" # create list of temp file paths test_filepaths = [ tempfile.NamedTemporaryFile(prefix="remove_files_test").name @@ -648,8 +652,26 @@ raise AssertionError self.assertFalse(test_filepath.exists()) + def test_writes_compressed_formats(self): + """correctly writes / reads different compression formats""" + fpath = pathlib.Path("data/sample.tsv") + with open(fpath) as infile: + expect = infile.read() + + with tempfile.TemporaryDirectory(".") as dirname: + dirname = pathlib.Path(dirname) + for suffix in ["gz", "bz2", "zip"]: + outpath = 
dirname / f"{fpath.name}.{suffix}" + with atomic_write(outpath, mode="wt") as f: + f.write(expect) + + with open_(outpath) as infile: + got = infile.read() + + self.assertEqual(got, expect, msg=f"write failed for {suffix}") + def test_rename(self): - """Renames file as expected """ + """Renames file as expected""" # create temp file directory with tempfile.TemporaryDirectory(".") as dirname: # create temp filepath @@ -674,6 +696,38 @@ got = ifile.read() self.assertEqual(got, "some data") + def test_open_handles_bom(self): + """handle files with a byte order mark""" + with TemporaryDirectory(dir=".") as dirname: + # create the different file types + dirname = pathlib.Path(dirname) + + text = "some text" + + # plain text + textfile = dirname / "sample.txt" + textfile.write_text(text, encoding="utf-8-sig") + + # gzipped + gzip_file = dirname / "sample.txt.gz" + with gzip.open(gzip_file, "wt", encoding="utf-8-sig") as outfile: + outfile.write(text) + + # bzipped + bzip_file = dirname / "sample.txt.bz2" + with bz2.open(bzip_file, "wt", encoding="utf-8-sig") as outfile: + outfile.write(text) + + # zipped + zip_file = dirname / "sample.zip" + with zipfile.ZipFile(zip_file, "w") as outfile: + outfile.write(textfile, "sample.txt") + + for path in (bzip_file, gzip_file, textfile, zip_file): + with open_(path) as infile: + got = infile.read() + self.assertEqual(got, text, msg=f"failed reading {path}") + def test_aw_zip_from_path(self): """supports inferring zip archive name from path""" with TemporaryDirectory(dir=".") as dirname: @@ -698,15 +752,22 @@ def test_expanduser(self): """expands user correctly""" # create temp file directory - home = os.environ["HOME"] + home = pathlib.Path("~").expanduser() with tempfile.TemporaryDirectory(dir=home) as dirname: # create temp filepath dirname = pathlib.Path(dirname) test_filepath = dirname / "Atomic_write_test" - test_filepath = str(test_filepath).replace(home, "~") + test_filepath = str(test_filepath).replace(str(home), "~") with 
atomic_write(test_filepath, mode="w") as f: f.write("abc") + def test_path_relative_to_zip_parent(self): + """correctly generates member paths for a zip archive""" + zip_path = pathlib.Path("some/path/to/a/data.zip") + for member in ("data/member.txt", "member.txt", "a/b/c/member.txt"): + got = _path_relative_to_zip_parent(zip_path, pathlib.Path(member)) + self.assertEqual(got.parts[0], "data") + class _my_dict(dict): """Used for testing subclass behavior of ClassChecker""" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_util/test_parallel.py python-cogent-2021.10.12a1+dfsg/tests/test_util/test_parallel.py --- python-cogent-2020.12.21a+dfsg/tests/test_util/test_parallel.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_util/test_parallel.py 2021-10-12 00:17:34.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Sheng Han Moses Koh" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Sheng Han Moses Koh"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_util/test_recode_alignment.py python-cogent-2021.10.12a1+dfsg/tests/test_util/test_recode_alignment.py --- python-cogent-2020.12.21a+dfsg/tests/test_util/test_recode_alignment.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_util/test_recode_alignment.py 2021-10-12 00:17:34.000000000 +0000 @@ -16,10 +16,10 @@ __author__ = "Greg Caporaso" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Greg Caporaso"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Greg Caporaso" __email__ = "gregcaporaso@gmail.com" __status__ = "Beta" @@ -33,7 +33,7 @@ 
""" def setUp(self): - """ Initialize some variables for the tests """ + """Initialize some variables for the tests""" self.canonical_abbrevs = "ACDEFGHIKLMNPQRSTVWY" self.ambiguous_abbrevs = "BXZ" @@ -47,7 +47,7 @@ ) def test_build_alphabet_map_handles_bad_data(self): - """build_alphabet_map: bad data raises error """ + """build_alphabet_map: bad data raises error""" self.assertRaises(ValueError, build_alphabet_map) self.assertRaises(ValueError, build_alphabet_map, "not_a_valid_id") self.assertRaises( @@ -130,11 +130,11 @@ try: build_alphabet_map(alphabet_id=alphabet_id) except ValueError: - raise AssertionError("Failed on id: %s" % alphabet_id) + raise AssertionError(f"Failed on id: {alphabet_id}") try: build_alphabet_map(alphabet_def=alphabet_def) except ValueError: - raise AssertionError("Failed on def: %s" % str(alphabet_def)) + raise AssertionError(f"Failed on def: {str(alphabet_def)}") def test_recode_dense_alignment_handles_all_ids_and_defs_wo_error(self): """recode_dense_alignment: handles pre-defined alphabets w/o error""" @@ -142,11 +142,11 @@ try: recode_dense_alignment(self.aln, alphabet_id=alphabet_id) except ValueError: - raise AssertionError("Failed on id: %s" % alphabet_id) + raise AssertionError(f"Failed on id: {alphabet_id}") try: recode_dense_alignment(self.aln, alphabet_def=alphabet_def) except ValueError: - raise AssertionError("Failed on def: %s" % str(alphabet_def)) + raise AssertionError(f"Failed on def: {str(alphabet_def)}") def test_recode_dense_alignment_leaves_original_alignment_intact(self): """recode_dense_alignment: leaves input alignment intact""" @@ -249,10 +249,10 @@ class RecodeMatrixTests(TestCase): - """ Tests of substitution matrix recoding. 
""" + """Tests of substitution matrix recoding.""" def setUp(self): - """ Create variables for use in the tests """ + """Create variables for use in the tests""" self.m1 = [ [0, 4, 1, 3, 5], [4, 0, 2, 4, 6], diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_util/test_table.py python-cogent-2021.10.12a1+dfsg/tests/test_util/test_table.py --- python-cogent-2020.12.21a+dfsg/tests/test_util/test_table.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_util/test_table.py 2021-10-12 00:17:34.000000000 +0000 @@ -41,10 +41,10 @@ TEST_ROOT = pathlib.Path(__file__).parent.parent __author__ = "Thomas La" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Thomas La", "Christopher Bradley"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -476,7 +476,7 @@ # handle a formatter function def formatcol(value): if isinstance(value, float): - val = "%.2f" % value + val = f"{value:.2f}" else: val = str(value) return val @@ -1376,6 +1376,26 @@ [v["values"] for v in data["columns"].values()], ) + def test_write_compressed(self): + """tests writing to compressed format""" + t = load_table("data/sample.tsv") + with open("data/sample.tsv") as infile: + expect = infile.read() + + with TemporaryDirectory(".") as dirname: + path = pathlib.Path(dirname) / "table.txt" + # using the compressed option + t.write(path, sep="\t", compress=True) + with open_(f"{path}.gz") as infile: + got = infile.read() + self.assertEqual(got, expect) + + # specifying via a suffix + t.write(f"{path}.gz", sep="\t") + with open_(f"{path}.gz") as infile: + got = infile.read() + self.assertEqual(got, expect) + def test_load_table_from_json(self): """tests loading a Table object from json file""" with TemporaryDirectory(dir=".") as dirname: @@ -1393,6 
+1413,18 @@ with self.assertRaises(TypeError): load_table({"a": [0, 1]}) + def test_make_table_white_space_in_column(self): + """strips white space from column headers""" + # matching header and data keys + t = make_table(header=[" a"], data={" a": [0, 2]}, sep="\t") + self.assertEqual(t.columns["a"].tolist(), [0, 2]) + self.assertIsInstance(t.to_string(), str) + + # data key has a space + t = make_table(data={" a": [0, 2]}, sep="\t") + self.assertEqual(t.columns["a"].tolist(), [0, 2]) + self.assertIsInstance(t.to_string(), str) + def test_load_table_filename_case(self): """load_table insensitive to file name case""" with TemporaryDirectory(".") as dirname: @@ -1895,10 +1927,10 @@ data = {"Ts": [31, 58], "Tv": [36, 138], "": ["syn", "nsyn"]} table = make_table(header=["", "Ts", "Tv"], data=data) with self.assertRaises(ValueError): + # did not set an index_name table.to_categorical(columns=["Ts", "Tv"]) - table.index_name = "" - got = table.to_categorical(columns=["Ts", "Tv"]) + got = table.to_categorical(columns=["Ts", "Tv"], index_name="") assert_equal(got.observed, table[:, 1:].array) got = table.to_categorical(["Ts"]) diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_util/test_transform.py python-cogent-2021.10.12a1+dfsg/tests/test_util/test_transform.py --- python-cogent-2020.12.21a+dfsg/tests/test_util/test_transform.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_util/test_transform.py 2021-10-12 00:17:34.000000000 +0000 @@ -13,10 +13,10 @@ __author__ = "Sandra Smit" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Rob Knight", "Sandra Smit", "Zongzhi Liu"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Sandra Smit" __email__ = "sandra.smit@colorado.edu" __status__ = "Production" @@ -200,7 +200,7 @@ self.assertEqual(f("aBcDeF"), "aBc") def test_first_index_in_set(self): - 
"""first_index_in_set should return index of first occurrence """ + """first_index_in_set should return index of first occurrence""" vowels = "aeiou" s1 = "ebcua" s2 = "bcbae" diff -Nru python-cogent-2020.12.21a+dfsg/tests/test_util/test_union_dict.py python-cogent-2021.10.12a1+dfsg/tests/test_util/test_union_dict.py --- python-cogent-2020.12.21a+dfsg/tests/test_util/test_union_dict.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/test_util/test_union_dict.py 2021-10-12 00:17:34.000000000 +0000 @@ -8,10 +8,10 @@ __author__ = "Thomas La" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -65,6 +65,18 @@ self.assertEqual(d.d.f, 0) self.assertEqual(d.d.g, 7) + def test_or(self): + """should not modify original""" + d = UnionDict({"a": 1, "b": 2, "c": 3, "d": {"e": 5, "f": 6}}) + e = UnionDict({"b": 0, "d": {"f": 0, "g": 7}}) + f = d | e + self.assertEqual(f.a, 1) + self.assertEqual(f.b, 0) + self.assertEqual(f.d.e, 5) + self.assertEqual(f.d.f, 0) + self.assertEqual(f.d.g, 7) + self.assertTrue(f.d is not e.d) + def test_union_value_dict(self): """replacing union or of a value with a dict should be dict""" d = UnionDict({"A": {"B": "Blah"}}) diff -Nru python-cogent-2020.12.21a+dfsg/tests/timetrial.py python-cogent-2021.10.12a1+dfsg/tests/timetrial.py --- python-cogent-2020.12.21a+dfsg/tests/timetrial.py 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tests/timetrial.py 2021-10-12 00:17:34.000000000 +0000 @@ -12,10 +12,10 @@ __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2020, The Cogent Project" +__copyright__ = "Copyright 2007-2021, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin 
Huttley", "Edward Lang"] __license__ = "BSD-3" -__version__ = "2020.12.21a" +__version__ = "2021.10.12a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -84,7 +84,7 @@ i = i + 1 attempt = 0 else: - print("Discarding probably bogus time: %.3f seconds" % end_time) + print(f"Discarding probably bogus time: {end_time:.3f} seconds") attempt = attempt + 1 if attempt == 5: print("Aborting early due to multiple errors") @@ -94,11 +94,11 @@ mean = total_time / len(times) sd = standard_dev(times, mean) print("") - print("Fastest time : %.3f" % times[0]) - print("Slowest time : %.3f" % times[len(times) - 1]) - print("Mean : %.3f" % mean) - print("Standard dev : %.3f" % sd) - print("Total time : %.3f" % total_time) + print(f"Fastest time : {times[0]:.3f}") + print(f"Slowest time : {times[len(times) - 1]:.3f}") + print(f"Mean : {mean:.3f}") + print(f"Standard dev : {sd:.3f}") + print(f"Total time : {total_time:.3f}") print("") @@ -110,7 +110,7 @@ corrected_times.append(times[i]) corrected_total = corrected_total + times[i] else: - print("Discarding value '%.3f'" % times[i]) + print(f"Discarding value '{times[i]:.3f}'") if len(times) != len(corrected_times): corrected_mean = corrected_total / len(corrected_times) @@ -118,10 +118,10 @@ print("") print("CORRECTED RESULTS") - print("Fastest time : %.3f" % corrected_times[0]) - print("Slowest time : %.3f" % corrected_times[len(corrected_times) - 1]) - print("Mean : %.3f" % corrected_mean) - print("Standard dev : %.3f" % corrected_sd) + print(f"Fastest time : {corrected_times[0]:.3f}") + print(f"Slowest time : {corrected_times[len(corrected_times) - 1]:.3f}") + print(f"Mean : {corrected_mean:.3f}") + print(f"Standard dev : {corrected_sd:.3f}") if __name__ == "__main__": diff -Nru python-cogent-2020.12.21a+dfsg/tox.ini python-cogent-2021.10.12a1+dfsg/tox.ini --- python-cogent-2020.12.21a+dfsg/tox.ini 2020-12-20 23:35:03.000000000 +0000 +++ python-cogent-2021.10.12a1+dfsg/tox.ini 
2021-10-12 00:17:34.000000000 +0000 @@ -1,9 +1,13 @@ [tox] -envlist = py36, py37, py37mpi, py38 +envlist = py37, py38, py39, py37mpi, py38mpi, py39mpi +isolated_build = True [testenv] passenv = * -deps = numba>0.48.0 +changedir = tests +deps = py{37,38}: numba>0.48.0 + py39: numba>0.52 + chardet numpy tinydb tqdm @@ -11,35 +15,44 @@ pytest scitrack pandas - py{36,37,38}: pytest-cov - py37mpi: mpi4py + pytest-cov + py{37mpi,38mpi,39mpi}: mpi4py + +[testenv:py39] +basepython = python3.9 +commands = + pytest -x --junitxml=junit-{envname}.xml --cov-report xml --cov=cogent3 ./ --ignore=test_app_mpi.py [testenv:py38] -changedir = tests basepython = python3.8 commands = - pytest --junitxml=junit-{envname}.xml --cov-report xml --cov=cogent3 ./ --ignore=test_mpi.py + pytest -x --junitxml=junit-{envname}.xml --cov-report xml --cov=cogent3 ./ --ignore=test_app_mpi.py [testenv:py37] -changedir = tests basepython = python3.7 commands = - pytest --junitxml=junit-{envname}.xml --cov-report xml --cov=cogent3 ./ --ignore=test_mpi.py + pytest -x --junitxml=junit-{envname}.xml --cov-report xml --cov=cogent3 ./ --ignore=test_app_mpi.py [testenv:py37mpi] -changedir = tests basepython = python3.7 +whitelist_externals = mpiexec commands = - mpiexec -n 1 pytest --junitxml=junit-{envname}.xml --cov-report xml --cov=cogent3 ./ test_mpi.py + mpiexec -n 1 {envpython} -m mpi4py.futures -m pytest -x --junitxml=junit-{envname}.xml --cov-report xml --cov=cogent3 test_app/test_app_mpi.py -[testenv:py36] -changedir = tests -basepython = python3.6 +[testenv:py38mpi] +basepython = python3.8 +whitelist_externals = mpiexec +commands = + mpiexec -n 1 {envpython} -m mpi4py.futures -m pytest -x --junitxml=junit-{envname}.xml --cov-report xml --cov=cogent3 test_app/test_app_mpi.py + +[testenv:py39mpi] +basepython = python3.9 +whitelist_externals = mpiexec commands = - pytest --junitxml=junit-{envname}.xml --cov-report xml --cov=cogent3 ./ --ignore=test_mpi.py + mpiexec -n 1 {envpython} -m mpi4py.futures -m 
pytest -x --junitxml=junit-{envname}.xml --cov-report xml --cov=cogent3 test_app/test_app_mpi.py [gh-actions] python = - 3.6: py36 3.7: py37 3.8: py38 + 3.9: py39