diff -Nru python-cogent-2020.6.30a0+dfsg/azure-pipelines.yml python-cogent-2020.12.21a+dfsg/azure-pipelines.yml --- python-cogent-2020.6.30a0+dfsg/azure-pipelines.yml 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/azure-pipelines.yml 1970-01-01 00:00:00.000000000 +0000 @@ -1,51 +0,0 @@ -trigger: - - master - -variables: - CIBW_BUILD: cp36-* cp37-* cp38-* - CIBW_BEFORE_BUILD: python -m pip install --upgrade pip - -jobs: -- job: Linux_Build - pool: {vmImage: 'ubuntu-latest'} - steps: - - {task: UsePythonVersion@0, inputs: {versionSpec: '3.6'}} - - {task: UsePythonVersion@0, inputs: {versionSpec: '3.7'}} - - {task: UsePythonVersion@0, inputs: {versionSpec: '3.8'}} - - bash: | - python -m pip install --upgrade pip - python -m pip install cibuildwheel numpy - cibuildwheel --output-dir wheelhouse . - python setup.py sdist - cp dist/*.gz wheelhouse/. - - task: PublishBuildArtifacts@1 - inputs: {pathtoPublish: 'wheelhouse'} - -- job: MacOS_Build - pool: {vmImage: 'macos-latest'} - steps: - - {task: UsePythonVersion@0, inputs: {versionSpec: '3.6'}} - - {task: UsePythonVersion@0, inputs: {versionSpec: '3.7'}} - - {task: UsePythonVersion@0, inputs: {versionSpec: '3.8'}} - - bash: | - python -m pip install --upgrade pip - python -m pip install cibuildwheel numpy - cibuildwheel --output-dir wheelhouse . 
- - task: PublishBuildArtifacts@1 - inputs: {pathtoPublish: 'wheelhouse'} - -- job: Windows_Build - pool: {vmImage: 'windows-latest'} - steps: - - {task: UsePythonVersion@0, inputs: {versionSpec: '3.6', architecture: x86}} - - {task: UsePythonVersion@0, inputs: {versionSpec: '3.6', architecture: x64}} - - {task: UsePythonVersion@0, inputs: {versionSpec: '3.7', architecture: x86}} - - {task: UsePythonVersion@0, inputs: {versionSpec: '3.7', architecture: x64}} - - {task: UsePythonVersion@0, inputs: {versionSpec: '3.8', architecture: x86}} - - {task: UsePythonVersion@0, inputs: {versionSpec: '3.8', architecture: x64}} - - bash: | - python -m pip install --upgrade pip - python -m pip install cibuildwheel numpy - cibuildwheel --output-dir wheelhouse . - - task: PublishBuildArtifacts@1 - inputs: {pathtoPublish: 'wheelhouse'} diff -Nru python-cogent-2020.6.30a0+dfsg/debian/changelog python-cogent-2020.12.21a+dfsg/debian/changelog --- python-cogent-2020.6.30a0+dfsg/debian/changelog 2020-11-30 09:39:30.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/debian/changelog 2021-02-09 13:42:13.000000000 +0000 @@ -1,3 +1,42 @@ +python-cogent (2020.12.21a+dfsg-4) unstable; urgency=high + + * Team upload. + * debian/rules: Don't build docs package on armel/mipsel + Closes: #950474 + * Skip tests on s390x, mips64el, mipsel, and armel + + -- Michael R. Crusoe Tue, 09 Feb 2021 14:42:13 +0100 + +python-cogent (2020.12.21a+dfsg-3) unstable; urgency=high + + * Team upload. + * debian/rules: skip tests that fail only on buildds + + -- Michael R. Crusoe Mon, 08 Feb 2021 11:14:34 +0100 + +python-cogent (2020.12.21a+dfsg-2) unstable; urgency=high + + * Team upload. + * debian/rules set LC_ALL=C.UTF-8 to try to fix buildd FTBFS + + -- Michael R. Crusoe Mon, 08 Feb 2021 10:18:22 +0100 + +python-cogent (2020.12.21a+dfsg-1) unstable; urgency=medium + + * Team upload. 
+ + [ Andreas Tille ] + * New upstream version + * Do not call specific Python3 version in autopkgtest + * Do not disable Python3.9 + + [ Michael R. Crusoe ] + * debian/control: build without python3-jupyter-sphinx + * build-dep on python3-pytest to fix the tests + * Add patch to give Python 3.9 compatibility + + -- Michael R. Crusoe Sun, 07 Feb 2021 21:40:21 +0100 + python-cogent (2020.6.30a0+dfsg-1) unstable; urgency=medium * Team upload. diff -Nru python-cogent-2020.6.30a0+dfsg/debian/control python-cogent-2020.12.21a+dfsg/debian/control --- python-cogent-2020.6.30a0+dfsg/debian/control 2020-11-30 09:39:30.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/debian/control 2021-02-07 21:13:43.000000000 +0000 @@ -21,8 +21,8 @@ python3-nbsphinx, python3-sphinx-bootstrap-theme, python3-sphinx-gallery, - python3-jupyter-sphinx, python3-sphinxcontrib.bibtex, + python3-pytest , cython3, pandoc Standards-Version: 4.5.1 diff -Nru python-cogent-2020.6.30a0+dfsg/debian/patches/py39_union_dict python-cogent-2020.12.21a+dfsg/debian/patches/py39_union_dict --- python-cogent-2020.6.30a0+dfsg/debian/patches/py39_union_dict 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/debian/patches/py39_union_dict 2021-02-07 21:25:02.000000000 +0000 @@ -0,0 +1,29 @@ +From: Michael R. Crusoe +Subject: Add Python 3.9 compatibility +Forwarded: https://github.com/cogent3/cogent3/pull/850 +--- python-cogent.orig/src/cogent3/util/union_dict.py ++++ python-cogent/src/cogent3/util/union_dict.py +@@ -2,7 +2,7 @@ + """UnionDict extension of dict. 
+ """ + from cogent3.util.misc import extend_docstring_from +- ++import sys + + __author__ = "Thomas La" + __copyright__ = "Copyright 2007-2020, The Cogent Project" +@@ -64,6 +64,14 @@ + self.union(other) + return self + ++ if sys.version_info[:2] == (3, 9): ++ def __ror__(self, other): ++ other.union(self) ++ return other ++ ++ def __ior__(self, other): ++ return self.__or__(other) ++ + def union(self, other): + """returns the union of self with other + diff -Nru python-cogent-2020.6.30a0+dfsg/debian/patches/series python-cogent-2020.12.21a+dfsg/debian/patches/series --- python-cogent-2020.6.30a0+dfsg/debian/patches/series 2020-11-30 09:39:30.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/debian/patches/series 2021-02-07 16:23:21.000000000 +0000 @@ -1,2 +1,3 @@ sphinx.patch fix_interpreter.patch +py39_union_dict diff -Nru python-cogent-2020.6.30a0+dfsg/debian/patches/sphinx.patch python-cogent-2020.12.21a+dfsg/debian/patches/sphinx.patch --- python-cogent-2020.6.30a0+dfsg/debian/patches/sphinx.patch 2020-11-30 09:39:30.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/debian/patches/sphinx.patch 2021-02-07 21:13:43.000000000 +0000 @@ -3,11 +3,14 @@ Description: Work around failure in sphinx doc processing Forwarded: not-needed ---- a/doc/conf.py -+++ b/doc/conf.py -@@ -20,16 +20,17 @@ +--- python-cogent.orig/doc/conf.py ++++ python-cogent/doc/conf.py +@@ -19,18 +19,19 @@ + numpydoc_class_members_toctree = False + extensions = [ - "jupyter_sphinx", +- "jupyter_sphinx", ++# "jupyter_sphinx", "nbsphinx", - "numpydoc", +# "numpydoc", diff -Nru python-cogent-2020.6.30a0+dfsg/debian/rules python-cogent-2020.12.21a+dfsg/debian/rules --- python-cogent-2020.6.30a0+dfsg/debian/rules 2020-11-30 09:39:30.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/debian/rules 2021-02-09 13:08:00.000000000 +0000 @@ -3,14 +3,16 @@ # Uncomment this to turn on verbose mode. 
#export DH_VERBOSE=1 - +export LC_ALL=C.UTF-8 export DEB_BUILD_MAINT_OPTIONS = hardening=+bindnow export PYBUILD_BEFORE_TEST=cp -r {dir}/tests/data {build_dir} - +export PYBUILD_TEST_PYTEST=1 +export PYBUILD_TEST_ARGS=-k 'not test_pickleable_member_roundtrip and not test_pickleable_roundtrip' export PYBUILD_NAME=cogent3 - -export PYBUILD_DISABLE=3.9 +ifneq (,$(filter $(DEB_HOST_ARCH), armel mipsel s390x mips64el)) +export PYBUILD_DISABLE=test +endif include /usr/share/dpkg/default.mk @@ -28,6 +30,7 @@ override_dh_auto_build: dh_auto_build +ifeq (,$(filter $(DEB_HOST_ARCH), armel mipsel)) set -x; cd doc; PYTHONPATH=$(CURDIR) http_proxy='127.0.0.1:9' $(MAKE) html # remove superfluous files and dirs find doc/_build -name "*.doctree*" -delete @@ -35,6 +38,7 @@ rm -f doc/_build/html/_static/jquery.js rm -f doc/_build/html/_static/underscore.js rm -rf doc/_build/html/_images/math/ +endif override_dh_shlibdeps: dh_shlibdeps diff -Nru python-cogent-2020.6.30a0+dfsg/debian/tests/run-unit-test python-cogent-2020.12.21a+dfsg/debian/tests/run-unit-test --- python-cogent-2020.6.30a0+dfsg/debian/tests/run-unit-test 2020-11-30 09:39:30.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/debian/tests/run-unit-test 2021-02-07 15:17:23.000000000 +0000 @@ -19,4 +19,4 @@ done cd tests -NUMBA_CACHE_DIR="${AUTOPKGTEST_TMP}" python3.8 -m pytest +NUMBA_CACHE_DIR="${AUTOPKGTEST_TMP}" python3 -m pytest diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/alphabet/alphabet.rst python-cogent-2020.12.21a+dfsg/doc/api/alphabet/alphabet.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/alphabet/alphabet.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/alphabet/alphabet.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,10 @@ +:mod:`alphabet` +=============== + +.. currentmodule:: cogent3.core.alphabet + +.. 
autosummary:: + :toctree: classes + :template: class.rst + + Alphabet diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/alphabet/classes/cogent3.core.alphabet.Alphabet.rst python-cogent-2020.12.21a+dfsg/doc/api/alphabet/classes/cogent3.core.alphabet.Alphabet.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/alphabet/classes/cogent3.core.alphabet.Alphabet.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/alphabet/classes/cogent3.core.alphabet.Alphabet.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,9 @@ +Alphabet +======== + +.. currentmodule:: cogent3.core.alphabet + +.. autoclass:: Alphabet + :members: + :undoc-members: + :inherited-members: \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/genetic_code/classes/cogent3.core.genetic_code.GeneticCode.rst python-cogent-2020.12.21a+dfsg/doc/api/genetic_code/classes/cogent3.core.genetic_code.GeneticCode.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/genetic_code/classes/cogent3.core.genetic_code.GeneticCode.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/genetic_code/classes/cogent3.core.genetic_code.GeneticCode.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,9 @@ +GeneticCode +=========== + +.. currentmodule:: cogent3.core.genetic_code + +.. autoclass:: GeneticCode + :members: + :undoc-members: + :inherited-members: \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/genetic_code/genetic_code.rst python-cogent-2020.12.21a+dfsg/doc/api/genetic_code/genetic_code.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/genetic_code/genetic_code.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/genetic_code/genetic_code.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,10 @@ +:mod:`genetic_code` +=================== + +.. currentmodule:: cogent3.core.genetic_code + +.. 
autosummary:: + :toctree: classes + :template: class.rst + + GeneticCode diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/index.rst python-cogent-2020.12.21a+dfsg/doc/api/index.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/index.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/index.rst 2020-12-20 23:35:03.000000000 +0000 @@ -2,14 +2,88 @@ API ### -******************************** -Objects for the major data types -******************************** +********************* +Utility Functions For +********************* + +Loading data from file +====================== + +These are all top level imports. For example, + +.. code-block:: python + + from cogent3 import load_unaligned_seqs + +.. toctree:: + :maxdepth: 1 + + __init__/cogent3.__init__.load_unaligned_seqs + __init__/cogent3.__init__.load_aligned_seqs + __init__/cogent3.__init__.load_table + __init__/cogent3.__init__.load_tree + +Making cogent3 types from standard Python types +=============================================== + +These are all top level imports. For example, + +.. code-block:: python + + from cogent3 import make_unaligned_seqs + +.. toctree:: + :maxdepth: 1 + + __init__/cogent3.__init__.make_aligned_seqs + __init__/cogent3.__init__.make_unaligned_seqs + __init__/cogent3.__init__.make_seq + __init__/cogent3.__init__.make_table + __init__/cogent3.__init__.make_tree + +Getting commonly used cogent3 types +=================================== + +These are all top level imports. For example, + +.. code-block:: python + + from cogent3 import get_code + +.. toctree:: + :maxdepth: 1 + + __init__/cogent3.__init__.get_code + __init__/cogent3.__init__.get_moltype + +Displaying cogent3 builtins +=========================== + +These are all top level imports. For example, + +.. code-block:: python + + from cogent3 import get_code + +.. 
toctree:: + :maxdepth: 1 + + __init__/cogent3.__init__.available_codes + __init__/cogent3.__init__.available_moltypes + __init__/cogent3.__init__.available_models + __init__/cogent3.__init__.available_apps + +**************************** +The Major cogent3 Data Types +**************************** .. toctree:: :maxdepth: 1 alignment/alignment sequence/sequence + genetic_code/genetic_code + moltype/moltype + alphabet/alphabet table/table tree/tree diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.available_apps.rst python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.available_apps.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.available_apps.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.available_apps.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,6 @@ +available_apps +============== + +.. currentmodule:: cogent3.__init__ + +.. autofunction:: available_apps diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.available_codes.rst python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.available_codes.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.available_codes.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.available_codes.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,6 @@ +available_codes +=============== + +.. currentmodule:: cogent3.__init__ + +.. 
autofunction:: available_codes diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.available_models.rst python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.available_models.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.available_models.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.available_models.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,6 @@ +available_models +================ + +.. currentmodule:: cogent3.__init__ + +.. autofunction:: available_models diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.available_moltypes.rst python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.available_moltypes.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.available_moltypes.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.available_moltypes.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,6 @@ +available_moltypes +================== + +.. currentmodule:: cogent3.__init__ + +.. autofunction:: available_moltypes diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.get_code.rst python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.get_code.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.get_code.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.get_code.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,6 @@ +get_code +======== + +.. currentmodule:: cogent3.__init__ + +.. 
autofunction:: get_code diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.get_moltype.rst python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.get_moltype.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.get_moltype.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.get_moltype.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,6 @@ +get_moltype +=========== + +.. currentmodule:: cogent3.__init__ + +.. autofunction:: get_moltype diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.load_aligned_seqs.rst python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.load_aligned_seqs.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.load_aligned_seqs.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.load_aligned_seqs.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,6 @@ +load_aligned_seqs +================= + +.. currentmodule:: cogent3.__init__ + +.. autofunction:: load_aligned_seqs diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.load_table.rst python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.load_table.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.load_table.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.load_table.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,6 @@ +load_table +========== + +.. currentmodule:: cogent3.__init__ + +.. 
autofunction:: load_table \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.load_tree.rst python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.load_tree.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.load_tree.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.load_tree.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,6 @@ +load_tree +========= + +.. currentmodule:: cogent3.__init__ + +.. autofunction:: load_tree \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.load_unaligned_seqs.rst python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.load_unaligned_seqs.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.load_unaligned_seqs.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.load_unaligned_seqs.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,6 @@ +load_unaligned_seqs +=================== + +.. currentmodule:: cogent3.__init__ + +.. autofunction:: load_unaligned_seqs \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.make_aligned_seqs.rst python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.make_aligned_seqs.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.make_aligned_seqs.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.make_aligned_seqs.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,6 @@ +make_aligned_seqs +================= + +.. currentmodule:: cogent3.__init__ + +.. 
autofunction:: make_aligned_seqs \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.make_seq.rst python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.make_seq.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.make_seq.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.make_seq.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,6 @@ +make_seq +======== + +.. currentmodule:: cogent3.__init__ + +.. autofunction:: make_seq \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.make_table.rst python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.make_table.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.make_table.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.make_table.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,6 @@ +make_table +========== + +.. currentmodule:: cogent3.__init__ + +.. autofunction:: make_table \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.make_tree.rst python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.make_tree.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.make_tree.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.make_tree.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,6 @@ +make_tree +========= + +.. currentmodule:: cogent3.__init__ + +.. 
autofunction:: make_tree \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.make_unaligned_seqs.rst python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.make_unaligned_seqs.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/__init__/cogent3.__init__.make_unaligned_seqs.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/__init__/cogent3.__init__.make_unaligned_seqs.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,6 @@ +make_unaligned_seqs +=================== + +.. currentmodule:: cogent3.__init__ + +.. autofunction:: make_unaligned_seqs diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/moltype/classes/cogent3.core.moltype.MolType.rst python-cogent-2020.12.21a+dfsg/doc/api/moltype/classes/cogent3.core.moltype.MolType.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/moltype/classes/cogent3.core.moltype.MolType.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/moltype/classes/cogent3.core.moltype.MolType.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,9 @@ +MolType +======= + +.. currentmodule:: cogent3.core.moltype + +.. autoclass:: MolType + :members: + :undoc-members: + :inherited-members: \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/moltype/moltype.rst python-cogent-2020.12.21a+dfsg/doc/api/moltype/moltype.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/moltype/moltype.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/moltype/moltype.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,10 @@ +:mod:`moltype` +=================== + +.. currentmodule:: cogent3.core.moltype + +.. 
autosummary:: + :toctree: classes + :template: class.rst + + MolType diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/table/classes/cogent3.util.table.Columns.rst python-cogent-2020.12.21a+dfsg/doc/api/table/classes/cogent3.util.table.Columns.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/table/classes/cogent3.util.table.Columns.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/table/classes/cogent3.util.table.Columns.rst 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,9 @@ +Columns +======= + +.. currentmodule:: cogent3.util.table + +.. autoclass:: Columns + :members: + :undoc-members: + :inherited-members: \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/api/table/table.rst python-cogent-2020.12.21a+dfsg/doc/api/table/table.rst --- python-cogent-2020.6.30a0+dfsg/doc/api/table/table.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/api/table/table.rst 2020-12-20 23:35:03.000000000 +0000 @@ -8,3 +8,4 @@ :template: class.rst Table + Columns diff -Nru python-cogent-2020.6.30a0+dfsg/doc/app/align-codon.rst python-cogent-2020.12.21a+dfsg/doc/app/align-codon.rst --- python-cogent-2020.6.30a0+dfsg/doc/app/align-codon.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/app/align-codon.rst 2020-12-20 23:35:03.000000000 +0000 @@ -9,7 +9,6 @@ We load the unaligned sequences we will use in our examples. .. jupyter-execute:: - :linenos: from cogent3.app import io @@ -22,7 +21,6 @@ The default settings will result in estimation of a guide tree (using percent identity between the sequences). The default "codon" model is MG94HKY. .. jupyter-execute:: - :linenos: from cogent3.app.align import progressive_align @@ -33,7 +31,6 @@ The parameters used to construct the alignment, including the guide tree and substitution model, are record in the ``info`` attribute. .. jupyter-execute:: - :linenos: aligned.info @@ -47,7 +44,6 @@ .. 
note:: An estimated guide tree has its branch lengths scaled so they are consistent with usage in a codon model. .. jupyter-execute:: - :linenos: nt_aligner = progressive_align("codon", distance="paralinear") aligned = nt_aligner(seqs) @@ -59,7 +55,6 @@ .. note:: The guide tree needs to have branch lengths, otherwise a ``ValueError`` is raised. .. jupyter-execute:: - :linenos: tree = "((Chimp:0.001,Human:0.001):0.0076,Macaque:0.01,((Rat:0.01,Mouse:0.01):0.02,Mouse_Lemur:0.02):0.01)" codon_aligner = progressive_align("codon", guide_tree=tree) @@ -70,7 +65,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: codon_aligner = progressive_align( "codon", guide_tree=tree, indel_rate=0.001, indel_length=0.01 @@ -84,7 +78,6 @@ Any codon substitution model can be used. (See ``cogent3.available_models()``.) If you provide parameter values, those must be consistent with the model definition. .. jupyter-execute:: - :linenos: codon_aligner = progressive_align( "CNFHKY", guide_tree=tree, param_vals=dict(omega=0.1, kappa=3) @@ -96,6 +89,5 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: aligned.info \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/app/align-nucleotide.rst python-cogent-2020.12.21a+dfsg/doc/app/align-nucleotide.rst --- python-cogent-2020.6.30a0+dfsg/doc/app/align-nucleotide.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/app/align-nucleotide.rst 2020-12-20 23:35:03.000000000 +0000 @@ -9,7 +9,6 @@ We load the unaligned sequences we will use in our examples. .. jupyter-execute:: - :linenos: from cogent3.app import io @@ -22,7 +21,6 @@ The default setting for "nucleotide" is a HKY85 model. .. jupyter-execute:: - :linenos: from cogent3.app.align import progressive_align @@ -36,7 +34,6 @@ For the nucleotide case, you can use TN93 or paralinear. .. 
jupyter-execute:: - :linenos: nt_aligner = progressive_align("nucleotide", distance="TN93") aligned = nt_aligner(seqs) @@ -46,7 +43,6 @@ ^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: tree = "((Chimp:0.001,Human:0.001):0.0076,Macaque:0.01,((Rat:0.01,Mouse:0.01):0.02,Mouse_Lemur:0.02):0.01)" nt_aligner = progressive_align("nucleotide", guide_tree=tree) @@ -61,7 +57,6 @@ You can use any nucleotide substitution model. For a list of all available, see ``cogent3.available_models()``. .. jupyter-execute:: - :linenos: tree = "((Chimp:0.001,Human:0.001):0.0076,Macaque:0.01,((Rat:0.01,Mouse:0.01):0.02,Mouse_Lemur:0.02):0.01)" nt_aligner = progressive_align("F81", guide_tree=tree) @@ -72,6 +67,5 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: aligned.info \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/app/align-protein.rst python-cogent-2020.12.21a+dfsg/doc/app/align-protein.rst --- python-cogent-2020.6.30a0+dfsg/doc/app/align-protein.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/app/align-protein.rst 2020-12-20 23:35:03.000000000 +0000 @@ -9,7 +9,6 @@ We load the unaligned sequences we will use in our examples and translate them. .. jupyter-execute:: - :linenos: from cogent3.app import io, translate @@ -24,7 +23,6 @@ The default setting for "protein" is a WG01 model. .. jupyter-execute:: - :linenos: from cogent3.app.align import progressive_align @@ -40,7 +38,6 @@ .. note:: An estimated guide tree has its branch lengths scaled so they are consistent with usage in a codon model. .. jupyter-execute:: - :linenos: aa_aligner = progressive_align("protein", distance="paralinear") aligned = aa_aligner(seqs) @@ -50,6 +47,5 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. 
jupyter-execute:: - :linenos: aligned.info \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/app/evo-model.rst python-cogent-2020.12.21a+dfsg/doc/app/evo-model.rst --- python-cogent-2020.6.30a0+dfsg/doc/app/evo-model.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/app/evo-model.rst 2020-12-20 23:35:03.000000000 +0000 @@ -10,7 +10,6 @@ ``apps``. .. jupyter-execute:: - :linenos: from cogent3.app import io, sample @@ -25,7 +24,6 @@ tree. It’s not required to specify the tree in this instance. .. jupyter-execute:: - :linenos: from cogent3.app import evo @@ -35,7 +33,6 @@ We apply this to ``aln``. .. jupyter-execute:: - :linenos: fitted = gn(aln) type(fitted) @@ -50,38 +47,32 @@ ``model_result`` has dictionary behaviour, hence the ``key`` column. This will be demonstrated below. .. jupyter-execute:: - :linenos: fitted More detail on the fitted model are available via attributes. For instance, display the maximum likelihood estimates via the likelihood function attribute .. jupyter-execute:: - :linenos: fitted.lf .. jupyter-execute:: - :linenos: fitted.lnL, fitted.nfp .. jupyter-execute:: - :linenos: fitted.source The ``model_result.tree`` attribute is an “annotated tree”. Maximum likelihood estimates from the model have been assigned to the tree. Of particular significance, the “length” attribute corresponds to the expected number of substitutions (or ENS). For a non-stationary model, like GN, this can be different to the conventional length (`Kaehler et al `__). .. jupyter-execute:: - :linenos: fitted.tree, fitted.alignment We can access the sum of all branch lengths. Either as “ENS” or “paralinear” using the ``total_length()`` method. .. jupyter-execute:: - :linenos: fitted.total_length(length_as="paralinear") @@ -91,7 +82,6 @@ Controlled by setting ``split_codons=True``. .. 
jupyter-execute:: - :linenos: gn = evo.model("GN", split_codons=True) @@ -103,6 +93,5 @@ We get access to the likelihood functions of the individual positions via the indicated dict keys. .. jupyter-execute:: - :linenos: fitted[3] \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/app/evo-natsel_zhang.rst python-cogent-2020.12.21a+dfsg/doc/app/evo-natsel_zhang.rst --- python-cogent-2020.6.30a0+dfsg/doc/app/evo-natsel_zhang.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/app/evo-natsel_zhang.rst 2020-12-20 23:35:03.000000000 +0000 @@ -10,14 +10,20 @@ For this model class, there are groups of branches for which all positions are evolving neutrally but some proportion of those neutrally evolving sites change to adaptively evolving on so-called foreground edges. For the current example, we’ll define the Chimpanzee and Human branches as foreground and everything else as background. The following table defines the parameter scopes. -========== ========== ================ ================ -Site Class Proportion Background Edges Foreground Edges -========== ========== ================ ================ -0 p0 0 < omega0 < 1 0 < omega0 < 1 -1 p1 omega1 = 1 omega1 = 1 -2a p2 0 < omega0 < 1 0 < omega2 > 1 -2b p3 omega1 = 1 0 < omega0 < 1 -========== ========== ================ ================ +.. jupyter-execute:: + :hide-code: + + from numpy import array + from cogent3 import make_table + from IPython.core.display import HTML + + header = ['Site Class', 'Proportion', 'Background Edges', 'Foreground Edges'] + data = {'Site Class': array(['0', '1', '2a', '2b'], dtype=' 1', '0 < omega0 < 1'], + dtype=' 1, motif_length=3) @@ -605,7 +565,6 @@ You can use ``take_seqs`` to extract some sequences by sequence identifier from an alignment to a new alignment object: .. 
jupyter-execute:: - :linenos: from cogent3 import load_aligned_seqs @@ -615,7 +574,6 @@ Alternatively, you can extract only the sequences which are not specified by passing ``negate=True``: .. jupyter-execute:: - :linenos: aln.take_seqs(["Human", "Mouse"], negate=True) @@ -625,7 +583,6 @@ You can use ``take_seqs_if`` to extract sequences into a new alignment object based on whether an arbitrary function applied to the sequence evaluates to True. For example, to extract sequences which don't contain any N bases you could do the following: .. jupyter-execute:: - :linenos: from cogent3 import make_aligned_seqs @@ -646,7 +603,6 @@ You can additionally get the sequences where the provided function evaluates to False: .. jupyter-execute:: - :linenos: aln.take_seqs_if(no_N_chars, negate=True) @@ -659,7 +615,6 @@ We state the motif length we want and whether to allow gap or ambiguous characters. The latter only has meaning for IPUAC character sets (the DNA, RNA or PROTEIN moltypes). We illustrate this for the DNA moltype with motif lengths of 1 and 3. .. jupyter-execute:: - :linenos: from cogent3 import make_aligned_seqs @@ -688,7 +643,6 @@ The method ``get_motif_probs`` of ``Alignment`` objects returns the probabilities for all motifs of a given length. For individual nucleotides: .. jupyter-execute:: - :linenos: from cogent3 import load_aligned_seqs @@ -699,7 +653,6 @@ For dinucleotides or longer, we need to pass in an ``Alphabet`` with the appropriate word length. Here is an example with trinucleotides: .. jupyter-execute:: - :linenos: from cogent3 import load_aligned_seqs, DNA @@ -714,7 +667,6 @@ Some calculations in ``cogent3`` require all non-zero values in the motif probabilities, in which case we use a pseudo-count. We illustrate that here with a simple example where T is missing. Without the pseudo-count, the frequency of T is 0.0, with the pseudo-count defined as 1e-6 then the frequency of T will be slightly less than 1e-6. .. 
jupyter-execute:: - :linenos: aln = make_aligned_seqs(data=[("a", "AACAAC"), ("b", "AAGAAG")], moltype="dna") motif_probs = aln.get_motif_probs() @@ -727,7 +679,6 @@ We then create a dinucleotide ``Alphabet`` object and use this to get dinucleotide probabilities. These frequencies are determined by breaking each aligned sequence up into non-overlapping dinucleotides and then doing a count. The expected value for the 'AA' dinucleotide in this case will be 2/8 = 0.25. .. jupyter-execute:: - :linenos: seqs = [("a", "AACGTAAG"), ("b", "AACGTAAG")] aln = make_aligned_seqs(data=seqs, moltype="dna") @@ -738,7 +689,6 @@ What about counting the total incidence of dinucleotides including those not in-frame? A naive application of the Python string object's count method will not work as desired either because it "returns the number of non-overlapping occurrences". .. jupyter-execute:: - :linenos: seqs = [("my_seq", "AAAGTAAG")] aln = make_aligned_seqs(data=seqs, moltype="dna") @@ -750,7 +700,6 @@ To count all occurrences of a given dinucleotide in a DNA sequence, one could use a standard Python approach such as list comprehension: .. jupyter-execute:: - :linenos: from cogent3 import make_seq @@ -766,7 +715,6 @@ +++++++++++++++++++++++++++++++++++++++++++++++++ .. jupyter-execute:: - :linenos: from cogent3 import load_aligned_seqs @@ -781,7 +729,6 @@ ++++++++++++++++++++++++++++ .. jupyter-execute:: - :linenos: from cogent3 import load_aligned_seqs @@ -797,7 +744,6 @@ It's often important to know how an alignment position relates to a position in one or more of the sequences in the alignment. The ``gap_maps`` method of the individual sequences is useful for this. To get a map of sequence to alignment positions for a specific sequence in your alignment, do the following: .. jupyter-execute:: - :linenos: from cogent3 import make_aligned_seqs @@ -814,7 +760,6 @@ It's now possible to look up positions in the ``seq1``, and find out what they map to in the alignment: .. 
jupyter-execute:: - :linenos: seq_to_aln_map[3] seq_to_aln_map[8] @@ -824,7 +769,6 @@ Notice that we grabbed the first result from the call to ``gap_maps``. This is the sequence position to alignment position map. The second value returned is the alignment position to sequence position map, so if you want to find out what sequence positions the alignment positions correspond to (opposed to what alignment positions the sequence positions correspond to) for a given sequence, you would take the following steps: .. jupyter-execute:: - :linenos: aln_to_seq_map = aln.get_gapped_seq("seq1").gap_maps()[1] aln_to_seq_map[3] @@ -833,7 +777,6 @@ If an alignment position is a gap, and therefore has no corresponding sequence position, you'll get a ``KeyError``. .. jupyter-execute:: - :linenos: :raises: KeyError seq_pos = aln_to_seq_map[7] @@ -848,7 +791,6 @@ The ``omit_gap_runs`` method can be applied to remove long stretches of gaps in an alignment. In the following example, we remove sequences that have more than two adjacent gaps anywhere in the aligned sequence. .. jupyter-execute:: - :linenos: aln = make_aligned_seqs( data=[ @@ -863,7 +805,6 @@ If instead, we just wanted to remove positions from the alignment which are gaps in more than a certain percentage of the sequences, we could use the ``omit_gap_pos`` function. For example: .. jupyter-execute:: - :linenos: aln = make_aligned_seqs( data=[ @@ -878,7 +819,6 @@ If you wanted to remove sequences which contain more than a certain percent gap characters, you could use the ``omit_gap_seqs`` method. This is commonly applied to filter partial sequences from an alignment. .. jupyter-execute:: - :linenos: aln = make_aligned_seqs( data=[ @@ -894,6 +834,5 @@ Note that following this call to ``omit_gap_seqs``, the 4th column of ``filtered_aln`` is 100% gaps. 
This is generally not desirable, so a call to ``omit_gap_seqs`` is frequently followed with a call to ``omit_gap_pos`` with no parameters -- this defaults to removing positions which are all gaps: .. jupyter-execute:: - :linenos: print(filtered_aln.omit_gap_pos()) \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/cookbook/alphabet.rst python-cogent-2020.12.21a+dfsg/doc/cookbook/alphabet.rst --- python-cogent-2020.6.30a0+dfsg/doc/cookbook/alphabet.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/cookbook/alphabet.rst 2020-12-20 23:35:03.000000000 +0000 @@ -9,7 +9,6 @@ ``MolType`` instances have an ``Alphabet``. .. jupyter-execute:: - :linenos: from cogent3 import DNA, PROTEIN @@ -19,7 +18,6 @@ ``Alphabet`` instances have a ``MolType``. .. jupyter-execute:: - :linenos: PROTEIN.alphabet.moltype == PROTEIN @@ -29,7 +27,6 @@ You can create a tuple alphabet of, for example, dinucleotides or trinucleotides. .. jupyter-execute:: - :linenos: dinuc_alphabet = DNA.alphabet.get_word_alphabet(2) print(dinuc_alphabet) @@ -40,7 +37,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: seq = "TAGT" indices = DNA.alphabet.to_indices(seq) @@ -50,7 +46,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: seq = DNA.alphabet.from_indices([0, 2, 3, 0]) seq @@ -58,7 +53,6 @@ or .. jupyter-execute:: - :linenos: seq = DNA.alphabet.from_ordinals_to_seq([0, 2, 3, 0]) - seq + seq \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/cookbook/annotations.rst python-cogent-2020.12.21a+dfsg/doc/cookbook/annotations.rst --- python-cogent-2020.6.30a0+dfsg/doc/cookbook/annotations.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/cookbook/annotations.rst 2020-12-20 23:35:03.000000000 +0000 @@ -19,7 +19,6 @@ We load a sample genbank file with plenty of features and grab the CDS features. .. 
jupyter-execute:: - :linenos: from cogent3.parse.genbank import RichGenbankParser @@ -28,7 +27,6 @@ print(accession) .. jupyter-execute:: - :linenos: cds = seq.get_annotations_matching("CDS") print(cds) @@ -39,7 +37,6 @@ You can write your own code to construct annotation objects. One reason you might do this is some genbank files do not have a ``/gene`` tag on gene related features, instead only possessing a ``/locus_tag``. For illustrating the approach we only create annotations for ``CDS`` features. We write a custom callback function that uses the ``locus_tag`` as the ``Feature`` name. .. jupyter-execute:: - :linenos: from cogent3.core.annotation import Feature @@ -64,7 +61,6 @@ """"""""""""""""""""""""""""""" .. jupyter-execute:: - :linenos: from cogent3 import DNA from cogent3.core.annotation import Feature @@ -85,7 +81,6 @@ ++++++++++++++++++ .. jupyter-execute:: - :linenos: from cogent3 import DNA from cogent3.core.annotation import Feature @@ -100,7 +95,6 @@ +++++++++++++++ .. jupyter-execute:: - :linenos: from cogent3 import DNA @@ -115,7 +109,6 @@ """"""""""""""""""""""""""""""" .. jupyter-execute:: - :linenos: from cogent3 import DNA @@ -131,7 +124,6 @@ Construct a pseudo-feature (``cds``) that's a union of other features (``exon1``, ``exon2``, ``exon3``). .. jupyter-execute:: - :linenos: from cogent3 import DNA @@ -149,7 +141,6 @@ These are useful for doing custom things, e.g. you could construct intron features using the below. .. jupyter-execute:: - :linenos: cds.get_coordinates() @@ -159,7 +150,6 @@ A shadow is a span representing everything but the annotation. .. jupyter-execute:: - :linenos: not_cds = cds.get_shadow() not_cds @@ -167,7 +157,6 @@ Compare to the coordinates of the original. .. jupyter-execute:: - :linenos: cds @@ -177,7 +166,6 @@ The following annotation is directly applied onto the sequence and so is in ungapped sequence coordinates. .. 
jupyter-execute:: - :linenos: from cogent3 import make_aligned_seqs @@ -192,7 +180,6 @@ We add an annotation directly onto an alignment. In this example we add a ``Variable`` that can be displayed as a red line on a drawing. The resulting annotation (``red_data`` here) is in **alignment coordinates**! .. jupyter-execute:: - :linenos: from cogent3.core.annotation import Variable @@ -206,7 +193,6 @@ By a feature or coordinates returns same sequence span .. jupyter-execute:: - :linenos: from cogent3 import DNA @@ -221,7 +207,6 @@ Using the annotation object ``get_slice`` method returns the same thing. .. jupyter-execute:: - :linenos: s1[exon2] exon2.get_slice() @@ -230,7 +215,6 @@ """"""""""""""""""""""""""""""""""""""""""" .. jupyter-execute:: - :linenos: from cogent3 import DNA @@ -247,7 +231,6 @@ .. warning:: Slices are applied in order! .. jupyter-execute:: - :linenos: print(s1) print(s1[exon1, exon2, exon3]) @@ -259,7 +242,6 @@ """""""""""""""""""""""""""""""""""" .. jupyter-execute:: - :linenos: :raises: ValueError s1[1:10, 9:15] @@ -268,7 +250,6 @@ But ``get_region_covering_all`` resolves this, ensuring no overlaps. .. jupyter-execute:: - :linenos: print(s1.get_region_covering_all([exon3, exon3]).get_slice()) @@ -276,7 +257,6 @@ """""""""""""""""""""""""""""""""" .. jupyter-execute:: - :linenos: print(s1[exon2]) ex2_start = exon2[0:3] @@ -290,7 +270,6 @@ You can't slice an alignment using an annotation from a sequence. .. jupyter-execute:: - :linenos: :raises: ValueError aln1[seq_exon] @@ -301,7 +280,6 @@ You can copy annotations onto sequences with the same name, even if the length differs .. jupyter-execute:: - :linenos: aln2 = make_aligned_seqs( data=[["x", "-AAAAAAAAA"], ["y", "TTTT--TTTT"]], array_align=False @@ -315,7 +293,6 @@ but if the feature lies outside the sequence being copied to, you get a lost span .. 
jupyter-execute:: - :linenos: aln2 = make_aligned_seqs(data=[["x", "-AAAA"], ["y", "TTTTT"]], array_align=False) seq = DNA.make_seq("CCCCCCCCCCCCCCCCCCCC", "x") @@ -328,7 +305,6 @@ You can copy to a sequence with a different name, in a different alignment if the feature lies within the length .. jupyter-execute:: - :linenos: # new test aln2 = make_aligned_seqs( @@ -343,7 +319,6 @@ If the sequence is shorter, again you get a lost span. .. jupyter-execute:: - :linenos: aln2 = make_aligned_seqs( data=[["x", "-AAAAAAAAA"], ["y", "TTTT--TTTT"]], array_align=False @@ -360,7 +335,6 @@ You need to get a corresponding annotation projected into alignment coordinates via a query. .. jupyter-execute:: - :linenos: aln_exon = aln1.get_annotations_from_any_seq("exon") print(aln1[aln_exon]) @@ -369,7 +343,6 @@ """"""""""""""""""""""""""""""""""""""""""""""""""""" .. jupyter-execute:: - :linenos: :raises: ValueError cpgsite2 = s2.get_annotations_matching("cpgsite") @@ -383,7 +356,6 @@ You get back an empty list, and slicing with this returns an empty sequence. .. jupyter-execute:: - :linenos: # this test is new dont_exist = s2.get_annotations_matching("dont_exist") @@ -396,7 +368,6 @@ If you query for a feature from a sequence, it's alignment coordinates may be discontinuous. .. jupyter-execute:: - :linenos: aln3 = make_aligned_seqs( data=[["x", "C-CCCAAAAA"], ["y", "-T----TTTT"]], array_align=False @@ -415,7 +386,6 @@ To get positions spanned by a feature, including gaps, use ``as_one_span``. .. jupyter-execute:: - :linenos: unified = aln_exons[0].as_one_span() print(aln3[unified]) @@ -426,7 +396,6 @@ Reverse complementing a sequence **does not** reverse annotations, that is they retain the reference to the frame for which they were defined. .. jupyter-execute:: - :linenos: plus = DNA.make_seq("CCCCCAAAAAAAAAATTTTTTTTTTAAAGG") plus_rpt = plus.add_feature("blah", "a", [(5, 15), (25, 28)]) @@ -442,7 +411,6 @@ We mask the CDS regions. .. 
jupyter-execute:: - :linenos: from cogent3.parse.genbank import RichGenbankParser @@ -459,7 +427,6 @@ We mask exon's on an alignment. .. jupyter-execute:: - :linenos: from cogent3 import make_aligned_seqs @@ -474,7 +441,6 @@ These also persist through reverse complement operations. .. jupyter-execute:: - :linenos: rc = aln.rc() print(rc) @@ -484,7 +450,6 @@ """"""""""""""""""""""""""""""" .. jupyter-execute:: - :linenos: from cogent3 import DNA @@ -501,7 +466,6 @@ """""""""""""""""""""""""""""""""""""""""""""" .. jupyter-execute:: - :linenos: from cogent3 import DNA @@ -517,7 +481,6 @@ The annotation methods ``get_region_covering_all`` and ``get_shadow`` can be used to grab all the coding sequences or non-coding sequences in a ``DnaSequence`` object. .. jupyter-execute:: - :linenos: from cogent3.parse.genbank import RichGenbankParser @@ -537,7 +500,6 @@ Sequence features can be accessed via a containing ``Alignment``. .. jupyter-execute:: - :linenos: from cogent3 import make_aligned_seqs @@ -558,7 +520,6 @@ We first make a sequence and add some annotations. .. jupyter-execute:: - :linenos: from cogent3 import DNA diff -Nru python-cogent-2020.6.30a0+dfsg/doc/cookbook/building_alignments.rst python-cogent-2020.12.21a+dfsg/doc/cookbook/building_alignments.rst --- python-cogent-2020.6.30a0+dfsg/doc/cookbook/building_alignments.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/cookbook/building_alignments.rst 2020-12-20 23:35:03.000000000 +0000 @@ -15,7 +15,6 @@ We load a canned nucleotide substitution model and the progressive aligner ``TreeAlign`` function. .. jupyter-execute:: - :linenos: from cogent3 import load_unaligned_seqs, make_tree from cogent3.align.progressive import TreeAlign @@ -23,7 +22,6 @@ We first align without providing a guide tree. The ``TreeAlign`` algorithm builds pairwise alignments and estimates the substitution model parameters and pairwise distances. 
The distances are used to build a neighbour joining tree and the median value of substitution model parameters are provided to the substitution model for the progressive alignment step. .. jupyter-execute:: - :linenos: seqs = load_unaligned_seqs("data/test2.fasta", moltype="dna") aln, tree = TreeAlign("HKY85", seqs, show_progress=False) @@ -32,7 +30,6 @@ We then align using a guide tree (pre-estimated) and specifying the ratio of transitions to transversions (kappa). .. jupyter-execute:: - :linenos: tree = make_tree( "(((NineBande:0.013,Mouse:0.185):0.023,DogFaced:0.046):0.027,Human:0.034,HowlerMon:0.019)" @@ -49,7 +46,6 @@ We load a canned codon substitution model and use a pre-defined tree and parameter estimates. .. jupyter-execute:: - :linenos: from cogent3 import load_unaligned_seqs, make_tree from cogent3.align.progressive import TreeAlign @@ -70,7 +66,6 @@ We load some unaligned DNA sequences and show their translation. .. jupyter-execute:: - :linenos: from cogent3 import make_unaligned_seqs from cogent3.evolve.models import get_model @@ -91,14 +86,12 @@ print(unaligned_DNA) .. jupyter-execute:: - :linenos: print(unaligned_DNA.get_translation()) We load an alignment of these protein sequences. .. jupyter-execute:: - :linenos: from cogent3 import make_aligned_seqs @@ -112,7 +105,6 @@ We then obtain an alignment of the DNA sequences from the alignment of their translation. .. jupyter-execute:: - :linenos: aligned_DNA = aligned_aa.replace_seqs(unaligned_DNA, aa_to_codon=True) aligned_DNA \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/cookbook/building_phylogenies.rst python-cogent-2020.12.21a+dfsg/doc/cookbook/building_phylogenies.rst --- python-cogent-2020.6.30a0+dfsg/doc/cookbook/building_phylogenies.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/cookbook/building_phylogenies.rst 2020-12-20 23:35:03.000000000 +0000 @@ -16,7 +16,6 @@ Both the ``ArrayAlignment`` and ``Alignment`` classes support this. .. 
jupyter-execute:: - :linenos: from cogent3 import load_aligned_seqs @@ -28,7 +27,6 @@ The ``quick_tree()`` method also supports non-parametric bootstrapping. The number of resampled alignments is specified using the ``bootstrap`` argument. In the following, trees are estimated from 100 resampled alignments and merged into a single consensus topology using a weighted consensus tree algorithm. .. jupyter-execute:: - :linenos: tree = aln.quick_tree(calc="TN93", bootstrap=100, show_progress=False) @@ -36,7 +34,6 @@ ----------------------------------- .. jupyter-execute:: - :linenos: from cogent3 import load_aligned_seqs @@ -50,7 +47,6 @@ ---------------------------------------------------------------- .. jupyter-execute:: - :linenos: from cogent3.phylo import nj from cogent3 import load_aligned_seqs @@ -65,7 +61,6 @@ ------------------------------------------ .. jupyter-execute:: - :linenos: from cogent3.phylo import nj @@ -79,7 +74,6 @@ We illustrate the phylogeny reconstruction by least-squares using the F81 substitution model. We use the advanced-stepwise addition algorithm to search tree space. Here ``a`` is the number of taxa to exhaustively evaluate all possible phylogenies for. Successive taxa are added to the top ``k`` trees (measured by the least-squares metric) and ``k`` trees are kept at each iteration. .. jupyter-execute:: - :linenos: from cogent3.util.deserialise import deserialise_object from cogent3.phylo.least_squares import WLS @@ -96,7 +90,6 @@ We illustrate the phylogeny reconstruction using maximum-likelihood using the F81 substitution model. We use the advanced-stepwise addition algorithm to search tree space. .. 
jupyter-execute:: - :linenos: from cogent3 import load_aligned_seqs from cogent3.phylo.maximum_likelihood import ML diff -Nru python-cogent-2020.6.30a0+dfsg/doc/cookbook/calc_genetic_distance.rst python-cogent-2020.12.21a+dfsg/doc/cookbook/calc_genetic_distance.rst --- python-cogent-2020.6.30a0+dfsg/doc/cookbook/calc_genetic_distance.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/cookbook/calc_genetic_distance.rst 2020-12-20 23:35:03.000000000 +0000 @@ -14,7 +14,6 @@ available. .. jupyter-execute:: - :linenos: from cogent3 import available_distances @@ -26,7 +25,6 @@ Abbreviations listed from ``available_distances()`` can be used as values for the ``distance_matrix(calc=)``. .. jupyter-execute:: - :linenos: from cogent3 import load_aligned_seqs @@ -38,7 +36,6 @@ ====================================== .. jupyter-execute:: - :linenos: from cogent3 import load_aligned_seqs, get_distance_calculator @@ -47,7 +44,6 @@ dist_calc .. jupyter-execute:: - :linenos: dist_calc.run(show_progress=False) dists = dist_calc.get_pairwise_distances() @@ -56,7 +52,6 @@ The distance calculation object can provide more information. For instance, the standard errors. .. jupyter-execute:: - :linenos: dist_calc.stderr @@ -68,7 +63,6 @@ The following will use the F81 nucleotide substitution model and perform numerical optimisation. .. 
jupyter-execute:: - :linenos: from cogent3 import load_aligned_seqs, get_model from cogent3.evolve import distance diff -Nru python-cogent-2020.6.30a0+dfsg/doc/cookbook/DNA_and_RNA_sequences.rst python-cogent-2020.12.21a+dfsg/doc/cookbook/DNA_and_RNA_sequences.rst --- python-cogent-2020.6.30a0+dfsg/doc/cookbook/DNA_and_RNA_sequences.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/cookbook/DNA_and_RNA_sequences.rst 2020-12-20 23:35:03.000000000 +0000 @@ -18,7 +18,6 @@ All sequence and alignment objects have a molecular type, or ``MolType`` which provides key properties for validating sequence characters. Here we use the ``DNA`` ``MolType`` to create a DNA sequence. .. jupyter-execute:: - :linenos: from cogent3 import DNA @@ -31,7 +30,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import RNA @@ -41,7 +39,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import DNA @@ -52,7 +49,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import RNA @@ -65,7 +61,6 @@ You can also use a convenience ``make_seq()`` function, providing the moltype as a string. .. jupyter-execute:: - :linenos: from cogent3 import make_seq @@ -77,7 +72,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import make_seq @@ -89,7 +83,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import DNA @@ -100,14 +93,12 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: print(my_seq.rc()) The ``rc`` method name is easier to type .. jupyter-execute:: - :linenos: print(my_seq.rc()) @@ -117,7 +108,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import DNA @@ -130,7 +120,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import DNA @@ -141,7 +130,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. 
jupyter-execute:: - :linenos: from cogent3 import RNA @@ -152,7 +140,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import DNA @@ -164,7 +151,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import DNA @@ -178,7 +164,6 @@ ^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: my_seq[1:6] @@ -190,7 +175,6 @@ We'll do this by specifying the position indices of interest, creating a sequence ``Feature`` and using that to extract the positions. .. jupyter-execute:: - :linenos: from cogent3 import DNA @@ -204,7 +188,6 @@ In this instance we can use the annotatable sequence classes. .. jupyter-execute:: - :linenos: from cogent3 import DNA @@ -226,9 +209,8 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import RNA s = RNA.make_seq("--AUUAUGCUAU-UAu--") - print(s.degap()) + print(s.degap()) \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/cookbook/evo_modelling.rst python-cogent-2020.12.21a+dfsg/doc/cookbook/evo_modelling.rst --- python-cogent-2020.6.30a0+dfsg/doc/cookbook/evo_modelling.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/cookbook/evo_modelling.rst 2020-12-20 23:35:03.000000000 +0000 @@ -16,7 +16,6 @@ In cases where code takes a substitution model as an argument, you can use the value under “Abbreviation” as a string. .. jupyter-execute:: - :linenos: from cogent3 import available_models @@ -26,7 +25,6 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. jupyter-execute:: - :linenos: from cogent3.evolve.models import get_model @@ -44,7 +42,6 @@ We specify a general time reversible nucleotide model with gamma distributed rate heterogeneity. .. 
jupyter-execute:: - :linenos: from cogent3.evolve.models import get_model @@ -57,7 +54,6 @@ We specify a conditional nucleotide frequency codon model with nucleotide general time reversible parameters and a parameter for the ratio of nonsynonymous to synonymous substitutions (omega) with gamma distributed rate heterogeneity. .. jupyter-execute:: - :linenos: from cogent3.evolve.models import get_model @@ -70,7 +66,6 @@ We specify a Jones, Taylor and Thornton 1992 empirical protein substitution model with gamma distributed rate heterogeneity. .. jupyter-execute:: - :linenos: from cogent3.evolve.models import get_model @@ -83,7 +78,6 @@ You start by specifying a substitution model and use that to construct a likelihood function for a specific tree. .. jupyter-execute:: - :linenos: from cogent3 import make_tree from cogent3.evolve.models import get_model @@ -98,7 +92,6 @@ You need to load an alignment and then provide it a likelihood function. I construct very simple trees and alignments for this example. .. jupyter-execute:: - :linenos: from cogent3 import make_tree, make_aligned_seqs from cogent3.evolve.models import get_model @@ -117,7 +110,6 @@ For many evolutionary analyses, it’s desirable to allow different branches on a tree to have different values of a parameter. We show this for a simple codon model case here where we want the great apes (the clade that includes human and orangutan) to have a different value of the ratio of nonsynonymous to synonymous substitutions. This parameter is identified in the precanned ``CNFGTR`` model as ``omega``. .. jupyter-execute:: - :linenos: from cogent3 import load_tree from cogent3.evolve.models import get_model @@ -126,7 +118,6 @@ print(tree.ascii_art()) .. jupyter-execute:: - :linenos: sm = get_model("CNFGTR") lf = sm.make_likelihood_function(tree, digits=2) @@ -141,7 +132,6 @@ We’ve set an *initial* value for this clade so that the edges affected by this rule are evident below. .. 
jupyter-execute:: - :linenos: lf @@ -153,7 +143,6 @@ This means the parameter will not be modified during likelihood maximisation. We show this here by making the ``omega`` parameter constant at the value 1 – essentially the condition of selective neutrality. .. jupyter-execute:: - :linenos: from cogent3 import load_tree from cogent3.evolve.models import get_model @@ -169,7 +158,6 @@ This can be useful to improve performance, the closer you are to the maximum likelihood estimator the quicker optimisation will be. .. jupyter-execute:: - :linenos: from cogent3 import load_tree from cogent3.evolve.models import get_model @@ -185,7 +173,6 @@ This can be useful for stopping optimisers from getting stuck in a bad part of parameter space. The following is for ``omega`` in a codon model. I’m also providing an initial guess for the parameter (``init=0.1``) as well as a lower bound. An initial guess that is close to the maximum likelihood estimate will speed up optimisation. .. jupyter-execute:: - :linenos: from cogent3 import load_tree from cogent3.evolve.models import get_model @@ -201,7 +188,6 @@ If the branch length estimates seem too large, setting just an upper bound can be sensible. This will apply to all edges on the tree. .. jupyter-execute:: - :linenos: from cogent3 import load_tree from cogent3.evolve.models import get_model @@ -219,7 +205,6 @@ We extend the simple gamma distributed rate heterogeneity case for nucleotides from above to construction of the actual likelihood function. We do this for 4 bins and constraint the bin probabilities to be equal. .. jupyter-execute:: - :linenos: from cogent3 import load_tree from cogent3.evolve.models import get_model @@ -235,7 +220,6 @@ ~~~~~~~~~~~~~~~~~~~~~ .. jupyter-execute:: - :linenos: from cogent3 import load_tree from cogent3.evolve.models import get_model @@ -253,7 +237,6 @@ There are 2 types of optimiser: simulated annealing, a *global* optimiser; and Powell, a *local* optimiser. 
The simulated annealing method is slow compared to Powell and in general Powell is an adequate choice. I setup a simple nucleotide model to illustrate these. .. jupyter-execute:: - :linenos: from cogent3 import load_tree, load_aligned_seqs from cogent3.evolve.models import get_model @@ -268,21 +251,18 @@ The default is to use Powell. For Powell, it’s recommended to set the ``max_restarts`` argument since this provides a mechanism for Powell to attempt restarting the optimisation from a slightly different spot which can help in overcoming local maxima. .. jupyter-execute:: - :linenos: lf.optimise(local=True, max_restarts=5, show_progress=False) We might want to do crude simulated annealing following by more rigorous Powell. To do this we first need to use the global optimiser, setting ``local=False`` setting a large value for ``global_tolerance``. .. jupyter-execute:: - :linenos: lf.optimise(local=False, global_tolerance=1.0, show_progress=False) Followed by a standard call to ``optimise()``. .. jupyter-execute:: - :linenos: lf.optimise(show_progress=False, max_restarts=5, tolerance=1e-8) @@ -294,7 +274,6 @@ We can monitor this situation using the ``limit_action`` argument to ``optimise``. Providing the value ``raise`` causes an exception to be raised if this condition occurs, as shown below. Providing ``warn`` (default) instead will cause a warning message to be printed to screen but execution will continue. The value ``ignore`` hides any such message. .. jupyter-execute:: - :linenos: from cogent3 import load_tree, load_aligned_seqs from cogent3.evolve.models import get_model @@ -322,7 +301,6 @@ In Jupyter, the likelihood function object presents a representation of the main object features. .. jupyter-execute:: - :linenos: from cogent3 import load_tree, load_aligned_seqs from cogent3.evolve.models import get_model @@ -341,13 +319,11 @@ Reusing the optimised ``lf`` object from above, we can get the log-likelihood and the number of free parameters. .. 
jupyter-execute:: - :linenos: lnL = lf.lnL lnL .. jupyter-execute:: - :linenos: nfp = lf.nfp nfp @@ -360,14 +336,12 @@ Reusing the optimised ``lf`` object from above. .. jupyter-execute:: - :linenos: lf.get_aic() We can also get the second-order AIC. .. jupyter-execute:: - :linenos: lf.get_aic(second_order=True) @@ -377,7 +351,6 @@ Reusing the optimised ``lf`` object from above. .. jupyter-execute:: - :linenos: lf.get_bic() @@ -392,13 +365,11 @@ We get the statistics out individually. We get the ``length`` for the Human edge and the exchangeability parameter ``A/G``. .. jupyter-execute:: - :linenos: a_g = lf.get_param_value("A/G") a_g .. jupyter-execute:: - :linenos: human = lf.get_param_value("length", "Human") human @@ -407,7 +378,6 @@ '''''''''''''''''''''''''''' .. jupyter-execute:: - :linenos: mprobs = lf.get_motif_probs() mprobs @@ -416,7 +386,6 @@ ''''''''' .. jupyter-execute:: - :linenos: tables = lf.get_statistics(with_motif_probs=True, with_titles=True) tables[0] # just displaying the first @@ -427,7 +396,6 @@ We test the molecular clock hypothesis for human and chimpanzee lineages. The null has these two branches constrained to be equal. .. jupyter-execute:: - :linenos: from cogent3 import load_tree, load_aligned_seqs from cogent3.evolve.models import get_model @@ -453,7 +421,6 @@ The alternate allows the human and chimpanzee branches to differ by just setting all lengths to be independent. .. jupyter-execute:: - :linenos: lf.set_param_rule("length", is_independent=True) lf.set_name("Alt Hypothesis") @@ -465,7 +432,6 @@ We import the function for computing the probability of a chi-square test statistic, compute the likelihood ratio test statistic, degrees of freedom and the corresponding probability. .. jupyter-execute:: - :linenos: from cogent3.maths.stats import chisqprob @@ -482,7 +448,6 @@ In general, however, this capability derives from the ability of any defined ``evolve`` likelihood function to simulate an alignment. 
This property is provided as ``simulate_alignment`` method on likelihood function objects. .. jupyter-execute:: - :linenos: from cogent3 import load_tree, load_aligned_seqs from cogent3.evolve.models import get_model @@ -511,7 +476,6 @@ The profile method is used to calculate a confidence interval for a named parameter. We show it here for a global substitution model exchangeability parameter (*kappa*, the ratio of transition to transversion rates) and for an edge specific parameter (just the human branch length). .. jupyter-execute:: - :linenos: from cogent3 import load_tree, load_aligned_seqs from cogent3.evolve.models import get_model @@ -535,7 +499,6 @@ To illustrate this, I create a very simple likelihood function. The ``json`` variable below is just a string that can be saved to disk. .. jupyter-execute:: - :linenos: from cogent3 import load_tree, load_aligned_seqs from cogent3.evolve.models import get_model @@ -551,7 +514,6 @@ We deserialise the object from the string. .. jupyter-execute:: - :linenos: from cogent3.util.deserialise import deserialise_object @@ -564,7 +526,6 @@ We first fit a likelihood function. .. jupyter-execute:: - :linenos: from cogent3 import load_tree, load_aligned_seqs from cogent3.evolve.models import get_model @@ -579,7 +540,6 @@ We then get the most likely ancestral sequences. .. jupyter-execute:: - :linenos: ancestors = lf.likely_ancestral_seqs() ancestors[:60] @@ -587,7 +547,6 @@ Or we can get the posterior probabilities (returned as a ``DictArray``) of sequence states at each node. .. 
jupyter-execute:: - :linenos: ancestral_probs = lf.reconstruct_ancestral_seqs() ancestral_probs["root"][:5] diff -Nru python-cogent-2020.6.30a0+dfsg/doc/cookbook/genetic_code.rst python-cogent-2020.12.21a+dfsg/doc/cookbook/genetic_code.rst --- python-cogent-2020.6.30a0+dfsg/doc/cookbook/genetic_code.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/cookbook/genetic_code.rst 2020-12-20 23:35:03.000000000 +0000 @@ -2,7 +2,6 @@ ----------------------- .. jupyter-execute:: - :linenos: from cogent3 import get_code @@ -15,7 +14,6 @@ ------------------------ .. jupyter-execute:: - :linenos: from cogent3 import make_seq, get_code @@ -28,7 +26,6 @@ ---------------------------------- .. jupyter-execute:: - :linenos: from cogent3 import make_seq, get_code @@ -43,7 +40,6 @@ ----------------- .. jupyter-execute:: - :linenos: from cogent3 import make_seq, get_code @@ -53,7 +49,6 @@ or get the codons for a single amino acid .. jupyter-execute:: - :linenos: standard_code["A"] @@ -61,7 +56,6 @@ ------------------------------------------------------ .. jupyter-execute:: - :linenos: from cogent3 import get_code @@ -72,7 +66,6 @@ ------------------------------------- .. jupyter-execute:: - :linenos: from cogent3 import get_code @@ -83,7 +76,6 @@ --------------------------------------------- .. jupyter-execute:: - :linenos: targets = ["A", "C"] codons = [standard_code[aa] for aa in targets] @@ -95,7 +87,6 @@ ------------------------------------------------ .. jupyter-execute:: - :linenos: from cogent3 import make_seq @@ -109,7 +100,6 @@ Use the method ``get_in_motif_size`` .. jupyter-execute:: - :linenos: from cogent3 import make_seq @@ -123,7 +113,6 @@ You can't translate a sequence that contains a stop codon. .. jupyter-execute:: - :linenos: :raises: AlphabetError pep = my_seq.get_translation() @@ -132,7 +121,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. 
jupyter-execute:: - :linenos: from cogent3 import make_seq @@ -146,10 +134,9 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import make_seq my_seq = make_seq("CAAATGTATTAA", name="my_gene", moltype="dna") pep = my_seq[:-3].get_translation() - print(pep.to_fasta()) + print(pep.to_fasta()) \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/cookbook/loading_sequences.rst python-cogent-2020.12.21a+dfsg/doc/cookbook/loading_sequences.rst --- python-cogent-2020.6.30a0+dfsg/doc/cookbook/loading_sequences.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/cookbook/loading_sequences.rst 2020-12-20 23:35:03.000000000 +0000 @@ -14,7 +14,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import load_aligned_seqs @@ -24,7 +23,6 @@ The load functions record the origin of the data in the ``info`` attribute under a `"source"` key. .. jupyter-execute:: - :linenos: aln.info.source @@ -38,7 +36,6 @@ The ``load_unaligned_seqs()`` function returns a sequence collection. .. jupyter-execute:: - :linenos: from cogent3 import load_unaligned_seqs @@ -51,7 +48,6 @@ The loading functions use the filename suffix to infer the file format. This can be overridden using the ``format`` argument. .. jupyter-execute:: - :linenos: from cogent3 import load_aligned_seqs @@ -64,7 +60,6 @@ Simple case of loading a ``list`` of aligned amino acid sequences in FASTA format, with and without ``moltype`` specification. When ``moltype`` is not specified it defaults to ``BYTES`` for the ``ArrayAlignment`` class, ``ASCII`` for the ``Alignment`` class. .. jupyter-execute:: - :linenos: from cogent3 import make_aligned_seqs @@ -80,28 +75,26 @@ Making an alignment from standard python objects ------------------------------------------------ -From a series of strings -^^^^^^^^^^^^^^^^^^^^^^^^ +From a dict of strings +^^^^^^^^^^^^^^^^^^^^^^ .. 
jupyter-execute:: - :linenos: from cogent3 import make_aligned_seqs - seqs = [">seq1", "AATCG-A", ">seq2", "AATCGGA"] + seqs = {"seq1": "AATCG-A", "seq2": "AATCGGA"} seqs_loaded = make_aligned_seqs(seqs) - print(seqs_loaded) -From a dict of strings -^^^^^^^^^^^^^^^^^^^^^^ +From a series of strings +^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import make_aligned_seqs - seqs = {"seq1": "AATCG-A", "seq2": "AATCGGA"} + seqs = [">seq1", "AATCG-A", ">seq2", "AATCGGA"] seqs_loaded = make_aligned_seqs(seqs) + print(seqs_loaded) Stripping label characters on loading ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -109,7 +102,6 @@ Load a list of aligned nucleotide sequences, while specifying the DNA molecule type and stripping the comments from the label. In this example, stripping is accomplished by passing a function that removes everything after the first whitespace to the ``label_to_name`` parameter. .. jupyter-execute:: - :linenos: from cogent3 import make_aligned_seqs @@ -122,7 +114,20 @@ loaded_seqs = make_aligned_seqs( DNA_seqs, moltype="dna", label_to_name=lambda x: x.split()[0] ) - print(loaded_seqs) + loaded_seqs + +Making a sequence collection from standard python objects +--------------------------------------------------------- + +This is done using ``make_unaligned_seqs()``, which returns a ``SequenceCollection`` instance. The function arguments match those of ``make_aligned_seqs()``. We demonstrate only for the case where the input data is a ``dict``. + +.. jupyter-execute:: + + from cogent3 import make_unaligned_seqs + + seqs = {"seq1": "AATCA", "seq2": "AATCGGA"} + seqs = make_unaligned_seqs(data=seqs, moltype="dna") + seqs Loading sequences using format parsers -------------------------------------- @@ -137,7 +142,6 @@ .. note:: This returns the sequences as strings. .. 
jupyter-execute:: - :linenos: from cogent3.parse.fasta import MinimalFastaParser @@ -151,7 +155,6 @@ The FASTA label field is frequently overloaded, with different information fields present in the field and separated by some delimiter. This can be flexibly addressed using the ``LabelParser``. By creating a custom label parser, we can decide which part we use as the sequence name. We show how to convert a field into something specific. .. jupyter-execute:: - :linenos: from cogent3.parse.fasta import LabelParser @@ -168,7 +171,6 @@ ``RichLabel`` objects have an ``Info`` object as an attribute, allowing specific reference to all the specified label fields. .. jupyter-execute:: - :linenos: from cogent3.parse.fasta import MinimalFastaParser, LabelParser diff -Nru python-cogent-2020.6.30a0+dfsg/doc/cookbook/loading_tabular.rst python-cogent-2020.12.21a+dfsg/doc/cookbook/loading_tabular.rst --- python-cogent-2020.6.30a0+dfsg/doc/cookbook/loading_tabular.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/cookbook/loading_tabular.rst 2020-12-20 23:35:03.000000000 +0000 @@ -9,7 +9,6 @@ We load a tab separated data file using the ``load_table()`` function. The format is inferred from the filename suffix and you will note, in this case, it's not actually a `csv` file. .. jupyter-execute:: - :linenos: from cogent3 import load_table @@ -26,33 +25,33 @@ Although unnecessary in this case, it's possible to override the suffix by specifying the delimiter using the ``sep`` argument. .. jupyter-execute:: - :linenos: from cogent3 import load_table table = load_table("data/stats.tsv", sep="\t") table +Selectively loading parts of a big file +======================================= + Loading a set number of lines from a file -========================================= +----------------------------------------- -If you only want a subset of the contents of a file, use the ``FilteringParser``. This allows skipping certain lines by using a callback function. 
We illustrate this with ``stats.tsv``, skipping any rows with ``"Ratio"`` > 10. +The ``limit`` argument specifies the number of lines to read. .. jupyter-execute:: - :linenos: from cogent3 import load_table table = load_table("data/stats.tsv", limit=2) table -Selectively loading parts of a big file -======================================= +Loading only some rows +---------------------- If you only want a subset of the contents of a file, use the ``FilteringParser``. This allows skipping certain lines by using a callback function. We illustrate this with ``stats.tsv``, skipping any rows with ``"Ratio"`` > 10. .. jupyter-execute:: - :linenos: from cogent3.parse.table import FilteringParser @@ -62,15 +61,22 @@ table = load_table("data/stats.tsv", reader=reader, digits=1) table -.. note:: You can also ``negate`` a condition, which is useful if the condition is complex. +You can also ``negate`` a condition, which is useful if the condition is complex. In this example, it means keep the rows for which ``Ratio > 10``. + +.. jupyter-execute:: + + reader = FilteringParser( + lambda line: float(line[2]) <= 10, with_header=True, sep="\t", negate=True + ) + table = load_table("data/stats.tsv", reader=reader, digits=1) + table Loading only some columns -========================= +------------------------- Specify the columns by their names. .. jupyter-execute:: - :linenos: from cogent3.parse.table import FilteringParser @@ -81,7 +87,6 @@ Or, by their index. .. jupyter-execute:: - :linenos: from cogent3.parse.table import FilteringParser @@ -92,26 +97,29 @@ .. note:: The ``negate`` argument does not affect the columns evaluated. Load raw data as a list of lists of strings -=========================================== +------------------------------------------- We just use ``FilteringParser``. .. 
jupyter-execute:: - :linenos: from cogent3.parse.table import FilteringParser reader = FilteringParser(with_header=True, sep="\t") data = list(reader("data/stats.tsv")) - data[:2] # just the first two lines -.. note:: The individual elements are still ``str``. +We just display the first two lines. + +.. jupyter-execute:: + + data[:2] + +.. note:: The individual elements are all ``str``. Make a table from header and rows ================================= .. jupyter-execute:: - :linenos: from cogent3 import make_table @@ -126,7 +134,6 @@ For a ``dict`` with key's as column headers. .. jupyter-execute:: - :linenos: from cogent3 import make_table @@ -138,7 +145,6 @@ ======================================================= .. jupyter-execute:: - :linenos: table = make_table(header=["C", "A", "B"], data=data) table @@ -150,7 +156,7 @@ .. jupyter-execute:: - table = load_table("data/stats.tsv", index="Locus") + table = load_table("data/stats.tsv", index_name="Locus") table["NP_055852"] .. jupyter-execute:: @@ -163,9 +169,9 @@ ========================================== .. jupyter-execute:: - :linenos: from pandas import DataFrame + from cogent3 import make_table data = dict(a=[0, 3], b=["a", "c"]) @@ -177,7 +183,6 @@ =================================== .. jupyter-execute:: - :linenos: from cogent3 import make_table @@ -190,7 +195,6 @@ ``make_table()`` is the utility function for creating ``Table`` objects from standard python objects. .. jupyter-execute:: - :linenos: from cogent3 import make_table @@ -202,7 +206,6 @@ ============================= .. 
jupyter-execute:: - :linenos: from cogent3 import make_table @@ -213,17 +216,28 @@ "DogFaced": "root", "Human": "edge.0", }, - "x": {"NineBande": 1.0, "edge.1": 1.0, "DogFaced": 1.0, "Human": 1.0,}, - "length": {"NineBande": 4.0, "edge.1": 4.0, "DogFaced": 4.0, "Human": 4.0,}, + "x": { + "NineBande": 1.0, + "edge.1": 1.0, + "DogFaced": 1.0, + "Human": 1.0, + }, + "length": { + "NineBande": 4.0, + "edge.1": 4.0, + "DogFaced": 4.0, + "Human": 4.0, + }, } - table = make_table(data=d2D,) + table = make_table( + data=d2D, + ) table Create a table that has complex python objects as elements ========================================================== .. jupyter-execute:: - :linenos: from cogent3 import make_table @@ -239,9 +253,8 @@ ===================== .. jupyter-execute:: - :linenos: from cogent3 import make_table table = make_table() - table \ No newline at end of file + table diff -Nru python-cogent-2020.6.30a0+dfsg/doc/cookbook/moltypes.rst python-cogent-2020.12.21a+dfsg/doc/cookbook/moltypes.rst --- python-cogent-2020.6.30a0+dfsg/doc/cookbook/moltypes.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/cookbook/moltypes.rst 2020-12-20 23:35:03.000000000 +0000 @@ -15,7 +15,6 @@ ========================= .. jupyter-execute:: - :linenos: from cogent3 import available_moltypes @@ -24,7 +23,6 @@ For statements that have a ``moltype`` argument, use the entry under the "Abbreviation" column. For example: .. jupyter-execute:: - :linenos: from cogent3 import load_aligned_seqs @@ -34,7 +32,6 @@ ===================== .. jupyter-execute:: - :linenos: from cogent3 import get_moltype @@ -47,7 +44,6 @@ Just using ``dna`` from above. .. jupyter-execute:: - :linenos: dna.ambiguities @@ -55,7 +51,6 @@ ========================================== .. jupyter-execute:: - :linenos: dna.degenerates @@ -63,7 +58,6 @@ ========================================== .. 
jupyter-execute:: - :linenos: dna.complement("AGG") @@ -73,7 +67,6 @@ Use the either the top level ``cogent3.make_seq`` function, or the method on the ``MolType`` instance. .. jupyter-execute:: - :linenos: seq = dna.make_seq("AGGCTT", name="seq1") seq @@ -82,7 +75,6 @@ ================ .. jupyter-execute:: - :linenos: rna = get_moltype("rna") rna.is_valid("ACGUACGUACGUACGU") @@ -93,7 +85,6 @@ We demonstrate this by customising DNA so it allows ``.`` as gaps .. jupyter-execute:: - :linenos: from cogent3.core import moltype as mt @@ -111,7 +102,6 @@ .. warning:: At present, constructing a custom ``MolType`` that overrides a builtin one affects the original (in this instance, the ``DnaSequence`` class). All subsequent calls to the original class in the running process that made the change are affected. The below code is resetting this attribute now to allow the rest of the documentation to be executed. .. jupyter-execute:: - :linenos: from cogent3 import DNA from cogent3.core.sequence import DnaSequence diff -Nru python-cogent-2020.6.30a0+dfsg/doc/cookbook/protein_sequences.rst python-cogent-2020.12.21a+dfsg/doc/cookbook/protein_sequences.rst --- python-cogent-2020.6.30a0+dfsg/doc/cookbook/protein_sequences.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/cookbook/protein_sequences.rst 2020-12-20 23:35:03.000000000 +0000 @@ -12,7 +12,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import PROTEIN @@ -24,7 +23,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3.core.genetic_code import DEFAULT as standard_code @@ -36,7 +34,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. 
jupyter-execute:: - :linenos: from cogent3 import load_aligned_seqs diff -Nru python-cogent-2020.6.30a0+dfsg/doc/cookbook/simple_trees.rst python-cogent-2020.12.21a+dfsg/doc/cookbook/simple_trees.rst --- python-cogent-2020.6.30a0+dfsg/doc/cookbook/simple_trees.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/cookbook/simple_trees.rst 2020-12-20 23:35:03.000000000 +0000 @@ -12,7 +12,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -23,7 +22,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -34,7 +32,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -43,14 +40,12 @@ names[:4] .. jupyter-execute:: - :linenos: names[4:] names_nodes = tr.get_nodes_dict() names_nodes["Human"] .. jupyter-execute:: - :linenos: tr.get_node_matching_name("Mouse") @@ -58,7 +53,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -67,7 +61,6 @@ tr.name .. jupyter-execute:: - :linenos: hu.name @@ -75,7 +68,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -85,7 +77,6 @@ type(hu) .. jupyter-execute:: - :linenos: type(tr) @@ -95,7 +86,6 @@ Get all the nodes, tips and edges .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -107,7 +97,6 @@ only the terminal nodes (tips) .. jupyter-execute:: - :linenos: for n in tr.iter_tips(): print(n) @@ -115,7 +104,6 @@ for internal nodes (edges) we can use Newick format to simplify the output .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -127,7 +115,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -140,7 +127,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. 
jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -155,7 +141,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -167,7 +152,6 @@ lca .. jupyter-execute:: - :linenos: type(lca) @@ -175,7 +159,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -188,7 +171,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -202,7 +184,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -212,7 +193,6 @@ We also show how to select a subset of distances involving just one species. .. jupyter-execute:: - :linenos: human_dists = [names for names in dists if "Human" in names] for dist in human_dists: @@ -222,7 +202,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -233,7 +212,6 @@ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -250,7 +228,6 @@ """"""""""""""" .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -261,7 +238,6 @@ """"""""""""""" .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -269,7 +245,6 @@ print(tr.root_at_midpoint().ascii_art()) .. jupyter-execute:: - :linenos: print(tr.ascii_art()) @@ -277,7 +252,6 @@ """""""""""""""" .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -285,7 +259,6 @@ print(tr.ascii_art()) .. jupyter-execute:: - :linenos: print(tr.rooted_with_tip("Mouse").ascii_art()) @@ -296,7 +269,6 @@ """"""""""""" .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -304,7 +276,6 @@ tr.get_newick() .. jupyter-execute:: - :linenos: tr.get_newick(with_distances=True) @@ -312,7 +283,6 @@ """""""""" .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -327,7 +297,6 @@ Here is the example tree for reference: .. 
jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -338,7 +307,6 @@ """""""" .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -350,7 +318,6 @@ """"""""" .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -365,7 +332,6 @@ """""""""""""""" .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -396,7 +362,6 @@ and branch lengths (if tree is a PhyloNode) to reflect the change. .. jupyter-execute:: - :linenos: from cogent3 import make_tree @@ -405,13 +370,11 @@ print(simple_tree.ascii_art()) .. jupyter-execute:: - :linenos: simple_tree.prune() print(simple_tree.ascii_art()) .. jupyter-execute:: - :linenos: print(simple_tree) @@ -419,7 +382,6 @@ """"""""""""""""""""""""""""""""""""""" .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -427,7 +389,6 @@ print(tr1.get_newick()) .. jupyter-execute:: - :linenos: tr2 = tr1.unrooted_deepcopy() print(tr2.get_newick()) @@ -438,7 +399,6 @@ Add internal nodes so that every node has 2 or fewer children. .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -447,7 +407,6 @@ print(tr.ascii_art()) .. jupyter-execute:: - :linenos: print(tr.bifurcating().ascii_art()) @@ -461,7 +420,6 @@ "outgroup_name" argument. .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -469,7 +427,6 @@ print(tr.ascii_art()) .. jupyter-execute:: - :linenos: print(tr.balanced().ascii_art()) @@ -479,7 +436,6 @@ Branch lengths don't matter. .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -494,7 +450,6 @@ the distance from that node to its most distant tip. .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -502,7 +457,6 @@ print(tr.ascii_art()) .. jupyter-execute:: - :linenos: tr.set_tip_distances() for t in tr.preorder(): @@ -512,7 +466,6 @@ """""""""""""""""""""""""""""""""""""""""""""""""""""""""" .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -520,7 +473,6 @@ print(tr) .. 
jupyter-execute:: - :linenos: tr.scale_branch_lengths() print(tr) @@ -531,7 +483,6 @@ and a list of the tip nodes. .. jupyter-execute:: - :linenos: from cogent3 import load_tree @@ -552,7 +503,6 @@ Note: automatically strips out the names that don't match. .. jupyter-execute:: - :linenos: from cogent3 import load_tree diff -Nru python-cogent-2020.6.30a0+dfsg/doc/cookbook/tables.rst python-cogent-2020.12.21a+dfsg/doc/cookbook/tables.rst --- python-cogent-2020.6.30a0+dfsg/doc/cookbook/tables.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/cookbook/tables.rst 2020-12-20 23:35:03.000000000 +0000 @@ -19,7 +19,6 @@ =================== .. jupyter-execute:: - :linenos: from cogent3 import make_table @@ -34,7 +33,6 @@ This can be done when you create the table. .. jupyter-execute:: - :linenos: from cogent3 import make_table @@ -45,7 +43,6 @@ It can be done by directly assigning to the corresponding attributes. .. jupyter-execute:: - :linenos: data = dict(a=[0, 3], b=["a", "c"]) table = make_table(data=data) @@ -58,7 +55,6 @@ ``Table`` is a row oriented object. Iterating on the table returns each row as a new ``Table`` instance. .. jupyter-execute:: - :linenos: from cogent3 import load_table @@ -70,7 +66,6 @@ The resulting rows can be indexed using their column names. .. jupyter-execute:: - :linenos: for row in table: print(row["Locus"]) @@ -81,7 +76,6 @@ The ``Table.shape`` attribute is like that of a ``numpy`` ``array``. The first element (``Table.shape[0]``) is the number of rows. .. jupyter-execute:: - :linenos: from cogent3 import make_table @@ -95,7 +89,6 @@ ``Table.shape[1]`` is the number of columns. Using the table from above. .. jupyter-execute:: - :linenos: table.shape[1] == 2 @@ -105,7 +98,6 @@ The ``Table.columns`` attribute is a ``Columns`` instance, an object with ``dict`` attributes. .. jupyter-execute:: - :linenos: from cogent3 import load_table @@ -113,14 +105,12 @@ table.columns .. 
jupyter-execute:: - :linenos: table.columns["Region"] So iteration is the same as for dicts. .. jupyter-execute:: - :linenos: for name in table.columns: print(name) @@ -129,7 +119,6 @@ ================================ .. jupyter-execute:: - :linenos: table = load_table("data/stats.tsv") table @@ -137,7 +126,6 @@ Slice using the column name. .. jupyter-execute:: - :linenos: table[:2, "Region":] @@ -145,7 +133,6 @@ =========================== .. jupyter-execute:: - :linenos: table = load_table("data/stats.tsv") table[:2, :1] @@ -156,7 +143,6 @@ We change the ``Ratio`` column to using scientific notation. .. jupyter-execute:: - :linenos: from cogent3 import load_table @@ -170,7 +156,6 @@ This can be done on table loading, .. jupyter-execute:: - :linenos: table = load_table("data/stats.tsv", digits=1, space=2) table @@ -178,7 +163,6 @@ or, for spacing at least, by modifying the attributes .. jupyter-execute:: - :linenos: table.space = " " table @@ -189,7 +173,6 @@ Wrapping generates neat looking tables whether or not you index the table rows. We demonstrate here .. jupyter-execute:: - :linenos: from cogent3 import make_table @@ -199,16 +182,14 @@ wrap_table .. jupyter-execute:: - :linenos: - wrap_table = make_table(header=h, data=rows, max_width=30, index="name") + wrap_table = make_table(header=h, data=rows, max_width=30, index_name="name") wrap_table Display the top of a table using ``head()`` =========================================== .. jupyter-execute:: - :linenos: table = make_table(data=dict(a=list(range(10)), b=list(range(10)))) table.head() @@ -216,7 +197,6 @@ You change how many rows are displayed. .. jupyter-execute:: - :linenos: table.head(2) @@ -232,7 +212,6 @@ You change how many rows are displayed. .. jupyter-execute:: - :linenos: table.tail(1) @@ -240,7 +219,6 @@ ================================ .. jupyter-execute:: - :linenos: table.set_repr_policy(random=3) table @@ -249,7 +227,6 @@ ================================================= .. 
jupyter-execute:: - :linenos: table.set_repr_policy(head=2, tail=3) table @@ -262,7 +239,6 @@ The table ``header`` is immutable. Changing column headings is done as follows. .. jupyter-execute:: - :linenos: table = load_table("data/stats.tsv") print(table.header) @@ -273,7 +249,6 @@ =================== .. jupyter-execute:: - :linenos: from cogent3 import make_table @@ -281,7 +256,6 @@ table .. jupyter-execute:: - :linenos: table.columns["a"] = [1, 3, 5] table.columns["b"] = [2, 4, 6] @@ -293,7 +267,6 @@ This can be used to take a single, or multiple columns and generate a new column of values. Here we'll take 2 columns and return True/False based on a condition. .. jupyter-execute:: - :linenos: table = load_table("data/stats.tsv") table = table.with_new_column( @@ -307,7 +280,6 @@ =============================== .. jupyter-execute:: - :linenos: table = load_table("data/stats.tsv") table.array @@ -318,7 +290,6 @@ Via the ``Table.tolist()`` method. .. jupyter-execute:: - :linenos: table = load_table("data/stats.tsv") locus = table.tolist("Locus") @@ -327,7 +298,6 @@ Or directly from the column array object. .. jupyter-execute:: - :linenos: table.columns["Locus"].tolist() @@ -337,7 +307,6 @@ This returns a row oriented list. .. jupyter-execute:: - :linenos: table = load_table("data/stats.tsv") rows = table.tolist(["Region", "Locus"]) @@ -351,7 +320,6 @@ Keys in the resulting dict are the row indices, the value is a dict of column name, value pairs. .. jupyter-execute:: - :linenos: table = load_table("data/stats.tsv") table.to_dict() @@ -362,7 +330,6 @@ Keys in the resulting dict are the column names, the value is a list. .. jupyter-execute:: - :linenos: table = load_table("data/stats.tsv") table.columns.to_dict() @@ -371,7 +338,6 @@ ======================================= .. jupyter-execute:: - :linenos: table = load_table("data/stats.tsv") df = table.to_dataframe() @@ -380,10 +346,29 @@ You can also specify column(s) are categories .. 
jupyter-execute:: - :linenos: df = table.to_dataframe(categories="Region") +Get a table of counts as a contingency table +============================================ + +If our table consists of counts data, the ``Table`` can convert it into a ``CategoryCount`` instance that can be used for performing basic contingency table statistical tests, e.g. chi-square, G-test of independence, etc. To do this, we must specify which column contains the row names using the ``index_name`` argument. + +.. jupyter-execute:: + + table = make_table(data={"Ts": [31, 58], "Tv": [36, 138], "": ["syn", "nsyn"]}, index_name="") + table + +.. jupyter-execute:: + + contingency = table.to_categorical(["Ts", "Tv"]) + contingency + +.. jupyter-execute:: + + g_test = contingency.G_independence() + g_test + Appending tables ================ @@ -392,7 +377,6 @@ Can be done without specifying a new column (set the first argument to ``appended`` to be ``None``). Here we simply use the same table data. .. jupyter-execute:: - :linenos: table1 = load_table("data/stats.tsv") table2 = load_table("data/stats.tsv") @@ -402,7 +386,6 @@ Specifying with a new column. In this case, the value of the ``table.title`` becomes the value for the new column. .. jupyter-execute:: - :linenos: table1.title = "Data1" table2.title = "Data2" @@ -415,7 +398,6 @@ ======================= .. jupyter-execute:: - :linenos: table = load_table("data/stats.tsv") table.summed("Ratio") @@ -423,7 +405,6 @@ Because each column is just a ``numpy.ndarray``, this also can be done directly via the array methods. .. jupyter-execute:: - :linenos: table.columns["Ratio"].sum() @@ -433,7 +414,6 @@ We define a strictly numerical table, .. jupyter-execute:: - :linenos: from cogent3 import make_table @@ -445,14 +425,12 @@ and sum all columns (default condition) .. jupyter-execute:: - :linenos: all_numeric.summed() and all rows .. 
jupyter-execute:: - :linenos: all_numeric.summed(col_sum=False) @@ -462,7 +440,6 @@ We define a table with mixed data, like a distance matrix. .. jupyter-execute:: - :linenos: mixed = make_table( header=["A", "B", "C"], data=[["*", 1, 2], [3, "*", 5], [6, 7, "*"]] @@ -472,14 +449,12 @@ and sum all columns (default condition), ignoring non-numerical data .. jupyter-execute:: - :linenos: mixed.summed(strict=False) and all rows .. jupyter-execute:: - :linenos: mixed.summed(col_sum=False, strict=False) @@ -489,7 +464,6 @@ We can do this by providing a reference to an external function .. jupyter-execute:: - :linenos: table = load_table("data/stats.tsv") sub_table = table.filtered(lambda x: x < 10.0, columns="Ratio") @@ -498,7 +472,6 @@ or using valid python syntax within a string, which is executed .. jupyter-execute:: - :linenos: sub_table = table.filtered("Ratio < 10.0") sub_table @@ -506,7 +479,6 @@ You can also filter for values in multiple columns .. jupyter-execute:: - :linenos: sub_table = table.filtered("Ratio < 10.0 and Region == 'NonCon'") sub_table @@ -517,7 +489,6 @@ We select only columns that have a sum > 20 from the ``all_numeric`` table constructed above. .. jupyter-execute:: - :linenos: big_numeric = all_numeric.filtered_by_column(lambda x: sum(x) > 20) big_numeric @@ -526,7 +497,6 @@ ================ .. jupyter-execute:: - :linenos: table = load_table("data/stats.tsv") table.sorted(columns="Ratio") @@ -535,7 +505,6 @@ =============== .. jupyter-execute:: - :linenos: table.sorted(columns="Ratio", reverse="Ratio") @@ -543,7 +512,6 @@ ================================================ .. jupyter-execute:: - :linenos: table.sorted(columns=["Region", "Ratio"], reverse="Ratio") @@ -551,7 +519,6 @@ ==================================== .. jupyter-execute:: - :linenos: table = load_table("data/stats.tsv") raw = table.tolist("Region") @@ -561,7 +528,6 @@ ===================================== .. 
jupyter-execute:: - :linenos: table = load_table("data/stats.tsv") raw = table.tolist(["Locus", "Region"]) @@ -571,7 +537,6 @@ ======================= .. jupyter-execute:: - :linenos: table = load_table("data/stats.tsv") assert table.distinct_values("Region") == set(["NonCon", "Con"]) @@ -580,7 +545,6 @@ ============================== .. jupyter-execute:: - :linenos: table = load_table("data/stats.tsv") assert table.count("Region == 'NonCon' and Ratio > 1") == 1 @@ -591,7 +555,6 @@ This returns a ``CategoryCounter``, a dict like class. .. jupyter-execute:: - :linenos: from cogent3 import make_table @@ -602,20 +565,17 @@ type(unique) .. jupyter-execute:: - :linenos: unique For multiple columns. .. jupyter-execute:: - :linenos: unique = table.count_unique(["A", "B"]) unique .. jupyter-execute:: - :linenos: r = unique.to_table() r @@ -626,7 +586,6 @@ We do a standard inner join here for a restricted subset. We must specify the columns that will be used for the join. Here we just use ``Locus``. .. jupyter-execute:: - :linenos: rows = [ ["NP_004893", True], @@ -647,7 +606,6 @@ ================= .. jupyter-execute:: - :linenos: from cogent3 import make_table @@ -664,7 +622,6 @@ We require a new column heading for the current header data. We also need to specify which existing column will become the header. .. jupyter-execute:: - :linenos: tp = table.transposed(new_column_name="sample", select_as_header="#OTU ID") tp @@ -675,7 +632,6 @@ Using the method provides finer control over formatting. .. jupyter-execute:: - :linenos: from cogent3 import load_table @@ -688,7 +644,6 @@ Using the method provides finer control over formatting. .. jupyter-execute:: - :linenos: from cogent3 import load_table @@ -701,7 +656,6 @@ We use the ``justify`` argument to indicate the column justification. .. jupyter-execute:: - :linenos: table = load_table("data/stats.tsv") print(table.to_markdown(justify="ccr")) @@ -710,7 +664,6 @@ ======================================= .. 
jupyter-execute:: - :linenos: table = load_table( "data/stats.tsv", title="Some stats.", legend="Derived from something." @@ -721,7 +674,6 @@ ============================================ .. jupyter-execute:: - :linenos: table = load_table( "data/stats.tsv", title="Some stats.", legend="Derived from something." @@ -732,7 +684,6 @@ ============================================= .. jupyter-execute:: - :linenos: table = load_table( "data/stats.tsv", title="Some stats.", legend="Derived from something." @@ -745,7 +696,6 @@ It is also possible to specify column alignment, table caption and other arguments. .. jupyter-execute:: - :linenos: table = load_table("data/stats.tsv") print(table.to_string(format="latex")) @@ -795,14 +745,12 @@ bgraph = make_table(header=["chrom", "start", "end", "value"], rows=rows) .. jupyter-execute:: - :linenos: bgraph.head() Then converted. .. jupyter-execute:: - :linenos: print( bgraph.to_string( @@ -818,7 +766,6 @@ ======================= .. jupyter-execute:: - :linenos: from cogent3 import load_table @@ -828,7 +775,6 @@ We can provide customised formatting via a callback function. .. jupyter-execute:: - :linenos: def format_cell(value, row_num, col_num): style = 'style="background: rgba(176, 245, 102, 0.25);"' if value else "" @@ -847,7 +793,6 @@ We could also use control html element format. .. jupyter-execute:: - :linenos: element_format = dict(thead=f'') rich_html = table.to_rich_html(element_formatters=element_format) @@ -865,7 +810,6 @@ Appending any of the following to a filename will cause that format to be used for writing. .. jupyter-execute:: - :linenos: from cogent3.format.table import known_formats @@ -875,7 +819,6 @@ ============================== .. jupyter-execute:: - :linenos: table.write("stats_tab.tex", justify="ccr", label="tab:table1") @@ -885,7 +828,6 @@ The delimiter can be specified explicitly using the ``sep`` argument or implicitly via the file name suffix. .. 
jupyter-execute:: - :linenos: table = load_table("data/stats.tsv") table.write("stats_tab.txt", sep="\t") diff -Nru python-cogent-2020.6.30a0+dfsg/doc/cookbook/union_dict.rst python-cogent-2020.12.21a+dfsg/doc/cookbook/union_dict.rst --- python-cogent-2020.6.30a0+dfsg/doc/cookbook/union_dict.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/cookbook/union_dict.rst 2020-12-20 23:35:03.000000000 +0000 @@ -11,7 +11,6 @@ Keys in a ``UnionDict`` can be accessed like attributes .. jupyter-execute:: - :linenos: from cogent3.util.union_dict import UnionDict @@ -19,12 +18,10 @@ data.a .. jupyter-execute:: - :linenos: data["a"] .. jupyter-execute:: - :linenos: data.b.d @@ -34,7 +31,6 @@ If you use the ``|`` bitwise operator to compare two dicts and the left one is a ``UnionDict``, a union operation is performed. .. jupyter-execute:: - :linenos: from cogent3.util.union_dict import UnionDict @@ -45,12 +41,10 @@ This can also be done using the ``union`` method. .. jupyter-execute:: - :linenos: data.b.union({"d": [25]}) .. jupyter-execute:: - :linenos: data.b {"c": 24, "d": [25]} @@ -59,7 +53,6 @@ ------------------------------------------ .. jupyter-execute:: - :linenos: :raises: KeyError from cogent3.util.union_dict import UnionDict @@ -70,7 +63,6 @@ But if accessing as an attribute, you get an attribute error. .. jupyter-execute:: - :linenos: :raises: AttributeError - data.k + data.k \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/cookbook/useful_utilities.rst python-cogent-2020.12.21a+dfsg/doc/cookbook/useful_utilities.rst --- python-cogent-2020.6.30a0+dfsg/doc/cookbook/useful_utilities.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/cookbook/useful_utilities.rst 2020-12-20 23:35:03.000000000 +0000 @@ -14,7 +14,6 @@ We write a simple factory function that uses a provided value for omega to compute the squared deviation from an estimate, then use it to create our optimisable function. .. 
jupyter-execute:: - :linenos: import numpy @@ -31,7 +30,6 @@ We then import the minimise function and use it to minimise the function, obtaining the fit statistic and the associated estimate of S. Note that we provide lower and upper bounds (which are optional) and an initial guess for our parameter of interest (``S``). .. jupyter-execute:: - :linenos: from cogent3.maths.optimisers import minimise, maximise @@ -58,7 +56,6 @@ This support method will force a variable to be an iterable, allowing you to guarantee that the variable will be safe for use in, say, a ``for`` loop. .. jupyter-execute:: - :linenos: :raises: TypeError from cogent3.util.misc import iterable @@ -76,7 +73,6 @@ curry(f,x)(y) = f(x,y) or = lambda y: f(x,y). This was modified from the Python Cookbook. Docstrings are also carried over. .. jupyter-execute:: - :linenos: from cogent3.util.misc import curry @@ -94,7 +90,6 @@ Perform a simple test to see if an object supports iteration .. jupyter-execute:: - :linenos: from cogent3.util.misc import is_iterable @@ -103,7 +98,6 @@ is_iterable(can_iter) .. jupyter-execute:: - :linenos: is_iterable(cannot_iter) @@ -113,7 +107,6 @@ Perform a simple test to see if an object is a single character .. jupyter-execute:: - :linenos: from cogent3.util.misc import is_char @@ -123,12 +116,10 @@ is_char("a") .. jupyter-execute:: - :linenos: is_char("ab") .. jupyter-execute:: - :linenos: is_char(foo()) @@ -138,14 +129,12 @@ To flatten a deeply nested iterable, use ``recursive_flatten``. This method supports multiple levels of nesting, and multiple iterable types .. jupyter-execute:: - :linenos: from cogent3.util.misc import recursive_flatten l = [[[[1, 2], "abcde"], [5, 6]], [7, 8], [9, 10]] .. jupyter-execute:: - :linenos: recursive_flatten(l) @@ -155,19 +144,16 @@ Perform a simple check to see if an object is not a list or a tuple .. jupyter-execute:: - :linenos: from cogent3.util.misc import not_list_tuple not_list_tuple(1) .. 
jupyter-execute:: - :linenos: not_list_tuple([1]) .. jupyter-execute:: - :linenos: not_list_tuple("ab") @@ -177,7 +163,6 @@ Create a case-insensitive object, for instance, if you want the key 'a' and 'A' to point to the same item in a dict .. jupyter-execute:: - :linenos: from cogent3.util.misc import add_lowercase @@ -190,7 +175,6 @@ Automatically construct a distance matrix lookup function. This is useful for maintaining flexibility about whether a function is being computed or if a lookup is being used .. jupyter-execute:: - :linenos: from cogent3.util.misc import DistanceFromMatrix from numpy import array @@ -200,7 +184,6 @@ f(0, 0) .. jupyter-execute:: - :linenos: f(1, 2) @@ -210,7 +193,6 @@ Check an object against base classes or derived classes to see if it is acceptable .. jupyter-execute:: - :linenos: from cogent3.util.misc import ClassChecker @@ -232,27 +214,22 @@ o in cc .. jupyter-execute:: - :linenos: no in cc .. jupyter-execute:: - :linenos: 5 in cc .. jupyter-execute:: - :linenos: {"a": 5} in cc .. jupyter-execute:: - :linenos: "asasas" in cc .. jupyter-execute:: - :linenos: md in cc @@ -262,7 +239,6 @@ Delegate object method calls, properties and variables to the appropriate object. Useful to combine multiple objects together while assuring that the calls will go to the correct object. .. jupyter-execute:: - :linenos: from cogent3.util.misc import Delegator @@ -276,17 +252,14 @@ len(ls) .. jupyter-execute:: - :linenos: ls[0] .. jupyter-execute:: - :linenos: ls.upper() .. jupyter-execute:: - :linenos: ls.split("_") @@ -296,7 +269,6 @@ Wrap a function to hide it from a class so that it isn't a method. .. jupyter-execute:: - :linenos: from cogent3.util.misc import FunctionWrapper @@ -304,7 +276,6 @@ f .. jupyter-execute:: - :linenos: f(123) @@ -316,7 +287,6 @@ Here is a light example of the ``ConstrainedDict`` .. jupyter-execute:: - :linenos: from cogent3.util.misc import ConstrainedDict @@ -324,7 +294,6 @@ d .. 
jupyter-execute:: - :linenos: :raises: ConstraintError - d["d"] = 5 + d["d"] = 5 \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/cookbook/what_codes.rst python-cogent-2020.12.21a+dfsg/doc/cookbook/what_codes.rst --- python-cogent-2020.6.30a0+dfsg/doc/cookbook/what_codes.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/cookbook/what_codes.rst 2020-12-20 23:35:03.000000000 +0000 @@ -8,7 +8,6 @@ *********************** .. jupyter-execute:: - :linenos: from cogent3 import available_codes @@ -19,7 +18,6 @@ For example: .. jupyter-execute:: - :linenos: from cogent3 import load_aligned_seqs @@ -29,7 +27,6 @@ We specify the genetic code, and that codons that are incomplete as they contain a gap, are converted to ``?``. .. jupyter-execute:: - :linenos: aa_seqs = nt_seqs.get_translation(gc=1, incomplete_ok=True) aa_seqs[:20] @@ -40,7 +37,6 @@ This function can be used directly to get a genetic code. We will get the code with ID 4. .. jupyter-execute:: - :linenos: from cogent3 import get_code diff -Nru python-cogent-2020.6.30a0+dfsg/doc/data_file_links.rst python-cogent-2020.12.21a+dfsg/doc/data_file_links.rst --- python-cogent-2020.6.30a0+dfsg/doc/data_file_links.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/data_file_links.rst 2020-12-20 23:35:03.000000000 +0000 @@ -54,4 +54,3 @@ :download:`tbp.jaspar ` :download:`stats.tsv ` - diff -Nru python-cogent-2020.6.30a0+dfsg/doc/examples/align_codons_to_protein.rst python-cogent-2020.12.21a+dfsg/doc/examples/align_codons_to_protein.rst --- python-cogent-2020.6.30a0+dfsg/doc/examples/align_codons_to_protein.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/examples/align_codons_to_protein.rst 2020-12-20 23:35:03.000000000 +0000 @@ -6,14 +6,12 @@ Although Cogent3 provides a means for directly aligning codon sequences, you may want to use a different approach based on the translate-align-introduce gaps into the 
original paradigm. After you've translated your codon sequences, and aligned the resulting amino acid sequences, you want to introduce the gaps from the aligned protein sequences back into the original codon sequences. Here's how. .. jupyter-execute:: - :linenos: from cogent3 import make_unaligned_seqs, make_aligned_seqs First I'm going to construct an artificial example, using the seqs dict as a means to get the data into the Alignment object. The basic idea, however, is that you should already have a set of DNA sequences that are in frame (i.e. position 0 is the 1st codon position), you've translated those sequences and aligned these translated sequences. The result is an alignment of aa sequences and a set of unaligned DNA sequences from which the aa seqs were derived. If your sequences are not in frame you can adjust it by either slicing, or adding N's to the beginning of the raw string. .. jupyter-execute:: - :linenos: seqs = { "hum": "AAGCAGATCCAGGAAAGCAGCGAGAATGGCAGCCTGGCCGCGCGCCAGGAGAGGCAGGCCCAGGTCAACCTCACT", @@ -26,7 +24,6 @@ In order to ensure the alignment algorithm preserves the coding frame, we align the translation of the sequences. We need to translate them first, but note that because the seqs are unaligned they we have to set ``aligned=False``, or we'll get an error. .. jupyter-execute:: - :linenos: unaligned_aa = unaligned_DNA.get_translation() print(unaligned_aa.to_fasta()) @@ -34,7 +31,6 @@ The translated seqs can then be written to file, using the method ``write``. That file then serves as input for an alignment program. The resulting alignment file can be read back in. (We won't write to file in this example.) For this example we will specify the aligned sequences in the dict, rather than from file. .. 
jupyter-execute:: - :linenos: aligned_aa_seqs = { "hum": "KQIQESSENGSLAARQERQAQVNLT", @@ -43,4 +39,4 @@ } aligned_aa = make_aligned_seqs(aligned_aa_seqs, moltype="protein") aligned_DNA = aligned_aa.replace_seqs(unaligned_DNA) - aligned_DNA + aligned_DNA \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/doc/examples/calculate_neigbourjoining_tree.rst python-cogent-2020.12.21a+dfsg/doc/examples/calculate_neigbourjoining_tree.rst --- python-cogent-2020.6.30a0+dfsg/doc/examples/calculate_neigbourjoining_tree.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/examples/calculate_neigbourjoining_tree.rst 2020-12-20 23:35:03.000000000 +0000 @@ -11,7 +11,6 @@ An example of how to calculate the pairwise distances for a set of sequences. .. jupyter-execute:: - :linenos: from cogent3 import load_aligned_seqs from cogent3.evolve import distance @@ -20,21 +19,18 @@ Import a substitution model (or create your own) .. jupyter-execute:: - :linenos: from cogent3.evolve.models import get_model Load the alignment. .. jupyter-execute:: - :linenos: al = load_aligned_seqs("data/long_testseqs.fasta") Create a pairwise distances object calculator for the alignment, providing a substitution model instance. .. jupyter-execute:: - :linenos: d = distance.EstimateDistances(al, submodel=get_model("HKY85")) d.run(show_progress=False) @@ -42,7 +38,6 @@ Now use this matrix to build a neighbour joining tree. .. jupyter-execute:: - :linenos: mytree = nj.nj(d.get_pairwise_distances(), show_progress=False) print(mytree.ascii_art()) @@ -50,7 +45,6 @@ We can save this tree to file. .. 
jupyter-execute:: - :linenos: mytree.write("test_nj.tree") diff -Nru python-cogent-2020.6.30a0+dfsg/doc/examples/calculate_pairwise_distances.rst python-cogent-2020.12.21a+dfsg/doc/examples/calculate_pairwise_distances.rst --- python-cogent-2020.6.30a0+dfsg/doc/examples/calculate_pairwise_distances.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/examples/calculate_pairwise_distances.rst 2020-12-20 23:35:03.000000000 +0000 @@ -13,7 +13,6 @@ An example of how to calculate the pairwise distances for a set of sequences. .. jupyter-execute:: - :linenos: from cogent3 import load_aligned_seqs from cogent3.evolve import distance @@ -21,21 +20,18 @@ Import a substitution model (or create your own) .. jupyter-execute:: - :linenos: from cogent3.evolve.models import HKY85 Load my alignment .. jupyter-execute:: - :linenos: al = load_aligned_seqs("data/long_testseqs.fasta") Create a pairwise distances object with your alignment and substitution model and run it. .. jupyter-execute:: - :linenos: d = distance.EstimateDistances(al, submodel=HKY85()) d.run(show_progress=False) @@ -46,7 +42,6 @@ We'll write a phylip formatted distance matrix. .. jupyter-execute:: - :linenos: d.write("dists_for_phylo.phylip", format="phylip") @@ -55,7 +50,6 @@ We'll also save the distances to file in Python's pickle format. .. jupyter-execute:: - :linenos: import pickle diff -Nru python-cogent-2020.6.30a0+dfsg/doc/examples/calculate_UPGMA_cluster.rst python-cogent-2020.12.21a+dfsg/doc/examples/calculate_UPGMA_cluster.rst --- python-cogent-2020.6.30a0+dfsg/doc/examples/calculate_UPGMA_cluster.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/examples/calculate_UPGMA_cluster.rst 2020-12-20 23:35:03.000000000 +0000 @@ -13,7 +13,6 @@ .. note:: UPGMA should not be used for phylogenetic reconstruction. .. 
jupyter-execute:: - :linenos: from cogent3 import load_aligned_seqs from cogent3.evolve import distance @@ -22,21 +21,18 @@ Import a substitution model (or create your own) .. jupyter-execute:: - :linenos: from cogent3.evolve.models import HKY85 Load the alignment. .. jupyter-execute:: - :linenos: al = load_aligned_seqs("data/test.paml") Create a pairwise distances object calculator for the alignment, providing a substitution model instance. .. jupyter-execute:: - :linenos: d = distance.EstimateDistances(al, submodel=HKY85()) d.run(show_progress=False) @@ -44,7 +40,6 @@ Now use this matrix to build a UPGMA cluster. .. jupyter-execute:: - :linenos: mycluster = upgma(d.get_pairwise_distances()) print(mycluster.ascii_art()) @@ -52,7 +47,6 @@ We demonstrate saving this UPGMA cluster to a file. .. jupyter-execute:: - :linenos: mycluster.write("test_upgma.tree") diff -Nru python-cogent-2020.6.30a0+dfsg/doc/examples/codon_models.rst python-cogent-2020.12.21a+dfsg/doc/examples/codon_models.rst --- python-cogent-2020.6.30a0+dfsg/doc/examples/codon_models.rst 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/doc/examples/codon_models.rst 2020-12-20 23:35:03.000000000 +0000 @@ -34,14 +34,12 @@ We import these explicitly from the ``cogent3.evolve.models`` module. .. jupyter-execute:: - :linenos: from cogent3.evolve.models import get_model These are functions and calling them returns the indicated substitution model with default behaviour of recoding gap characters into N's. .. jupyter-execute:: - :linenos: tf = get_model("GY94") nf = get_model("MG94GTR") @@ -52,7 +50,6 @@ For our example we load a sample alignment and tree as per usual. To reduce the computational overhead for this example we will limit the number of sampled taxa. .. 
jupyter-execute:: - :linenos: from cogent3 import load_aligned_seqs, load_tree @@ -65,7 +62,6 @@ We construct a likelihood function and constrain omega parameter (the ratio of nonsynonymous to synonymous substitutions) to equal 1. We also set some display formatting parameters. .. jupyter-execute:: - :linenos: lf = cnf.make_likelihood_function(tree, digits=2, space=3) lf.set_param_rule("omega", is_constant=True, value=1.0) @@ -75,7 +71,6 @@ .. note:: I'm going to specify a set of conditions that will be used for all optimiser steps. For those new to python, one can construct a dictionary with the following form ``{'argument_name': argument_value}``, or alternatively ``dict(argument_name=argument_value)``. I'm doing the latter. This dictionary is then passed to functions/methods by prefacing it with ``**``. .. jupyter-execute:: - :linenos: optimiser_args = dict( local=True, max_restarts=5, tolerance=1e-8, show_progress=False @@ -95,7 +90,6 @@ We can then free up the omega parameter, but before we do that we'll store the log-likelihood and number of free parameters for the current model form for reuse later. .. jupyter-execute:: - :linenos: neutral_lnL = lf.get_log_likelihood() neutral_nfp = lf.get_num_free_params() @@ -108,7 +102,6 @@ We then conduct a likelihood ratio test whether the MLE of omega significantly improves the fit over the constraint it equals 1. We import the convenience function from the ``cogent3`` stats module. .. jupyter-execute:: - :linenos: from cogent3.maths.stats import chisqprob @@ -119,7 +112,6 @@ Not surprisingly, this is significant. We then ask whether the Human and Chimpanzee edges have a value of omega that is significantly different from the rest of the tree. .. jupyter-execute:: - :linenos: lf.set_param_rule( "omega", tip_names=["Chimpanzee", "Human"], outgroup_name="Galago", clade=True @@ -130,7 +122,6 @@ chimp_human_clade_nfp = lf.get_num_free_params() .. 
jupyter-execute:: - :linenos: LR = 2 * (chimp_human_clade_lnL - non_neutral_lnL) df = chimp_human_clade_nfp - non_neutral_nfp @@ -144,7 +135,6 @@ It is also possible to specify rate-heterogeneity variants of these models. In the first instance we'll create a likelihood function where these rate-classes are global across the entire tree. Because fitting these models can be time consuming I'm going to recreate the non-neutral likelihood function from above first, fit it, and then construct the rate-heterogeneity likelihood function. By doing this I can ensure that the richer model starts with parameter values that produce a log-likelihood the same as the null model, ensuring the subsequent optimisation step improves the likelihood over the null. .. jupyter-execute:: - :linenos: lf = cnf.make_likelihood_function(tree, digits=2, space=3) lf.set_alignment(aln) @@ -157,7 +147,6 @@ To get all the parameter MLEs (branch lengths, GTR terms, etc ..) into the alternate model we get an annotated tree from the null model which will have these values associated with it. .. jupyter-execute:: - :linenos: annot_tree = lf.get_annotated_tree() omega_mle = lf.get_param_value("omega") @@ -165,7 +154,6 @@ We can then construct a new likelihood function, specifying the rate-class properties. .. jupyter-execute:: - :linenos: rate_lf = cnf.make_likelihood_function( annot_tree, bins=["neutral", "adaptive"], digits=2, space=3 @@ -174,14 +162,12 @@ We define a very small value (``epsilon``) that is used to specify the starting values. .. jupyter-execute:: - :linenos: epsilon = 1e-6 We now provide starting parameter values for ``omega`` for the two bins, setting the boundary .. jupyter-execute:: - :linenos: rate_lf.set_param_rule("omega", bin="neutral", upper=1, init=omega_mle) rate_lf.set_param_rule( @@ -191,14 +177,12 @@ and provide the starting values for the bin probabilities (``bprobs``). .. 
jupyter-execute:: - :linenos: rate_lf.set_param_rule("bprobs", init=[1 - epsilon, epsilon]) The above statement essentially assigns a probability of nearly 1 to the 'neutral' bin. We now set the alignment and fit the model. .. jupyter-execute:: - :linenos: rate_lf.set_alignment(aln) rate_lf.optimise(**optimiser_args) @@ -209,14 +193,12 @@ rate_lf .. jupyter-execute:: - :linenos: print(chisqprob(LR, df)) We can get the posterior probabilities of site-classifications out of this model as .. jupyter-execute:: - :linenos: pp = rate_lf.get_bin_probs() @@ -227,24 +209,26 @@ The following implements a modification of the approach of Zhang, Nielsen and Yang (Mol Biol Evol, 22:2472–9, 2005). For this model class, there are groups of branches for which all positions are evolving neutrally but some proportion of those neutrally evolving sites change to adaptively evolving on so-called foreground edges. For the current example, we'll define the Chimpanzee and Human branches as foreground and everything else as background. The following table defines the parameter scopes. -+--------------+----------------+----------------------+---------------------+ -| Site class | Proportion | Background edges | Foreground edges | -+==============+================+======================+=====================+ -| 0 | p_0 | 0 < omega_0 < 1 | 0 < omega_0 < 1 | -+--------------+----------------+----------------------+---------------------+ -| 1 | p_1 | omega_1=1 | omega_1=1 | -+--------------+----------------+----------------------+---------------------+ -| 2a | p_2 | 0 < omega_0 < 1 | omega_2 > 1 | -+--------------+----------------+----------------------+---------------------+ -| 2b | p_3 | omega_1=1 | omega_2 > 1 | -+--------------+----------------+----------------------+---------------------+ +.. 
jupyter-execute:: + :hide-code: + + from numpy import array + from cogent3 import make_table + from IPython.core.display import HTML + + header = ['Site Class', 'Proportion', 'Background Edges', 'Foreground Edges'] + data = {'Site Class': array(['0', '1', '2a', '2b'], dtype=' 1', '0 < omega0 < 1'], + dtype='`_, and `non-stationary codon `_ models. .. dropdown:: Click to see an animation showing testing a hypothesis involving a non-stationary nucleotide process. @@ -78,4 +79,13 @@ :text: … :classes: stretched-link +.. toctree:: + :hidden: + :maxdepth: 3 + + app/index + cookbook/index + examples/index + api/index + .. _cogent3: https://cogent3.org diff -Nru python-cogent-2020.6.30a0+dfsg/.github/workflows/testing_develop.yml python-cogent-2020.12.21a+dfsg/.github/workflows/testing_develop.yml --- python-cogent-2020.6.30a0+dfsg/.github/workflows/testing_develop.yml 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/.github/workflows/testing_develop.yml 2020-12-20 23:35:03.000000000 +0000 @@ -18,32 +18,70 @@ steps: - uses: "actions/checkout@v2" + + # caching + - uses: actions/cache@v2 + if: startsWith(runner.os, 'Linux') + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + ${{ runner.os }}-pip- + + - uses: actions/cache@v2 + if: startsWith(runner.os, 'macOS') + with: + path: ~/Library/Caches/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + ${{ runner.os }}-pip- + + - uses: actions/cache@v2 + if: startsWith(runner.os, 'Windows') + with: + path: ~\AppData\Local\pip\Cache + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + ${{ runner.os }}-pip- + - uses: "actions/setup-python@v1" with: python-version: "${{ 
matrix.python-version }}" - - name: Make conda environment - uses: goanpeca/setup-miniconda@v1 - with: - activate-environment: c3dev - environment-file: c3dev-environment.yml - python-version: "${{ matrix.python-version }}" + # Setup virtualenv + - name: "Setup virtualenv for ${{ matrix.python-version }} on Linux or MacOS" + if: startsWith(runner.os, 'macOS') || startsWith(runner.os, 'Linux') + run: | + python --version + python -m venv .venv + source .venv/bin/activate + python -m pip install --upgrade pip + python -m pip install -r requirements.txt - - name: "Display conda env for ${{ matrix.python-version }}" + - name: "Run tox targets for ${{ matrix.python-version }} on Linux or MacOS" + if: startsWith(runner.os, 'macOS') || startsWith(runner.os, 'Linux') shell: bash -l {0} run: | - conda activate - conda info - conda list - - name: "Run tox targets for ${{ matrix.python-version }}" + source .venv/bin/activate + python -m tox + ls + ls tests + + - name: "Install requirements and run tox targets for ${{ matrix.python-version }} on Windows" + if: startsWith(runner.os, 'Windows') shell: bash -l {0} run: | - conda activate c3dev + python -m pip install --upgrade pip + python -m pip install -r requirements.txt python -m tox ls ls tests + - name: Upload coverage to Codecov uses: codecov/codecov-action@v1 with: file: ./tests/junit-*.xml - fail_ci_if_error: true + fail_ci_if_error: true \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/.hgtags python-cogent-2020.12.21a+dfsg/.hgtags --- python-cogent-2020.6.30a0+dfsg/.hgtags 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/.hgtags 2020-12-20 23:35:03.000000000 +0000 @@ -14,3 +14,5 @@ 5f4050c10998f6aabc2ff7febd69e8cf495ecbb2 2019.12.6a 5f4050c10998f6aabc2ff7febd69e8cf495ecbb2 2019.12.6a 6f2c23976dd39e605fc1bd82d3092013a9ebd99c 2019.12.6a +453a7de7f08b2e18ac73924492fbdb344188a048 2020.6.30a0 +743ea31a2fdef619b87165267e90a48523e8fceb 2020.12.14a diff -Nru 
python-cogent-2020.6.30a0+dfsg/MANIFEST.in python-cogent-2020.12.21a+dfsg/MANIFEST.in --- python-cogent-2020.6.30a0+dfsg/MANIFEST.in 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/MANIFEST.in 2020-12-20 23:35:03.000000000 +0000 @@ -1,2 +1,3 @@ include MANIFEST.in LICENSE +include requirements.txt recursive-include src * diff -Nru python-cogent-2020.6.30a0+dfsg/README.md python-cogent-2020.12.21a+dfsg/README.md --- python-cogent-2020.6.30a0+dfsg/README.md 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/README.md 2020-12-20 23:35:03.000000000 +0000 @@ -36,12 +36,10 @@ ### Install `extra` -- adds visualisation support -**NOTE:** Only available in the development version until release ≥2020.3 - -The `extra` group includes python libraries required for visualisation (i.e. [plotly](https://pypi.org/project/plotly/) and [psutil](https://pypi.org/project/psutil/) plus [pandas](https://pypi.org/project/pandas/) (optional) +The `extra` group includes python libraries required for visualisation, i.e. [plotly](https://pypi.org/project/plotly/), [kaleido](https://pypi.org/project/kaleido/), [psutil](https://pypi.org/project/psutil/) and [pandas](https://pypi.org/project/pandas/). ```bash -$ pip install cogent3[extra] +$ pip install "cogent3[extra]" ``` ### Install `dev` -- adds `cogent3` development related libraries @@ -49,7 +47,7 @@ The `dev` group includes python libraries required for development of `cogent3`. ```bash -$ pip install cogent3[dev] +$ pip install "cogent3[dev]" ``` ### Install the development version @@ -60,11 +58,11 @@ ## Project Information -`cogent3` is released under the BSD-3 license, documentation for [`cogent3` is on readthedocs](https://cogent3.readthedocs.io/en/latest/), while [`cogent3` code is on GitHub](https://github.com/cogent3/cogent3). 
If you would like to contribute (and we hope you do!), we have created a companion [`c3dev` GitHub](https://github.com/cogent3/c3dev) repo which provides details on how to contribute and some useful tools for doing so. +`cogent3` is released under the BSD-3 license, documentation is at [cogent3.org](https://cogent3.org), while [`cogent3` code is on GitHub](https://github.com/cogent3/cogent3). If you would like to contribute (and we hope you do!), we have created a companion [`c3dev` GitHub](https://github.com/cogent3/c3dev) repo which provides details on how to contribute and some useful tools for doing so. ## Project History -`cogent3` is a descendant of [PyCogent](https://github.com/pycogent/pycogent.github.com). While there is much in common with PyCogent, the amount of change has been substantial, motivating a new name `cogent3`. This name has been chosen because `cogent` was always the import name (dating back to [PyEvolve in 2004](https://www.ncbi.nlm.nih.gov/pubmed/14706121)) and it's Python 3 only. +`cogent3` is a descendant of [PyCogent](https://github.com/pycogent/pycogent.github.com). While there is much in common with PyCogent, the amount of change has been substantial, motivating the name change to `cogent3`. This name has been chosen because `cogent` was always the import name (dating back to [PyEvolve in 2004](https://www.ncbi.nlm.nih.gov/pubmed/14706121)) and it's Python 3 only. Given this history, we are grateful to the multitude of individuals who have made contributions over the years. These individuals are explicitly acknowledged in all the files they contributed to and were co-authors on the original [PyEvolve](https://www.ncbi.nlm.nih.gov/pubmed/14706121) and [PyCogent](https://www.ncbi.nlm.nih.gov/pubmed/17708774) publications. 
diff -Nru python-cogent-2020.6.30a0+dfsg/requirements.txt python-cogent-2020.12.21a+dfsg/requirements.txt --- python-cogent-2020.6.30a0+dfsg/requirements.txt 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/requirements.txt 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,6 @@ +pillow==8.0.1 +psutil==5.7.3 +scipy==1.5.4 +tox==3.20.1 +tox-gh-actions==2.2.0 +.[dev] \ No newline at end of file diff -Nru python-cogent-2020.6.30a0+dfsg/setup.py python-cogent-2020.12.21a+dfsg/setup.py --- python-cogent-2020.6.30a0+dfsg/setup.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/setup.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,12 +1,8 @@ #!/usr/bin/env python -import os import pathlib -import re -import subprocess import sys -from setuptools import Command, find_packages, setup -from setuptools.extension import Extension +from setuptools import find_packages, setup __author__ = "Peter Maxwell" @@ -19,9 +15,9 @@ "Daniel McDonald", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Peter Maxwell" -__email__ = "pm67nz@gmail.com" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" # Check Python version, no point installing if unsupported version inplace @@ -40,39 +36,6 @@ sys.argv[1:] = ["bdist_wininst"] -# A new command for predist, ie: pyrexc but no compile. -class NullCommand(Command): - description = "Generate .c files from .pyx files" - # List of option tuples: long name, short name (or None), and help string. 
- user_options = [] # [('', '', ""),] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - pass - - -class BuildDocumentation(NullCommand): - description = "Generate HTML documentation files" - - def run(self): - # Restructured Text -> HTML - try: - import sphinx - except ImportError: - print("Failed to build html due to ImportErrors for sphinx") - return - cwd = os.getcwd() - os.chdir("doc") - subprocess.call(["make", "html"]) - os.chdir(cwd) - print("Built index.html") - - short_description = "COmparative GENomics Toolkit 3" readme_path = pathlib.Path(__file__).parent / "README.md" @@ -131,6 +94,7 @@ "jupyter_client", "jupyterlab", "jupytext", + "kaleido", "nbconvert", "nbformat", "nbsphinx", @@ -139,7 +103,6 @@ "plotly", "psutil", "pytest", - "pytest-azurepipelines", "pytest-cov", "pytest>=4.3.0", "sphinx", @@ -148,7 +111,7 @@ "sphinx_panels", "tox", ], - "extra": ["pandas", "plotly", "psutil"], + "extra": ["pandas", "plotly", "psutil", "kaleido"], }, project_urls=PROJECT_URLS, ) diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/align/align.py python-cogent-2020.12.21a+dfsg/src/cogent3/align/align.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/align/align.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/align/align.py 2020-12-20 23:35:03.000000000 +0000 @@ -13,7 +13,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/align/compare_numba.py python-cogent-2020.12.21a+dfsg/src/cogent3/align/compare_numba.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/align/compare_numba.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/align/compare_numba.py 2020-12-20 
23:35:03.000000000 +0000 @@ -10,7 +10,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/align/dp_calculation.py python-cogent-2020.12.21a+dfsg/src/cogent3/align/dp_calculation.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/align/dp_calculation.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/align/dp_calculation.py 2020-12-20 23:35:03.000000000 +0000 @@ -18,7 +18,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttleuy" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/align/indel_model.py python-cogent-2020.12.21a+dfsg/src/cogent3/align/indel_model.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/align/indel_model.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/align/indel_model.py 2020-12-20 23:35:03.000000000 +0000 @@ -10,7 +10,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/align/indel_positions.py python-cogent-2020.12.21a+dfsg/src/cogent3/align/indel_positions.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/align/indel_positions.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/align/indel_positions.py 2020-12-20 23:35:03.000000000 +0000 @@ -4,7 +4,7 
@@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/align/__init__.py python-cogent-2020.12.21a+dfsg/src/cogent3/align/__init__.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/align/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/align/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -25,7 +25,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Jeremy Widmann", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/align/pairwise_pogs_numba.py python-cogent-2020.12.21a+dfsg/src/cogent3/align/pairwise_pogs_numba.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/align/pairwise_pogs_numba.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/align/pairwise_pogs_numba.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,17 +1,7 @@ import numba import numpy as np -from numba import ( - boolean, - float64, - int32, - int64, - jitclass, - njit, - optional, - types, - uint8, -) +from numba import boolean, float64, int32, int64, njit, optional, types, uint8 from numba.core.types.containers import Tuple @@ -19,7 +9,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/align/pairwise.py 
python-cogent-2020.12.21a+dfsg/src/cogent3/align/pairwise.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/align/pairwise.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/align/pairwise.py 2020-12-20 23:35:03.000000000 +0000 @@ -15,7 +15,6 @@ from cogent3.core.alignment import Aligned from cogent3.evolve.likelihood_tree import LikelihoodTreeEdge from cogent3.util.misc import ascontiguousarray -from cogent3.util.modules import ExpectedImportError, importVersionedModule from . import pairwise_pogs_numba as align_module from . import pairwise_seqs_numba as seq_align_module @@ -33,7 +32,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/align/pairwise_seqs_numba.py python-cogent-2020.12.21a+dfsg/src/cogent3/align/pairwise_seqs_numba.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/align/pairwise_seqs_numba.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/align/pairwise_seqs_numba.py 2020-12-20 23:35:03.000000000 +0000 @@ -8,7 +8,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/align/progressive.py python-cogent-2020.12.21a+dfsg/src/cogent3/align/progressive.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/align/progressive.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/align/progressive.py 2020-12-20 23:35:03.000000000 +0000 @@ -12,7 +12,7 @@ __copyright__ = 
"Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -66,9 +66,11 @@ tip_names = tree.get_tip_names() tip_names.sort() seq_names.sort() - assert tip_names == seq_names, ( - "names don't match between seqs and tree: tree=%s; seqs=%s" - % (tip_names, seq_names) + assert ( + tip_names == seq_names + ), "names don't match between seqs and tree: tree=%s; seqs=%s" % ( + tip_names, + seq_names, ) ests_from_pairwise = False elif two_seqs: diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/align/pycompare.py python-cogent-2020.12.21a+dfsg/src/cogent3/align/pycompare.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/align/pycompare.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/align/pycompare.py 2020-12-20 23:35:03.000000000 +0000 @@ -11,7 +11,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/align/traceback.py python-cogent-2020.12.21a+dfsg/src/cogent3/align/traceback.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/align/traceback.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/align/traceback.py 2020-12-20 23:35:03.000000000 +0000 @@ -10,7 +10,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Rob Knight", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -74,8 +74,7 @@ def alignment_traceback(seqs, aligned_positions, 
word_length): - """Alignment object from state matrix and ending point. - """ + """Alignment object from state matrix and ending point.""" (starts, ends, maps) = map_traceback(aligned_positions) aligneds = [] for (start, end, amap, (name, seq)) in zip(starts, ends, maps, seqs): diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/app/align.py python-cogent-2020.12.21a+dfsg/src/cogent3/app/align.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/app/align.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/app/align.py 2020-12-20 23:35:03.000000000 +0000 @@ -25,7 +25,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -138,20 +138,16 @@ # as we're going to be using a pairwise distance that excludes gaps # eliminating positions with deletions in the reference result = result.filtered(no_ref_gap) - if aligned is None: - aligned = result - continue - - aligned = aligned.add_from_ref_aln(result) + aligned = result if aligned is None else aligned.add_from_ref_aln(result) # default to ArrayAlign - new = aligned.to_type(array_align=True) + new = aligned.to_type(array_align=True, moltype=self._moltype) return new class progressive_align(ComposableSeq): """Progressive multiple sequence alignment via any cogent3 model. 
- Returns an Alignment object.""" + Returns an Alignment object.""" _input_types = SEQUENCE_TYPE _output_types = (ALIGNED_TYPE, SERIALISABLE_TYPE) @@ -255,9 +251,7 @@ self.func = self.multiple_align def _build_guide(self, seqs): - crude_aligner = align_to_ref(moltype=self._moltype) - aln = crude_aligner(seqs) - tree = self._make_tree(aln) + tree = self._make_tree(seqs) if self._scalar != 1: scaler = scale_branches(scalar=self._scalar) tree = scaler(tree) diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/app/composable.py python-cogent-2020.12.21a+dfsg/src/cogent3/app/composable.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/app/composable.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/app/composable.py 2020-12-20 23:35:03.000000000 +0000 @@ -3,6 +3,7 @@ import os import pathlib import re +import textwrap import time import traceback @@ -29,7 +30,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -62,11 +63,7 @@ def _get_origin(origin): - if type(origin) == str: - result = origin - else: - result = origin.__class__.__name__ - return result + return origin if type(origin) == str else origin.__class__.__name__ class NotCompleted(int): @@ -187,11 +184,7 @@ return True name = data.__class__.__name__ - valid = False - for type_ in self._data_types: - if type_ == name: - valid = True - break + valid = name in self._data_types if not valid: msg = f"invalid data type, '{name}' not in {', '.join(self._data_types)}" valid = NotCompleted("ERROR", self, message=msg, source=data) @@ -214,6 +207,7 @@ if txt: txt += " + " txt += "%s(%s)" % (self.__class__.__name__, ", ".join(self._formatted)) + txt = textwrap.fill(txt, width=80, break_long_words=False) return txt def __repr__(self): @@ -434,8 +428,8 @@ 
LOGGER.log_file_path = logger elif logger == True: log_file_path = pathlib.Path(_make_logfile_name(self)) - source = pathlib.Path(self.data_store.source) - log_file_path = source.parent / log_file_path + src = pathlib.Path(self.data_store.source) + log_file_path = src.parent / log_file_path LOGGER = scitrack.CachingLogger() LOGGER.log_file_path = str(log_file_path) else: @@ -464,15 +458,21 @@ outcome = result if process is self else self(result) results.append(outcome) if LOGGER: - member = dstore[i] + member = todo[i] # ensure member is a DataStoreMember instance if not isinstance(member, DataStoreMember): member = SingleReadDataStore(member)[0] + mem_id = self.data_store.make_relative_identifier(member.name) + src = self.data_store.make_relative_identifier(result) + assert ( + src == mem_id + ), f"mismatched input data and result identifiers: {src} != {mem_id}" + LOGGER.log_message(member, label="input") if member.md5: LOGGER.log_message(member.md5, label="input md5sum") - mem_id = self.data_store.make_relative_identifier(member.name) + if outcome: member = self.data_store.get_member(mem_id) LOGGER.log_message(member, label="output") @@ -612,6 +612,8 @@ super(_checkpointable, self).__init__(**kwargs) self._formatted_params() + data_path = str(data_path) + if data_path.endswith(".tinydb") and not self.__class__.__name__.endswith("db"): raise ValueError("tinydb suffix reserved for write_db") diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/app/data_store.py python-cogent-2020.12.21a+dfsg/src/cogent3/app/data_store.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/app/data_store.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/app/data_store.py 2020-12-20 23:35:03.000000000 +0000 @@ -10,10 +10,12 @@ from collections import defaultdict from fnmatch import fnmatch, translate from io import TextIOWrapper +from json import JSONDecodeError from pathlib import Path from pprint import pprint from warnings import warn +from scitrack 
import get_text_hexdigest from tinydb import Query, TinyDB from tinydb.middlewares import CachingMiddleware from tinydb.storages import JSONStorage @@ -27,14 +29,13 @@ ) from cogent3.util.table import Table from cogent3.util.union_dict import UnionDict -from scitrack import get_text_hexdigest __author__ = "Gavin Huttley" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -55,15 +56,21 @@ pass data = json.dumps(data) - record = dict(identifier=identifier, data=data, completed=completed) - return record + return dict(identifier=identifier, data=data, completed=completed) def load_record_from_json(data): """returns identifier, data, completed status from json string""" if type(data) == str: data = json.loads(data) - value = json.loads(data["data"]) + + value = data["data"] + if isinstance(value, str): + try: + value = json.loads(value) + except JSONDecodeError: + pass + return data["identifier"], value, data["completed"] @@ -219,8 +226,7 @@ return None def get_relative_identifier(self, identifier): - """returns the identifier relative to store root path - """ + """returns the identifier relative to store root path""" if isinstance(identifier, DataStoreMember) and identifier.parent is self: return identifier @@ -240,8 +246,7 @@ return identifier def get_absolute_identifier(self, identifier, from_relative=False): - """returns the identifier relative to the root path - """ + """returns the identifier relative to the root path""" if not from_relative: identifier = self.get_relative_identifier(identifier) source = self.source.replace(".zip", "") @@ -340,7 +345,7 @@ kwargs ignored """ - path = Path(source) + path = Path(source).expanduser() assert path.exists() and path.is_file() super(SingleReadDataStore, self).__init__( str(path.parent), suffix=str(path.suffix) 
@@ -569,7 +574,7 @@ def _has_other_suffixes(self, path, suffix): p = Path(path) - allowed = {str(suffix), "log"} + allowed = {str(suffix).lower(), "log"} for f in p.iterdir(): if get_format_suffixes(str(f))[0] not in allowed: return True @@ -774,8 +779,7 @@ self._finish.detach() def lock(self): - """if writable, and not locked, locks the database to this pid - """ + """if writable, and not locked, locks the database to this pid""" if not self.locked: self._db.insert(dict(identifier="LOCK", pid=os.getpid())) self._db.storage.flush() diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/app/dist.py python-cogent-2020.12.21a+dfsg/src/cogent3/app/dist.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/app/dist.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/app/dist.py 2020-12-20 23:35:03.000000000 +0000 @@ -19,22 +19,40 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" class fast_slow_dist(ComposableDistance): - """Pairwise distance calculation. Uses fast (but less - numerically robust) approach where possible, slow (robust) - approach when not. Returns a DistanceMatrix.""" + """Pairwise distance calculation for aligned sequences. + + Uses fast (but less numerically robust) approach where possible, slow (robust) + approach when not. Returns a DistanceMatrix. + """ _input_types = ALIGNED_TYPE _output_types = (PAIRWISE_DISTANCE_TYPE, SERIALISABLE_TYPE) _data_types = ("ArrayAlignment", "Alignment") def __init__(self, distance=None, moltype=None, fast_calc=None, slow_calc=None): + """ + Parameters + ---------- + moltype : str + cogent3 moltype + distance : str + Name of a distance method available as both fast and slow calculator. + fast_calc + Name of a fast distance calculator. See cogent3.available_distances(). 
+ slow_calc + Name of a slow distance calculator. See cogent3.available_models(). + + Notes + ----- + If you provide fast_calc or slow_calc, you must specify the moltype. + """ super(fast_slow_dist, self).__init__( input_types=self._input_types, output_types=self._output_types, @@ -51,7 +69,7 @@ fast_calc = distance slow_calc = distance - d = set(["hamming", "paralinear", "logdet"]) & set([slow_calc, fast_calc]) + d = {"hamming", "percent", "paralinear", "logdet"} & {slow_calc, fast_calc} if d and not self._moltype: raise ValueError(f"you must provide a moltype for {d}") @@ -81,6 +99,7 @@ elif slow_calc: self._moltype = slow_calc.moltype self._sm = slow_calc + self.func = self.calc_distance def _est_dist_pair_slow(self, aln): """returns distance between seq pairs in aln""" @@ -90,10 +109,9 @@ lf.set_alignment(aln) lf.set_param_rule("length", is_independent=False) lf.optimise(max_restarts=0, show_progress=False) - dist = 2 * lf.get_param_value("length", edge=aln.names[0]) - return dist + return 2 * lf.get_param_value("length", edge=aln.names[0]) - def __call__(self, aln): + def calc_distance(self, aln): if self._moltype and self._moltype != aln.moltype: aln = aln.to_moltype(self._moltype) diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/app/evo.py python-cogent-2020.12.21a+dfsg/src/cogent3/app/evo.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/app/evo.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/app/evo.py 2020-12-20 23:35:03.000000000 +0000 @@ -32,7 +32,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -204,7 +204,7 @@ def fit(self, aln, initialise=None, construct=True, **opt_args): moltypes = {aln.moltype.label, self._sm.moltype.label} - if moltypes == {"protein", "dna"} or moltypes == {"protein", 
"rna"}: + if moltypes in [{"protein", "dna"}, {"protein", "rna"}]: msg = ( f"substitution model moltype '{self._sm.moltype.label}' and" f" alignment moltype '{aln.moltype.label}' are incompatible" @@ -260,6 +260,18 @@ _data_types = ("ArrayAlignment", "Alignment") def __init__(self, null, *alternates, init_alt=None): + """ + Parameters + ---------- + null : model + The null model instance + alternates : model or series of models + The alternate model or a series of them + init_alt : callable + A callback function for initialising the alternate model + likelihood function prior to optimisation. Defaults to using + MLEs from the null model. + """ # todo document! init_alt needs to be able to take null, alt and *args super(hypothesis, self).__init__( input_types=self._input_types, diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/app/__init__.py python-cogent-2020.12.21a+dfsg/src/cogent3/app/__init__.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/app/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/app/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -8,7 +8,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -41,6 +41,7 @@ def available_apps(): """returns table of all available apps""" from cogent3.util.table import Table + from .composable import Composable, user_function # excluding composable, find all class diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/app/io.py python-cogent-2020.12.21a+dfsg/src/cogent3/app/io.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/app/io.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/app/io.py 2020-12-20 23:35:03.000000000 +0000 @@ -51,7 +51,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = 
["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -138,7 +138,7 @@ seqs = self.klass(data=data, moltype=self.moltype) seqs.info.source = abs_path - if self._output_types & {"sequences"}: + if self._output_types & {SEQUENCE_TYPE}: seqs = seqs.degap() seqs.info.source = abs_path @@ -437,7 +437,7 @@ class load_json(Composable): - """Loads json serialised cogent3 objects from a json file. + """Loads json serialised cogent3 objects from a json file. Returns whatever object type was stored.""" _type = "output" @@ -515,7 +515,7 @@ class load_db(Composable): - """Loads json serialised cogent3 objects from a TinyDB file. + """Loads json serialised cogent3 objects from a TinyDB file. Returns whatever object type was stored.""" _type = "output" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/app/result.py python-cogent-2020.12.21a+dfsg/src/cogent3/app/result.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/app/result.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/app/result.py 2020-12-20 23:35:03.000000000 +0000 @@ -15,7 +15,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -27,7 +27,7 @@ _type = "generic_result" def __init__(self, source): - self._store = dict() + self._store = {} self._construction_kwargs = dict(source=source) self.source = source @@ -51,8 +51,7 @@ num = len(self) types = [f"{repr(k)}: {self[k].__class__.__name__}" for k in self.keys()[:4]] types = ", ".join(types) - result = f"{num}x {name}({types})" - return result + return f"{num}x {name}({types})" def __str__(self): return repr(self) @@ -126,7 +125,7 @@ elapsed_time=elapsed_time, 
num_evaluations=num_evaluations, ) - self._store = dict() + self._store = {} self._name = name assert stat is sum or stat is max self._stat = stat @@ -142,19 +141,21 @@ self.deserialised_values() # making sure we're fully reloaded attrs = list(self._stat_attrs) header = ["key"] + attrs[:] - rows = [[""] + [getattr(self, attr) for attr in attrs]] + rows = [[repr("")] + [getattr(self, attr) for attr in attrs]] if len(self) > 1: # we just add keys, lnL and nfp for key in self: row = [repr(key), self[key].lnL, self[key].nfp, "", ""] rows.append(row) + else: + rows[0][0] = repr(list(self)[0]) - table = Table(header=header, data=rows, title=self.name) - return table + return Table(header=header, data=rows, title=self.name) def _repr_html_(self): table = self._get_repr_data_() - return table._repr_html_(include_shape=False) + table.set_repr_policy(show_shape=False) + return table._repr_html_() def __repr__(self): table = self._get_repr_data_() @@ -222,9 +223,7 @@ seq = "".join(("".join(t) for t in zip(seq1, seq2, seq3))) data[n] = seq - simaln = aln.__class__(data=data) - - return simaln + return aln.__class__(data=data) def __lt__(self, other): self_lnL = self.lnL @@ -237,6 +236,7 @@ self._init_stats() if len(self) == 1: result = list(self.values())[0] + result.name = self.name else: result = OrderedDict() for k in sorted(self): @@ -244,6 +244,7 @@ if type(k) == str and k.isdigit(): k = int(k) result[k] = v + v.name = f"{self.name} pos-{k}" return result @@ -252,10 +253,7 @@ if self._lnL is None: lnL = 0.0 for v in self.values(): - if isinstance(v, dict): - l = v.get("lnL") - else: - l = v.lnL + l = v.get("lnL") if isinstance(v, dict) else v.lnL lnL = self._stat([l, lnL]) self._lnL = lnL @@ -266,10 +264,7 @@ if self._nfp is None: nfp = 0 for v in self.values(): - if isinstance(v, dict): - n = v.get("nfp") - else: - n = v.nfp + n = v.get("nfp") if isinstance(v, dict) else v.nfp nfp = self._stat([n, nfp]) self._nfp = nfp @@ -281,10 +276,7 @@ if self._DLC is None: DLC = [] 
for v in self.values(): - if isinstance(v, dict): - d = v.get("DLC") - else: - d = v.all_psubs_DLC() + d = v.get("DLC") if isinstance(v, dict) else v.all_psubs_DLC() DLC.append(d != False) self._DLC = all(DLC) @@ -338,7 +330,9 @@ Note ---- In the case of a discrete time process, length is 'paralinear'""" - from cogent3.evolve.ns_substitution_model import DiscreteSubstitutionModel + from cogent3.evolve.ns_substitution_model import ( + DiscreteSubstitutionModel, + ) try: model = self.lf.model @@ -409,7 +403,8 @@ def _repr_html_(self): table = self._get_repr_data_() - return table._repr_html_(include_shape=False) + table.set_repr_policy(show_shape=False) + return table._repr_html_() def __repr__(self): table = self._get_repr_data_() @@ -512,20 +507,30 @@ table = Table(header=["hypothesis", "key"] + attrs, data=rows, title=self.name) table = table.sorted(columns="nfp") + table.set_repr_policy(show_shape=False) stats = [[self.LR, self.df, self.pvalue]] - stats = Table(header=["LR", "df", "pvalue"], data=stats, title="Statistics") + col_templates = { + "pvalue": "%.4f" if self.pvalue > 1e-3 else "%.2e", + } + stats = Table( + header=["LR", "df", "pvalue"], + data=stats, + title="Statistics", + column_templates=col_templates, + ) + stats.set_repr_policy(show_shape=False) return stats, table def _repr_html_(self): stats, table = self._get_repr_data_() - result = [t._repr_html_(include_shape=False) for t in (stats, table)] + result = [t._repr_html_() for t in (stats, table)] return "\n".join(result) def __repr__(self): stats, table = self._get_repr_data_() result = [] for t in (stats, table): - r, _ = t._get_repr_() + r, _, _ = t._get_repr_() result.append(str(r)) return "\n".join(result) @@ -537,8 +542,7 @@ @property def alt(self): alts = [self[k] for k in self if k != self._name_of_null] - alt = max(alts) - return alt + return max(alts) @property def LR(self): @@ -550,8 +554,7 @@ @property def df(self): """returns the degrees-of-freedom (alt.nfp - null.nfp)""" - df = 
self.alt.nfp - self.null.nfp - return df + return self.alt.nfp - self.null.nfp @property def pvalue(self): @@ -591,8 +594,7 @@ @property def null_dist(self): """returns the LR values corresponding to the synthetic data""" - result = [self[k].LR for k in self if k != "observed"] - return result + return [self[k].LR for k in self if k != "observed"] class tabular_result(generic_result): diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/app/sample.py python-cogent-2020.12.21a+dfsg/src/cogent3/app/sample.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/app/sample.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/app/sample.py 2020-12-20 23:35:03.000000000 +0000 @@ -22,7 +22,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -201,7 +201,7 @@ class take_codon_positions(ComposableAligned): - """Extracts the specified codon position(s) from an alignment. + """Extracts the specified codon position(s) from an alignment. Returns an Alignment.""" _input_types = (ALIGNED_TYPE, SERIALISABLE_TYPE) @@ -312,7 +312,7 @@ Returns ------- - A new sequence collection, or False if not all the named sequences are + A new sequence collection, or False if not all the named sequences are in the collection. """ super(take_named_seqs, self).__init__( @@ -336,7 +336,7 @@ class min_length(ComposableSeq): - """Filters sequence collections / alignments by length. Returns the + """Filters sequence collections / alignments by length. Returns the data if it satisfies the condition, NotCompleted otherwise.""" _input_types = (SEQUENCE_TYPE, ALIGNED_TYPE, SERIALISABLE_TYPE) @@ -415,7 +415,7 @@ class fixed_length(ComposableAligned): - """Sample an alignment to a fixed length. Returns an Alignment of the + """Sample an alignment to a fixed length. 
Returns an Alignment of the specified length, or NotCompleted if alignment too short.""" _input_types = (ALIGNED_TYPE, SERIALISABLE_TYPE) diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/app/translate.py python-cogent-2020.12.21a+dfsg/src/cogent3/app/translate.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/app/translate.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/app/translate.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,7 +1,7 @@ from collections import defaultdict from cogent3.core.alignment import SequenceCollection -from cogent3.core.genetic_code import DEFAULT, get_code +from cogent3.core.genetic_code import get_code from cogent3.core.moltype import get_moltype from .composable import ( @@ -16,13 +16,13 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" -def best_frame(seq, gc=DEFAULT, allow_rc=False, require_stop=False): +def best_frame(seq, gc=1, allow_rc=False, require_stop=False): """returns reading frame start that has either no stops or a single terminal stop codon @@ -37,13 +37,13 @@ best frame on rc, it will be negative require_stop a terminal stop must be present - + Returns ------- int 1, 2, 3 if the best frame on the +_ strand; -1, -2, -3 if the best frame is on the reverse strand - + Raises ------ ValueError @@ -87,9 +87,9 @@ return frame -def translate_frames(seq, moltype=None, gc=DEFAULT, allow_rc=False): - """translates a nucleic acid sequence - +def translate_frames(seq, moltype=None, gc=1, allow_rc=False): + """translates a nucleic acid sequence + Parameters ---------- moltype @@ -98,7 +98,7 @@ identifer for a genetic code or a genetic code instance allow_rc : bool includes frames sequence reverse complement - + Returns ------- [(frame, translation), ..] 
@@ -118,7 +118,7 @@ def get_fourfold_degenerate_sets(gc, alphabet=None, as_indices=True): """returns set() of codons that are 4-fold degenerate for genetic code gc - + Parameters ---------- gc @@ -161,9 +161,7 @@ _output_types = SEQUENCE_TYPE _data_types = ("ArrayAlignment", "Alignment", "SequenceCollection") - def __init__( - self, moltype="dna", gc=DEFAULT, allow_rc=False, trim_terminal_stop=True - ): + def __init__(self, moltype="dna", gc=1, allow_rc=False, trim_terminal_stop=True): """selects translatable sequences Sequences are truncated to modulo 3. seqs.info has a translation_errors @@ -180,7 +178,7 @@ best frame on rc, it will be negative trim_terminal_stop : bool exclude terminal stop codon from seqs - + Returns ------- A sequence collection. Sequences that could not be translated @@ -249,9 +247,7 @@ _output_types = (SEQUENCE_TYPE, ALIGNED_TYPE) _data_types = ("ArrayAlignment", "Alignment", "SequenceCollection") - def __init__( - self, moltype="dna", gc=DEFAULT, allow_rc=False, trim_terminal_stop=True - ): + def __init__(self, moltype="dna", gc=1, allow_rc=False, trim_terminal_stop=True): """generates aa sequences Parameters diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/app/tree.py python-cogent-2020.12.21a+dfsg/src/cogent3/app/tree.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/app/tree.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/app/tree.py 2020-12-20 23:35:03.000000000 +0000 @@ -13,7 +13,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/cluster/__init__.py python-cogent-2020.12.21a+dfsg/src/cogent3/cluster/__init__.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/cluster/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ 
python-cogent-2020.12.21a+dfsg/src/cogent3/cluster/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -8,7 +8,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Catherine Lozuopone", "Rob Knight", "Peter Maxwell", "Justin Kuczynski"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Catherine Lozupone" __email__ = "lozupone@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/cluster/UPGMA.py python-cogent-2020.12.21a+dfsg/src/cogent3/cluster/UPGMA.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/cluster/UPGMA.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/cluster/UPGMA.py 2020-12-20 23:35:03.000000000 +0000 @@ -22,7 +22,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Catherine Lozuopone", "Rob Knight", "Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Catherine Lozupone" __email__ = "lozupone@colorado.edu" __status__ = "Production" @@ -157,8 +157,7 @@ def inputs_from_dict_array(darr): - """makes inputs for UPGMA_cluster from a DictArray object - """ + """makes inputs for UPGMA_cluster from a DictArray object""" darr.array += numpy.eye(darr.shape[0]) * BIG_NUM nodes = list(map(PhyloNode, darr.keys())) return darr.array, nodes diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/core/alignment.py python-cogent-2020.12.21a+dfsg/src/cogent3/core/alignment.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/core/alignment.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/core/alignment.py 2020-12-20 23:35:03.000000000 +0000 @@ -25,7 +25,7 @@ import warnings from collections import Counter, defaultdict -from copy import copy, deepcopy +from copy import deepcopy from functools import total_ordering from itertools import combinations from types import GeneratorType @@ -53,11 +53,10 @@ import 
cogent3 # will use to get at cogent3.parse.fasta.MinimalFastaParser, from cogent3.core.annotation import Map, _Annotatable -from cogent3.core.genetic_code import DEFAULT, get_code +from cogent3.core.genetic_code import get_code from cogent3.core.info import Info as InfoClass -from cogent3.core.location import LostSpan, Span from cogent3.core.profile import PSSM, MotifCountsArray -from cogent3.core.sequence import ArraySequence, frac_same +from cogent3.core.sequence import ArraySequence, Sequence, frac_same # which is a circular import otherwise. from cogent3.format.alignment import save_to_filename from cogent3.format.fasta import alignment_to_fasta @@ -69,10 +68,12 @@ from cogent3.util import progress_display as UI from cogent3.util.dict_array import DictArrayTemplate from cogent3.util.misc import ( + atomic_write, bytes_to_string, extend_docstring_from, get_format_suffixes, get_object_provenance, + get_setting_from_environ, ) from cogent3.util.union_dict import UnionDict @@ -91,7 +92,7 @@ "Jan Kosinski", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -287,8 +288,7 @@ def seqs_from_generic(seqs, alphabet=None): - """returns seqs, names - """ + """returns seqs, names""" names = [] for s in seqs: if hasattr(s, "name"): @@ -522,7 +522,7 @@ # both SequenceCollections and Alignments. 
self._set_additional_attributes(curr_seqs) - self._repr_policy = dict(num_seqs=10, num_pos=60, ref_name="longest") + self._repr_policy = dict(num_seqs=10, num_pos=60, ref_name="longest", wrap=60) def __str__(self): """Returns self in FASTA-format, respecting name order.""" @@ -1103,7 +1103,7 @@ """ return alignment_to_fasta(self.to_dict()) - def to_nexus(self, seq_type, interleave_len=50): + def to_nexus(self, seq_type, wrap=50, interleave_len=None): """ Return alignment in NEXUS format and mapping to sequence ids @@ -1115,7 +1115,13 @@ Raises exception if invalid alignment """ - return nexus_from_alignment(self, seq_type, interleave_len=interleave_len) + if interleave_len is not None: + cogent3.util.warning.deprecated( + "argument", "interleave_len", "wrap", "2021.6" + ) + wrap = interleave_len + + return nexus_from_alignment(self, seq_type, wrap=wrap) @property def num_seqs(self): @@ -1184,9 +1190,11 @@ combined = self.seqs + list(other) for seq in combined: - assert seq.__class__ == self_seq_class, ( - "Seq classes different: Expected %s, Got %s" - % (seq.__class__, self_seq_class) + assert ( + seq.__class__ == self_seq_class + ), "Seq classes different: Expected %s, Got %s" % ( + seq.__class__, + self_seq_class, ) combined_aln = self.__class__(data=combined, info=self.info) @@ -1243,19 +1251,25 @@ """ if filename is None: - raise DataError("no filename specified") - - # need to turn the alignment into a dictionary - align_dict = {} - for seq_name in self.names: - align_dict[seq_name] = str(self.named_seqs[seq_name]) + raise IOError("no filename specified") suffix, cmp_suffix = get_format_suffixes(filename) if format is None and suffix: format = suffix + if format == "json": + with atomic_write(filename, mode="wt") as f: + f.write(self.to_json()) + return + + # need to turn the alignment into a dictionary + align_dict = { + seq_name: str(self.named_seqs[seq_name]) for seq_name in self.names + } + if "order" not in kwargs: kwargs["order"] = self.names + 
save_to_filename(align_dict, filename, format, **kwargs) def __len__(self): @@ -1297,8 +1311,7 @@ raise AttributeError("%s -- %s" % (msg, "Did you set a DNA moltype?")) def get_seq(self, seqname): - """Return a sequence object for the specified seqname. - """ + """Return a sequence object for the specified seqname.""" return self.named_seqs[seqname] def to_dict(self): @@ -1424,7 +1437,7 @@ allow_gap=False, exclude_unobserved=False, ): - """returns dict of counts of motifs per sequence + """counts of motifs per sequence Parameters ---------- @@ -1436,6 +1449,10 @@ allow_gap if True, motifs containing a gap character are included. + Returns + ------- + MotifCountsArray + Notes ----- @@ -1446,7 +1463,7 @@ counts = [] motifs = set() for name in self.names: - seq = self.named_seqs[name] + seq = self.get_seq(name) c = seq.counts( motif_length=motif_length, include_ambiguity=include_ambiguity, @@ -1467,7 +1484,7 @@ allow_gap=False, exclude_unobserved=False, ): - """returns dict of counts of motifs + """counts of motifs Parameters ---------- @@ -1649,12 +1666,12 @@ def pad_seqs(self, pad_length=None, **kwargs): """Returns copy in which sequences are padded to same length. - Parameters - ---------- - pad_length - Length all sequences are to be padded to. Will pad - to max sequence length if pad_length is None or less than max - length. + Parameters + ---------- + pad_length + Length all sequences are to be padded to. Will pad + to max sequence length if pad_length is None or less than max + length. """ # get max length max_len = max([len(s) for s in self.seqs]) @@ -1727,12 +1744,13 @@ rc : bool or None include dotplot of reverse compliment also. 
Only applies to Nucleic acids moltypes + Returns ------- a Drawable or AnnotatedDrawable """ - from cogent3.draw.drawable import AnnotatedDrawable from cogent3.draw.dotplot import Dotplot + from cogent3.draw.drawable import AnnotatedDrawable if name1 is None and name2 is None: name1, name2 = list(choice(self.names, size=2, replace=False)) @@ -1845,7 +1863,7 @@ numpy array of log2 based scores at every position """ assert not self.is_ragged(), "all sequences must have same length" - from cogent3.parse import jaspar, cisbp + from cogent3.parse import cisbp, jaspar assert pssm or path, "Must specify a PSSM or a path" assert not (pssm and path), "Can only specify one of pssm, path" @@ -1886,34 +1904,45 @@ return array(result) - def set_repr_policy(self, num_seqs=None, num_pos=None, ref_name=None): + def set_repr_policy(self, num_seqs=None, num_pos=None, ref_name=None, wrap=None): """specify policy for repr(self) - Parameters - ---------- - num_seqs : int or None - number of sequences to include in represented display. - num_pos : int or None - length of sequences to include in represented display. - ref_name : str or None - name of sequence to be placed first, or "longest" (default). - If latter, indicates longest sequence will be chosen. - """ + Parameters + ---------- + num_seqs : int or None + number of sequences to include in represented display. + num_pos : int or None + length of sequences to include in represented display. + ref_name : str or None + name of sequence to be placed first, or "longest" (default). + If latter, indicates longest sequence will be chosen. 
+ wrap : int or None + number of printed bases per row + """ if num_seqs: - assert isinstance(num_seqs, int), "num_seqs is not an integer" + if not isinstance(num_seqs, int): + raise TypeError("num_seqs is not an integer") self._repr_policy["num_seqs"] = num_seqs if num_pos: - assert isinstance(num_pos, int), "num_pos is not an integer" + if not isinstance(num_pos, int): + raise TypeError("num_pos is not an integer") self._repr_policy["num_pos"] = num_pos if ref_name: - assert isinstance(ref_name, str), "ref_name is not a string" + if not isinstance(ref_name, str): + raise TypeError("ref_name is not a string") + if ref_name != "longest" and ref_name not in self.names: raise ValueError(f"no sequence name matching {ref_name}") self._repr_policy["ref_name"] = ref_name + if wrap: + if not isinstance(wrap, int): + raise TypeError("wrap is not an integer") + self._repr_policy["wrap"] = wrap + def probs_per_seq( self, motif_length=1, @@ -1976,9 +2005,7 @@ class SequenceCollection(_SequenceCollectionBase): - """Container for unaligned sequences - - """ + """Container for unaligned sequences""" def copy_annotations(self, unaligned): """Copies annotations from seqs in unaligned to self, matching by name. @@ -2039,7 +2066,10 @@ if hasattr(data, "name"): self.name = data.name - def annotate_matches_to(self, pattern, annot_type, name, allow_multiple=False): + @extend_docstring_from(Sequence.annotate_matches_to) + def annotate_matches_to( + self, pattern, annot_type, name, allow_multiple=False + ): # noqa return self.data.annotate_matches_to( pattern=pattern, annot_type=annot_type, @@ -2053,8 +2083,7 @@ moltype = property(_get_moltype) def copy(self): - """Returns a shallow copy of self - """ + """Returns a shallow copy of self""" return self.__class__(self.map, self.data) def deepcopy(self, sliced=True): @@ -2345,13 +2374,11 @@ return result def take_positions_if(self, f, negate=False): - """Returns new Alignment containing cols where f(col) is True. 
- """ + """Returns new Alignment containing cols where f(col) is True.""" return self.take_positions(self.get_position_indices(f, negate=negate)) def iupac_consensus(self, alphabet=None): - """Returns string containing IUPAC consensus sequence of the alignment. - """ + """Returns string containing IUPAC consensus sequence of the alignment.""" if alphabet is None: alphabet = self.moltype consensus = [] @@ -2460,7 +2487,7 @@ ----- For motif_length > 1, it's advisable to specify exclude_unobserved=True, this avoids unnecessary calculations. - """ + """ probs = self.probs_per_seq( motif_length=motif_length, @@ -2703,7 +2730,7 @@ Setting with_replacement to True and otherwise leaving parameters as defaults generates a standard bootstrap resampling of the alignment. - """ + """ population_size = len(self) // motif_length if not n: n = population_size @@ -2767,17 +2794,24 @@ return names, output def _repr_html_(self): - html = self.to_html( - name_order=self.names[: self._repr_policy["num_seqs"]], - ref_name=self._repr_policy["ref_name"], - limit=self._repr_policy["num_pos"], + settings = self._repr_policy.copy() + env_vals = get_setting_from_environ( + "COGENT3_ALIGNMENT_REPR_POLICY", + dict(num_seqs=int, num_pos=int, wrap=int, ref_name=str), + ) + settings.update(env_vals) + return self.to_html( + name_order=self.names[: settings["num_seqs"]], + ref_name=settings["ref_name"], + limit=settings["num_pos"], + wrap=settings["wrap"], ) - return html def to_html( self, name_order=None, - interleave_len=60, + interleave_len=None, + wrap=60, limit=None, ref_name="longest", colors=None, @@ -2791,8 +2825,9 @@ name_order order of names for display. 
interleave_len - maximum number of printed bases, defaults to - alignment length + will be replaced by wrap in version 2021.6 + wrap + number of alignment columns per row, old name is interleave_len limit truncate alignment to this length ref_name @@ -2813,39 +2848,51 @@ >>> from IPython.core.display import HTML >>> HTML(aln.to_html()) """ + if interleave_len is not None: + cogent3.util.warning.deprecated( + "argument", "interleave_len", "wrap", "2021.6" + ) + wrap = interleave_len if wrap == 60 else wrap + css, styles = self.moltype.get_css_style( colors=colors, font_size=font_size, font_family=font_family ) - - if not name_order: + if name_order: + selected = self.take_seqs(name_order) + else: + name_order = list(self.names) ref_name = ref_name or "longest" + selected = self if ref_name == "longest": - lengths = self.get_lengths(include_ambiguity=False, allow_gap=False) - length_names = [(l, n) for n, l in lengths.items()] - length_names.sort(reverse=True) - ref = length_names[0][1] + lengths = selected.get_lengths(include_ambiguity=False, allow_gap=False) + + length_names = defaultdict(list) + for n, l in lengths.items(): + length_names[l].append(n) + + longest = max(length_names) + ref = sorted(length_names[longest])[0] + elif ref_name: - if ref_name not in self.names: + if ref_name not in selected.names: raise ValueError(f"Unknown sequence name {ref_name}") ref = ref_name - name_order = list(self.names) name_order.remove(ref) name_order.insert(0, ref) if limit is None: - names, output = self._get_raw_pretty(name_order) + names, output = selected._get_raw_pretty(name_order) else: - names, output = self[:limit]._get_raw_pretty(name_order) + names, output = selected[:limit]._get_raw_pretty(name_order) - gaps = "".join(self.moltype.gaps) + gaps = "".join(selected.moltype.gaps) refname = names[0] refseq = output[refname] seqlen = len(refseq) start_gap = re.search("^[%s]+" % gaps, "".join(refseq)) end_gap = re.search("[%s]+$" % gaps, "".join(refseq)) - ref_colours = 
[] start = 0 if start_gap is None else start_gap.end() end = len(refseq) if end_gap is None else end_gap.start() seq_style = [] @@ -2854,7 +2901,7 @@ for i in range(seqlen): char = refseq[i] if i < start or i >= end: - style = "terminal_ambig_%s" % self.moltype.label + style = "terminal_ambig_%s" % selected.moltype.label else: style = styles[char] @@ -2881,13 +2928,10 @@ table = [""] seq_ = "" label_ = '' - num_row_ = ( - '' - ) - for i in range(0, seqlen, interleave_len): + num_row_ = '' + for i in range(0, seqlen, wrap): table.append(num_row_.format(i)) - seqblock = seqs[:, i : i + interleave_len].tolist() + seqblock = seqs[:, i : i + wrap].tolist() for n, s in zip(names, seqblock): s = "".join(s) row = "".join([label_ % n, seq_ % s]) @@ -2895,41 +2939,43 @@ table.append("
%s%s
{:,d}
{:,d}
") if ( limit - and limit < len(self) + and limit < len(selected) or name_order - and len(name_order) < len(self.names) + and len(name_order) < len(selected.names) ): - summary = ("%s x %s (truncated to %s x %s) %s " "alignment") % ( - len(self.names), + summary = ("%s x %s (truncated to %s x %s) %s alignment") % ( + self.num_seqs, len(self), - len(name_order) if name_order else len(self.names), - limit if limit else len(self), - self.moltype.label, + len(name_order) if name_order else len(selected.names), + limit if limit else len(selected), + selected.moltype.label, ) else: - summary = ("%s x %s %s " "alignment") % ( - len(self.names), + summary = ("%s x %s %s alignment") % ( + self.num_seqs, len(self), - self.moltype.label, + selected.moltype.label, ) text = [ "", - "", + '
', "\n".join(table), "

%s

" % summary, - "", + "
", ] return "\n".join(text) - def to_pretty(self, name_order=None, interleave_len=None): + def to_pretty(self, name_order=None, wrap=None, interleave_len=None): """returns a string representation of the alignment in pretty print format Parameters @@ -2937,9 +2983,16 @@ name_order order of names for display. interleave_len - maximum number of printed bases, defaults to alignment length + will be replaced by wrap in version 2021.6 + wrap + maximum number of printed bases, old name is interleave_len + """ + if interleave_len is not None: + cogent3.util.warning.deprecated( + "argument", "interleave_len", "wrap", "2021.6" + ) + wrap = interleave_len - """ names, output = self._get_raw_pretty(name_order=name_order) label_width = max(list(map(len, names))) name_template = "{:>%d}" % label_width @@ -2948,18 +3001,18 @@ def make_line(label, seq): return "%s %s" % (label, seq) - if interleave_len is None: + if wrap is None: result = [make_line(display_names[n], "".join(output[n])) for n in names] return "\n".join(result) align_length = len(self) result = [] - for start in range(0, align_length, interleave_len): + for start in range(0, align_length, wrap): for n in names: result.append( make_line( display_names[n], - "".join(output[n][start : start + interleave_len]), + "".join(output[n][start : start + wrap]), ) ) @@ -3025,7 +3078,7 @@ exclude_unobserved=False, alert=False, ): - """returns dict of counts of non-overlapping motifs per sequence + """counts of non-overlapping motifs per sequence Parameters ---------- @@ -3041,6 +3094,10 @@ alert warns if motif_length > 1 and alignment trimmed to produce motif columns + + Returns + ------- + MotifCountsArray """ length = (len(self) // motif_length) * motif_length if alert and len(self) != length: @@ -3254,7 +3311,7 @@ include_gap whether to include gap counts, shown on right y-axis """ - from cogent3.draw.drawable import Drawable, AnnotatedDrawable + from cogent3.draw.drawable import AnnotatedDrawable, Drawable window = window 
if window else numpy.sqrt(len(self)) window = int(window) @@ -3375,7 +3432,7 @@ elements and estimates that could not be computed for numerical reasons are set as nan """ - from cogent3.draw.drawable import Drawable, AnnotatedDrawable + from cogent3.draw.drawable import AnnotatedDrawable, Drawable from cogent3.evolve import coevolution as coevo from cogent3.util.union_dict import UnionDict @@ -3739,8 +3796,7 @@ } def __init__(self, *args, **kwargs): - """Returns new ArrayAlignment object. Inherits from SequenceCollection. - """ + """Returns new ArrayAlignment object. Inherits from SequenceCollection.""" kwargs["suppress_named_seqs"] = True super(ArrayAlignment, self).__init__(*args, **kwargs) self.array_positions = transpose(self.seq_data.astype(self.alphabet.array_type)) @@ -3888,8 +3944,7 @@ ) def iupac_consensus(self, alphabet=None): - """Returns string containing IUPAC consensus sequence of the alignment. - """ + """Returns string containing IUPAC consensus sequence of the alignment.""" if alphabet is None: alphabet = self.moltype consensus = [] @@ -3929,7 +3984,7 @@ Setting with_replacement to True and otherwise leaving parameters as defaults generates a standard bootstrap resampling of the alignment. 
- """ + """ population_size = len(self) // motif_length if not n: n = population_size @@ -4017,7 +4072,7 @@ s = s.replace(gapchar, ambig) return s - def trim_stop_codons(self, gc=DEFAULT, allow_partial=False, **kwargs): + def trim_stop_codons(self, gc=1, allow_partial=False, **kwargs): """Removes any terminal stop codons from the sequences Parameters @@ -4035,10 +4090,10 @@ stops = gc["*"] get_index = self.alphabet.degen.index - stop_indices = set(tuple(map(get_index, stop)) for stop in stops) + stop_indices = {tuple(map(get_index, stop)) for stop in stops} new_data = self.array_seqs.copy() - gap_indices = set(get_index(gap) for gap in self.moltype.gaps) + gap_indices = {get_index(gap) for gap in self.moltype.gaps} gap_index = get_index(self.moltype.gap) trim_length = len(self) @@ -4056,7 +4111,7 @@ ) break - if nondegen_index is None or nondegen_index - 3 < 0: + if nondegen_index is None or nondegen_index < 3: continue # slice last three valid positions and see if stop @@ -4072,11 +4127,12 @@ # this is an ugly hack for rather odd standard behaviour # we find the last alignment column to have not just gap chars # and trim up to that + i = 0 for i in range(len(result) - 1, -1, -1): col = set(result.array_seqs[:, i]) if not col <= gap_indices: break - if i != len(result): + if len(result) != i: result = result[: i + 1] return result diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/core/alphabet.py python-cogent-2020.12.21a+dfsg/src/cogent3/core/alphabet.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/core/alphabet.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/core/alphabet.py 2020-12-20 23:35:03.000000000 +0000 @@ -52,7 +52,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Rob Knight", "Andrew Butterfield"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" 
__status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/core/annotation.py python-cogent-2020.12.21a+dfsg/src/cogent3/core/annotation.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/core/annotation.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/core/annotation.py 2020-12-20 23:35:03.000000000 +0000 @@ -15,7 +15,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/core/genetic_code.py python-cogent-2020.12.21a+dfsg/src/cogent3/core/genetic_code.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/core/genetic_code.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/core/genetic_code.py 2020-12-20 23:35:03.000000000 +0000 @@ -16,7 +16,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Greg Caporaso", "Rob Knight", "Peter Maxwell", "Thomas La"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Greg Caporaso" __email__ = "caporaso@colorado.edu" __status__ = "Production" @@ -50,13 +50,14 @@ class GeneticCode: """Holds codon to amino acid mapping, and vice versa. - Usage: gc = GeneticCode(code_sequence) - sgc = GeneticCode( - 'FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG') - sgc['UUU'] == 'F' - sgc['TTT'] == 'F' - sgc['F'] == ['TTT', 'TTC'] #in arbitrary order - sgc['*'] == ['TAA', 'TAG', 'TGA'] #in arbitrary order + Use the `get_code()` function to get one of the included code instances. These are created as follows. 
+ + >>> code_sequence = 'FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG' + >>> gc = GeneticCode(code_sequence) + >>> sgc['UUU'] == 'F' + >>> sgc['TTT'] == 'F' + >>> sgc['F'] == ['TTT', 'TTC'] #in arbitrary order + >>> sgc['*'] == ['TAA', 'TAG', 'TGA'] #in arbitrary order code_sequence : 64 character string containing NCBI genetic code translation @@ -73,8 +74,19 @@ def __init__(self, code_sequence, ID=None, name=None, start_codon_sequence=None): """Returns new GeneticCode object. - code_sequence : 64-character string containing NCBI representation - of the genetic code. Raises GeneticCodeInitError if length != 64. + code_sequence : + + Parameters + ---------- + code_sequence : str + 64-character string containing NCBI representation of the genetic code. + ID + Identifier + name + name of the Genetic code + start_codon_sequence + 64-character string where the '-' character indicates the corresponding + position of code_sequence **is not** a start codon """ if len(code_sequence) != 64: raise GeneticCodeInitError( @@ -197,6 +209,7 @@ blocks = property(_get_blocks) def to_table(self): + """returns aa to codon mapping as a cogent3 Table""" from cogent3.core.moltype import IUPAC_PROTEIN_code_aa rows = [] @@ -205,8 +218,7 @@ codons = ",".join(self[code]) row = [aa, code, codons] rows.append(row) - t = Table(header=headers, data=rows, title=self.name) - return t + return Table(header=headers, data=rows, title=self.name) def __str__(self): """Returns code_sequence that constructs the GeneticCode.""" @@ -219,10 +231,11 @@ def _repr_html_(self): """Returns the html representation of GeneticCode.""" display = self.to_table() - return display._repr_html_(include_shape=False) + display.set_repr_policy(show_shape=False) + return display._repr_html_() def __eq__(self, other): - """ Allows two GeneticCode objects to be compared to each other. + """Allows two GeneticCode objects to be compared to each other. 
Two GeneticCode objects are equal if they have equal CodeSequences. """ return str(self) == str(other) @@ -243,15 +256,19 @@ raise InvalidCodonError("Codon or aa %s has wrong length" % item) def translate(self, dna, start=0): - """ Translates DNA to protein with current GeneticCode. - - dna = a string of nucleotides - start = position to begin translation (used to implement frames) + """Translates DNA to protein with current GeneticCode. - Returns string containing amino acid sequence. Translates the entire - sequence: it is the caller's responsibility to find open reading frames. - - NOTE: should return Protein object when we have a class for it. + Parameters + ---------- + dna: str + a string of nucleotides + start: int + position to begin translation (used to implement frames) + + Returns + ------- + String containing amino acid sequence. Translates the entire sequence. + It is the caller's responsibility to find open reading frames. """ if not dna: return "" @@ -270,8 +287,7 @@ return found def sixframes(self, dna): - """Returns six-frame translation as dict containing {frame:translation} - """ + """Returns six-frame translation as dict containing {frame:translation}""" reverse = dna.rc() return [self.translate(dna, start) for start in range(3)] + [ self.translate(reverse, start) for start in range(3) @@ -449,7 +465,7 @@ def get_code(code_id=1): """returns the genetic code - + Parameters ---------- code_id @@ -484,7 +500,7 @@ table = Table( header=header, data=rows, - index="Code ID", + index_name="Code ID", title="Specify a genetic code using either 'Name' or " "Code ID (as an integer or string)", ) diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/core/info.py python-cogent-2020.12.21a+dfsg/src/cogent3/core/info.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/core/info.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/core/info.py 2020-12-20 23:35:03.000000000 +0000 @@ -13,9 +13,9 @@ __copyright__ = "Copyright 
2007-2020, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Prototype" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/core/__init__.py python-cogent-2020.12.21a+dfsg/src/cogent3/core/__init__.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/core/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/core/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -25,7 +25,7 @@ "Greg Caporaso", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/core/location.py python-cogent-2020.12.21a+dfsg/src/cogent3/core/location.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/core/location.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/core/location.py 2020-12-20 23:35:03.000000000 +0000 @@ -56,9 +56,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell", "Matthew Wakefield", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Prototype" strip = str.strip @@ -842,8 +842,7 @@ """Complex object consisting of many spans.""" def __init__(self, spans=None): - """Returns a new Range object with data in spans. - """ + """Returns a new Range object with data in spans.""" spans = [] if spans is None else spans result = SpansOnly() # need to check if we got a single Span, since they define __iter__. 
diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/core/moltype.py python-cogent-2020.12.21a+dfsg/src/cogent3/core/moltype.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/core/moltype.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/core/moltype.py 2020-12-20 23:35:03.000000000 +0000 @@ -16,7 +16,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Rob Knight", "Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -58,7 +58,6 @@ Enumeration, _make_complement_array, ) -from cogent3.core.genetic_code import DEFAULT as DEFAULT_GENETIC_CODE from cogent3.core.genetic_code import get_code from cogent3.core.sequence import ( ABSequence, @@ -1281,7 +1280,7 @@ def get_css_style(self, colors=None, font_size=12, font_family="Lucida Console"): """returns string of CSS classes and {character: , ...} - + Parameters ---------- colors @@ -1298,21 +1297,17 @@ '.%s_%s{font-family: "%s",monospace !important; ' "font-size: %dpt !important; color: %s; }" ) - styles = _style_defaults[self.label].copy() + label = self.label or "" + styles = _style_defaults[label].copy() styles.update( - { - c: "_".join([c, self.label]) - for c in list(self.alphabet) + ["terminal_ambig"] - } + {c: "_".join([c, label]) for c in list(self.alphabet) + ["terminal_ambig"]} ) - css = [] - for char in list(styles) + ["ambig"]: - css.append( - template % (char, self.label, font_family, font_size, colors[char]) - ) + css = [ + template % (char, label, font_family, font_size, colors[char]) + for char in list(styles) + ["ambig"] + ] - css = "\n".join(css) return css, styles @@ -1384,12 +1379,14 @@ label="bytes", ) +# the None value catches cases where a moltype has no label attribute _style_defaults = { - mt.label: defaultdict(_DefaultValue("ambig_%s" % mt.label)) - for mt 
in (ASCII, BYTES, DNA, RNA, PROTEIN, PROTEIN_WITH_STOP) + getattr(mt, "label", ""): defaultdict( + _DefaultValue("ambig_%s" % getattr(mt, "label", "")) + ) + for mt in (ASCII, BYTES, DNA, RNA, PROTEIN, PROTEIN_WITH_STOP, None) } - # following is a two-state MolType useful for testing AB = MolType( seq_constructor=ABSequence, @@ -1419,7 +1416,7 @@ return self._gc -def CodonAlphabet(gc=DEFAULT_GENETIC_CODE, include_stop_codons=False): +def CodonAlphabet(gc=1, include_stop_codons=False): if isinstance(gc, (int, str)): gc = get_code(gc) if include_stop_codons: @@ -1505,8 +1502,9 @@ v = f"{v[:39]}..." rows.append([n, num, v]) header = ["Abbreviation", "Number of states", "Moltype"] - title = "Specify a moltype by the string 'Abbreviation' (case insensitive)." + title = "Specify a moltype by the Abbreviation (case insensitive)." - result = Table(header=header, data=rows, title=title, index="Abbreviation") + result = Table(header=header, data=rows, title=title, index_name="Abbreviation") result = result.sorted(columns=["Number of states", "Abbreviation"]) + result.format_column("Abbreviation", repr) return result diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/core/profile.py python-cogent-2020.12.21a+dfsg/src/cogent3/core/profile.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/core/profile.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/core/profile.py 2020-12-20 23:35:03.000000000 +0000 @@ -12,7 +12,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -30,11 +30,7 @@ # todo validate that motifs are strings and row_indices are ints or # strings # todo change row_indices argument name to row_keys - if isinstance(data, numpy.ndarray): - some_data = data.any() - else: - some_data = any(data) - + some_data = 
data.any() if isinstance(data, numpy.ndarray) else any(data) if not some_data or len(data) == 0: raise ValueError("Must provide data") @@ -147,10 +143,27 @@ return self.__class__(result, motifs=motifs, row_indices=row_order) + def _pairwise_stat(self, func): + """returns self dict of pairwise measurements between arrays""" + if len(self.shape) <= 1 or self.shape[0] <= 1: + return None + + from itertools import combinations + + data = {k: v.array for k, v in self.items()} + keys = list(data) + stats = {} + for k1, k2 in combinations(range(len(keys)), 2): + name1, name2 = keys[k1], keys[k2] + stats[(name1, name2)] = func(data[name1], data[name2]) + stats[(name2, name1)] = stats[(name1, name2)] + + return stats + def _get_ordered_motifs_from_tabular(data, index=1): """backend motif extraction function for motif_counts, motif_freqs and pssm - assumed index 1 are motif strings; motif returned in order of occurrence""" + assumed index 1 are motif strings; motif returned in order of occurrence""" chars = [] for entry in data: @@ -177,7 +190,7 @@ ---------- tab_data : numpy array tab_data is numpy array, with tab_data.shape must be (n, 3) - """ + """ motif = _get_ordered_motifs_from_tabular(tab_data) data = _get_data_from_tabular(tab_data, motif, "int") return MotifCountsArray(data, motif) @@ -269,10 +282,10 @@ def entropy_terms(self): """Returns - ------- - entropies : array - Has same dimension as self.array with - safe log operation applied. + ------- + entropies : array + Has same dimension as self.array with + safe log operation applied. 
""" entropies = safe_p_log_p(self.array) return self.template.wrap(entropies) @@ -364,8 +377,8 @@ self, height=400, width=800, wrap=None, ylim=None, vspace=0.05, colours=None ): """returns a sequence logo Drawable""" - from cogent3.draw.logo import get_mi_char_heights, get_logo from cogent3.draw.drawable import get_domain + from cogent3.draw.logo import get_logo, get_mi_char_heights assert 0 <= vspace <= 1, f"{vspace} not in range 0-1" if ylim is None: @@ -433,6 +446,18 @@ return logo + def pairwise_jsm(self) -> dict: + """pairwise Jensen-Shannon metric""" + from cogent3.maths.measure import jsm + + return self._pairwise_stat(jsm) + + def pairwise_jsd(self) -> dict: + """pairwise Jensen-Shannon divergence""" + from cogent3.maths.measure import jsd + + return self._pairwise_stat(jsd) + class PSSM(_MotifNumberArray): """position specific scoring matrix diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/core/sequence.py python-cogent-2020.12.21a+dfsg/src/cogent3/core/sequence.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/core/sequence.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/core/sequence.py 2020-12-20 23:35:03.000000000 +0000 @@ -35,9 +35,10 @@ ) from numpy.random import permutation +import cogent3 + from cogent3.core.alphabet import AlphabetError -from cogent3.core.genetic_code import DEFAULT as DEFAULT_GENETIC_CODE -from cogent3.core.genetic_code import GeneticCodes +from cogent3.core.genetic_code import get_code from cogent3.core.info import Info as InfoClass from cogent3.format.fasta import alignment_to_fasta from cogent3.maths.stats.contingency import CategoryCounts, TestResult @@ -48,6 +49,7 @@ DistanceFromMatrix, bytes_to_string, get_object_provenance, + get_setting_from_environ, ) from cogent3.util.transform import ( KeepChars, @@ -69,9 +71,9 @@ "Daniel McDonald", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = 
"2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" ARRAY_TYPE = type(array(1)) @@ -150,7 +152,13 @@ return json.dumps(self.to_rich_dict()) def translate(self, *args, **kwargs): - """translate() delegates to self._seq.""" + """returns the result of call str.translate + + Notes + ----- + This is a string method, nothing to do with translating into a + protein sequence. + """ return self._seq.translate(*args, **kwargs) def count(self, item): @@ -648,6 +656,118 @@ def strand_symmetry(self, *args, **kwargs): raise TypeError("must be DNA or RNA moltype") + def _repr_html_(self): + settings = self._repr_policy.copy() + env_vals = get_setting_from_environ( + "COGENT3_ALIGNMENT_REPR_POLICY", + dict(num_pos=int), + ) + settings.update(env_vals) + return self.to_html(limit=settings["num_pos"]) + + def to_html( + self, + wrap=60, + limit=None, + colors=None, + font_size=12, + font_family="Lucida Console", + interleave_len=None, + ): + """returns html with embedded styles for sequence colouring + + Parameters + ---------- + interleave_len + replaced by wrap in version 2021.6 + wrap + maximum number of printed bases, defaults to + alignment length, old name is interleave_len + limit + truncate alignment to this length + colors + {character + moltype. + font_size + in points. 
Affects labels and sequence and line spacing + (proportional to value) + font_family + string denoting font family + + To display in jupyter notebook: + + >>> from IPython.core.display import HTML + >>> HTML(aln.to_html()) + """ + if interleave_len is not None: + cogent3.util.warning.deprecated( + "argument", "interleave_len", "wrap", "2021.6" + ) + wrap = interleave_len if wrap == 60 else wrap + + # todo refactor interleave_len to be wrap + css, styles = self.moltype.get_css_style( + colors=colors, font_size=font_size, font_family=font_family + ) + + seq = str(self) + seq = seq if limit is None else seq[:limit] + gaps = "".join(self.moltype.gaps) + seqlen = len(seq) + start_gap = re.search("^[%s]+" % gaps, "".join(seq)) + end_gap = re.search("[%s]+$" % gaps, "".join(seq)) + + start = 0 if start_gap is None else start_gap.end() + end = len(seq) if end_gap is None else end_gap.start() + seq_style = [] + template = '%%s' + styled_seq = [] + for i in range(seqlen): + char = seq[i] + if i < start or i >= end: + style = "terminal_ambig_%s" % self.moltype.label + else: + style = styles[char] + + seq_style.append(template % style) + styled_seq.append(seq_style[-1] % char) + + # make a html table + seq = array(styled_seq, dtype="O") + table = [""] + seq_ = "" + label_ = '' + num_row_ = '' + for i in range(0, seqlen, wrap): + table.append(num_row_.format(i)) + seqblock = seq[i : i + wrap].tolist() + seqblock = "".join(seqblock) + row = "".join([label_ % self.name, seq_ % seqblock]) + table.append("%s" % row) + table.append("
%s%s
{:,d}
") + class_name = self.__class__.__name__ + if limit and limit < len(self): + summary = f"{len(self)} (truncated to {limit if limit else len(self)}) {class_name}" + else: + summary = f"{len(self)} {class_name}" + + text = [ + "", + '
', + "\n".join(table), + "

%s

" % summary, + "
", + ] + return "\n".join(text) + @total_ordering class Sequence(_Annotatable, SequenceI): @@ -712,7 +832,10 @@ self.info = info if isinstance(orig_seq, _Annotatable): - self.copy_annotations(orig_seq) + for ann in orig_seq.annotations: + ann.copy_annotations_to(self) + + self._repr_policy = dict(num_pos=60) def to_moltype(self, moltype): """returns copy of self with moltype seq @@ -739,12 +862,6 @@ """Returns filtered seq; used to do DNA/RNA conversions.""" return seq - def get_colour_scheme(self, colours): - return {} - - def get_color_scheme(self, colors): # alias to support US spelling - return self.get_colour_scheme(colours=colors) - def copy_annotations(self, other): self.annotations = other.annotations[:] @@ -968,7 +1085,6 @@ def gettype(self): """Return the sequence type.""" - return self.moltype.label def resolveambiguities(self): @@ -1121,14 +1237,6 @@ self._annotations_nucleic_reversed_on(rc) return rc - def _gc_from_arg(self, gc): - # codon_alphabet is being deprecated in favor of genetic codes. - if gc is None: - gc = DEFAULT_GENETIC_CODE - elif isinstance(gc, (int, str)): - gc = GeneticCodes[gc] - return gc - def has_terminal_stop(self, gc=None, allow_partial=False): """Return True if the sequence has a terminal stop codon. 
@@ -1141,7 +1249,7 @@ by 3, ignores the 3' terminal incomplete codon """ - gc = self._gc_from_arg(gc) + gc = get_code(gc) codons = self._seq divisible_by_3 = len(codons) % 3 == 0 end3 = self.__class__(self._seq[-3:]).degap() @@ -1165,7 +1273,7 @@ by 3, ignores the 3' terminal incomplete codon """ - gc = self._gc_from_arg(gc) + gc = get_code(gc) codons = self._seq divisible_by_3 = len(codons) % 3 == 0 @@ -1192,7 +1300,7 @@ ------- sequence of PROTEIN moltype """ - gc = self._gc_from_arg(gc) + gc = get_code(gc) codon_alphabet = self.codon_alphabet(gc).with_gap_motif() # translate the codons translation = [] @@ -1227,7 +1335,7 @@ return translation def get_orf_positions(self, gc=None, atg=False): - gc = self._gc_from_arg(gc) + gc = get_code(gc) orfs = [] start = None protein = self.get_translation(gc=gc) @@ -1278,14 +1386,6 @@ class DnaSequence(NucleicAcidSequence): """Holds the standard DNA sequence.""" - def get_colour_scheme(self, colours): - return { - "A": colours.black, - "T": colours.red, - "C": colours.blue, - "G": colours.green, - } - def _seq_filter(self, seq): """Converts U to T.""" return seq.replace("u", "t").replace("U", "T") @@ -1294,14 +1394,6 @@ class RnaSequence(NucleicAcidSequence): """Holds the standard RNA sequence.""" - def get_colour_scheme(self, colours): - return { - "A": colours.black, - "U": colours.red, - "C": colours.blue, - "G": colours.green, - } - def _seq_filter(self, seq): """Converts T to U.""" return seq.replace("t", "u").replace("T", "U") @@ -1386,6 +1478,7 @@ self.moltype = self.alphabet.moltype self.info = info + self._repr_policy = dict(num_pos=60) def __getitem__(self, *args): """__getitem__ returns char or slice, as same class.""" @@ -1616,8 +1709,7 @@ return list(self.gap_array().nonzero()[0]) def frac_same_gaps(self, other): - """Returns fraction of positions where gaps match other's gaps. 
- """ + """Returns fraction of positions where gaps match other's gaps.""" if not other: return 0 self_gaps = self.gap_array() diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/core/tree.py python-cogent-2020.12.21a+dfsg/src/cogent3/core/tree.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/core/tree.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/core/tree.py 2020-12-20 23:35:03.000000000 +0000 @@ -37,7 +37,11 @@ from numpy import argsort, ceil, log, zeros from cogent3.maths.stats.test import correlation -from cogent3.util.misc import get_object_provenance +from cogent3.util.misc import ( + atomic_write, + get_format_suffixes, + get_object_provenance, +) __author__ = "Gavin Huttley, Peter Maxwell and Rob Knight" @@ -55,7 +59,7 @@ "Justin Kuczynski", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -1293,7 +1297,7 @@ return new_tree def _edgecount(self, parent, cache): - """"The number of edges beyond 'parent' in the direction of 'self', + """ "The number of edges beyond 'parent' in the direction of 'self', unrooted""" neighbours = self._getNeighboursExcept(parent) key = (id(parent), id(self)) @@ -1408,8 +1412,7 @@ return "\n".join(lines) def _getXmlLines(self, indent=0, parent_params=None): - """Return the xml strings for this edge. - """ + """Return the xml strings for this edge.""" params = {} if parent_params is not None: params.update(parent_params) @@ -1444,22 +1447,21 @@ with_distances whether branch lengths are included in string. format - default is newick, xml is alternate. Argument overrides + default is newick, xml and json are alternate. Argument overrides the filename suffix. All attributes are saved in the xml format. 
""" - if format: - xml = format.lower() == "xml" - else: - xml = filename.lower().endswith("xml") + file_format, _ = get_format_suffixes(filename) + if file_format == "json": + with atomic_write(filename, mode="wt") as f: + f.write(self.to_json()) + return - if xml: - data = self.get_xml() - else: - data = self.get_newick(with_distances=with_distances) - outf = open(filename, "w") - outf.writelines(data) - outf.close() + xml = format.lower() == "xml" if format else filename.lower().endswith("xml") + data = self.get_xml() if xml else self.get_newick(with_distances=with_distances) + + with atomic_write(filename, mode="wt") as outf: + outf.writelines(data) def get_node_names(self, includeself=True, tipsonly=False): """Return a list of edges from this edge - may or may not include self. @@ -1483,8 +1485,7 @@ return [node.name for node in nodes] def get_tip_names(self, includeself=False): - """return the list of the names of all tips contained by this edge - """ + """return the list of the names of all tips contained by this edge""" return self.get_node_names(includeself, tipsonly=True) def get_edge_vector(self, include_root=True): @@ -1993,8 +1994,7 @@ return self is other or me.same_shape(them) def unrooted(self): - """A tree with at least 3 children at the root. - """ + """A tree with at least 3 children at the root.""" constructor = self._default_tree_constructor() need_to_expand = len(self.children) < 3 new_children = [] @@ -2028,7 +2028,7 @@ return tip.parent.unrooted_deepcopy() def root_at_midpoint(self): - """ return a new tree rooted at midpoint of the two tips farthest apart + """return a new tree rooted at midpoint of the two tips farthest apart this fn doesn't preserve the internal node naming or structure, but does keep tip to tip distances correct. 
uses unrooted_deepcopy() diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/data/energy_params.py python-cogent-2020.12.21a+dfsg/src/cogent3/data/energy_params.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/data/energy_params.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/data/energy_params.py 2020-12-20 23:35:03.000000000 +0000 @@ -18,7 +18,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Amanda Birmingham", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Amanda Birmingham" __email__ = "amanda.birmingham@thermofisher.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/data/__init__.py python-cogent-2020.12.21a+dfsg/src/cogent3/data/__init__.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/data/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/data/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -6,7 +6,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight", "Amanda Birmingham"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/data/molecular_weight.py python-cogent-2020.12.21a+dfsg/src/cogent3/data/molecular_weight.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/data/molecular_weight.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/data/molecular_weight.py 2020-12-20 23:35:03.000000000 +0000 @@ -5,9 +5,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = 
"2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" ProteinWeights = { diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/draw/dendrogram.py python-cogent-2020.12.21a+dfsg/src/cogent3/draw/dendrogram.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/draw/dendrogram.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/draw/dendrogram.py 2020-12-20 23:35:03.000000000 +0000 @@ -13,7 +13,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Rahul Ghangas"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" @@ -269,11 +269,10 @@ if self._y is None: num_kids = len(self.children) even = num_kids % 2 == 0 + i = floor(num_kids / 2) if even: - i = floor(num_kids / 2) val = (self.children[i].y + self.children[i - 1].y) / 2 else: - i = floor(num_kids / 2) val = self.children[i].y self._y = val return self._y @@ -544,11 +543,7 @@ if length_attr is None and not contemporaneous: contemporaneous = tree.children[0].length is None - if contemporaneous: - length_attr = "frac_pos" - else: - length_attr = length_attr or "length" - + length_attr = "frac_pos" if contemporaneous else length_attr or "length" kwargs = UnionDict(length_attr=length_attr) if contemporaneous else {} self.tree = klass(tree, **kwargs) self.tree.propagate_properties() @@ -576,16 +571,13 @@ @property def label_pad(self): - if isinstance(self.tree, CircularTreeGeometry): - default = 0.15 - else: - default = 0.025 - - if self._label_pad is None and not self.contemporaneous: - max_x = max(self.tree.max_x, abs(self.tree.min_x)) - self._label_pad = max_x * default - elif self._label_pad is None: - self._label_pad = default + default = 0.15 if isinstance(self.tree, CircularTreeGeometry) else 0.025 + if self._label_pad is None: + if not 
self.contemporaneous: + max_x = max(self.tree.max_x, abs(self.tree.min_x)) + self._label_pad = max_x * default + else: + self._label_pad = default return self._label_pad @label_pad.setter @@ -623,7 +615,7 @@ @contemporaneous.setter def contemporaneous(self, value): - if not type(value) == bool: + if type(value) != bool: raise TypeError if self._contemporaneous != value: klass = self.tree.__class__ @@ -642,6 +634,11 @@ def tip_font(self): return self._tip_font + @tip_font.setter + def tip_font(self, val): + """update tip font settings""" + self._tip_font = val + def _scale_label_pad(self): """returns the label pad scaled by maximum dist to tip""" return self.label_pad @@ -660,22 +657,10 @@ if not self.scale_bar or self.contemporaneous: return None, None - if "left" in self.scale_bar: - x = self.tree.min_x - else: - x = self.tree.max_x - - if "bottom" in self.scale_bar: - y = self.tree.min_y - else: - y = self.tree.max_y - + x = self.tree.min_x if "left" in self.scale_bar else self.tree.max_x + y = self.tree.min_y if "bottom" in self.scale_bar else self.tree.max_y scale = 0.1 * self.tree.max_x - if scale < 1e-4: - text = "{:.2e}".format(scale) - else: - text = "{:.2f}".format(scale) - + text = "{:.1e}".format(scale) if scale < 1e-2 else "{:.2f}".format(scale) shape = { "type": "line", "x0": x, @@ -839,6 +824,8 @@ if type(edges) == str: edges = [edges] edges = frozenset(edges) + if not edges.issubset({edge.name for edge in self.tree.preorder()}): + raise ValueError("edge not present in tree") style = UnionDict(width=self._line_width, color=self._line_color) style.update(line) self._edge_sets[edges] = UnionDict(legendgroup=legendgroup, line=style) @@ -896,10 +883,9 @@ ------- list of edge names """ - names = self.tree.get_edge_names( + return self.tree.get_edge_names( tip1, tip2, stem=stem, clade=clade, outgroup_name=outgroup ) - return names @property def scale_bar(self): diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/draw/dotplot.py 
python-cogent-2020.12.21a+dfsg/src/cogent3/draw/dotplot.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/draw/dotplot.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/draw/dotplot.py 2020-12-20 23:35:03.000000000 +0000 @@ -8,7 +8,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Peter Maxwell", "Rahul Ghangas"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" @@ -266,7 +266,7 @@ ) fwd, rev = self._fwd, self._rev - if not self.title: + if self.title is None: title = ( f"Window={self._window}, Matched ≥ {self._threshold}/" f"{self._window} & Gap ≤ {self._min_gap}" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/draw/drawable.py python-cogent-2020.12.21a+dfsg/src/cogent3/draw/drawable.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/draw/drawable.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/draw/drawable.py 2020-12-20 23:35:03.000000000 +0000 @@ -12,7 +12,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rahul Ghangas", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" @@ -58,8 +58,9 @@ # this is an ugly hack to get around plotly's NOT robust handling of script path # for automated file naming import inspect - from plotly.io._renderers import renderers + from plotly.io import _base_renderers as base_render + from plotly.io._renderers import renderers class SphinxGalleryRenderer(base_render.ExternalRenderer): def render(self, fig_dict): @@ -254,7 +255,8 @@ kwargs["width"] = kwargs.get("width", fig.layout.width) kwargs["height"] = kwargs.get("height", fig.layout.height) - write_image(fig, path, **kwargs) + path = pathlib.Path(path).expanduser().absolute() 
+ write_image(fig, str(path), **kwargs) def to_image(self, format="png", **kwargs): """creates static image, suffix dictates format""" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/draw/__init__.py python-cogent-2020.12.21a+dfsg/src/cogent3/draw/__init__.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/draw/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/draw/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -14,5 +14,5 @@ "Sheng Han Moses Koh", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/draw/letter.py python-cogent-2020.12.21a+dfsg/src/cogent3/draw/letter.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/draw/letter.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/draw/letter.py 2020-12-20 23:35:03.000000000 +0000 @@ -27,7 +27,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Sheng Han Moses Koh", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" @@ -275,7 +275,15 @@ 13.816700519612363, 0.0, ], - ["C", 7.822582093118824, 0.0, 0.0, 12.142256518341865, 0.0, 49.83879327488448,], + [ + "C", + 7.822582093118824, + 0.0, + 0.0, + 12.142256518341865, + 0.0, + 49.83879327488448, + ], [ "C", 0.0, @@ -489,7 +497,15 @@ 14.202274496850151, 0.0, ], - ["C", 7.788618078002973, 0.0, 0.0, 12.182388391074733, 0.0, 49.10620481899415,], + [ + "C", + 7.788618078002973, + 0.0, + 0.0, + 12.182388391074733, + 0.0, + 49.10620481899415, + ], [ "C", 0.0, @@ -792,7 +808,15 @@ 12.718800552930814, 0.0, ], - ["C", 7.322898967011539, 0.0, 0.0, 12.066392909416166, 0.0, 50.35371605139047,], + [ + "C", + 7.322898967011539, + 0.0, + 0.0, + 12.066392909416166, + 0.0, + 50.35371605139047, + ], [ "C", 0.0, @@ -1549,13 +1573,20 @@ 
["V", 100.00000000000001], ["Z"], ], - "-": [["M", 0.0, 100.0], ["H", 25.0], ["V", 0.0], ["H", 0.0], ["V", 100.0], ["Z"],], + "-": [ + ["M", 0.0, 100.0], + ["H", 25.0], + ["V", 0.0], + ["H", 0.0], + ["V", 100.0], + ["Z"], + ], } def letter_stack(data, x, width, colours, axnum): """returns plotly SVG letter shapes with letters stacked on top of each other - + Parameters ---------- data diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/draw/logo.py python-cogent-2020.12.21a+dfsg/src/cogent3/draw/logo.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/draw/logo.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/draw/logo.py 2020-12-20 23:35:03.000000000 +0000 @@ -8,7 +8,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" @@ -58,7 +58,9 @@ xanchor = "x" if axnum == 1 else f"x{axnum}" yanchor = "y" if axnum == 1 else f"y{axnum}" layout[f"x{axis}"] = dict( - anchor=yanchor, tickfont=dict(size=xtick_fontsize), ticks="inside", + anchor=yanchor, + tickfont=dict(size=xtick_fontsize), + ticks="inside", ) layout[f"y{axis}"] = dict( diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/best_likelihood.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/best_likelihood.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/best_likelihood.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/best_likelihood.py 2020-12-20 23:35:03.000000000 +0000 @@ -17,7 +17,7 @@ __credits__ = ["Helen Lindsay", "Gavin Huttley", "Daniel McDonald"] cite = "Goldman, N. (1993). Statistical tests of models of DNA substitution. 
J Mol Evol, 36: 182-98" __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/bootstrap.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/bootstrap.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/bootstrap.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/bootstrap.py 2020-12-20 23:35:03.000000000 +0000 @@ -38,7 +38,7 @@ "Peter Maxwell", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/coevolution.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/coevolution.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/coevolution.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/coevolution.py 2020-12-20 23:35:03.000000000 +0000 @@ -113,7 +113,7 @@ "Rob Knight", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Greg Caporaso" __email__ = "gregcaporaso@gmail.com" __status__ = "Beta" @@ -137,8 +137,7 @@ def mi(h1, h2, joint_h): - """ Calc Mutual Information given two entropies and their joint entropy - """ + """Calc Mutual Information given two entropies and their joint entropy""" return h1 + h2 - joint_h @@ -153,14 +152,14 @@ def join_positions(pos1, pos2): - """ Merge two positions and return as a list of strings + """Merge two positions and return as a list of strings - pos1: iterable object containing the first positions data - pos2: iterable object containing the second positions data + pos1: iterable object containing the first positions data + pos2: iterable object containing the second positions data - Example: - >>> join_positions('ABCD','1234') - 
['A1', 'B2', 'C3', 'D4'] + Example: + >>> join_positions('ABCD','1234') + ['A1', 'B2', 'C3', 'D4'] """ return ["".join([r1, r2]) for r1, r2 in zip(pos1, pos2)] @@ -175,8 +174,7 @@ def ignore_excludes(pos, excludes=DEFAULT_EXCLUDES): - """ Return position data as-is (results in excludes treated as other chars) - """ + """Return position data as-is (results in excludes treated as other chars)""" return pos @@ -194,26 +192,26 @@ excludes=DEFAULT_EXCLUDES, exclude_handler=None, ): - """ Calculate mutual information of a pair of alignment positions + """Calculate mutual information of a pair of alignment positions - alignment: the full alignment object - pos1: index of 1st position in alignment to be compared - (zero-based, not one-based) - pos2: index of 2nd position in alignment to be compared - (zero-based, not one-based) - h1: entropy of pos1, if already calculated (to avoid time to recalc) - h2: entropy of pos2, if already calculated (to avoid time to recalc) - mi_calculator: a function which calculated MI from two entropies and - their joint entropy -- see mi and normalized_mi for examples - null_value: the value to be returned if mi cannot be calculated (e.g., - if mi_calculator == normalized_mi and joint_h = 0.0) - excludes: iterable objects containing characters that require special - handling -- by default, if a position contains an exclude, null_value - will be returned. For non-default handling, pass an exclude_handler - exclude_handler: a function which takes position data and returns it - with exclude characters processed in someway. Position data should be - an iterable object containing the characters present at each position. 
- f(position_data,excludes=gDefaultExcludes) -> position_data + alignment: the full alignment object + pos1: index of 1st position in alignment to be compared + (zero-based, not one-based) + pos2: index of 2nd position in alignment to be compared + (zero-based, not one-based) + h1: entropy of pos1, if already calculated (to avoid time to recalc) + h2: entropy of pos2, if already calculated (to avoid time to recalc) + mi_calculator: a function which calculated MI from two entropies and + their joint entropy -- see mi and normalized_mi for examples + null_value: the value to be returned if mi cannot be calculated (e.g., + if mi_calculator == normalized_mi and joint_h = 0.0) + excludes: iterable objects containing characters that require special + handling -- by default, if a position contains an exclude, null_value + will be returned. For non-default handling, pass an exclude_handler + exclude_handler: a function which takes position data and returns it + with exclude characters processed in someway. Position data should be + an iterable object containing the characters present at each position. 
+ f(position_data,excludes=gDefaultExcludes) -> position_data """ col1 = list(alignment[pos1].positions)[0] @@ -261,25 +259,25 @@ excludes=DEFAULT_EXCLUDES, exclude_handler=None, ): - """ Calc mi b/w position and all other positions in an alignment + """Calc mi b/w position and all other positions in an alignment - alignment: the full alignment object - position: the position number of interest -- NOTE: this is the - position index, not the sequenece position (so zero-indexed, not - one-indexed) - positional_entropies: a list containing the entropy of each position in - the alignment -- these can be passed in to avoid recalculating if - calling this function over more than one position (e.g., in - mi_alignment) - mi_calculator: a function which calculated MI from two entropies and - their joint entropy -- see mi and normalized_mi for examples - null_value: the value to be returned if mi cannot be calculated (e.g., - if mi_calculator == normalized_mi and joint_h = 0.0) - excludes: iterable objects containing characters that require special - handling -- by default, if a position contains an exclude, null_value - will be returned. For non-default handling, pass an exclude_handler - exclude_handler: a function which takes a position and returns it - with exclude characters processed in someway. 
+ alignment: the full alignment object + position: the position number of interest -- NOTE: this is the + position index, not the sequenece position (so zero-indexed, not + one-indexed) + positional_entropies: a list containing the entropy of each position in + the alignment -- these can be passed in to avoid recalculating if + calling this function over more than one position (e.g., in + mi_alignment) + mi_calculator: a function which calculated MI from two entropies and + their joint entropy -- see mi and normalized_mi for examples + null_value: the value to be returned if mi cannot be calculated (e.g., + if mi_calculator == normalized_mi and joint_h = 0.0) + excludes: iterable objects containing characters that require special + handling -- by default, if a position contains an exclude, null_value + will be returned. For non-default handling, pass an exclude_handler + exclude_handler: a function which takes a position and returns it + with exclude characters processed in someway. """ aln_length = len(alignment) @@ -314,18 +312,18 @@ excludes=DEFAULT_EXCLUDES, exclude_handler=None, ): - """ Calc mi over all position pairs in an alignment + """Calc mi over all position pairs in an alignment - alignment: the full alignment object - mi_calculator: a function which calculated MI from two entropies and - their joint entropy -- see mi and normalized_mi for examples - null_value: the value to be returned if mi cannot be calculated (e.g., - if mi_calculator == normalized_mi and joint_h = 0.0) - excludes: iterable objects containing characters that require special - handling -- by default, if a position contains an exclude, null_value - will be returned. For non-default handling, pass an exclude_handler - exclude_handler: a function which takes a position and returns it - with exclude characters processed in someway. 
+ alignment: the full alignment object + mi_calculator: a function which calculated MI from two entropies and + their joint entropy -- see mi and normalized_mi for examples + null_value: the value to be returned if mi cannot be calculated (e.g., + if mi_calculator == normalized_mi and joint_h = 0.0) + excludes: iterable objects containing characters that require special + handling -- by default, if a position contains an exclude, null_value + will be returned. For non-default handling, pass an exclude_handler + exclude_handler: a function which takes a position and returns it + with exclude characters processed in someway. """ aln_length = len(alignment) @@ -376,20 +374,20 @@ ): """Calc normalized mutual information of a pair of alignment positions - alignment: the full alignment object - pos1: index of 1st position in alignment to be compared - (zero-based, not one-based) - pos2: index of 2nd position in alignment to be compared - (zero-based, not one-based) - h1: entropy of pos1, if already calculated (to avoid time to recalc) - h2: entropy of pos2, if already calculated (to avoid time to recalc) - null_value: the value to be returned if mi cannot be calculated (e.g., - if mi_calculator == normalized_mi and joint_h = 0.0) - excludes: iterable objects containing characters that require special - handling -- by default, if a position contains an exclude, null_value - will be returned. For non-default handling, pass an exclude_handler - exclude_handler: a function which takes a position and returns it - with exclude characters processed in someway. 
+ alignment: the full alignment object + pos1: index of 1st position in alignment to be compared + (zero-based, not one-based) + pos2: index of 2nd position in alignment to be compared + (zero-based, not one-based) + h1: entropy of pos1, if already calculated (to avoid time to recalc) + h2: entropy of pos2, if already calculated (to avoid time to recalc) + null_value: the value to be returned if mi cannot be calculated (e.g., + if mi_calculator == normalized_mi and joint_h = 0.0) + excludes: iterable objects containing characters that require special + handling -- by default, if a position contains an exclude, null_value + will be returned. For non-default handling, pass an exclude_handler + exclude_handler: a function which takes a position and returns it + with exclude characters processed in someway. """ return mi_pair( @@ -416,23 +414,23 @@ excludes=DEFAULT_EXCLUDES, exclude_handler=None, ): - """ Calc normalized mi b/w position and all other positions in an alignment + """Calc normalized mi b/w position and all other positions in an alignment - alignment: the full alignment object - position: the position number of interest -- NOTE: this is the - position index, not the sequenece position (so zero-indexed, not - one-indexed) - positional_entropies: a list containing the entropy of each position in - the alignment -- these can be passed in to avoid recalculating if - calling this function over more than one position (e.g., in - mi_alignment) - null_value: the value to be returned if mi cannot be calculated (e.g., - if mi_calculator == normalized_mi and joint_h = 0.0) - excludes: iterable objects containing characters that require special - handling -- by default, if a position contains an exclude, null_value - will be returned. For non-default handling, pass an exclude_handler - exclude_handler: a function which takes a position and returns it - with exclude characters processed in someway. 
+ alignment: the full alignment object + position: the position number of interest -- NOTE: this is the + position index, not the sequenece position (so zero-indexed, not + one-indexed) + positional_entropies: a list containing the entropy of each position in + the alignment -- these can be passed in to avoid recalculating if + calling this function over more than one position (e.g., in + mi_alignment) + null_value: the value to be returned if mi cannot be calculated (e.g., + if mi_calculator == normalized_mi and joint_h = 0.0) + excludes: iterable objects containing characters that require special + handling -- by default, if a position contains an exclude, null_value + will be returned. For non-default handling, pass an exclude_handler + exclude_handler: a function which takes a position and returns it + with exclude characters processed in someway. """ return mi_position( @@ -455,16 +453,16 @@ excludes=DEFAULT_EXCLUDES, exclude_handler=None, ): - """ Calc normalized mi over all position pairs in an alignment + """Calc normalized mi over all position pairs in an alignment - alignment: the full alignment object - null_value: the value to be returned if mi cannot be calculated (e.g., - if mi_calculator == normalized_mi and joint_h = 0.0) - excludes: iterable objects containing characters that require special - handling -- by default, if a position contains an exclude, null_value - will be returned. For non-default handling, pass an exclude_handler - exclude_handler: a function which takes a position and returns it - with exclude characters processed in someway. + alignment: the full alignment object + null_value: the value to be returned if mi cannot be calculated (e.g., + if mi_calculator == normalized_mi and joint_h = 0.0) + excludes: iterable objects containing characters that require special + handling -- by default, if a position contains an exclude, null_value + will be returned. 
For non-default handling, pass an exclude_handler + exclude_handler: a function which takes a position and returns it + with exclude characters processed in someway. """ return mi_alignment( alignment=alignment, @@ -551,13 +549,13 @@ def probs_from_dict(d, alphabet): - """ Convert dict of alphabet char probabilities to list in alphabet's order + """Convert dict of alphabet char probabilities to list in alphabet's order - d: probabilities of observing each character in alphabet (dict indexed - by char) - alphabet: the characters in the alphabet -- provided for list order. - Must iterate over the ordered characters in the alphabet (e.g., a list - of characters or an Alphabet object) + d: probabilities of observing each character in alphabet (dict indexed + by char) + alphabet: the characters in the alphabet -- provided for list order. + Must iterate over the ordered characters in the alphabet (e.g., a list + of characters or an Alphabet object) """ return array([d[c] for c in alphabet]) @@ -566,20 +564,20 @@ def freqs_from_aln(aln, alphabet, scaled_aln_size=100): """Return the frequencies in aln of chars in alphabet's order - aln: the alignment object - alphabet: the characters in the alphabet -- provided for list order. - Must iterate over the ordered characters in the alphabet (e.g., a list - of characters or an Alphabet object) - scaled_aln_size: the scaled number of sequences in the alignment. The - original SCA implementation treats all alignments as if they contained - 100 sequences when calculating frequencies and probabilities. 100 is - therefore the default value. + aln: the alignment object + alphabet: the characters in the alphabet -- provided for list order. + Must iterate over the ordered characters in the alphabet (e.g., a list + of characters or an Alphabet object) + scaled_aln_size: the scaled number of sequences in the alignment. 
The + original SCA implementation treats all alignments as if they contained + 100 sequences when calculating frequencies and probabilities. 100 is + therefore the default value. - *Warning: characters in aln that are not in alphabet are silently - ignored. Is this the desired behavior? + *Warning: characters in aln that are not in alphabet are silently + ignored. Is this the desired behavior? - Need to combine this function with get_position_frequences (and renamed - that one to be more generic) since they're doing the same thing now. + Need to combine this function with get_position_frequences (and renamed + that one to be more generic) since they're doing the same thing now. """ alphabet_as_indices = array([aln.alphabet.to_indices(alphabet)]).transpose() @@ -590,19 +588,19 @@ def get_positional_frequencies(aln, position_number, alphabet, scaled_aln_size=100): """Return the freqs in aln[position_number] of chars in alphabet's order - aln: the alignment object - position_number: the index of the position of interest in aln - (note: zero-based alignment indexing) - alphabet: the characters in the alphabet -- provided for list order. - Must iterate over the ordered characters in the alphabet (e.g., a list - of characters or an Alphabet object) - scaled_aln_size: the scaled number of sequences in the alignment. The - original SCA implementation treats all alignments as if they contained - 100 sequences when calculating frequencies and probabilities. 100 is - therefore the default value. + aln: the alignment object + position_number: the index of the position of interest in aln + (note: zero-based alignment indexing) + alphabet: the characters in the alphabet -- provided for list order. + Must iterate over the ordered characters in the alphabet (e.g., a list + of characters or an Alphabet object) + scaled_aln_size: the scaled number of sequences in the alignment. 
The + original SCA implementation treats all alignments as if they contained + 100 sequences when calculating frequencies and probabilities. 100 is + therefore the default value. - *Warning: characters in aln that are not in alphabet are silently - ignored. Is this the desired behavior? + *Warning: characters in aln that are not in alphabet are silently + ignored. Is this the desired behavior? """ alphabet_as_indices = array([aln.alphabet.to_indices(alphabet)]).transpose() @@ -614,24 +612,24 @@ def get_positional_probabilities(pos_freqs, natural_probs, scaled_aln_size=100): """Get probs of observering the freq of each char given it's natural freq - In Suel 2003 supplementary material, this step is defined as: - "... each element is the binomial probability of observing each - amino acid residue at position j given its mean frequency in - all natural proteins." - This function performs the calculate for a single position. - - pos_freqs: the frequencies of each char in the alphabet at a - position-of-interest in the alignment (list of floats, typically - output of get_positional_frequencies) - natural_probs: the natural probabilities of observing each char - in the alphabet (list of floats: typically output of probs_from_dict) - scaled_aln_size: the scaled number of sequences in the alignment. The - original SCA implementation treats all alignments as if they contained - 100 sequences when calculating frequencies and probabilities. 100 is - therefore the default value. + In Suel 2003 supplementary material, this step is defined as: + "... each element is the binomial probability of observing each + amino acid residue at position j given its mean frequency in + all natural proteins." + This function performs the calculate for a single position. 
+ + pos_freqs: the frequencies of each char in the alphabet at a + position-of-interest in the alignment (list of floats, typically + output of get_positional_frequencies) + natural_probs: the natural probabilities of observing each char + in the alphabet (list of floats: typically output of probs_from_dict) + scaled_aln_size: the scaled number of sequences in the alignment. The + original SCA implementation treats all alignments as if they contained + 100 sequences when calculating frequencies and probabilities. 100 is + therefore the default value. - Note: It is critical that the values in pos_freqs and natural_probs are - in the same order, which should be the order of chars in the alphabet. + Note: It is critical that the values in pos_freqs and natural_probs are + in the same order, which should be the order of chars in the alphabet. """ results = [] @@ -649,19 +647,19 @@ def get_subalignments(aln, position, selections): - """ returns subalns w/ seq[pos] == selection for each in selections - aln: an alignment object - position: int in alignment to be checked for each perturbation - selections: characters which must be present at seq[pos] for - seq to be in subalignment - - Note: This method returns a list of subalignments corresponding - to the list of selections. So, if you specify selections as - ['A','G'], you would get two subalignments back -- the first - containing sequences with 'A' at position, and the second - containing sequences with 'G' at position. If you want all - sequences containing either 'A' or 'G', merge the resulting - subalignments. + """returns subalns w/ seq[pos] == selection for each in selections + aln: an alignment object + position: int in alignment to be checked for each perturbation + selections: characters which must be present at seq[pos] for + seq to be in subalignment + + Note: This method returns a list of subalignments corresponding + to the list of selections. 
So, if you specify selections as + ['A','G'], you would get two subalignments back -- the first + containing sequences with 'A' at position, and the second + containing sequences with 'G' at position. If you want all + sequences containing either 'A' or 'G', merge the resulting + subalignments. """ result = [] @@ -672,14 +670,14 @@ def get_dg(position_probs, aln_probs): - """ Return delta_g vector + """Return delta_g vector - position_probs: the prob of observing each alphabet chars frequency in - the alignment position-of-interest, given it's background frequency - in all proteins (list of floats, typically the output of - get_positional_probabilities) - aln_probs: the prob of observing each alphabet chars frequency in the - full alignment, given it's background frequency (list of floats) + position_probs: the prob of observing each alphabet chars frequency in + the alignment position-of-interest, given it's background frequency + in all proteins (list of floats, typically the output of + get_positional_probabilities) + aln_probs: the prob of observing each alphabet chars frequency in the + full alignment, given it's background frequency (list of floats) """ results = [] @@ -691,30 +689,30 @@ def get_dgg(all_dgs, subaln_dgs, scaled_aln_size=100): """Return delta_delta_g value - all_dgs: the dg vector for a position-of-interest in the alignment - (list of floats, typically the output of get_dg) - subaln_dgs: the dg vector for a sub-alignment of the position-of- - interest in the alignment (list of floats, typically the output - of get_dg applied to a sub-alignment) - scaled_aln_size: the scaled number of sequences in the alignment. The - original SCA implementation treats all alignments as if they contained - 100 sequences when calculating frequencies and probabilities. 100 is - therefore the default value. - - * There are two weird issues in this function with respect to the - desciption of the algorithm in the Suel 2003 supplementary material. 
- In order to get the values presented in their GPCR paper, we need to - (1) divide the euclidian norm by the scaled_aln_size, and then (2) - multiply the result by e. - ** IT IS CRITICAL TO UNDERSTAND WHY - WE NEED TO APPLY THESE STEPS BEFORE PUBLISHING ANYTHING THAT USES - THIS CODE.** - - * A possible reason for the mysterious e scaling is that we are - misinterpreting what they mean when they say ddg is 'the magnitude of - this difference vector.' We are assuming they are referring to the - Euclidian norm, but until I see their code, I can't be sure about - this. + all_dgs: the dg vector for a position-of-interest in the alignment + (list of floats, typically the output of get_dg) + subaln_dgs: the dg vector for a sub-alignment of the position-of- + interest in the alignment (list of floats, typically the output + of get_dg applied to a sub-alignment) + scaled_aln_size: the scaled number of sequences in the alignment. The + original SCA implementation treats all alignments as if they contained + 100 sequences when calculating frequencies and probabilities. 100 is + therefore the default value. + + * There are two weird issues in this function with respect to the + desciption of the algorithm in the Suel 2003 supplementary material. + In order to get the values presented in their GPCR paper, we need to + (1) divide the euclidian norm by the scaled_aln_size, and then (2) + multiply the result by e. + ** IT IS CRITICAL TO UNDERSTAND WHY + WE NEED TO APPLY THESE STEPS BEFORE PUBLISHING ANYTHING THAT USES + THIS CODE.** + + * A possible reason for the mysterious e scaling is that we are + misinterpreting what they mean when they say ddg is 'the magnitude of + this difference vector.' We are assuming they are referring to the + Euclidian norm, but until I see their code, I can't be sure about + this. 
""" return norm(all_dgs - subaln_dgs) / scaled_aln_size * e @@ -877,48 +875,48 @@ alphabet=default_sca_alphabet, background_freqs=default_sca_freqs, ): - """ Calculate statistical coupling b/w a column and all other columns + """Calculate statistical coupling b/w a column and all other columns - alignment: full alignment object - position: the position of interest to probe for statistical coupling - (subalignments will be generated based on allowed perturbations - at this position) -- int, zero-based indexing into alignment - cutoff: the percentage of sequences that must contain a specific - char at a specific pos1 to result in an allowed sub-alignment. - (According to the Ranganathan papers, this should be the value - determined by their 3rd criteria.) - position_freqs: if precalculated, a matrix containing the output - of get_positional_frequencies for each position in the alignment. - This will typically be used only when sca_position is called from - sca_alignment, and these values are therefore pre-calculated. - position_probs: if precalculated, a matrix containing the output - of get_positional_probabilities for each position in the alignment. - This will typically be used only when sca_position is called from - sca_alignment, and these values are therefore pre-calculated. - dgs: if precalculated, a matrix containing the output - of get_dg for each position in the alignment. - This will typically be used only when sca_position is called from - sca_alignment, and these values are therefore pre-calculated. - perturbations: if precalculated, a matrix containing the output - of get_allowed_perturbations for each position in the alignment. - This will typically be used only when sca_position is called from - sca_alignment, and these values are therefore pre-calculated. - scaled_aln_size: the scaled number of sequences in the alignment. 
The - original SCA implementation treats all alignments as if they contained - 100 sequences when calculating frequencies and probabilities. 100 is - therefore the default value. - null_value: the value which should be returned if SCA cannot or - should not be calculated (e.g., no allowed perturbations or - pos1==pos2, respectively). - return_all: if cutoff <= 0.50, it is possible that there will be more - than one allowed_perturbation per position. In these cases, either all - of the values could be returned (return_all=True) or the max of the - values can be returned (return_all=False, default). If you'd like one - value, but not the max, wrap this function with return_all=True, and - handle the return value as desired. - alphabet: an ordered iterable object containing the characters in the - alphabet. For example, this can be a CharAlphabet object, a list, - or a string. + alignment: full alignment object + position: the position of interest to probe for statistical coupling + (subalignments will be generated based on allowed perturbations + at this position) -- int, zero-based indexing into alignment + cutoff: the percentage of sequences that must contain a specific + char at a specific pos1 to result in an allowed sub-alignment. + (According to the Ranganathan papers, this should be the value + determined by their 3rd criteria.) + position_freqs: if precalculated, a matrix containing the output + of get_positional_frequencies for each position in the alignment. + This will typically be used only when sca_position is called from + sca_alignment, and these values are therefore pre-calculated. + position_probs: if precalculated, a matrix containing the output + of get_positional_probabilities for each position in the alignment. + This will typically be used only when sca_position is called from + sca_alignment, and these values are therefore pre-calculated. + dgs: if precalculated, a matrix containing the output + of get_dg for each position in the alignment. 
+ This will typically be used only when sca_position is called from + sca_alignment, and these values are therefore pre-calculated. + perturbations: if precalculated, a matrix containing the output + of get_allowed_perturbations for each position in the alignment. + This will typically be used only when sca_position is called from + sca_alignment, and these values are therefore pre-calculated. + scaled_aln_size: the scaled number of sequences in the alignment. The + original SCA implementation treats all alignments as if they contained + 100 sequences when calculating frequencies and probabilities. 100 is + therefore the default value. + null_value: the value which should be returned if SCA cannot or + should not be calculated (e.g., no allowed perturbations or + pos1==pos2, respectively). + return_all: if cutoff <= 0.50, it is possible that there will be more + than one allowed_perturbation per position. In these cases, either all + of the values could be returned (return_all=True) or the max of the + values can be returned (return_all=False, default). If you'd like one + value, but not the max, wrap this function with return_all=True, and + handle the return value as desired. + alphabet: an ordered iterable object containing the characters in the + alphabet. For example, this can be a CharAlphabet object, a list, + or a string. """ num_seqs = alignment.num_seqs @@ -985,29 +983,29 @@ alphabet=default_sca_alphabet, background_freqs=default_sca_freqs, ): - """ Calculate statistical coupling b/w all columns in alignment + """Calculate statistical coupling b/w all columns in alignment - alignment: full alignment object - cutoff: the percentage of sequences that must contain a specific - char at a specific pos1 to result in an allowed sub-alignment. - (According to the Ranganathan papers, this should be the value - determined by their 3rd criteria.) - scaled_aln_size: the scaled number of sequences in the alignment. 
The - original SCA implementation treats all alignments as if they contained - 100 sequences when calculating frequencies and probabilities. 100 is - therefore the default value. - null_value: the value which should be returned if SCA cannot or - should not be calculated (e.g., no allowed perturbations or - pos1==pos2, respectively). - return_all: if cutoff <= 0.50, it is possible that there will be more - than one allowed_perturbation per position. In these cases, either all - of the values could be returned (return_all=True) or the max of the - values can be returned (return_all=False, default). If you'd like one - value, but not the max, wrap this function with return_all=True, and - handle the return value as desired. - alphabet: an ordered iterable object containing the characters in the - alphabet. For example, this can be a CharAlphabet object, a list, - or a string. + alignment: full alignment object + cutoff: the percentage of sequences that must contain a specific + char at a specific pos1 to result in an allowed sub-alignment. + (According to the Ranganathan papers, this should be the value + determined by their 3rd criteria.) + scaled_aln_size: the scaled number of sequences in the alignment. The + original SCA implementation treats all alignments as if they contained + 100 sequences when calculating frequencies and probabilities. 100 is + therefore the default value. + null_value: the value which should be returned if SCA cannot or + should not be calculated (e.g., no allowed perturbations or + pos1==pos2, respectively). + return_all: if cutoff <= 0.50, it is possible that there will be more + than one allowed_perturbation per position. In these cases, either all + of the values could be returned (return_all=True) or the max of the + values can be returned (return_all=False, default). If you'd like one + value, but not the max, wrap this function with return_all=True, and + handle the return value as desired. 
+ alphabet: an ordered iterable object containing the characters in the + alphabet. For example, this can be a CharAlphabet object, a list, + or a string. """ num_seqs = alignment.num_seqs @@ -1254,7 +1252,7 @@ def get_ancestral_seqs(aln, tree, sm=None, pseudocount=1e-6, optimise=True): - """ Calculates ancestral sequences by maximum likelihood + """Calculates ancestral sequences by maximum likelihood Parameters ---------- @@ -1309,9 +1307,7 @@ def ancestral_state_pair( aln, tree, pos1, pos2, ancestral_seqs=None, null_value=DEFAULT_NULL_VALUE ): - """ - - """ + """""" ancestral_seqs = ancestral_seqs or get_ancestral_seqs(aln, tree) ancestral_names_to_seqs = dict( list(zip(ancestral_seqs.names, ancestral_seqs.array_seqs)) @@ -1443,8 +1439,7 @@ def validate_alphabet(alphabet, freqs): - """SCA validation: ValueError if set(alphabet) != set(freqs.keys()) - """ + """SCA validation: ValueError if set(alphabet) != set(freqs.keys())""" alphabet_chars = set(alphabet) freq_chars = set(freqs.keys()) if alphabet_chars != freq_chars: @@ -1473,10 +1468,10 @@ def validate_ancestral_seqs(alignment, tree, ancestral_seqs): """AS validation: ValueError if incompatible aln, tree, & ancestral seqs - Incompatibility between the alignment and the ancestral_seqs is - different sequence lengths. Incompatbility between the tree and - the ancestral seqs is imperfect overlap between the names of the - ancestors in the tree and the ancestral sequence names. + Incompatibility between the alignment and the ancestral_seqs is + different sequence lengths. Incompatbility between the tree and + the ancestral seqs is imperfect overlap between the names of the + ancestors in the tree and the ancestral sequence names. 
""" if len(alignment) != len(ancestral_seqs): raise ValueError("Alignment and ancestral seqs are different lengths.") @@ -1491,8 +1486,7 @@ def validate_tree(alignment, tree): - """AS validation: ValueError if tip and seq names aren't same - """ + """AS validation: ValueError if tip and seq names aren't same""" if set(tree.get_tip_names()) != set(alignment.names): raise ValueError("Tree tips and seqs must have perfectly overlapping names.") @@ -1525,8 +1519,7 @@ def coevolve_alignments_validation( method, alignment1, alignment2, min_num_seqs, max_num_seqs, **kwargs ): - """ Validation steps required for intermolecular coevolution analyses - """ + """Validation steps required for intermolecular coevolution analyses""" valid_methods_for_different_moltypes = {}.fromkeys( [mi_alignment, nmi_alignment, resampled_mi_alignment] ) @@ -1585,12 +1578,12 @@ def coevolve_alignment(method, alignment, **kwargs): - """ Apply coevolution method to alignment (for intramolecular coevolution) + """Apply coevolution method to alignment (for intramolecular coevolution) - method: f(alignment,**kwargs) -> 2D array of coevolution scores - alignment: alignment object for which coevolve scores should be - calculated - **kwargs: parameters to be passed to method() + method: f(alignment,**kwargs) -> 2D array of coevolution scores + alignment: alignment object for which coevolve scores should be + calculated + **kwargs: parameters to be passed to method() """ # Perform method specific validation steps if method == sca_alignment: @@ -1621,10 +1614,10 @@ def merge_alignments(alignment1, alignment2): - """ Append alignment 2 to the end of alignment 1 + """Append alignment 2 to the end of alignment 1 - This function is used by coevolve_alignments to merge two alignments - so they can be evaluated by coevolve_alignment. + This function is used by coevolve_alignments to merge two alignments + so they can be evaluated by coevolve_alignment. 
""" result = {} # Created maps from the final seq ids (i.e., seq id before plus) to the @@ -1654,7 +1647,7 @@ def n_random_seqs(alignment, n): """Given alignment, return n random seqs in a new alignment object. - This function is used by coevolve_alignments. + This function is used by coevolve_alignments. """ seq_names = alignment.names @@ -1673,78 +1666,78 @@ sequence_filter=n_random_seqs, **kwargs, ): - """ Apply method to a pair of alignments (for intermolecular coevolution) + """Apply method to a pair of alignments (for intermolecular coevolution) - method: the *_alignment function to be applied - alignment1: alignment of first molecule (ArrayAlignment) - alignment2: alignment of second molecule (ArrayAlignment) - return_full: if True, returns intra- and inter-molecular - coevolution data in a square matrix (default: False) - merged_aln_filepath: if provided, will write the merged - alignment to file (useful for running post-processing filters) - min_num_seqs: the minimum number of sequences that should be - present in the merged alignment to perform the analysis - (default: 2) - max_num_seqs: the maximum number of sequences to include - in an analysis - if the number of sequences exceeds - max_num_seqs, a random selection of max_num_seqs will be - used. This is a time-saving step as too many sequences can - slow things down a lot. (default: None, any number of - sequences is allowed) - sequence_filter: function which takes an alignment and an int - and returns the int number of sequences from the alignment in - a new alignment object (defualt: util.n_random_seqs(alignment,n)) - if None, a ValueError will be raised if there are more than - max_num_seqs - - This function allows for calculation of coevolve scores between - pairs of alignments. The results are returned in a rectangular - len(alignment1) x len(alignment2) matrix. 
- - There are some complications involved in preparing alignments for - this function, because it needs to be obvious how to associate the - putative interacting sequences. For example, if looking for - interactions between mammalian proteins A and B, sequences are - required from the same sets of species, and it must be apparant how - to match the sequences that are most likely to be involved in - biologically meaningful interactions. This typically means matching - the sequences of proteins A&B that come from the same species. In - other words, interaction of T. aculeatus proteinA and - H. sapien proteinB likely don't form a biologically relevant - interaction, because the species are so diverged. - - Matching of sequences is performed via the identifiers, but it is - the responsibility of the user to correctly construct the sequence - identifiers before passing the alignments (and tree, if applicable) - to this function. To faciliate matching sequence identifiers, but not - having to discard the important information already present in a - sequence identifier obtained from a database such as KEGG or RefSeq, - sequence identifiers may contain a plus symbol (+). The characters - before the + are used to match sequences between the alignments and - tree. The characters after the + are ignored by this function. So, a - good strategy is to make the text before the '+' a taxonomic - identifier and leave the text after the '+' as the original sequence - identifier. For example, your sequence/tip names could look like: - - alignment1: 'H. sapien+gi|123', 'T. aculeatus+gi|456' - alignment2: 'T. aculeatus+gi|999', 'H. sapien+gi|424' - tree: 'T. aculeatus+gi|456', 'H. sapien' - - If there is no plus, the full sequence identifier will be used for the - matching (see H. sapien in tree). The order of sequences in the - alignments is not important. 
Also note that we can't split on a colon, - as would be convenient for pulling sequences from KEGG, because colons - are special characters in newick. - - A WORD OF WARNING ON SEQUENCE IDENTIFIER CONSTRUCTION: - A further complication is that in some cases, an organism will have - multiple copies of proteins involved in a complex, but proteinA from - locus 1 will not form a functional comples with proteinB from locus 2. - An example of this is the three T6SSs in P. aeuroginosa. Make sure - this is handled correctly when building your sequence identifiers! - Sequence identifiers are used to match the sequences which are - suspected to form a functional complex, which may not simply mean - sequences from the same species. + method: the *_alignment function to be applied + alignment1: alignment of first molecule (ArrayAlignment) + alignment2: alignment of second molecule (ArrayAlignment) + return_full: if True, returns intra- and inter-molecular + coevolution data in a square matrix (default: False) + merged_aln_filepath: if provided, will write the merged + alignment to file (useful for running post-processing filters) + min_num_seqs: the minimum number of sequences that should be + present in the merged alignment to perform the analysis + (default: 2) + max_num_seqs: the maximum number of sequences to include + in an analysis - if the number of sequences exceeds + max_num_seqs, a random selection of max_num_seqs will be + used. This is a time-saving step as too many sequences can + slow things down a lot. (default: None, any number of + sequences is allowed) + sequence_filter: function which takes an alignment and an int + and returns the int number of sequences from the alignment in + a new alignment object (defualt: util.n_random_seqs(alignment,n)) + if None, a ValueError will be raised if there are more than + max_num_seqs + + This function allows for calculation of coevolve scores between + pairs of alignments. 
The results are returned in a rectangular + len(alignment1) x len(alignment2) matrix. + + There are some complications involved in preparing alignments for + this function, because it needs to be obvious how to associate the + putative interacting sequences. For example, if looking for + interactions between mammalian proteins A and B, sequences are + required from the same sets of species, and it must be apparant how + to match the sequences that are most likely to be involved in + biologically meaningful interactions. This typically means matching + the sequences of proteins A&B that come from the same species. In + other words, interaction of T. aculeatus proteinA and + H. sapien proteinB likely don't form a biologically relevant + interaction, because the species are so diverged. + + Matching of sequences is performed via the identifiers, but it is + the responsibility of the user to correctly construct the sequence + identifiers before passing the alignments (and tree, if applicable) + to this function. To faciliate matching sequence identifiers, but not + having to discard the important information already present in a + sequence identifier obtained from a database such as KEGG or RefSeq, + sequence identifiers may contain a plus symbol (+). The characters + before the + are used to match sequences between the alignments and + tree. The characters after the + are ignored by this function. So, a + good strategy is to make the text before the '+' a taxonomic + identifier and leave the text after the '+' as the original sequence + identifier. For example, your sequence/tip names could look like: + + alignment1: 'H. sapien+gi|123', 'T. aculeatus+gi|456' + alignment2: 'T. aculeatus+gi|999', 'H. sapien+gi|424' + tree: 'T. aculeatus+gi|456', 'H. sapien' + + If there is no plus, the full sequence identifier will be used for the + matching (see H. sapien in tree). The order of sequences in the + alignments is not important. 
Also note that we can't split on a colon, + as would be convenient for pulling sequences from KEGG, because colons + are special characters in newick. + + A WORD OF WARNING ON SEQUENCE IDENTIFIER CONSTRUCTION: + A further complication is that in some cases, an organism will have + multiple copies of proteins involved in a complex, but proteinA from + locus 1 will not form a functional comples with proteinB from locus 2. + An example of this is the three T6SSs in P. aeuroginosa. Make sure + this is handled correctly when building your sequence identifiers! + Sequence identifiers are used to match the sequences which are + suspected to form a functional complex, which may not simply mean + sequences from the same species. """ # Perform general validation step @@ -1855,13 +1848,13 @@ def coevolve_position(method, alignment, position, **kwargs): - """ Apply provided coevolution method to a column in alignment + """Apply provided coevolution method to a column in alignment - method: f(alignment,position,**kwargs) -> array of coevolution scores - alignment: alignment object for which coevolve scores should be - calculated (ArrayAlignment) - position: position of interest for coevolution analysis (int) - **kwargs: parameters to be passed to method() + method: f(alignment,position,**kwargs) -> array of coevolution scores + alignment: alignment object for which coevolve scores should be + calculated (ArrayAlignment) + position: position of interest for coevolution analysis (int) + **kwargs: parameters to be passed to method() """ # Perform method-specific validation steps if method == sca_position: @@ -1890,13 +1883,13 @@ def coevolve_pair(method, alignment, pos1, pos2, **kwargs): - """ Apply provided coevolution method to columns pos1 & pos2 of alignment + """Apply provided coevolution method to columns pos1 & pos2 of alignment - method: f(alignment,pos1,pos2,**kwargs) -> coevolution score - alignment: alignment object for which coevolve score should be - calculated 
(ArrayAlignment) - pos1, pos2: positions to evaluate coevolution between (int) - **kwargs: parameters to be passed to method() + method: f(alignment,pos1,pos2,**kwargs) -> coevolution score + alignment: alignment object for which coevolve score should be + calculated (ArrayAlignment) + pos1, pos2: positions to evaluate coevolution between (int) + **kwargs: parameters to be passed to method() """ # Perform method-specific validation steps @@ -1930,44 +1923,44 @@ ): """Filters positions with more than max_cmp_threshold scores >= threshold - This post-processing filter is based on the idea described in: - "Using multiple interdependency to separate functional from - phylogenetic correlations in protein alignments" - Tillier and Lui, 2003 - - The idea is that when a position achieved a high covariation score - with many other positions, the covariation is more likely to arise - from the phylogeny than from coevolution. They illustrate that this - works in their paper, and I plan to test it with my alpha-helix-based - analysis. Note that you can change cmp_function to change whether - you're looking for high values to indicate covarying positions - (cmp_function=greater_equal, used for most coevolution algorithms) or - low values to indicate covarying positions (cmp_function=less_equal, - used, e.g., for p-value matrices). - - aln: alignment used to generate the coevolution matrix -- this - isn't actually used, but is required to maintain the same interface - as other post-processing filters. Pass None if that's more convenient. - coevolution_matrix: the 2D numpy array to be filtered. 
This should - be a rectangular matrix for intermoelcular coevolution data (in which - case intermolecular_data_only must be set to True) or a symmetric - square matrix (when intermolecular_data_only=False) - threshold: the threshold coevolution score that other scores should be - compared to - max_cmp_threshold: the max number of scores that are allowed to be - True with respect to cmp_function and threshold (e.g., the max number - of positions that may be greater than the threhsold) before setting - all values associated that position to gDefaultNullValue (default: 1) - cmp_function: the function that compares each score in - coevolution_matrix to threshold (default: ge (greater than)) - - function should return True if the score is one that your looking - (e.g. score >= threshold) or False otherwise - intermolecular_data_only: True if coevolution_matrix is a rectangular - matrix representing an intermolecular coevolution study, and False - if the matrix is a symmetric square matrix + This post-processing filter is based on the idea described in: + "Using multiple interdependency to separate functional from + phylogenetic correlations in protein alignments" + Tillier and Lui, 2003 + + The idea is that when a position achieved a high covariation score + with many other positions, the covariation is more likely to arise + from the phylogeny than from coevolution. They illustrate that this + works in their paper, and I plan to test it with my alpha-helix-based + analysis. Note that you can change cmp_function to change whether + you're looking for high values to indicate covarying positions + (cmp_function=greater_equal, used for most coevolution algorithms) or + low values to indicate covarying positions (cmp_function=less_equal, + used, e.g., for p-value matrices). + + aln: alignment used to generate the coevolution matrix -- this + isn't actually used, but is required to maintain the same interface + as other post-processing filters. 
Pass None if that's more convenient. + coevolution_matrix: the 2D numpy array to be filtered. This should + be a rectangular matrix for intermoelcular coevolution data (in which + case intermolecular_data_only must be set to True) or a symmetric + square matrix (when intermolecular_data_only=False) + threshold: the threshold coevolution score that other scores should be + compared to + max_cmp_threshold: the max number of scores that are allowed to be + True with respect to cmp_function and threshold (e.g., the max number + of positions that may be greater than the threhsold) before setting + all values associated that position to gDefaultNullValue (default: 1) + cmp_function: the function that compares each score in + coevolution_matrix to threshold (default: ge (greater than)) - + function should return True if the score is one that your looking + (e.g. score >= threshold) or False otherwise + intermolecular_data_only: True if coevolution_matrix is a rectangular + matrix representing an intermolecular coevolution study, and False + if the matrix is a symmetric square matrix - NOTE: IF intermolecular_data_only == True, coevolution_matrix MUST BE - SYMMETRIC, NOT LOWER TRIANGULAR OR OTHERWISE NON-SYMMETRIC!! + NOTE: IF intermolecular_data_only == True, coevolution_matrix MUST BE + SYMMETRIC, NOT LOWER TRIANGULAR OR OTHERWISE NON-SYMMETRIC!! 
""" # Determine which rows need to be filtered (but don't filter them # right away or subsequent counts could be off) @@ -2017,44 +2010,44 @@ ): """Return True is aln_position is parsimony informative - column_freqs: dict of characters at alignmnet position mapped - to their counts -- this is the output of call alignment.column_freqs() - minimum_count: the minimum number of times a character must show up - for it to be acceptable (default: 2) - minimum_differences: the minimum number of different characters - that must show up at the alignment position (default: 2) - ignored: characters that should not be counted toward - minimum_differences (default are exclude characters) - strict: if True, requires that all amino acids showing up at least - once at the alignment position show up at least minimum_counts - times, rather than only requiring that minimum_differences - amino acids show up minimum_counts times. (default: False) - - The term parsimony informative comes from Codoner, O'Dea, - and Fares 2008, Reducing the false positive rate in the non- - parametric analysis of molecular coevolution. In the paper - they find that if positions which don't contain at least two - different amino acids, and where each different amino acid doesnt - show up at least twice each are ignored (i.e., treated as though - there is not enough information) the positive predictive value - (PPV) and sensitivity (SN) increase on simulated alignments. They - term this quality parsimony informative. - I implemented this as a filter, but include some generalization. 
- To determine if a column in an alignment is parsimony informative - in the exact manner described in Codoner et al., the following - parameter settings are required: - minimum_count = 2 (default) - minimum_differences = 2 (default) - strict = True (default is False) - To generalize this function, minimum_count and minimum_differences - can be passed in so at least minimum_differences different amino - acids must show up, and each amino acid must show up at least - minimum_count times. - In additional variation, strict=False can be passed requiring - that only minimum_differences number of amino acids show up at least - minimum_counts times (opposed to requiring that ALL amino acids show - up minimum_counts times). This is the default behavior. - By default, the default exclude characters (- and ?) don't count. + column_freqs: dict of characters at alignmnet position mapped + to their counts -- this is the output of call alignment.column_freqs() + minimum_count: the minimum number of times a character must show up + for it to be acceptable (default: 2) + minimum_differences: the minimum number of different characters + that must show up at the alignment position (default: 2) + ignored: characters that should not be counted toward + minimum_differences (default are exclude characters) + strict: if True, requires that all amino acids showing up at least + once at the alignment position show up at least minimum_counts + times, rather than only requiring that minimum_differences + amino acids show up minimum_counts times. (default: False) + + The term parsimony informative comes from Codoner, O'Dea, + and Fares 2008, Reducing the false positive rate in the non- + parametric analysis of molecular coevolution. 
In the paper + they find that if positions which don't contain at least two + different amino acids, and where each different amino acid doesnt + show up at least twice each are ignored (i.e., treated as though + there is not enough information) the positive predictive value + (PPV) and sensitivity (SN) increase on simulated alignments. They + term this quality parsimony informative. + I implemented this as a filter, but include some generalization. + To determine if a column in an alignment is parsimony informative + in the exact manner described in Codoner et al., the following + parameter settings are required: + minimum_count = 2 (default) + minimum_differences = 2 (default) + strict = True (default is False) + To generalize this function, minimum_count and minimum_differences + can be passed in so at least minimum_differences different amino + acids must show up, and each amino acid must show up at least + minimum_count times. + In additional variation, strict=False can be passed requiring + that only minimum_differences number of amino acids show up at least + minimum_counts times (opposed to requiring that ALL amino acids show + up minimum_counts times). This is the default behavior. + By default, the default exclude characters (- and ?) don't count. """ try: @@ -2097,17 +2090,17 @@ intermolecular_data_only=False, strict=False, ): - """ Replaces scores in coevolution_matrix with null_value for positions - which are not parsimony informative. + """Replaces scores in coevolution_matrix with null_value for positions + which are not parsimony informative. - See is_parsimony_informative doc string for definition of - parsimony informative. + See is_parsimony_informative doc string for definition of + parsimony informative. - aln: the input alignment used to generate the coevolution matrix; - if the alignment was recoded, this should be the recoded alignment. 
- coevolution_matrix: the result matrix - null_value: the value to place in positions which are not - parsimony informative + aln: the input alignment used to generate the coevolution matrix; + if the alignment was recoded, this should be the recoded alignment. + coevolution_matrix: the result matrix + null_value: the value to place in positions which are not + parsimony informative """ if intermolecular_data_only: len_aln1 = coevolution_matrix.shape[1] @@ -2126,8 +2119,7 @@ def make_positional_exclude_percentage_function(excludes, max_exclude_percent): - """ return function to identify aln positions with > max_exclude_percent - """ + """return function to identify aln positions with > max_exclude_percent""" excludes = {}.fromkeys(excludes) def f(col): @@ -2148,19 +2140,19 @@ excludes=DEFAULT_EXCLUDES, intermolecular_data_only=False, ): - """ Assign null_value to positions with > max_exclude_percent excludes + """Assign null_value to positions with > max_exclude_percent excludes - aln: the ArrayAlignment object - coevolution_matrix: the 2D numpy array -- this will be modified - max_exclude_percent: the maximimu percent of characters that - may be exclude characters in any alignment position (column). - if the percent of exclude characters is greater than this value, - values in this position will be replaced with null_value - (default = 0.10) - null_value: the value to be used as null (default: gDefaultNullValue) - excludes: the exclude characters (default: gDefaultExcludes) - intermolecular_data_only: True if the coevolution result - matrix contains only intermolecular data (default: False) + aln: the ArrayAlignment object + coevolution_matrix: the 2D numpy array -- this will be modified + max_exclude_percent: the maximimu percent of characters that + may be exclude characters in any alignment position (column). 
+ if the percent of exclude characters is greater than this value, + values in this position will be replaced with null_value + (default = 0.10) + null_value: the value to be used as null (default: gDefaultNullValue) + excludes: the exclude characters (default: gDefaultExcludes) + intermolecular_data_only: True if the coevolution result + matrix contains only intermolecular data (default: False) """ # construct the function to be passed to aln.get_position_indices @@ -2191,13 +2183,13 @@ def pickle_coevolution_result(coevolve_result, out_filepath="output.pkl"): - """ Pickle coevolve_result and store it at output_filepath + """Pickle coevolve_result and store it at output_filepath - coevolve_result: result from a coevolve_* function (above); this can - be a float, an array, or a 2D array (most likely it will be one of the - latter two, as it will usually be fast enough to compute a single - coevolve value on-the-fly. - out_filepath: path where the pickled result should be stored + coevolve_result: result from a coevolve_* function (above); this can + be a float, an array, or a 2D array (most likely it will be one of the + latter two, as it will usually be fast enough to compute a single + coevolve value on-the-fly. 
+ out_filepath: path where the pickled result should be stored """ try: infile = open(out_filepath, "wb") @@ -2210,9 +2202,9 @@ def unpickle_coevolution_result(in_filepath): - """ Read in coevolve_result from a pickled file + """Read in coevolve_result from a pickled file - in_filepath: filepath to unpickle + in_filepath: filepath to unpickle """ try: infile = open(in_filepath, "rb") @@ -2229,11 +2221,11 @@ def coevolution_matrix_to_csv(coevolve_matrix, out_filepath="output.csv"): - """ Write coevolve_matrix as csv file at output_filepath + """Write coevolve_matrix as csv file at output_filepath - coevolve_result: result from a coevolve_alignment function (above); - this should be a 2D numpy array - out_filepath: path where the csv result should be stored + coevolve_result: result from a coevolve_alignment function (above); + this should be a 2D numpy array + out_filepath: path where the csv result should be stored """ try: f = open(out_filepath, "w") @@ -2245,9 +2237,9 @@ def csv_to_coevolution_matrix(in_filepath): - """ Read a coevolution matrix from a csv file + """Read a coevolution matrix from a csv file - in_filepath: input filepath + in_filepath: input filepath """ try: f = open(in_filepath) @@ -2273,11 +2265,11 @@ def identify_aln_positions_above_threshold( coevolution_matrix, threshold, aln_position, null_value=DEFAULT_NULL_VALUE ): - """ Returns the list of alignment positions which achieve a - score >= threshold with aln_position. - Coevolution matrix should be symmetrical or you - may get weird results -- scores are pulled from the row describing - aln_position. + """Returns the list of alignment positions which achieve a + score >= threshold with aln_position. + Coevolution matrix should be symmetrical or you + may get weird results -- scores are pulled from the row describing + aln_position. 
""" coevolution_scores = coevolution_matrix[aln_position] results = [] @@ -2295,16 +2287,16 @@ null_value=DEFAULT_NULL_VALUE, intermolecular_data_only=False, ): - """ Returns list of position pairs with score >= threshold + """Returns list of position pairs with score >= threshold - coevolution_matrix: 2D numpy array - threshold: value to compare matrix positions against - cmp_function: function which takes a value and theshold - and returns a boolean (e.g., ge(), le()) - null_value: value representing null scores -- these are - ignored - intermolecular_data_only: True if the coevolution result - matrix contains only intermolecular data (default: False) + coevolution_matrix: 2D numpy array + threshold: value to compare matrix positions against + cmp_function: function which takes a value and theshold + and returns a boolean (e.g., ge(), le()) + null_value: value representing null scores -- these are + ignored + intermolecular_data_only: True if the coevolution result + matrix contains only intermolecular data (default: False) """ if not intermolecular_data_only: assert ( @@ -2362,12 +2354,12 @@ symmetric=False, ignore_diagonal=False, ): - """ Returns a count of the values in m >= threshold, ignoring nulls. + """Returns a count of the values in m >= threshold, ignoring nulls. - m: coevolution matrix (numpy array) - thresold: value to compare against scores in matrix (float) - cmp_function: function used to compare value to threshold - (e.g., greater_equal, less_equal) + m: coevolution matrix (numpy array) + thresold: value to compare against scores in matrix (float) + cmp_function: function used to compare value to threshold + (e.g., greater_equal, less_equal) """ total_non_null = 0 @@ -2501,23 +2493,23 @@ def parse_coevolution_matrix_filepath(filepath): - """ Parses a coevolution matrix filepath into constituent parts. + """Parses a coevolution matrix filepath into constituent parts. - Format is very specific. 
Will only work on filenames such as: - path/alignment_identifier.alphabet_id.method.pkl - path/alignment_identifier.alphabet_id.method.csv - - This format is the recommended naming convention for coevolution - matrices. To ensure filepaths compatible with this function, use - cogent3.evolve.coevolution.build_coevolution_matrix_filepath to build - the filepaths for your coevolution matrices. - - - Examples: - parse_coevolution_matrix_filepath('pkls/myosin_995.a1_4.nmi.pkl') - => ('myosin_995', 'a1_4', 'nmi') - parse_coevolution_matrix_filepath('p53.orig.mi.csv') - => ('p53','orig','mi') + Format is very specific. Will only work on filenames such as: + path/alignment_identifier.alphabet_id.method.pkl + path/alignment_identifier.alphabet_id.method.csv + + This format is the recommended naming convention for coevolution + matrices. To ensure filepaths compatible with this function, use + cogent3.evolve.coevolution.build_coevolution_matrix_filepath to build + the filepaths for your coevolution matrices. 
+ + + Examples: + parse_coevolution_matrix_filepath('pkls/myosin_995.a1_4.nmi.pkl') + => ('myosin_995', 'a1_4', 'nmi') + parse_coevolution_matrix_filepath('p53.orig.mi.csv') + => ('p53','orig','mi') """ filename = basename(filepath) fields = filename.split(".") diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/discrete_markov.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/discrete_markov.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/discrete_markov.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/discrete_markov.py 2020-12-20 23:35:03.000000000 +0000 @@ -12,7 +12,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/distance.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/distance.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/distance.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/distance.py 2020-12-20 23:35:03.000000000 +0000 @@ -15,7 +15,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Peter Maxwell", "Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -346,7 +346,7 @@ T = table.Table( [r"Seq1 \ Seq2"] + self._seqnames, twoD, - index=r"Seq1 \ Seq2", + index_name=r"Seq1 \ Seq2", missing_data="*", ) return T diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/fast_distance.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/fast_distance.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/fast_distance.py 2020-06-30 05:30:11.000000000 +0000 +++ 
python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/fast_distance.py 2020-12-20 23:35:03.000000000 +0000 @@ -18,7 +18,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Yicheng Zhu", "Ben Kaehler"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" # pending addition of protein distance metrics @@ -303,7 +303,7 @@ rows[i].insert(0, names[i]) table = Table( - header=header, data=rows, index=r"Seq1 \ Seq2", missing_data="*", **kwargs + header=header, data=rows, index_name=r"Seq1 \ Seq2", missing_data="*", **kwargs ) return table @@ -534,7 +534,7 @@ class HammingPair(_PairwiseDistance): """Hamming distance calculator for pairwise alignments""" - valid_moltypes = ("dna", "rna", "protein", "text") + valid_moltypes = ("dna", "rna", "protein", "text", "bytes") def __init__(self, moltype="text", *args, **kwargs): """states: the valid sequence states""" @@ -545,7 +545,7 @@ class PercentIdentityPair(_PairwiseDistance): """Percent identity distance calculator for pairwise alignments""" - valid_moltypes = ("dna", "rna", "protein", "text") + valid_moltypes = ("dna", "rna", "protein", "text", "bytes") def __init__(self, moltype="text", *args, **kwargs): """states: the valid sequence states""" @@ -635,8 +635,8 @@ def __init__(self, moltype="dna", use_tk_adjustment=True, *args, **kwargs): """Arguments: - - moltype: string or moltype instance (must be dna or rna) - - use_tk_adjustment: use the correction of Tamura and Kumar 2002 + - moltype: string or moltype instance (must be dna or rna) + - use_tk_adjustment: use the correction of Tamura and Kumar 2002 """ super(LogDetPair, self).__init__(moltype, *args, **kwargs) self.func = _logdet @@ -684,7 +684,8 @@ def available_distances(): - """returns Table listing available pairwise genetic distance calculator + """returns Table listing available fast pairwise genetic distance 
calculator + Notes ----- For more complicated genetic distance methods, see the evolve.models module. @@ -702,7 +703,7 @@ "Specify a pairwise genetic distance calculator " "using 'Abbreviation' (case insensitive)." ), - index="Abbreviation", + index_name="Abbreviation", ) return table @@ -742,7 +743,7 @@ column = self.array[:, i] data[name] = column header = ["names"] + list(self.names) - table = Table(header=header, data=data, index="names") + table = Table(header=header, data=data, index_name="names") return table def to_dict(self, **kwargs): diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/__init__.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/__init__.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -29,7 +29,7 @@ "Greg Caporaso", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/likelihood_calculation.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/likelihood_calculation.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/likelihood_calculation.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/likelihood_calculation.py 2020-12-20 23:35:03.000000000 +0000 @@ -31,7 +31,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/likelihood_function.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/likelihood_function.py --- 
python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/likelihood_function.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/likelihood_function.py 2020-12-20 23:35:03.000000000 +0000 @@ -36,7 +36,7 @@ "Ananias Iliadis", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -500,7 +500,7 @@ def _for_display(self): """processes statistics tables for display""" - title = self._name if self._name else "Likelihood function statistics" + title = self.name if self.name else "Likelihood function statistics" result = [] result += self.get_statistics(with_motif_probs=True, with_titles=True) for i, table in enumerate(result): @@ -508,14 +508,7 @@ "motif" in table.title and table.shape[1] == 2 and table.shape[0] >= 60 ): # just sort codon motif probs, then truncate table = table.sorted(columns="motif") - data = table.tolist() - data = data[:5] + [["...", "..."]] + data[-5:] - table = table.__class__( - header=table.header, - data=data, - digits=table._digits, - title=table.title, - ) + table.set_repr_policy(head=5, tail=5, show_shape=False) result[i] = table return title, result @@ -531,7 +524,8 @@ title, results = self._for_display() for i, table in enumerate(results): table.title = table.title.capitalize() - results[i] = table._repr_html_(include_shape=False) + table.set_repr_policy(show_shape=False) + results[i] = table._repr_html_() results = ["

%s

" % title, lnL, nfp] + results return "\n".join(results) @@ -570,7 +564,9 @@ The other measures are always available in the params dict of each node. """ - from cogent3.evolve.ns_substitution_model import DiscreteSubstitutionModel + from cogent3.evolve.ns_substitution_model import ( + DiscreteSubstitutionModel, + ) is_discrete = isinstance(self.model, DiscreteSubstitutionModel) @@ -694,7 +690,9 @@ motif_probs : dict or DictArray an item for each edge of the tree. Computed if not provided. """ - from cogent3.evolve.ns_substitution_model import DiscreteSubstitutionModel + from cogent3.evolve.ns_substitution_model import ( + DiscreteSubstitutionModel, + ) is_discrete = isinstance(self.model, DiscreteSubstitutionModel) @@ -772,11 +770,7 @@ param_names = self.get_param_names() mprob_name = [n for n in param_names if "mprob" in n] - if mprob_name: - mprob_name = mprob_name[0] - else: - mprob_name = "" - + mprob_name = mprob_name[0] if mprob_name else "" if not with_motif_probs: param_names.remove(mprob_name) @@ -834,7 +828,7 @@ heading_names, list_table, max_width=80, - index=row_ids, + index_name=row_ids, title=title, **self._format, ) @@ -891,8 +885,18 @@ edge_attr[edge]["length"] = None model = self._model.to_rich_dict(for_pickle=False) - alignment = self.get_param_value("alignment").to_rich_dict() - mprobs = self.get_motif_probs().to_dict() + + aln_defn = self.defn_for["alignment"] + if len(aln_defn.index) == 1: + alignment = self.get_param_value("alignment").to_rich_dict() + mprobs = self.get_motif_probs().to_dict() + else: + alignment = {a["locus"]: a["value"] for a in aln_defn.get_param_rules()} + mprobs = self.get_motif_probs() + for k in alignment: + alignment[k] = alignment[k].to_rich_dict() + mprobs[k] = mprobs[k].to_dict() + DLC = self.all_psubs_DLC() try: unique_Q = self.all_rate_matrices_unique() @@ -923,12 +927,23 @@ data = json.dumps(data) return data + @property + def name(self): + if self._name is None: + self._name = self.model.name or "" + + return 
self._name + + @name.setter + def name(self, name): + self._name = name + # For tests. Compat with old LF interface def set_name(self, name): - self._name = name + self.name = name def get_name(self): - return self._name or "unnamed" + return self.name def set_tables_format(self, space=4, digits=4): """sets display properties for statistics tables. This affects results diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/likelihood_tree_numba.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/likelihood_tree_numba.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/likelihood_tree_numba.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/likelihood_tree_numba.py 2020-12-20 23:35:03.000000000 +0000 @@ -7,7 +7,7 @@ __copyright__ = "Copyright 2007-2019, The Cogent Project" __credits__ = ["Peter Maxwell", "Rob Knight", "Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/likelihood_tree.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/likelihood_tree.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/likelihood_tree.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/likelihood_tree.py 2020-12-20 23:35:03.000000000 +0000 @@ -16,7 +16,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -123,7 +123,9 @@ rows = list(zip(motifs, observed, expected)) rows.sort(key=lambda row: (-row[1], row[0])) table = Table( - header=["Pattern", "Observed", "Expected"], data=rows, index="Pattern" + header=["Pattern", "Observed", "Expected"], + 
data=rows, + index_name="Pattern", ) return (G, table) else: @@ -175,7 +177,11 @@ self.indexes = numpy.ascontiguousarray(self.indexes) if not result.flags["C_CONTIGUOUS"]: result = numpy.ascontiguousarray(result) - return likelihood_tree.sum_input_likelihoods(self.indexes, result, likelihoods,) + return likelihood_tree.sum_input_likelihoods( + self.indexes, + result, + likelihoods, + ) # For root diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/models.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/models.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/models.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/models.py 2020-12-20 23:35:03.000000000 +0000 @@ -25,7 +25,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Matthew Wakefield", "Peter Maxwell", "Gavin Huttley", "James Kondilios"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/motif_prob_model.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/motif_prob_model.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/motif_prob_model.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/motif_prob_model.py 2020-12-20 23:35:03.000000000 +0000 @@ -13,7 +13,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -114,9 +114,9 @@ class ComplexMotifProbModel(MotifProbModel): def __init__(self, tuple_alphabet, mask): """Arguments: - - tuple_alphabet: series of multi-letter motifs - - monomers: the monomers from which the motifs are made - - mask: instantaneous change matrix""" + 
- tuple_alphabet: series of multi-letter motifs + - monomers: the monomers from which the motifs are made + - mask: instantaneous change matrix""" self.mask = mask self.tuple_alphabet = tuple_alphabet self.monomer_alphabet = monomers = tuple_alphabet.moltype.alphabet diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/ns_substitution_model.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/ns_substitution_model.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/ns_substitution_model.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/ns_substitution_model.py 2020-12-20 23:35:03.000000000 +0000 @@ -19,7 +19,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __contributors__ = ["Gavin Huttley", "Peter Maxwell", "Ben Kaeheler", "Ananias Iliadis"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/pairwise_distance_numba.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/pairwise_distance_numba.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/pairwise_distance_numba.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/pairwise_distance_numba.py 2020-12-20 23:35:03.000000000 +0000 @@ -5,7 +5,7 @@ __copyright__ = "Copyright 2007-2019, The Cogent Project" __credits__ = ["Gavin Huttley", "Yicheng Zhu", "Ben Kaehler", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/parameter_controller.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/parameter_controller.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/parameter_controller.py 2020-06-30 05:30:11.000000000 +0000 +++ 
python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/parameter_controller.py 2020-12-20 23:35:03.000000000 +0000 @@ -24,7 +24,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Andrew Butterfield", "Peter Maxwell", "Gavin Huttley", "Helen Lindsay"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.ed.au" __status__ = "Production" @@ -90,7 +90,7 @@ self.set_default_tree_parameter_rules() self.mprobs_from_alignment = motif_probs_from_align self.optimise_motif_probs = optimise_motif_probs - self._name = "" + self._name = None self._format = {} def save(self, filename): @@ -213,7 +213,7 @@ outgroup_name=None, ): """From information specifying the scope of a parameter derive a list of - edge names""" + edge names""" if edges is not None: if tip_names or edge: @@ -258,7 +258,7 @@ """modifes the scope of all submodel rate, aside from excluded params, by constructing a list of parameter rules and using the apply_param_rules method - + Parameters ---------- exclude_params @@ -518,9 +518,12 @@ locus_name = "for locus '%s'" % self.locus_names[index] else: locus_name = "" - assert not set(aln.names).symmetric_difference(tip_names), ( - "Tree tip names %s and aln seq names %s don't match %s" - % (self.tree.get_tip_names(), aln.names, locus_name) + assert not set(aln.names).symmetric_difference( + tip_names + ), "Tree tip names %s and aln seq names %s don't match %s" % ( + self.tree.get_tip_names(), + aln.names, + locus_name, ) assert "root" not in aln.names, "'root' is a reserved name." 
with self.updates_postponed(): diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/predicate.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/predicate.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/predicate.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/predicate.py 2020-12-20 23:35:03.000000000 +0000 @@ -18,7 +18,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/simulate.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/simulate.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/simulate.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/simulate.py 2020-12-20 23:35:03.000000000 +0000 @@ -10,7 +10,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/solved_models_numba.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/solved_models_numba.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/solved_models_numba.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/solved_models_numba.py 2020-12-20 23:35:03.000000000 +0000 @@ -9,7 +9,7 @@ __copyright__ = "Copyright 2007-2019, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru 
python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/solved_models.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/solved_models.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/solved_models.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/solved_models.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,8 +1,5 @@ """P matrices for some DNA models can be calculated without going via the -intermediate rate matrix Q. A Cython implementation of this calculation can -be used when Q is not required, for example during likelihood tree optimisation. -Equivalent pure python code is NOT provided because it is typically slower -than the rate-matrix based alternative and provides no extra functionality. +intermediate rate matrix Q. """ import numpy @@ -15,7 +12,6 @@ TimeReversibleNucleotide, ) from cogent3.maths.matrix_exponentiation import FastExponentiator -from cogent3.util.modules import ExpectedImportError, importVersionedModule from . import solved_models_numba as _solved_models @@ -24,7 +20,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -89,7 +85,7 @@ def HKY85(**kw): - """Hasegawa, Kishino and Yanamo 1985 model""" + """Hasegawa, Kishino and Yano 1985 model""" kw["recode_gaps"] = True return _solved_nucleotide("HKY85", [kappa], **kw) diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/substitution_calculation.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/substitution_calculation.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/substitution_calculation.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/substitution_calculation.py 2020-12-20 23:35:03.000000000 +0000 @@ -30,7 +30,7 @@ __copyright__ = "Copyright 2007-2020, The 
Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/substitution_model.py python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/substitution_model.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/evolve/substitution_model.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/evolve/substitution_model.py 2020-12-20 23:35:03.000000000 +0000 @@ -79,7 +79,7 @@ "Von Bing Yap", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -462,11 +462,11 @@ **kw, ): """ - - with_rate: Add a 'rate' parameter which varies by bin. - - ordered_param: name of a single parameter which distinguishes any bins. - - distribution: choices of 'free' or 'gamma' or an instance of some - distribution. Could probably just deprecate free - - partitioned_params: names of params to be partitioned across bins + - with_rate: Add a 'rate' parameter which varies by bin. + - ordered_param: name of a single parameter which distinguishes any bins. + - distribution: choices of 'free' or 'gamma' or an instance of some + distribution. 
Could probably just deprecate free + - partitioned_params: names of params to be partitioned across bins """ _SubstitutionModel.__init__(self, alphabet, **kw) @@ -679,7 +679,7 @@ @extend_docstring_from(_ContinuousSubstitutionModel.__init__) def __init__(self, alphabet, rate_matrix, **kw): """ - - rate_matrix: The instantaneous rate matrix + - rate_matrix: The instantaneous rate matrix """ _ContinuousSubstitutionModel.__init__(self, alphabet, **kw) d = locals() @@ -709,8 +709,8 @@ @extend_docstring_from(_ContinuousSubstitutionModel.__init__) def __init__(self, alphabet, predicates=None, scales=None, **kw): """ - - predicates: a dict of {name:predicate}. See cogent3.evolve.predicate - - scales: scale rules, dict with predicates + - predicates: a dict of {name:predicate}. See cogent3.evolve.predicate + - scales: scale rules, dict with predicates """ self._canned_predicates = None _ContinuousSubstitutionModel.__init__(self, alphabet, **kw) @@ -787,7 +787,7 @@ data=rows, max_width=max_width, title=title, - index=r"From\To", + index_name=r"From\To", ) result = t if return_table else t.to_string(center=True) return result @@ -974,7 +974,7 @@ """ Parameters ---------- - + gc a genetic code instance """ diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/format/alignment.py python-cogent-2020.12.21a+dfsg/src/cogent3/format/alignment.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/format/alignment.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/format/alignment.py 2020-12-20 23:35:03.000000000 +0000 @@ -7,14 +7,14 @@ from cogent3.format.paml import alignment_to_paml from cogent3.format.phylip import alignment_to_phylip from cogent3.parse.record import FileFormatError -from cogent3.util.misc import open_ +from cogent3.util.misc import atomic_write __author__ = "Peter Maxwell and Gavin Huttley" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Thomas La"] __license__ = "BSD-3" 
-__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -28,24 +28,22 @@ def save_to_filename(alignment, filename, format, **kw): """Arguments: - - alignment: to be written - - filename: name of the sequence alignment file - - format: the multiple sequence file format + - alignment: to be written + - filename: name of the sequence alignment file + - format: the multiple sequence file format """ if format is None: raise FileFormatError("format not known") - f = open_(filename, "wt") - try: - write_alignment_to_file(f, alignment, format, **kw) - except Exception: + with atomic_write(filename, mode="wt") as f: try: - os.unlink(filename) + write_alignment_to_file(f, alignment, format, **kw) except Exception: - pass - raise - finally: - f.close() + try: + os.unlink(filename) + except Exception: + pass + raise def write_alignment_to_file(f, alignment, format, **kw): diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/format/bedgraph.py python-cogent-2020.12.21a+dfsg/src/cogent3/format/bedgraph.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/format/bedgraph.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/format/bedgraph.py 2020-12-20 23:35:03.000000000 +0000 @@ -5,7 +5,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "alpha" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/format/clustal.py python-cogent-2020.12.21a+dfsg/src/cogent3/format/clustal.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/format/clustal.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/format/clustal.py 2020-12-20 23:35:03.000000000 +0000 @@ -11,19 +11,32 @@ __copyright__ = "Copyright 2007-2020, The Cogent 
Project" __credits__ = ["Jeremy Widmann"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Jeremy Widmann" __email__ = "jeremy.widmann@colorado.edu" __status__ = "Development" -def clustal_from_alignment(aln, interleave_len=None): - """Returns a string in Clustal format. - - - aln: can be an Alignment object or a dict. - - interleave_len: sequence line width. Only available if sequences are - aligned. +def clustal_from_alignment(aln, interleave_len=None, wrap=None): + """ + Parameters + ---------- + aln + can be an Alignment object or a dict + wrap + sequence line width. Only available if sequences are + aligned. + + Returns + ------- + Returns a string in Clustal format """ + if interleave_len is not None: + from cogent3.util.warning import deprecated + + deprecated("argument", "interleave_len", "wrap", "2021.6") + wrap = interleave_len if wrap == 60 else wrap + if not aln: return "" @@ -55,7 +68,7 @@ # Get ordered seqs ordered_seqs = [seqs.named_seqs[label] for label in order] - if interleave_len is not None: + if wrap is not None: curr_ix = 0 while curr_ix < aln_len: clustal_list.extend( @@ -64,13 +77,13 @@ % ( x, " " * (max_spaces - len(x)), - y[curr_ix : curr_ix + interleave_len], + y[curr_ix : curr_ix + wrap], ) for x, y in zip(order, ordered_seqs) ] ) clustal_list.append("") - curr_ix += interleave_len + curr_ix += wrap else: clustal_list.extend( [ diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/format/fasta.py python-cogent-2020.12.21a+dfsg/src/cogent3/format/fasta.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/format/fasta.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/format/fasta.py 2020-12-20 23:35:03.000000000 +0000 @@ -9,15 +9,14 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Jeremy Widmann", "Rob Knight", "Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ 
= "Jeremy Widmann" __email__ = "jeremy.widmann@colorado.edu" __status__ = "Production" def alignment_to_fasta(alignment_dict, block_size=60, order=None): - """Returns a Fasta string given an alignment. - """ + """Returns a Fasta string given an alignment.""" order = order or [] return FastaFormatter().format(alignment_dict, block_size, order) diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/format/gde.py python-cogent-2020.12.21a+dfsg/src/cogent3/format/gde.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/format/gde.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/format/gde.py 2020-12-20 23:35:03.000000000 +0000 @@ -9,13 +9,12 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Jeremy Widmann", "Rob Knight", "Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Thomas La" def alignment_to_gde(alignment_dict, block_size=60, order=None): - """Returns a Gde string given an alignment. 
- """ + """Returns a Gde string given an alignment.""" return GDEFormatter().format( alignment_dict, block_size, [] if order is None else order ) diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/format/__init__.py python-cogent-2020.12.21a+dfsg/src/cogent3/format/__init__.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/format/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/format/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -29,7 +29,7 @@ "Marcin Cieslik", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/format/nexus.py python-cogent-2020.12.21a+dfsg/src/cogent3/format/nexus.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/format/nexus.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/format/nexus.py 2020-12-20 23:35:03.000000000 +0000 @@ -4,23 +4,29 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" -def nexus_from_alignment(aln, seq_type, interleave_len=50): +def nexus_from_alignment(aln, seq_type, wrap=50, interleave_len=None): """returns a nexus formatted string Parameters ---------- seq_type dna, rna, or protein - interleave_len + wrap the line width + interleave_len + will be replaced by wrap in version 2021.6 """ + if interleave_len is not None: + cogent3.util.warning.deprecated("argument", "interleave_len", "wrap", "2021.6") + wrap = interleave_len if wrap == 50 else wrap + if aln.is_ragged(): raise ValueError( "Sequences in alignment are not all the same " @@ -40,13 +46,10 @@ names_seqs = sorted(aln.named_seqs.items()) while cur_ix < aln_len: nexus_out.extend( - [ - " 
%s %s" % (x, y[cur_ix : cur_ix + interleave_len]) - for x, y in names_seqs - ] + [" %s %s" % (x, y[cur_ix : cur_ix + wrap]) for x, y in names_seqs] ) nexus_out.append("") - cur_ix += interleave_len + cur_ix += wrap nexus_out.append(" ;\nend;") return "\n".join(nexus_out) diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/format/paml.py python-cogent-2020.12.21a+dfsg/src/cogent3/format/paml.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/format/paml.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/format/paml.py 2020-12-20 23:35:03.000000000 +0000 @@ -9,13 +9,12 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Jeremy Widmann", "Rob Knight", "Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Thomas La" def alignment_to_paml(alignment_dict, block_size=60, order=None): - """Returns a Paml string given an alignment. - """ + """Returns a Paml string given an alignment.""" return PamlFormatter().format( alignment_dict, block_size, [] if order is None else order ) diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/format/phylip.py python-cogent-2020.12.21a+dfsg/src/cogent3/format/phylip.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/format/phylip.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/format/phylip.py 2020-12-20 23:35:03.000000000 +0000 @@ -9,15 +9,14 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" def alignment_to_phylip(alignment_dict, block_size=60, order=None): - """Returns a Phylip string given an alignment. 
- """ + """Returns a Phylip string given an alignment.""" return PhylipFormatter().format( alignment_dict, block_size, [] if order is None else order ) diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/format/table.py python-cogent-2020.12.21a+dfsg/src/cogent3/format/table.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/format/table.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/format/table.py 2020-12-20 23:35:03.000000000 +0000 @@ -13,14 +13,12 @@ import numpy -from cogent3.util.warning import discontinued - __author__ = "Gavin Huttley" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Peter Maxwell", "Matthew Wakefield", "Jeremy Widmann"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -40,6 +38,26 @@ "tsv", ) +css_c3table_template = "\n".join( + ( + ".c3table table {margin: 10px 0;}", + ".c3table tr:last-child {border-bottom: 1px solid #000;} ", + ".c3table tr > th {text-align: left; padding: 0 5px;}", + ".c3table tr > td {text-align: left; padding: 5px;}", + ".c3table tr:nth-child(even) {background: #f7f7f7 !important;}", + ".c3table .ellipsis {background: rgba(0, 0, 0, .01);}", + ".c3table .index {background: %(colour)s; margin: 10px; font-weight: 600;}", + ".c3table .head_cell {background: %(head_colour)s; font-weight: bold; text-align: center;}", + ".c3table caption {color: rgb(250, 250, 250); background: " + "rgba(30, 140, 200, 1); padding: 3px; white-space: nowrap; " + "caption-side: top;}", + ".c3table .cell_title {font-weight: bold;}", + ".c3col_left { text-align: left !important; display: block;}", + ".c3col_right { text-align: right !important; display: block;}", + ".c3col_center { text-align: center !important; display: block;}", + ) +) + def _merged_cell_text_wrap(text, max_line_length, space): """ left justify wraps text into multiple 
rows""" @@ -320,8 +338,8 @@ forces wrapping of table onto successive lines if its' width exceeds that specified identifiers - column index for the last column that uniquely identify - rows. Required if table width exceeds max_width. + index for the column that uniquely identify rows. Required if table + width exceeds max_width. borders whether to display borders. space @@ -802,3 +820,252 @@ dmat += append_species(name, rows[i], mat_breaks) return "\n".join(dmat) + + +def get_continuation_tables_headers( + cols_widths, index_name=None, space=2, max_width=1e100 +): + """ + returns column headers for continuation tables segmented to not exceed max_width + + Parameters + ---------- + cols_widths : list + [[col_name, length of longest string], ...] + index_name : str + column name of an index. This column included in all sub table headers. + space : int + how much white space between columns + max_width : int + maximum width + + Returns + ------- + list of lists, each inner list is the column names for a subtable + """ + width_map = dict(cols_widths) + index_width = 0 if index_name is None else width_map[index_name] + for name, width in width_map.items(): + if index_width + width > max_width: + raise ValueError( + f"{index_name}={index_width} + {name} width={width} > max_width={max_width}" + ) + + if sum(v + space + index_width for _, v in cols_widths) < max_width: + return [[l for l, _ in cols_widths]] + + headers = [] + curr = [index_name] if index_name is not None else [] + cum_sum = index_width + for name, width in cols_widths: + if name == index_name: + continue + + cum_sum += space + width + if cum_sum > max_width: + headers.append(curr) + curr = [index_name, name] if index_name is not None else [name] + cum_sum = index_width + space + width + continue + + curr.append(name) + + headers.append(curr) + + return headers + + +class _MixedFormatter: + """handles formatting of mixed data types""" + + def __init__( + self, alignment, length, precision=4, float_type="f", 
missing_data=None + ): + self.missing_data = missing_data + self.length = length + self.alignment = alignment + self.precision = precision + self.float_type = float_type + + def __call__(self, val): + prefix = f"{self.alignment}{self.length}" + float_spec = f"{prefix}.{self.precision}{self.float_type}" + int_spec = f"{prefix}d" + result = str(val) + if self.missing_data is not None and not result: + return self.missing_data + + for fspec in (int_spec, float_spec, prefix): + try: + result = format(val, fspec) + break + except (TypeError, ValueError): + pass + + return result + + +def formatted_array( + series, + title="", + precision=4, + format_spec=None, + missing_data="", + pad=True, + align="r", +): + """converts elements in a numpy array series to an equal length string. + + Parameters + ---------- + series + the series of table rows + title + title of series + precision + number of decimal places. Can be overridden by following. + format_spec + format specification as per the python Format Specification, Mini-Language + or a callable function. + missing_data + default missing data value. + pad : bool + Whether to pad all strings to same length. If False, alignment setting is + ignored. + align : str + either 'l', 'c', 'r' for left, center or right alignment, Defaults to 'r'. + Only applied if pad==True + + Returns + ------- + list of formatted series, formatted title, maximum string length + + Notes + ----- + The precedence for formatting is format_spec supersedes pad, precision and + align values. 
+ """ + assert isinstance(series, numpy.ndarray), "must be numpy array" + if pad and align.lower() not in set("lrc"): + raise ValueError(f"align value '{align}' not in 'l,c,r'") + + if pad: + align = {"l": "<", "c": "^", "r": ">"}[align] + + if callable(format_spec): + formatter = format_spec + format_spec = None + else: + formatter = None + + if format_spec and set(format_spec.strip()) <= set("<>^"): + # format_spec just an alignment character, in which case we assign + # that to align and reset format_spec as None so other formatting + # options have an effect + align = format_spec + format_spec = None + + if isinstance(format_spec, str): + format_spec = format_spec.replace("%", "") + + if not any([format_spec, formatter]): + type_name = series.dtype.name + if "int" in type_name: + base_format = "d" + elif "float" in type_name: + base_format = f".{precision}f" + elif "bool" == type_name: + base_format = "" + else: + # handle mixed types with a custom formatter + formatter = _MixedFormatter( + align, len(title), precision, missing_data=missing_data + ) + base_format = "" + + format_spec = base_format + + formatted = [] + max_length = len(title) + for i, v in enumerate(series): + if formatter: + v = formatter(v) + else: + try: + v = format(v, format_spec) + except (TypeError, ValueError): + # could be a python object + v = str(v) + + l = len(v) + if l > max_length: + max_length = l + + formatted.append(v) + + if not pad: + return formatted, title.strip(), max_length + + if format_spec: + match = re.search("[<>^]", format_spec[:2]) + final_align = align if match is None else match.group() + else: + final_align = align + + # now adjust to max_len + format_spec = f"{final_align}{max_length}s" + title = format(title, format_spec) + formatted = [format(v.strip(), format_spec) for v in formatted] + return formatted, title, max_length + + +class HtmlElement: + """wrapper for text to become a HTML element""" + + def __init__(self, text, tag, css_classes=None, 
newline=False): + """ + Parameters + ---------- + text : str + cell content + tag : str + html table cell tag, e.g. 'td', 'th' + classes : list + list of custom CSS classes + newline : bool + puts the open, close tags on new lines + """ + self.text = str(text) + self.tag = tag + css_classes = [css_classes] if isinstance(css_classes, str) else css_classes + self.css_classes = css_classes + self.newline = newline + + def __str__(self): + txt = self.text + classes = "" if self.css_classes is None else " ".join(self.css_classes) + classes = f' class="{classes}"' if classes else "" + nl = "\n" if self.newline else "" + return f"{nl}<{self.tag}{classes}>{nl}{txt}{nl}" + + def __repr__(self): + return repr(self.text) + + +def is_html_markup(text): + """checks if text contains balanced html markup + + body + """ + pattern = re.compile("(?<=[<])[a-z]+") + tokens = set(pattern.findall(text)) + if not tokens: + return False + + for token in tokens: + num_start = len(re.findall(f"<{token}", text)) + num_end = len(re.findall(f" [3, 1.5, 0, 1.5] @@ -1115,11 +1115,11 @@ def binary_dist_otu_gain(otumtx): - """ Calculates number of new OTUs observed in sample A wrt sample B + """Calculates number of new OTUs observed in sample A wrt sample B - This is an non-phylogenetic distance matrix analagous to unifrac_g. - The number of OTUs gained in each sample is computed with respect to - each other sample. + This is an non-phylogenetic distance matrix analagous to unifrac_g. + The number of OTUs gained in each sample is computed with respect to + each other sample. 
""" result = [] diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/geometry.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/geometry.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/geometry.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/geometry.py 2020-12-20 23:35:03.000000000 +0000 @@ -42,7 +42,7 @@ "Helmut Simon", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Sandra Smit" __email__ = "sandra.smit@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/__init__.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/__init__.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -29,7 +29,7 @@ "Ben Kaehler", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/markov.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/markov.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/markov.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/markov.py 2020-12-20 23:35:03.000000000 +0000 @@ -11,7 +11,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/matrix_exponential_integration.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/matrix_exponential_integration.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/matrix_exponential_integration.py 
2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/matrix_exponential_integration.py 2020-12-20 23:35:03.000000000 +0000 @@ -19,7 +19,7 @@ __copyright__ = "Copyright 2007-2014, The Cogent Project" __credits__ = ["Ben Kaehler", "Von Bing Yap", "Gavin Huttley", "Ananias Iliadis"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Ben Kaehler" __email__ = "benjamin.kaehler@anu.edu.au" __status__ = "Production" @@ -97,7 +97,7 @@ def expected_number_subs(p0, Q, t): """returns the expected number of substitutions - + p0 initial state frequencies Q diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/matrix_exponentiation.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/matrix_exponentiation.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/matrix_exponentiation.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/matrix_exponentiation.py 2020-12-20 23:35:03.000000000 +0000 @@ -16,14 +16,12 @@ from numpy.linalg import LinAlgError, eig, inv, solve -from cogent3.util.modules import ExpectedImportError, importVersionedModule - __author__ = "Peter Maxwell" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Zongzhi Liu"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -106,8 +104,7 @@ self.Q = Q def __call__(self, t=1.0): - """Compute the matrix exponential using Pade approximation of order q. 
- """ + """Compute the matrix exponential using Pade approximation of order q.""" A = self.Q * t M = A.shape[0] # Scale A so that norm is < 1/2 diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/matrix_logarithm.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/matrix_logarithm.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/matrix_logarithm.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/matrix_logarithm.py 2020-12-20 23:35:03.000000000 +0000 @@ -18,9 +18,9 @@ __copyright__ = "Copyright 2007-2014, The Cogent Project" __credits__ = ["Rob Knight", "Gavin Huttley", "Von Bing Yap", "Ben Kaehler"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/measure.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/measure.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/measure.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/measure.py 2020-12-20 23:35:03.000000000 +0000 @@ -13,7 +13,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/optimisers.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/optimisers.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/optimisers.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/optimisers.py 2020-12-20 23:35:03.000000000 +0000 @@ -21,7 +21,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Andrew Butterfield", "Peter Maxwell", "Gavin 
Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/period_numba.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/period_numba.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/period_numba.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/period_numba.py 2020-12-20 23:35:03.000000000 +0000 @@ -8,7 +8,7 @@ __copyright__ = "Copyright 2007-2019, The Cogent Project" __credits__ = ["Julien Epps", "Hua Ying", "Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/period.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/period.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/period.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/period.py 2020-12-20 23:35:03.000000000 +0000 @@ -21,7 +21,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Julien Epps", "Hua Ying", "Gavin Huttley", "Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -204,11 +204,11 @@ return_all=False, ): """Arguments: - - length: the length of signals to be encountered - - period: specified period at which to return the signal - - llim, ulim: the smallest, largest periods to evaluate - - return_all: whether to return the hybrid, ipdft, autocorr - statistics as a numpy array, or just the hybrid statistic + - length: the length of signals to be encountered + - period: specified period at which to return the signal + - llim, ulim: the 
smallest, largest periods to evaluate + - return_all: whether to return the hybrid, ipdft, autocorr + statistics as a numpy array, or just the hybrid statistic """ super(Hybrid, self).__init__(length, llim, ulim, period) self.ipdft = Ipdft(length, llim, ulim, period, abs_ft_sig) diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/scipy_optimisers.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/scipy_optimisers.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/scipy_optimisers.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/scipy_optimisers.py 2020-12-20 23:35:03.000000000 +0000 @@ -10,7 +10,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/scipy_optimize.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/scipy_optimize.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/scipy_optimize.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/scipy_optimize.py 2020-12-20 23:35:03.000000000 +0000 @@ -36,23 +36,19 @@ def max(m, axis=0): - """max(m,axis=0) returns the maximum of m along dimension axis. - """ + """max(m,axis=0) returns the maximum of m along dimension axis.""" m = asarray(m) return numpy.maximum.reduce(m, axis) def min(m, axis=0): - """min(m,axis=0) returns the minimum of m along dimension axis. - """ + """min(m,axis=0) returns the minimum of m along dimension axis.""" m = asarray(m) return numpy.minimum.reduce(m, axis) def is_array_scalar(x): - """Test whether `x` is either a scalar or an array scalar. 
- - """ + """Test whether `x` is either a scalar or an array scalar.""" return len(atleast_1d(x) == 1) @@ -60,7 +56,7 @@ pymin = builtins.min pymax = builtins.max -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" _epsilon = sqrt(numpy.finfo(float).eps) diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/simannealingoptimiser.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/simannealingoptimiser.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/simannealingoptimiser.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/simannealingoptimiser.py 2020-12-20 23:35:03.000000000 +0000 @@ -22,7 +22,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Andrew Butterfield", "Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -204,8 +204,7 @@ class SimulatedAnnealing(object): - """Simulated annealing optimiser for bounded functions - """ + """Simulated annealing optimiser for bounded functions""" def __init__(self, filename=None, interval=None, restore=True): """ diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/solve.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/solve.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/solve.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/solve.py 2020-12-20 23:35:03.000000000 +0000 @@ -4,7 +4,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/contingency.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/contingency.py --- 
python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/contingency.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/contingency.py 2020-12-20 23:35:03.000000000 +0000 @@ -12,17 +12,12 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" -def _get_bin(bins, value): - """returns bin index corresponding to value""" - pass - - # todo this should probably go into different module def shuffled_matrix(matrix): """returns a randomly sampled matrix with same marginals""" @@ -43,21 +38,17 @@ # todo following functions should be moved into stats.test and replace # or merge with the older implementations -def calc_expected(observed, pseudo_count=0): +def calc_expected(observed): """returns the expected array from product of marginal frequencies""" - if pseudo_count and (observed == 0).any(): - observed = observed.copy() - observed += pseudo_count - - num_dim = len(observed.shape) - if num_dim == 2: + if observed.ndim == 1 or (observed.ndim == 2 and 1 in observed.shape): + expecteds = zeros(observed.shape, dtype=float) + expecteds.fill(observed.mean()) + elif observed.ndim == 2: rsum = observed.sum(axis=1) rfreq = rsum / rsum.sum() csum = observed.sum(axis=0) cfreq = csum / csum.sum() expecteds = outer(rfreq, cfreq) * rsum.sum() - elif num_dim == 1: - expecteds = [observed.mean()] * observed.shape[0] else: raise NotImplementedError("too many dimensions") return expecteds @@ -70,8 +61,22 @@ return stat -def calc_G(observed, expected, pseudo_count=0, williams=True): - """returns the G statistic for the two numpy arrays""" +def calc_G(observed, expected, williams=True): + """returns the G statistic for the two numpy arrays + + Parameters + ---------- + observed : numpy.ndarray + Observed counts + expected : numpy.ndarray + 
Expected values + williams : bool + Applies Williams correction for small sample size + + Returns + ------- + G statistic + """ num_dim = len(observed.shape) df = observed.shape[0] - 1 if num_dim == 2: @@ -116,18 +121,19 @@ return num_gt / num_reps -class _format_row_cell: - """class for handling html formatting of rows""" - - def __init__(self, row_labels): - self.row_labels = row_labels +def _astype(data, dtype): + """returns numpy array of correct type, raises TypeError if fails""" + converted = data.astype(dtype) + try: + assert_allclose( + converted.tolist(), + data.tolist(), + ) + except AssertionError: + msg = f"could not reliably be converted to {dtype} from dtype={data.dtype}" + raise TypeError(msg) - def __call__(self, val, row, col): - if val in self.row_labels: - result = f"{val}" - else: - result = f'{val}' - return result + return converted class CategoryCounts: @@ -142,7 +148,8 @@ """Parameters ------------- observed - a DictArray instance, or something that can be converted to one + a DictArray instance, or something that can be converted to one. + Values must be integers. 
expected provide in the case where you know the prior proportions, otherwise calculated from marginal frequencies @@ -150,96 +157,64 @@ if not isinstance(observed, DictArray): observed = DictArray(observed) + # make sure values are int + observed.array = _astype(observed.array, int) + if observed.array.sum() == 0: raise ValueError("at least one value must be > 0") - if expected: - expected = observed.template.wrap(expected) - - if observed.array.min() < 0 or expected and expected.array.min() < 0: + if observed.array.min() < 0: raise ValueError("negative values encountered") - if expected: - assert_allclose( - observed.array.sum(), expected.array.sum() - ), "unequal totals of observed and expected" + if observed.array.ndim > 2: + raise NotImplementedError("not designed for >2D") self._observed = observed - self._expected = expected + self.expected = expected self._residuals = None self._df = None self.shape = observed.shape def _get_repr_(self, html=False): + obs = self.observed.to_table() + obs.title = "Observed" + exp = self.expected.to_table() + exp.title = "Expected" + exp.digits = 2 + res = self.residuals.to_table() + res.title = "Residuals" + res.digits = 2 - obs = self.observed.array.tolist() - exp = self.expected.array.tolist() - res = self.residuals.array.tolist() - - ndim = len(self.observed.shape) + ndim = self.observed.array.ndim if ndim == 1: - row_labels = "Observed", "Expected", "Residuals" - row_cell_func = _format_row_cell(row_labels) - col_labels = [str(c) for c in self.observed.template.names[0]] - rows = [] - # format floats for expecteds and resid - for row_label, row in zip(row_labels, [obs, exp, res]): - if row_label == "Observed": - row = [row_label] + [f"{v:,}" for v in row] - else: - row = [row_label] + [f"{v:,.2f}" for v in row] - rows.append(row) - + result = obs.appended("", exp, res, title=None, digits=2) if html: - rows = rich_html( - rows, - header=[""] + col_labels, - row_cell_func=row_cell_func, - merge_identical=False, - ) + 
result.set_repr_policy(show_shape=False) + result = result._repr_html_() else: - header, rows = formatted_cells(rows, header=[""] + col_labels) - rows = simple_format(header, rows) + result, _, _ = result._get_repr_() + result = str(result) + return result + + result = [] + for t in (obs, exp, res): + t.set_repr_policy(show_shape=False) + if html: + t = t._repr_html_() + else: + t, _, _ = t._get_repr_() + t = str(t) - else: - row_labels = self.observed.template.names[0] - col_labels = self.observed.template.names[1] - row_cell_func = _format_row_cell(row_labels) - result = [] - for caption, table in zip( - ("Observed", "Expected", "Residuals"), (obs, exp, res) - ): - rows = [] - for i, r in enumerate(table): - if caption == "Observed": - r = [f"{v:,}" for v in r] - else: - r = [f"{v:,.2f}" for v in r] - rows.append([row_labels[i]] + r) - if html: - result.append( - rich_html( - rows, - header=[""] + col_labels, - caption=f"{caption}", - row_cell_func=row_cell_func, - merge_identical=False, - ) - ) - else: - header, rows = formatted_cells(rows, header=[""] + col_labels) - result.append(simple_format(header, rows, title=caption)) - joiner = "
" if html else "\n" - rows = joiner.join(result) - return rows + result.append(t) + + joiner = "
" if html else "\n" + return joiner.join(result) def _repr_html_(self): - result = self._get_repr_(html=True) - return result + return self._get_repr_(html=True) def __repr__(self): - result = self._get_repr_(html=False) - return result + return self._get_repr_(html=False) def __str__(self): return self._get_repr_(html=False) @@ -257,6 +232,24 @@ return self._expected + @expected.setter + def expected(self, expected): + if expected is None: + self._expected = None + return + + expected = self.observed.template.wrap(expected) + expected.array = _astype(expected.array, float) + + if expected.array.min() < 0: + raise ValueError("negative values encountered") + + assert_allclose( + self.observed.array.sum(), expected.array.sum() + ), "unequal totals of observed and expected" + + self._expected = expected + @property def residuals(self): if not self._residuals: @@ -305,28 +298,45 @@ ---------- pseudo_count : int added to observed to avoid zero division + williams : bool + Applies Williams correction for small sample size shuffled : int pvalue is estimated via resampling shuffled times from the observed data, preserving the marginals """ assert type(pseudo_count) == int, f"{pseudo_count} not an integer" + obs = self.observed + exp = self.expected + if pseudo_count and (obs.array == 0).any(): + obs = obs.template.wrap(obs.array + pseudo_count) + exp = calc_expected(obs.array) + exp = obs.template.wrap(exp) + assert type(shuffled) == int, f"{shuffled} not an integer" G = calc_G( - self.observed.array, - self.expected.array, - pseudo_count=pseudo_count, + obs.array, + exp.array, williams=williams, ) if not shuffled: pval = chisqprob(G, self.df) else: - pval = estimate_pval(self.observed.array, calc_G, num_reps=shuffled) + pval = estimate_pval(obs.array, calc_G, num_reps=shuffled) + title = "G-test for independence" + amendments = "" + if pseudo_count: + amendments = f"pseudo_count={pseudo_count}, " + if williams: - title = f"{title} (with Williams correction)" + amendments 
= f"{amendments}Williams correction" + + if amendments: + title = f"{title} (with {amendments})" + result = TestResult( - self.observed, - self.expected, + obs, + exp, self.residuals, "G", G, @@ -336,13 +346,15 @@ ) return result - def G_fit(self, pseudo_count=0, williams=True): - """performs the goodness-of-fit G test""" - assert type(pseudo_count) == int, f"{pseudo_count} not an integer" - obs = self.observed.array - if pseudo_count: - obs += pseudo_count + def G_fit(self, williams=True): + """performs the goodness-of-fit G test + Parameters + ---------- + williams : bool + Applies Williams correction for small sample size + """ + obs = self.observed.array G, pval = G_fit(obs.flatten(), self.expected.array.flatten(), williams=williams) title = "G-test goodness-of-fit" if williams: @@ -406,18 +418,25 @@ setattr(self, stat_name, stat) def _get_repr_(self): + from cogent3.util.table import Table + header = [str(self.stat_name), "df", "pvalue"] - if self.pvalue > 1e-3: - pval = f"{self.pvalue:.4f}" - else: - pval = f"{self.pvalue:.4e}" - rows = [[f"{self.stat:.3f}", f"{self.df}", pval]] - return header, rows + col_templates = { + str(self.stat_name): "%.3f", + "df": "%s", + "pvalue": "%.4f" if self.pvalue > 1e-3 else "%.2e", + } + table = Table( + header, + [[self.stat, self.df, self.pvalue]], + title=self.test_name, + column_templates=col_templates, + ) + table.set_repr_policy(show_shape=False) + return table def __repr__(self): - h, r = self._get_repr_() - h, r = formatted_cells(r, header=h) - result = simple_format(h, r, title=self.test_name) + result = str(self._get_repr_()) components = CategoryCounts( self.observed.to_dict(), expected=self.expected.to_dict() ) @@ -428,12 +447,16 @@ return repr(self) def _repr_html_(self): - from cogent3.util.table import Table - - h, r = self._get_repr_() - table = Table(h, r, title=self.test_name) + table = self._get_repr_() + table.set_repr_policy(show_shape=False) components = CategoryCounts( self.observed.to_dict(), 
expected=self.expected.to_dict() ) - html = [table._repr_html_(include_shape=False), components._repr_html_()] + html = [table._repr_html_()] + html.append(components._repr_html_()) return "\n".join(html) + + @property + def statistics(self): + """returns Table of stat, df and p-value""" + return self._get_repr_() diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/distribution.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/distribution.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/distribution.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/distribution.py 2020-12-20 23:35:03.000000000 +0000 @@ -4,7 +4,7 @@ """ from numpy import arctan as atan -from numpy import exp, sqrt +from numpy import array, exp, sqrt from cogent3.maths.stats.special import ( MACHEP, @@ -33,9 +33,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight", "Sandra Smit", "Gavin Huttley", "Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" incbet = betai # shouldn't have renamed it... @@ -408,16 +408,14 @@ def gdtr(a, b, x): - """Returns integral from 0 to x of Gamma distribution with params a and b. - """ + """Returns integral from 0 to x of Gamma distribution with params a and b.""" if x < 0.0: raise ZeroDivisionError("x must be at least 0.") return igam(b, a * x) def gdtrc(a, b, x): - """Returns integral from x to inf of Gamma distribution with params a and b. 
- """ + """Returns integral from x to inf of Gamma distribution with params a and b.""" if x < 0.0: raise ZeroDivisionError("x must be at least 0.") return igamc(b, a * x) @@ -533,3 +531,56 @@ w = incbi(0.5 * a, 0.5 * b, 1.0 - y) x = b * w / (a * (1.0 - w)) return x + + +def probability_points(n): + """return series of n probabilities + + Returns + ------- + Numpy array of probabilities + + Notes + ----- + Useful for plotting probability distributions + """ + assert n > 0, f"{n} must be > 0" + adj = 0.5 if n > 10 else 3 / 8 + denom = n if n > 10 else n + 1 - 2 * adj + return array([(i - adj) / denom for i in range(1, n + 1)]) + + +def theoretical_quantiles(n, dist, *args): + """returns theoretical quantiles from dist + + Parameters + ---------- + n : int + number of elements + dist : str + one of 'normal', 'chisq', 't', 'uniform' + + Returns + ------- + Numpy array of quantiles + """ + dist = dist.lower() + funcs = dict( + normal=ndtri, + chisq=chdtri, + t=stdtri, + ) + + if dist != "uniform" and dist not in funcs: + raise ValueError(f"'{dist} not in {list(funcs)}") + + probs = probability_points(n) + if dist == "uniform": + return probs + + func = funcs[dist] + + if not args: + return array([func(p) for p in probs]) + + return array([func(*(args + (p,))) for p in probs]) diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/information_criteria.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/information_criteria.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/information_criteria.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/information_criteria.py 2020-12-20 23:35:03.000000000 +0000 @@ -5,7 +5,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru 
python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/__init__.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/__init__.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -29,7 +29,7 @@ "Micah Hamady", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/jackknife.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/jackknife.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/jackknife.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/jackknife.py 2020-12-20 23:35:03.000000000 +0000 @@ -7,7 +7,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Anuj Pahwa", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/kendall.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/kendall.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/kendall.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/kendall.py 2020-12-20 23:35:03.000000000 +0000 @@ -16,7 +16,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru 
python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/ks.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/ks.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/ks.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/ks.py 2020-12-20 23:35:03.000000000 +0000 @@ -28,7 +28,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/number.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/number.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/number.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/number.py 2020-12-20 23:35:03.000000000 +0000 @@ -10,7 +10,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -145,6 +145,54 @@ data = numpy.array(data, dtype=int) return data + def to_dictarray(self): + """construct fully enumerated dictarray + + Returns + ------- + DictArray with dtype of int + + Notes + ----- + Unobserved combinations have zeros. 
Result can can be indexed as if it was a numpy array using key values + """ + from itertools import product + + from cogent3.util.dict_array import DictArrayTemplate + + key = next(iter(self)) + try: + ndim = 1 if isinstance(key, str) else len(key) + except TypeError: + ndim = 1 + + if ndim == 1: + names = sorted(self) + vals = [self[n] for n in names] + darr = DictArrayTemplate(names).wrap(vals, dtype=int) + return darr + + categories = [sorted(set(labels)) for labels in zip(*self)] + shape = tuple(len(c) for c in categories) + darr = DictArrayTemplate(*categories).wrap(numpy.zeros(shape, dtype=int)) + for comb in product(*categories): + indices = [[categories[i].index(c)] for i, c in enumerate(comb)] + darr.array[indices] = self[comb] + + return darr + + def to_categorical(self): + """create CategoryCount object + + Notes + ----- + Supports only 1 or dimensional data + """ + from cogent3.maths.stats.contingency import CategoryCounts + + darr = self.to_dictarray() + return CategoryCounts(darr) + def to_table(self, column_names=None, **kwargs): """converts to Table diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/period.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/period.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/period.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/period.py 2020-12-20 23:35:03.000000000 +0000 @@ -17,7 +17,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Julien Epps", "Hua Ying", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/special.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/special.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/special.py 2020-06-30 05:30:11.000000000 +0000 +++ 
python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/special.py 2020-12-20 23:35:03.000000000 +0000 @@ -10,9 +10,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Rob Knight", "Sandra Smit", "Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" log_epsilon = 1e-6 # for threshold in log/exp close to 1 @@ -53,8 +53,7 @@ def log_one_minus(x): - """Returns natural log of (1-x). Useful for probability calculations. - """ + """Returns natural log of (1-x). Useful for probability calculations.""" if abs(x) < log_epsilon: return -x else: @@ -62,8 +61,7 @@ def one_minus_exp(x): - """Returns 1-exp(x). Useful for probability calculations. - """ + """Returns 1-exp(x). Useful for probability calculations.""" if abs(x) < log_epsilon: return -x else: @@ -744,6 +742,10 @@ """Returns the gamma function, a generalization of the factorial. 
See Cephes docs for details.""" + if hasattr(x, "item"): + # avoid issue of x being a limited precision numpy type + # use item() method casts to the nearest Python type + x = x.item() sgngam = 1 q = abs(x) diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/test.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/test.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/stats/test.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/stats/test.py 2020-12-20 23:35:03.000000000 +0000 @@ -4,19 +4,15 @@ import warnings -from operator import add -from random import choice - from numpy import ( absolute, + allclose, any, arctanh, array, asarray, - clip, concatenate, corrcoef, - cov, exp, fabs, isinf, @@ -25,9 +21,10 @@ mean, ) from numpy import median as _median -from numpy import nan, nonzero, ones, ravel, reshape, sqrt +from numpy import nonzero, ones, ravel, sqrt +from numpy import std as _std from numpy import sum as npsum -from numpy import take, tanh, trace, transpose, zeros +from numpy import take, tanh, trace, zeros from numpy.random import permutation, randint from cogent3.maths.stats.distribution import ( @@ -55,6 +52,7 @@ log_one_minus, one_minus_exp, ) +from cogent3.util.warning import discontinued __author__ = "Rob Knight" @@ -71,48 +69,47 @@ "Michael Dwan", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" -class IndexOrValueError(IndexError, ValueError): - pass - - -var = cov # cov will calculate variance if called on a vector +# defining globals for the alternate hypotheses +ALT_TWO_SIDED = "2" +ALT_LOW = "low" +ALT_HIGH = "high" -def std_(x, axis=None): - """Returns standard deviations by axis (similiar to numpy.std) +def _get_alternate(value: str): + """identify the alternate hypothesis - The 
result is unbiased, matching the result from MLab.std - """ - x = asarray(x) + Parameters + ---------- + value : str + the input alternate - if axis is None: - d = x - mean(x) - return sqrt(npsum(d ** 2) / (len(x) - 1)) - elif axis == 0: - result = [] - for col in range(x.shape[1]): - vals = x[:, col] - d = vals - mean(vals) - result.append(sqrt(npsum(d ** 2) / (len(x) - 1))) - return result - elif axis == 1: - result = [] - for row in range(x.shape[0]): - vals = x[row, :] - d = vals - mean(vals) - result.append(sqrt(npsum(d ** 2) / (len(x) - 1))) - return result - else: - raise ValueError("axis out of bounds") + Returns + ------- + the corresponding global ALT_LOW, ALT_HIGH, ALT_TWO_SIDED + """ + value = value.lower() + alts = {k: ALT_LOW for k in ("less", "lo", "low", "lower", "l", "lt", "<")} + alts.update({k: ALT_HIGH for k in ("greater", "hi", "high", "h", "g", "gt", ">")}) + alts.update( + { + k: ALT_TWO_SIDED + for k in ("two sided", "2", "two tailed", "two", "two.sided", "ts") + } + ) + alt = alts.get(value, None) + if alt is None: + raise ValueError(f"alternate hypothesis '{value}' not in {sorted(alts)}") + return alt -# tested only by std +class IndexOrValueError(IndexError, ValueError): + pass def var(x, axis=None): @@ -140,7 +137,7 @@ return sample_SS / (n - 1) -def std(x, axis=None): +def std(x, axis=None): # pragma: no cover """computed unbiased standard deviations along given axis or flat array. Similar with numpy.std, except that it is unbiased. (var = SS/n-1) @@ -156,12 +153,13 @@ return sqrt(sample_variance) -def median(m, axis=None): +def median(m, axis=None): # pragma: no cover """Returns medians by axis (similiar to numpy.mean) numpy.median does not except an axis parameter. Is safe for substition for numpy.median """ + discontinued("function", "median", "2021.11") median_vals = [] rows, cols = m.shape @@ -439,20 +437,27 @@ def t_paired(a, b, tails=None, exp_diff=0): """Returns t and prob for TWO RELATED samples of scores a and b. 
- From Sokal and Rohlf (1995), p. 354. - Calculates the vector of differences and compares it to exp_diff - using the 1-sample t test. - - Usage: t, prob = t_paired(a, b, tails, exp_diff) - - t is a float; prob is a probability. - a and b should be equal-length lists of paired observations (numbers). - tails should be None (default), 'high', or 'low'. - exp_diff should be the expected difference in means (a-b); 0 by default. + Parameters + ---------- + a, b + equal length lists of paired observations (numbers). + tails + tails should be None (default), 'high', or 'low'. + exp_diff + The expected difference in means (a-b); 0 by default. + + Returns + ------- + t, prob + + Notes + ----- + From Sokal and Rohlf (1995), p. 354. Calculates the vector of differences + and compares it to exp_diff using the 1-sample t test. """ n = len(a) if n != len(b): - raise ValueError("Unequal length lists in ttest_paired.") + raise ValueError("Unequal length lists in t_paired.") try: diffs = array(a) - array(b) return t_one_sample(diffs, popmean=exp_diff, tails=tails) @@ -474,11 +479,10 @@ t is a float; prob is a probability. a should support Mean, StandardDeviation, and Count. popmean should be the expected mean; 0 by default. - tails should be None (default), 'high', or 'low'. -""" + tails should be None (default), 'high', or 'low'.""" try: n = len(a) - t = (mean(a) - popmean) / (std(a) / sqrt(n)) + t = (mean(a) - popmean) / (_std(a, ddof=1) / sqrt(n)) except ( ZeroDivisionError, ValueError, @@ -497,18 +501,18 @@ def t_two_sample(a, b, tails=None, exp_diff=0, none_on_zero_variance=True): """Returns t, prob for two INDEPENDENT samples of scores a, and b. - From Sokal and Rohlf, p 223. - - Usage: t, prob = t_two_sample(a,b, tails, exp_diff) - - t is a float; prob is a probability. - a and b should be sequences of observations (numbers). Need not be equal - lengths. - tails should be None (default), 'high', or 'low'. 
- exp_diff should be the expected difference in means (a-b); 0 by default. - none_on_zero_variance: if True, will return (None,None) if both a and b - have zero variance (e.g. a=[1,1,1] and b=[2,2,2]). If False, the - following values will be returned: + Parameters + ---------- + a, b + equal length lists of paired observations (numbers). + tails + tails should be None (default), 'high', or 'low'. + exp_diff + The expected difference in means (a-b); 0 by default. + none_on_zero_variance + if True, will return (None,None) if both a and b have zero variance + (e.g. a=[1,1,1] and b=[2,2,2]). If False, the following values will + be returned: Two-tailed test (tails=None): a < b: (-inf,0.0) @@ -524,11 +528,17 @@ If a and b both have no variance and have the same single value (e.g. a=[1,1,1] and b=[1,1,1]), (None,None) will always be returned. + + Returns + ------- + t, prob + + Notes + ----- + From Sokal and Rohlf, p 223. """ - if tails is not None and tails != "high" and tails != "low": - raise ValueError( - "Invalid tail type '%s'. Must be either None, " "'high', or 'low'." % tails - ) + tails = tails or "2" + tails = _get_alternate(str(tails)) try: # see if we need to back off to the single-observation for single-item @@ -604,17 +614,15 @@ def _t_test_no_variance(mean1, mean2, tails): """Handles case where two distributions have no variance.""" - if tails is not None and tails != "high" and tails != "low": - raise ValueError( - "Invalid tail type '%s'. Must be either None, " "'high', or 'low'." 
% tails - ) + tails = tails or "2" + tails = _get_alternate(str(tails)) - if tails is None: + if tails == ALT_TWO_SIDED: if mean1 < mean2: result = (float("-inf"), 0.0) else: result = (float("inf"), 0.0) - elif tails == "high": + elif tails == ALT_HIGH: if mean1 < mean2: result = (float("-inf"), 1.0) else: @@ -655,10 +663,8 @@ exp_diff - the expected difference in means (x_items - y_items) """ - if tails is not None and tails != "high" and tails != "low": - raise ValueError( - "Invalid tail type '%s'. Must be either None, " "'high', or 'low'." % tails - ) + tails = tails or "2" + tails = _get_alternate(str(tails)) if permutations < 0: raise ValueError( "Invalid number of permutations: %d. Must be greater " @@ -701,11 +707,11 @@ ] # Compute nonparametric p-value based on the permuted t-test results. - if tails is None: + if tails == ALT_TWO_SIDED: better = (absolute(array(perm_t_stats)) >= absolute(obs_t)).sum() - elif tails == "low": + elif tails == ALT_LOW: better = (array(perm_t_stats) <= obs_t).sum() - elif tails == "high": + elif tails == ALT_HIGH: better = (array(perm_t_stats) >= obs_t).sum() nonparam_p_val = (better + 1) / (permutations + 1) @@ -748,7 +754,7 @@ """ try: sample_mean = mean(sample) - sample_std = std(sample) + sample_std = _std(sample, ddof=1) if sample_std == 0: # The list does not vary. @@ -820,6 +826,8 @@ ) except (ZeroDivisionError, ValueError, FloatingPointError): # no variation r = 0.0 + + r = 0.0 if isnan(r) else r # check we didn't get a naughty value for r due to rounding error if r > 1.0: r = 1.0 @@ -831,16 +839,19 @@ def spearman(x_items, y_items): """Returns Spearman's rho. + Parameters + ---------- + x_items + the first list of observations + y_items + the second list of observations + + Notes + ----- This will always be a value between -1.0 and +1.0. x_items and y_items must be the same length, and cannot have fewer than 2 elements each. If one or both of the input vectors do not have any variation, the return value will be 0.0. 
- - Parameters - ---------- - x_items - the first list of observations - y_items - the second list of observations - """ x_items, y_items = array(x_items), array(y_items) @@ -912,6 +923,49 @@ return ranks, ties +def pearson_correlation(x, y, tails=None): + """Computes the Pearson correlation between two vectors and its significance. + + Parameters + ---------- + x + the first list of observations + y + the second list of observations + tails + if None (the default), a two-sided test is performed. 'high' for a + one-tailed test for positive association, or 'low' for a one-tailed + test for negative association. + + Returns + ------- + Rho, pvalue + + Notes + ----- + Computes a parametric p-value by using Student's t-distribution with df=n-2 + to perform the test of significance. + """ + assert len(x) == len(y), f"unequal lengths of x ({len(x)}) and y ({len(y)})" + n = len(x) + tails = tails or "2" + tails = _get_alternate(str(tails)) + + # Calculate the correlation coefficient. + rho = pearson(x, y) + if allclose(rho, 1.0): + return rho, 0 + + df = n - 2 + if n < 3: + pvalue = 1 + else: + t = rho / sqrt((1 - (rho * rho)) / df) + pvalue = t_tailed_prob(t, df, tails) + + return rho, pvalue + + def correlation(x_items, y_items): """Returns Pearson correlation between x and y, and its significance. @@ -935,6 +989,33 @@ ): """Computes the correlation between two vectors and its significance. + Parameters + ---------- + x_items + the first list of observations + y_items + the second list of observations + method : str + 'pearson' or 'spearman' + tails + if None (the default), a two-sided test is performed. 'high' for a + one-tailed test for positive association, or 'low' for a one-tailed + test for negative association. This parameter affects both the + parametric and nonparametric tests, but the confidence interval + will always be two-sided permutations - the number of permutations + to use in the nonparametric test. Must be a number greater than or + equal to 0. 
If 0, the nonparametric test will not be performed. In + this case, the list of correlation coefficients obtained from + permutations will be empty, and the nonparametric p-value will be None + permutations : int + Permutes one of the input vectors theis number of times. Used in the + nonparametric test. + confidence_level + the confidence level to use when constructing the + confidence interval. Must be between 0 and 1 (exclusive) + + Notes + ----- Computes a parametric p-value by using Student's t-distribution with df=n-2 to perform the test of significance, as well as a nonparametric p-value obtained by permuting one of the input vectors the specified number of @@ -943,9 +1024,6 @@ greater than 3. Please see Sokal and Rohlf pp. 575-580 and pg. 598-601 for more details regarding these techniques. - Warning: the parametric p-value is unreliable when the method is spearman - and there are less than 11 observations in each vector. - Returns the correlation coefficient (r or rho), the parametric p-value, a list of the r or rho values obtained from permuting the input, the nonparametric p-value, and a tuple for the confidence interval, with the @@ -958,27 +1036,13 @@ elements each. If one or both of the input vectors do not have any variation, r or rho will be 0.0. - Note: the parametric portion of this function is based on the correlation + The parametric portion of this function is based on the correlation function in this module. - Parameters - ---------- - x_items - the first list of observations - y_items - the second list of observations - method - 'pearson' or 'spearman' - tails - if None (the default), a two-sided test is performed. 'high' - for a one-tailed test for positive association, or 'low' for a - one-tailed test for negative association. This parameter affects - both the parametric and nonparametric tests, but the confidence - interval will always be two-sided - permutations - the number of permutations to use in the nonparametric - test. 
Must be a number greater than or equal to 0. If 0, the - nonparametric test will not be performed. In this case, the list of - correlation coefficients obtained from permutations will be empty, - and the nonparametric p-value will be None - confidence_level - the confidence level to use when constructing the - confidence interval. Must be between 0 and 1 (exclusive) - + Warning + ------- + The parametric p-value is unreliable when the method is spearman + and there are less than 11 observations in each vector. """ # Perform some initial error checking. if method == "pearson": @@ -1074,14 +1138,8 @@ def correlation_matrix(series, as_rows=True): - """Returns pairwise correlations between each pair of series. - """ + """Returns pairwise correlations between each pair of series.""" return corrcoef(series, rowvar=as_rows) - # unused codes below - if as_rows: - return corrcoef(transpose(array(series))) - else: - return corrcoef(array(series)) def regress(x, y): @@ -1167,7 +1225,7 @@ def stdev_from_mean(x): """returns num standard deviations from the mean of each val in x[]""" x = array(x) - return (x - mean(x)) / std(x) + return (x - x.mean()) / x.std(ddof=1) def regress_major(x, y): @@ -1197,17 +1255,26 @@ def z_test(a, popmean=0, popstdev=1, tails=None): """Returns z and probability score for a single sample of items. -Calculates the z-score on ONE sample of items with mean x, given a population -mean and standard deviation (parametric). - -Usage: z, prob = z_test(a, popmean, popstdev, tails) - -z is a float; prob is a probability. -a is a sample with Mean and Count. -popmean should be the parametric population mean; 0 by default. -popstdev should be the parametric population standard deviation, 1 by default. -tails should be None (default), 'high', or 'low'. -""" + Parameters + ---------- + a + list of observations (numbers). + popmean + the parametric population mean; 0 by default + popstdev + the parametric population standard deviation, 1 by default. 
+ tails + tails should be None (default), 'high', or 'low'. + + Returns + ------- + z, prob + + Notes + ----- + Calculates the z-score on ONE sample of items with mean x, given a + population mean and standard deviation (parametric). + """ try: z = (mean(a) - popmean) / popstdev * sqrt(len(a)) return z, z_tailed_prob(z, tails) @@ -1233,9 +1300,12 @@ def t_tailed_prob(t, df, tails): """Return appropriate p-value for given t and df, depending on tails.""" - if tails == "high": + tails = tails or "2" + tails = _get_alternate(str(tails)) + + if tails == ALT_HIGH: return t_high(t, df) - elif tails == "low": + elif tails == ALT_LOW: return t_low(t, df) else: return tprob(t, df) @@ -1243,17 +1313,19 @@ def reverse_tails(tails): """Swaps high for low or vice versa, leaving other values alone.""" - if tails == "high": - return "low" - elif tails == "low": - return "high" + tails = tails or "2" + tails = _get_alternate(str(tails)) + + if tails == ALT_HIGH: + return ALT_LOW + elif tails == ALT_LOW: + return ALT_HIGH else: return tails def tail(prob, test): - """If test is true, returns prob/2. Otherwise returns 1-(prob/2). - """ + """If test is true, returns prob/2. Otherwise returns 1-(prob/2).""" prob /= 2 if test: return prob @@ -1263,8 +1335,7 @@ # todo delete, now from itertools.combinations def combinations(n, k): - """Returns the number of ways of choosing k items from n. - """ + """Returns the number of ways of choosing k items from n.""" return exp(lgam(n + 1) - lgam(k + 1) - lgam(n - k + 1)) @@ -1338,10 +1409,13 @@ This implementation returns the same results as the F test in R. 
""" + tails = tails or "2" + tails = _get_alternate(str(tails)) + dfn, dfd, F = f_value(a, b) - if tails == "low": + if tails == ALT_LOW: return dfn, dfd, F, f_low(dfn, dfd, F) - elif tails == "high": + elif tails == ALT_HIGH: return dfn, dfd, F, f_high(dfn, dfd, F) else: if var(a) >= var(b): @@ -1398,16 +1472,19 @@ low = look for smaller values than expected by chance high = look for larger values than expected by chance """ + tail = tail or "2" + tail = _get_alternate(str(tail)) + pop_size = len(rand_values) rand_values.sort() - if tail == "high": + if tail == ALT_HIGH: num_better = pop_size for i, curr_val in enumerate(rand_values): if value <= curr_val: num_better = i break p_val = 1 - (num_better / pop_size) - elif tail == "low": + elif tail == ALT_LOW: num_better = pop_size for i, curr_val in enumerate(rand_values): if value < curr_val: @@ -1422,31 +1499,26 @@ Parameters ---------- - success + success : int the number of successes - trials + trials : int the number of trials alt the alternate hypothesis, one of 'less', 'greater', 'two sided' (default). - """ - lo = ["less", "lo", "lower", "l"] - hi = ["greater", "hi", "high", "h", "g"] - two = ["two sided", "2", 2, "two tailed", "two"] - alt = alt.lower().strip() - if alt in lo: + alt = _get_alternate(str(alt)) + if alt == ALT_LOW: p = binomial_low(success, trials, 0.5) - elif alt in hi: + elif alt == ALT_HIGH: success -= 1 p = binomial_high(success, trials, 0.5) - elif alt in two: + else: success = min(success, trials - success) hi = 1 - binomial_high(success, trials, 0.5) lo = binomial_low(success, trials, 0.5) p = hi + lo - else: - raise RuntimeError("alternate [%s] not in %s" % (lo + hi + two)) + return p @@ -1465,29 +1537,24 @@ warns when values are tied. This should left at True unless a monte carlo variant, like ks_boot, is being used. - Note the 1-sample cases are not implemented, although their cdf's are - implemented in ks.py""" - # translation from R 2.4 + Notes + ----- + Translated from R 2.4. 
The 1-sample cases are not implemented, although + their cdf's are implemented in ks.py + """ + alt = _get_alternate(str(alt)) + num_x = len(x) - num_y = None x = list(zip(x, zeros(len(x), int))) - lo = ["less", "lo", "low", "lower", "l", "lt"] - hi = ["greater", "hi", "high", "h", "g", "gt"] - two = ["two sided", "2", 2, "two tailed", "two", "two.sided"] Pval = None if y is not None: # in anticipation of actually implementing the 1-sample cases num_y = len(y) y = list(zip(y, ones(len(y), int))) n = num_x * num_y / (num_x + num_y) combined = x + y - if len(set(combined)) < num_x + num_y: - ties = True - else: - ties = False - + ties = len(set(combined)) < num_x + num_y combined = array(combined, dtype=[("stat", float), ("sample", int)]) combined.sort(order="stat") - cumsum = zeros(combined.shape[0], float) scales = array([1 / num_x, -1 / num_y]) indices = combined["sample"] cumsum = scales.take(indices) @@ -1495,25 +1562,25 @@ if exact is None: exact = num_x * num_y < 1e4 - if alt in two: + if alt == ALT_TWO_SIDED: stat = max(fabs(cumsum)) - elif alt in lo: + elif alt == ALT_LOW: stat = -cumsum.min() - elif alt in hi: + elif alt == ALT_HIGH: stat = cumsum.max() else: raise RuntimeError("Unknown alt: %s" % alt) - if exact and alt in two and not ties: + if exact and alt == ALT_TWO_SIDED and not ties: Pval = 1 - psmirnov2x(stat, num_x, num_y) else: raise NotImplementedError if Pval is None: - if alt in two: - Pval = 1 - pkstwo(sqrt(n) * stat) - else: - Pval = exp(-2 * n * stat ** 2) - + Pval = ( + 1 - pkstwo(sqrt(n) * stat) + if alt == ALT_TWO_SIDED and not ties + else exp(-2 * n * stat ** 2) + ) if ties and warn_for_ties: warnings.warn("Cannot compute correct KS probability with ties") @@ -1529,7 +1596,7 @@ combined = array(list(x) + list(y)) total_obs = len(combined) num_x = len(x) - for i in range(num_reps): + for _ in range(num_reps): # sampling with replacement indices = randint(0, total_obs, total_obs) sampled = combined.take(indices) @@ -1550,18 +1617,17 @@ alt 
alternate hypothesis, as per ks_test num_reps - number of replicates for the bootstrap + number of replicates for the bootstrap + Notes + ----- + Based on the ks_boot method in the R Matching package, see + http://sekhon.berkeley.edu/matching/ + One important difference is I preserve the original sample sizes + instead of making them equal. """ - # based on the ks_boot method in the R Matching package - # see http://sekhon.berkeley.edu/matching/ - # One important difference is I preserve the original sample sizes - # instead of making them equal tol = MACHEP * 100 - combined = array(list(x) + list(y)) observed_stat, _p = ks_test(x, y, exact=False, warn_for_ties=False) - total_obs = len(combined) - num_x = len(x) num_greater = 0 for sampled_x, sampled_y in _get_bootstrap_sample(x, y, num_reps): sample_stat, _p = ks_test( @@ -1573,8 +1639,7 @@ def _average_rank(start_rank, end_rank): - ave_rank = npsum(range(start_rank, end_rank + 1)) / (1 + end_rank - start_rank) - return ave_rank + return npsum(range(start_rank, end_rank + 1)) / (1 + end_rank - start_rank) def mw_test(x, y): @@ -1647,13 +1712,12 @@ num_reps number of replicates for the bootstrap + Notes + ----- Uses the same Monte-Carlo resampling code as kw_boot """ tol = MACHEP * 100 - combined = array(list(x) + list(y)) observed_stat, obs_p = mw_test(x, y) - total_obs = len(combined) - num_x = len(x) num_greater = 0 for sampled_x, sampled_y in _get_bootstrap_sample(x, y, num_reps): sample_stat, sample_p = mw_test(sampled_x, sampled_y) @@ -1665,10 +1729,6 @@ def permute_2d(m, p): """Performs 2D permutation of matrix m according to p.""" return m[p][:, p] - # unused below - m_t = transpose(m) - r_t = take(m_t, p, axis=0) - return take(transpose(r_t), p, axis=0) def mantel(m1, m2, n): @@ -1694,34 +1754,36 @@ Returns the p-value, Mantel correlation statistic, and a list of Mantel correlation statistics for each permutation test. 
- WARNING: The two distance matrices must be symmetric, hollow distance - matrices, as only the lower triangle (excluding the diagonal) will be used - in the calculations (matching R's vegan::mantel function). - Parameters ---------- - m1 - the first distance matrix to use in the test (should be a numpy + m1 + the first distance matrix to use in the test (should be a numpy array or convertible to a numpy array) - m2 - the second distance matrix to use in the test (should be a numpy + m2 + the second distance matrix to use in the test (should be a numpy array or convertible to a numpy array) - n - the number of permutations to test when calculating the p-value - alt - the type of alternative hypothesis to test (can be either + n + the number of permutations to test when calculating the p-value + alt + the type of alternative hypothesis to test (can be either 'two sided' for a two-sided test, 'greater' or 'less' for one-sided tests) - suppress_symmetry_and_hollowness_check - by default, the input distance + suppress_symmetry_and_hollowness_check + by default, the input distance matrices will be checked for symmetry and hollowness. It is recommended to leave this check in place for safety, as the check is fairly fast. However, if you *know* you have symmetric and hollow distance matrices, you can disable this check for small performance gains on extremely large distance matrices + Warnings + -------- + The two distance matrices must be symmetric, hollow distance + matrices, as only the lower triangle (excluding the diagonal) will be used + in the calculations (matching R's vegan::mantel function). """ # Perform some sanity checks on our input. - if alt not in ("two sided", "greater", "less"): - raise ValueError( - "Unrecognized alternative hypothesis. Must be either " - "'two sided', 'greater', or 'less'." 
- ) + alt = _get_alternate(str(alt)) m1, m2 = asarray(m1), asarray(m2) if m1.shape != m2.shape: raise ValueError("Both distance matrices must be the same size.") @@ -1729,9 +1791,10 @@ raise ValueError( "The number of permutations must be greater than or " "equal to one." ) - if not suppress_symmetry_and_hollowness_check: - if not (is_symmetric_and_hollow(m1) and is_symmetric_and_hollow(m2)): - raise ValueError("Both distance matrices must be symmetric and " "hollow.") + if not suppress_symmetry_and_hollowness_check and not ( + is_symmetric_and_hollow(m1) and is_symmetric_and_hollow(m2) + ): + raise ValueError("Both distance matrices must be symmetric and " "hollow.") # Get a flattened list of lower-triangular matrix elements (excluding the # diagonal) in column-major order. Use these values to calculate the @@ -1743,17 +1806,17 @@ size = len(m1) better = 0 perm_stats = [] - for i in range(n): + for _ in range(n): perm = permute_2d(m1, permutation(size)) perm_flat = _flatten_lower_triangle(perm) r = pearson(perm_flat, m2_flat) - if alt == "two sided": + if alt == ALT_TWO_SIDED: if abs(r) >= abs(orig_stat): better += 1 else: - if (alt == "greater" and r >= orig_stat) or ( - alt == "less" and r <= orig_stat + if (alt == ALT_HIGH and r >= orig_stat) or ( + alt == ALT_LOW and r <= orig_stat ): better += 1 perm_stats.append(r) @@ -1786,11 +1849,7 @@ def kendall_correlation(x, y, alt="two sided", exact=None, warn=True): """returns the statistic (tau) and probability from Kendall's non-parametric - test of association that tau==0. Uses the large sample approximation when - len(x) >= 50 or when there are ties, otherwise it computes the probability - exactly. - - Based on the algorithm implemented in R v2.5 + test of association that tau==0. Parameters ---------- @@ -1802,17 +1861,19 @@ warn whether to warn about tied values + Notes + ----- + Uses the large sample approximation when len(x) >= 50 or when there are + ties, otherwise it computes the probability exactly. 
+ + Based on the algorithm implemented in R v2.5 """ assert len(x) == len(y), "data (x, y) not of same length" assert len(x) > 2, "not enough observations" - # possible alternate hypotheses arguments - lo = ["less", "lo", "lower", "l", "lt"] - hi = ["greater", "hi", "high", "h", "g", "gt"] - two = ["two sided", "2", 2, "two tailed", "two", "two.sided", "ts"] + alt = _get_alternate(str(alt)) - ties = False num = len(x) ties = len(set(x)) != num or len(set(y)) != num if ties and warn: @@ -1823,28 +1884,25 @@ if num < 50 and not ties and exact: combs = int(num * (num - 1) / 2) - working = [] - for i in range(combs): - row = [-1 for j in range(combs)] - working.append(row) + working = -ones((combs, combs), dtype=float) tau = kendalls_tau(x, y, False) q = round((tau + 1) * num * (num - 1) / 4) - if alt in two: + if alt == ALT_TWO_SIDED: if q > num * (num - 1) / 4: p = 1 - pkendall(q - 1, num, Gamma(num + 1), working) else: p = pkendall(q, num, Gamma(num + 1), working) p = min(2 * p, 1) - elif alt in hi: + elif alt == ALT_HIGH: p = 1 - pkendall(q - 1, num, Gamma(num + 1), working) - elif alt in lo: + elif alt == ALT_LOW: p = pkendall(q, num, Gamma(num + 1), working) else: tau, p = kendalls_tau(x, y, True) - if alt in hi: + if alt == ALT_HIGH: p /= 2 - elif alt in lo: + elif alt == ALT_LOW: p = 1 - p / 2 return tau, p @@ -1878,6 +1936,8 @@ one half otherwise the degrees of freedom value will be incorrect. 
""" # if matrix is symmetric convert all indices to lower trangular + tails = tails or "2" + tails = _get_alternate(str(tails)) if is_symmetric: cells = get_ltm_cells(cells) if cells2: @@ -1892,7 +1952,7 @@ count_more_extreme = 0 stats = [] indices = list(range(len(matrix))) - for k in range(n): + for _ in range(n): # shuffle the order of indices, and use those to permute the matrix permuted_matrix = permute_2d(matrix, permutation(indices)) special_values, other_values = get_values_from_matrix( @@ -1902,13 +1962,13 @@ # with these p-values, we only use the current_stat value) current_stat, current_p = f(special_values, other_values, tails) stats.append(current_stat) - if tails is None: + if tails == ALT_TWO_SIDED: if abs(current_stat) > abs(stat): count_more_extreme += 1 - elif tails == "low": + elif tails == ALT_LOW: if current_stat < stat: count_more_extreme += 1 - elif tails == "high": + elif tails == ALT_HIGH: if current_stat > stat: count_more_extreme += 1 @@ -1924,11 +1984,11 @@ def get_values_from_matrix(matrix, cells, cells2=None, is_symmetric=True): """get values from matrix positions in cells and cells2 - matrix: the numpy array from which values should be taken - cells: indices of first set of requested values - cells2: indices of second set of requested values or None - if they should be randomly selected - is_symmetric: True if matrix is symmetric + matrix: the numpy array from which values should be taken + cells: indices of first set of requested values + cells2: indices of second set of requested values or None + if they should be randomly selected + is_symmetric: True if matrix is symmetric """ @@ -1955,8 +2015,8 @@ def get_ltm_cells(cells): """converts matrix indices so all are below the diagonal - cells: list of indices into a 2D integer-indexable object - (typically a list or lists of array of arrays) + cells: list of indices into a 2D integer-indexable object + (typically a list or lists of array of arrays) """ new_cells = [] diff -Nru 
python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/util.py python-cogent-2020.12.21a+dfsg/src/cogent3/maths/util.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/maths/util.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/maths/util.py 2020-12-20 23:35:03.000000000 +0000 @@ -50,9 +50,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight", "Sandra Smit", "Thomas La"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Development" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/blast.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/blast.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/blast.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/blast.py 2020-12-20 23:35:03.000000000 +0000 @@ -13,7 +13,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Micah Hamady", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Micah Hamady" __email__ = "hamady@colorado.edu" __status__ = "Prototype" @@ -169,7 +169,7 @@ def MinimalPsiBlastParser9(lines, include_column_names=False): """Yields successive records from lines (props, data list) - lines must be of psi-blast output format + lines must be of psi-blast output format """ return GenericBlastParser9(lines, PsiBlastFinder, include_column_names) @@ -177,7 +177,7 @@ def MinimalBlatParser9(lines, include_column_names=True): """Yields successive records from lines (props, data list) - lines must be of blat output (blast9) format + lines must be of blat output (blast9) format """ return GenericBlastParser9(lines, BlatFinder, include_column_names) diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/blast_xml.py 
python-cogent-2020.12.21a+dfsg/src/cogent3/parse/blast_xml.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/blast_xml.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/blast_xml.py 2020-12-20 23:35:03.000000000 +0000 @@ -7,7 +7,7 @@ __contributors__ = ["Micah Hamady"] __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Kristian Rother" __email__ = "krother@rubor.de" __status__ = "Prototype" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/cigar.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/cigar.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/cigar.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/cigar.py 2020-12-20 23:35:03.000000000 +0000 @@ -26,7 +26,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Hua Ying"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Hua Ying" __email__ = "hua.ying@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/cisbp.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/cisbp.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/cisbp.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/cisbp.py 2020-12-20 23:35:03.000000000 +0000 @@ -8,7 +8,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/clustal.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/clustal.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/clustal.py 2020-06-30 05:30:11.000000000 +0000 +++ 
python-cogent-2020.12.21a+dfsg/src/cogent3/parse/clustal.py 2020-12-20 23:35:03.000000000 +0000 @@ -25,9 +25,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight", "Sandra Smit", "Gavin Huttley", "Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Development" strip = str.strip diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/cogent3_json.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/cogent3_json.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/cogent3_json.py 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/cogent3_json.py 2020-12-20 23:35:03.000000000 +0000 @@ -0,0 +1,50 @@ +"""Supports JSON load/read/write operations on major Cogent3 objects. +""" +import json + +from cogent3.app.data_store import load_record_from_json +from cogent3.util.deserialise import deserialise_object +from cogent3.util.misc import get_object_provenance, open_ + + +__author__ = "Gavin Huttley" +__copyright__ = "Copyright 2007-2020, The Cogent Project" +__credits__ = ["Gavin Huttley", "Stephen Ma"] +__license__ = "BSD-3" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "gavin.huttley@anu.edu.au" +__status__ = "Development" + + +def load_from_json(filename, classes): + """Loads objects from json files. 
+ + Parameters + ---------- + filename: name of the json file + classes: A series of the Cogent3 types, for example: (Alignment, ArrayAlignment) + + """ + assert all( + (isinstance(klass, type) for klass in classes) + ), "classes should be a series of Cogent3 types, for example: (Alignment, ArrayAlignment)" + + with open_(filename) as f: + content = json.loads(f.read()) + try: + _, data, completed = load_record_from_json(content) + if not completed: + raise TypeError("json file is a record for type NotCompleted.") + except (KeyError, TypeError): + data = content + + type_ = data.get("type", None) + if type_ is None: + raise TypeError("json does not contain 'type' key") + + valid_types = {get_object_provenance(klass) for klass in classes} + if type_ not in valid_types: + raise TypeError(f"Invalid data type: {type_} is not one of {valid_types}") + + return deserialise_object(data) diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/dialign.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/dialign.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/dialign.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/dialign.py 2020-12-20 23:35:03.000000000 +0000 @@ -9,7 +9,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/ebi.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/ebi.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/ebi.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/ebi.py 2020-12-20 23:35:03.000000000 +0000 @@ -26,7 +26,7 @@ "Daniel McDonald", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Zongzhi Liu" __email__ = 
"zongzhi.liu@gmail.com" __status__ = "Development" @@ -535,8 +535,7 @@ def dr_itemparser(lines): - """return a key, value pair from lines of a DR item. - """ + """return a key, value pair from lines of a DR item.""" fields = join_split_parser(lines) return fields[0], fields[1:] @@ -1557,7 +1556,7 @@ if __name__ == "__main__": - from getopt import getopt, GetoptError + from getopt import GetoptError, getopt usage = """ Usage: python __.py [options] [source] diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/fasta.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/fasta.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/fasta.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/fasta.py 2020-12-20 23:35:03.000000000 +0000 @@ -17,9 +17,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Development" @@ -291,7 +291,7 @@ done_groups series of group keys to be excluded - """ + """ done_groups = [[], done_groups][done_groups is not None] parser = MinimalFastaParser(data, label_to_name=label_to_name, finder=XmfaFinder) diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/gbseq.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/gbseq.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/gbseq.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/gbseq.py 2020-12-20 23:35:03.000000000 +0000 @@ -12,7 +12,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Matthew Wakefield", "Peter Maxwell", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Matthew Wakefield" __email__ = 
"wakefield@wehi.edu.au" __status__ = "Production" @@ -32,7 +32,7 @@ ---------- doc An xml.dom.minidom.Document, file object of string - + Returns ------- name, cogent sequence diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/gcg.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/gcg.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/gcg.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/gcg.py 2020-12-20 23:35:03.000000000 +0000 @@ -4,7 +4,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Matthew Wakefield", "Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/genbank.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/genbank.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/genbank.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/genbank.py 2020-12-20 23:35:03.000000000 +0000 @@ -2,7 +2,7 @@ from cogent3.core.annotation import Feature from cogent3.core.genetic_code import GeneticCodes from cogent3.core.info import Info -from cogent3.core.moltype import ASCII, DNA, PROTEIN +from cogent3.core.moltype import get_moltype from cogent3.parse.record import FieldWrapper from cogent3.parse.record_finder import ( DelimitedRecordFinder, @@ -14,9 +14,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell", "Matthew Wakefield", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" maketrans = str.maketrans @@ -67,8 +67,7 @@ def indent_splitter(lines): - """Yields the lines 
whenever it hits a line with same indent level as first. - """ + """Yields the lines whenever it hits a line with same indent level as first.""" first_line = True curr = [] for line in lines: @@ -382,17 +381,6 @@ curr = "complement(%s)" % curr return curr - def isAmbiguous(self): - """Returns True if ambiguous (single-base ambiguity or two locations.) - """ - if self.Ambiguity: - return True - try: - iter(self._data) - return True - except: - return False - def first(self): """Returns first base self could be.""" try: @@ -435,8 +423,7 @@ return curr def strand(self): - """Returns strand of components: 1=forward, -1=reverse, 0=both - """ + """Returns strand of components: 1=forward, -1=reverse, 0=both""" curr = {} for i in self: curr[i.Strand] = 1 @@ -651,7 +638,7 @@ """ info_excludes = info_excludes or [] - moltype = moltype or ASCII + moltype = get_moltype(moltype or "text") for rec in MinimalGenbankParser(handle): info = Info() # populate the info object, excluding the sequence @@ -660,10 +647,8 @@ continue info[label] = value - if rec["mol_type"] == "protein": # which it doesn't for genbank - moltype = PROTEIN - elif rec["mol_type"] == "DNA": - moltype = DNA + if rec["mol_type"].lower() in ("dna", "rna", "protein"): + moltype = get_moltype(rec["mol_type"].lower()) try: seq = moltype.make_seq( @@ -703,7 +688,7 @@ if add_annotation: add_annotation(seq, feature, spans) else: - for id_field in ["gene", "note", "product", "clone"]: + for id_field in ["gene", "product", "clone", "note"]: if id_field in feature: name = feature[id_field] if not isinstance(name, str): diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/gff.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/gff.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/gff.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/gff.py 2020-12-20 23:35:03.000000000 +0000 @@ -9,7 +9,7 @@ "Christopher Bradley", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" 
+__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/greengenes.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/greengenes.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/greengenes.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/greengenes.py 2020-12-20 23:35:03.000000000 +0000 @@ -14,7 +14,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Daniel McDonald" __email__ = "daniel.mcdonald@colorado.edu" __status__ = "Prototype" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/__init__.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/__init__.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -48,7 +48,7 @@ "Marcin Cieslik", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/jaspar.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/jaspar.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/jaspar.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/jaspar.py 2020-12-20 23:35:03.000000000 +0000 @@ -10,7 +10,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru 
python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/locuslink.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/locuslink.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/locuslink.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/locuslink.py 2020-12-20 23:35:03.000000000 +0000 @@ -56,9 +56,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Development" maketrans = str.maketrans diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/ncbi_taxonomy.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/ncbi_taxonomy.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/ncbi_taxonomy.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/ncbi_taxonomy.py 2020-12-20 23:35:03.000000000 +0000 @@ -10,7 +10,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Jason Carnes", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Jason Carnes" __email__ = "jason.carnes@sbri.org" __status__ = "Development" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/newick.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/newick.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/newick.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/newick.py 2020-12-20 23:35:03.000000000 +0000 @@ -25,7 +25,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Andrew Butterfield", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" 
diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/nexus.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/nexus.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/nexus.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/nexus.py 2020-12-20 23:35:03.000000000 +0000 @@ -16,7 +16,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Catherine Lozuopone", "Rob Knight", "Micah Hamady", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Catherine Lozupone" __email__ = "lozupone@colorado.edu" __status__ = "Production" @@ -207,8 +207,13 @@ isblock = re.compile(r"begin\s+(data|characters)").search inblock = False - line = infile.readline().lower() - if not line.startswith("#nexus"): + try: + line = infile.readline() + except AttributeError: + # guessing it's a list of strings from a nexus file + line = infile.pop(0) + + if not line.lower().startswith("#nexus"): raise ValueError("not a nexus file") block = [] @@ -225,7 +230,8 @@ elif not line.startswith(";"): block.append(line) - infile.close() + if hasattr(infile, "close"): + infile.close() if not block: raise ValueError("not found DATA or CHARACTER block") diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/paml_matrix.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/paml_matrix.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/paml_matrix.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/paml_matrix.py 2020-12-20 23:35:03.000000000 +0000 @@ -12,7 +12,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Matthew Wakefield", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/paml.py 
python-cogent-2020.12.21a+dfsg/src/cogent3/parse/paml.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/paml.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/paml.py 2020-12-20 23:35:03.000000000 +0000 @@ -4,7 +4,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/phylip.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/phylip.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/phylip.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/phylip.py 2020-12-20 23:35:03.000000000 +0000 @@ -7,7 +7,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Micah Hamady", "Peter Maxwell", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Micah Hamady" __email__ = "hamady@colorado.edu" __status__ = "Prototype" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/psl.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/psl.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/psl.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/psl.py 2020-12-20 23:35:03.000000000 +0000 @@ -3,7 +3,6 @@ Compatible with blat v.34 """ -from cogent3.parse.table import ConvertFields from cogent3.util.table import Table @@ -11,7 +10,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell", "Gavin Huttley", "Anuj Pahwa"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Development" @@ -36,17 +35,7 @@ return header 
-int_series = lambda x: list(map(int, x.replace(",", " ").split())) - -row_converter = ConvertFields( - [(i, int) for i in range(8)] - + [(i, int) for i in range(10, 13)] - + [(i, int) for i in range(14, 18)] - + [(i, int_series) for i in range(18, 21)] -) - - -def MinimalPslParser(data, row_converter=row_converter): +def MinimalPslParser(data): """returns version, header and rows from data""" if type(data) == str: data = open(data) @@ -54,7 +43,6 @@ psl_version = None header = None rows = [] - for record in data: if psl_version is None: assert "psLayout version" in record @@ -72,8 +60,9 @@ continue rows += [record.rstrip().split("\t")] + if header is not None: - yield row_converter(rows[0]) + yield rows[0] rows = [] try: diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/rdb.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/rdb.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/rdb.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/rdb.py 2020-12-20 23:35:03.000000000 +0000 @@ -15,7 +15,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Sandra Smit", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Sandra Smit" __email__ = "sandra.smit@colorado.edu" __status__ = "Development" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/record_finder.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/record_finder.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/record_finder.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/record_finder.py 2020-12-20 23:35:03.000000000 +0000 @@ -20,9 +20,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight", "Gavin Huttley", "Zongzhi Liu"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = 
"Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" strip = str.strip diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/record.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/record.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/record.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/record.py 2020-12-20 23:35:03.000000000 +0000 @@ -10,9 +10,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Development" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/sequence.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/sequence.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/sequence.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/sequence.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,6 +1,7 @@ #!/usr/bin/env python """Classes for reading multiple sequence alignment files in different formats.""" + import re import xml.dom.minidom @@ -29,7 +30,7 @@ "Rob Knight", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -40,8 +41,8 @@ def FromFilenameParser(filename, format=None, **kw): """Arguments: - - filename: name of the sequence alignment file - - format: the multiple sequence file format + - filename: name of the sequence alignment file + - format: the multiple sequence file format """ format = format_from_filename(filename, format) f = open_(filename, newline=None, mode="rt") @@ -108,6 +109,7 @@ "msf": gcg.MsfParser, "nex": nexus.MinimalNexusAlignParser, "nxs": nexus.MinimalNexusAlignParser, + "nexus": 
nexus.MinimalNexusAlignParser, } XML_PARSERS = {"gbseq": gbseq.GbSeqXmlParser, "tseq": tinyseq.TinyseqParser} diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/table.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/table.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/table.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/table.py 2020-12-20 23:35:03.000000000 +0000 @@ -15,119 +15,12 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" -class ConvertFields(object): - """converter for input data to Table""" - - def __init__(self, conversion, by_column=True): - """handles conversions of columns or lines - - Parameters - ---------- - by_column - conversion will by done for each column, otherwise - done by entire line - - """ - super(ConvertFields, self).__init__() - discontinued("function", "ConvertFields", "2020.11.1") - - self.conversion = conversion - self.by_column = by_column - - self._func = self.convert_by_columns - - if not self.by_column: - assert isinstance( - conversion, Callable - ), "conversion must be callable to convert by line" - self._func = self.convert_by_line - - def convert_by_columns(self, line): - """converts each column in a line""" - for index, cast in self.conversion: - line[index] = cast(line[index]) - return line - - def convert_by_line(self, line): - """converts each column in a line""" - return self.conversion(line) - - def __call__(self, *args, **kwargs): - return self._func(*args, **kwargs) - - -def SeparatorFormatParser( - with_header=True, - converter=None, - ignore=None, - sep=",", - strip_wspace=True, - limit=None, - **kw, -): - """Returns a parser for a delimited tabular file. 
- - Parameters - ---------- - with_header - when True, first line is taken to be the header. Not - passed to converter. - converter - a callable that returns a correctly formatted line. - ignore - lines for which ignore returns True are ignored. White - lines are always skipped. - sep - the delimiter separating fields. - strip_wspace - removes redundant white - limit - exits after this many lines - - """ - if ignore is None: # keep all lines - ignore = lambda x: False - - by_column = getattr(converter, "by_column", True) - discontinued("function", "SeparatorFormatParser", "2020.11.1") - - def callable(lines): - num_lines = 0 - header = None - for line in lines: - if is_empty(line): - continue - - line = line.strip("\n").split(sep) - if strip_wspace and by_column: - line = [field.strip() for field in line] - - if with_header and not header: - header = True - yield line - continue - - if converter: - line = converter(line) - - if ignore(line): - continue - - yield line - - num_lines += 1 - if limit is not None and num_lines >= limit: - break - - return callable - - class FilteringParser: """A parser for a delimited tabular file that returns records matching a condition.""" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/tinyseq.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/tinyseq.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/tinyseq.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/tinyseq.py 2020-12-20 23:35:03.000000000 +0000 @@ -12,7 +12,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Matthew Wakefield", "Peter Maxwell", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" @@ -32,7 +32,7 @@ ---------- doc An xml.dom.minidom.Document, file object of string - + Returns ------- name, cogent sequence diff -Nru 
python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/tree.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/tree.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/tree.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/tree.py 2020-12-20 23:35:03.000000000 +0000 @@ -25,9 +25,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight", "Catherine Lozupone", "Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Development" strip = str.strip diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/tree_xml.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/tree_xml.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/tree_xml.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/tree_xml.py 2020-12-20 23:35:03.000000000 +0000 @@ -35,7 +35,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/unigene.py python-cogent-2020.12.21a+dfsg/src/cogent3/parse/unigene.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/parse/unigene.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/parse/unigene.py 2020-12-20 23:35:03.000000000 +0000 @@ -17,9 +17,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = 
"Gavin.Huttley@anu.edu.au" __status__ = "Development" maketrans = str.maketrans diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/phylo/consensus.py python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/consensus.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/phylo/consensus.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/consensus.py 2020-12-20 23:35:03.000000000 +0000 @@ -11,10 +11,10 @@ __author__ = "Matthew Wakefield" -__copyright__ = "Copyright 2007-2015, The Cogent Project" +__copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Matthew Wakefield", "Peter Maxwell", "Gavin Huttley", "Ben Kaehler"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/phylo/__init__.py python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/__init__.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/phylo/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -14,7 +14,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Peter Maxwell", "Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/phylo/least_squares.py python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/least_squares.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/phylo/least_squares.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/least_squares.py 2020-12-20 23:35:03.000000000 +0000 @@ -15,7 +15,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ 
= "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -46,10 +46,10 @@ def __init__(self, dists, weights=None): """Arguments: - - dists: a dict with structure (seq1, seq2): distance - - weights: an equivalently structured dict with measurements of - variability of the distance estimates. By default, the sqrt of - distance is used.""" + - dists: a dict with structure (seq1, seq2): distance + - weights: an equivalently structured dict with measurements of + variability of the distance estimates. By default, the sqrt of + distance is used.""" try: dists = dists.to_dict() except AttributeError: diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/phylo/maximum_likelihood.py python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/maximum_likelihood.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/phylo/maximum_likelihood.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/maximum_likelihood.py 2020-12-20 23:35:03.000000000 +0000 @@ -11,7 +11,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/phylo/nj.py python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/nj.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/phylo/nj.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/nj.py 2020-12-20 23:35:03.000000000 +0000 @@ -24,7 +24,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -55,11 
+55,11 @@ class PartialTree(object): """A candidate tree stored as - (distance matrix, list of subtrees, list of tip sets, set of partitions, score). - At each iteration (ie: call of the join method) the number of subtrees - is reduced as 2 of them are joined, while the number of partitions is - increased as a new edge is introduced. - """ + (distance matrix, list of subtrees, list of tip sets, set of partitions, score). + At each iteration (ie: call of the join method) the number of subtrees + is reduced as 2 of them are joined, while the number of partitions is + increased as a new edge is introduced. + """ def __init__(self, d, nodes, tips, score): self.d = d @@ -285,7 +285,7 @@ def nj(dists, show_progress=True): """Arguments: - - dists: dict of (name1, name2): distance + - dists: dict of (name1, name2): distance """ (result,) = gnj(dists, keep=1, show_progress=show_progress) (score, tree) = result diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/phylo/tree_collection.py python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/tree_collection.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/phylo/tree_collection.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/tree_collection.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,13 +1,17 @@ from numpy import exp, log +from cogent3.util.misc import atomic_write + from . 
import consensus __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2015, The Cogent Project" +__copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Ben Kaehler"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" class _UserList(list): @@ -24,14 +28,13 @@ """An ordered list of (score, tree) tuples""" def write(self, filename): - f = open(filename, "w") - for (score, tree) in self: - f.writelines( - self.scored_tree_format( - tree.get_newick(with_distances=True), str(score) + with atomic_write(filename, mode="wt") as f: + for (score, tree) in self: + f.writelines( + self.scored_tree_format( + tree.get_newick(with_distances=True), str(score) + ) ) - ) - f.close() def scored_tree_format(self, tree, score): return [tree, "\t[", score, "]\n"] diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/phylo/tree_space.py python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/tree_space.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/phylo/tree_space.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/tree_space.py 2020-12-20 23:35:03.000000000 +0000 @@ -14,7 +14,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/phylo/util.py python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/util.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/phylo/util.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/phylo/util.py 2020-12-20 23:35:03.000000000 +0000 @@ -12,9 +12,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = 
"2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "pm67nz@gmail.com" -__email__ = "rob@spot.colorado.edu" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/recalculation/calculation.py python-cogent-2020.12.21a+dfsg/src/cogent3/recalculation/calculation.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/recalculation/calculation.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/recalculation/calculation.py 2020-12-20 23:35:03.000000000 +0000 @@ -20,7 +20,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/recalculation/definition.py python-cogent-2020.12.21a+dfsg/src/cogent3/recalculation/definition.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/recalculation/definition.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/recalculation/definition.py 2020-12-20 23:35:03.000000000 +0000 @@ -88,7 +88,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -287,11 +287,7 @@ k: set(v) for k, v in zip(range(num_valid_dims), zip(*self.index)) } - discard = [] - for k, v in dimensioned.items(): - if len(v) == 1: - discard.append(k) - + discard = [k for k, v in dimensioned.items() if len(v) == 1] for k in discard: del dimensioned[k] diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/recalculation/__init__.py python-cogent-2020.12.21a+dfsg/src/cogent3/recalculation/__init__.py --- 
python-cogent-2020.6.30a0+dfsg/src/cogent3/recalculation/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/recalculation/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -5,7 +5,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/recalculation/scope.py python-cogent-2020.12.21a+dfsg/src/cogent3/recalculation/scope.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/recalculation/scope.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/recalculation/scope.py 2020-12-20 23:35:03.000000000 +0000 @@ -17,7 +17,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/recalculation/setting.py python-cogent-2020.12.21a+dfsg/src/cogent3/recalculation/setting.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/recalculation/setting.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/recalculation/setting.py 2020-12-20 23:35:03.000000000 +0000 @@ -9,7 +9,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/util/checkpointing.py python-cogent-2020.12.21a+dfsg/src/cogent3/util/checkpointing.py --- 
python-cogent-2020.6.30a0+dfsg/src/cogent3/util/checkpointing.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/util/checkpointing.py 2020-12-20 23:35:03.000000000 +0000 @@ -8,7 +8,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/util/deserialise.py python-cogent-2020.12.21a+dfsg/src/cogent3/util/deserialise.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/util/deserialise.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/util/deserialise.py 2020-12-20 23:35:03.000000000 +0000 @@ -16,7 +16,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -26,6 +26,8 @@ index = provenance.rfind(".") assert index > 0 klass = provenance[index + 1 :] + nc = "NotCompleted" + klass = nc if nc in klass else klass mod = import_module(provenance[:index]) klass = getattr(mod, klass) return klass @@ -260,7 +262,6 @@ """returns a cogent3 likelihood function instance""" data.pop("version", None) model = deserialise_substitution_model(data.pop("model")) - aln = deserialise_seq_collections(data.pop("alignment")) tree = deserialise_tree(data.pop("tree")) constructor_args = data.pop("likelihood_construction") motif_probs = data.pop("motif_probs") @@ -268,9 +269,18 @@ name = data.pop("name", None) lf = model.make_likelihood_function(tree, **constructor_args) lf.set_name(name) + lf = model.make_likelihood_function(tree, **constructor_args) + if isinstance(constructor_args["loci"], list): + align = data["alignment"] + 
aln = [deserialise_seq_collections(align[k]) for k in align] + mprobs = [motif_probs[k] for k in motif_probs] + else: + aln = deserialise_seq_collections(data.pop("alignment")) + mprobs = [motif_probs] lf.set_alignment(aln) with lf.updates_postponed(): - lf.set_motif_probs(motif_probs) + for motif_probs in mprobs: + lf.set_motif_probs(motif_probs) for rule in param_rules: lf.set_param_rule(**rule) return lf diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/util/dict_array.py python-cogent-2020.12.21a+dfsg/src/cogent3/util/dict_array.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/util/dict_array.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/util/dict_array.py 2020-12-20 23:35:03.000000000 +0000 @@ -27,15 +27,14 @@ import numpy -from cogent3.format import table -from cogent3.util.misc import get_object_provenance, open_ +from cogent3.util.misc import atomic_write, get_object_provenance __author__ = "Peter Maxwell" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Ben Kaehler"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -311,12 +310,17 @@ return DictArray(array, self) def interpret_index(self, names): + if isinstance(names, numpy.ndarray) and "int" in names.dtype.name: + # the numpy item() method casts to the nearest Python type + names = tuple(v.item() for v in names) + if not isinstance(names, tuple): names = (names,) + index = [] remaining = [] for (ordinals, allnames, name) in zip(self.ordinals, self.names, names): - if type(name) not in (int, slice): + if type(name) not in (int, slice, list, numpy.ndarray): name = ordinals[name] elif isinstance(name, slice): start = name.start @@ -333,49 +337,32 @@ pass name = slice(start, stop, name.step) remaining.append(allnames.__getitem__(name)) + elif type(name) in (list, numpy.ndarray): + name 
= [n if type(n) == int else ordinals[n] for n in name] + remaining.append([allnames[i] for i in name]) + index.append(name) remaining.extend(self.names[len(index) :]) - if remaining: - klass = type(self)(*remaining) - else: - klass = None + klass = type(self)(*remaining) if remaining else None return (tuple(index), klass) - def array_repr(self, a): - if len(a.shape) == 1: - heading = [str(n) for n in self.names[0]] - a = a[numpy.newaxis, :] - elif len(a.shape) == 2: - heading = [""] + [str(n) for n in self.names[1]] - a = [[str(name)] + list(row) for (name, row) in zip(self.names[0], a)] - else: - return "%s dimensional %s" % (len(self.names), type(self).__name__) - - formatted = table.formatted_cells(rows=a, header=heading) - return str(table.simple_format(formatted[0], formatted[1], space=4)) - - def _get_repr_html(self, a): - """returns Table._repr_html_()""" - from cogent3.util.table import Table - - if len(a.shape) == 1: - heading = [str(n) for n in self.names[0]] - a = a[numpy.newaxis, :] - elif len(a.shape) == 2: - heading = [""] + [str(n) for n in self.names[1]] - a = [[str(name)] + list(row) for (name, row) in zip(self.names[0], a)] - a = {d[0]: d[1:] for d in zip(heading, *a)} - else: - return "%s dimensional %s" % (len(self.names), type(self).__name__) - - t = Table(heading, data=a, digits=3, index=heading[0], max_width=80) - return t._repr_html_(include_shape=False) - class DictArray(object): - """Wraps a numpy array so that it can be indexed with strings like nested - dictionaries (only ordered), for things like substitution matrices and - bin probabilities.""" + """Wraps a numpy array so that it can be indexed with strings. Behaves + like nested dictionaries (only ordered). + + Notes + ----- + Used for things like substitution matrices and bin probabilities. + + Indexing can be done via conventional integer based operations, using + keys, lists of int/keys. + + Behaviour differs from numpy array indexing when you provide lists of + indices. 
Such indexing is applied sequentially, e.g. darr[[0, 2], [1, 2]] + will return the intersection of rows [0, 2] with columns [1, 2]. In numpy, + the result would instead be the elements at [0, 1], [2, 2]. + """ def __init__(self, *args, **kwargs): """allow alternate ways of creating for time being""" @@ -454,9 +441,28 @@ def __getitem__(self, names): (index, remaining) = self.template.interpret_index(names) - result = self.array[index] + if list in {type(v) for v in index}: + result = self.array + for dim, indices in enumerate(index): + if isinstance(indices, slice): + indices = ( + (indices,) + if dim == 0 + else (slice(None, None),) * dim + (indices,) + ) + result = result[tuple(indices)] + continue + + if isinstance(indices, int): + indices = [indices] + + result = result.take(indices, axis=dim) + + else: + result = self.array[index] + if remaining is not None: - result = self.__class__(result, remaining) + result = self.__class__(result.reshape(remaining._shape), remaining) return result def __iter__(self): @@ -477,7 +483,12 @@ return [(n, self[n]) for n in list(self.keys())] def __repr__(self): - return self.template.array_repr(self.array) + if self.array.ndim > 2: + return "%s dimensional %s" % (self.array.ndim, type(self).__name__) + + t = self.to_table() + t.set_repr_policy(show_shape=False) + return str(t) def __ne__(self, other): return not self.__eq__(other) @@ -532,7 +543,12 @@ return template.wrap(result) def _repr_html_(self): - return self.template._get_repr_html(self.array) + if self.array.ndim > 2: + return "%s dimensional %s" % (self.array.ndim, type(self).__name__) + + t = self.to_table() + t.set_repr_policy(show_shape=False) + return t._repr_html_() def to_string(self, format="tsv", sep=None): """Return the data as a formatted string. 
@@ -558,9 +574,32 @@ ] return "\n".join([sep.join(row) for row in rows]) - def write(self, path, format="tsv", sep="\t"): + def to_table(self): + """return Table instance + + Notes + ----- + Raises ValueError if number of dimensions > 2 """ - writes a flattened version to path + ndim = self.array.ndim + if ndim > 2: + raise ValueError(f"cannot make 2D table from {ndim}D array") + + from .table import Table + + header = self.template.names[0] if ndim == 1 else self.template.names[1] + index = "" if ndim == 2 else None + if ndim == 1: + data = {c: [v] for c, v in zip(header, self.array)} + else: + data = {c: self.array[:, i].tolist() for i, c in enumerate(header)} + data[""] = self.template.names[0] + + return Table(header=header, data=data, index_name=index) + + def write(self, path, format="tsv", sep="\t"): + """writes a flattened version to path + Parameters ---------- path : str @@ -573,5 +612,5 @@ provided """ data = self.to_string(format=format, sep=sep) - with open_(path, "w") as outfile: + with atomic_write(path, mode="wt") as outfile: outfile.write(data) diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/util/__init__.py python-cogent-2020.12.21a+dfsg/src/cogent3/util/__init__.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/util/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/util/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -8,7 +8,6 @@ "table", "transform", "union_dict", - "unit_test", "warning", "recode_alignment", ] @@ -28,7 +27,7 @@ "Thomas La", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/util/misc.py python-cogent-2020.12.21a+dfsg/src/cogent3/util/misc.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/util/misc.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/util/misc.py 
2020-12-20 23:35:03.000000000 +0000 @@ -1,17 +1,22 @@ #!/usr/bin/env python """Generally useful utility classes and methods. """ +import os +import pathlib import re +import warnings import zipfile from bz2 import open as bzip_open from gzip import open as gzip_open +from io import TextIOWrapper from os import path as os_path from os import remove from pathlib import Path from random import choice, randint from tempfile import NamedTemporaryFile, gettempdir from warnings import warn +from zipfile import ZipFile import numpy @@ -31,9 +36,9 @@ "Marcin Cieslik", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -58,9 +63,9 @@ def adjusted_gt_minprob(probs, minprob=1e-6): """returns numpy array of probs scaled such that minimum is > minval - + result sums to 1 within machine precision - + if 2D array, assumes row-order""" assert 0 <= minprob < 1, "invalid minval %s" % minprob probs = array(probs, dtype=float64) @@ -78,7 +83,7 @@ def adjusted_within_bounds(value, lower, upper, eps=1e-7, action="warn"): """returns value such that lower <= value <= upper - + Parameters ---------- value @@ -128,10 +133,34 @@ return data +def open_zip(filename, mode="r", **kwargs): + """open a single member zip-compressed file + + Note + ---- + If mode="r". The function raises ValueError if zip has > 1 record. + The returned object is wrapped by TextIOWrapper with latin encoding + (so it's not a bytes string). + + If mode="w", returns an atomic_write() instance. 
+ """ + if mode.startswith("w"): + return atomic_write(filename, mode=mode, in_zip=True) + + mode = mode.strip("t") + with ZipFile(filename) as zf: + if len(zf.namelist()) != 1: + raise ValueError("Archive is supposed to have only one record.") + opened = zf.open(zf.namelist()[0], mode=mode, **kwargs) + return TextIOWrapper(opened, encoding="latin-1") + + def open_(filename, mode="rt", **kwargs): """open that handles different compression""" filename = Path(filename).expanduser().absolute() - op = {".gz": gzip_open, ".bz2": bzip_open}.get(filename.suffix, open) + op = {".gz": gzip_open, ".bz2": bzip_open, ".zip": open_zip}.get( + filename.suffix, open + ) return op(filename, mode, **kwargs) @@ -139,6 +168,12 @@ """performs atomic write operations, cleans up if fails""" def __init__(self, path, tmpdir=None, in_zip=None, mode="w"): + path = pathlib.Path(path).expanduser() + _, cmp = get_format_suffixes(path) + if in_zip and cmp == "zip": + in_zip = path if isinstance(in_zip, bool) else in_zip + path = pathlib.Path(str(path)[: str(path).rfind(".zip")]) + self._path = path self._mode = mode self._file = None @@ -153,57 +188,69 @@ def _get_tmp_dir(self): """returns parent of destination file""" - if self._in_zip: - parent = Path(self._in_zip).parent - else: - parent = Path(self._path).parent + parent = Path(self._in_zip).parent if self._in_zip else Path(self._path).parent if not parent.exists(): raise FileNotFoundError(f"{parent} directory does not exist") return parent - def __enter__(self): - self._file = NamedTemporaryFile(self._mode, delete=False, dir=self._tmpdir) + def _get_fileobj(self): + """returns file to be written to""" + if self._file is None: + self._file = NamedTemporaryFile(self._mode, delete=False, dir=self._tmpdir) + return self._file - def _close_rename_standard(self, p): + def __enter__(self): + return self._get_fileobj() + + def _close_rename_standard(self, src): + dest = Path(self._path) try: - f = Path(self._path) - f.unlink() + dest.unlink() 
except FileNotFoundError: pass finally: - p.rename(self._path) + src.rename(dest) - def _close_rename_zip(self, p): + def _close_rename_zip(self, src): with zipfile.ZipFile(self._in_zip, "a") as out: - out.write(str(p), arcname=self._path) + out.write(str(src), arcname=self._path) - p.unlink() + src.unlink() def __exit__(self, exc_type, exc_val, exc_tb): self._file.close() - p = Path(self._file.name) + tmpfile_name = Path(self._file.name) if exc_type is None: - self._close_func(p) + self._close_func(tmpfile_name) self.succeeded = True else: self.succeeded = False - p.unlink() + tmpfile_name.unlink() + + def write(self, text): + """writes text to file""" + fileobj = self._get_fileobj() + fileobj.write(text) + + def close(self): + """closes file""" + self.__exit__(None, None, None) _wout_period = re.compile(r"^\.") def get_format_suffixes(filename): - """returns compression and/or file suffixes""" + """returns file, compression suffixes""" filename = Path(filename) if not filename.suffix: return None, None compression_suffixes = ("bz2", "gz", "zip") - suffixes = [_wout_period.sub("", sfx) for sfx in filename.suffixes[-2:]] + suffixes = [_wout_period.sub("", sfx).lower() for sfx in filename.suffixes[-2:]] if suffixes[-1] in compression_suffixes: - cmp_suffix = filename.suffix[1:] + cmp_suffix = suffixes[-1] else: cmp_suffix = None @@ -915,8 +962,7 @@ def remove_files(list_of_filepaths, error_on_missing=True): - """Remove list of filepaths, optionally raising an error if any are missing - """ + """Remove list of filepaths, optionally raising an error if any are missing""" missing = [] for fp in list_of_filepaths: try: @@ -1045,9 +1091,13 @@ # algorithm inspired by Greg Baacon's answer to # https://stackoverflow.com/questions/2020014/get-fully-qualified-class # -name-of-an-object-in-python - mod = obj.__class__.__module__ - name = obj.__class__.__name__ - result = None + if isinstance(obj, type): + mod = obj.__module__ + name = obj.__name__ + else: + mod = 
obj.__class__.__module__ + name = obj.__class__.__name__ + if mod is None or mod == "builtins": result = name else: @@ -1091,3 +1141,44 @@ if source_array is not None: return numpy.ascontiguousarray(source_array, dtype=dtype) return source_array + + +def get_setting_from_environ(environ_var, params_types): + """extract settings from environment variable + + Parameters + ---------- + environ_var : str + name of an environment variable + params_types : dict + {param name: type}, values will be cast to type + + Returns + ------- + dict + + Notes + ----- + settings must of form 'param_name1=param_val,param_name2=param_val2' + """ + var = os.environ.get(environ_var, None) + if var is None: + return {} + + var = var.split(",") + result = {} + for item in var: + item = item.split("=") + if len(item) != 2 or item[0] not in params_types: + continue + + name, val = item + try: + val = params_types[name](val) + result[name] = val + except Exception: + warnings.warn( + f"could not cast {name}={val} to type {params_types[name]}, skipping" + ) + + return result diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/util/modules.py python-cogent-2020.12.21a+dfsg/src/cogent3/util/modules.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/util/modules.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/util/modules.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,48 +0,0 @@ -#!/usr/bin/env python -"""Compiled modules may be out of date or missing""" - -import os -import sys - - -__author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2020, The Cogent Project" -__credits__ = ["Peter Maxwell"] -__license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Peter Maxwell" -__email__ = "pm67nz@gmail.com" -__status__ = "Production" - - -class ExpectedImportError(ImportError): - pass - - -def fail(msg): - print(msg, file=sys.stderr) - raise ExpectedImportError - - -def importVersionedModule(name, exec_globals, min_version, alt_desc): - if 
"COGENT3_PURE_PYTHON" in os.environ: - fail('Not using compiled module "%s". Will use %s.' % (name, alt_desc)) - try: - m = __import__(name, exec_globals) - except ImportError: - fail('Compiled module "%s" not found. Will use %s.' % (name, alt_desc)) - version = getattr(m, "version_info", (0, 0)) - desc = ".".join(str(n) for n in version) - min_desc = ".".join(str(n) for n in min_version) - max_desc = str(min_version[0]) + ".x" - if version < min_version: - fail( - 'Compiled module "%s" is too old as %s < %s. ' - "Will use %s." % (name, desc, min_desc, alt_desc) - ) - if version[0] > min_version[0]: - fail( - 'Compiled module "%s" is too new as %s > %s. ' - "Will use %s." % (name, desc, max_desc, alt_desc) - ) - return m diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/util/parallel.py python-cogent-2020.12.21a+dfsg/src/cogent3/util/parallel.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/util/parallel.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/util/parallel.py 2020-12-20 23:35:03.000000000 +0000 @@ -19,7 +19,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Sheng Han Moses Koh", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" @@ -60,7 +60,7 @@ def is_master_process(): """ Evaluates if current process is master - + In case of MPI checks whether current process is being run on file generated by mpi4py.futures diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/util/progress_display.py python-cogent-2020.12.21a+dfsg/src/cogent3/util/progress_display.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/util/progress_display.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/util/progress_display.py 2020-12-20 23:35:03.000000000 +0000 @@ -13,7 +13,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent 
Project" __credits__ = ["Peter Maxwell", "Sheng Han Moses Koh"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/util/recode_alignment.py python-cogent-2020.12.21a+dfsg/src/cogent3/util/recode_alignment.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/util/recode_alignment.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/util/recode_alignment.py 2020-12-20 23:35:03.000000000 +0000 @@ -51,7 +51,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Greg Caporaso"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Greg Caporaso" __email__ = "gregcaporaso@gmail.com" __status__ = "Beta" @@ -362,23 +362,23 @@ def build_alphabet_map(alphabet_id=None, alphabet_def=None): - """ return dict mapping old alphabet chars to new alphabet chars + """return dict mapping old alphabet chars to new alphabet chars - alphabet_id: string identifying an alphabet in - cogent3.util.recode_alignment.alphabets. - (See cogent3.util.recode_alignment.alphabets.keys() - for valid alphabet_ids.) - alphabet_def: list of two-element tuples where first element is - the new alphabet character and the second elements is an iterable - object containing the old alphabet chars which should be mapped to - the new char. - e.g., [('A','CVILFMWAGSTPYH'),('B','QNDERKBZ')] - (See cogent3.util.recode_alignment.alphabets.values() - for more examples.) - - NOTE: Only one of the two parameters should be provided -- you either - provide the alphabet, or it is looked up. If you do provide both, - the alphabet_id is ignored. + alphabet_id: string identifying an alphabet in + cogent3.util.recode_alignment.alphabets. + (See cogent3.util.recode_alignment.alphabets.keys() + for valid alphabet_ids.) 
+ alphabet_def: list of two-element tuples where first element is + the new alphabet character and the second elements is an iterable + object containing the old alphabet chars which should be mapped to + the new char. + e.g., [('A','CVILFMWAGSTPYH'),('B','QNDERKBZ')] + (See cogent3.util.recode_alignment.alphabets.values() + for more examples.) + + NOTE: Only one of the two parameters should be provided -- you either + provide the alphabet, or it is looked up. If you do provide both, + the alphabet_id is ignored. """ try: @@ -399,22 +399,22 @@ def recode_dense_alignment(aln, alphabet_id=None, alphabet_def=None): """Return new ArrayAlignment recoded in the provided reduced-state alphabet - aln: the ArrayAlignment object to be recoded - alphabet_id: string identifying an alphabet in - cogent3.util.recode_alignment.alphabets. - (See cogent3.util.recode_alignment.alphabets.keys() - for valid alphabet_ids.) - alphabet_def: list of two-element tuples where first element is - the new alphabet character and the second elements is an iterable - object containing the old alphabet chars which should be mapped to - the new char. - e.g., [('A','CVILFMWAGSTPYH'),('B','QNDERKBZ')] - (See cogent3.util.recode_alignment.alphabets.values() - for more examples.) - - Note: either alphabet_id OR alphabet_def must be passed. Either - provide the alphabet, or have it is looked up. If both are provided - the alphabet_id is ignored. + aln: the ArrayAlignment object to be recoded + alphabet_id: string identifying an alphabet in + cogent3.util.recode_alignment.alphabets. + (See cogent3.util.recode_alignment.alphabets.keys() + for valid alphabet_ids.) + alphabet_def: list of two-element tuples where first element is + the new alphabet character and the second elements is an iterable + object containing the old alphabet chars which should be mapped to + the new char. + e.g., [('A','CVILFMWAGSTPYH'),('B','QNDERKBZ')] + (See cogent3.util.recode_alignment.alphabets.values() + for more examples.) 
+ + Note: either alphabet_id OR alphabet_def must be passed. Either + provide the alphabet, or have it is looked up. If both are provided + the alphabet_id is ignored. """ @@ -445,21 +445,21 @@ def recode_freq_vector(alphabet_def, freqs, ignores="BXZ"): - """ recode the bg_freqs to reflect the recoding defined in alphabet_def + """recode the bg_freqs to reflect the recoding defined in alphabet_def - alphabet_def: list of tuples where new char is first tuple element - and sequence of old chars is second tuple element. (For examples, - see cogent3.util.recode_alignment.alphabets.values()) - freqs: dict mapping chars to their frequencies - ignores: the degenerate characters -- we don't want to include these - in the new freqs, b/c they'll be counted multiple times. Also, - isn't clear what should be done if an alphabet were to split them - apart. - - Note: there is no error-checking here, so you need to be sure that - the alphabet and the frequencies are compatible (i.e., freqs and the - old characters must overlap perfectly, with the exception of the - degenerate characters, which are ignored by default). + alphabet_def: list of tuples where new char is first tuple element + and sequence of old chars is second tuple element. (For examples, + see cogent3.util.recode_alignment.alphabets.values()) + freqs: dict mapping chars to their frequencies + ignores: the degenerate characters -- we don't want to include these + in the new freqs, b/c they'll be counted multiple times. Also, + isn't clear what should be done if an alphabet were to split them + apart. + + Note: there is no error-checking here, so you need to be sure that + the alphabet and the frequencies are compatible (i.e., freqs and the + old characters must overlap perfectly, with the exception of the + degenerate characters, which are ignored by default). 
""" result = {} for new, olds in alphabet_def: @@ -486,14 +486,14 @@ def recode_count_matrix(alphabet, count_matrix, aa_order): """Recodes a subsitution count matrix - alphabet: the alphabet to be used for recoding the matrix - (see cogent3.util.recode_alignment.alphabets.values()) for - examples - count_matrix: matrix to be recoded (e.g., - cogent3.evolve.models.DSO78_matrix) - aa_order: the order of the rows/cols in the matrix as a string - (for cogent3.evolve.models.DSO78_matrix this would be - 'ACDEFGHIKLMNPQRSTVWY') + alphabet: the alphabet to be used for recoding the matrix + (see cogent3.util.recode_alignment.alphabets.values()) for + examples + count_matrix: matrix to be recoded (e.g., + cogent3.evolve.models.DSO78_matrix) + aa_order: the order of the rows/cols in the matrix as a string + (for cogent3.evolve.models.DSO78_matrix this would be + 'ACDEFGHIKLMNPQRSTVWY') """ m = square_matrix_to_dict(count_matrix, aa_order) @@ -527,8 +527,7 @@ freqs=DSO78_freqs, aa_order="ACDEFGHIKLMNPQRSTVWY", ): - """ recode a substituion count matrix and a vector of character freqs - """ + """recode a substituion count matrix and a vector of character freqs""" recoded_freqs = recode_freq_vector(alphabet, freqs) for aa in aa_order: diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/util/table.py python-cogent-2020.12.21a+dfsg/src/cogent3/util/table.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/util/table.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/util/table.py 2020-12-20 23:35:03.000000000 +0000 @@ -26,10 +26,10 @@ from cogent3.format import table as table_format from cogent3.util.dict_array import DictArray, DictArrayTemplate from cogent3.util.misc import ( + atomic_write, extend_docstring_from, get_format_suffixes, get_object_provenance, - open_, ) from cogent3.util.union_dict import UnionDict from cogent3.util.warning import deprecated @@ -42,9 +42,9 @@ __author__ = "Gavin Huttley" __copyright__ = "Copyright 2007-2020, The 
Cogent Project" -__credits__ = ["Gavin Huttley", "Felix Schill"] +__credits__ = ["Gavin Huttley", "Felix Schill", "Sheng Koh"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -95,90 +95,12 @@ return eval(callback, {}, row) -def formatted_array( - series, title="", precision=4, format_spec=None, missing_data="", center=False, -): - """converts elements in a numpy array series to an equal length string. +_num_type = re.compile("^(float|int|complex)").search - Parameters - ---------- - series - the series of table rows - title - title of series - precision - number of decimal places. Can be overridden by following. - format_spec - format specification as per the python Format Specification, Mini-Language - or a callable function. - missing_data - default missing data value. - - Returns - ------- - list of formatted series, formatted title - """ - if callable(format_spec): - formatter = format_spec - format_spec = base_format = "" - else: - formatter = None - - if isinstance(format_spec, str): - format_spec = format_spec.replace("%", "") - - if format_spec: - match = re.search("[<>^]", format_spec[:2]) - final_align = ">" if match is None else match.group() - align = "" - else: - final_align = align = ">" - - base_format = format_spec if format_spec else "" - assert isinstance(series, numpy.ndarray), "must be numpy array" - if format_spec is None: - type_name = series.dtype.name - align = "^" if center else ">" - if "int" in type_name: - base_format = "d" - elif "float" in type_name: - base_format = f".{precision}f" - elif "bool" == type_name: - base_format = "" - else: - # handle mixed types with a custom formatter - formatter = _MixedFormatter( - align, len(title), precision, missing_data=missing_data - ) - format_spec = base_format = "" - format_spec = base_format - - formatted = [] - max_length = len(title) - for i, v in 
enumerate(series): - if formatter: - v = formatter(v) - else: - try: - v = format(v, format_spec) - except (TypeError, ValueError): - # could be a python object - v = str(v) - l = len(v) - if l > max_length: - max_length = l - format_spec = f"{align}{max_length}{base_format}" - formatted.append(v) - - # title is always right aligned, for now - title = format(title, f">{max_length}") - # now adjust to max_len - format_spec = f"{final_align}{max_length}s" - for i in range(len(series)): - if len(formatted[i]) < max_length: - formatted[i] = format(formatted[i].strip(), format_spec) - return formatted, title +def array_is_num_type(data): + """whether data has a dtype for int, float or complex""" + return _num_type(data.dtype.name) != None def cast_str_to_numeric(values): @@ -275,36 +197,6 @@ return result -class _MixedFormatter: - """handles formatting of mixed data types""" - - def __init__( - self, alignment, length, precision=4, float_type="f", missing_data=None - ): - self.missing_data = missing_data - self.length = length - self.alignment = alignment - self.precision = precision - self.float_type = float_type - - def __call__(self, val): - prefix = f"{self.alignment}{self.length}" - float_spec = f"{prefix}.{self.precision}{self.float_type}" - int_spec = f"{prefix}d" - result = str(val) - if self.missing_data is not None and not result: - return self.missing_data - - for fspec in (int_spec, float_spec, prefix): - try: - result = format(val, fspec) - break - except (TypeError, ValueError): - pass - - return result - - class Columns(MutableMapping): """Collection of columns. 
iter operates over columns.""" @@ -336,8 +228,14 @@ key = self._order[key[0]] if type(key) in (list, tuple): - key = [self._get_key_(k) for k in key] - elif isinstance(key, numpy.ndarray): + if all(type(e) == bool for e in key) and len(key) == len(self.order): + key = [k for k, b in zip(self.order, key) if b] + else: + key = [self._get_key_(k) for k in key] + + return key + + if isinstance(key, numpy.ndarray): # we try slicing by array cols = numpy.array(self.order, dtype="U") try: @@ -345,10 +243,10 @@ except Exception: msg = f"{key} could not be used to slice columns" raise KeyError(msg) - else: - raise KeyError(f"{key}") - return key + return key + + raise KeyError(f"{key}") def __contains__(self, key): return key in self._order @@ -361,6 +259,7 @@ if isinstance(key, slice): key, _ = self._template.interpret_index(key) key = self._order[key[0]] + if isinstance(key, numpy.ndarray): key = numpy.array(self._order)[key].tolist() @@ -453,21 +352,24 @@ @property def index_name(self): - # check + """column name whose values can be used to index table rows""" return self._index_name @index_name.setter def index_name(self, name): if name is None: + self._index_name = None return if name not in self: - raise ValueError(f"'{name}' unknown, index must be an existing column") + raise ValueError(f"'{name}' unknown, index_name must be an existing column") - # make sure index has unique values + # make sure index_name has unique values unique = set(self[name]) if len(unique) != self._num_rows: - raise ValueError(f"cannot use '{name}' as index, not all values unique") + raise ValueError( + f"cannot use '{name}' as index_name, not all values unique" + ) self._index_name = name order = [name] + [c for c in self._order if c != name] @@ -545,7 +447,7 @@ self, header=None, data=None, - index=None, + index_name=None, title="", legend="", digits=4, @@ -556,17 +458,46 @@ missing_data="", **kwargs, ): + """ + + Parameters + ---------- + header + column headings + data + a 2D dict, list 
or tuple. If a dict, it must have column + headings as top level keys, and common row labels as keys in each + column. + index_name + column name with values to be used as row identifiers and keys + for slicing. All column values must be unique. + legend + table legend + title + as implied + digits + floating point resolution + space + number of spaces between columns or a string + max_width + maximum column width for printing + column_templates + dict of column headings + or a function that will handle the formatting. + format + output format when using str(Table) + missing_data + replace missing data with this + """ attrs = { k: v for k, v in locals().items() if k not in ("self", "__class__", "data", "header", "kwargs") } - rows = kwargs.pop("rows", None) - assert not (rows and data), "rows is deprecated, use data" - if rows: - deprecated("argument", "rows", "data", "2020.11") - data = rows + if "index" in kwargs: + deprecated("argument", "index", "index_name", "2021.11") + index_name = kwargs.pop("index", index_name) attrs.update(kwargs) @@ -579,12 +510,13 @@ if isinstance(data, dict): # convert containers like a defaultdict to a standard dict data = dict(data) - - try: - len(data[0]) - row_data = True - except (TypeError, IndexError, KeyError): row_data = False + else: + try: + len(data[0]) + row_data = True + except (TypeError, IndexError, KeyError): + row_data = False if header and row_data: hlen = len(header) @@ -596,26 +528,25 @@ data = {c: v for c, v in zip(header, zip(*data))} - if header is None and isinstance(data, dict): - header = list(data) - elif header is None: - header = [] - - has_index = index is not None - if has_index and not isinstance(index, str): - raise TypeError(f"only str type supported for index, not {type(index)}") + if header is None: + header = list(data) if isinstance(data, dict) else [] + has_index = index_name is not None + if has_index and not isinstance(index_name, str): + raise TypeError( + f"only str type supported for 
index_name, not {type(index_name)}" + ) if data: row_order = kwargs.get("row_order", None) data = cast_to_1d_dict(data, row_order=row_order) if has_index: try: - self.columns[index] = data[index] + self.columns[index_name] = data[index_name] except KeyError: - raise ValueError(f"'{index}' not in data") + raise ValueError(f"'{index_name}' not in data") for c in header: - if c == index: + if c == index_name: continue self.columns[c] = data[c] @@ -624,12 +555,12 @@ for c in header: self.columns[c] = [] - # this assignment triggers creation of row template if index specified + # this assignment triggers creation of row template if index_name specified # but only if we have data if len(self.columns) > 0: - self.index_name = index + self.index_name = index_name elif has_index: - self._index_name = index + self._index_name = index_name # default title / legend to be empty strings self._title = str(title) if title else "" @@ -646,7 +577,7 @@ self._column_templates = column_templates or {} # define the repr() display policy random = 0 - self._repr_policy = dict(head=None, tail=None, random=random) + self._repr_policy = dict(head=None, tail=None, random=random, show_shape=True) self.format = format self._missing_data = missing_data @@ -658,7 +589,7 @@ def __getitem__(self, names): # this is funky, but a side-effect of construction allowing setting - # prior to having assigned the index column + # prior to having assigned the index_name column self.index_name if isinstance(names, tuple): @@ -691,14 +622,21 @@ return self.columns[columns[0]][rows] attr = self._get_persistent_attrs() - index_name = attr.pop("index") + index_name = attr.pop("index_name") result = self.__class__(**attr) for c in columns: + if len(self.columns[c]) == 0: + continue + result.columns[c] = self.columns[c][rows] if index_name in result.columns: result.index_name = index_name + for c in self._column_templates: + if c in result.columns: + result._column_templates[c] = self._column_templates[c] + return 
result def __getstate__(self): @@ -714,7 +652,7 @@ data.pop(k, None) kwargs = data.pop("init_table") - index = kwargs.pop("index") + index = kwargs.pop("index_name") table = self.__class__(**kwargs) table.columns.__setstate__(data["data"]) table.index_name = index @@ -724,8 +662,17 @@ if self.shape == (0, 0): return "0 rows x 0 columns" - table, shape_info = self._get_repr_() - result = "\n".join([str(table), shape_info]) + table, shape_info, unset_columns = self._get_repr_() + if self.shape[0] == 0: + return "\n".join([shape_info, unset_columns]) + + if not self._repr_policy["show_shape"]: + shape_info = "" + result = ( + "\n".join([str(table), shape_info, unset_columns]) + if unset_columns + else "\n".join([str(table), shape_info]) + ) return result def __str__(self): @@ -739,119 +686,96 @@ rn = self._repr_policy["random"] head = self._repr_policy["head"] tail = self._repr_policy["tail"] - if head is None and tail is None: - if self.shape[0] < 50: - head = self.shape[0] - tail = None - else: - head, tail = 5, 5 + if not any([head, tail]): + head, tail = (self.shape[0], None) if self.shape[0] < 50 else (5, 5) self._repr_policy["head"] = head self._repr_policy["tail"] = tail shape_info = "" - ellipsis = None if rn: indices = numpy.random.choice(self.shape[0], size=rn, replace=False) indices = list(sorted(indices)) - shape_info = f"Random selection of {rn} rows" + shape_info = f"Random selection of {rn} rows from" elif all([head, tail]): indices = list(range(head)) + list( range(self.shape[0] - tail, self.shape[0]) ) - ellipsis = "..." 
+ if head + tail < self.shape[0]: + shape_info = f"Top {head} and bottom {tail} rows from" elif head: indices = list(range(head)) + if head < self.shape[0]: + shape_info = f"Top {head} rows from" elif tail: indices = list(range(self.shape[0] - tail, self.shape[0])) + if tail < self.shape[0]: + shape_info = f"Bottom {tail} rows from" else: indices = list(range(self.shape[0])) - rows = {} - for c in self.header: - rows[c] = [self.columns[c][i] for i in indices] - - if ellipsis: - for k, v in rows.items(): - v.insert(head, ellipsis) - shape_info += f"\n{self.shape[0]:,} rows x {self.shape[1]:,} columns" - kwargs = self._get_persistent_attrs() - table = self.__class__(header=self.header, data=rows, **kwargs) - table._column_templates.update(self._column_templates) - return table, shape_info - - def _repr_html_(self, include_shape=True): - """returns html, used by Jupyter""" - base_colour = "rgba(161, 195, 209, {alpha})" + unset_columns = [c for c in self.header if not len(self.columns[c])] + unset_columns = ( + "unset columns: %s" % ", ".join(map(repr, unset_columns)) + if unset_columns + else None + ) + table = self[indices] if self.shape[0] > 0 else None - def row_cell_func(val, row, col): - colour = base_colour.format(alpha=0.25) - try: - float(val) - except ValueError: - is_numeric = False - else: - is_numeric = True + return table, shape_info, unset_columns - if self.index_name is not None and col == 0: - style = f' style="background: {colour}; font-weight: 600;"' - elif is_numeric: - style = f' style="font-family: monospace !important;"' - else: - style = "" - val = f"{val}" - return val - - table, shape_info = self._get_repr_() - shape_info = f"

{shape_info}

" - if not include_shape: + def _repr_html_(self): + """returns html, used by Jupyter""" + table, shape_info, unset_columns = self._get_repr_() + if isinstance(table, numpy.ndarray): + # single row / column + table = self + + shape_info = ( + f"

{shape_info}; unset columns={unset_columns}

" + if unset_columns + else f"

{shape_info}

" + ) + if not self._repr_policy["show_shape"]: shape_info = "" - if self.shape == (0, 0): + if self.shape[0] == 0: return shape_info - title, legend = table.title, table.legend - # current rich_html does not provide a good mechanism for custom - # formatting of titles, legends - table.title, table.legend = None, None - head_colour = base_colour.format(alpha=0.75) - element_format = dict( - thead=f'' - ) - html = table.to_rich_html( - row_cell_func=row_cell_func, element_formatters=element_format - ) - if title or legend: - title = title or "" - legend = legend or "" - caption = ( - '' - f'{title}' - f"{legend}" - ) - html = html.splitlines() - html.insert(1, caption) - html = "\n".join(html) + html = table.to_html() + # add elipsis if head + row < self.shape[0] html = html.splitlines() - html.insert( - 1, - "\n".join( - [ - "", - ] - ), - ) - html = "\n".join(["\n".join(html), shape_info]) + head = self._repr_policy.get("head") or self.shape[0] + tail = self._repr_policy.get("tail") or self.shape[0] + if head + tail < self.shape[0] and head and tail: + HE = table_format.HtmlElement + ellipsis = [] + for c in table.columns: + if array_is_num_type(table.columns[c]): + css_class = "c3col_right" + else: + css_class = "c3col_left" + + ellipsis.append( + str(HE(HE("...", "span", css_classes=[css_class]), "td")) + ) + + ellipsis = str(HE("".join(ellipsis), "tr", css_classes="ellipsis")) + num_rows = 0 + for idx in range(len(html)): + item = html[idx] + if "" in item: + num_rows += 1 + if num_rows == head: + html.insert(idx + 1, ellipsis) + break + + html.insert(-1, shape_info) + html = "\n".join(html) return html def _get_persistent_attrs(self): - attrs = UnionDict(self._persistent_attrs.copy()) - return attrs + return UnionDict(self._persistent_attrs.copy()) @property def title(self): @@ -884,24 +808,32 @@ self._persistent_attrs["space"] = value - def set_repr_policy(self, head=None, tail=None, random=0): + def set_repr_policy(self, head=None, tail=None, random=0, 
show_shape=True): """specify policy for repr(self) Parameters ---------- - - head: number of top rows to included in represented display - - tail: number of bottom rows to included in represented display - - random: number of rows to sample randomly (supercedes head/tail) + head : int + number of top rows to included in represented display + tail : int + number of bottom rows to included in represented display + random : int + number of rows to sample randomly (supercedes head/tail) + show_shape : bool + boolean to determine if table shape info is displayed """ if not any([head, tail, random]): + self._repr_policy["show_shape"] = show_shape return if random: assert ( type(random) == int and random > 0 ), "random must be a positive integer" head = tail = None - self._repr_policy = dict(head=head, tail=tail, random=random) + self._repr_policy = dict( + head=head, tail=tail, random=random, show_shape=show_shape + ) @property def format(self): @@ -946,19 +878,28 @@ def head(self, nrows=5): """displays top nrows""" repr_policy = self._repr_policy - self._repr_policy = dict(head=nrows, tail=None, random=None) + nrows = min(nrows, self.shape[0]) + show_shape = self._repr_policy["show_shape"] + self._repr_policy = dict( + head=nrows, tail=None, random=None, show_shape=show_shape + ) display(self) self._repr_policy = repr_policy def tail(self, nrows=5): """displays bottom nrows""" repr_policy = self._repr_policy - self._repr_policy = dict(head=None, tail=nrows, random=None) + nrows = min(nrows, self.shape[0]) + show_shape = self._repr_policy["show_shape"] + self._repr_policy = dict( + head=None, tail=nrows, random=None, show_shape=show_shape + ) display(self) self._repr_policy = repr_policy @property def index_name(self): + """column name whose values can be used to index table rows""" if self._index_name is not None and not self._template: self.columns.index_name = self._index_name self.index_name = self._index_name @@ -966,13 +907,11 @@ return self._index_name 
@index_name.setter - def index_name(self, value): - if value is None: - return - - self.columns.index_name = value - self._index_name = value - self._template = DictArrayTemplate(self.columns[value]) + def index_name(self, name): + self.columns.index_name = name + self._index_name = name + self._persistent_attrs["index_name"] = name + self._template = None if name is None else DictArrayTemplate(self.columns[name]) @property def header(self): @@ -1017,7 +956,12 @@ return joined def inner_join( - self, other, columns_self=None, columns_other=None, use_index=True, **kwargs, + self, + other, + columns_self=None, + columns_other=None, + use_index=True, + **kwargs, ): """inner join of self with other @@ -1034,7 +978,7 @@ self[row, columns_self]==other[row, columns_other] for all i use_index if no columns specified and both self and other have a nominated - index, this will be used. + index_name, this will be used. Notes ----- @@ -1082,9 +1026,7 @@ output_mask = [c for c in other.columns if c not in columns_other] - # key is a tuple made from specified columns; data is the row index other_row_index = defaultdict(list) - # subtable = other.columns.take_columns(columns_other) subtable = other[:, columns_other] for row_index, row in enumerate(subtable.columns.array): # insert new entry for each row @@ -1120,7 +1062,12 @@ return joined def joined( - self, other, columns_self=None, columns_other=None, inner_join=True, **kwargs, + self, + other, + columns_self=None, + columns_other=None, + inner_join=True, + **kwargs, ): """returns a new table containing the join of this table and other. 
See docstring for inner_join, or cross_join @@ -1145,14 +1092,10 @@ def get_row_indices(self, callback, columns, negate=False): """returns boolean array of callback values given columns""" subset = self[:, columns] - if not isinstance(callback, Callable): - data = subset - else: - data = subset.array - + data = subset if not isinstance(callback, Callable) else subset.array num_columns = len(columns) match = not negate - indices = numpy.array( + return numpy.array( [ True if _callback(callback, row=row, num_columns=num_columns) == match @@ -1160,7 +1103,6 @@ for row in data ] ) - return indices def filtered(self, callback, columns=None, **kwargs): """Returns a table with rows satisfying the provided callback function. @@ -1178,6 +1120,10 @@ Row data provided to callback is a 1D list if more than one column, single value (row[col]) otherwise. """ + # no point filtering if no rows, justv return self + if self.shape[0] == 0: + return self + if isinstance(columns, str): columns = (columns,) @@ -1225,6 +1171,10 @@ python code to be evaluated. """ + # no rows, value must be 0 + if self.shape[0] == 0: + return 0 + if isinstance(columns, str): columns = (columns,) @@ -1256,7 +1206,7 @@ data = subset[0].tolist() else: data = subset.array - data = list(tuple(e) for e in data) + data = [tuple(e) for e in data] return CategoryCounter(data=data) @@ -1281,9 +1231,10 @@ Notes ----- - All tables must have the same columns. + All tables must have the same columns. If a column dtype differs between tables, + dtype for that column in result is determined by numpy. 
""" - if new_column: + if new_column is not None: assert new_column not in self.columns, f"'{new_column}' already exists" # default title is no title kwargs["title"] = kwargs.get("title", "") @@ -1299,28 +1250,50 @@ new_col = [] table_series = (self,) + tables raw_data = defaultdict(list) + dtypes = defaultdict(set) for table in table_series: assert set(table.columns.order) == columns, "columns don't match" - if new_column: + if new_column is not None: new_col.extend([table.title] * table.shape[0]) + + for c in table.columns: + dtypes[c].add(table.columns[c].dtype) + data = table.columns.to_dict() for c, v in data.items(): raw_data[c].extend(v) - dtypes = {c: self.columns[c].dtype for c in self.columns} - if new_column: + if new_column is not None: columns = (new_column,) + self.columns.order raw_data[new_column] = new_col - dtypes[new_column] = "{c}
" for c in self.header] if self.index_name: body_colour = ["white"] * self.shape[0] index_colour = ["rgba(161, 195, 209, 0.5)"] * self.shape[0] colours = [index_colour] + [body_colour[:] for i in range(self.shape[1])] - rows[0] = [f"{e}" for e in rows[0]] + columns[self.index_name] = [f"{e}" for e in columns[self.index_name]] else: colours = "white" tab = UnionDict( type="table", header=dict( - values=[f"{c}" for c in header], + values=header, fill=dict(color="rgba(161, 195, 209, 1)"), font=dict(size=font_size), align="center", ), - cells=dict(values=rows, fill=dict(color=colours)), + cells=dict( + values=[columns[c] for c in self.header], fill=dict(color=colours) + ), ) + draw = Drawable() aspect_ratio = self.shape[0] / self.shape[1] layout = layout or {} @@ -2015,6 +2120,51 @@ draw.layout |= default_layout return draw + def to_categorical(self, columns=None): + """construct object that can be used for statistical tests + + Parameters + ---------- + columns + columns to include. These correspond to contingency column + labels. The row labels come from values under the index_name + column. Defaults to all columns. + + Returns + ------- + CategoryCounts, an object for performing statistical tests on + contingency tables. + + Notes + ----- + Only applies to cases where an index_name is defined. The selected columns + must be int types and represent the counts of corresponding categories. 
+ """ + from cogent3.maths.stats.contingency import CategoryCounts + from cogent3.util.dict_array import DictArrayTemplate + + if self.index_name is None: + raise ValueError(f"requires index_name be set") + + columns = list(self.header) if columns is None else columns + + columns = [columns] if isinstance(columns, str) else columns + if not set(columns) <= set(self.header): + raise ValueError(f"unknown columns {columns}") + + if self.index_name in columns: + columns.remove(self.index_name) + row_cats = self.columns[self.index_name] + # must be convertible to int + for col in columns: + if "int" not in self.columns[col].dtype.name: + raise TypeError(f"{col} is not of int type") + + matrix = self.get_columns(columns, with_index=False).array.astype(int) + + data = DictArrayTemplate(row_cats, columns).wrap(matrix) + return CategoryCounts(data) + def transposed(self, new_column_name, select_as_header=None, **kwargs): """returns the transposed table. @@ -2036,6 +2186,9 @@ raise ValueError(f"not all '{select_as_header}' values unique") attr = self._get_persistent_attrs() + # on transpose, a row index_name becomes a column, so pop + del attr["index_name"] + attr |= kwargs result = self.__class__(**attr) @@ -2089,19 +2242,21 @@ mode = mode or {"pickle": "wb"}.get(format, "w") + if format == "json": + with atomic_write(filename, mode="wt") as f: + f.write(self.to_json()) + return + if compress: if not filename.endswith(".gz"): filename = "%s.gz" % filename mode = "wt" - outfile = open_(filename, mode) + outfile = atomic_write(filename, mode=mode) if format is None: # try guessing from filename suffix - if compress: - index = -2 - else: - index = -1 + index = -2 if compress else -1 suffix = filename.split(".") if len(suffix) > 1: format = suffix[index] @@ -2115,7 +2270,7 @@ rows = self.tolist() rows.insert(0, self.header[:]) rows = writer(rows, has_header=True) - outfile.writelines("\n".join(rows)) + outfile.write("\n".join(rows)) elif format == "pickle": data = 
self.__getstate__() pickle.dump(data, outfile, protocol=1) @@ -2129,5 +2284,6 @@ writer.writerow([self.legend]) else: table = self.to_string(format=format, sep=sep, **kwargs) - outfile.writelines(table + "\n") + outfile.write(table + "\n") + outfile.close() diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/util/transform.py python-cogent-2020.12.21a+dfsg/src/cogent3/util/transform.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/util/transform.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/util/transform.py 2020-12-20 23:35:03.000000000 +0000 @@ -17,7 +17,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Sandra Smit", "Rob Knight", "Zongzhi Liu"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Sandra Smit" __email__ = "sandra.smit@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/util/union_dict.py python-cogent-2020.12.21a+dfsg/src/cogent3/util/union_dict.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/util/union_dict.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/util/union_dict.py 2020-12-20 23:35:03.000000000 +0000 @@ -8,7 +8,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/util/unit_test.py python-cogent-2020.12.21a+dfsg/src/cogent3/util/unit_test.py --- python-cogent-2020.6.30a0+dfsg/src/cogent3/util/unit_test.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/util/unit_test.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,540 +0,0 @@ -#!/usr/bin/env python -"""Extension of the built-in unittest framework for floating-point comparisons. 
- -Specific Extensions: - -assertFloatEqual, assertFloatEqualAbs, and assertFloatEqualRel give fine- -grained control over how floating point numbers (or lists thereof) are tested -for equality. - -assertContains and assertNotContains give more helpful error -messages when testing whether an observed item is present or absent in a set -of possiblities. Ditto assertGreaterThan, assertLessThan, and assertIsProb. - -assertSameItems and assertEqualItems test the items in a list -for pairwise identity and equality respectively (i.e. the observed and -expected values must have the same number of each item, though the order can -differ). - -assertSimilarMeans and assertSimilarFreqs allow you to test stochastic results -by setting an explicit P-value and checking that the result is not improbable -given the expected P-value. Please use these instead of guessing confidence -intervals! The major advantage is that you can reset the P-value gloabally over -the whole test suite, so that rare failures don't occur every time. - -""" -from unittest import TestCase as orig_TestCase -from unittest import TestSuite, findTestCases, main - -import numpy - -from numpy import ( - array, - asarray, - isfinite, - logical_and, - logical_or, - ravel, - testing, - zeros, -) - -from cogent3.maths.stats.test import G_ind, t_two_sample -from cogent3.util.misc import recursive_flatten - - -__author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" -__credits__ = [ - "Rob Knight", - "Peter Maxwell", - "Sandra Smit", - "Zongzhi Liu", - "Micah Hamady", - "Daniel McDonald", -] -__license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" -__status__ = "Production" - -# SUPPORT2425 - - -class FakeRandom(object): - """Drop-in substitute for random.random that provides items from list.""" - - def __init__(self, data, circular=False): - """Returns new FakeRandom object, using list of items in data. 
- - circular: if True (default is False), wraps the list around. Otherwise, - raises IndexError when we run off the end of the list. - - WARNING: data must always be iterable, even if it's a single item. - """ - self._data = data - self._ptr = -1 - self._circular = circular - - def __call__(self, *args, **kwargs): - """Returns next item from the list in self._data. - - Raises IndexError when we run out of data. - """ - self._ptr += 1 - # wrap around if circular - if self._circular: - if self._ptr >= len(self._data): - self._ptr = 0 - return self._data[self._ptr] - - -class TestCase(orig_TestCase): - """Adds some additional utility methods to unittest.TestCase. - - Notably, adds facilities for dealing with floating point numbers, - and some common templates for replicated tests. - - BEWARE: Do not start any method with 'test' unless you want it to actually - run as a test suite in every instance! - """ - - _suite_pvalue = None # see TestCase._set_suite_pvalue() - - def _get_values_from_matching_dicts(self, d1, d2): - """Gets corresponding values from matching dicts""" - if set(d1) != set(d2): - return None - # might not be in same order - return list(d1.values()), [d2[k] for k in d1] - - def errorCheck(self, call, known_errors): - """Applies function to (data, error) tuples, checking for error - """ - for (data, error) in known_errors: - self.assertRaises(error, call, data) - - def valueCheck(self, call, known_values, arg_prefix="", eps=None): - """Applies function to (data, expected) tuples, treating data as args - """ - for (data, expected) in known_values: - observed = eval("call(" + arg_prefix + "data)") - try: - allowed_diff = float(eps) - except TypeError: - self.assertEqual(observed, expected) - else: - self.assertFloatEqual(observed, expected, allowed_diff) - - def assertFloatEqualRel(self, obs, exp, eps=1e-6): - """Tests whether two floating point numbers/arrays are approx. equal. 
- - Checks whether the distance is within epsilon relative to the value - of the sum of observed and expected. Use this method when you expect - the difference to be small relative to the magnitudes of the observed - and expected values. - - Note: for arbitrary objects, need to compare the specific attribute - that's numeric, not the whole object, using this method. - """ - # do array check first - # note that we can't use array ops to combine, because we need to check - # at each element whether the expected is zero to do the test to avoid - # floating point error. - # WARNING: numpy iterates over objects that are not regular Python - # floats/ints, so need to explicitly catch scalar values and prevent - # cast to array if we want the exact object to print out correctly. - is_array = False - if hasattr(obs, "keys") and hasattr(exp, "keys"): # both dicts? - result = self._get_values_from_matching_dicts(obs, exp) - if result: - obs, exp = result - else: - try: - iter(obs) - iter(exp) - except TypeError: - obs = [obs] - exp = [exp] - else: - try: - arr_obs = array(obs) - arr_exp = array(exp) - arr_diff = arr_obs - arr_exp - if arr_obs.shape != arr_exp.shape: - self.fail( - "Wrong shape: Got %s, but expected %s" - % (repr(obs), repr(exp)) - ) - obs = arr_obs.ravel() - exp = arr_exp.ravel() - is_array = True - except (TypeError, ValueError): - pass - - # shape mismatch can still get by... 
- # explict cast is to work around bug in certain versions of numpy - # installed version on osx 10.5 - if asarray(obs, object).shape != asarray(exp, object).shape: - self.fail("Wrong shape: Got %s, but expected %s" % (obs, exp)) - - for observed, expected in zip(obs, exp): - # try the cheap comparison first - if observed == expected: - continue - try: - sum = float(observed + expected) - diff = float(observed - expected) - if sum == 0: - if is_array: - self.assertFalse( - abs(diff) > abs(eps), - "Got %s, but expected %s (diff was %s)" - % (repr(arr_obs), repr(arr_exp), repr(arr_diff)), - ) - else: - self.assertFalse( - abs(diff) > abs(eps), - "Got %s, but expected %s (diff was %s)" - % (repr(observed), repr(expected), repr(diff)), - ) - - else: - if is_array: - self.assertFalse( - abs(diff / sum) > abs(eps), - "Got %s, but expected %s (diff was %s)" - % (repr(arr_obs), repr(arr_exp), repr(arr_diff)), - ) - else: - self.assertFalse( - abs(diff / sum) > abs(eps), - "Got %s, but expected %s (diff was %s)" - % (repr(observed), repr(expected), repr(diff)), - ) - except (TypeError, ValueError, AttributeError, NotImplementedError): - self.fail("Got %s, but expected %s" % (repr(observed), repr(expected))) - - def assertFloatEqualAbs(self, obs, exp, eps=1e-6): - """ - Tests whether two floating point numbers are approximately equal. - - Checks whether the absolute value of (a - b) is within epsilon. Use - this method when you expect that one of the values should be very - small, and the other should be zero. - """ - # do array check first - # note that we can't use array ops to combine, because we need to check - # at each element whether the expected is zero to do the test to avoid - # floating point error. - if hasattr(obs, "keys") and hasattr(exp, "keys"): # both dicts? 
- result = self._get_values_from_matching_dicts(obs, exp) - if result: - obs, exp = result - else: - try: - iter(obs) - iter(exp) - except TypeError: - obs = [obs] - exp = [exp] - else: - try: - arr_obs = array(obs) - arr_exp = array(exp) - if arr_obs.shape != arr_exp.shape: - self.fail( - "Wrong shape: Got %s, but expected %s" - % (repr(obs), repr(exp)) - ) - diff = arr_obs - arr_exp - self.assertFalse( - abs(diff).max() > eps, - "Got %s, but expected %s (diff was %s)" - % (repr(obs), repr(exp), repr(diff)), - ) - return - except (TypeError, ValueError): - pass - # only get here if array comparison failed - for observed, expected in zip(obs, exp): - # cheap comparison first - if observed == expected: - continue - try: - diff = observed - expected - self.assertFalse( - abs(diff) > abs(eps), - "Got %s, but expected %s (diff was %s)" - % (repr(observed), repr(expected), repr(diff)), - ) - except (TypeError, ValueError, AttributeError, NotImplementedError): - self.fail("Got %s, but expected %s" % (repr(observed), repr(expected))) - - def assertFloatEqual(self, obs, exp, eps=1e-6, rel_eps=None, abs_eps=None): - """Tests whether two floating point numbers are approximately equal. - - If one of the arguments is zero, tests the absolute magnitude of the - difference; otherwise, tests the relative magnitude. - - Use this method as a reasonable default. - """ - obs = numpy.asarray(obs, dtype="O") - exp = numpy.asarray(exp, dtype="O") - obs = numpy.ravel(obs) - exp = numpy.ravel(exp) - - if obs.shape != exp.shape: - self.fail("Shape mismatch. 
Got, %s but expected %s" % (obs, exp)) - - for observed, expected in zip(obs, exp): - if self._is_equal(observed, expected): - continue - try: - rel_eps = rel_eps or eps - abs_eps = abs_eps or eps - if (observed == 0) or (expected == 0): - self.assertFloatEqualAbs(observed, expected, abs_eps) - else: - self.assertFloatEqualRel(observed, expected, rel_eps) - except (TypeError, ValueError, AttributeError, NotImplementedError): - self.fail("Got %s, but expected %s" % (repr(observed), repr(expected))) - - def _is_equal(self, observed, expected): - """Returns True if observed and expected are equal, False otherwise.""" - # errors to catch: TypeError when obs is None - tolist_errors = (AttributeError, ValueError, TypeError) - - try: - obs = observed.tolist() - except tolist_errors: - obs = observed - try: - exp = expected.tolist() - except tolist_errors: - exp = expected - return obs == exp - - def failUnlessEqual(self, observed, expected, msg=None): - """Fail if the two objects are unequal as determined by != - - Overridden to make error message enforce order of observed, expected. - Use numpy.testing.assert_equal if ValueError, TypeError raised. - """ - try: - if not self._is_equal(observed, expected): - raise self.failureException( - msg or "Got %s, but expected %s" % (repr(observed), repr(expected)) - ) - except (ValueError, TypeError) as e: - # The truth value of an array with more than one element is - # ambiguous. 
Use a.any() or a.all() - # descriptor 'tolist' of 'numpy.generic' object needs an argument - testing.assert_equal(observed, expected) - - def failIfEqual(self, observed, expected, msg=None): - """Fail if the two objects are equal as determined by ==""" - try: - self.assertEqual(observed, expected) - except self.failureException: - pass - else: - raise self.failureException( - msg - or "Observed %s and expected %s: shouldn't test equal" - % (repr(observed), repr(expected)) - ) - - # following needed to get our version instead of unittest's - - assertEqual = assertEquals = failUnlessEqual - - assertNotEqual = assertNotEquals = failIfEqual - - def assertEqualItems(self, observed, expected, msg=None): - """Fail if the two items contain unequal elements""" - obs_items = list(observed) - exp_items = list(expected) - if len(obs_items) != len(exp_items): - raise self.failureException( - msg - or "Observed and expected are different lengths: %s and %s" - % (len(obs_items), len(exp_items)) - ) - - obs_items.sort() - exp_items.sort() - for index, (obs, exp) in enumerate(zip(obs_items, exp_items)): - if obs != exp: - raise self.failureException( - msg - or "Observed %s and expected %s at sorted index %s" - % (obs, exp, index) - ) - - def assertSameItems(self, observed, expected, msg=None): - """Fail if the two items contain non-identical elements""" - obs_items = list(observed) - exp_items = list(expected) - if len(obs_items) != len(exp_items): - raise self.failureException( - msg - or "Observed and expected are different lengths: %s and %s" - % (len(obs_items), len(exp_items)) - ) - - obs_ids = [(id(i), i) for i in obs_items] - exp_ids = [(id(i), i) for i in exp_items] - obs_ids.sort() - exp_ids.sort() - for index, (obs, exp) in enumerate(zip(obs_ids, exp_ids)): - o_id, o = obs - e_id, e = exp - if o_id != e_id: # i.e. 
the ids are different - raise self.failureException( - msg - or "Observed %s <%s> and expected %s <%s> at sorted index %s" - % (o, o_id, e, e_id, index) - ) - - def assertContains(self, observed, item, msg=None): - """Fail if item not in observed""" - try: - if item in observed: - return - except (TypeError, ValueError): - pass - raise self.failureException( - msg or "Item %s not found in %s" % (repr(item), repr(observed)) - ) - - def assertNotContains(self, observed, item, msg=None): - """Fail if item in observed""" - try: - if item not in observed: - return - except (TypeError, ValueError): - return - raise self.failureException( - msg or "Item %s should not have been in %s" % (repr(item), repr(observed)) - ) - - def assertGreaterThan(self, observed, value, msg=None): - """Fail if observed is <= value""" - try: - if value is None or observed is None: - raise ValueError - if (asarray(observed) > value).all(): - return - except: - pass - raise self.failureException( - msg or "Observed %s has elements <= %s" % (repr(observed), repr(value)) - ) - - def assertLessThan(self, observed, value, msg=None): - """Fail if observed is >= value""" - try: - if value is None or observed is None: - raise ValueError - if (asarray(observed) < value).all(): - return - except: - pass - raise self.failureException( - msg or "Observed %s has elements >= %s" % (repr(observed), repr(value)) - ) - - def assertIsProb(self, observed, msg=None): - """Fail is observed is not between 0.0 and 1.0""" - try: - if observed is None: - raise ValueError - if (asarray(observed) >= 0.0).all() and (asarray(observed) <= 1.0).all(): - return - except: - pass - raise self.failureException( - msg or "Observed %s has elements that are not probs" % (repr(observed)) - ) - - def _set_suite_pvalue(self, pvalue): - """Sets the test suite pvalue to be used in similarity tests - - This value is by default None. The pvalue used in this case is - specified in the test module itself. 
The purpose of this method is to - set the pvalue to be used when running a massive test suite - """ - self._suite_pvalue = pvalue - - def assertSimilarMeans(self, observed, expected, pvalue=0.01, msg=None): - """Fail if observed p is lower than pvalue""" - if self._suite_pvalue: - pvalue = self._suite_pvalue - - observed, expected = asarray(observed), asarray(expected) - - t, p = t_two_sample(observed, expected) - - # handle case where all elements were the same - if p is None or not isfinite(p): - if not observed.shape: - observed = observed.reshape((1,)) - if not expected.shape: - expected = expected.reshape((1,)) - if observed[0] == expected[0]: - return - elif p > pvalue: - return - else: - raise self.failureException( - msg or "p-value %s, t-test p %s" % (repr(pvalue), repr(p)) - ) - - def assertSimilarFreqs(self, observed, expected, pvalue=0.01, msg=None): - """Fail if observed p is lower than pvalue""" - if self._suite_pvalue: - pvalue = self._suite_pvalue - - obs_ravel = ravel(asarray(observed)) - exp_ravel = ravel(asarray(expected)) - - m = zeros((2, len(obs_ravel))) - m[0, :] = obs_ravel - m[1, :] = exp_ravel - - G, p = G_ind(m) - - if p > pvalue: - return - else: - raise self.failureException( - msg or "p-value %s, G-test p %s" % (repr(pvalue), repr(p)) - ) - - def assertSameObj(self, observed, expected, msg=None): - """Fail if 'observed is not expected'""" - try: - if observed is expected: - return - except: - pass - raise self.failureException( - msg - or "Observed %s is not the same as expected %s" - % (repr(observed), repr(expected)) - ) - - def assertNotSameObj(self, observed, expected, msg=None): - """Fail if 'observed is expected'""" - try: - if observed is not expected: - return - except: - pass - raise self.failureException( - msg - or "Observed %s is the same as expected %s" - % (repr(observed), repr(expected)) - ) diff -Nru python-cogent-2020.6.30a0+dfsg/src/cogent3/util/warning.py python-cogent-2020.12.21a+dfsg/src/cogent3/util/warning.py 
--- python-cogent-2020.6.30a0+dfsg/src/cogent3/util/warning.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/src/cogent3/util/warning.py 2020-12-20 23:35:03.000000000 +0000 @@ -7,7 +7,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Jai Ram Rideout"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/benchmark_aligning.py python-cogent-2020.12.21a+dfsg/tests/benchmark_aligning.py --- python-cogent-2020.6.30a0+dfsg/tests/benchmark_aligning.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/benchmark_aligning.py 2020-12-20 23:35:03.000000000 +0000 @@ -12,7 +12,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/benchmark.py python-cogent-2020.12.21a+dfsg/tests/benchmark.py --- python-cogent-2020.6.30a0+dfsg/tests/benchmark.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/benchmark.py 2020-12-20 23:35:03.000000000 +0000 @@ -16,7 +16,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -71,7 +71,8 @@ def quiet(f, *args, **kw): - import sys, io + import io + import sys temp = io.StringIO() _stdout = sys.stdout diff -Nru python-cogent-2020.6.30a0+dfsg/tests/__init__.py python-cogent-2020.12.21a+dfsg/tests/__init__.py --- python-cogent-2020.6.30a0+dfsg/tests/__init__.py 2020-06-30 
05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -16,7 +16,7 @@ "Edward Lang", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_align/__init__.py python-cogent-2020.12.21a+dfsg/tests/test_align/__init__.py --- python-cogent-2020.6.30a0+dfsg/tests/test_align/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_align/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -5,7 +5,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Jeremy Widmann", "Peter Maxwell", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Jeremy Widmann" __email__ = "jeremy.widmann@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_align/test_align.py python-cogent-2020.12.21a+dfsg/tests/test_align/test_align.py --- python-cogent-2020.6.30a0+dfsg/tests/test_align/test_align.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_align/test_align.py 2020-12-20 23:35:03.000000000 +0000 @@ -25,7 +25,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_app/test_align.py python-cogent-2020.12.21a+dfsg/tests/test_app/test_align.py --- python-cogent-2020.6.30a0+dfsg/tests/test_app/test_align.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_app/test_align.py 2020-12-20 23:35:03.000000000 +0000 @@ -10,7 +10,7 @@ __copyright__ = 
"Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -75,6 +75,12 @@ make_generic_scoring_dict(10, get_moltype(test_moltype)), ) + def test_align_to_ref_result_has_moltype(self): + """aligned object has correct moltype""" + aligner = align_app.align_to_ref(moltype="dna") + got = aligner(self.seqs) + self.assertEqual(got.moltype.label, "dna") + def test_progressive_align_protein_moltype(self): """tests guide_tree is None and moltype is protein""" from cogent3 import load_aligned_seqs diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_app/test_app_mpi.py python-cogent-2020.12.21a+dfsg/tests/test_app/test_app_mpi.py --- python-cogent-2020.6.30a0+dfsg/tests/test_app/test_app_mpi.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_app/test_app_mpi.py 2020-12-20 23:35:03.000000000 +0000 @@ -10,7 +10,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Sheng Han Moses Koh"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_app/test_composable.py python-cogent-2020.12.21a+dfsg/tests/test_app/test_composable.py --- python-cogent-2020.6.30a0+dfsg/tests/test_app/test_composable.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_app/test_composable.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,4 +1,5 @@ import os +import pathlib from tempfile import TemporaryDirectory from unittest import TestCase, main @@ -17,7 +18,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = 
"Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -26,11 +27,11 @@ class TestCheckpoint(TestCase): def test_checkpointable(self): """chained funcs should be be able to apply a checkpoint""" - path = "data" + os.sep + "brca1.fasta" reader = io_app.load_aligned(moltype="dna") omit_degens = sample_app.omit_degenerates(moltype="dna") with TemporaryDirectory(dir=".") as dirname: writer = io_app.write_seqs(dirname) + path = "data" + os.sep + "brca1.fasta" aln = reader(path) outpath = writer(aln) @@ -45,7 +46,7 @@ self.assertTrue(len(got) > 1000) -ComposableSeq._input_types = ComposableSeq._output_types = set([None]) +ComposableSeq._input_types = ComposableSeq._output_types = {None} class TestComposableBase(TestCase): @@ -175,6 +176,25 @@ self.assertEqual(len(process.data_store.incomplete), 3) process.data_store.close() + def test_apply_to_not_partially_done(self): + """correctly applies process when result already partially done""" + dstore = io_app.get_data_store("data", suffix="fasta") + num_records = len(dstore) + with TemporaryDirectory(dir=".") as dirname: + dirname = pathlib.Path(dirname) + reader = io_app.load_aligned(format="fasta", moltype="dna") + outpath = dirname / "delme.tinydb" + writer = io_app.write_db(outpath) + _ = writer(reader(dstore[0])) + writer.data_store.close() + + writer = io_app.write_db(outpath, if_exists="ignore") + process = reader + writer + _ = process.apply_to(dstore, show_progress=False) + writer.data_store.close() + dstore = io_app.get_data_store(outpath) + self.assertEqual(len(dstore), num_records) + class TestNotCompletedResult(TestCase): def test_err_result(self): @@ -213,8 +233,8 @@ self.assertEqual( got, "select_translatable(type='sequences', " - "moltype='dna', gc='Standard Nuclear', " - "allow_rc=False, trim_terminal_stop=True)", + "moltype='dna', gc=1, " + "allow_rc=False,\ntrim_terminal_stop=True)", ) func = select_translatable(allow_rc=True) @@ -222,8 +242,8 @@ self.assertEqual( got, 
"select_translatable(type='sequences', " - "moltype='dna', gc='Standard Nuclear', " - "allow_rc=True, trim_terminal_stop=True)", + "moltype='dna', gc=1, " + "allow_rc=True,\ntrim_terminal_stop=True)", ) nodegen = omit_degenerates() @@ -231,14 +251,14 @@ self.assertEqual( got, "omit_degenerates(type='aligned', moltype=None, " - "gap_is_degen=True, motif_length=1)", + "gap_is_degen=True,\nmotif_length=1)", ) ml = min_length(100) got = str(ml) self.assertEqual( got, "min_length(type='sequences', length=100, " - "motif_length=1, subtract_degen=True, " + "motif_length=1, subtract_degen=True,\n" "moltype=None)", ) @@ -250,7 +270,8 @@ def test_composite_pickleable(self): """composable functions should be pickleable""" from pickle import dumps - from cogent3.app import io, sample, evo, tree, translate, align + + from cogent3.app import align, evo, io, sample, translate, tree read = io.load_aligned(moltype="dna") dumps(read) @@ -284,7 +305,7 @@ def test_triggers_bugcatcher(self): """a composable that does not trap failures returns NotCompletedResult requesting bug report""" - from cogent3.app import io, sample, evo, tree, translate, align + from cogent3.app import align, evo, io, sample, translate, tree read = io.load_aligned(moltype="dna") read.func = lambda x: None @@ -293,6 +314,10 @@ self.assertEqual(got.type, "BUG") +def _demo(ctx, expect): + return ctx.frame_start == expect + + class TestUserFunction(TestCase): def foo(self, val, *args, **kwargs): return val[:4] @@ -300,18 +325,14 @@ def bar(self, val, *args, **kwargs): return val.distance_matrix(calc="hamming", show_progress=False) - def _demo(self, ctx, expect): - self.assertEqual(ctx.frame_start, expect) - return expect - def test_user_function_custom_variables(self): + # not sure what this is meant to be testing demo = user_function( - self._demo, ("aligned", "serialisable"), ("aligned", "serialisable") + _demo, ("aligned", "serialisable"), ("aligned", "serialisable") ) - foo = demo frame_start = 2 - 
foo.frame_start = frame_start - foo(frame_start) + demo.frame_start = frame_start + self.assertTrue(demo(demo, 2)) def test_user_function(self): """composable functions should be user definable""" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_app/test_data_store.py python-cogent-2020.12.21a+dfsg/tests/test_app/test_data_store.py --- python-cogent-2020.6.30a0+dfsg/tests/test_app/test_data_store.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_app/test_data_store.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,3 +1,4 @@ +import json import os import shutil import sys @@ -16,6 +17,7 @@ WritableDirectoryDataStore, WritableTinyDbDataStore, WritableZippedDataStore, + load_record_from_json, ) from cogent3.parse.fasta import MinimalFastaParser @@ -24,7 +26,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -543,9 +545,10 @@ def test_unchanged_database_record(self): """tests unchanged record via the Readable and Writable DataStore interface to TinyDB""" - from cogent3.app.io import load_db from copy import deepcopy + from cogent3.app.io import load_db + loader = load_db() data = self.data original_record = deepcopy(data) @@ -642,9 +645,10 @@ def test_dblock(self): """locking/unlocking of db""" - from cogent3.app.data_store import _db_lockid from pathlib import Path + from cogent3.app.data_store import _db_lockid + keys = list(self.data) with TemporaryDirectory(dir=".") as dirname: path = os.path.join(dirname, self.basedir) @@ -710,5 +714,22 @@ self.assertEqual(got, expect) +class TestFunctions(TestCase): + """test support functions""" + + def test_load_record_from_json(self): + """handle different types of input""" + orig = {"data": "blah", "identifier": "some.json", "completed": True} + data = orig.copy() + data2 = 
data.copy() + data2["data"] = json.dumps(data) + for d in (data, json.dumps(data), data2): + expected = "blah" if d != data2 else json.loads(data2["data"]) + Id, data_, compl = load_record_from_json(d) + self.assertEqual(Id, "some.json") + self.assertEqual(data_, expected) + self.assertEqual(compl, True) + + if __name__ == "__main__": main() diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_app/test_dist.py python-cogent-2020.12.21a+dfsg/tests/test_app/test_dist.py --- python-cogent-2020.6.30a0+dfsg/tests/test_app/test_dist.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_app/test_dist.py 2020-12-20 23:35:03.000000000 +0000 @@ -15,7 +15,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -142,11 +142,11 @@ fast_slow_dist = dist_app.fast_slow_dist(slow_calc="GTR") got = fast_slow_dist(aln3).to_dict() assert_allclose(got[("Human", "Mouse")], got[("Mouse", "Human")]) - self.assertTrue(0 <= got[("Mouse", "Human")]) + self.assertTrue(got[("Mouse", "Human")] >= 0) fast_slow_dist = dist_app.fast_slow_dist(slow_calc="TN93") got = fast_slow_dist(aln3).to_dict() assert_allclose(got[("Human", "Mouse")], got[("Mouse", "Human")]) - self.assertTrue(0 <= got[("Mouse", "Human")]) + self.assertTrue(got[("Mouse", "Human")] >= 0) aligner = align.align_to_ref(ref_seq="Human") aln3 = aligner(self.seqs3) @@ -156,43 +156,50 @@ fast_slow_dist = dist_app.fast_slow_dist(slow_calc="TN93") got = fast_slow_dist(aln3).to_dict() assert_allclose(got[("Human", "Mouse")], got[("Mouse", "Human")]) - self.assertTrue(0 <= got[("Mouse", "Human")]) + self.assertTrue(got[("Mouse", "Human")] >= 0) aligner = align.align_to_ref(ref_seq="Mouse") aln3 = aligner(self.seqs3) fast_slow_dist = dist_app.fast_slow_dist(slow_calc="GTR") got = 
fast_slow_dist(aln3).to_dict() - self.assertTrue(0 <= got[("Mouse", "Human")]) + self.assertTrue(got[("Mouse", "Human")] >= 0) fast_slow_dist = dist_app.fast_slow_dist(slow_calc="TN93") got = fast_slow_dist(aln3).to_dict() - self.assertTrue(0 <= got[("Mouse", "Human")]) + self.assertTrue(got[("Mouse", "Human")] >= 0) aligner = align.align_to_ref() aln3 = aligner(self.seqs4) fast_slow_dist = dist_app.fast_slow_dist(slow_calc="GTR") got = fast_slow_dist(aln3).to_dict() - self.assertTrue(0 <= got[("Human", "Opossum")]) + self.assertTrue(got[("Human", "Opossum")] >= 0) fast_slow_dist = dist_app.fast_slow_dist(slow_calc="TN93") got = fast_slow_dist(aln3).to_dict() - self.assertTrue(0 <= got[("Human", "Opossum")]) + self.assertTrue(got[("Human", "Opossum")] >= 0) aligner = align.align_to_ref(ref_seq="Human") aln3 = aligner(self.seqs4) fast_slow_dist = dist_app.fast_slow_dist(slow_calc="GTR") got = fast_slow_dist(aln3).to_dict() - self.assertTrue(0 <= got[("Human", "Opossum")]) + self.assertTrue(got[("Human", "Opossum")] >= 0) fast_slow_dist = dist_app.fast_slow_dist(slow_calc="TN93") got = fast_slow_dist(aln3).to_dict() - self.assertTrue(0 <= got[("Human", "Opossum")]) + self.assertTrue(got[("Human", "Opossum")] >= 0) aligner = align.align_to_ref(ref_seq="Opossum") aln3 = aligner(self.seqs4) fast_slow_dist = dist_app.fast_slow_dist(slow_calc="GTR") got = fast_slow_dist(aln3).to_dict() - self.assertTrue(0 <= got[("Human", "Opossum")]) + self.assertTrue(got[("Human", "Opossum")] >= 0) fast_slow_dist = dist_app.fast_slow_dist(slow_calc="TN93") got = fast_slow_dist(aln3).to_dict() - self.assertTrue(0 <= got[("Human", "Opossum")]) + self.assertTrue(got[("Human", "Opossum")] >= 0) + + # now as a process + proc = align.align_to_ref() + dist_app.fast_slow_dist( + fast_calc="hamming", moltype="dna" + ) + got = proc(self.seqs1) + self.assertEqual(got[("Human", "Rhesus")], 1) treestring = "(Human:0.2,Bandicoot:0.2)" aligner = align.progressive_align(model="WG01", 
guide_tree=treestring) diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_app/test_evo.py python-cogent-2020.12.21a+dfsg/tests/test_app/test_evo.py --- python-cogent-2020.6.30a0+dfsg/tests/test_app/test_evo.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_app/test_evo.py 2020-12-20 23:35:03.000000000 +0000 @@ -15,7 +15,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -34,9 +34,9 @@ got, ( "model(type='model', sm='HKY85', tree=None, " - "name=None, sm_args=None, lf_args=None, " + "name=None, sm_args=None,\nlf_args=None, " "time_het='max', param_rules=None, " - "opt_args=None, split_codons=False, " + "opt_args=None,\nsplit_codons=False, " "show_progress=False, verbose=False)" ), ) @@ -105,6 +105,8 @@ def test_model_hypothesis_result_repr(self): """result objects __repr__ and _repr_html_ methods work correctly""" + import re + _data = { "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG", "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG", @@ -119,10 +121,25 @@ ) hyp = evo_app.hypothesis(model1, model2) result = hyp(aln) + # check the p-val formatted as %.4f + pval = str(result).splitlines()[4].split()[-1] + self.assertTrue(re.search(r"\d\.\d+", pval) is not None) self.assertIsInstance(result.__repr__(), str) self.assertIsInstance(result._repr_html_(), str) self.assertIsInstance(result.null.__repr__(), str) self.assertIsInstance(result.null._repr_html_(), str) + aln = load_aligned_seqs("data/primate_brca1.fasta", moltype="dna") + aln = aln.take_seqs(["Human", "Rhesus", "Galago"])[2::3].omit_gap_pos() + model1 = evo_app.model( + "F81", opt_args=dict(max_evaluations=25, limit_action="ignore") + ) + model2 = evo_app.model( + "HKY85", opt_args=dict(max_evaluations=100, limit_action="ignore") + ) + hyp = evo_app.hypothesis(model1, model2) 
+ result = hyp(aln) + pval = str(result).splitlines()[4].split()[-1] + self.assertTrue(re.search(r"[0-9\.]+e-\d+", pval) is not None) def test_hypothesis_str(self): """correct str representation""" @@ -132,10 +149,10 @@ got = str(hyp) expect = ( "hypothesis(type='hypothesis', null='HKY85', " - "alternates=(model(type='model', sm='HKY85', tree=None, " - "name='hky85-max-het', sm_args=None, lf_args=None, " + "alternates=(model(type='model',\nsm='HKY85', tree=None, " + "name='hky85-max-het', sm_args=None, lf_args=None,\n" "time_het='max', param_rules=None, opt_args=None," - " split_codons=False, show_progress=False, verbose=False),)," + " split_codons=False,\nshow_progress=False, verbose=False),)," " init_alt=None)" ) self.assertEqual(got, expect) diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_app/test_init.py python-cogent-2020.12.21a+dfsg/tests/test_app/test_init.py --- python-cogent-2020.6.30a0+dfsg/tests/test_app/test_init.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_app/test_init.py 2020-12-20 23:35:03.000000000 +0000 @@ -5,14 +5,14 @@ from unittest import TestCase, main from cogent3 import available_apps -from cogent3.app import align, evo, io, sample, translate, tree +from cogent3.app import align, dist, evo, io, sample, translate, tree __author__ = "Gavin Huttley" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -27,6 +27,7 @@ applications = [ align.align_to_ref(), align.progressive_align(model="GY94"), + dist.fast_slow_dist(moltype="dna", fast_calc="hamming"), evo.ancestral_states(), evo.bootstrap(hyp=test_hyp, num_reps=test_num_reps), evo.hypothesis(test_model1, test_model2), diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_app/test_io.py python-cogent-2020.12.21a+dfsg/tests/test_app/test_io.py --- 
python-cogent-2020.6.30a0+dfsg/tests/test_app/test_io.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_app/test_io.py 2020-12-20 23:35:03.000000000 +0000 @@ -30,7 +30,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -106,6 +106,14 @@ fasta_loader = io_app.load_aligned(format="fasta") validate(fasta_paths, fasta_loader) + def test_load_aligned_nexus(self): + """should handle nexus too""" + nexus_paths = io_app.get_data_store(self.basedir, suffix="nex") + loader = io_app.load_aligned(format="nexus") + results = [loader(m) for m in nexus_paths] + for result in results: + self.assertIsInstance(result, ArrayAlignment) + def test_load_aligned_from_zip(self): """correctly loads aligned seqs from a zip archive""" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_app/test_result.py python-cogent-2020.12.21a+dfsg/tests/test_app/test_result.py --- python-cogent-2020.6.30a0+dfsg/tests/test_app/test_result.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_app/test_result.py 2020-12-20 23:35:03.000000000 +0000 @@ -14,7 +14,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -81,6 +81,24 @@ got = result.alignment self.assertEqual(got.to_dict(), _data) + def test_model_name_lf_name(self): + """model_result.name is set as lf.name""" + _data = { + "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG", + "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG", + "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG", + } + aln = make_aligned_seqs(data=_data, moltype="dna") + mod = evo_app.model( + "F81", + 
name="blah", + show_progress=False, + opt_args=dict(max_evaluations=5, limit_action="ignore"), + ) + result = mod(aln) + self.assertEqual(result.name, result.lf.name) + print(result) + def test_model_result_alignment_split_pos_model(self): """returns alignment from lf with split codon positions""" _data = { diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_app/test_sample.py python-cogent-2020.12.21a+dfsg/tests/test_app/test_sample.py --- python-cogent-2020.6.30a0+dfsg/tests/test_app/test_sample.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_app/test_sample.py 2020-12-20 23:35:03.000000000 +0000 @@ -10,7 +10,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_app/test_translate.py python-cogent-2020.12.21a+dfsg/tests/test_app/test_translate.py --- python-cogent-2020.6.30a0+dfsg/tests/test_app/test_translate.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_app/test_translate.py 2020-12-20 23:35:03.000000000 +0000 @@ -16,7 +16,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_app/test_tree.py python-cogent-2020.12.21a+dfsg/tests/test_app/test_tree.py --- python-cogent-2020.6.30a0+dfsg/tests/test_app/test_tree.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_app/test_tree.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,24 +1,36 @@ +import json import os +from tempfile import TemporaryDirectory from unittest import TestCase, main -from 
cogent3 import DNA, load_aligned_seqs, make_aligned_seqs, make_tree +from cogent3 import ( + DNA, + load_aligned_seqs, + load_tree, + make_aligned_seqs, + make_tree, +) from cogent3.app import dist from cogent3.app import tree as tree_app from cogent3.app.composable import NotCompleted from cogent3.core.tree import PhyloNode from cogent3.evolve.fast_distance import DistanceMatrix +from cogent3.util.misc import get_object_provenance, open_ __author__ = "Gavin Huttley" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" +base_path = os.path.dirname(os.path.dirname(__file__)) +data_path = os.path.join(base_path, "data") + class TestTree(TestCase): def test_scale_tree_lengths(self): @@ -56,9 +68,7 @@ def test_quick_tree(self): """correctly calc a nj tree""" - path = os.path.join( - os.path.abspath(__file__).split("test_app")[0], "data/brca1_5.paml" - ) + path = os.path.join(data_path, "brca1_5.paml") aln = load_aligned_seqs(path, moltype=DNA) fast_slow_dist = dist.fast_slow_dist(fast_calc="hamming", moltype="dna") dist_matrix = fast_slow_dist(aln) @@ -68,16 +78,16 @@ def test_composable_apps(self): """checks the ability of these two apps(fast_slow_dist and quick_tree) to communicate""" - path = os.path.join( - os.path.abspath(__file__).split("test_app")[0], "data/brca1_5.paml" - ) + path = os.path.join(data_path, "brca1_5.paml") aln1 = load_aligned_seqs(path, moltype=DNA) fast_slow_dist = dist.fast_slow_dist(fast_calc="hamming", moltype="dna") quick = tree_app.quick_tree(drop_invalid=False) proc = fast_slow_dist + quick self.assertEqual( str(proc), - "fast_slow_dist(type='distance', distance=None, moltype='dna', fast_calc='hamming', slow_calc=None) + quick_tree(type='tree', drop_invalid=False)", + "fast_slow_dist(type='distance', distance=None, moltype='dna',\n" 
+ "fast_calc='hamming', slow_calc=None) + quick_tree(type='tree',\n" + "drop_invalid=False)", ) self.assertIsInstance(proc, tree_app.quick_tree) self.assertEqual(proc._type, "tree") diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_cluster/__init__.py python-cogent-2020.12.21a+dfsg/tests/test_cluster/__init__.py --- python-cogent-2020.6.30a0+dfsg/tests/test_cluster/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_cluster/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -5,7 +5,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Catherine Lozuopone", "Peter Maxwell", "Rob Knight", "Justin Kuczynski"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_cluster/test_UPGMA.py python-cogent-2020.12.21a+dfsg/tests/test_cluster/test_UPGMA.py --- python-cogent-2020.6.30a0+dfsg/tests/test_cluster/test_UPGMA.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_cluster/test_UPGMA.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,9 +1,11 @@ #!/usr/bin/env python from collections import defaultdict +from unittest import TestCase, main import numpy from numpy import array +from numpy.testing import assert_allclose from cogent3 import make_tree from cogent3.cluster.UPGMA import ( @@ -16,7 +18,6 @@ ) from cogent3.core.tree import PhyloNode from cogent3.util.dict_array import DictArray, DictArrayTemplate, convert2DDict -from cogent3.util.unit_test import TestCase, main Float = numpy.core.numerictypes.sctype2char(float) @@ -26,9 +27,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = 
"rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -88,8 +89,7 @@ ) def test_UPGMA_cluster(self): - """upgma works on pairwise distance dict - """ + """upgma works on pairwise distance dict""" pairwise_dist = self.pairwise_distances cluster = upgma(pairwise_dist) cluster = cluster.sorted() # so we can make a stable comparison @@ -99,19 +99,17 @@ self.assertTrue(cluster.same_topology(expect)) def test_find_smallest_index(self): - """find_smallest_index returns the index of smallest value in array - """ + """find_smallest_index returns the index of smallest value in array""" matrix = self.matrix index = find_smallest_index(matrix) self.assertEqual(index, (0, 1)) def test_condense_matrix(self): - """condense_array joins two rows and columns identified by indices - """ + """condense_array joins two rows and columns identified by indices""" matrix = self.matrix index = find_smallest_index(matrix) result = condense_matrix(matrix, index, 9999999999) - self.assertFloatEqual(result[0, 0], 5000000.0) + assert_allclose(result[0, 0], 5000000.0) self.assertEqual(result[1, 4], 9999999999) self.assertEqual(result[0, 1], 9999999999) self.assertEqual(result[0, 2], 4.5) @@ -121,8 +119,7 @@ self.assertEqual(result[4, 0], 22.5) def test_condense_node_order(self): - """condense_node_order condenses nodes in list based on index info - """ + """condense_node_order condenses nodes in list based on index info""" matrix = self.matrix index = find_smallest_index(matrix) node_order = self.node_order @@ -134,8 +131,7 @@ self.assertEqual(node_order[4].__str__(), "e;") def test_upgma_cluster(self): - """UPGMA_cluster clusters nodes based on info in a matrix with UPGMA - """ + """UPGMA_cluster clusters nodes based on info in a matrix with UPGMA""" matrix = self.matrix node_order = self.node_order large_number = 9999999999 @@ -145,8 +141,7 @@ ) def test_UPGMA_cluster_diag(self): - 
"""UPGMA_cluster works when the diagonal has lowest values - """ + """UPGMA_cluster works when the diagonal has lowest values""" # test that checking the diagonal works matrix = self.matrix_zeros node_order = self.node_order @@ -157,8 +152,7 @@ ) def test_UPGMA_cluster_diag(self): - """UPGMA_cluster works when the diagonal has intermediate values - """ + """UPGMA_cluster works when the diagonal has intermediate values""" # test that checking the diagonal works matrix = self.matrix_five node_order = self.node_order @@ -180,8 +174,8 @@ matrix_array, PhyloNode_order = inputs_from_dict_array(matrix_d2d) self.assertEqual(PhyloNode_order[0].name, "1") self.assertEqual(PhyloNode_order[2].name, "3") - self.assertFloatEqual(matrix_array[0][2], 0.92) - self.assertFloatEqual(matrix_array[1][0], 0.86) + assert_allclose(matrix_array[0][2], 0.92) + assert_allclose(matrix_array[1][0], 0.86) # run if called from command line diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_core/__init__.py python-cogent-2020.12.21a+dfsg/tests/test_core/__init__.py --- python-cogent-2020.6.30a0+dfsg/tests/test_core/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_core/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -29,7 +29,7 @@ "Justin Kuczynski", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_core/test_alignment.py python-cogent-2020.12.21a+dfsg/tests/test_core/test_alignment.py --- python-cogent-2020.6.30a0+dfsg/tests/test_core/test_alignment.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_core/test_alignment.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,17 +1,21 @@ #!/usr/bin/env python import json import os +import pathlib import re import sys import 
unittest +import warnings from os import remove -from tempfile import mktemp +from tempfile import TemporaryDirectory, mktemp +from unittest import TestCase, main import numpy +import pytest from numpy import arange, array, log2, nan, transpose -from numpy.testing import assert_allclose +from numpy.testing import assert_allclose, assert_equal from cogent3 import ( load_aligned_seqs, @@ -55,8 +59,7 @@ ) from cogent3.maths.util import safe_p_log_p from cogent3.parse.fasta import MinimalFastaParser -from cogent3.util.misc import get_object_provenance -from cogent3.util.unit_test import TestCase, main +from cogent3.util.misc import get_object_provenance, open_ __author__ = "Rob Knight" @@ -70,9 +73,9 @@ "Jan Kosinski", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -84,7 +87,7 @@ a = array([[0, 1, 2], [2, 1, 0]]) # three 2-char seqs obs_a, obs_labels = seqs_from_array(a) # note transposition - self.assertEqual(obs_a, [array([0, 2]), array([1, 1]), array([2, 0])]) + assert_equal(obs_a, [array([0, 2]), array([1, 1]), array([2, 0])]) self.assertEqual(obs_labels, None) def test_seqs_from_array_seqs(self): @@ -132,17 +135,17 @@ """aln_from_array should return same array, and successive indices.""" a = array([[0, 1, 2], [3, 4, 5]]) # three 2-char seqs obs_a, obs_labels = aln_from_array(a) - self.assertEqual(obs_a, transpose(a)) - self.assertEqual(obs_labels, None) + assert_equal(obs_a, transpose(a)) + assert_equal(obs_labels, None) def test_aln_from_array_seqs(self): """aln_from_array_seqs should initialize aln from sequence objects.""" s1 = ArraySequence("ACC", name="a", alphabet=RNA.alphabet) s2 = ArraySequence("GGU", name="b", alphabet=RNA.alphabet) obs_a, obs_labels = aln_from_array_seqs([s1, s2], alphabet=BYTES.alphabet) - self.assertEqual(obs_a, array([[2, 1, 1], [3, 
3, 0]], "b")) + assert_equal(obs_a, array([[2, 1, 1], [3, 3, 0]], "b")) # seq -> numbers - self.assertEqual(obs_labels, ["a", "b"]) + assert_equal(obs_labels, ["a", "b"]) def test_aln_from_generic(self): """aln_from_generic should initialize aln from list of lists, etc.""" @@ -151,29 +154,29 @@ obs_a, obs_labels = aln_from_generic( [s1, s2], "b", alphabet=RNA.alphabet ) # specify array type - self.assertEqual(obs_a, array([[2, 2, 2], [3, 3, 3]], "b")) # str -> chars - self.assertEqual(obs_labels, [None, None]) + assert_equal(obs_a, array([[2, 2, 2], [3, 3, 3]], "b")) # str -> chars + assert_equal(obs_labels, [None, None]) def test_aln_from_fasta(self): """aln_from_fasta should initialize aln from fasta-format string""" s = ">aa\nAB\nC\n>bb\nDE\nF\n" obs_a, obs_labels = aln_from_fasta(s.splitlines()) - self.assertEqual(obs_a, array(["ABC", "DEF"], "c").view("B")) # seq -> numbers - self.assertEqual(obs_labels, ["aa", "bb"]) + assert_equal(obs_a, array(["ABC", "DEF"], "c").view("B")) # seq -> numbers + assert_equal(obs_labels, ["aa", "bb"]) def test_aln_from_array_aln(self): """aln_from_array_aln should initialize from existing alignment""" a = ArrayAlignment(array([[0, 1, 2], [3, 4, 5]]), conversion_f=aln_from_array) obs_a, obs_labels = aln_from_array_aln(a) - self.assertEqual(obs_a, a.seq_data) - self.assertEqual(obs_labels, a.names) + assert_equal(obs_a, a.seq_data) + assert_equal(obs_labels, a.names) def test_aln_from_collection(self): """aln_from_collection should initialize from existing alignment""" a = SequenceCollection(["AAA", "GGG"]) obs_a, obs_labels = aln_from_collection(a, alphabet=RNA.alphabet) - self.assertEqual(a.to_fasta(), ">seq_0\nAAA\n>seq_1\nGGG\n") - self.assertEqual(obs_a, array([[2, 2, 2], [3, 3, 3]])) + assert_equal(a.to_fasta(), ">seq_0\nAAA\n>seq_1\nGGG\n") + assert_equal(obs_a, array([[2, 2, 2], [3, 3, 3]])) def test_aln_from_empty(self): """aln_from_empty should always raise ValueError""" @@ -199,9 +202,6 @@ def setUp(self): """Define 
some standard SequenceCollection objects.""" - if type(self.Class) == ArrayAlignment: - pass - self.one_seq = self.Class({"a": "AAAAA"}) self.ragged_padded = self.Class({"a": "AAAAAA", "b": "AAA---", "c": "AAAA--"}) self.identical = self.Class({"a": "AAAA", "b": "AAAA"}) @@ -255,7 +255,7 @@ data = {"seq1": "ACGACGACG", "seq2": "ACGACGACG"} seqs = self.Class(data) copied = seqs.deepcopy(sliced=True) - self.assertEqual(seqs.to_rich_dict(), copied.to_rich_dict()) + assert_equal(seqs.to_rich_dict(), copied.to_rich_dict()) self.assertNotEqual(id(copied), id(seqs)) for name in seqs.names: self.assertNotEqual(id(copied.named_seqs[name]), copied.named_seqs[name]) @@ -283,9 +283,9 @@ x = self.Class(self.a) y = self.Class(self.b) z = self.Class(self.c) - self.assertEqual(x, exp) - self.assertEqual(z, exp) - self.assertEqual(y, exp) + assert_equal(x, exp) + assert_equal(z, exp) + assert_equal(y, exp) test_init_aln.__doc__ = Class.__name__ + test_init_aln.__doc__ @@ -331,7 +331,7 @@ self.assertEqual(a.names, ["seq_0", "seq_1", "seq_2"]) self.assertEqual(list(a.seqs), ["AAAAA", "BBBBB", "CCCCC"]) - def test_init_annotated_seq(self): + def test_init_seq_info(self): """SequenceCollection init from seqs w/ info should preserve data""" a = Sequence("AAA", name="a", info={"x": 3}) b = Sequence("CCC", name="b", info={"x": 4}) @@ -351,6 +351,18 @@ # ArrayAlignment is allowed to strip Info objects self.assertEqual([i.info.x for i in b.seqs], [5, 4, 3]) + def test_init_annotated_seqs(self): + """correctly construct from list with annotated seq""" + if self.Class == ArrayAlignment: + # this class cannot be annotated + return + seq = make_seq("GCCAGGGGGGAAAG-GGAGAA", name="seq1") + _ = seq.add_feature("exon", "name", [(4, 10)]) + coll = self.Class(data=[seq]) + got_seq = coll.get_seq("seq1") + ann = got_seq.annotations[0] + self.assertEqual(str(got_seq[ann]), "GGGGGG") + def test_init_pairs(self): """SequenceCollection init from list of (key,val) pairs should work correctly""" seqs = 
[["x", "XXX"], ["b", "BBB"], ["c", "CCC"]] @@ -451,8 +463,8 @@ self.assertEqual(seqs, ["AAAAAA", "AAA---", "AAAA--"]) seqs = list(self.ragged_padded.iter_seqs(seq_order=["b", "a", "a"])) self.assertEqual(seqs, ["AAA---", "AAAAAA", "AAAAAA"]) - self.assertSameObj(seqs[1], seqs[2]) - self.assertSameObj(seqs[0], self.ragged_padded.named_seqs["b"]) + self.assertIs(seqs[1], seqs[2]) + self.assertIs(seqs[0], self.ragged_padded.named_seqs["b"]) def test_Items(self): """SequenceCollection iter_selected should iterate over items in specified order.""" @@ -637,7 +649,7 @@ aln.named_seqs["a"], min_similarity=0.4, max_similarity=0.7 ) for seq in "cefg": - self.assertContains(result.named_seqs, seq) + self.assertIn(seq, result.named_seqs) self.assertEqual(result.named_seqs[seq], aln.named_seqs[seq]) self.assertEqual(len(result.named_seqs), 4) @@ -645,7 +657,7 @@ aln.named_seqs["a"], min_similarity=0.95, max_similarity=1 ) for seq in "a": - self.assertContains(result.named_seqs, seq) + self.assertIn(seq, result.named_seqs) self.assertEqual(result.named_seqs[seq], aln.named_seqs[seq]) self.assertEqual(len(result.named_seqs), 1) @@ -653,7 +665,7 @@ aln.named_seqs["a"], min_similarity=0.75, max_similarity=0.85 ) for seq in "bd": - self.assertContains(result.named_seqs, seq) + self.assertIn(seq, result.named_seqs) self.assertEqual(result.named_seqs[seq], aln.named_seqs[seq]) self.assertEqual(len(result.named_seqs), 2) @@ -670,7 +682,7 @@ aln.named_seqs["a"], min_similarity=0.5, transform=transform ) for seq in "abdfg": - self.assertContains(result.named_seqs, seq) + self.assertIn(seq, result.named_seqs) self.assertEqual(result.named_seqs[seq], aln.named_seqs[seq]) self.assertEqual(len(result.named_seqs), 5) @@ -681,7 +693,7 @@ aln.named_seqs["a"], min_similarity=0.5, transform=transform ) for seq in "abcde": - self.assertContains(result.named_seqs, seq) + self.assertIn(seq, result.named_seqs) self.assertEqual(result.named_seqs[seq], aln.named_seqs[seq]) 
self.assertEqual(len(result.named_seqs), 5) @@ -693,7 +705,7 @@ aln.named_seqs["a"], min_similarity=5, max_similarity=10, metric=metric ) for seq in "ef": - self.assertContains(result.named_seqs, seq) + self.assertIn(seq, result.named_seqs) self.assertEqual(result.named_seqs[seq], aln.named_seqs[seq]) self.assertEqual(len(result.named_seqs), 2) @@ -723,7 +735,7 @@ transform=null_transform, ) for seq in [0, 2]: - self.assertContains(result.named_seqs, seq) + self.assertIn(seq, result.named_seqs) self.assertEqual(result.named_seqs[seq], aln.named_seqs[seq]) self.assertEqual(len(result.named_seqs), 2) # repeat with higher similarity @@ -739,7 +751,7 @@ transform=null_transform, ) for seq in [0]: - self.assertContains(result.named_seqs, seq) + self.assertIn(seq, result.named_seqs) self.assertEqual(result.named_seqs[seq], aln.named_seqs[seq]) self.assertEqual(len(result.named_seqs), 1) # then, verify that the transform changes the results @@ -747,7 +759,7 @@ aln.named_seqs[0], min_similarity=0.5, metric=metric, transform=transform ) for seq in [0, 1, 2]: - self.assertContains(result.named_seqs, seq) + self.assertIn(seq, result.named_seqs) self.assertEqual(result.named_seqs[seq], aln.named_seqs[seq]) self.assertEqual(len(result.named_seqs), 3) @@ -755,7 +767,7 @@ aln.named_seqs[0], min_similarity=0.8, metric=metric, transform=transform ) for seq in [0, 1]: - self.assertContains(result.named_seqs, seq) + self.assertIn(seq, result.named_seqs) self.assertEqual(result.named_seqs[seq], aln.named_seqs[seq]) self.assertEqual(len(result.named_seqs), 2) @@ -1158,7 +1170,7 @@ self.assertEqual(self.gaps.omit_gap_seqs(5.0 / 7 + 0.01), self.gaps) self.assertEqual(self.gaps.omit_gap_seqs(0.99), self.gaps) # check new object creation - self.assertNotSameObj(self.gaps.omit_gap_seqs(0.99), self.gaps) + self.assertIsNot(self.gaps.omit_gap_seqs(0.99), self.gaps) self.assertTrue( isinstance(self.gaps.omit_gap_seqs(3.0 / 7), _SequenceCollectionBase) ) @@ -1180,7 +1192,7 @@ ) 
self.assertEqual(self.gaps_rna.omit_gap_seqs(5.0 / 7 + 0.01), self.gaps_rna) self.assertEqual(self.gaps_rna.omit_gap_seqs(0.99), self.gaps_rna) - self.assertNotSameObj(self.gaps_rna.omit_gap_seqs(0.99), self.gaps_rna) + self.assertIsNot(self.gaps_rna.omit_gap_seqs(0.99), self.gaps_rna) self.assertTrue( isinstance(self.gaps_rna.omit_gap_seqs(3.0 / 7), _SequenceCollectionBase) ) @@ -1199,7 +1211,7 @@ self.assertEqual(self.gaps.omit_gap_runs(6), self.gaps) self.assertEqual(self.gaps.omit_gap_runs(1000), self.gaps) # test new object creation - self.assertNotSameObj(self.gaps.omit_gap_runs(6), self.gaps) + self.assertIsNot(self.gaps.omit_gap_runs(6), self.gaps) self.assertTrue(isinstance(self.gaps.omit_gap_runs(6), _SequenceCollectionBase)) def test_consistent_gap_degen_handling(self): @@ -1323,7 +1335,7 @@ def test_apply_pssm(self): """should successfully produce pssm scores""" - from cogent3.parse import jaspar, cisbp + from cogent3.parse import cisbp, jaspar _, pwm = jaspar.read("data/sample.jaspar") data = { @@ -1388,39 +1400,83 @@ seqs = self.Class({"a": "AAAAA"}) seqs.set_repr_policy(num_seqs=None, num_pos=None) self.assertEqual( - seqs._repr_policy, dict(num_seqs=10, num_pos=60, ref_name="longest") + seqs._repr_policy, + dict(num_seqs=10, num_pos=60, ref_name="longest", wrap=60), ) def test_set_repr_policy_invalid_input(self): """repr_policy should remain unchanged""" seqs = self.Class({"a": "AAAAA"}) - try: - seqs.set_repr_policy(num_seqs="foo", num_pos=4.2) - self.fail("Inputs not detected as invalid") - except AssertionError: + invalid_args = ( + dict(num_seqs="foo", err=TypeError), + dict(num_pos=4.2, err=TypeError), + dict(ref_name="blah", err=ValueError), + dict(wrap=3.1, err=TypeError), + ) + for arg in invalid_args: + err = arg.pop("err") + with self.assertRaises(err): + seqs.set_repr_policy(**arg) self.assertEqual( - seqs._repr_policy, dict(num_seqs=10, num_pos=60, ref_name="longest") + seqs._repr_policy, + dict(num_seqs=10, num_pos=60, 
ref_name="longest", wrap=60), ) def test_set_repr_policy_valid_input(self): """repr_policy should be set to new values""" seqs = self.Class({"a": "AAAAA", "b": "AAA--"}) - seqs.set_repr_policy(num_seqs=5, num_pos=40, ref_name="a") - self.assertEqual(seqs._repr_policy, dict(num_seqs=5, num_pos=40, ref_name="a")) - # should persist in slicing + seqs.set_repr_policy(num_seqs=5, num_pos=40, ref_name="a", wrap=10) + self.assertEqual( + seqs._repr_policy, dict(num_seqs=5, num_pos=40, ref_name="a", wrap=10) + ) + if self.Class == SequenceCollection: + # this class cannot slice return True + # should persist in slicing self.assertEqual( - seqs[:2]._repr_policy, dict(num_seqs=5, num_pos=40, ref_name="a") + seqs[:2]._repr_policy, dict(num_seqs=5, num_pos=40, ref_name="a", wrap=10) ) + def test_set_wrap_affects_repr_html(self): + """the wrap argument affects the number of columns""" + if self.Class == SequenceCollection: + # this class does not have this method + return True + + # indirectly tested via counting number of occurrences of 'class="label"' + seqs = self.Class({"a": "AAAAA", "b": "AAA--"}) + orig = seqs._repr_html_() + seqs.set_repr_policy(wrap=3) # break alignment into 2 + got = seqs._repr_html_() + token = 'class="label"' + self.assertEqual(got.count(token), 2 * orig.count(token)) + + # using environment variable + env_name = "COGENT3_ALIGNMENT_REPR_POLICY" + os.environ[env_name] = "wrap=2" + seqs = self.Class({"a": "AAAAA", "b": "AAA--"}) + got = seqs._repr_html_() + self.assertEqual(got.count(token), 3 * orig.count(token)) + os.environ.pop(env_name, None) + def test_get_seq_entropy(self): """get_seq_entropy should get entropy of each seq""" a = self.Class(dict(a="ACCC", b="AGTA"), moltype=DNA) entropy = a.entropy_per_seq() e = 0.81127812445913283 # sum(p log_2 p) for p = 0.25, 0.75 - self.assertFloatEqual(entropy, array([e, 1.5])) + assert_allclose(entropy, array([e, 1.5])) + + def test_write_to_json(self): + # test writing to json file + aln = 
self.Class([("a", "AAAA"), ("b", "TTTT"), ("c", "CCCC")]) + with TemporaryDirectory(".") as dirname: + path = str(pathlib.Path(dirname) / "sample.json") + aln.write(path) + with open_(path) as fn: + got = json.loads(fn.read()) + self.assertEqual(got, aln.to_rich_dict()) class SequenceCollectionTests(SequenceCollectionBaseTests, TestCase): @@ -1454,8 +1510,8 @@ self.assertEqual(seqs, ["AAAAAA", "AAA", "AAAA"]) seqs = list(self.ragged.iter_seqs(seq_order=["b", "a", "a"])) self.assertEqual(seqs, ["AAA", "AAAAAA", "AAAAAA"]) - self.assertSameObj(seqs[1], seqs[2]) - self.assertSameObj(seqs[0], self.ragged.named_seqs["b"]) + self.assertIs(seqs[1], seqs[2]) + self.assertIs(seqs[0], self.ragged.named_seqs["b"]) def test_toPHYLIP_ragged(self): """SequenceCollection should refuse to convert ragged seqs to phylip""" @@ -1963,11 +2019,11 @@ """SequenceCollection.uncertainties should match hand-calculated values""" aln = self.Class(["ABC", "AXC"]) obs = aln.entropy_per_pos() - self.assertFloatEqual(obs, [0, 1, 0]) + assert_allclose(obs, [0, 1, 0]) # check what happens with only one input sequence aln = self.Class(["ABC"]) obs = aln.entropy_per_pos() - self.assertFloatEqual(obs, [0, 0, 0]) + assert_allclose(obs, [0, 0, 0]) def test_sample(self): """Alignment.sample should permute alignment by default""" @@ -2047,7 +2103,7 @@ got = aln.to_pretty(name_order=["seq1", "seq2", "seq3"]) self.assertEqual(got, "\n".join(expect)) - got = aln.to_pretty(name_order=["seq1", "seq2", "seq3"], interleave_len=4) + got = aln.to_pretty(name_order=["seq1", "seq2", "seq3"], wrap=4) expect = [ "seq1 ACGA", "seq2 -...", @@ -2059,6 +2115,16 @@ ] self.assertEqual(got, "\n".join(expect)) + def test_to_pretty_deprecation_warning(self): + """produce correct pretty print formatted text""" + seqs = {"seq1": "ACGAANGA", "seq2": "-CGAACGA", "seq3": "ATGAACGA"} + expect = ["seq1 ACGAANGA", "seq2 -....C..", "seq3 .T...C.."] + + aln = self.Class(data=seqs, moltype=DNA) + # should raise warning here + with 
self.assertWarns(DeprecationWarning): + aln.to_pretty(name_order=["seq1", "seq2", "seq3"], interleave_len=4) + def test_to_html(self): """produce correct html formatted text""" seqs = {"seq1": "ACG", "seq2": "-CT"} @@ -2066,7 +2132,7 @@ aln = self.Class(data=seqs, moltype=DNA) got = aln.to_html(ref_name="longest") # name_order=['seq1', 'seq2']) # ensure balanced tags are in the txt - for tag in ["", "", "", "", "
"]: + for tag in ["", "", "", "
"]: self.assertTrue(tag in got) ref_row = ( @@ -2103,6 +2169,16 @@ # order now changes self.assertTrue(got.find(ref_row) < got.find(other_row)) + def test_to_html_deprecation_warning(self): + """ should raise warning using wrap and not interleave_len""" + seqs = {"seq1": "ACG", "seq2": "-CT"} + + aln = self.Class(data=seqs, moltype=DNA) + # specify interleave_len in 2 cases, wrap specified and not specified + # both should raise warnings + with self.assertWarns(DeprecationWarning): + aln.to_html(ref_name="seq2", interleave_len=40) + def test_variable_positions(self): """correctly identify variable positions""" new_seqs = {"seq1": "ACGTACGT", "seq2": "ACCGACGT", "seq3": "ACGTACGT"} @@ -2331,18 +2407,18 @@ s4 = DNA.make_seq("G-ACCC", name="s4") aln = self.Class([s1, s2, s3], moltype=DNA) obs = aln.counts_per_pos() - self.assertEqual(obs.array, exp) - self.assertEqual(obs.motifs, tuple(DNA.alphabet)) + assert_equal(obs.array, exp) + assert_equal(obs.motifs, tuple(DNA.alphabet)) obs = aln.counts_per_pos(motif_length=2) - self.assertEqual(obs[0, "TC"], 1) - self.assertEqual(obs[1, "AC"], 1) - self.assertEqual(obs[2, "AC"], 1) + assert_equal(obs[0, "TC"], 1) + assert_equal(obs[1, "AC"], 1) + assert_equal(obs[2, "AC"], 1) aln = self.Class([s1, s2, s4], moltype=DNA) obs = aln.counts_per_pos(allow_gap=True) - self.assertEqual(obs.array, exp_gap) + assert_equal(obs.array, exp_gap) aln = self.Class(["-RAT", "ACCT", "GTGT"], moltype="dna") c = aln.counts_per_pos(include_ambiguity=False, allow_gap=True) - self.assertEqual(set(c.motifs), set("ACGT-")) + assert_equal(set(c.motifs), set("ACGT-")) def test_counts_per_seq_default_moltype(self): """produce correct counts per seq with default moltypes""" @@ -2385,7 +2461,7 @@ a = self.Class(seqs, alphabet=AB.alphabet) entropy = a.entropy_per_seq() e = 0.81127812445913283 # sum(p log_2 p) for p = 0.25, 0.75 - self.assertFloatEqual(entropy, array([1, 0, e])) + assert_allclose(entropy, array([1, 0, e])) def 
test_seq_entropy_just_gaps(self): """ArrayAlignment get_seq_entropy should get entropy of each seq""" @@ -2410,7 +2486,7 @@ a = self.Class(dict(a="ACAGGG", b="AGACCC", c="GGCCTA"), moltype=DNA) entropy_excluded = a.entropy_per_seq(exclude_unobserved=True) entropy_unexcluded = a.entropy_per_seq(exclude_unobserved=False) - self.assertEqual(entropy_excluded, entropy_unexcluded) + assert_allclose(entropy_excluded, entropy_unexcluded) def test_distance_matrix(self): """Alignment distance_matrix should produce correct scores""" @@ -2461,39 +2537,39 @@ aln = self.Class(data=data, moltype=DNA) # per position got = aln.count_gaps_per_pos(include_ambiguity=False) - self.assertEqual(got.array, [0, 0, 0, 1, 2, 1, 1, 0, 0, 0]) + assert_equal(got.array, [0, 0, 0, 1, 2, 1, 1, 0, 0, 0]) got = aln.count_gaps_per_pos(include_ambiguity=True) - self.assertEqual(got.array, [0, 0, 0, 1, 2, 1, 1, 1, 0, 0]) + assert_equal(got.array, [0, 0, 0, 1, 2, 1, 1, 1, 0, 0]) def test_count_gaps_per_seq(self): """correctly compute the number of gaps""" data = {"a": "AAAA---GGT", "b": "CCC--GG?GT"} aln = self.Class(data=data, moltype=DNA) got = aln.count_gaps_per_seq(include_ambiguity=False) - self.assertEqual(got.array, [3, 2]) - self.assertEqual(got["b"], 2) + assert_equal(got.array, [3, 2]) + assert_equal(got["b"], 2) got = aln.count_gaps_per_seq(include_ambiguity=True) - self.assertEqual(got.array, [3, 3]) - self.assertEqual(got["b"], 3) + assert_equal(got.array, [3, 3]) + assert_equal(got["b"], 3) # per seq, unique got = aln.count_gaps_per_seq(include_ambiguity=False, unique=True) - self.assertEqual(got.array, [1, 2]) + assert_equal(got.array, [1, 2]) got = aln.count_gaps_per_seq(include_ambiguity=True, unique=True) - self.assertEqual(got.array, [2, 2]) + assert_equal(got.array, [2, 2]) data = {"a": "AAAGGG", "b": "------", "c": "------"} aln = self.Class(data=data, moltype=DNA) got = aln.count_gaps_per_seq(include_ambiguity=False, unique=True) - self.assertEqual(got.array, [6, 0, 0]) - 
self.assertEqual(got["a"], 6) - self.assertEqual(got["b"], 0) + assert_equal(got.array, [6, 0, 0]) + assert_equal(got["a"], 6) + assert_equal(got["b"], 0) # per_seq, induced_by data = {"a": "--ACGT---GTAC", "b": "--ACGTA--GT--", "c": "--ACGTA-AGT--"} aln = self.Class(data=data, moltype=DNA) got = aln.count_gaps_per_seq(unique=False, induced_by=True) - self.assertEqual(got.array, [2, 1, 2]) - self.assertEqual(got["b"], 1) + assert_equal(got.array, [2, 1, 2]) + assert_equal(got["b"], 1) def test_coevolution(self): """correctly produces matrix of coevo measures""" @@ -2541,6 +2617,21 @@ aln.set_repr_policy(num_seqs=5, num_pos=40, ref_name="b") got = aln._repr_html_() self.assertTrue(got.find(row_a) > got.find(row_b)) + # tests repr policy has been successfully applied + aln = load_aligned_seqs("data/brca1.fasta", moltype="dna") + aln.set_repr_policy(num_seqs=2) + got = aln._repr_html_() + self.assertEqual(got.count(""), 3) + aln.set_repr_policy(num_seqs=3) + got = aln._repr_html_() + self.assertEqual(got.count(""), 4) + aln.set_repr_policy(num_seqs=len(aln.seqs)) + got = aln._repr_html_() + self.assertEqual(got.count(""), len(aln.seqs) + 1) + # tests _repr_html_ displays correct number of sequences + aln = load_aligned_seqs("data/brca1.fasta", moltype="dna") + got = aln._repr_html_() + self.assertIn("%d x %d" % (aln.num_seqs, aln.seq_len), got.splitlines()[-2]) def test_seqlogo(self): """exercise producing a seq logo""" @@ -2622,7 +2713,7 @@ ) # if sliced, seq data should be < orig - self.assertLessThan( + self.assertLess( len(sliced.named_seqs[name].data), len(orig.named_seqs[name].data) ) # and map.parent_length and len(data) should match @@ -3007,9 +3098,9 @@ def test_init(self): """ArrayAlignment init should work from a sequence""" a = ArrayAlignment(array([[0, 1, 2], [3, 4, 5]]), conversion_f=aln_from_array) - self.assertEqual(a.seq_data, array([[0, 3], [1, 4], [2, 5]], "B")) - self.assertEqual(a.array_positions, array([[0, 1, 2], [3, 4, 5]], "B")) - 
self.assertEqual(a.names, ["seq_0", "seq_1", "seq_2"]) + assert_equal(a.seq_data, array([[0, 3], [1, 4], [2, 5]], "B")) + assert_equal(a.array_positions, array([[0, 1, 2], [3, 4, 5]], "B")) + assert_equal(a.names, ["seq_0", "seq_1", "seq_2"]) def test_guess_input_type(self): """ArrayAlignment _guess_input_type should figure out data type correctly""" @@ -3032,27 +3123,27 @@ """ArrayAlignment init should work from ArraySequence objects.""" s = list(map(ArraySequence, ["abc", "def"])) a = ArrayAlignment(s) - self.assertEqual(a.seq_data, array(["abc", "def"], "c").view("B")) + assert_equal(a.seq_data, array(["abc", "def"], "c").view("B")) def test_init_generic(self): """ArrayAlignment init should work from generic objects.""" s = ["abc", "def"] a = ArrayAlignment(s) - self.assertEqual(a.seq_data, array(["abc", "def"], "c").view("B")) + assert_equal(a.seq_data, array(["abc", "def"], "c").view("B")) def test_init_aln(self): """ArrayAlignment init should work from another alignment.""" s = ["abc", "def"] a = ArrayAlignment(s) b = ArrayAlignment(a) - self.assertNotSameObj(a.seq_data, b.seq_data) - self.assertEqual(b.seq_data, array(["abc", "def"], "c").view("B")) + self.assertIsNot(a.seq_data, b.seq_data) + assert_equal(b.seq_data, array(["abc", "def"], "c").view("B")) def test_init_dict(self): """ArrayAlignment init should work from dict.""" s = {"abc": "AAACCC", "xyz": "GCGCGC"} a = ArrayAlignment(s, names=["abc", "xyz"]) - self.assertEqual(a.seq_data, array(["AAACCC", "GCGCGC"], "c").view("B")) + assert_equal(a.seq_data, array(["AAACCC", "GCGCGC"], "c").view("B")) self.assertEqual(tuple(a.names), ("abc", "xyz")) def test_init_empty(self): @@ -3066,31 +3157,31 @@ s2 = RNA.make_seq("AA") d = ArrayAlignment(s1) - self.assertSameObj(d.moltype, BYTES) - self.assertSameObj(d.alphabet, BYTES.alphabet) + self.assertIs(d.moltype, BYTES) + self.assertIs(d.alphabet, BYTES.alphabet) d = ArrayAlignment(s1, moltype=RNA) - self.assertSameObj(d.moltype, RNA) - 
self.assertSameObj(d.alphabet, RNA.alphabets.degen_gapped) + self.assertIs(d.moltype, RNA) + self.assertIs(d.alphabet, RNA.alphabets.degen_gapped) d = ArrayAlignment(s1, alphabet=RNA.alphabet) - self.assertSameObj(d.moltype, RNA) - self.assertSameObj(d.alphabet, RNA.alphabet) + self.assertIs(d.moltype, RNA) + self.assertIs(d.alphabet, RNA.alphabet) d = ArrayAlignment(s2) - self.assertSameObj(d.moltype, RNA) - self.assertSameObj(d.alphabet, RNA.alphabets.degen_gapped) + self.assertIs(d.moltype, RNA) + self.assertIs(d.alphabet, RNA.alphabets.degen_gapped) d = ArrayAlignment(s2, moltype=DNA) - self.assertSameObj(d.moltype, DNA) - self.assertSameObj(d.alphabet, DNA.alphabets.degen_gapped) + self.assertIs(d.moltype, DNA) + self.assertIs(d.alphabet, DNA.alphabets.degen_gapped) # checks for containers d = ArrayAlignment([s2]) - self.assertSameObj(d.moltype, RNA) + self.assertIs(d.moltype, RNA) d = ArrayAlignment({"x": s2}) - self.assertSameObj(d.moltype, RNA) + self.assertIs(d.moltype, RNA) d = ArrayAlignment(set([s2])) - self.assertSameObj(d.moltype, RNA) + self.assertIs(d.moltype, RNA) def test_iter(self): """ArrayAlignment iter should iterate over positions""" @@ -3159,26 +3250,26 @@ """ArrayAlignment counts_per_seq should return motif counts each seq""" a = self.a f = a.counts_per_seq() - self.assertEqual(f.array, array([[3, 1], [1, 3]])) + assert_equal(f.array, array([[3, 1], [1, 3]])) f = a.counts_per_seq(motif_length=2, exclude_unobserved=True) - self.assertEqual(f.array, array([[1, 1, 0], [0, 1, 1]])) + assert_equal(f.array, array([[1, 1, 0], [0, 1, 1]])) def test_entropy_per_pos(self): """entropy_per_pos should get entropy of each pos""" a = self.a f = a.entropy_per_pos() e = array([0, 0, 1, 1]) - self.assertEqual(f, e) + assert_allclose(f, e) f = a.entropy_per_pos(motif_length=2) e = array([0, 1]) - self.assertEqual(f, e) + assert_allclose(f, e) seqs = [] for s in ["-GAT", "ACCT", "GAGT"]: seqs.append(make_seq(s, moltype="dna")) a = ArrayAlignment(seqs) f = 
a.entropy_per_pos(allow_gap=True) e = array([1.584962500721156, 1.584962500721156, 1.584962500721156, 0]) - self.assertEqual(f, e) + assert_allclose(f, e) seqs = [] for s in ["-RAT", "ACCT", "GTGT"]: diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_core/test_alphabet.py python-cogent-2020.12.21a+dfsg/tests/test_core/test_alphabet.py --- python-cogent-2020.6.30a0+dfsg/tests/test_core/test_alphabet.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_core/test_alphabet.py 2020-12-20 23:35:03.000000000 +0000 @@ -5,6 +5,10 @@ """ import pickle +from unittest import TestCase, main + +from numpy.testing import assert_equal + from cogent3.core.alphabet import ( CharAlphabet, Enumeration, @@ -18,7 +22,6 @@ uint32, ) from cogent3.core.moltype import RNA -from cogent3.util.unit_test import TestCase, main DnaBases = CharAlphabet("TCAG") @@ -29,9 +32,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Rob Knight", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -190,20 +193,20 @@ """Enumeration counts should count freqs in array""" a = DnaBases f = array([[0, 0, 1, 0, 0, 3]]) - self.assertEqual(a.counts(f), array([4, 1, 0, 1])) + assert_equal(a.counts(f), array([4, 1, 0, 1])) # check that it works with byte array f = array([[0, 0, 1, 0, 0, 3]], "B") - self.assertEqual(a.counts(f), array([4, 1, 0, 1])) + assert_equal(a.counts(f), array([4, 1, 0, 1])) # should ignore out-of-bounds items g = [0, 4] - self.assertEqual(a.counts(g), array([1, 0, 0, 0])) + assert_equal(a.counts(g), array([1, 0, 0, 0])) # make sure it works for long sequences, i.e. 
no wraparound at 255 h = [0, 3] * 70000 - self.assertEqual(a.counts(h), array([70000, 0, 0, 70000])) + assert_equal(a.counts(h), array([70000, 0, 0, 70000])) h2 = array(h).astype("B") - self.assertEqual(a.counts(h2), array([70000, 0, 0, 70000])) + assert_equal(a.counts(h2), array([70000, 0, 0, 70000])) i = array([0, 3] * 75000) - self.assertEqual(a.counts(i), array([75000, 0, 0, 75000])) + assert_equal(a.counts(i), array([75000, 0, 0, 75000])) # make sure it works for long _binary_ sequences, e.g. the results # of array comparisons. a = array([0, 1, 2, 3] * 10000) @@ -233,7 +236,7 @@ def test_from_string(self): """CharAlphabet from_string should return correct array""" r = CharAlphabet("UCAG") - self.assertEqual(r.from_string("UUCUGA"), array([0, 0, 1, 0, 3, 2], "B")) + assert_equal(r.from_string("UUCUGA"), array([0, 0, 1, 0, 3, 2], "B")) def test_is_valid(self): """CharAlphabet is_valid should return True for valid sequence""" @@ -250,13 +253,13 @@ """CharAlphabet from_array should return correct array""" r = CharAlphabet("UCAG") got = r.from_array(array(["UUC", "UGA"], "c")) - self.assertEqual(got, array([[0, 0, 1], [0, 3, 2]], "B")) + assert_equal(got, array([[0, 0, 1], [0, 3, 2]], "B")) def test_to_chars(self): """CharAlphabet to_chars should convert an input array to chars""" r = CharAlphabet("UCAG") c = r.to_chars(array([[0, 0, 1], [0, 3, 2]], "B")) - self.assertEqual(c, array(["UUC", "UGA"], "c")) + assert_equal(c, array(["UUC", "UGA"], "c")) def test_to_string(self): """CharAlphabet to_string should convert an input array to string""" @@ -275,7 +278,7 @@ rp = r.pairs self.assertEqual(len(rp), 16) rp2 = r.pairs - self.assertSameObj(rp, rp2) + self.assertIs(rp, rp2) def test_triples(self): """triples should cache the same object.""" @@ -283,7 +286,7 @@ rt = r.Triples self.assertEqual(len(rt), 64) rt2 = r.Triples - self.assertSameObj(rt, rt2) + self.assertIs(rt, rt2) class JointEnumerationTests(TestCase): @@ -297,14 +300,14 @@ self.assertEqual(a.shape, (4, 4)) 
self.assertEqual(a[0], ("T", "U")) self.assertEqual(a[-1], ("G", "G")) - self.assertEqual(a._sub_enum_factors, array([[4], [1]])) + assert_equal(a._sub_enum_factors, array([[4], [1]])) # should work for arbitrary sequences a = JointEnumeration(["TCAG", "UCAG"]) self.assertEqual(len(a), 16) self.assertEqual(a[0], ("T", "U")) self.assertEqual(a[-1], ("G", "G")) - self.assertEqual(a._sub_enum_factors, array([[4], [1]])) + assert_equal(a._sub_enum_factors, array([[4], [1]])) # should work for different length sequences a = JointEnumeration(["TCA", "UCAG"]) @@ -312,7 +315,7 @@ self.assertEqual(len(a), 12) self.assertEqual(a[0], ("T", "U")) self.assertEqual(a[-1], ("A", "G")) - self.assertEqual(a._sub_enum_factors, array([[4], [1]])) # note: _not_ [3,1] + assert_equal(a._sub_enum_factors, array([[4], [1]])) # note: _not_ [3,1] def test_to_indices(self): """JointEnumeration to_indices should convert tuples correctly""" @@ -331,14 +334,14 @@ a = JointEnumeration(["xyz", "abcd", "ef"]) v = [[0, 1, 2, 0], [3, 3, 1, 0], [1, 1, 0, 0]] result = a.pack_arrays(v) - self.assertEqual(result, array([7, 15, 18, 0])) + assert_equal(result, array([7, 15, 18, 0])) def test_unpack_arrays(self): """JointEnumeration unpack_arrays should return correct arrays.""" a = JointEnumeration(["xyz", "abcd", "ef"]) v = [7, 15, 18, 0] result = a.unpack_arrays(v) - self.assertEqual(result, array([[0, 1, 2, 0], [3, 3, 1, 0], [1, 1, 0, 0]])) + assert_equal(result, array([[0, 1, 2, 0], [3, 3, 1, 0], [1, 1, 0, 0]])) if __name__ == "__main__": diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_core/test_annotation.py python-cogent-2020.12.21a+dfsg/tests/test_core/test_annotation.py --- python-cogent-2020.6.30a0+dfsg/tests/test_core/test_annotation.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_core/test_annotation.py 2020-12-20 23:35:03.000000000 +0000 @@ -12,7 +12,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] 
__license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_core/test_core_standalone.py python-cogent-2020.12.21a+dfsg/tests/test_core/test_core_standalone.py --- python-cogent-2020.6.30a0+dfsg/tests/test_core/test_core_standalone.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_core/test_core_standalone.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,10 +1,13 @@ #!/usr/bin/env python +import json import os import pathlib import re import tempfile import unittest +from tempfile import TemporaryDirectory + from cogent3 import DNA, PROTEIN, RNA from cogent3 import STANDARD_CODON as CODON from cogent3 import ( @@ -15,6 +18,7 @@ make_seq, make_unaligned_seqs, ) +from cogent3.app.data_store import make_record_for_json from cogent3.core.alignment import ( Alignment, ArrayAlignment, @@ -28,7 +32,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -101,6 +105,11 @@ self.assertTrue("Human" in got.to_dict()) self.assertEqual(got.info["source"], path) + def test_load_unaligned_seqs_no_format(self): + """test loading unaligned from file""" + with self.assertRaises(ValueError): + got = load_unaligned_seqs("somepath") + def test_load_aligned_seqs(self): """test loading aligned from file""" path = os.path.join(data_path, "brca1_5.paml") @@ -114,6 +123,89 @@ self.assertEqual(got.moltype.label, "dna") self.assertIsInstance(got, Alignment) + def test_load_aligned_seqs_no_format(self): + """test loading unaligned from file""" + with self.assertRaises(ValueError): + got = load_aligned_seqs("somepath") + + def 
test_load_unaligned_seqs_from_json(self): + """test loading an unaligned object from json file""" + with TemporaryDirectory(dir=".") as dirname: + json_path = os.path.join(dirname, "unaligned.json") + path = os.path.join(data_path, "brca1_5.paml") + unaligned = load_unaligned_seqs(path) + unaligned.write(json_path) + + got = load_unaligned_seqs(json_path) + self.assertIsInstance(got, SequenceCollection) + self.assertEqual(got.to_dict(), unaligned.to_dict()) + self.assertEqual(got.info["source"], path) + # tests json generated by make_record_for_json + uncompleted_record = make_record_for_json("delme", got, False) + uncompleted_record_path = os.path.join(dirname, "uncompleted_record.json") + completed_record = make_record_for_json("delme", got, True) + completed_record_path = os.path.join(dirname, "completed_record.json") + with open(uncompleted_record_path, "w") as out: + out.write(json.dumps(uncompleted_record)) + with open(completed_record_path, "w") as out: + out.write(json.dumps(completed_record)) + # tests when provided record json file is uncompleted + with self.assertRaises(TypeError): + load_unaligned_seqs(uncompleted_record_path) + # tests when provided record json is completed + got = load_unaligned_seqs(completed_record_path) + self.assertIsInstance(got, SequenceCollection) + self.assertEqual(got.to_dict(), unaligned.to_dict()) + self.assertEqual(got.info["source"], path) + + def test_load_aligned_seqs_from_json(self): + """tests loading an aligned object from json file""" + with TemporaryDirectory(dir=".") as dirname: + path = os.path.join(data_path, "brca1_5.paml") + alignment = load_aligned_seqs(path, array_align=False, moltype="dna") + alignment_json_path = os.path.join(dirname, "alignment.json") + alignment.write(alignment_json_path) + array_alignment = load_aligned_seqs(path, moltype="dna") + array_alignment_json_path = os.path.join(dirname, "array_alignment.json") + array_alignment.write(array_alignment_json_path) + # tests case Alignment + got = 
load_aligned_seqs(alignment_json_path) + self.assertIsInstance(got, Alignment) + self.assertEqual(got.moltype.label, "dna") + self.assertEqual(got.to_dict(), alignment.to_dict()) + self.assertEqual(got.info["source"], path) + # tests case ArrayAlignment + got = load_aligned_seqs(array_alignment_json_path) + self.assertIsInstance(got, ArrayAlignment) + self.assertEqual(got.moltype.label, "dna") + self.assertEqual(got.to_dict(), array_alignment.to_dict()) + self.assertEqual(got.info["source"], path) + + # tests json generated by make_record_for_json + uncompleted_record = make_record_for_json("delme", got, False) + completed_record = make_record_for_json("delme", got, True) + uncompleted_record_path = os.path.join(dirname, "uncompleted_record.json") + completed_record_path = os.path.join(dirname, "completed_record.json") + with open(uncompleted_record_path, "w") as out: + out.write(json.dumps(uncompleted_record)) + with open(completed_record_path, "w") as out: + out.write(json.dumps(completed_record)) + # tests when provided record json file is uncompleted + with self.assertRaises(TypeError): + load_unaligned_seqs(uncompleted_record_path) + # tests when provided record json is completed + got = load_aligned_seqs(completed_record_path) + self.assertIsInstance(got, ArrayAlignment) + self.assertEqual(got.to_dict(), array_alignment.to_dict()) + self.assertEqual(got.info["source"], path) + # tests wrong input json file + json_path = os.path.join(dirname, "unaligned.json") + path = os.path.join(data_path, "brca1_5.paml") + unaligned = load_unaligned_seqs(path) + unaligned.write(json_path) + with self.assertRaises(TypeError): + load_aligned_seqs(json_path) + class ReadingWritingFileFormats(unittest.TestCase): """Testing ability to read file formats.""" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_core/test_features.py python-cogent-2020.12.21a+dfsg/tests/test_core/test_features.py --- python-cogent-2020.6.30a0+dfsg/tests/test_core/test_features.py 2020-06-30 
05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_core/test_features.py 2020-12-20 23:35:03.000000000 +0000 @@ -10,7 +10,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -725,8 +725,7 @@ ) def test_annotate_matches_to(self): - """annotate_matches_to attaches annotations correctly to a Sequence - """ + """annotate_matches_to attaches annotations correctly to a Sequence""" seq = DNA.make_seq("TTCCACTTCCGCTT", name="x") pattern = "CCRC" annot = seq.annotate_matches_to( diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_core/test_genetic_code.py python-cogent-2020.12.21a+dfsg/tests/test_core/test_genetic_code.py --- python-cogent-2020.6.30a0+dfsg/tests/test_core/test_genetic_code.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_core/test_genetic_code.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,6 +1,8 @@ #!/usr/bin/env python """ Unit tests for Genetic Code classes. 
""" +from unittest import TestCase, main + from cogent3 import DNA, RNA from cogent3.core.genetic_code import ( DEFAULT, @@ -11,14 +13,13 @@ available_codes, get_code, ) -from cogent3.util.unit_test import TestCase, main __author__ = "Greg Caporaso" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Greg Caporaso", "Rob Knight", "Peter Maxwell", "Thomas La"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Greg Caporaso" __email__ = "caporaso@colorado.edu" __status__ = "Production" @@ -75,8 +76,8 @@ self.assertEqual(sgc.is_stop("UAA"), True) self.assertEqual(sgc.is_stop("AAA"), False) self.assertEqual(len(sgc.sense_codons), 61) - self.assertContains(sgc.sense_codons, "AAA") - self.assertNotContains(sgc.sense_codons, "TGA") + self.assertIn("AAA", sgc.sense_codons) + self.assertNotIn("TGA", sgc.sense_codons) def test_standard_code_lookup(self): """GeneticCodes should hold codes keyed by id as string and number""" @@ -357,7 +358,7 @@ obs_synonyms = GeneticCode(self.SGC).synonyms # note that the lists will be arbitrary-order for i in expected_synonyms: - self.assertEqualItems(obs_synonyms[i], expected_synonyms[i]) + self.assertCountEqual(obs_synonyms[i], expected_synonyms[i]) def test_get_code(self): """correctly return the genetic code""" @@ -387,6 +388,7 @@ def test_to_regex(self): """creates a regex from aa seq to match a DNA sequence""" import re + from cogent3 import make_seq dna = "ACCGAACAGGGC" @@ -405,8 +407,11 @@ """exercising the _repr_html_ method""" gc = get_code(1) got = gc._repr_html_().strip() - self.assertTrue(got.startswith("")) - self.assertTrue(got.endswith("
")) + self.assertTrue( + '
' in got or '
' in got + ) + self.assertTrue("" in got) + self.assertTrue("
" in got) self.assertIn("Standard Nuclear", got) diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_core/test_info.py python-cogent-2020.12.21a+dfsg/tests/test_core/test_info.py --- python-cogent-2020.6.30a0+dfsg/tests/test_core/test_info.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_core/test_info.py 2020-12-20 23:35:03.000000000 +0000 @@ -3,17 +3,18 @@ """ import warnings +from unittest import TestCase, main + from cogent3.core.info import DbRef, DbRefs, Info, _make_list -from cogent3.util.unit_test import TestCase, main __author__ = "Rob Knight" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -61,10 +62,10 @@ def test_cmp(self): """DbRef cmp should first try numeric, then alphabetic, cmp.""" - self.assertLessThan(DbRef("abc"), DbRef("xyz")) + self.assertLess(DbRef("abc"), DbRef("xyz")) self.assertEqual(DbRef("abc"), DbRef("abc")) - self.assertGreaterThan(DbRef("123"), DbRef("14")) - self.assertLessThan(DbRef("123"), DbRef("abc")) + self.assertGreater(DbRef("123"), DbRef("14")) + self.assertLess(DbRef("123"), DbRef("abc")) # check that it ignores other attributes self.assertEqual(DbRef("x", "y", "z", "a", "b"), DbRef("x")) @@ -103,7 +104,7 @@ """Info empty init should work as expected""" d = Info() self.assertEqual(len(d), 1) - self.assertContains(d, "Refs") + self.assertIn("Refs", d) self.assertEqual(d.Refs, DbRefs()) self.assertTrue(isinstance(d.Refs, DbRefs)) @@ -124,27 +125,27 @@ raise Exception("Failed to prevent deletion of required key Refs" "") d.GenBank = ("qaz", "wsx") self.assertEqual(d.GenBank, ["qaz", "wsx"]) - self.assertContains(d.Refs, "GenBank") - self.assertContains(d, "GenBank") + self.assertIn("GenBank", d.Refs) + 
self.assertIn("GenBank", d) d.GenBank = "xyz" self.assertEqual(d.GenBank, ["xyz"]) - self.assertSameObj(d.GenBank, d.Refs.GenBank) + self.assertIs(d.GenBank, d.Refs.GenBank) d.GO = "x" self.assertEqual(d.GO, ["x"]) d.GO.append("y") self.assertEqual(d.GO, ["x", "y"]) d.ZZZ = "zzz" self.assertEqual(d.ZZZ, "zzz") - self.assertNotContains(d.Refs, "ZZZ") - self.assertNotContains(d, "XXX") + self.assertNotIn("ZZZ", d.Refs) + self.assertNotIn("XXX", d) self.assertEqual(d.XXX, None) def test_identity(self): """Info should get its own new Refs when created""" i = Info() j = Info() - self.assertNotSameObj(i, j) - self.assertNotSameObj(i.Refs, j.Refs) + self.assertIsNot(i, j) + self.assertIsNot(i.Refs, j.Refs) def test_update(self): """update should warn the user of overlapping keys""" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_core/test_location.py python-cogent-2020.12.21a+dfsg/tests/test_core/test_location.py --- python-cogent-2020.6.30a0+dfsg/tests/test_core/test_location.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_core/test_location.py 2020-12-20 23:35:03.000000000 +0000 @@ -2,17 +2,18 @@ """Unit tests for Range, Span and Point classes. 
""" -from cogent3.core.location import Map, Point, Range, RangeFromString, Span -from cogent3.util.unit_test import TestCase, main +from unittest import TestCase, main + +from cogent3.core.location import Map, Range, RangeFromString, Span __author__ = "Rob Knight" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -54,15 +55,15 @@ def test_contains(self): """Span object contains its start but not its end""" - self.assertNotContains(self.empty, 0) - self.assertContains(self.full, 30) - self.assertContains(self.full, 34) - self.assertNotContains(self.full, 35) - self.assertContains(self.full, self.inside) - self.assertNotContains(self.full, self.overlapping) - self.assertContains(self.spans_zero, 0) - self.assertContains(self.spans_zero, -5) - self.assertNotContains(self.spans_zero, 5) + self.assertNotIn(0, self.empty) + self.assertIn(30, self.full) + self.assertIn(34, self.full) + self.assertNotIn(35, self.full) + self.assertIn(self.inside, self.full) + self.assertNotIn(self.overlapping, self.full) + self.assertIn(0, self.spans_zero) + self.assertIn(-5, self.spans_zero) + self.assertNotIn(5, self.spans_zero) def test_overlaps(self): """Span objects should be able to overlap points or spans""" @@ -128,10 +129,10 @@ second = [r, o, f, s, e, i, n] second.sort() for i, j in zip(first, second): - self.assertSameObj(i, j) + self.assertIs(i, j) for i, j in zip(first, expected_order): - self.assertSameObj(i, j) + self.assertIs(i, j) def test_sort(self): """Span should support sort by 1st/2nd index and direction""" @@ -151,7 +152,7 @@ first.sort() for i, j in zip(first, expected_order): - self.assertSameObj(i, j) + self.assertIs(i, j) def test_starts_before(self): """Span starts_before should 
match hand-calculated results""" @@ -380,38 +381,38 @@ def test_contains(self): """Range contains an item if any span contains it""" - self.assertContains(self.one, 50) - self.assertContains(self.one, 0) - self.assertContains(self.one, 99) - self.assertNotContains(self.one, 100) - self.assertContains(self.three, 6) - self.assertNotContains(self.three, 7) - self.assertNotContains(self.three, 8) - self.assertNotContains(self.three, 14) - self.assertContains(self.three, 15) - self.assertNotContains(self.three, 29) - self.assertContains(self.three, 30) - self.assertContains(self.three, 34) - self.assertNotContains(self.three, 35) - self.assertNotContains(self.three, 40) + self.assertIn(50, self.one) + self.assertIn(0, self.one) + self.assertIn(99, self.one) + self.assertNotIn(100, self.one) + self.assertIn(6, self.three) + self.assertNotIn(7, self.three) + self.assertNotIn(8, self.three) + self.assertNotIn(14, self.three) + self.assertIn(15, self.three) + self.assertNotIn(29, self.three) + self.assertIn(30, self.three) + self.assertIn(34, self.three) + self.assertNotIn(35, self.three) + self.assertNotIn(40, self.three) # should work if a span is added self.three.spans.append(40) - self.assertContains(self.three, 40) + self.assertIn(40, self.three) # should work for spans - self.assertContains(self.three, Span(31, 33)) - self.assertNotContains(self.three, Span(31, 37)) + self.assertIn(Span(31, 33), self.three) + self.assertNotIn(Span(31, 37), self.three) # span contains itself - self.assertContains(self.two, self.twocopy) + self.assertIn(self.twocopy, self.two) # should work for ranges - self.assertContains(self.three, Range([6, Span(15, 16), Span(30, 33)])) + self.assertIn(Range([6, Span(15, 16), Span(30, 33)]), self.three) # should work for copy, except when extra piece added threecopy = Range(self.three) - self.assertContains(self.three, threecopy) + self.assertIn(threecopy, self.three) threecopy.spans.append(1000) - self.assertNotContains(self.three, threecopy) + 
self.assertNotIn(threecopy, self.three) self.three.spans.append(Span(950, 1050)) - self.assertContains(self.three, threecopy) - self.assertNotContains(threecopy, self.three) + self.assertIn(threecopy, self.three) + self.assertNotIn(self.three, threecopy) def test_overlaps(self): """Range overlaps should return true if any component overlapping""" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_core/test_maps.py python-cogent-2020.12.21a+dfsg/tests/test_core/test_maps.py --- python-cogent-2020.6.30a0+dfsg/tests/test_core/test_maps.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_core/test_maps.py 2020-12-20 23:35:03.000000000 +0000 @@ -11,7 +11,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Rob Knight", "Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_core/test_moltype.py python-cogent-2020.12.21a+dfsg/tests/test_core/test_moltype.py --- python-cogent-2020.6.30a0+dfsg/tests/test_core/test_moltype.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_core/test_moltype.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,7 +1,9 @@ #!/usr/bin/env python import pickle -from cogent3.core import moltype, sequence +from unittest import TestCase, main + +from cogent3.core import sequence from cogent3.core.moltype import ( DNA, PROTEIN, @@ -20,26 +22,27 @@ IUPAC_RNA_chars, MolType, RnaStandardPairs, - array, available_moltypes, get_moltype, make_matches, make_pairs, ) -from cogent3.data.molecular_weight import DnaMW, ProteinMW, RnaMW -from cogent3.util.unit_test import TestCase, main +from cogent3.data.molecular_weight import DnaMW, RnaMW __author__ = "Gavin Huttley, Peter Maxwell, and Rob Knight" __copyright__ = "Copyright 2007-2020, The Cogent 
Project" __credits__ = ["Rob Knight", "Gavin Huttley", "Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" # ind some of the standard alphabets to reduce typing +from numpy.testing import assert_allclose + + RnaBases = RNA.alphabets.base DnaBases = DNA.alphabets.base AminoAcids = PROTEIN.alphabets.base @@ -116,7 +119,7 @@ ("n", "n"): False, }, ) - self.assertNotContains(m, ("x", "z")) + self.assertNotIn(("x", "z"), m) def test_init_all(self): """make_matches with everything should produce correct dict""" @@ -172,12 +175,12 @@ def test_init_pairs(self): """make_pairs with just pairs should equal the original""" self.assertEqual(make_pairs(self.pairs), self.pairs) - self.assertNotSameObj(make_pairs(self.pairs), self.pairs) + self.assertIsNot(make_pairs(self.pairs), self.pairs) def test_init_monomers(self): """make_pairs with pairs and monomers should equal just the pairs""" self.assertEqual(make_pairs(self.pairs, "ABCDEFG"), self.pairs) - self.assertNotSameObj(make_pairs(self.pairs, "ABCDEFG"), self.pairs) + self.assertIsNot(make_pairs(self.pairs, "ABCDEFG"), self.pairs) def test_init_gaps(self): """make_pairs should add all combinations of gaps as weak pairs""" @@ -229,15 +232,15 @@ base = o("base") c = CoreObjectGroup(base) - self.assertSameObj(c.base, base) - self.assertSameObj(c.degen, None) - self.assertSameObj(c.base.degen, None) + self.assertIs(c.base, base) + self.assertIs(c.degen, None) + self.assertIs(c.base.degen, None) base, degen, gap, degengap = list(map(o, ["base", "degen", "gap", "degengap"])) c = CoreObjectGroup(base, degen, gap, degengap) - self.assertSameObj(c.base, base) - self.assertSameObj(c.base.degen, degen) - self.assertSameObj(c.degen.gapped, degengap) + self.assertIs(c.base, base) + self.assertIs(c.base.degen, degen) + self.assertIs(c.degen.gapped, degengap) class AlphabetGroupTests(TestCase): @@ 
-291,15 +294,17 @@ self.assertEqual(available.shape, (7, 3)) self.assertEqual(available[1, "Number of states"], 4) self.assertEqual(available["dna", "Number of states"], 4) + txt = repr(available) + self.assertIn("'dna'", txt) def test_init_minimal(self): """MolType should init OK with just monomers""" a = MolType("Abc") - self.assertContains(a.alphabet, "A") - self.assertNotContains(a.alphabet, "a") # case-sensitive - self.assertContains(a.alphabet, "b") - self.assertNotContains(a.alphabet, "B") - self.assertNotContains(a.alphabet, "x") + self.assertIn("A", a.alphabet) + self.assertNotIn("a", a.alphabet) # case-sensitive + self.assertIn("b", a.alphabet) + self.assertNotIn("B", a.alphabet) + self.assertNotIn("x", a.alphabet) def test_init_everything(self): """MolType should init OK with all parameters set""" @@ -313,7 +318,7 @@ add_lower=False, ) for i in "Abcd~": - self.assertContains(a, i) + self.assertIn(i, a) self.assertEqual(a.complement("b"), "c") self.assertEqual(a.complement("AbcAA"), "AcbAA") self.assertEqual(a.first_degenerate("AbcdA"), 3) @@ -370,19 +375,19 @@ def test_contains(self): """MolType contains should return correct result""" for i in "UCAGWSMKRYBDHVN-" + "UCAGWSMKRYBDHVN-".lower(): - self.assertContains(RnaMolType, i) + self.assertIn(i, RnaMolType) for i in "x!@#$%^&ZzQq": - self.assertNotContains(RnaMolType, i) + self.assertNotIn(i, RnaMolType) a = MolType(dict.fromkeys("ABC"), add_lower=True) for i in "abcABC": - self.assertContains(a, i) - self.assertNotContains(a, "x") + self.assertIn(i, a) + self.assertNotIn("x", a) b = MolType(dict.fromkeys("ABC"), add_lower=False) for i in "ABC": - self.assertContains(b, i) + self.assertIn(i, b) for i in "abc": - self.assertNotContains(b, i) + self.assertNotIn(i, b) def test_iter(self): """MolType iter should iterate over monomer order""" @@ -503,13 +508,13 @@ u = d(s, "random") for i, j in zip(s, t): if i in RnaMolType.degenerates: - self.assertContains(RnaMolType.degenerates[i], j) + self.assertIn(j, 
RnaMolType.degenerates[i]) else: self.assertEqual(i, j) self.assertNotEqual(t, u) self.assertEqual(d(tuple("UCAG"), "random"), tuple("UCAG")) self.assertEqual(len(s), len(t)) - self.assertSameObj(RnaMolType.first_degenerate(t), None) + self.assertIs(RnaMolType.first_degenerate(t), None) # should raise exception on unknown disambiguation method self.assertRaises(NotImplementedError, d, s, "xyz") @@ -630,11 +635,11 @@ p = ProteinMolType.mw self.assertEqual(p(""), 0) self.assertEqual(r(""), 0) - self.assertFloatEqual(p("A"), 89.09) - self.assertFloatEqual(r("A"), 375.17) - self.assertFloatEqual(p("AAA"), 231.27) - self.assertFloatEqual(r("AAA"), 1001.59) - self.assertFloatEqual(r("AAACCCA"), 2182.37) + assert_allclose(p("A"), 89.09) + assert_allclose(r("A"), 375.17) + assert_allclose(p("AAA"), 231.27) + assert_allclose(r("AAA"), 1001.59) + assert_allclose(r("AAACCCA"), 2182.37) def test_can_match(self): """MolType can_match should return True if all positions can match""" @@ -745,10 +750,10 @@ """RnaMolType should __contain__ the expected symbols.""" keys = "ucagrymkwsbhvdn?-" for k in keys: - self.assertContains(RnaMolType, k) + self.assertIn(k, RnaMolType) for k in keys.upper(): - self.assertContains(RnaMolType, k) - self.assertNotContains(RnaMolType, "X") + self.assertIn(k, RnaMolType) + self.assertNotIn("X", RnaMolType) def test_degenerate_from_seq(self): """RnaMolType degenerate_from_seq should give correct results""" @@ -792,7 +797,7 @@ expect = {b + "_dna" for b in states} self.assertEqual(got, expect) for state in expect: - self.assertTrue(state in css) + self.assertTrue(state in "\n".join(css)) # check subset of protein css, styles = PROTEIN.get_css_style() @@ -801,6 +806,13 @@ expect = {b + "_protein" for b in states} self.assertEqual(got, expect) + def test_get_css_no_label(self): + """should not fail when moltype has no label""" + dna = get_moltype("dna") + orig_label, dna.label = dna.label, None + _ = dna.get_css_style() + dna.label = orig_label + class 
_AlphabetTestCase(TestCase): def assertEqualSeqs(self, a, b): diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_core/test_profile.py python-cogent-2020.12.21a+dfsg/tests/test_core/test_profile.py --- python-cogent-2020.6.30a0+dfsg/tests/test_core/test_profile.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_core/test_profile.py 2020-12-20 23:35:03.000000000 +0000 @@ -2,19 +2,19 @@ """Provides tests for classes and functions in profile.py """ from collections import Counter +from unittest import TestCase, main from numpy import array, log2, nan, vstack from numpy.testing import assert_allclose from cogent3.core.profile import PSSM, MotifCountsArray, MotifFreqsArray -from cogent3.util.unit_test import TestCase, main __author__ = "Gavin Huttley" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Sandra Smit", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -293,8 +293,66 @@ expected = [0.25, -1] assert_allclose(rel_entropy, expected) + def test_pairwise_jsd(self): + """correctly constructs pairwise JSD dict""" + from numpy.random import random + + from cogent3.maths.measure import jsd + + data = [[0.25, 0.25, 0.25, 0.25], [0.5, 0.5, 0, 0]] + expect = jsd(data[0], data[1]) + freqs = MotifFreqsArray(array(data), "ACGT") + got = freqs.pairwise_jsd() + assert_allclose(list(got.values())[0], expect) + + data = [] + for _ in range(6): + freqs = random(4) + freqs = freqs / freqs.sum() + data.append(freqs) + + freqs = MotifFreqsArray(array(data), "ACGT") + pwise = freqs.pairwise_jsd() + self.assertEqual(len(pwise), 6 * 6 - 6) + + def test_pairwise_jsm(self): + """correctly constructs pairwise JS metric dict""" + from numpy.random import random + + from cogent3.maths.measure import jsm + + data = [[0.25, 0.25, 0.25, 0.25], [0.5, 0.5, 0, 0]] + expect = jsm(data[0], 
data[1]) + freqs = MotifFreqsArray(array(data), "ACGT") + got = freqs.pairwise_jsm() + assert_allclose(list(got.values())[0], expect) + + data = [] + for _ in range(6): + freqs = random(4) + freqs = freqs / freqs.sum() + data.append(freqs) + + freqs = MotifFreqsArray(array(data), "ACGT") + pwise = freqs.pairwise_jsm() + self.assertEqual(len(pwise), 6 * 6 - 6) + + def test_pairwise_(self): + """returns None when single row""" + # ndim=1 + data = [0.25, 0.25, 0.25, 0.25] + freqs = MotifFreqsArray(array(data), "ACGT") + got = freqs.pairwise_jsm() + self.assertEqual(got, None) + + # ndim=2 + data = array([[0.25, 0.25, 0.25, 0.25]]) + freqs = MotifFreqsArray(data, "ACGT") + got = freqs.pairwise_jsm() + self.assertEqual(got, None) + def test_information(self): - """calculates entr0pies correctly""" + """calculates entropies correctly""" data = [[0.25, 0.25, 0.25, 0.25], [0.5, 0.5, 0, 0]] got = MotifFreqsArray(array(data), "ABCD") entropy = got.information() @@ -307,7 +365,7 @@ pos1 = Counter() pos2 = Counter() num = 1000 - for i in range(num): + for _ in range(num): seq = farr.simulate_seq() self.assertEqual(len(seq), 2) pos1[seq[0]] += 1 diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_core/test_seq_aln_integration.py python-cogent-2020.12.21a+dfsg/tests/test_core/test_seq_aln_integration.py --- python-cogent-2020.6.30a0+dfsg/tests/test_core/test_seq_aln_integration.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_core/test_seq_aln_integration.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,23 +1,26 @@ #!/usr/bin/env python +from unittest import TestCase, main + from numpy import alltrue, array, transpose from cogent3.core.alignment import Alignment, ArrayAlignment from cogent3.core.moltype import RNA -from cogent3.core.sequence import ArraySequence, RnaSequence, Sequence -from cogent3.util.unit_test import TestCase, main +from cogent3.core.sequence import ArraySequence, RnaSequence __author__ = "Sandra Smit" __copyright__ = 
"Copyright 2007-2020, The Cogent Project" __credits__ = ["Sandra Smit", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Sandra Smit" __email__ = "sandra.smit@colorado.edu" __status__ = "Production" +from numpy.testing import assert_equal + class AllTests(TestCase): def setUp(self): @@ -68,8 +71,7 @@ self.assertEqual(str(self.da), "\n".join(exp_lines_general) + "\n") def test_printing_unnamed_seqs(self): - """Printing unnamed sequences should work the same on Aln and DenseAln - """ + """Printing unnamed sequences should work the same on Aln and DenseAln""" exp_lines_gen = [">seq_0", "UCAGGG", ">seq_1", "YCU-RG", ">seq_2", "CAA-NR\n"] self.assertEqual(str(self.nn_aln), "\n".join(exp_lines_gen)) self.assertEqual(str(self.nn_da), "\n".join(exp_lines_gen)) @@ -134,10 +136,10 @@ sub_data = array([[0, 1, 3], [Y, 1, 3], [1, 2, R]]) # First check some data - self.assertEqual(self.da.array_seqs, full_data) - self.assertEqual(self.da.array_positions, transpose(full_data)) - self.assertEqual(sub_da.array_seqs, sub_data) - self.assertEqual(sub_da.array_positions, transpose(sub_data)) + assert_equal(self.da.array_seqs, full_data) + assert_equal(self.da.array_positions, transpose(full_data)) + assert_equal(sub_da.array_seqs, sub_data) + assert_equal(sub_da.array_positions, transpose(sub_data)) obs_sub_da_TP = self.da.take_positions([0, 1, 5]) obs_sub_da_SA = self.da.get_sub_alignment(pos=[0, 1, 5]) @@ -145,15 +147,15 @@ # When using the get_sub_alignment method the data is right self.assertEqual(obs_sub_da_SA, sub_da) self.assertNotEqual(obs_sub_da_SA, self.da) - self.assertEqual(obs_sub_da_SA.array_seqs, sub_data) - self.assertEqual(obs_sub_da_SA.array_positions, transpose(sub_data)) + assert_equal(obs_sub_da_SA.array_seqs, sub_data) + assert_equal(obs_sub_da_SA.array_positions, transpose(sub_data)) # For the take_positions method: Why does this work self.assertEqual(obs_sub_da_TP, sub_da) 
self.assertNotEqual(obs_sub_da_TP, self.da) # If the data doesn't match? - self.assertEqual(obs_sub_da_TP.array_seqs, sub_data) - self.assertEqual(obs_sub_da_TP.array_positions, transpose(sub_data)) + assert_equal(obs_sub_da_TP.array_seqs, sub_data) + assert_equal(obs_sub_da_TP.array_positions, transpose(sub_data)) # Shouldn't the __eq__ method check the data at least? def test_subset_positions_Alignment(self): @@ -256,7 +258,7 @@ # check is produces the right string from the beginning self.assertEqual(str(model1), "U-C-A-G-") - self.assertEqual(model1._data, [0, 4, 1, 4, 2, 4, 3, 4]) + assert_equal(model1._data, [0, 4, 1, 4, 2, 4, 3, 4]) # ArraySequence should maybe have the same degap method as normal seq self.assertEqual(str(model1.degap()), "UCAG") diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_core/test_sequence.py python-cogent-2020.12.21a+dfsg/tests/test_core/test_sequence.py --- python-cogent-2020.6.30a0+dfsg/tests/test_core/test_sequence.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_core/test_sequence.py 2020-12-20 23:35:03.000000000 +0000 @@ -7,16 +7,16 @@ import re from pickle import dumps +from unittest import TestCase, main from numpy import array -from numpy.testing import assert_allclose +from numpy.testing import assert_allclose, assert_equal from cogent3.core.annotation import Feature, SimpleVariable, Variable from cogent3.core.moltype import ( ASCII, BYTES, DNA, - PROTEIN, RNA, AlphabetError, get_moltype, @@ -31,23 +31,21 @@ ArrayRnaCodonSequence, ArrayRnaSequence, ArraySequence, - ArraySequenceBase, DnaSequence, ProteinSequence, RnaSequence, Sequence, ) from cogent3.util.misc import get_object_provenance -from cogent3.util.unit_test import TestCase, main __author__ = "Rob Knight, Gavin Huttley and Peter Maxwell" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight", "Gavin Huttley", "Peter Maxwell", "Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2020.6.30a" 
-__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -286,8 +284,8 @@ """Sequence shuffle should return new random sequence w/ same monomers""" r = self.RNA("UUUUCCCCAAAAGGGG") s = r.shuffle() - self.assertNotEqual(r, s) - self.assertEqualItems(r, s) + self.assertFalse(r == s) + self.assertCountEqual(r, s) def test_complement(self): """Sequence complement should correctly complement sequence""" @@ -404,7 +402,7 @@ assert j in s.moltype.degenerates[i] else: assert i == j - self.assertNotEqual(t, u) + self.assertFalse(t == u) self.assertEqual(len(s), len(t)) def test_degap(self): @@ -510,11 +508,11 @@ """Sequence MW should return correct molecular weight""" self.assertEqual(self.PROT("").mw(), 0) self.assertEqual(self.RNA("").mw(), 0) - self.assertFloatEqual(self.PROT("A").mw(), 89.09) - self.assertFloatEqual(self.RNA("A").mw(), 375.17) - self.assertFloatEqual(self.PROT("AAA").mw(), 231.27) - self.assertFloatEqual(self.RNA("AAA").mw(), 1001.59) - self.assertFloatEqual(self.RNA("AAACCCA").mw(), 2182.37) + assert_allclose(self.PROT("A").mw(), 89.09) + assert_allclose(self.RNA("A").mw(), 375.17) + assert_allclose(self.PROT("AAA").mw(), 231.27) + assert_allclose(self.RNA("AAA").mw(), 1001.59) + assert_allclose(self.RNA("AAACCCA").mw(), 2182.37) def test_can_match(self): """Sequence can_match should return True if all positions can match""" @@ -703,7 +701,7 @@ self.assertEqual(e.frac_same_gaps(e), 0.0) self.assertEqual(s4.frac_same_gaps(s5), 0.0) self.assertEqual(s4.frac_same_gaps(s6), 0.5) - self.assertFloatEqual(s6.frac_same_gaps(s8), 2 / 3.0) + assert_allclose(s6.frac_same_gaps(s8), 2 / 3.0) def test_frac_diffGaps(self): """Sequence frac_diff_gaps should return difference in gap positions""" @@ -730,7 +728,7 @@ self.assertEqual(e.frac_diff_gaps(e), 0.0) self.assertEqual(s4.frac_diff_gaps(s5), 1.0) 
self.assertEqual(s4.frac_diff_gaps(s6), 0.5) - self.assertFloatEqual(s6.frac_diff_gaps(s8), 1 / 3.0) + assert_allclose(s6.frac_diff_gaps(s8), 1 / 3.0) def test_frac_same_non_gaps(self): """Sequence frac_same_non_gaps should return similarities at non-gaps""" @@ -746,7 +744,7 @@ e = self.RNA("") def test(x, y, z): - return self.assertFloatEqual(x.frac_same_non_gaps(y), z) + return assert_allclose(x.frac_same_non_gaps(y), z) test(s1, s2, 0.25) test(s1, s3, 0) @@ -773,7 +771,7 @@ e = self.RNA("") def test(x, y, z): - return self.assertFloatEqual(x.frac_diff_non_gaps(y), z) + return assert_allclose(x.frac_diff_non_gaps(y), z) test(s1, s2, 0.75) test(s1, s3, 1) @@ -807,7 +805,7 @@ e = self.RNA("") def test(x, y, z): - return self.assertFloatEqual(x.frac_similar(y, transitions), z) + return assert_allclose(x.frac_similar(y, transitions), z) test(e, e, 0) test(s1, e, 0) @@ -949,6 +947,57 @@ with self.assertRaises(AttributeError): s.is_annotated() + def test_to_html(self): + """produce correct html formatted text""" + seq = DnaSequence("ACGGTGGGGGGGGG") + got = seq.to_html(wrap=50) + # ensure balanced tags are in the txt + for tag in ["", "", "", "
"]: + self.assertTrue(tag in got) + + seq_row = ( + 'None' + 'A' + 'C' + 'G' + 'G' + 'T' + 'G' + 'G' + 'G' + 'G' + 'G' + 'G' + 'G' + 'G' + 'G' + ) + + self.assertTrue(seq_row in got) + + def test_to_html_deprecation_warning(self): + """produce correct html formatted text""" + seq = DnaSequence("ACGGTGGGGGGGGG") + with self.assertWarns(DeprecationWarning): + seq.to_html(interleave_len=50) + + def test_repr_html(self): + """correctly uses set_repr and the environment variable settings""" + token = 'class="label"' + seq = self.SEQ("AAAAA") + + orig = [l for l in seq._repr_html_().splitlines() if token in l][0] + orig_num = len(re.findall(r"\bA\b", orig)) + self.assertEqual(orig_num, 5) + + # using environment variable + env_name = "COGENT3_ALIGNMENT_REPR_POLICY" + os.environ[env_name] = "num_pos=2" + got = [l for l in seq._repr_html_().splitlines() if token in l][0] + got_num = len(re.findall(r"\bA\b", got)) + self.assertEqual(got_num, 2) + os.environ.pop(env_name, None) + class SequenceSubclassTests(TestCase): """Only one general set of tests, since the subclasses are very thin.""" @@ -1004,7 +1053,7 @@ orig = "TCAGGA" r = self.SequenceClass(orig) - self.assertEqual(r._data, array([0, 1, 2, 3, 3, 2])) + assert_equal(r._data, array([0, 1, 2, 3, 3, 2])) self.assertEqual(str(r), orig) @@ -1020,7 +1069,7 @@ orig = "TCAGGA" r = self.SequenceClass(orig) - self.assertEqual(r._data, array([6, 62])) + assert_equal(r._data, array([6, 62])) self.assertEqual(str(r), orig) @@ -1040,8 +1089,8 @@ def test_gaps(self): """gapped sequence gaps() should return correct array""" sc = self.SequenceClass - self.assertEqual(sc("TC").gaps(), array([0, 0])) - self.assertEqual(sc("T-").gaps(), array([0, 1])) + assert_equal(sc("TC").gaps(), array([0, 0])) + assert_equal(sc("T-").gaps(), array([0, 1])) def test_degap(self): """gapped sequence degap() should return correct array""" @@ -1051,8 +1100,8 @@ def test_nongaps(self): """gapped sequence nongaps() should return correct array""" sc = 
self.SequenceClass - self.assertEqual(sc("TC").nongaps(), array([1, 1])) - self.assertEqual(sc("T-").nongaps(), array([1, 0])) + assert_equal(sc("TC").nongaps(), array([1, 1])) + assert_equal(sc("T-").nongaps(), array([1, 0])) def test_regap(self): """gapped sequence regap() should return correct sequence""" @@ -1099,7 +1148,7 @@ """ArrayDnaCodonSequence should behave as expected""" d = ArrayDnaCodonSequence("UUUCGU") self.assertEqual(str(d), "TTTCGT") - self.assertEqual(d._data, array([0, 28])) + assert_equal(d._data, array([0, 28])) self.assertEqual(str(d.to_rna()), "UUUCGU") self.assertEqual(str(d.to_dna()), "TTTCGT") @@ -1107,7 +1156,7 @@ """ArrayRnaCodonSequence should behave as expected""" r = ArrayRnaCodonSequence("UUUCGU") self.assertEqual(str(r), "UUUCGU") - self.assertEqual(r._data, array([0, 28])) + assert_equal(r._data, array([0, 28])) self.assertEqual(str(r.to_rna()), "UUUCGU") self.assertEqual(str(r.to_dna()), "TTTCGT") @@ -1153,25 +1202,25 @@ """Sequence gap_array should return array of gaps""" r = self.RNA("-?A-?NRY-") v = r.gap_array() - self.assertEqual(v, array([1, 1, 0, 1, 1, 0, 0, 0, 1])) + assert_equal(v, array([1, 1, 0, 1, 1, 0, 0, 0, 1])) r = self.RNA("AC") v = r.gap_array() - self.assertEqual(v, array([0, 0])) + assert_equal(v, array([0, 0])) r = self.RNA("-?") v = r.gap_array() - self.assertEqual(v, array([1, 1])) + assert_equal(v, array([1, 1])) def test_gap_indices(self): """Sequence gap_indices should return positions of gaps""" r = self.RNA("-?A-?NRY-") v = r.gap_indices() - self.assertEqual(v, array([0, 1, 3, 4, 8])) + assert_equal(v, array([0, 1, 3, 4, 8])) r = self.RNA("AC") v = r.gap_indices() - self.assertEqual(v, array([])) # note: always returns array + assert_equal(v, array([])) # note: always returns array r = self.RNA("-?") v = r.gap_indices() - self.assertEqual(v, array([0, 1])) + assert_equal(v, array([0, 1])) def test_count_ab(self): """abseq array seq should count characters""" diff -Nru 
python-cogent-2020.6.30a0+dfsg/tests/test_core/test_tree.py python-cogent-2020.12.21a+dfsg/tests/test_core/test_tree.py --- python-cogent-2020.6.30a0+dfsg/tests/test_core/test_tree.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_core/test_tree.py 2020-12-20 23:35:03.000000000 +0000 @@ -2,19 +2,21 @@ """Tests of classes for dealing with trees and phylogeny. """ import json +import os import sys import unittest from copy import copy, deepcopy +from tempfile import TemporaryDirectory +from unittest import TestCase, main from numpy import arange, array -from cogent3 import make_tree +from cogent3 import load_tree, make_tree from cogent3.core.tree import PhyloNode, TreeError, TreeNode from cogent3.maths.stats.test import correlation from cogent3.parse.tree import DndParser -from cogent3.util.misc import get_object_provenance -from cogent3.util.unit_test import TestCase, main +from cogent3.util.misc import get_object_provenance, open_ __author__ = "Rob Knight" @@ -32,11 +34,17 @@ "Jose Carlos Clemente Litran", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" +from numpy.testing import assert_allclose, assert_equal + + +base_path = os.path.dirname(os.path.dirname(__file__)) +data_path = os.path.join(base_path, "data") + class TreeTests(TestCase): """Tests of top-level functions.""" @@ -224,6 +232,42 @@ expect = tr.to_rich_dict() self.assertEqual(got, expect) + def test_write_to_json(self): + tree = load_tree(filename=os.path.join(data_path, "brca1_5.tree")) + with TemporaryDirectory(dir=".") as dirname: + json_path = os.path.join(dirname, "brca1_5.json") + tree.write(json_path) + with open_(json_path) as fn: + got = json.loads(fn.read()) + self.assertEqual(got["type"], get_object_provenance(PhyloNode)) + self.assertEqual( + 
tree.get_newick(semicolon=False, with_node_names=True), + got["newick"], + ) + self.assertEqual( + set(tree.get_node_names()), got["edge_attributes"].keys() + ) + + def test_write_to_txt(self): + """write a tree to newick""" + tree = load_tree(filename=os.path.join(data_path, "brca1_5.tree")) + with TemporaryDirectory(dir=".") as dirname: + out_path = os.path.join(dirname, "brca1_5.txt") + tree.write(out_path) + with open_(out_path) as fn: + got = fn.read() + self.assertTrue(got.count("(") == got.count(")") == 3) + + def test_write_to_xml(self): + """write a tree to xml""" + tree = load_tree(filename=os.path.join(data_path, "brca1_5.tree")) + with TemporaryDirectory(dir=".") as dirname: + out_path = os.path.join(dirname, "brca1_5.xml") + tree.write(out_path) + with open_(out_path) as fn: + got = fn.read() + self.assertTrue(got.count("") == got.count("") > 0) + def test_multifurcating(self): """Coerces nodes to have <= n children""" t_str = "((a:1,b:2,c:3)d:4,(e:5,f:6,g:7)h:8,(i:9,j:10,k:11)l:12)m:14;" @@ -557,12 +601,12 @@ self.assertEqual(len(t2.tips()), 1024) self.assertEqual(len(t3.tips()), 1024) - self.assertNotSameObj(t, t2) + self.assertIsNot(t, t2) self.assertEqual(t.name, t2.name) self.assertNotEqual(t.name, t3.name) self.assertEqual(t.XYZ, t2.XYZ) - self.assertNotSameObj(t.XYZ, t2.XYZ) + self.assertIsNot(t.XYZ, t2.XYZ) self.assertEqual(t.get_newick(), t2.get_newick()) @@ -1104,7 +1148,7 @@ """make_tree_array maps nodes to the descendants in them""" tree = self.TreeRoot result, node_list = tree.make_tree_array() - self.assertEqual( + assert_equal( result, array([[1, 1, 1, 1], [1, 1, 1, 0], [1, 1, 1, 0], [0, 0, 1, 0]]) ) nodes = [node.name for node in node_list] @@ -1112,7 +1156,7 @@ # test if works with a dec_list supplied dec_list = ["d", "added", "e", "g", "h"] result2, node_list = tree.make_tree_array(dec_list) - self.assertEqual( + assert_equal( result2, array([[1, 0, 1, 1, 1], [1, 0, 1, 1, 0], [1, 0, 1, 1, 0], [0, 0, 0, 1, 0]]), ) @@ -1439,12 +1483,12 
@@ names = ["H", "G", "M"] exp = (array([[0, 2.0, 6.7], [2.0, 0, 6.7], [6.7, 6.7, 0.0]]), nodes) obs = self.t.tip_to_tip_distances(endpoints=names) - self.assertEqual(obs[0], exp[0]) - self.assertEqual(obs[1], exp[1]) + assert_equal(obs[0], exp[0]) + assert_equal(obs[1], exp[1]) obs = self.t.tip_to_tip_distances(endpoints=nodes) - self.assertEqual(obs[0], exp[0]) - self.assertEqual(obs[1], exp[1]) + assert_equal(obs[0], exp[0]) + assert_equal(obs[1], exp[1]) def test_prune(self): """prune should reconstruct correct topology and Lengths of tree.""" @@ -1548,8 +1592,7 @@ self.assertEqual(tmid.distance(tmid.get_node_matching_name("d")), 2.75) def test_root_at_midpoint4(self): - """midpoint should be selected correctly when it is an internal node - """ + """midpoint should be selected correctly when it is an internal node""" tree = DndParser("(a:1,((c:1,d:3)n3:1,b:1)n2:1)rt;") tmid = tree.root_at_midpoint() self.assertEqual(tmid.get_distances(), tree.get_distances()) @@ -1569,8 +1612,7 @@ self.assertEqual(tmid.distance(tmid.get_node_matching_name("d")), 3) def test_root_at_midpoint5(self): - """midpoint should be selected correctly when on an even 2tip tree - """ + """midpoint should be selected correctly when on an even 2tip tree""" tree = DndParser("""(BLO_1:0.649351,BLO_2:0.649351):0.0;""") tmid = tree.root_at_midpoint() self.assertEqual(tmid.get_distances(), tree.get_distances()) @@ -1578,13 +1620,9 @@ nontipnames = [t.name for t in tree.nontips()] self.assertTrue(tmid.is_root()) - self.assertFloatEqual( - tmid.distance(tmid.get_node_matching_name("BLO_2")), 0.649351 - ) - self.assertFloatEqual( - tmid.distance(tmid.get_node_matching_name("BLO_1")), 0.649351 - ) - self.assertFloatEqual(tmid[0].distance(tmid[1]), 2.0 * 0.649351) + assert_allclose(tmid.distance(tmid.get_node_matching_name("BLO_2")), 0.649351) + assert_allclose(tmid.distance(tmid.get_node_matching_name("BLO_1")), 0.649351) + assert_allclose(tmid[0].distance(tmid[1]), 2.0 * 0.649351) def 
test_set_tip_distances(self): """set_tip_distances should correctly set tip distances.""" @@ -1637,9 +1675,7 @@ self.assertEqual( sorted(rooted.get_tip_names()), sorted(unrooted.get_tip_names()) ) - self.assertLessThan( - len(unrooted.get_node_names()), len(rooted.get_node_names()) - ) + self.assertLess(len(unrooted.get_node_names()), len(rooted.get_node_names())) def test_get_figure(self): """exercising get_figure""" @@ -1663,13 +1699,13 @@ """tip_to_tip should work for one-level multifurcating tree""" matrix, order = self.fun(self.root_one_level) self.assertEqual([i.name for i in order], list("abc")) - self.assertEqual(matrix, array([[0, 3, 4], [3, 0, 5], [4, 5, 0]])) + assert_equal(matrix, array([[0, 3, 4], [3, 0, 5], [4, 5, 0]])) def test_two_level(self): """tip_to_tip should work for two-level tree""" matrix, order = self.fun(self.root_two_level) self.assertEqual([i.name for i in order], list("abcd")) - self.assertFloatEqual( + assert_allclose( matrix, array([[0, 3, 4, 1.4], [3, 0, 5, 2.4], [4, 5, 0, 3.4], [1.4, 2.4, 3.4, 0]]), ) @@ -1687,15 +1723,15 @@ """tip_to_tip should work for small but complex tree""" dist, tips = self.fun(self.root_std) tips = [tip.name for tip in tips] - self.assertEqual(dist, tree_std_dist) - self.assertEqual(tips, tree_std_tips) + assert_equal(dist, tree_std_dist) + assert_equal(tips, tree_std_tips) def test_one_child(self): """tip_to_tip should work for tree with a single child""" dist, tips = self.fun(self.root_one_child) tips = [tip.name for tip in tips] - self.assertEqual(dist, tree_one_child_dist) - self.assertEqual(tips, tree_one_child_tips) + assert_equal(dist, tree_one_child_dist) + assert_equal(tips, tree_one_child_tips) # for use with testing iterative copy method @@ -1987,8 +2023,7 @@ self.assertEqual(str(subtree), str(new_tree)) def test_getsubtree_2(self): - """tree.get_sub_tree() has same pairwise tip dists as tree (len0 node) - """ + """tree.get_sub_tree() has same pairwise tip dists as tree (len0 node)""" t1 = 
DndParser( "((a:1,b:2):4,((c:3, j:17.2):0,(d:1,e:1):2):3)", PhyloNode ) # note c,j is len 0 node @@ -1999,7 +2034,7 @@ self.assertEqual((pair, dist), (pair, orig_dists[pair])) def test_getsubtree_3(self): - """tree.get_sub_tree() has same pairwise tip dists as tree + """tree.get_sub_tree() has same pairwise tip dists as tree (nonzero nodes) """ @@ -2026,8 +2061,7 @@ self.assertEqual((pair, dist), (pair, orig_dists[pair])) def test_getsubtree_4(self): - """tree.get_sub_tree() handles keep_root correctly - """ + """tree.get_sub_tree() handles keep_root correctly""" t1 = DndParser("((a:1,b:2):4,(((c:2)cparent:1, j:17):0,(d:1,e:4):2):3)") # /----4--- /--1-a # ---------| \--2-b @@ -2095,15 +2129,13 @@ # sameroot should have longer root to tip dists for tip in t1.tips(): - self.assertFloatEqual(t1.distance(tip), true_root_dists[tip.name]) + assert_allclose(t1.distance(tip), true_root_dists[tip.name]) for tip in subtree.tips(): - self.assertFloatEqual(subtree.distance(tip), true_sub_root_dists[tip.name]) + assert_allclose(subtree.distance(tip), true_sub_root_dists[tip.name]) for tip in sub_sameroot.tips(): - self.assertFloatEqual(sub_sameroot.distance(tip), true_root_dists[tip.name]) + assert_allclose(sub_sameroot.distance(tip), true_root_dists[tip.name]) for tip in sub_sameroot2.tips(): - self.assertFloatEqual( - sub_sameroot2.distance(tip), true_root_dists[tip.name] - ) + assert_allclose(sub_sameroot2.distance(tip), true_root_dists[tip.name]) def test_getsubtree_5(self): """get sub tree correctly uses tips only if specified""" @@ -2155,6 +2187,20 @@ node = sub1.get_node_matching_name(name) self.assertTrue(node.params["non-scalar"], {name: vals[node.name]}) + def test_load_tree_from_json(self): + """tests loading a Tree from json file""" + with TemporaryDirectory(dir=".") as dirname: + json_path = os.path.join(dirname, "tree.json") + self.tree.write(json_path) + got = load_tree(json_path) + self.assertIsInstance(got, PhyloNode) + self.assertEqual( + got.get_newick(), + # 
Since json file has rich dict as its content, parameter with_node_names is set True here. + self.tree.get_newick(with_node_names=True), + ) + self.assertEqual(got.get_node_names(), self.tree.get_node_names()) + def test_ascii(self): self.tree.ascii_art() # unlabeled internal node diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_data/__init__.py python-cogent-2020.12.21a+dfsg/tests/test_data/__init__.py --- python-cogent-2020.6.30a0+dfsg/tests/test_data/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_data/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -5,7 +5,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_data/test_molecular_weight.py python-cogent-2020.12.21a+dfsg/tests/test_data/test_molecular_weight.py --- python-cogent-2020.6.30a0+dfsg/tests/test_data/test_molecular_weight.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_data/test_molecular_weight.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,28 +1,25 @@ #!/usr/bin/env python """Tests for molecular weight. 
""" -from cogent3.data.molecular_weight import ( - DnaMW, - ProteinMW, - RnaMW, - WeightCalculator, -) -from cogent3.util.unit_test import TestCase, main +from unittest import TestCase, main + +from cogent3.data.molecular_weight import ProteinMW, RnaMW __author__ = "Rob Knight" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" +from numpy.testing import assert_allclose + class WeightCalculatorTests(TestCase): - """Tests for WeightCalculator, which should calculate molecular weights. - """ + """Tests for WeightCalculator, which should calculate molecular weights.""" def test_call(self): """WeightCalculator should return correct molecular weight""" @@ -30,12 +27,12 @@ p = ProteinMW self.assertEqual(p(""), 0) self.assertEqual(r(""), 0) - self.assertFloatEqual(p("A"), 89.09) - self.assertFloatEqual(r("A"), 375.17) - self.assertFloatEqual(p("AAA"), 231.27) - self.assertFloatEqual(r("AAA"), 1001.59) - self.assertFloatEqual(r("AAACCCA"), 2182.37) - self.assertFloatEqual( + assert_allclose(p("A"), 89.09) + assert_allclose(r("A"), 375.17) + assert_allclose(p("AAA"), 231.27) + assert_allclose(r("AAA"), 1001.59) + assert_allclose(r("AAACCCA"), 2182.37) + assert_allclose( p( "MVQQAESLEAESNLPREALDTEEGEFMACSPVALDESDPDWCKTASGHIKRPMNAFMVWSKIERRKIMEQSPDMHNAEISKRLGKR\ WKMLKDSEKIPFIREAERLRLKHMADYPDYKYRPRKKPKMDPSAKPSASQSPEKSAAGGGGGSAGGGAGGAKTSKGSSKKCGKLKA\ diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_draw/__init__.py python-cogent-2020.12.21a+dfsg/tests/test_draw/__init__.py --- python-cogent-2020.6.30a0+dfsg/tests/test_draw/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_draw/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -10,7 +10,7 @@ 
__copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_draw/test_dendrogram.py python-cogent-2020.12.21a+dfsg/tests/test_draw/test_dendrogram.py --- python-cogent-2020.6.30a0+dfsg/tests/test_draw/test_dendrogram.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_draw/test_dendrogram.py 2020-12-20 23:35:03.000000000 +0000 @@ -16,7 +16,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley", "Rahul Ghangas"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" @@ -44,7 +44,7 @@ tree = make_tree(treestring="(a,b,(c,(d,e)e1)e2)") dnd = Dendrogram(tree=tree) edges = dnd.get_edge_names("d", "c", clade=True, stem=False) - self.assertEqual(set(edges), set(["c", "d", "e", "e1"])) + self.assertEqual(set(edges), {"c", "d", "e", "e1"}) def test_min_max_x_y(self): """correctly compute the min and max of x and y""" @@ -159,6 +159,25 @@ style, ) + def test_style_edges(self): + """test style_edges only accepts edges present in tree""" + tree = make_tree(treestring="(a,b,(c,(d,e)e1)e2)") + dnd = Dendrogram(tree=tree) + dnd.style_edges("a", line=dict(color="magenta")) + with self.assertRaises(ValueError): + dnd.style_edges("foo", line=dict(color="magenta")) + + def test_tip_font(self): + """test tip_font settable""" + tree = make_tree(treestring="(a,b,(c,(d,e)e1)e2)") + dnd = Dendrogram(tree=tree) + dnd.tip_font |= dict(size=18) + self.assertEqual(dnd.tip_font.size, 18) + dnd.tip_font.size = 10 + self.assertEqual(dnd.tip_font.size, 10) + dnd.tip_font.color = "red" + self.assertEqual(dnd.tip_font["color"], "red") + if __name__ == "__main__": 
main() diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_draw/test_dotplot.py python-cogent-2020.12.21a+dfsg/tests/test_draw/test_dotplot.py --- python-cogent-2020.6.30a0+dfsg/tests/test_draw/test_dotplot.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_draw/test_dotplot.py 2020-12-20 23:35:03.000000000 +0000 @@ -16,7 +16,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" @@ -118,6 +118,14 @@ self.assertEqual(len(dp.seq1), 4) self.assertEqual(len(dp.seq2), 3) + def test_dotplot_title(self): + """setting empty string title works""" + seqs = make_unaligned_seqs( + {"seq1": "ACGG", "seq2": "CGCA", "seq3": "CCG-"}, moltype="dna" + ) + dp = seqs.dotplot("seq1", "seq3", title="") + self.assertEqual(dp.figure.layout.title, "") + if __name__ == "__main__": main() diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_draw/test_draw_integration.py python-cogent-2020.12.21a+dfsg/tests/test_draw/test_draw_integration.py --- python-cogent-2020.6.30a0+dfsg/tests/test_draw/test_draw_integration.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_draw/test_draw_integration.py 2020-12-20 23:35:03.000000000 +0000 @@ -13,7 +13,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" @@ -375,7 +375,7 @@ def test_to_plotly(self): """exercise producing a plotly table""" - table = make_table(header=["a", "b"], data=[[0, 1]], index="a") + table = make_table(header=["a", "b"], data=[[0, 1]], index_name="a") drawable = table.to_plotly() self.assertIsInstance(drawable, Drawable) 
self._check_drawable_attrs(drawable.figure, "table") diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_draw/test_logo.py python-cogent-2020.12.21a+dfsg/tests/test_draw/test_logo.py --- python-cogent-2020.6.30a0+dfsg/tests/test_draw/test_logo.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_draw/test_logo.py 2020-12-20 23:35:03.000000000 +0000 @@ -9,7 +9,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_draw/test_shapes.py python-cogent-2020.12.21a+dfsg/tests/test_draw/test_shapes.py --- python-cogent-2020.6.30a0+dfsg/tests/test_draw/test_shapes.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_draw/test_shapes.py 2020-12-20 23:35:03.000000000 +0000 @@ -9,7 +9,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley", "Rahul Ghangas"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_evolve/__init__.py python-cogent-2020.12.21a+dfsg/tests/test_evolve/__init__.py --- python-cogent-2020.6.30a0+dfsg/tests/test_evolve/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_evolve/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -17,7 +17,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell", "Greg Caporaso"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ 
= "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_best_likelihood.py python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_best_likelihood.py --- python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_best_likelihood.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_best_likelihood.py 2020-12-20 23:35:03.000000000 +0000 @@ -2,6 +2,8 @@ import math +from unittest import TestCase, main + from cogent3 import DNA, make_aligned_seqs from cogent3.evolve.best_likelihood import ( BestLogLikelihood, @@ -12,18 +14,20 @@ get_G93_lnL_from_array, get_ML_probs, ) -from cogent3.util.unit_test import TestCase, main __author__ = "Helen Lindsay" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Helen Lindsay"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Helen Lindsay" __email__ = "helen.lindsay@anu.edu.au" __status__ = "Production" +from numpy.testing import assert_allclose + + IUPAC_DNA_ambiguities = "NRYWSKMBDHV" @@ -123,21 +127,21 @@ } sum = 0 for pattern, lnL, freq in obs: - self.assertFloatEqual(lnL, expect[pattern]) + assert_allclose(lnL, expect[pattern]) sum += lnL self.assertTrue(lnL >= 0) - self.assertFloatEqual(sum, 1) + assert_allclose(sum, 1) def test_get_G93_lnL_from_array(self): columns = aligned_columns_to_rows(self.aln, 1) obs = get_G93_lnL_from_array(columns) expect = math.log(math.pow(4 / 13.0, 4)) + 3 * math.log(math.pow(3 / 13.0, 3)) - self.assertFloatEqual(obs, expect) + assert_allclose(obs, expect) def test_BestLogLikelihood(self): obs = BestLogLikelihood(self.aln, DNA.alphabet) expect = math.log(math.pow(4 / 13.0, 4)) + 3 * math.log(math.pow(3 / 13.0, 3)) - self.assertFloatEqual(obs, expect) + assert_allclose(obs, expect) lnL, l = BestLogLikelihood(self.aln, DNA.alphabet, return_length=True) self.assertEqual(l, len(self.aln)) diff -Nru 
python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_bootstrap.py python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_bootstrap.py --- python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_bootstrap.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_bootstrap.py 2020-12-20 23:35:03.000000000 +0000 @@ -17,7 +17,7 @@ "Andrew Butterfield", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_coevolution.py python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_coevolution.py --- python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_coevolution.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_coevolution.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,14 +1,6 @@ -#!/usr/bin/env python -# Authors: Greg Caporaso (gregcaporaso@gmail.com), Brett Easton, Gavin Huttley -# test_coevolution.py - -""" Description -File created on 22 May 2007. 
-""" - from os import environ, remove -from os.path import exists from tempfile import NamedTemporaryFile, mktemp +from unittest import TestCase, main from numpy import ( arange, @@ -18,9 +10,7 @@ less_equal, log, nan, - ones, sqrt, - transpose, zeros, ) @@ -33,7 +23,7 @@ make_tree, ) from cogent3.core.alignment import ArrayAlignment -from cogent3.core.alphabet import Alphabet, CharAlphabet +from cogent3.core.alphabet import CharAlphabet from cogent3.evolve.coevolution import ( DEFAULT_NULL_VALUE, AAGapless, @@ -45,7 +35,6 @@ ancestral_state_position, ancestral_states_input_validation, build_coevolution_matrix_filepath, - build_rate_matrix, calc_pair_scale, coevolution_matrix_to_csv, coevolve_alignment, @@ -101,22 +90,20 @@ validate_position, validate_tree, ) -from cogent3.evolve.models import DSO78_freqs, DSO78_matrix -from cogent3.evolve.substitution_model import Empirical, Parametric -from cogent3.maths.stats.distribution import binomial_exact from cogent3.maths.stats.number import CategoryCounter -from cogent3.util.unit_test import TestCase, main __author__ = "Greg Caporaso" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Greg Caporaso"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Greg Caporaso" __email__ = "gregcaporaso@gmail.com" __status__ = "Beta" +from numpy.testing import assert_allclose, assert_equal + class CoevolutionTests(TestCase): """ Tests of coevolution.py """ @@ -283,31 +270,27 @@ def test_mi(self): """ mi calculations function as expected with valid data""" - self.assertFloatEqual(mi(1.0, 1.0, 1.0), 1.0) - self.assertFloatEqual(mi(1.0, 1.0, 2.0), 0.0) - self.assertFloatEqual(mi(1.0, 1.0, 1.5), 0.5) + assert_allclose(mi(1.0, 1.0, 1.0), 1.0) + assert_allclose(mi(1.0, 1.0, 2.0), 0.0) + assert_allclose(mi(1.0, 1.0, 1.5), 0.5) def test_normalized_mi(self): """ normalized mi calculations function as expected with valid data""" - self.assertFloatEqual(normalized_mi(1.0, 1.0, 
1.0), 1.0) - self.assertFloatEqual(normalized_mi(1.0, 1.0, 2.0), 0.0) - self.assertFloatEqual(normalized_mi(1.0, 1.0, 1.5), 0.3333, 3) + assert_allclose(normalized_mi(1.0, 1.0, 1.0), 1.0) + assert_allclose(normalized_mi(1.0, 1.0, 2.0), 0.0) + assert_allclose(normalized_mi(1.0, 1.0, 1.5), 0.3333, 3) def test_mi_pair(self): """ mi_pair calculates mi from a pair of columns """ aln = ArrayAlignment(data={"1": "AB", "2": "AB"}, moltype=PROTEIN) - self.assertFloatEqual(mi_pair(aln, pos1=0, pos2=1), 0.0) + assert_allclose(mi_pair(aln, pos1=0, pos2=1), 0.0) aln = ArrayAlignment(data={"1": "AB", "2": "BA"}, moltype=PROTEIN) - self.assertFloatEqual(mi_pair(aln, pos1=0, pos2=1), 1.0) + assert_allclose(mi_pair(aln, pos1=0, pos2=1), 1.0) # order of positions doesn't matter (when it shouldn't) aln = ArrayAlignment(data={"1": "AB", "2": "AB"}, moltype=PROTEIN) - self.assertFloatEqual( - mi_pair(aln, pos1=0, pos2=1), mi_pair(aln, pos1=1, pos2=0) - ) + assert_allclose(mi_pair(aln, pos1=0, pos2=1), mi_pair(aln, pos1=1, pos2=0)) aln = ArrayAlignment(data={"1": "AB", "2": "BA"}, moltype=PROTEIN) - self.assertFloatEqual( - mi_pair(aln, pos1=0, pos2=1), mi_pair(aln, pos1=1, pos2=0) - ) + assert_allclose(mi_pair(aln, pos1=0, pos2=1), mi_pair(aln, pos1=1, pos2=0)) def test_wrapper_functions_handle_invalid_parameters(self): """coevolve_*: functions error on missing parameters""" @@ -324,41 +307,40 @@ t = make_tree(treestring="(1:0.5,2:0.5);") cutoff = 0.50 # mi_pair == coevolve_pair(mi_pair,...) 
- self.assertFloatEqual( + assert_allclose( coevolve_pair(mi_pair, aln, pos1=0, pos2=1), mi_pair(aln, pos1=0, pos2=1) ) - self.assertFloatEqual( + assert_allclose( coevolve_pair(nmi_pair, aln, pos1=0, pos2=1), nmi_pair(aln, pos1=0, pos2=1) ) - self.assertFloatEqual( + assert_allclose( coevolve_pair(ancestral_state_pair, aln, pos1=0, pos2=1, tree=t), ancestral_state_pair(aln, pos1=0, pos2=1, tree=t), ) - self.assertFloatEqual( + assert_allclose( coevolve_pair(sca_pair, aln, pos1=0, pos2=1, cutoff=cutoff), sca_pair(aln, pos1=0, pos2=1, cutoff=cutoff), ) def test_coevolve_position(self): - """coevolve_position: returns same as position methods called directly - """ + """coevolve_position: returns same as position methods called directly""" aln = ArrayAlignment(data={"1": "AC", "2": "AC"}, moltype=PROTEIN) t = make_tree(treestring="(1:0.5,2:0.5);") cutoff = 0.50 # mi_position == coevolve_position(mi_position,...) - self.assertFloatEqual( + assert_allclose( coevolve_position(mi_position, aln, position=0), mi_position(aln, position=0), ) - self.assertFloatEqual( + assert_allclose( coevolve_position(nmi_position, aln, position=0), nmi_position(aln, position=0), ) - self.assertFloatEqual( + assert_allclose( coevolve_position(ancestral_state_position, aln, position=0, tree=t), ancestral_state_position(aln, position=0, tree=t), ) - self.assertFloatEqual( + assert_allclose( coevolve_position(sca_position, aln, position=0, cutoff=cutoff), sca_position(aln, position=0, cutoff=cutoff), ) @@ -369,28 +351,21 @@ t = make_tree(treestring="(1:0.5,2:0.5);") cutoff = 0.50 # mi_alignment == coevolve_alignment(mi_alignment,...) 
- self.assertFloatEqual(coevolve_alignment(mi_alignment, aln), mi_alignment(aln)) - self.assertFloatEqual( - coevolve_alignment(mip_alignment, aln), mip_alignment(aln) - ) - self.assertFloatEqual( - coevolve_alignment(mia_alignment, aln), mia_alignment(aln) - ) - self.assertFloatEqual( - coevolve_alignment(nmi_alignment, aln), nmi_alignment(aln) - ) - self.assertFloatEqual( + assert_allclose(coevolve_alignment(mi_alignment, aln), mi_alignment(aln)) + assert_allclose(coevolve_alignment(mip_alignment, aln), mip_alignment(aln)) + assert_allclose(coevolve_alignment(mia_alignment, aln), mia_alignment(aln)) + assert_allclose(coevolve_alignment(nmi_alignment, aln), nmi_alignment(aln)) + assert_allclose( coevolve_alignment(ancestral_state_alignment, aln, tree=t), ancestral_state_alignment(aln, tree=t), ) - self.assertFloatEqual( + assert_allclose( coevolve_alignment(sca_alignment, aln, cutoff=cutoff), sca_alignment(aln, cutoff=cutoff), ) def test_coevolve_alignments_validation_idenifiers(self): - """coevolve_alignments_validation: seq/tree validation functions - """ + """coevolve_alignments_validation: seq/tree validation functions""" method = sca_alignment aln1 = ArrayAlignment(data={"1": "AC", "2": "AD"}, moltype=PROTEIN) aln2 = ArrayAlignment(data={"1": "EFW", "2": "EGY"}, moltype=PROTEIN) @@ -485,8 +460,7 @@ ) def test_coevolve_alignments_validation_max_num_seqs(self): - """coevolve_alignments_validation: min_num_seqs <= max_num_seqs - """ + """coevolve_alignments_validation: min_num_seqs <= max_num_seqs""" method = mi_alignment # min_num_seqs > max_num_seqs-> ValueError aln1 = ArrayAlignment(data={"1": "AC", "2": "AD"}, moltype=PROTEIN) @@ -499,8 +473,7 @@ ) def test_coevolve_alignments_validation_moltypes(self): - """coevolve_alignments_validation: valid for acceptable moltypes - """ + """coevolve_alignments_validation: valid for acceptable moltypes""" aln1 = ArrayAlignment(data={"1": "AC", "2": "AU"}, moltype=RNA) aln2 = ArrayAlignment(data={"1": "EFW", "2": "EGY"}, 
moltype=PROTEIN) # different moltype @@ -527,8 +500,7 @@ ) def test_coevolve_alignments(self): - """ coevolve_alignments: returns correct len(aln1) x len(aln2) matrix - """ + """coevolve_alignments: returns correct len(aln1) x len(aln2) matrix""" aln1 = ArrayAlignment(data={"1": "AC", "2": "AD"}, moltype=PROTEIN) aln2 = ArrayAlignment(data={"1": "EFW", "2": "EGY"}, moltype=PROTEIN) combined_aln = ArrayAlignment( @@ -539,25 +511,25 @@ # MI m = mi_alignment(combined_aln) expected = array([[m[2, 0], m[2, 1]], [m[3, 0], m[3, 1]], [m[4, 0], m[4, 1]]]) - self.assertFloatEqual(coevolve_alignments(mi_alignment, aln1, aln2), expected) + assert_allclose(coevolve_alignments(mi_alignment, aln1, aln2), expected) # MI (return_full=True) - self.assertFloatEqual( + assert_allclose( coevolve_alignments(mi_alignment, aln1, aln2, return_full=True), m ) # NMI m = nmi_alignment(combined_aln) expected = array([[m[2, 0], m[2, 1]], [m[3, 0], m[3, 1]], [m[4, 0], m[4, 1]]]) - self.assertFloatEqual(coevolve_alignments(nmi_alignment, aln1, aln2), expected) + assert_allclose(coevolve_alignments(nmi_alignment, aln1, aln2), expected) # AS m = ancestral_state_alignment(combined_aln, tree=t) expected = array([[m[2, 0], m[2, 1]], [m[3, 0], m[3, 1]], [m[4, 0], m[4, 1]]]) - self.assertFloatEqual( + assert_allclose( coevolve_alignments(ancestral_state_alignment, aln1, aln2, tree=t), expected ) # SCA m = sca_alignment(combined_aln, cutoff=cutoff) expected = array([[m[2, 0], m[2, 1]], [m[3, 0], m[3, 1]], [m[4, 0], m[4, 1]]]) - self.assertFloatEqual( + assert_allclose( coevolve_alignments(sca_alignment, aln1, aln2, cutoff=cutoff), expected ) @@ -625,55 +597,52 @@ # MI m = mi_alignment(combined_aln) expected = array([[m[2, 0], m[2, 1]], [m[3, 0], m[3, 1]], [m[4, 0], m[4, 1]]]) - self.assertFloatEqual(coevolve_alignments(mi_alignment, aln1, aln2), expected) + assert_allclose(coevolve_alignments(mi_alignment, aln1, aln2), expected) # MI (return_full=True) - self.assertFloatEqual( + assert_allclose( 
coevolve_alignments(mi_alignment, aln1, aln2, return_full=True), m ) # NMI m = nmi_alignment(combined_aln) expected = array([[m[2, 0], m[2, 1]], [m[3, 0], m[3, 1]], [m[4, 0], m[4, 1]]]) - self.assertFloatEqual(coevolve_alignments(nmi_alignment, aln1, aln2), expected) + assert_allclose(coevolve_alignments(nmi_alignment, aln1, aln2), expected) def test_mi_pair_cols_default_exclude_handling(self): """ mi_pair returns null_value on excluded by default """ aln = ArrayAlignment(data={"1": "AB", "2": "-B"}, moltype=PROTEIN) - self.assertFloatEqual(mi_pair(aln, pos1=0, pos2=1), DEFAULT_NULL_VALUE) + assert_allclose(mi_pair(aln, pos1=0, pos2=1), DEFAULT_NULL_VALUE) aln = ArrayAlignment(data={"1": "-B", "2": "-B"}, moltype=PROTEIN) - self.assertFloatEqual(mi_pair(aln, pos1=0, pos2=1), DEFAULT_NULL_VALUE) + assert_allclose(mi_pair(aln, pos1=0, pos2=1), DEFAULT_NULL_VALUE) aln = ArrayAlignment(data={"1": "AA", "2": "-B"}, moltype=PROTEIN) - self.assertFloatEqual(mi_pair(aln, pos1=0, pos2=1), DEFAULT_NULL_VALUE) + assert_allclose(mi_pair(aln, pos1=0, pos2=1), DEFAULT_NULL_VALUE) aln = ArrayAlignment(data={"1": "AA", "2": "PB"}, moltype=PROTEIN) - self.assertFloatEqual( - mi_pair(aln, pos1=0, pos2=1, excludes="P"), DEFAULT_NULL_VALUE - ) + assert_allclose(mi_pair(aln, pos1=0, pos2=1, excludes="P"), DEFAULT_NULL_VALUE) def test_mi_pair_cols_non_default_exclude_handling(self): """ mi_pair uses non-default exclude_handler when provided""" aln = ArrayAlignment(data={"1": "A-", "2": "A-"}, moltype=PROTEIN) - self.assertFloatEqual(mi_pair(aln, pos1=0, pos2=1), DEFAULT_NULL_VALUE) - self.assertFloatEqual( + assert_allclose(mi_pair(aln, pos1=0, pos2=1), DEFAULT_NULL_VALUE) + assert_allclose( mi_pair(aln, pos1=0, pos2=1, exclude_handler=ignore_excludes), 0.0 ) def test_mi_pair_cols_and_entropies(self): - """ mi_pair calculates mi from a pair of columns and precalc entropies - """ + """mi_pair calculates mi from a pair of columns and precalc entropies""" aln = ArrayAlignment(data={"1": 
"AB", "2": "AB"}, moltype=PROTEIN) - self.assertFloatEqual(mi_pair(aln, pos1=0, pos2=1, h1=0.0, h2=0.0), 0.0) + assert_allclose(mi_pair(aln, pos1=0, pos2=1, h1=0.0, h2=0.0), 0.0) aln = ArrayAlignment(data={"1": "AB", "2": "BA"}, moltype=PROTEIN) - self.assertFloatEqual(mi_pair(aln, pos1=0, pos2=1, h1=1.0, h2=1.0), 1.0) + assert_allclose(mi_pair(aln, pos1=0, pos2=1, h1=1.0, h2=1.0), 1.0) # incorrect positional entropies provided to ensure that the # precalculated values are used, and that entorpies are not # caluclated on-the-fly. aln = ArrayAlignment(data={"1": "AB", "2": "AB"}, moltype=PROTEIN) - self.assertFloatEqual(mi_pair(aln, pos1=0, pos2=1, h1=1.0, h2=1.0), 2.0) + assert_allclose(mi_pair(aln, pos1=0, pos2=1, h1=1.0, h2=1.0), 2.0) def test_mi_pair_alt_calculator(self): """ mi_pair uses alternate mi_calculator when provided """ aln = ArrayAlignment(data={"1": "AB", "2": "AB"}, moltype=PROTEIN) - self.assertFloatEqual(mi_pair(aln, pos1=0, pos2=1), 0.0) - self.assertFloatEqual( + assert_allclose(mi_pair(aln, pos1=0, pos2=1), 0.0) + assert_allclose( mi_pair(aln, pos1=0, pos2=1, mi_calculator=normalized_mi), DEFAULT_NULL_VALUE, ) @@ -681,18 +650,18 @@ def test_mi_position_valid_input(self): """ mi_position functions with varied valid input """ aln = ArrayAlignment(data={"1": "ACG", "2": "GAC"}, moltype=PROTEIN) - self.assertFloatEqual(mi_position(aln, 0), array([1.0, 1.0, 1.0])) + assert_allclose(mi_position(aln, 0), array([1.0, 1.0, 1.0])) aln = ArrayAlignment(data={"1": "ACG", "2": "ACG"}, moltype=PROTEIN) - self.assertFloatEqual(mi_position(aln, 0), array([0.0, 0.0, 0.0])) + assert_allclose(mi_position(aln, 0), array([0.0, 0.0, 0.0])) aln = ArrayAlignment(data={"1": "ACG", "2": "ACG"}, moltype=PROTEIN) - self.assertFloatEqual(mi_position(aln, 2), array([0.0, 0.0, 0.0])) + assert_allclose(mi_position(aln, 2), array([0.0, 0.0, 0.0])) def test_mi_position_from_alignment_nmi(self): """mi_position functions w/ alternate mi_calculator """ aln = 
ArrayAlignment(data={"1": "ACG", "2": "ACG"}, moltype=PROTEIN) - self.assertFloatEqual(mi_position(aln, 0), array([0.0, 0.0, 0.0])) + assert_allclose(mi_position(aln, 0), array([0.0, 0.0, 0.0])) aln = ArrayAlignment(data={"1": "ACG", "2": "ACG"}, moltype=PROTEIN) - self.assertFloatEqual( + assert_allclose( mi_position(aln, 0, mi_calculator=normalized_mi), array([DEFAULT_NULL_VALUE, DEFAULT_NULL_VALUE, DEFAULT_NULL_VALUE]), ) @@ -700,18 +669,16 @@ def test_mi_position_from_alignment_default_exclude_handling(self): """ mi_position handles excludes by setting to null_value""" aln = ArrayAlignment(data={"1": "ACG", "2": "G-C"}, moltype=PROTEIN) - self.assertFloatEqual( - mi_position(aln, 0), array([1.0, DEFAULT_NULL_VALUE, 1.0]) - ) + assert_allclose(mi_position(aln, 0), array([1.0, DEFAULT_NULL_VALUE, 1.0])) aln = ArrayAlignment(data={"1": "ACG", "2": "GPC"}, moltype=PROTEIN) - self.assertFloatEqual( + assert_allclose( mi_position(aln, 0, excludes="P"), array([1.0, DEFAULT_NULL_VALUE, 1.0]) ) def test_mi_position_from_alignment_non_default_exclude_handling(self): """ mi_position handles excludes w/ non-default method""" aln = ArrayAlignment(data={"1": "ACG", "2": "G-C"}, moltype=PROTEIN) - self.assertFloatEqual( + assert_allclose( mi_position(aln, 0, exclude_handler=ignore_excludes), array([1.0, 1.0, 1.0]) ) @@ -726,11 +693,11 @@ ) # gap in second column aln = ArrayAlignment(data={"1": "ACG", "2": "A-G"}, moltype=PROTEIN) - self.assertFloatEqual(mi_alignment(aln), expected) + assert_allclose(mi_alignment(aln), expected) # excludes = 'P' aln = ArrayAlignment(data={"1": "ACG", "2": "APG"}, moltype=PROTEIN) - self.assertFloatEqual(mi_alignment(aln, excludes="P"), expected) + assert_allclose(mi_alignment(aln, excludes="P"), expected) # gap in first column expected = array( @@ -741,20 +708,19 @@ ] ) aln = ArrayAlignment(data={"1": "-CG", "2": "ACG"}, moltype=PROTEIN) - self.assertFloatEqual(mi_alignment(aln), expected) + assert_allclose(mi_alignment(aln), expected) def 
test_mi_alignment_high(self): """ mi_alignment detected perfectly correlated columns """ expected = [[1.0, 1.0], [1.0, 1.0]] aln = ArrayAlignment(data={"1": "AG", "2": "GA"}, moltype=PROTEIN) - self.assertFloatEqual(mi_alignment(aln), expected) + assert_allclose(mi_alignment(aln), expected) def test_mi_alignment_low(self): - """ mi_alignment detected in perfectly uncorrelated columns - """ + """mi_alignment detected in perfectly uncorrelated columns""" expected = [[0.0, 0.0], [0.0, 1.0]] aln = ArrayAlignment(data={"1": "AG", "2": "AC"}, moltype=PROTEIN) - self.assertFloatEqual(mi_alignment(aln), expected) + assert_allclose(mi_alignment(aln), expected) def test_resampled_mi_alignment(self): """ resampled_mi_alignment returns without error """ @@ -773,15 +739,11 @@ data={"1": "ACDEF", "2": "ACFEF", "3": "ACGEF"}, moltype=PROTEIN ) # no kwargs passed - self.assertFloatEqual( - coevolve_alignment(mi_alignment, aln1), mi_alignment(aln1) - ) + assert_allclose(coevolve_alignment(mi_alignment, aln1), mi_alignment(aln1)) # different method passed - self.assertFloatEqual( - coevolve_alignment(nmi_alignment, aln1), nmi_alignment(aln1) - ) + assert_allclose(coevolve_alignment(nmi_alignment, aln1), nmi_alignment(aln1)) # kwargs passed - self.assertFloatEqual( + assert_allclose( coevolve_alignment(mi_alignment, aln1, mi_calculator=nmi), nmi_alignment(aln1), ) @@ -874,7 +836,7 @@ filepath = mktemp() pickle_coevolution_result(expected, filepath) actual = unpickle_coevolution_result(filepath) - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) remove(filepath) def test_csv_coevolution_result_error(self): @@ -887,13 +849,12 @@ self.assertRaises(IOError, csv_to_coevolution_matrix, "invalid/file/path.pkl") def test_csv_and_uncsv(self): - """converting to/from csv matrix results in correct coevolution matrix - """ + """converting to/from csv matrix results in correct coevolution matrix""" expected = array([[1.4, 2.2], [DEFAULT_NULL_VALUE, 0.4]]) filepath = 
mktemp() coevolution_matrix_to_csv(expected, filepath) actual = csv_to_coevolution_matrix(filepath) - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) remove(filepath) def test_parse_coevolution_matrix_filepath(self): @@ -1009,8 +970,7 @@ self.assertEqual(count_le_threshold(m, 9), (6, 6)) def test_count_ge_threshold_symmetric_ignore_diagonal(self): - """count_ge_threshold works with symmetric and/or ignoring diag = True - """ + """count_ge_threshold works with symmetric and/or ignoring diag = True""" # no good scores, varied null value m = array([[DEFAULT_NULL_VALUE] * 3] * 3) self.assertEqual(count_ge_threshold(m, 1.0, symmetric=True), (0, 0)) @@ -1054,8 +1014,7 @@ ) def test_count_le_threshold_symmetric_ignore_diagonal(self): - """count_le_threshold works with symmetric and/or ignoring diag = True - """ + """count_le_threshold works with symmetric and/or ignoring diag = True""" # varied null value m = array([[DEFAULT_NULL_VALUE] * 3] * 3) self.assertEqual(count_le_threshold(m, 1.0, symmetric=True), (0, 0)) @@ -1099,8 +1058,7 @@ ) def test_aln_position_pairs_cmp_threshold_intramolecular(self): - """aln_position_pairs_ge_threshold: intramolecular matrix - """ + """aln_position_pairs_ge_threshold: intramolecular matrix""" m = array( [ [0, DEFAULT_NULL_VALUE, DEFAULT_NULL_VALUE], @@ -1142,8 +1100,7 @@ ) def test_aln_position_pairs_ge_threshold_intermolecular(self): - """aln_position_pairs_ge_threshold: intermolecular matrix - """ + """aln_position_pairs_ge_threshold: intermolecular matrix""" m = array([[1.0, 10.0, 4.0, 3.0], [9.0, 18.0, 5.0, 6.0]]) # error if failed to specify intermolecular_data_only=True self.assertRaises( @@ -1203,8 +1160,7 @@ ) def test_is_parsimony_informative_strict(self): - """ is_parsimony_informative functions as expected with strict=True - """ + """is_parsimony_informative functions as expected with strict=True""" freqs = {"A": 25} self.assertFalse(is_parsimony_informative(freqs, strict=True)) freqs = {"A": 25, 
"-": 25} @@ -1226,8 +1182,7 @@ self.assertTrue(is_parsimony_informative(freqs, strict=True)) def test_is_parsimony_informative_non_strict(self): - """ is_parsimony_informative functions as expected with strict=False - """ + """is_parsimony_informative functions as expected with strict=False""" freqs = {"A": 25} self.assertFalse(is_parsimony_informative(freqs, strict=False)) freqs = {"A": 25, "-": 25} @@ -1249,8 +1204,7 @@ self.assertTrue(is_parsimony_informative(freqs, strict=False)) def test_is_parsimony_informative_non_default(self): - """ is_parsimony_informative functions w non default paramters - """ + """is_parsimony_informative functions w non default paramters""" # NEED TO UPDATE THESE TESTS BASED ON MY ERROR IN THE # DEFINITION OF PARSIMONY INFORMATIVE. # changed minimum_count @@ -1285,8 +1239,7 @@ self.assertFalse(is_parsimony_informative(freqs, ignored="A", strict=False)) def test_filter_non_parsimony_informative_intramolecular(self): - """ non-parsimony informative sites in intramolecular matrix -> null - """ + """non-parsimony informative sites in intramolecular matrix -> null""" aln = make_aligned_seqs( data={"1": "ACDE", "2": "ACDE", "3": "ACDE", "4": "ACDE"}, moltype=PROTEIN, @@ -1302,7 +1255,7 @@ ) expected = array([[DEFAULT_NULL_VALUE] * 4] * 4) filter_non_parsimony_informative(aln, m) - self.assertFloatEqual(m, expected) + assert_allclose(m, expected) aln = make_aligned_seqs( data={"1": "ACDE", "2": "FCDE", "3": "ACDE", "4": "FCDE"}, @@ -1320,11 +1273,10 @@ expected = array([[DEFAULT_NULL_VALUE] * 4] * 4) expected[0, 0] = 42.0 filter_non_parsimony_informative(aln, m) - self.assertFloatEqual(m, expected) + assert_allclose(m, expected) def test_filter_non_parsimony_informative_intermolecular(self): - """ non-parsimony informative sites in intermolecular matrix -> null - """ + """non-parsimony informative sites in intermolecular matrix -> null""" # all non-parsimony informative aln = make_aligned_seqs( data={"1": "ACDEWQ", "2": "ACDEWQ", "3": 
"ACDEWQ", "4": "ACDEWQ"}, @@ -1334,7 +1286,7 @@ m = array([[1.0, 10.0, 4.0, 3.0], [9.0, 18.0, 5.0, 6.0]]) expected = array([[DEFAULT_NULL_VALUE] * 4] * 2) filter_non_parsimony_informative(aln, m, intermolecular_data_only=True) - self.assertFloatEqual(m, expected) + assert_allclose(m, expected) # one non-parsimony informative pair of positions aln = make_aligned_seqs( data={"1": "FCDEWD", "2": "ACDEWQ", "3": "ACDEWD", "4": "FCDEWQ"}, @@ -1345,7 +1297,7 @@ expected = array([[DEFAULT_NULL_VALUE] * 4] * 2) expected[1, 0] = 9.0 filter_non_parsimony_informative(aln, m, intermolecular_data_only=True) - self.assertFloatEqual(m, expected) + assert_allclose(m, expected) # all parsimony informative aln = make_aligned_seqs( data={"1": "FFFFFF", "2": "FFFFFF", "3": "GGGGGG", "4": "GGGGGG"}, @@ -1355,11 +1307,10 @@ m = array([[1.0, 10.0, 4.0, 3.0], [9.0, 18.0, 5.0, 6.0]]) expected = array([[1.0, 10.0, 4.0, 3.0], [9.0, 18.0, 5.0, 6.0]]) filter_non_parsimony_informative(aln, m, intermolecular_data_only=True) - self.assertFloatEqual(m, expected) + assert_allclose(m, expected) def test_filter_exclude_positions_intramolecular(self): - """filter_exclude_positions: functions for intramolecular data - """ + """filter_exclude_positions: functions for intramolecular data""" # filter zero positions (no excludes) aln = make_aligned_seqs( data={"1": "WCDE", "2": "ACDE", "3": "ACDE", "4": "ACDE"}, @@ -1383,7 +1334,7 @@ ] ) filter_exclude_positions(aln, m) - self.assertFloatEqual(m, expected) + assert_allclose(m, expected) # filter zero positions (max_exclude_percentage = percent exclude) aln = make_aligned_seqs( data={"1": "-CDE", "2": "A-DE", "3": "AC-E", "4": "ACD-"}, @@ -1407,7 +1358,7 @@ ] ) filter_exclude_positions(aln, m, max_exclude_percent=0.25) - self.assertFloatEqual(m, expected) + assert_allclose(m, expected) # filter zero positions (max_exclude_percentage too high) aln = make_aligned_seqs( data={"1": "-CDE", "2": "A-DE", "3": "AC-E", "4": "ACD-"}, @@ -1431,7 +1382,7 @@ ] ) 
filter_exclude_positions(aln, m, max_exclude_percent=0.5) - self.assertFloatEqual(m, expected) + assert_allclose(m, expected) # filter one position (defualt max_exclude_percentage) aln = make_aligned_seqs( data={"1": "-CDE", "2": "ACDE", "3": "ACDE", "4": "ACDE"}, @@ -1455,7 +1406,7 @@ ] ) filter_exclude_positions(aln, m) - self.assertFloatEqual(m, expected) + assert_allclose(m, expected) # filter one position (non-defualt max_exclude_percentage) aln = make_aligned_seqs( data={"1": "-CDE", "2": "ACDE", "3": "ACDE", "4": "-CDE"}, @@ -1479,7 +1430,7 @@ ] ) filter_exclude_positions(aln, m, max_exclude_percent=0.49) - self.assertFloatEqual(m, expected) + assert_allclose(m, expected) # filter all positions (defualt max_exclude_percentage) aln = make_aligned_seqs( data={"1": "----", "2": "ACDE", "3": "ACDE", "4": "ACDE"}, @@ -1496,7 +1447,7 @@ ) expected = array([[DEFAULT_NULL_VALUE] * 4] * 4) filter_exclude_positions(aln, m) - self.assertFloatEqual(m, expected) + assert_allclose(m, expected) # filter all positions (non-defualt max_exclude_percentage) aln = make_aligned_seqs( data={"1": "----", "2": "A-DE", "3": "AC--", "4": "-CDE"}, @@ -1513,7 +1464,7 @@ ) expected = array([[DEFAULT_NULL_VALUE] * 4] * 4) filter_exclude_positions(aln, m, max_exclude_percent=0.49) - self.assertFloatEqual(m, expected) + assert_allclose(m, expected) # filter one position (defualt max_exclude_percentage, # non-defualt excludes) @@ -1539,7 +1490,7 @@ ] ) filter_exclude_positions(aln, m, excludes="W") - self.assertFloatEqual(m, expected) + assert_allclose(m, expected) # filter one position (defualt max_exclude_percentage, # non-defualt null_value) @@ -1565,11 +1516,10 @@ ] ) filter_exclude_positions(aln, m, null_value=999.0) - self.assertFloatEqual(m, expected) + assert_allclose(m, expected) def test_filter_exclude_positions_intermolecular(self): - """filter_exclude_positions: functions for intermolecular data - """ + """filter_exclude_positions: functions for intermolecular data""" # these 
tests correspond to alignments of length 4 and 2 positions # respectively, hence a coevolution_matrix with shape = (2,4) @@ -1582,7 +1532,7 @@ m = array([[1.0, 10.0, 4.0, 3.0], [9.0, 18.0, 5.0, 6.0]]) expected = array([[1.0, 10.0, 4.0, 3.0], [9.0, 18.0, 5.0, 6.0]]) filter_exclude_positions(merged_aln, m, intermolecular_data_only=True) - self.assertFloatEqual(m, expected) + assert_allclose(m, expected) # filter one position (aln1) merged_aln = make_aligned_seqs( @@ -1595,7 +1545,7 @@ [[1.0, 10.0, DEFAULT_NULL_VALUE, 3.0], [9.0, 18.0, DEFAULT_NULL_VALUE, 6.0]] ) filter_exclude_positions(merged_aln, m, intermolecular_data_only=True) - self.assertFloatEqual(m, expected) + assert_allclose(m, expected) # filter one position (aln2) merged_aln = make_aligned_seqs( data={"1": "WCEEDE", "2": "ACDEDE", "3": "ACDEDE", "4": "ACDED-"}, @@ -1605,7 +1555,7 @@ m = array([[1.0, 10.0, 4.0, 3.0], [9.0, 18.0, 5.0, 6.0]]) expected = array([[1.0, 10.0, 4.0, 3.0], [DEFAULT_NULL_VALUE] * 4]) filter_exclude_positions(merged_aln, m, intermolecular_data_only=True) - self.assertFloatEqual(m, expected) + assert_allclose(m, expected) # filter two positions (aln1 & aln2) merged_aln = make_aligned_seqs( @@ -1618,7 +1568,7 @@ [[DEFAULT_NULL_VALUE, 10.0, 4.0, 3.0], [DEFAULT_NULL_VALUE] * 4] ) filter_exclude_positions(merged_aln, m, intermolecular_data_only=True) - self.assertFloatEqual(m, expected) + assert_allclose(m, expected) # filter two positions (aln1 & aln2, alt excludes) merged_aln = make_aligned_seqs( @@ -1633,7 +1583,7 @@ filter_exclude_positions( merged_aln, m, intermolecular_data_only=True, excludes="W" ) - self.assertFloatEqual(m, expected) + assert_allclose(m, expected) # filter two positions (aln1 & aln2, alt null_value) merged_aln = make_aligned_seqs( @@ -1646,7 +1596,7 @@ filter_exclude_positions( merged_aln, m, intermolecular_data_only=True, null_value=999.0 ) - self.assertFloatEqual(m, expected) + assert_allclose(m, expected) def 
test_filter_threshold_based_multiple_interdependency_intermolecular(self): "multiple interdependency filter functions with intermolecular data " @@ -1672,7 +1622,7 @@ actual = filter_threshold_based_multiple_interdependency( None, m, 0.95, 0, greater_equal, True ) - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) # realisitic test case m = array( [ @@ -1693,7 +1643,7 @@ actual = filter_threshold_based_multiple_interdependency( None, m, 0.95, 1, greater_equal, True ) - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) # upper boundary, nothing filtered null = DEFAULT_NULL_VALUE m = array( @@ -1708,7 +1658,7 @@ actual = filter_threshold_based_multiple_interdependency( None, m, 0.95, 5, greater_equal, True ) - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) # cmp_function = less_equal, realistic test case m = array( @@ -1730,7 +1680,7 @@ actual = filter_threshold_based_multiple_interdependency( None, m, 0.35, 1, less_equal, True ) - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) def test_filter_threshold_based_multiple_interdependency_intramolecular(self): "multiple interdependency filter functions with intramolecular data " @@ -1756,7 +1706,7 @@ actual = filter_threshold_based_multiple_interdependency( None, m, 0.95, 0, greater_equal ) - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) # realistic test case m = array( @@ -1778,7 +1728,7 @@ actual = filter_threshold_based_multiple_interdependency( None, m, 0.95, 1, greater_equal ) - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) # upper boundary, nothing filtered m = array( @@ -1793,7 +1743,7 @@ actual = filter_threshold_based_multiple_interdependency( None, m, 0.95, 5, greater_equal ) - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) ## cmp_function = le # realistic test case @@ -1816,29 +1766,27 @@ actual = 
filter_threshold_based_multiple_interdependency( None, m, 0.33, 1, less_equal ) - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) def test_probs_from_dict(self): - """probs_from_dict: dict of probs -> list of probs in alphabet's order - """ + """probs_from_dict: dict of probs -> list of probs in alphabet's order""" d = {"A": 0.25, "D": 0.52, "C": 0.23} a = list("ACD") - self.assertFloatEqual(probs_from_dict(d, a), [0.25, 0.23, 0.52]) + assert_allclose(probs_from_dict(d, a), [0.25, 0.23, 0.52]) a = list("ADC") - self.assertFloatEqual(probs_from_dict(d, a), [0.25, 0.52, 0.23]) + assert_allclose(probs_from_dict(d, a), [0.25, 0.52, 0.23]) a = list("DCA") - self.assertFloatEqual(probs_from_dict(d, a), [0.52, 0.23, 0.25]) + assert_allclose(probs_from_dict(d, a), [0.52, 0.23, 0.25]) a = CharAlphabet("DCA") - self.assertFloatEqual(probs_from_dict(d, a), [0.52, 0.23, 0.25]) + assert_allclose(probs_from_dict(d, a), [0.52, 0.23, 0.25]) # protein natural probs l = probs_from_dict(protein_dict, AAGapless) for i in range(20): - self.assertFloatEqual(l[i], protein_dict[AAGapless[i]], 0.001) + assert_allclose(l[i], protein_dict[AAGapless[i]], 0.001) def test_freqs_from_aln(self): - """freqs_from_aln: freqs of alphabet chars in aln is calc'ed correctly - """ + """freqs_from_aln: freqs of alphabet chars in aln is calc'ed correctly""" # non-default scaled_aln_size aln = ArrayAlignment( data=list(zip(list(range(4)), ["ACGT", "AGCT", "ACCC", "TAGG"])), @@ -1846,19 +1794,19 @@ ) alphabet = "ACGT" expected = [4, 5, 4, 3] - self.assertEqual(freqs_from_aln(aln, alphabet, 16), expected) + assert_equal(freqs_from_aln(aln, alphabet, 16), expected) # change the order of the alphabet alphabet = "TGCA" expected = [3, 4, 5, 4] - self.assertEqual(freqs_from_aln(aln, alphabet, 16), expected) + assert_equal(freqs_from_aln(aln, alphabet, 16), expected) # default scaled_aln_size, sums of freqs == 100 alphabet = "ACGT" expected = [25.0, 31.25, 25, 18.75] - 
self.assertEqual(freqs_from_aln(aln, alphabet), expected) + assert_allclose(freqs_from_aln(aln, alphabet), expected) # alphabet char which doesn't show up gets zero freq alphabet = "ACGTW" expected = [25.0, 31.25, 25, 18.75, 0] - self.assertEqual(freqs_from_aln(aln, alphabet), expected) + assert_allclose(freqs_from_aln(aln, alphabet), expected) # alignment char which doesn't show up is silently ignored aln = ArrayAlignment( data=list(zip(list(range(4)), ["ACGT", "AGCT", "ACCC", "TWGG"])), @@ -1866,30 +1814,29 @@ ) alphabet = "ACGT" expected = [18.75, 31.25, 25, 18.75] - self.assertEqual(freqs_from_aln(aln, alphabet), expected) + assert_allclose(freqs_from_aln(aln, alphabet), expected) def test_freqs_to_array(self): """freqs_to_array: should convert CategoryCounter object to array""" # should work with empty object f = CategoryCounter() f2a = freqs_to_array - self.assertFloatEqual(f2a(f, AAGapless), zeros(20)) + assert_allclose(f2a(f, AAGapless), zeros(20)) # should work with full object, omitting unwanted keys f = CategoryCounter({"A": 20, "Q": 30, "X": 20}) expected = zeros(20) expected[AAGapless.index("A")] = 20 expected[AAGapless.index("Q")] = 30 - self.assertFloatEqual(f2a(f, AAGapless), expected) + assert_allclose(f2a(f, AAGapless), expected) # should work for normal dict and any alphabet d = {"A": 3, "D": 1, "C": 5, "E": 2} alpha = "ABCD" exp = array([3, 0, 5, 1]) - self.assertFloatEqual(f2a(d, alpha), exp) + assert_allclose(f2a(d, alpha), exp) def test_get_allowed_perturbations(self): - """get_allowed_perturbations: should work for different cutoff values - """ + """get_allowed_perturbations: should work for different cutoff values""" counts = [50, 40, 10, 0] a = list("ACGT") self.assertEqual(get_allowed_perturbations(counts, 1.0, a), []) @@ -1903,8 +1850,7 @@ self.assertEqual(get_allowed_perturbations(counts, 0.0, a), a) def test_get_subalignments(self): - """get_subalignments: works with different alignment sizes and cutoffs - """ + """get_subalignments: 
works with different alignment sizes and cutoffs""" aln = ArrayAlignment( data={1: "AAAA", 2: "AAAC", 3: "AACG", 4: "ACCT", 5: "ACG-"}, moltype=PROTEIN, @@ -1939,27 +1885,26 @@ expected_1 = array([0.0, 25.0, 25.0, 25.0, 25.0]) expected_2 = array([0.0, 0.0, 50.0, 50.0, 0.0]) expected_3 = array([0.0, 0.0, 25.0, 50.0, 25.0]) - self.assertFloatEqual(get_positional_frequencies(aln, 0, "ACDEF"), expected_0) - self.assertFloatEqual(get_positional_frequencies(aln, 1, "ACDEF"), expected_1) - self.assertFloatEqual(get_positional_frequencies(aln, 2, "ACDEF"), expected_2) - self.assertFloatEqual(get_positional_frequencies(aln, 3, "ACDEF"), expected_3) + assert_allclose(get_positional_frequencies(aln, 0, "ACDEF"), expected_0) + assert_allclose(get_positional_frequencies(aln, 1, "ACDEF"), expected_1) + assert_allclose(get_positional_frequencies(aln, 2, "ACDEF"), expected_2) + assert_allclose(get_positional_frequencies(aln, 3, "ACDEF"), expected_3) # extra characters (W) are silently ignored -- is this the desired # behavior? 
aln = ArrayAlignment( data={1: "WCDE", 2: "ADDE", 3: "AEED", 4: "AFEF"}, moltype=PROTEIN ) expected_0 = array([75.0, 0.0, 0.0, 0.0, 0.0]) - self.assertFloatEqual(get_positional_frequencies(aln, 0, "ACDEF"), expected_0) + assert_allclose(get_positional_frequencies(aln, 0, "ACDEF"), expected_0) # 20 residue amino acid alphabet aln = ArrayAlignment( data={1: "ACDE", 2: "ADDE", 3: "AEED", 4: "AFEF"}, moltype=PROTEIN ) expected = array([100.0] + [0.0] * 19) - self.assertFloatEqual(get_positional_frequencies(aln, 0, AAGapless), expected) + assert_allclose(get_positional_frequencies(aln, 0, AAGapless), expected) def test_get_positional_frequencies(self): - """get_positional_frequencies: works with non-default scaled_aln_size - """ + """get_positional_frequencies: works with non-default scaled_aln_size""" aln = ArrayAlignment( data={1: "ACDE", 2: "ADDE", 3: "AEED", 4: "AFEF"}, moltype=PROTEIN ) @@ -1967,39 +1912,26 @@ expected_1 = array([0.0, 1.0, 1.0, 1.0, 1.0]) expected_2 = array([0.0, 0.0, 2.0, 2.0, 0.0]) expected_3 = array([0.0, 0.0, 1.0, 2.0, 1.0]) - self.assertFloatEqual( - get_positional_frequencies(aln, 0, "ACDEF", 4), expected_0 - ) - self.assertFloatEqual( - get_positional_frequencies(aln, 1, "ACDEF", 4), expected_1 - ) - self.assertFloatEqual( - get_positional_frequencies(aln, 2, "ACDEF", 4), expected_2 - ) - self.assertFloatEqual( - get_positional_frequencies(aln, 3, "ACDEF", 4), expected_3 - ) + assert_allclose(get_positional_frequencies(aln, 0, "ACDEF", 4), expected_0) + assert_allclose(get_positional_frequencies(aln, 1, "ACDEF", 4), expected_1) + assert_allclose(get_positional_frequencies(aln, 2, "ACDEF", 4), expected_2) + assert_allclose(get_positional_frequencies(aln, 3, "ACDEF", 4), expected_3) # extra characters (W) are silently ignored -- is this the desired # behavior? 
aln = ArrayAlignment( data={1: "WCDE", 2: "ADDE", 3: "AEED", 4: "AFEF"}, moltype=PROTEIN ) expected_0 = array([3.0, 0.0, 0.0, 0.0, 0.0]) - self.assertFloatEqual( - get_positional_frequencies(aln, 0, "ACDEF", 4), expected_0 - ) + assert_allclose(get_positional_frequencies(aln, 0, "ACDEF", 4), expected_0) # 20 residue amino acid alphabet aln = ArrayAlignment( data={1: "ACDE", 2: "ADDE", 3: "AEED", 4: "AFEF"}, moltype=PROTEIN ) expected = array([4.0] + [0.0] * 19) - self.assertFloatEqual( - get_positional_frequencies(aln, 0, AAGapless, 4), expected - ) + assert_allclose(get_positional_frequencies(aln, 0, AAGapless, 4), expected) def test_validate_alphabet_invalid(self): - """validate_alphabet: raises error on incompatible alpabet and freqs - """ + """validate_alphabet: raises error on incompatible alpabet and freqs""" # len(alpha) > len(freqs) self.assertRaises(ValueError, validate_alphabet, "ABC", {"A": 0.5, "B": 0.5}) self.assertRaises(ValueError, validate_alphabet, "ABCD", {"A": 0.5, "B": 0.5}) @@ -2015,8 +1947,7 @@ ) def test_validate_alphabet_valid(self): - """validate_alphabet: does nothing on compatible alpabet and freqs - """ + """validate_alphabet: does nothing on compatible alpabet and freqs""" validate_alphabet("AB", {"A": 0.5, "B": 0.5}) validate_alphabet(CharAlphabet("AB"), {"A": 0.5, "B": 0.5}) validate_alphabet([1, 42, 8], {1: 0.5, 42: 0.25, 8: 0.25}) @@ -2080,31 +2011,28 @@ freqs = [1.0, 2.0, 0.0] probs = [0.33, 0.33, 0.33] expected = array([0.444411, 0.218889, 0.300763]) - self.assertFloatEqual(get_positional_probabilities(freqs, probs, 3), expected) + assert_allclose(get_positional_probabilities(freqs, probs, 3), expected) def test_get_dg(self): """get_dg: returns delta_g vector""" p = [0.1, 0.2, 0.3] a = [0.5, 0.6, 0.7] expected = [log(0.1 / 0.5), log(0.2 / 0.6), log(0.3 / 0.7)] - self.assertFloatEqual(get_dg(p, a), expected) + assert_allclose(get_dg(p, a), expected) def test_get_dgg(self): """get_dgg: returns delta_delta_g value given two delta_g 
vectors """ v1 = array([0.05, 0.5, 0.1]) v2 = array([0.03, 0.05, 0.1]) expected = sqrt(sum((v1 - v2) * (v1 - v2))) / 100 * e - self.assertFloatEqual(get_dgg(v1, v2), expected) + assert_allclose(get_dgg(v1, v2), expected) def test_get_positional_probabilities_w_def_num_seqs(self): - """get_positional_probabilities: works w/ num_seqs scaled to 100 (def) - """ + """get_positional_probabilities: works w/ num_seqs scaled to 100 (def)""" freqs = [15.0, 33.0, 52.0] probs = [0.33, 0.33, 0.33] expected = array([2.4990e-5, 0.0846, 3.8350e-5]) - self.assertFloatEqual( - get_positional_probabilities(freqs, probs), expected, 0.001 - ) + assert_allclose(get_positional_probabilities(freqs, probs), expected, 0.001) def test_get_positional_probs_handles_rounding_error_in_freqs(self): """get_positional_probabilities: works w/ rounding error in freqs""" @@ -2115,7 +2043,7 @@ freqs = [100.0000000001, 0.0, 0.0] probs = [0.33, 0.33, 0.33] expected = array([7.102218e-49, 4.05024e-18, 4.05024e-18]) - self.assertFloatEqual(get_positional_probabilities(freqs, probs), expected) + assert_allclose(get_positional_probabilities(freqs, probs), expected, rtol=1e-5) # value that is truely over raises an error freqs = [101.0000000001, 0.0, 0.0] probs = [0.33, 0.33, 0.33] @@ -2124,7 +2052,9 @@ freqs = [50.0000000001, 0.0, 0.0] probs = [0.33, 0.33, 0.33] expected = array([8.42747e-25, 2.01252e-9, 2.01252e-9]) - self.assertFloatEqual(get_positional_probabilities(freqs, probs, 50), expected) + assert_allclose( + get_positional_probabilities(freqs, probs, 50), expected, rtol=1e-5 + ) # value that is truely over raises an error freqs = [51.0000000001, 0.0, 0.0] probs = [0.33, 0.33, 0.33] @@ -2203,8 +2133,7 @@ ) def test_sca_pair_return_all(self): - """sca_pair: handles return_all by returning lists of proper length - """ + """sca_pair: handles return_all by returning lists of proper length""" # two allowed_perturbations a = "ACGT" aln = ArrayAlignment( @@ -2250,7 +2179,7 @@ ) # expected = 
[('A',-1),('C',-1)] expected = DEFAULT_NULL_VALUE - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) # pos1 == pos2 actual = sca_pair( @@ -2262,9 +2191,11 @@ alphabet=a, background_freqs=self.dna_base_freqs, ) - # expected = [('A',-1),('C',-1)] - expected = [("A", 2.40381185618)] - self.assertFloatEqual(actual, expected) + + actual = list(zip(*actual)) + expected = list(zip(*[("A", 2.40381185618)])) + assert_equal(actual[0], expected[0]) + assert_allclose(actual[1], expected[1]) def test_sca_pair_error(self): """sca_pair:returns w/ error when appropriate """ @@ -2348,7 +2279,7 @@ self.dna_aln, 1, 0.50, alphabet="ACGT", background_freqs=self.dna_base_freqs ) # sanity check -- coupling w/ self - self.assertFloatEqual(r[1], 3.087, 0.01) + assert_allclose(r[1], 3.087, 0.01) r = sca_position( self.dna_aln_gapped, 1, @@ -2356,7 +2287,7 @@ alphabet="ACGT", background_freqs=self.dna_base_freqs, ) - self.assertFloatEqual(r[1], 3.387, 0.01) + assert_allclose(r[1], 3.387, 0.01) # same tests, but called via coevolve_position r = coevolve_position( @@ -2368,7 +2299,7 @@ background_freqs=self.dna_base_freqs, ) # sanity check -- coupling w/ self - self.assertFloatEqual(r[1], 3.087, 0.01) + assert_allclose(r[1], 3.087, 0.01) r = coevolve_position( sca_position, self.dna_aln_gapped, @@ -2378,7 +2309,7 @@ background_freqs=self.dna_base_freqs, ) # sanity check -- coupling w/ self - self.assertFloatEqual(r[1], 3.387, 0.01) + assert_allclose(r[1], 3.387, 0.01) def test_sca_position_error(self): """sca_position: returns w/ error when appropriate """ @@ -2444,7 +2375,7 @@ actual = sca_position( self.dna_aln, 1, 0.50, alphabet="ACGT", background_freqs=self.dna_base_freqs ) - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) # change some of the defaults to make sure they make it through bg_freqs = {"A": 0.50, "C": 0.50} expected = [] @@ -2470,7 +2401,7 @@ scaled_aln_size=20, background_freqs=bg_freqs, ) - self.assertFloatEqual(actual, 
expected) + assert_allclose(actual, expected) def test_sca_alignment_no_error(self): """sca_alignment: returns w/o error """ @@ -2478,7 +2409,7 @@ self.dna_aln, 0.50, alphabet="ACGT", background_freqs=self.dna_base_freqs ) # sanity check -- coupling w/ self - self.assertFloatEqual(r[0][0], 2.32222608171) + assert_allclose(r[0][0], 2.32222608171) # same test, but called via coevolve_alignment r = coevolve_alignment( @@ -2489,7 +2420,7 @@ background_freqs=self.dna_base_freqs, ) # sanity check -- coupling w/ self - self.assertFloatEqual(r[0][0], 2.32222608171) + assert_allclose(r[0][0], 2.32222608171) def test_sca_alignment_error(self): """sca_alignment: returns w/ error when appropriate """ @@ -2531,7 +2462,7 @@ actual = sca_alignment( self.dna_aln, 0.50, alphabet="ACGT", background_freqs=self.dna_base_freqs ) - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) # change some of the defaults to make sure they make it through bg_freqs = {"A": 0.50, "C": 0.50} expected = [] @@ -2555,34 +2486,32 @@ scaled_aln_size=20, background_freqs=bg_freqs, ) - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) def test_sca_pair_gpcr(self): - """sca_pair: reproduces several GPCR data from Suel et al., 2003 - """ - self.assertFloatEqual(sca_pair(self.gpcr_aln, 295, 18, 0.32), 0.12, 0.1) - self.assertFloatEqual(sca_pair(self.gpcr_aln, 295, 124, 0.32), 1.86, 0.1) - self.assertFloatEqual(sca_pair(self.gpcr_aln, 295, 304, 0.32), 0.3, 0.1) + """sca_pair: reproduces several GPCR data from Suel et al., 2003""" + assert_allclose(sca_pair(self.gpcr_aln, 295, 18, 0.32), 0.12, 0.1) + assert_allclose(sca_pair(self.gpcr_aln, 295, 124, 0.32), 1.86, 0.1) + assert_allclose(sca_pair(self.gpcr_aln, 295, 304, 0.32), 0.3, 0.1) # covariation w/ self - self.assertFloatEqual(sca_pair(self.gpcr_aln, 295, 295, 0.32), 7.70358628) + assert_allclose(sca_pair(self.gpcr_aln, 295, 295, 0.32), 7.70358628) def test_sca_position_gpcr(self): - """sca_position: 
reproduces several GPCR data from Suel et al., 2003 - """ + """sca_position: reproduces several GPCR data from Suel et al., 2003""" if not self.run_slow_tests: return vector = sca_position(self.gpcr_aln, 295, 0.32) - self.assertFloatEqual(vector[18], 0.12, 0.1) - self.assertFloatEqual(vector[124], 1.86, 0.1) - self.assertFloatEqual(vector[304], 0.3, 0.1) + assert_allclose(vector[18], 0.12, 0.1) + assert_allclose(vector[124], 1.86, 0.1) + assert_allclose(vector[304], 0.3, 0.1) # covariation w/ self == null_value - self.assertFloatEqual(vector[295], nan) + assert_allclose(vector[295], nan) def test_ltm_to_symmetric(self): """ltm_to_symmetric: making ltm matrices symmetric functions""" m = arange(9).reshape((3, 3)) expected = [[0, 3, 6], [3, 4, 7], [6, 7, 8]] - self.assertEqual(ltm_to_symmetric(m), expected) + assert_equal(ltm_to_symmetric(m), expected) # non-square matrices not supported self.assertRaises(AssertionError, ltm_to_symmetric, arange(10).reshape(5, 2)) self.assertRaises(AssertionError, ltm_to_symmetric, arange(10).reshape(2, 5)) @@ -2753,8 +2682,7 @@ ) def test_validate_ancestral_seqs_invalid(self): - """validate_ancestral_seqs: ValueError on incompatible anc. seqs & tree - """ + """validate_ancestral_seqs: ValueError on incompatible anc. seqs & tree""" # edge missing aln = ArrayAlignment(data={"A": "AC", "B": "CA", "C": "CC"}, moltype=PROTEIN) self.assertRaises( @@ -2813,8 +2741,7 @@ ) def test_validate_ancestral_seqs_valid(self): - """validate_ancestral_seqs: does nothing on compatible anc. seqs & tree - """ + """validate_ancestral_seqs: does nothing on compatible anc. 
seqs & tree""" aln = ArrayAlignment(data={"A": "AC", "B": "CA", "C": "CC"}, moltype=PROTEIN) # valid data -> no error validate_ancestral_seqs( @@ -2942,27 +2869,24 @@ self.assertEqual(actual.num_seqs, 1) def test_ancestral_state_alignment_handles_ancestral_state_calc(self): - """ancestral_state_alignment: functions when calc'ing ancestral states - """ + """ancestral_state_alignment: functions when calc'ing ancestral states""" t = make_tree(treestring="((A:0.5,B:0.5):0.5,C:0.5);") aln = ArrayAlignment(data={"A": "AA", "B": "AA", "C": "AC"}, moltype=PROTEIN) - self.assertEqual(ancestral_state_alignment(aln, t), [[0, 0], [0, 2]]) + assert_equal(ancestral_state_alignment(aln, t), [[0, 0], [0, 2]]) # non-bifurcating tree t = make_tree(treestring="(A:0.5,B:0.5,C:0.5);") aln = ArrayAlignment(data={"A": "AA", "B": "AA", "C": "AC"}, moltype=PROTEIN) - self.assertEqual(ancestral_state_alignment(aln, t), [[0, 0], [0, 2]]) + assert_equal(ancestral_state_alignment(aln, t), [[0, 0], [0, 2]]) def test_ancestral_state_position_handles_ancestral_state_calc(self): - """ancestral_state_position: functions when calc'ing ancestral states - """ + """ancestral_state_position: functions when calc'ing ancestral states""" t = make_tree(treestring="((A:0.5,B:0.5):0.5,C:0.5);") aln = ArrayAlignment(data={"A": "AA", "B": "AA", "C": "AC"}, moltype=PROTEIN) - self.assertEqual(ancestral_state_position(aln, t, 0), [0, 0]) - self.assertEqual(ancestral_state_position(aln, t, 1), [0, 2]) + assert_equal(ancestral_state_position(aln, t, 0), [0, 0]) + assert_equal(ancestral_state_position(aln, t, 1), [0, 2]) def test_ancestral_state_pair_handles_ancestral_state_calc(self): - """ancestral_state_position: functions when calc'ing ancestral states - """ + """ancestral_state_position: functions when calc'ing ancestral states""" t = make_tree(treestring="((A:0.5,B:0.5):0.5,C:0.5);") aln = ArrayAlignment(data={"A": "AA", "B": "AA", "C": "AC"}, moltype=PROTEIN) self.assertEqual(ancestral_state_pair(aln, t, 0, 
0), 0) @@ -3066,8 +2990,7 @@ ) def test_ancestral_states_methods_handle_bad_position_numbers(self): - """coevolve_* w/ ancestral_states raise ValueError on bad position - """ + """coevolve_* w/ ancestral_states raise ValueError on bad position""" self.assertRaises( ValueError, @@ -3100,73 +3023,69 @@ ) def test_ancestral_state_alignment_non_bifurcating_tree(self): - """ancestral_state_alignment: handles non-bifurcating tree correctly - """ - self.assertEqual( + """ancestral_state_alignment: handles non-bifurcating tree correctly""" + assert_equal( ancestral_state_alignment(self.aln2, self.t2, self.ancestral_states2_3), [[9, 9], [9, 9]], ) def test_ancestral_state_alignment_bifurcating_tree(self): """ancestral_state_alignment: handles bifurcating tree correctly """ - self.assertFloatEqual( + assert_allclose( ancestral_state_alignment(self.aln1_5, self.t1, self.ancestral_states1), [[5, 5, 5], [5, 11.6, 11.6], [5, 11.6, 11.6]], ) def test_ancestral_state_alignment_ancestor_difference(self): - """ancestral_state_alignment: different ancestor -> different result - """ + """ancestral_state_alignment: different ancestor -> different result""" # ancestral_states2_1 - self.assertEqual( + assert_equal( ancestral_state_alignment(self.aln2, self.t2, self.ancestral_states2_1), [[5, 2], [2, 2]], ) # ancestral_states2_2 - self.assertEqual( + assert_equal( ancestral_state_alignment(self.aln2, self.t2, self.ancestral_states2_2), [[2, 2], [2, 5]], ) # ancestral_states2_3 - self.assertEqual( + assert_equal( ancestral_state_alignment(self.aln2, self.t2, self.ancestral_states2_3), [[9, 9], [9, 9]], ) def test_ancestral_state_position_ancestor_difference(self): - """ancestral_state_position: difference_ancestor -> different result - """ + """ancestral_state_position: difference_ancestor -> different result""" # ancestral_states2_1 - self.assertEqual( + assert_equal( ancestral_state_position(self.aln2, self.t2, 0, self.ancestral_states2_1), [5, 2], ) - self.assertEqual( + assert_equal( 
ancestral_state_position(self.aln2, self.t2, 1, self.ancestral_states2_1), [2, 2], ) # ancestral_states2_2 - self.assertEqual( + assert_equal( ancestral_state_position(self.aln2, self.t2, 0, self.ancestral_states2_2), [2, 2], ) - self.assertEqual( + assert_equal( ancestral_state_position(self.aln2, self.t2, 1, self.ancestral_states2_2), [2, 5], ) # ancestral_states2_3 - self.assertEqual( + assert_equal( ancestral_state_position(self.aln2, self.t2, 0, self.ancestral_states2_3), [9, 9], ) - self.assertEqual( + assert_equal( ancestral_state_position(self.aln2, self.t2, 1, self.ancestral_states2_3), [9, 9], ) def test_ancestral_state_pair_ancestor_difference(self): - """ancestral_state_pair: difference_ancestor -> different result - """ + """ancestral_state_pair: difference_ancestor -> different result""" # ancestral_states2_1 self.assertEqual( ancestral_state_pair(self.aln2, self.t2, 0, 0, self.ancestral_states2_1), 5 @@ -3208,153 +3127,148 @@ ) def test_ancestral_state_alignment_tree_difference(self): - """ancestral_state_alignment: different result on different tree - """ + """ancestral_state_alignment: different result on different tree""" # tree: t3_1 - self.assertEqual( + assert_equal( ancestral_state_alignment(self.aln3, self.t3_1, self.ancestral_states3), [[7, 5], [5, 5]], ) # tree: t3_2 - self.assertEqual( + assert_equal( ancestral_state_alignment(self.aln3, self.t3_2, self.ancestral_states3), [[2, 2], [2, 5]], ) def test_ancestral_state_position_tree_difference(self): - """ancestral_state_position: different result on different tree - """ + """ancestral_state_position: different result on different tree""" # tree: t3_1 - self.assertEqual( + assert_equal( ancestral_state_position(self.aln3, self.t3_1, 0, self.ancestral_states3), [7, 5], ) - self.assertEqual( + assert_equal( ancestral_state_position(self.aln3, self.t3_1, 1, self.ancestral_states3), [5, 5], ) # tree: t3_2 - self.assertEqual( + assert_equal( ancestral_state_position(self.aln3, self.t3_2, 0, 
self.ancestral_states3), [2, 2], ) - self.assertEqual( + assert_equal( ancestral_state_position(self.aln3, self.t3_2, 1, self.ancestral_states3), [2, 5], ) def test_ancestral_state_pair_tree_difference(self): - """ancestral_state_pair: different result on different tree - """ + """ancestral_state_pair: different result on different tree""" # tree: t3_1 - self.assertFloatEqual( + assert_allclose( ancestral_state_pair(self.aln3, self.t3_1, 0, 1, self.ancestral_states3), 5 ) - self.assertFloatEqual( + assert_allclose( ancestral_state_pair(self.aln3, self.t3_1, 1, 0, self.ancestral_states3), 5 ) - self.assertFloatEqual( + assert_allclose( ancestral_state_pair(self.aln3, self.t3_1, 0, 0, self.ancestral_states3), 7 ) - self.assertFloatEqual( + assert_allclose( ancestral_state_pair(self.aln3, self.t3_1, 1, 1, self.ancestral_states3), 5 ) # tree: t3_2 - self.assertFloatEqual( + assert_allclose( ancestral_state_pair(self.aln3, self.t3_2, 0, 1, self.ancestral_states3), 2 ) - self.assertFloatEqual( + assert_allclose( ancestral_state_pair(self.aln3, self.t3_2, 1, 0, self.ancestral_states3), 2 ) - self.assertFloatEqual( + assert_allclose( ancestral_state_pair(self.aln3, self.t3_2, 0, 0, self.ancestral_states3), 2 ) - self.assertFloatEqual( + assert_allclose( ancestral_state_pair(self.aln3, self.t3_2, 1, 1, self.ancestral_states3), 5 ) def test_ancestral_state_alignment_aln_difference(self): - """ancestral_state_alignment: difference aln -> different result - """ + """ancestral_state_alignment: difference aln -> different result""" expected = [[0, 0, 0], [0, 2, 0], [0, 0, 7.8]] actual = ancestral_state_alignment(self.aln1_1, self.t1, self.ancestral_states1) - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) expected = [[5, 5, 5], [5, 11.6, 11.6], [5, 11.6, 11.6]] actual = ancestral_state_alignment(self.aln1_5, self.t1, self.ancestral_states1) - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) def 
test_ancestral_state_position_aln_difference(self): - """ancestral_state_position: difference aln -> different result - """ + """ancestral_state_position: difference aln -> different result""" expected = [0, 0, 0] actual = ancestral_state_position( self.aln1_1, self.t1, 0, self.ancestral_states1 ) - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) expected = [0, 2, 0] actual = ancestral_state_position( self.aln1_1, self.t1, 1, self.ancestral_states1 ) - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) expected = [0, 0, 7.8] actual = ancestral_state_position( self.aln1_1, self.t1, 2, self.ancestral_states1 ) - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) expected = [5, 5, 5] actual = ancestral_state_position( self.aln1_5, self.t1, 0, self.ancestral_states1 ) - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) expected = [5, 11.6, 11.6] actual = ancestral_state_position( self.aln1_5, self.t1, 1, self.ancestral_states1 ) - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) expected = [5, 11.6, 11.6] actual = ancestral_state_position( self.aln1_5, self.t1, 2, self.ancestral_states1 ) - self.assertFloatEqual(actual, expected) + assert_allclose(actual, expected) def test_ancestral_state_pair_aln_difference(self): """acestral_state_pair: different aln -> different result """ - self.assertFloatEqual( + assert_allclose( ancestral_state_pair(self.aln1_1, self.t1, 0, 0, self.ancestral_states1), 0 ) - self.assertFloatEqual( + assert_allclose( ancestral_state_pair(self.aln1_1, self.t1, 1, 1, self.ancestral_states1), 2 ) - self.assertFloatEqual( + assert_allclose( ancestral_state_pair(self.aln1_1, self.t1, 2, 2, self.ancestral_states1), 7.8, ) - self.assertFloatEqual( + assert_allclose( ancestral_state_pair(self.aln1_5, self.t1, 0, 1, self.ancestral_states1), 5 ) - self.assertFloatEqual( + assert_allclose( ancestral_state_pair(self.aln1_5, 
self.t1, 0, 2, self.ancestral_states1), 5 ) - self.assertFloatEqual( + assert_allclose( ancestral_state_pair(self.aln1_5, self.t1, 1, 2, self.ancestral_states1), 11.6, ) def test_ancestral_state_pair_symmetry(self): """ancestral_state_pair: value[i,j] == value[j,i] """ - self.assertFloatEqual( + assert_allclose( ancestral_state_pair(self.aln1_5, self.t1, 0, 1, self.ancestral_states1), ancestral_state_pair(self.aln1_5, self.t1, 1, 0, self.ancestral_states1), ) - self.assertFloatEqual( + assert_allclose( ancestral_state_pair(self.aln1_5, self.t1, 0, 2, self.ancestral_states1), ancestral_state_pair(self.aln1_5, self.t1, 2, 0, self.ancestral_states1), ) - self.assertFloatEqual( + assert_allclose( ancestral_state_pair(self.aln1_5, self.t1, 1, 2, self.ancestral_states1), ancestral_state_pair(self.aln1_5, self.t1, 2, 1, self.ancestral_states1), ) @@ -3419,7 +3333,7 @@ for w in w1, w2: for k, d in w: weights += list(d.values()) - self.assertFloatEqual(sum(weights), 0.5) + assert_allclose(sum(weights), 0.5) w2.sort() self.assertEqual(w2, e) @@ -3435,12 +3349,12 @@ weights_i = make_weights(col_i, col_i.sum) weights_j = make_weights(col_j, col_j.sum) entropy = mi(col_i.entropy, col_j.entropy, pair_freqs.entropy) - self.assertFloatEqual(entropy, _calc_mi()) + assert_allclose(entropy, _calc_mi()) scales = calc_pair_scale(data, col_i, col_j, weights_i, weights_j) scaled_mi = 1 - sum( [w * pair_freqs[pr] for pr, e, w in scales if entropy <= e] ) - self.assertFloatEqual(scaled_mi, expected_smi) + assert_allclose(scaled_mi, expected_smi) data = [ "BN", @@ -3493,7 +3407,7 @@ alignment""" arr = resampled_mi_alignment(self.aln) # expected value from hand calculation - self.assertFloatEqual(arr.tolist(), [[1.0, 0.78333333], [0.78333333, 1.0]]) + assert_allclose(arr.tolist(), [[1.0, 0.78333333], [0.78333333, 1.0]]) ALN_FILE = """Seq_1 ACDEFG diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_distance.py python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_distance.py --- 
python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_distance.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_distance.py 2020-12-20 23:35:03.000000000 +0000 @@ -52,7 +52,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Yicheng Zhu", "Ben Kaehler"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_likelihood_function.py python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_likelihood_function.py --- python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_likelihood_function.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_likelihood_function.py 2020-12-20 23:35:03.000000000 +0000 @@ -14,6 +14,8 @@ import os import warnings +from unittest import TestCase, main + import numpy from numpy import dot, ones @@ -40,7 +42,6 @@ ) from cogent3.maths.matrix_exponentiation import PadeExponentiator as expm from cogent3.maths.stats.information_criteria import aic, bic -from cogent3.util.unit_test import TestCase, main warnings.filterwarnings("ignore", "Motif probs overspecified") @@ -60,7 +61,7 @@ "Ananias Iliadis", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -268,7 +269,7 @@ likelihood_function = self._makeLikelihoodFunction(submod) likelihood_function.set_param_rule("omega", value=0.5, is_constant=True) evolve_lnL = likelihood_function.get_log_likelihood() - self.assertFloatEqual(evolve_lnL, -103.05742415448259) + assert_allclose(evolve_lnL, -103.05742415448259) def test_nucleotide(self): """test a nucleotide model.""" @@ -279,7 +280,7 @@ likelihood_function = self._makeLikelihoodFunction(submod) 
self.assertEqual(likelihood_function.get_num_free_params(), 0) evolve_lnL = likelihood_function.get_log_likelihood() - self.assertFloatEqual(evolve_lnL, -148.6455087258624) + assert_allclose(evolve_lnL, -148.6455087258624) def test_solved_nucleotide(self): """test a solved nucleotide model.""" @@ -292,7 +293,7 @@ self.assertTrue(lf.lnL > -152) def test_discrete_nucleotide(self): - """test that partially discrete nucleotide model can be constructed, + """test that partially discrete nucleotide model can be constructed, differs from continuous, and has the expected number of free params""" submod = TimeReversibleNucleotide( equal_motif_probs=True, motif_probs=None, predicates={"kappa": "transition"} @@ -313,7 +314,7 @@ ) likelihood_function = self._makeLikelihoodFunction(submod) evolve_lnL = likelihood_function.get_log_likelihood() - self.assertFloatEqual(evolve_lnL, -118.35045332768402) + assert_allclose(evolve_lnL, -118.35045332768402) def test_protein(self): """test a protein model.""" @@ -322,7 +323,7 @@ likelihood_function = self._makeLikelihoodFunction(submod, translate=True) evolve_lnL = likelihood_function.get_log_likelihood() - self.assertFloatEqual(evolve_lnL, -91.35162044257062) + assert_allclose(evolve_lnL, -91.35162044257062) class LikelihoodFunctionTests(TestCase): @@ -376,10 +377,10 @@ nfp = lf.get_num_free_params() lnL = lf.get_log_likelihood() l = len(self.data) - self.assertFloatEqual(lf.get_aic(), aic(lnL, nfp)) - self.assertFloatEqual(lf.get_aic(second_order=True), aic(lnL, nfp, l)) + assert_allclose(lf.get_aic(), aic(lnL, nfp)) + assert_allclose(lf.get_aic(second_order=True), aic(lnL, nfp, l)) - self.assertFloatEqual(lf.get_bic(), bic(lnL, nfp, l)) + assert_allclose(lf.get_bic(), bic(lnL, nfp, l)) def test_result_str(self): # actualy more a test of self._setLengthsAndBetas() @@ -396,15 +397,15 @@ 4.0000 ------ ============================= - edge parent length +edge parent length ----------------------------- - Human edge.0 0.3000 +Human edge.0 
0.3000 HowlerMon edge.0 0.4000 - edge.0 edge.1 0.7000 - Mouse edge.1 0.5000 - edge.1 root 0.6000 -NineBande root 0.2000 - DogFaced root 0.1000 +edge.0 edge.1 0.7000 +Mouse edge.1 0.5000 +edge.1 root 0.6000 +NineBande root 0.2000 +DogFaced root 0.1000 ----------------------------- ==================================== A C G T @@ -420,15 +421,15 @@ log-likelihood = -382.5399 number of free parameters = 14 =============================== - edge parent length beta +edge parent length beta ------------------------------- - Human edge.0 1.00 1.00 +Human edge.0 1.00 1.00 HowlerMon edge.0 1.00 1.00 - edge.0 edge.1 1.00 1.00 - Mouse edge.1 1.00 1.00 - edge.1 root 1.00 1.00 -NineBande root 1.00 1.00 - DogFaced root 1.00 1.00 +edge.0 edge.1 1.00 1.00 +Mouse edge.1 1.00 1.00 +edge.1 root 1.00 1.00 +NineBande root 1.00 1.00 +DogFaced root 1.00 1.00 ------------------------------- ====================== A C G T @@ -575,45 +576,15 @@ 6.0000 ------ ============================= - edge parent length ------------------------------ - Human edge.0 4.0000 -HowlerMon edge.0 4.0000 - edge.0 edge.1 4.0000 - Mouse edge.1 4.0000 - edge.1 root 4.0000 -NineBande root 4.0000 - DogFaced root 4.0000 ------------------------------ -==================================== - A C G T ------------------------------------- -0.2500 0.2500 0.2500 0.2500 -------------------------------------""", - ) - - # self.submodel.setScaleRule("ts",['beta']) - # self.submodel.setScaleRule("tv",['beta'], exclude_pars = True) - self.assertEqual( - str(likelihood_function), - """Likelihood function statistics -log-likelihood = -413.1886 -number of free parameters = 0 -====== - beta ------- -6.0000 ------- -============================= - edge parent length +edge parent length ----------------------------- - Human edge.0 4.0000 +Human edge.0 4.0000 HowlerMon edge.0 4.0000 - edge.0 edge.1 4.0000 - Mouse edge.1 4.0000 - edge.1 root 4.0000 -NineBande root 4.0000 - DogFaced root 4.0000 +edge.0 edge.1 4.0000 +Mouse edge.1 4.0000 
+edge.1 root 4.0000 +NineBande root 4.0000 +DogFaced root 4.0000 ----------------------------- ==================================== A C G T @@ -658,15 +629,15 @@ log-likelihood = -382.5399 number of free parameters = 14 ======================================= - edge parent length beta +edge parent length beta --------------------------------------- - Human edge.0 1.0000 1.0000 +Human edge.0 1.0000 1.0000 HowlerMon edge.0 1.0000 1.0000 - edge.0 edge.1 1.0000 1.0000 - Mouse edge.1 1.0000 1.0000 - edge.1 root 1.0000 1.0000 -NineBande root 1.0000 1.0000 - DogFaced root 1.0000 1.0000 +edge.0 edge.1 1.0000 1.0000 +Mouse edge.1 1.0000 1.0000 +edge.1 root 1.0000 1.0000 +NineBande root 1.0000 1.0000 +DogFaced root 1.0000 1.0000 --------------------------------------- ==================================== A C G T @@ -737,8 +708,8 @@ Q = lf.get_rate_matrix_for_edge("NineBande", calibrated=False) Q2 = lf.get_rate_matrix_for_edge("NineBande", calibrated=True) P = lf.get_psub_for_edge("NineBande") - self.assertFloatEqual(expm(Q.array)(1.0), P.array) - self.assertFloatEqual(expm(Q2.array)(length), P.array) + assert_allclose(expm(Q.array)(1.0), P.array) + assert_allclose(expm(Q2.array)(length), P.array) # should fail for a discrete Markov model dm = ns_substitution_model.DiscreteSubstitutionModel(DNA.alphabet) @@ -835,7 +806,7 @@ for rule in rules: lf.set_param_rule(**rule) new_lnL = lf.get_log_likelihood() - self.assertFloatEqual(new_lnL, lnL) + assert_allclose(new_lnL, lnL) def test_get_param_rules_discrete(self): """discrete time models produce valid rules""" @@ -1104,7 +1075,7 @@ glf.set_alignment(_aln) glf.set_name("GN") glf.initialise_from_nested(slf) - self.assertFloatEqual(glf.get_log_likelihood(), slf.get_log_likelihood()) + assert_allclose(glf.get_log_likelihood(), slf.get_log_likelihood()) def test_initialise_from_nested_diff(self): """non-reversible likelihood initialised from nested, non-scoped, time-reversible""" @@ -1129,7 +1100,7 @@ glf.set_alignment(_aln) 
glf.set_name("GN") glf.initialise_from_nested(slf) - self.assertFloatEqual(glf.get_log_likelihood(), slf.get_log_likelihood()) + assert_allclose(glf.get_log_likelihood(), slf.get_log_likelihood()) def test_initialise_from_nested_same_type_tr(self): """time-reversible likelihood initialised from nested, non-scoped, time-reversible""" @@ -1153,7 +1124,7 @@ glf.set_alignment(_aln) glf.set_name("GTR") glf.initialise_from_nested(slf) - self.assertFloatEqual(glf.get_log_likelihood(), slf.get_log_likelihood()) + assert_allclose(glf.get_log_likelihood(), slf.get_log_likelihood()) def test_initialise_from_nested_same_type_tr_scoped(self): """time-reversible likelihood initialised from nested, scoped, time-reversible""" @@ -1180,7 +1151,7 @@ glf.set_alignment(_aln) glf.set_name("GTR") glf.initialise_from_nested(slf) - self.assertFloatEqual(glf.get_log_likelihood(), slf.get_log_likelihood()) + assert_allclose(glf.get_log_likelihood(), slf.get_log_likelihood()) def test_initialise_from_nested_same_type_nr(self): """non-reversible likelihood initialised from nested, non-scoped, non-reversible""" @@ -1213,7 +1184,7 @@ glf.initialise_from_nested(slf) expect = slf.get_log_likelihood() got = glf.get_log_likelihood() - self.assertFloatEqual(got, expect) + assert_allclose(got, expect) def test_initialise_from_nested_same_type_nr_scoped(self): """non-reversible likelihood initialised from nested, scoped, non-reversible""" @@ -1259,7 +1230,7 @@ lf.set_param_rule("kappa", init=1) lf.set_param_rule("length", edge="a", init=length) len_dict = lf.get_lengths_as_ens() - self.assertFloatEqual(len_dict["a"], length) + assert_allclose(len_dict["a"], length) def test_get_lengths_as_ens_not_equal(self): """lengths do not equal ENS for a non-reversible model""" @@ -1352,6 +1323,31 @@ lf.set_alignment(_aln) _ = lf.to_rich_dict() + # tests multiple alignments + half = len(self.data) // 2 + aln1 = self.data[:half] + aln2 = self.data[half:] + loci_names = ["1st-half", "2nd-half"] + loci = [aln1, 
aln2] + tree = make_tree(tip_names=self.data.names) + model = get_model("HKY85") + lf = model.make_likelihood_function(tree, loci=loci_names) + lf.set_alignment(loci) + for i, loci_name in enumerate(loci_names): + d = lf.to_rich_dict() + alignment = d["alignment"] + motif_probs = d["motif_probs"] + self.assertEqual(alignment[loci_name], loci[i].to_rich_dict()) + self.assertEqual(motif_probs[loci_name], loci[i].get_motif_probs()) + # tests single alignment + lf = model.make_likelihood_function(tree) + lf.set_alignment(aln1) + d = lf.to_rich_dict() + alignment = d["alignment"] + motif_probs = d["motif_probs"] + self.assertEqual(alignment, aln1.to_rich_dict()) + self.assertEqual(motif_probs, aln1.get_motif_probs()) + def test_repr(self): """repr should not fail""" lf = self._makeLikelihoodFunction() @@ -1364,6 +1360,13 @@ got = lf._repr_html_() self.assertIn("

log-likelihood", got) + def test_get_set_name_properties(self): + """correctly creates lf name attr""" + lf = get_model("HKY85").make_likelihood_function(self.tree) + self.assertEqual(lf.name, lf.model.name) + lf.name = "" + self.assertEqual(lf.name, "") + class ComparisonTests(TestCase): """comparisons of likelihood calcs with earlier pycogent""" @@ -1970,7 +1973,7 @@ def test_loci(self): """recap multiple-loci""" - from cogent3.recalculation.scope import EACH, ALL + from cogent3.recalculation.scope import ALL, EACH aln = load_aligned_seqs("data/long_testseqs.fasta") half = len(aln) // 2 diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_models.py python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_models.py --- python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_models.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_models.py 2020-12-20 23:35:03.000000000 +0000 @@ -24,7 +24,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_motifchange.py python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_motifchange.py --- python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_motifchange.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_motifchange.py 2020-12-20 23:35:03.000000000 +0000 @@ -16,7 +16,7 @@ "Brett Easton", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_newq.py python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_newq.py --- 
python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_newq.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_newq.py 2020-12-20 23:35:03.000000000 +0000 @@ -2,6 +2,8 @@ import warnings +from unittest import TestCase, main + from numpy import dot, ones from numpy.testing import assert_allclose @@ -22,7 +24,6 @@ TimeReversibleNucleotide, ) from cogent3.maths.matrix_exponentiation import PadeExponentiator as expm -from cogent3.util.unit_test import TestCase, main warnings.filterwarnings("ignore", "Motif probs overspecified") @@ -33,7 +34,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -124,9 +125,7 @@ nuc_lf.set_alignment(self.aln) new_di_lf.set_alignment(self.aln) - self.assertFloatEqual( - nuc_lf.get_log_likelihood(), new_di_lf.get_log_likelihood() - ) + assert_allclose(nuc_lf.get_log_likelihood(), new_di_lf.get_log_likelihood()) def test_lf_display(self): """str of likelihood functions should not fail""" @@ -201,7 +200,7 @@ for other in results[:i]: self.assertNotAlmostEqual(other, lh, places=2) for other in results[i:]: - self.assertFloatEqual(other, lh) + assert_allclose(other, lh) results.append(lh) def test_position_specific_mprobs(self): @@ -269,9 +268,7 @@ cd_lf = cd.make_likelihood_function(self.tree) cd_lf.set_param_rule("length", is_independent=False, init=0.4) cd_lf.set_alignment(self.aln) - self.assertFloatEqual( - cd_lf.get_log_likelihood(), ps_lf.get_log_likelihood() - ) + assert_allclose(cd_lf.get_log_likelihood(), ps_lf.get_log_likelihood()) compare_models(self.posn_root_probs, 2) # trinucleotide @@ -361,11 +358,11 @@ d = dot(e, d) prob_vectors = lf.get_motif_probs_by_node() - self.assertFloatEqual(prob_vectors["a"].array, a) - self.assertFloatEqual(prob_vectors["b"].array, b) - 
self.assertFloatEqual(prob_vectors["c"].array, c) - self.assertFloatEqual(prob_vectors["d"].array, d) - self.assertFloatEqual(prob_vectors["edge.0"].array, e) + assert_allclose(prob_vectors["a"].array, a) + assert_allclose(prob_vectors["b"].array, b) + assert_allclose(prob_vectors["c"].array, c) + assert_allclose(prob_vectors["d"].array, d) + assert_allclose(prob_vectors["edge.0"].array, e) def test_get_motif_probs_by_node_mg94(self): """handles different statespace dimensions from process and stationary distribution""" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_ns_substitution_model.py python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_ns_substitution_model.py --- python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_ns_substitution_model.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_ns_substitution_model.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,5 +1,7 @@ import warnings +from unittest import TestCase, main + import numpy from numpy import array, dot, empty, ones @@ -18,12 +20,7 @@ StrandSymmetric, ) from cogent3.evolve.predicate import MotifChange -from cogent3.evolve.substitution_model import ( - Parametric, - TimeReversibleNucleotide, -) -from cogent3.maths.matrix_exponentiation import PadeExponentiator as expm -from cogent3.util.unit_test import TestCase, main +from cogent3.evolve.substitution_model import TimeReversibleNucleotide warnings.filterwarnings("ignore", "Motif probs overspecified") @@ -34,7 +31,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Ananias Iliadis"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -182,7 +179,7 @@ gen_lf = self.make_cached("general", max_evaluations=2) gen_lnL = gen_lf.get_log_likelihood() dis_lf = self._setup_discrete_from_general(gen_lf) - 
self.assertFloatEqual(gen_lnL, dis_lf.get_log_likelihood()) + assert_allclose(gen_lnL, dis_lf.get_log_likelihood()) def test_paralinear_consistent_discrete_continuous(self): """paralinear masure should be consistent between the two classes""" @@ -202,7 +199,7 @@ rules = sm_lf.get_param_rules() gen_lf.apply_param_rules(rules) gen_lnL = gen_lf.get_log_likelihood() - self.assertFloatEqualAbs(sm_lnL, gen_lnL, eps=0.1) + assert_allclose(sm_lnL, gen_lnL, rtol=0.1) def test_general_stationary(self): """General stationary should be close to General""" @@ -210,7 +207,7 @@ gen_lf = self.make_cached("general", max_evaluations=25) gen_stat_lnL = gen_stat_lf.get_log_likelihood() gen_lnL = gen_lf.get_log_likelihood() - self.assertLessThan(gen_stat_lnL, gen_lnL) + self.assertLess(gen_stat_lnL, gen_lnL) def test_general_stationary_is_stationary(self): """should be stationary""" @@ -220,7 +217,7 @@ for edge in self.tree: psub = gen_stat_lf.get_psub_for_edge(edge.name) pi = dot(mprobs, psub.array) - self.assertFloatEqual(mprobs, pi) + assert_allclose(mprobs, pi) def test_general_is_not_stationary(self): """should not be stationary""" @@ -231,7 +228,7 @@ psub = gen_lf.get_psub_for_edge(edge.name) pi = dot(mprobs, psub.array) try: - self.assertFloatEqual(mprobs, pi) + assert_allclose(mprobs, pi) except AssertionError: pass diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_parameter_controller.py python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_parameter_controller.py --- python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_parameter_controller.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_parameter_controller.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,23 +1,26 @@ import os import warnings +from unittest import TestCase, main + import cogent3.evolve.parameter_controller import cogent3.evolve.substitution_model from cogent3 import make_aligned_seqs, make_tree -from cogent3.maths import optimisers -from 
cogent3.util.unit_test import TestCase, main __author__ = "Peter Maxwell" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" +from numpy.testing import assert_allclose, assert_almost_equal + + base_path = os.getcwd() data_path = os.path.join(base_path, "data") @@ -68,8 +71,10 @@ def compare_mprobs(got, exp): # handle min val - for e in got: - self.assertFloatEqual(got[e], exp[e], eps=3e-6) + motifs = list(got) + assert_almost_equal( + [got[m] for m in motifs], [exp[m] for m in motifs], decimal=5 + ) model = cogent3.evolve.substitution_model.TimeReversibleNucleotide( model_gaps=True, motif_probs=None @@ -83,7 +88,7 @@ compare_mprobs(got, mprobs) lf.set_motif_probs_from_data(self.al[:1], is_constant=True) - self.assertFloatEqual(lf.get_motif_probs()["G"], 0.6, eps=3e-6) + assert_almost_equal(lf.get_motif_probs()["G"], 0.6, decimal=4) lf.set_motif_probs_from_data(self.al[:1], pseudocount=1) self.assertNotEqual(lf.get_motif_probs()["G"], 0.6) @@ -102,7 +107,7 @@ "G": 7.5 / 27, } compare_mprobs(motif_probs, correct_probs) - self.assertFloatEqual(sum(motif_probs.values()), 1.0) + assert_allclose(sum(motif_probs.values()), 1.0) def test_setMultiLocus(self): """2 loci each with own mprobs""" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_scale_rules.py python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_scale_rules.py --- python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_scale_rules.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_scale_rules.py 2020-12-20 23:35:03.000000000 +0000 @@ -15,7 +15,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" 
+__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_simulation.py python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_simulation.py --- python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_simulation.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_simulation.py 2020-12-20 23:35:03.000000000 +0000 @@ -13,7 +13,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_substitution_model.py python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_substitution_model.py --- python-cogent-2020.6.30a0+dfsg/tests/test_evolve/test_substitution_model.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_evolve/test_substitution_model.py 2020-12-20 23:35:03.000000000 +0000 @@ -13,7 +13,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_format/__init__.py python-cogent-2020.12.21a+dfsg/tests/test_format/__init__.py --- python-cogent-2020.6.30a0+dfsg/tests/test_format/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_format/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -11,7 +11,7 @@ "Jeremy Widmann", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = 
"gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_format/test_bedgraph.py python-cogent-2020.12.21a+dfsg/tests/test_format/test_bedgraph.py --- python-cogent-2020.6.30a0+dfsg/tests/test_format/test_bedgraph.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_format/test_bedgraph.py 2020-12-20 23:35:03.000000000 +0000 @@ -10,7 +10,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_format/test_clustal.py python-cogent-2020.12.21a+dfsg/tests/test_format/test_clustal.py --- python-cogent-2020.6.30a0+dfsg/tests/test_format/test_clustal.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_format/test_clustal.py 2020-12-20 23:35:03.000000000 +0000 @@ -13,15 +13,14 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Jeremy Widmann"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Jeremy Widmann" __email__ = "jeremy.widmann@colorado.edu" __status__ = "Production" class ClustalTests(TestCase): - """Tests for Clustal writer. 
- """ + """Tests for Clustal writer.""" def setUp(self): """Setup for Clustal tests.""" @@ -94,7 +93,7 @@ clustal_from_alignment(self.alignment_dict), self.clustal_with_label ) self.assertEqual( - clustal_from_alignment(self.alignment_dict, interleave_len=2), + clustal_from_alignment(self.alignment_dict, wrap=2), self.clustal_with_label_lw2, ) @@ -105,7 +104,7 @@ self.clustal_with_label_reordered, ) self.assertEqual( - clustal_from_alignment(self.alignment_object, interleave_len=2), + clustal_from_alignment(self.alignment_object, wrap=2), self.clustal_with_label_lw2_reordered, ) diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_format/test_fasta.py python-cogent-2020.12.21a+dfsg/tests/test_format/test_fasta.py --- python-cogent-2020.6.30a0+dfsg/tests/test_format/test_fasta.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_format/test_fasta.py 2020-12-20 23:35:03.000000000 +0000 @@ -13,15 +13,14 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Jeremy Widmann", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Jeremy Widmann" __email__ = "jeremy.widmann@colorado.edu" __status__ = "Production" class FastaTests(TestCase): - """Tests for Fasta writer. 
- """ + """Tests for Fasta writer.""" def setUp(self): """Setup for Fasta tests.""" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_maths/__init__.py python-cogent-2020.12.21a+dfsg/tests/test_maths/__init__.py --- python-cogent-2020.6.30a0+dfsg/tests/test_maths/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_maths/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -20,7 +20,7 @@ "Antonio Gonzalez Pena", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_distance_transform.py python-cogent-2020.12.21a+dfsg/tests/test_maths/test_distance_transform.py --- python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_distance_transform.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_maths/test_distance_transform.py 2020-12-20 23:35:03.000000000 +0000 @@ -2,7 +2,9 @@ """Unit tests for distance_transform.py functions. 
""" -from numpy import array, diag, ones, shape, sqrt +from unittest import TestCase, main + +from numpy import array, ones, shape, sqrt from cogent3.maths.distance_transform import ( binary_dist_chisq, @@ -39,18 +41,19 @@ trans_specprof, zeros, ) -from cogent3.util.unit_test import TestCase, main __author__ = "Justin Kuczynski" __copyright__ = "Copyright 2007-2020, The Cogent Project" __contributors__ = ["Justin Kuczynski", "Zongzhi Liu", "Greg Caporaso"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Justin Kuczynski" __email__ = "justinak@gmail.com" __status__ = "Prototype" +from numpy.testing import assert_allclose, assert_equal + class functionTests(TestCase): """Tests of top-level functions.""" @@ -88,17 +91,17 @@ tests inputs of empty mtx, zeros, and results compared with calcs done by hand""" - self.assertFloatEqual(dist_canberra(self.zeromtx), zeros((4, 4), "d")) + assert_allclose(dist_canberra(self.zeromtx), zeros((4, 4), "d")) mtx1expected = array([[0.0, 46.2 / 52.2], [46.2 / 52.2, 0.0]], "d") - self.assertFloatEqual(dist_canberra(self.mtx1), mtx1expected) + assert_allclose(dist_canberra(self.mtx1), mtx1expected) sparse1exp = ones((self.sparse1.shape[0], self.sparse1.shape[0])) # remove diagonal sparse1exp[0, 0] = sparse1exp[1, 1] = sparse1exp[2, 2] = sparse1exp[3, 3] = 0.0 sparse1exp[0, 1] = sparse1exp[1, 0] = (5.33 - 0.4) / (5.33 + 0.4) - self.assertFloatEqual(dist_canberra(self.sparse1), sparse1exp) + assert_allclose(dist_canberra(self.sparse1), sparse1exp) def test_dist_canberra_bug(self): i = array([[0, 0, 1], [0, 1, 1]]) @@ -107,7 +110,7 @@ ) expected = array([[0.0, d], [d, 0.0]]) actual = dist_canberra(i) - self.assertFloatEqual(expected, actual) + assert_allclose(expected, actual) def test_dist_euclidean(self): """tests dist_euclidean @@ -115,7 +118,7 @@ tests inputs of empty mtx, zeros, and dense1 compared with calcs done by hand""" - self.assertFloatEqual(dist_euclidean(self.zeromtx), zeros((4, 
4), "d")) + assert_allclose(dist_euclidean(self.zeromtx), zeros((4, 4), "d")) dense1expected = array( [ @@ -125,7 +128,7 @@ ], "d", ) - self.assertFloatEqual(dist_euclidean(self.dense1), dense1expected) + assert_allclose(dist_euclidean(self.dense1), dense1expected) def test_dist_gower(self): """tests dist_gower @@ -133,10 +136,10 @@ tests inputs of empty mtx, zeros, and results compared with calcs done by hand""" - self.assertFloatEqual(dist_gower(self.zeromtx), zeros((4, 4), "d")) + assert_allclose(dist_gower(self.zeromtx), zeros((4, 4), "d")) mtx1expected = array([[0.0, 2.0], [2.0, 0.0]], "d") - self.assertFloatEqual(dist_gower(self.mtx1), mtx1expected) + assert_allclose(dist_gower(self.mtx1), mtx1expected) sparse1expected = array( [ @@ -147,7 +150,7 @@ ], "d", ) - self.assertFloatEqual(dist_gower(self.sparse1), sparse1expected) + assert_allclose(dist_gower(self.sparse1), sparse1expected) def test_dist_manhattan(self): """tests dist_manhattan @@ -155,20 +158,20 @@ tests inputs of empty mtx, zeros, and dense1 compared with calcs done by hand""" - self.assertFloatEqual(dist_manhattan(self.zeromtx), zeros((4, 4), "d")) + assert_allclose(dist_manhattan(self.zeromtx), zeros((4, 4), "d")) dense1expected = array( [[0.0, 5.0, 019.9], [5.0, 0.0, 24.9], [19.9, 24.90, 0.0]], "d" ) - self.assertFloatEqual(dist_manhattan(self.dense1), dense1expected) + assert_allclose(dist_manhattan(self.dense1), dense1expected) def test_dist_abund_jaccard(self): """dist_abund_jaccard should compute distances for dense1 and mtx1""" mtx1_expected = array([[0, 0.25], [0.25, 0]], "d") - self.assertEqual(dist_abund_jaccard(self.mtx1), mtx1_expected) + assert_equal(dist_abund_jaccard(self.mtx1), mtx1_expected) dense1_expected = zeros((3, 3), "d") - self.assertEqual(dist_abund_jaccard(self.dense1), dense1_expected) + assert_equal(dist_abund_jaccard(self.dense1), dense1_expected) sparse1_expected = array( [ @@ -179,7 +182,7 @@ ], "d", ) - self.assertEqual(dist_abund_jaccard(self.sparse1), 
sparse1_expected) + assert_equal(dist_abund_jaccard(self.sparse1), sparse1_expected) def test_dist_morisita_horn(self): """tests dist_morisita_horn @@ -187,11 +190,11 @@ tests inputs of empty mtx, zeros, and dense1 compared with calcs done by hand""" - self.assertFloatEqual(dist_morisita_horn(self.zeromtx), zeros((4, 4), "d")) + assert_allclose(dist_morisita_horn(self.zeromtx), zeros((4, 4), "d")) a = 1 - 2 * 69.3 / (26 / 16.0 * 23.1 * 4) mtx1expected = array([[0, a], [a, 0]], "d") - self.assertFloatEqual(dist_morisita_horn(self.mtx1), mtx1expected) + assert_allclose(dist_morisita_horn(self.mtx1), mtx1expected) def test_dist_bray_curtis(self): """tests dist_bray_curtis @@ -199,10 +202,10 @@ tests inputs of empty mtx, zeros, and mtx1 compared with calcs done by hand""" - self.assertFloatEqual(dist_manhattan(self.zeromtx), zeros((4, 4) * 1, "d")) + assert_allclose(dist_manhattan(self.zeromtx), zeros((4, 4) * 1, "d")) mtx1expected = array([[0, 21.1 / 27.1], [21.1 / 27.1, 0]], "d") - self.assertFloatEqual(dist_bray_curtis(self.mtx1), mtx1expected) + assert_allclose(dist_bray_curtis(self.mtx1), mtx1expected) def test_dist_bray_curtis_faith(self): """tests dist_bray_curtis_faith @@ -210,10 +213,10 @@ tests inputs of empty mtx, zeros, and mtx1 compared with calcs done by hand""" - self.assertFloatEqual(dist_manhattan(self.zeromtx), zeros((4, 4) * 1, "d")) + assert_allclose(dist_manhattan(self.zeromtx), zeros((4, 4) * 1, "d")) mtx1expected = array([[0, 21.1 / 27.1], [21.1 / 27.1, 0]], "d") - self.assertFloatEqual(dist_bray_curtis_faith(self.mtx1), mtx1expected) + assert_allclose(dist_bray_curtis_faith(self.mtx1), mtx1expected) def test_dist_soergel(self): """tests dist_soergel @@ -221,13 +224,13 @@ tests inputs of empty mtx, zeros, and dense1 compared with calcs done by hand/manhattan dist""" - self.assertFloatEqual(dist_soergel(self.zeromtx), zeros((4, 4) * 1, "d")) + assert_allclose(dist_soergel(self.zeromtx), zeros((4, 4) * 1, "d")) dense1expected = 
dist_manhattan(self.dense1) dense1norm = array([[1, 8, 23], [8, 1, 27], [23, 27, 1]], "d") dense1expected /= dense1norm - self.assertFloatEqual(dist_soergel(self.dense1), dense1expected) + assert_allclose(dist_soergel(self.dense1), dense1expected) def test_dist_kulczynski(self): """tests dist_kulczynski @@ -235,7 +238,7 @@ tests inputs of empty mtx, zeros, and mtx1 compared with calcs done by hand""" - self.assertFloatEqual(dist_kulczynski(self.zeromtx), zeros((4, 4) * 1, "d")) + assert_allclose(dist_kulczynski(self.zeromtx), zeros((4, 4) * 1, "d")) mtx1expected = array( [ @@ -245,27 +248,27 @@ "d", ) - self.assertFloatEqual(dist_kulczynski(self.mtx1), mtx1expected) + assert_allclose(dist_kulczynski(self.mtx1), mtx1expected) def test_dist_pearson(self): """tests dist_pearson tests inputs of empty mtx, zeros, mtx compared with calcs done - by hand, and an example from + by hand, and an example from http://davidmlane.com/hyperstat/A56626.html """ - self.assertFloatEqual(dist_pearson(self.zeromtx), zeros((4, 4), "d")) + assert_allclose(dist_pearson(self.zeromtx), zeros((4, 4), "d")) mtx1expected = array([[0, 0], [0, 0]], "d") - self.assertFloatEqual(dist_pearson(self.mtx1), mtx1expected) + assert_allclose(dist_pearson(self.mtx1), mtx1expected) # example 1 from http://davidmlane.com/hyperstat/A56626.html ex1 = array([[1, 2, 3], [2, 5, 6]], "d") ex1res = 1 - 4.0 / sqrt(2.0 * (8 + 2.0 / 3.0)) ex1expected = array([[0, ex1res], [ex1res, 0]], "d") - self.assertFloatEqual(dist_pearson(ex1), ex1expected) + assert_allclose(dist_pearson(ex1), ex1expected) def test_dist_spearman_approx(self): """tests dist_spearman_approx @@ -273,9 +276,7 @@ tests inputs of empty mtx, zeros, and an example from wikipedia """ - self.assertFloatEqual( - dist_spearman_approx(self.zeromtx), zeros((4, 4) * 1, "d") - ) + assert_allclose(dist_spearman_approx(self.zeromtx), zeros((4, 4) * 1, "d")) # ex1 from wikipedia Spearman's_rank_correlation_coefficient 20jan2009 ex1 = array( @@ -287,14 +288,14 @@ ) 
ex1res = 6.0 * 194.0 / (10.0 * 99.0) ex1expected = array([[0, ex1res], [ex1res, 0]], "d") - self.assertFloatEqual(dist_spearman_approx(ex1), ex1expected) + assert_allclose(dist_spearman_approx(ex1), ex1expected) # now binary fns def test_binary_dist_otu_gain(self): """ binary OTU gain functions as expected """ actual = binary_dist_otu_gain(self.input_binary_dist_otu_gain1) expected = array([[0, 1, 2, 2], [1, 0, 2, 1], [1, 1, 0, 1], [1, 0, 1, 0]]) - self.assertEqual(actual, expected) + assert_equal(actual, expected) def test_binary_dist_chisq(self): """tests binary_dist_chisq @@ -302,10 +303,10 @@ tests inputs of empty mtx, zeros, and mtx1 compared with calcs done by hand""" - self.assertFloatEqual(binary_dist_chisq(self.zeromtx), zeros((4, 4), "d")) + assert_allclose(binary_dist_chisq(self.zeromtx), zeros((4, 4), "d")) mtx1expected = array([[0, sqrt(9 / 8.0)], [sqrt(9 / 8.0), 0]], "d") - self.assertFloatEqual(binary_dist_chisq(self.mtx1), mtx1expected) + assert_allclose(binary_dist_chisq(self.mtx1), mtx1expected) def test_binary_dist_chord(self): """tests binary_dist_chord @@ -313,7 +314,7 @@ tests inputs of empty mtx, zeros, and results compared with calcs done by hand""" - self.assertFloatEqual(binary_dist_chord(self.zeromtx), zeros((4, 4), "d")) + assert_allclose(binary_dist_chord(self.zeromtx), zeros((4, 4), "d")) mtx1expected = array( [ @@ -322,7 +323,7 @@ ], "d", ) - self.assertFloatEqual(binary_dist_chord(self.mtx1), mtx1expected) + assert_allclose(binary_dist_chord(self.mtx1), mtx1expected) def test_binary_dist_lennon(self): """tests binary_dist_lennon @@ -330,14 +331,14 @@ tests inputs of empty mtx, zeros, and results compared with calcs done by hand""" - self.assertFloatEqual(binary_dist_lennon(self.zeromtx), zeros((4, 4), "d")) + assert_allclose(binary_dist_lennon(self.zeromtx), zeros((4, 4), "d")) mtxa = array([[5.2, 9, 0.2], [0, 99, 1], [0, 0.0, 8233.1]], "d") - self.assertFloatEqual(binary_dist_lennon(mtxa), zeros((3, 3), "d")) + 
assert_allclose(binary_dist_lennon(mtxa), zeros((3, 3), "d")) mtxb = array([[5.2, 0, 0.2, 9.2], [0, 0, 0, 1], [0, 3.2, 0, 8233.1]], "d") mtxbexpected = array([[0, 0, 0.5], [0, 0, 0], [0.5, 0, 0]], "d") - self.assertFloatEqual(binary_dist_lennon(mtxb), mtxbexpected) + assert_allclose(binary_dist_lennon(mtxb), mtxbexpected) def test_binary_dist_pearson(self): """tests binary_dist_pearson @@ -345,9 +346,9 @@ tests inputs of empty mtx, zeros, and dense1 compared with calcs done by hand""" - self.assertFloatEqual(binary_dist_pearson(self.zeromtx), zeros((4, 4), "d")) + assert_allclose(binary_dist_pearson(self.zeromtx), zeros((4, 4), "d")) - self.assertFloatEqual(binary_dist_pearson(self.dense1), zeros((3, 3))) + assert_allclose(binary_dist_pearson(self.dense1), zeros((3, 3))) def test_binary_dist_jaccard(self): """tests binary_dist_jaccard @@ -355,19 +356,19 @@ tests inputs of empty mtx, zeros, and sparse1 compared with calcs done by hand""" - self.assertFloatEqual(binary_dist_jaccard(self.zeromtx), zeros((4, 4), "d")) + assert_allclose(binary_dist_jaccard(self.zeromtx), zeros((4, 4), "d")) sparse1expected = array( [[0, 0, 1.0, 1.0], [0, 0, 1, 1], [1, 1, 0, 1], [1, 1, 1, 0]], "d" ) - self.assertFloatEqual(binary_dist_jaccard(self.sparse1), sparse1expected) + assert_allclose(binary_dist_jaccard(self.sparse1), sparse1expected) sparse1expected = dist_manhattan(self.sparse1.astype(bool)) sparse1norm = array( [[1, 1, 2, 1], [1, 1, 2, 1], [2, 2, 1, 1], [1, 1, 1, 100]], "d" ) sparse1expected /= sparse1norm - self.assertFloatEqual(binary_dist_jaccard(self.sparse1), sparse1expected) + assert_allclose(binary_dist_jaccard(self.sparse1), sparse1expected) def test_binary_dist_ochiai(self): """tests binary_dist_ochiai @@ -375,10 +376,10 @@ tests inputs of empty mtx, zeros, and mtx1 compared with calcs done by hand""" - self.assertFloatEqual(binary_dist_ochiai(self.zeromtx), zeros((4, 4), "d")) + assert_allclose(binary_dist_ochiai(self.zeromtx), zeros((4, 4), "d")) mtx1expected = 
array([[0, 1 - 1 / sqrt(2.0)], [1 - 1 / sqrt(2.0), 0]], "d") - self.assertFloatEqual(binary_dist_ochiai(self.mtx1), mtx1expected) + assert_allclose(binary_dist_ochiai(self.mtx1), mtx1expected) def test_binary_dist_hamming(self): """tests binary_dist_hamming @@ -386,10 +387,10 @@ tests inputs of empty mtx, zeros, and mtx1 compared with calcs done by hand""" - self.assertFloatEqual(binary_dist_hamming(self.zeromtx), zeros((4, 4), "d")) + assert_allclose(binary_dist_hamming(self.zeromtx), zeros((4, 4), "d")) mtx1expected = array([[0, 1], [1, 0]], "d") - self.assertFloatEqual(binary_dist_hamming(self.mtx1), mtx1expected) + assert_allclose(binary_dist_hamming(self.mtx1), mtx1expected) def test_binary_dist_sorensen_dice(self): """tests binary_dist_sorensen_dice @@ -397,18 +398,16 @@ tests inputs of empty mtx, zeros, and mtx1 compared with calcs done by hand""" - self.assertFloatEqual( - binary_dist_sorensen_dice(self.zeromtx), zeros((4, 4), "d") - ) + assert_allclose(binary_dist_sorensen_dice(self.zeromtx), zeros((4, 4), "d")) mtx1expected = array([[0, 1 / 3.0], [1 / 3.0, 0]], "d") - self.assertFloatEqual(binary_dist_sorensen_dice(self.mtx1), mtx1expected) + assert_allclose(binary_dist_sorensen_dice(self.mtx1), mtx1expected) sparse1expected = array( [[0, 0, 1.0, 1.0], [0, 0, 1, 1], [1, 1, 0, 1], [1, 1, 1, 0]], "d" ) - self.assertFloatEqual(binary_dist_sorensen_dice(self.sparse1), sparse1expected) + assert_allclose(binary_dist_sorensen_dice(self.sparse1), sparse1expected) def test_binary_dist_euclidean(self): """tests binary_dist_euclidean @@ -424,9 +423,9 @@ sparse1expected[2, 3] = 1.0 sparse1expected = self.get_sym_mtx_from_uptri(sparse1expected) - self.assertFloatEqual(binary_dist_euclidean(self.dense1), dense1expected) - self.assertFloatEqual(binary_dist_euclidean(self.sparse1), sparse1expected) - self.assertFloatEqual(binary_dist_euclidean(self.zeromtx), zeros((4, 4), "d")) + assert_allclose(binary_dist_euclidean(self.dense1), dense1expected) + 
assert_allclose(binary_dist_euclidean(self.sparse1), sparse1expected) + assert_allclose(binary_dist_euclidean(self.zeromtx), zeros((4, 4), "d")) # zj's stuff def test_chord_transform(self): @@ -438,12 +437,12 @@ [0.90453403, 0.30151134, 0.30151134], ] res = trans_chord(self.mat_test) - self.assertFloatEqual(res, exp) + assert_allclose(res, exp) def test_chord_dist(self): """dist_chord should return the exp result.""" - self.assertFloatEqual(dist_chord(self.zeromtx), zeros((4, 4), "d")) + assert_allclose(dist_chord(self.zeromtx), zeros((4, 4), "d")) exp = [ [0.0, 0.46662021, 0.72311971], @@ -451,7 +450,7 @@ [0.72311971, 0.62546036, 0.0], ] dist = dist_chord(self.mat_test) - self.assertFloatEqual(dist, exp) + assert_allclose(dist, exp) def test_chisq_transform(self): """trans_chisq should return the exp result in the ref paper.""" @@ -461,12 +460,12 @@ [1.01418511, 0.36514837, 0.3380617], ] res_m = trans_chisq(self.mat_test) - self.assertFloatEqual(res_m, exp_m) + assert_allclose(res_m, exp_m) def test_chisq_distance(self): """dist_chisq should return the exp result.""" - self.assertFloatEqual(dist_chisq(self.zeromtx), zeros((4, 4), "d")) + assert_allclose(dist_chisq(self.zeromtx), zeros((4, 4), "d")) exp_d = [ [0.0, 0.4910521, 0.78452291], @@ -474,7 +473,7 @@ [0.78452291, 0.69091002, 0.0], ] res_d = dist_chisq(self.mat_test) - self.assertFloatEqual(res_d, exp_d) + assert_allclose(res_d, exp_d) def test_hellinger_transform(self): """dist_hellinger should return the exp result in the ref paper.""" @@ -484,12 +483,12 @@ [0.77459667, 0.4472136, 0.4472136], ] res = trans_hellinger(self.mat_test) - self.assertFloatEqual(res, exp) + assert_allclose(res, exp) def test_hellinger_distance(self): """dist_hellinger should return the exp result.""" - self.assertFloatEqual(dist_hellinger(self.zeromtx), zeros((4, 4), "d")) + assert_allclose(dist_hellinger(self.zeromtx), zeros((4, 4), "d")) exp = [ [0.0, 0.23429661, 0.38175149], @@ -497,18 +496,18 @@ [0.38175149, 0.32907422, 0.0], 
] dist = dist_hellinger(self.mat_test) - self.assertFloatEqual(dist, exp) + assert_allclose(dist, exp) def test_species_profile_transform(self): """trans_specprof should return the exp result.""" exp = [[0.25, 0.25, 0.5], [0.28571429, 0.42857143, 0.28571429], [0.6, 0.2, 0.2]] res = trans_specprof(self.mat_test) - self.assertFloatEqual(res, exp) + assert_allclose(res, exp) def test_species_profile_distance(self): """dist_specprof should return the exp result.""" - self.assertFloatEqual(dist_specprof(self.zeromtx), zeros((4, 4), "d")) + assert_allclose(dist_specprof(self.zeromtx), zeros((4, 4), "d")) exp = [ [0.0, 0.28121457, 0.46368092], @@ -516,17 +515,17 @@ [0.46368092, 0.39795395, 0.0], ] dist = dist_specprof(self.mat_test) - self.assertFloatEqual(dist, exp) + assert_allclose(dist, exp) def test_dist_bray_curtis_magurran1(self): """ zero values should return zero dist, or 1 with nonzero samples""" res = dist_bray_curtis_magurran(numpy.array([[0, 0, 0], [0, 0, 0], [1, 1, 1]])) - self.assertFloatEqual(res, numpy.array([[0, 0, 1], [0, 0, 1], [1, 1, 0]])) + assert_allclose(res, numpy.array([[0, 0, 1], [0, 0, 1], [1, 1, 0]])) def test_dist_bray_curtis_magurran2(self): """ should match hand-calculated values""" res = dist_bray_curtis_magurran(numpy.array([[1, 4, 3], [1, 3, 5], [0, 2, 0]])) - self.assertFloatEqual( + assert_allclose( res, numpy.array( [ @@ -582,7 +581,7 @@ # for i in range(len(dist_results)): # for j in range(i): # try: - # self.assertFloatEqual(dist_results[i], dist_results[j]) + # assert_allclose(dist_results[i], dist_results[j]) # except: # pass # should not be equal, so catch error and proceed # else: diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_geometry.py python-cogent-2020.12.21a+dfsg/tests/test_maths/test_geometry.py --- python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_geometry.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_maths/test_geometry.py 2020-12-20 23:35:03.000000000 +0000 @@ 
-1,6 +1,7 @@ #!/usr/bin/env python """Tests of the geometry package.""" from math import sqrt +from unittest import TestCase, main from numpy import ( all, @@ -16,7 +17,7 @@ ) from numpy.linalg import inv, norm from numpy.random import choice, dirichlet -from numpy.testing import assert_allclose +from numpy.testing import assert_allclose, assert_equal from cogent3.maths.geometry import ( aitchison_distance, @@ -31,14 +32,13 @@ multiplicative_replacement, sphere_points, ) -from cogent3.util.unit_test import TestCase, main __author__ = "Sandra Smit" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Sandra Smit", "Rob Knight", "Helmut Simon"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Sandra Smit" __email__ = "sandra.smit@colorado.edu" __status__ = "Production" @@ -59,12 +59,12 @@ def test_center_of_mass_one_array(self): """center_of_mass_one_array should behave correctly""" com1 = center_of_mass_one_array - self.assertEqual(com1(self.simple), array([2, 2])) - self.assertEqual(com1(self.simple_list), array([2, 2])) - self.assertFloatEqual(com1(self.more_weight), array([2, 2.785714])) - self.assertEqual(com1(self.square), array([2, 2])) - self.assertEqual(com1(self.square_odd), array([2, 2])) - self.assertEqual(com1(self.sec_weight, 1), array([2, 2])) + assert_equal(com1(self.simple), array([2, 2])) + assert_equal(com1(self.simple_list), array([2, 2])) + assert_allclose(com1(self.more_weight), array([2, 2.785714]), rtol=1e-6) + assert_equal(com1(self.square), array([2, 2])) + assert_equal(com1(self.square_odd), array([2, 2])) + assert_equal(com1(self.sec_weight, 1), array([2, 2])) def test_CoM_one_array_wrong(self): """center_of_mass_one_array should fail on wrong input""" @@ -79,9 +79,9 @@ com2 = center_of_mass_two_array coor = take(self.square_odd, (0, 1), 1) weights = take(self.square_odd, (2,), 1) - self.assertEqual(com2(coor, weights), array([2, 2])) + assert_equal(com2(coor, weights), 
array([2, 2])) weights = weights.ravel() - self.assertEqual(com2(coor, weights), array([2, 2])) + assert_equal(com2(coor, weights), array([2, 2])) def test_CoM_two_array_wrong(self): """center_of_mass_two_array should fail on wrong input""" @@ -91,19 +91,18 @@ self.assertRaises(ValueError, com2, self.simple, weights) # not aligned def test_center_of_mass(self): - """center_of_mass should make right choice between functional methods - """ + """center_of_mass should make right choice between functional methods""" com = center_of_mass com1 = center_of_mass_one_array com2 = center_of_mass_two_array - self.assertEqual(com(self.simple), com1(self.simple)) - self.assertFloatEqual(com(self.more_weight), com1(self.more_weight)) - self.assertEqual(com(self.sec_weight, 1), com1(self.sec_weight, 1)) + assert_equal(com(self.simple), com1(self.simple)) + assert_allclose(com(self.more_weight), com1(self.more_weight)) + assert_equal(com(self.sec_weight, 1), com1(self.sec_weight, 1)) coor = take(self.square_odd, (0, 1), 1) weights = take(self.square_odd, (2,), 1) - self.assertEqual(com(coor, weights), com2(coor, weights)) + assert_equal(com(coor, weights), com2(coor, weights)) weights = weights.ravel() - self.assertEqual(com(coor, weights), com2(coor, weights)) + assert_equal(com(coor, weights), com2(coor, weights)) def test_distance(self): """distance should return Euclidean distance correctly.""" @@ -124,11 +123,11 @@ self.assertEqual(distance(a1, a1), 0) self.assertEqual(distance(a2, a2), 0) self.assertEqual(distance(a1, a2), distance(a2, a1)) - self.assertFloatEqual(distance(a1, a2), sqrt(22.25)) + assert_allclose(distance(a1, a2), sqrt(22.25)) def test_sphere_points(self): """tests sphere points""" - self.assertEqual(sphere_points(1), array([[1.0, 0.0, 0.0]])) + assert_equal(sphere_points(1), array([[1.0, 0.0, 0.0]])) class TestAitchison(TestCase): diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_matrix_exponential_integration.py 
python-cogent-2020.12.21a+dfsg/tests/test_maths/test_matrix_exponential_integration.py --- python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_matrix_exponential_integration.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_maths/test_matrix_exponential_integration.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,3 +1,5 @@ +from unittest import TestCase, main + import numpy as np from numpy import array, diag, dot, exp @@ -5,18 +7,19 @@ import cogent3.maths.matrix_exponentiation as cmme from cogent3.maths import matrix_exponential_integration as expm -from cogent3.util.unit_test import TestCase, main __author__ = "Ben Kaehler" __copyright__ = "Copyright 2007-2014, The Cogent Project" __credits__ = ["Ben Kaehler", "Ananias Iliadis", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Ben Kaehler" __email__ = "benjamin.kaehler@anu.edu.au" __status__ = "Production" +from numpy.testing import assert_allclose + class TestIntegratingExponentiator(TestCase): def test_van_loan_integrating_exponentiator(self): @@ -30,11 +33,15 @@ q = array([[0.5, 0.2, 0.1, 0.2]] * 4) for i in range(4): q[i, i] = 0.0 - q[i, i] = -sum(q[i,]) + q[i, i] = -sum( + q[ + i, + ] + ) p0 = array([0.2, 0.3, 0.3, 0.2]) I = expm.VanLoanIntegratingExponentiator(q, -diag(q))(1.0) - self.assertFloatEqual(dot(p0, I), result) + assert_allclose(dot(p0, I), result) self.assertRaises( ArithmeticError, @@ -51,21 +58,17 @@ [[exp(t) - 1.0, exp(t) * (t - 1.0) + 1.0], [0.0, exp(t) - 1.0]] ) - self.assertFloatEqual( - expm.VanLoanIntegratingExponentiator(Q)(1.0), integral(1.0) - ) - self.assertFloatEqual( - expm.VanLoanIntegratingExponentiator(Q)(2.0), integral(2.0) - ) + assert_allclose(expm.VanLoanIntegratingExponentiator(Q)(1.0), integral(1.0)) + assert_allclose(expm.VanLoanIntegratingExponentiator(Q)(2.0), integral(2.0)) R = array([[1.0], [1.0]]) - self.assertFloatEqual( + assert_allclose( 
expm.VanLoanIntegratingExponentiator(Q, R, cmme.TaylorExponentiator)(1.0), dot(integral(1.0), R), ) def test_von_bing_integrating_exponentiator(self): - """VonBingIntegratingExponentiator should reproduce Felsenstein + """VonBingIntegratingExponentiator should reproduce Felsenstein analytic result, should throw if we pass it a defective matrix, and should match results obtained from VanLoanIntegratingExponentiator for a diagonisable matrix.""" @@ -74,11 +77,15 @@ q = array([[0.5, 0.2, 0.1, 0.2]] * 4) for i in range(4): q[i, i] = 0.0 - q[i, i] = -sum(q[i,]) + q[i, i] = -sum( + q[ + i, + ] + ) p0 = array([0.2, 0.3, 0.3, 0.2]) I = expm.VonBingIntegratingExponentiator(q)(1.0) - self.assertFloatEqual(dot(dot(p0, I), -diag(q)), result) + assert_allclose(dot(dot(p0, I), -diag(q)), result) self.assertRaises( ArithmeticError, @@ -95,13 +102,13 @@ ] ) - self.assertFloatEqual( + assert_allclose( expm.VonBingIntegratingExponentiator(p)(1.0), expm.VanLoanIntegratingExponentiator( p, exponentiator=cmme.FastExponentiator )(1.0), ) - self.assertFloatEqual( + assert_allclose( expm.VonBingIntegratingExponentiator(p)(2.0), expm.VanLoanIntegratingExponentiator( p, exponentiator=cmme.FastExponentiator @@ -126,7 +133,7 @@ Q = get_calibrated_Q(R) length = 0.1 got = expm.expected_number_subs(moprobs, Q, length) - self.assertFloatEqual(got, length) + assert_allclose(got, length) # case 2, length != ENS A = array( diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_matrix_logarithm.py python-cogent-2020.12.21a+dfsg/tests/test_maths/test_matrix_logarithm.py --- python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_matrix_logarithm.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_maths/test_matrix_logarithm.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,5 +1,7 @@ #!/usr/bin/env python """Unit tests for matrix logarithm.""" +from unittest import TestCase, main + from numpy import array from cogent3.maths.matrix_logarithm import ( @@ -7,18 +9,19 @@ 
logm, logm_taylor, ) -from cogent3.util.unit_test import TestCase, main __author__ = "Rob Knight" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight", "Gavin Huttley", "Ben Kaehler"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" +from numpy.testing import assert_allclose + class logarithm_tests(TestCase): """Tests of top-level matrix logarithm functions.""" @@ -35,7 +38,7 @@ ) q = logm(p) - self.assertFloatEqual( + assert_allclose( q, array( [ @@ -45,6 +48,7 @@ [0.35144866, 0.31279003, 0.10478999, -0.76902868], ] ), + rtol=1e-6, ) def test_logm_taylor(self): @@ -65,10 +69,10 @@ [0.23022035, 0.22306947, 0.06995306, 0.47675713], ] ) - self.assertFloatEqual(q_taylor, q_eig) + assert_allclose(q_taylor, q_eig) def test_is_generator_unique(self): - """ is_generator_unique should identify non-unique primary roots or + """is_generator_unique should identify non-unique primary roots or raise a NotImplementedError for non-primary roots""" q_fail = array( [ diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_measure.py python-cogent-2020.12.21a+dfsg/tests/test_maths/test_measure.py --- python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_measure.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_maths/test_measure.py 2020-12-20 23:35:03.000000000 +0000 @@ -18,7 +18,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Stephen Ka-Wah Ma"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_optimisers.py python-cogent-2020.12.21a+dfsg/tests/test_maths/test_optimisers.py --- 
python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_optimisers.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_maths/test_optimisers.py 2020-12-20 23:35:03.000000000 +0000 @@ -16,7 +16,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_period.py python-cogent-2020.12.21a+dfsg/tests/test_maths/test_period.py --- python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_period.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_maths/test_period.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,3 +1,5 @@ +from unittest import TestCase, main + from numpy import arange, array, convolve, exp, float64, pi, random, sin, zeros from cogent3.maths.period import _autocorr_inner2 as py_autocorr_inner @@ -7,18 +9,19 @@ from cogent3.maths.period_numba import autocorr_inner as numba_autocorr_inner from cogent3.maths.period_numba import goertzel_inner as numba_goertzel_inner from cogent3.maths.period_numba import ipdft_inner as numba_ipdft_inner -from cogent3.util.unit_test import TestCase, main __author__ = "Hua Ying, Julien Epps and Gavin Huttley" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Julien Epps", "Hua Ying", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" +from numpy.testing import assert_allclose, assert_almost_equal, assert_equal + class TestPeriod(TestCase): def setUp(self): @@ -137,7 +140,7 @@ ) N = 100 period = 10 - self.assertFloatEqual( + assert_allclose( py_goertzel_inner(x, N, period), numba_goertzel_inner(x, N, period) ) @@ -160,7 
+163,7 @@ py_result = py_ipdft_inner(x, X, W, ulim, N) numba_result = numba_ipdft_inner(x, X, W, ulim, N) for i, j in zip(py_result, numba_result): - self.assertFloatEqual(abs(i), abs(j)) + assert_allclose(abs(i), abs(j), rtol=1e-6) x = array( [ @@ -272,14 +275,14 @@ py_autocorr_inner(x, py_xc, N) numba_autocorr_inner(x, numba_xc, N) for i, j in zip(py_xc, numba_xc): - self.assertFloatEqual(i, j) + assert_allclose(i, j) def test_autocorr(self): """correctly compute autocorrelation""" s = [1, 1, 1, 1] X, periods = auto_corr(s, llim=-3, ulim=None) exp_X = array([1, 2, 3, 4, 3, 2, 1], dtype=float) - self.assertEqual(X, exp_X) + assert_equal(X, exp_X) auto_x, auto_periods = auto_corr(self.sig, llim=2, ulim=50) max_idx = list(auto_x).index(max(auto_x)) @@ -313,7 +316,7 @@ ) ) X = abs(X) - self.assertFloatEqual(X, exp_X, eps=1e-3) + assert_almost_equal(X, exp_X, decimal=4) ipdft_x, ipdft_periods = ipdft(self.sig, llim=2, ulim=50) ipdft_x = abs(ipdft_x) @@ -324,7 +327,7 @@ def test_goertzel(self): """goertzel and ipdft should be the same""" ipdft_pwr, ipdft_prd = ipdft(self.sig, llim=10, ulim=10) - self.assertFloatEqual(goertzel(self.sig, 10), ipdft_pwr) + assert_allclose(goertzel(self.sig, 10), ipdft_pwr) def test_hybrid(self): """correctly compute hybrid statistic""" @@ -342,9 +345,9 @@ hybrid_ipdft_autocorr_stats, hybrid_periods = hybrid( self.sig, llim=None, ulim=50, return_all=True ) - self.assertEqual(hybrid_ipdft_autocorr_stats[0], hybrid_x) - self.assertEqual(hybrid_ipdft_autocorr_stats[1], ipdft_pwr) - self.assertEqual(hybrid_ipdft_autocorr_stats[2], auto_x) + assert_equal(hybrid_ipdft_autocorr_stats[0], hybrid_x) + assert_equal(hybrid_ipdft_autocorr_stats[1], ipdft_pwr) + assert_equal(hybrid_ipdft_autocorr_stats[2], auto_x) ipdft_pwr, ipdft_prd = ipdft(self.sig, llim=10, ulim=10) auto_x, auto_periods = auto_corr(self.sig, llim=10, ulim=10) diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_stats/__init__.py 
python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/__init__.py --- python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_stats/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -5,7 +5,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight", "Catherine Lozupone", "Gavin Huttley", "Sandra Smit"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_stats/test_contingency.py python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_contingency.py --- python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_stats/test_contingency.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_contingency.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,15 +1,18 @@ from unittest import TestCase, main +import numpy + from numpy.testing import assert_allclose -from cogent3.maths.stats.contingency import CategoryCounts +from cogent3.maths.stats.contingency import CategoryCounts, calc_expected +from cogent3.util.dict_array import DictArrayTemplate __author__ = "Gavin Huttley" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -33,7 +36,7 @@ ) def test_chisq2(self): - """constrtucted from 2D dict""" + """constructed from 2D dict""" data = { "rest_of_tree": {"env1": 2, "env3": 1, "env2": 0}, "b": {"env1": 1, "env3": 1, "env2": 3}, @@ -56,6 +59,8 @@ got = table.chisq_test() assert_allclose(got.chisq, 173.7603305785124) 
self.assertLess(got.pvalue, 2.2e-16) # value from R + _ = got._repr_html_() # shouldn't fail + self.assertIn("1.12e-39", str(got)) # used sci formatting def test_G_ind(self): """correctly produce G test of independence""" @@ -63,6 +68,13 @@ got = table.G_independence(williams=True) self.assertEqual(got.df, 2) + def test_G_ind_with_pseudocount(self): + """G test of independence with pseudocount""" + table = CategoryCounts([[762, 327, 0], [484, 239, 0]]) + got = table.G_independence(williams=True, pseudo_count=1) + assert_allclose(table.observed.array + 1, got.observed.array) + assert_allclose(got.expected.array, calc_expected(got.observed.array)) + def test_G_fit_with_expecteds(self): """compute G-fit with provided expecteds""" obs = [2, 10, 8, 2, 4] @@ -73,6 +85,21 @@ got = table.G_fit() assert_allclose(got.G, 9.849234) assert_allclose(got.pvalue, 0.04304536) + _ = got._repr_html_() # shouldn't fail + self.assertIn("0.0430", str(got)) # used normal formatting + + def test_assign_expected(self): + """assign expected property""" + obs = [2, 10, 8, 2, 4] + exp = [5.2] * 5 + keys = ["Marl", "Chalk", "Sandstone", "Clay", "Limestone"] + table = CategoryCounts(dict(zip(keys, obs))) + table.expected = dict(zip(keys, exp)) + got = table.G_fit() + assert_allclose(got.G, 9.849234) + table.expected = None + got = table.G_fit() + print(got) def test_zero_observeds(self): """raises ValueError""" @@ -117,6 +144,22 @@ table = CategoryCounts(dict(zip(keys, obs)), expected=dict(zip(keys, exp))) str(table) + def test_repr_contingency(self): + """exercising repr(CategoryCounts) with/without html=True""" + table = CategoryCounts( + { + "rest_of_tree": {"env1": 2, "env3": 1, "env2": 0}, + "b": {"env1": 1, "env3": 1, "env2": 3}, + } + ) + str(table) + obs = [2, 10, 8, 2, 4] + exp = [5.2] * 5 + keys = ["Marl", "Chalk", "Sandstone", "Clay", "Limestone"] + table = CategoryCounts(dict(zip(keys, obs)), expected=dict(zip(keys, exp))) + got = table._get_repr_() + got = 
table._get_repr_(html=True) + def test_accessing_elements(self): """successfully access elements""" table = CategoryCounts( @@ -149,9 +192,9 @@ def test_validate_expecteds(self): """test provided expecteds total same as observed""" - obs = dict(a=10, b=2, c=2) - exp = [5, 5, 5] with self.assertRaises(AssertionError): + obs = dict(a=10, b=2, c=2) + exp = [5, 5, 5] CategoryCounts(obs, expected=exp) def test_repr_str_html(self): @@ -165,11 +208,48 @@ got_g1 = table.G_fit() got_g2 = table.G_independence() got_chisq = table.chisq_test() - for obj in (got_g1, got_g2, got_chisq): + for obj in (table, got_g1, got_g2, got_chisq): str(obj) repr(obj) obj._repr_html_() + def test_statistics(self): + """returns TestResult.statistics has stats""" + table = CategoryCounts( + { + "rest_of_tree": {"env1": 2, "env3": 1, "env2": 0}, + "b": {"env1": 1, "env3": 1, "env2": 3}, + } + ) + got = table.chisq_test() + stats = got.statistics + self.assertEqual(stats[0, "pvalue"], got.pvalue) + + def test_calc_expected2(self): + """handle case where expected is a single column vector""" + nums = numpy.array([1, 2, 3]).reshape((3, 1)) + got = calc_expected(nums) + assert_allclose(got, numpy.array([2, 2, 2]).reshape((3, 1))) + + def test_category_counts_from_non_int_arrays(self): + """handles object and float numpy array, fails if float""" + a = numpy.array([[31, 36], [58, 138]], dtype=object) + darr = DictArrayTemplate(["syn", "nsyn"], ["Ts", "Tv"]).wrap(a) + got = CategoryCounts(darr) + assert_allclose(got.observed.array.tolist(), a.tolist()) + + for dtype in (object, float): + with self.assertRaises(TypeError): + a = numpy.array([[31.3, 36], [58, 138]], dtype=dtype) + darr = DictArrayTemplate(["syn", "nsyn"], ["Ts", "Tv"]).wrap(a) + _ = CategoryCounts(darr) + + # negative values disallowed + with self.assertRaises(ValueError): + a = numpy.array([[31, -36], [58, 138]], dtype=int) + darr = DictArrayTemplate(["syn", "nsyn"], ["Ts", "Tv"]).wrap(a) + _ = CategoryCounts(darr) + if __name__ == 
"__main__": main() diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_stats/test_distribution.py python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_distribution.py --- python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_stats/test_distribution.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_distribution.py 2020-12-20 23:35:03.000000000 +0000 @@ -4,6 +4,8 @@ Currently using tests against calculations in R, spreadsheets being unreliable. """ +from unittest import TestCase, main + from cogent3.maths.stats.distribution import ( bdtr, bdtrc, @@ -29,27 +31,30 @@ poisson_exact, poisson_high, poisson_low, + probability_points, stdtr, stdtri, t_high, t_low, + theoretical_quantiles, tprob, z_high, z_low, zprob, ) -from cogent3.util.unit_test import TestCase, main __author__ = "Rob Knight" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Rob Knight", "Sandra Smit"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" +from numpy.testing import assert_allclose, assert_almost_equal + class DistributionsTests(TestCase): """Tests of particular statistical distributions.""" @@ -91,9 +96,9 @@ ] for z, p in zip(self.values, probs): - self.assertFloatEqual(z_low(z), p) + assert_allclose(z_low(z), p, rtol=1e-6) for z, p in zip(self.negvalues, negprobs): - self.assertFloatEqual(z_low(z), p) + assert_allclose(z_low(z), p, rtol=1e-6) def test_z_high(self): """z_high should match R's pnorm(lower.tail=FALSE) function""" @@ -128,9 +133,9 @@ ] for z, p in zip(self.values, probs): - self.assertFloatEqual(z_high(z), p) + assert_allclose(z_high(z), p, rtol=1e-6) for z, p in zip(self.negvalues, negprobs): - self.assertFloatEqual(z_high(z), p) + assert_allclose(z_high(z), p, 
rtol=1e-6) def test_zprob(self): """zprob should match twice the z_high probability for abs(z)""" @@ -154,9 +159,9 @@ ] for z, p in zip(self.values, probs): - self.assertFloatEqual(zprob(z), p) + assert_allclose(zprob(z), p, rtol=1e-6) for z, p in zip(self.negvalues, probs): - self.assertFloatEqual(zprob(z), p) + assert_allclose(zprob(z), p, rtol=1e-6) def test_chi_low(self): """chi_low should match R's pchisq() function""" @@ -207,7 +212,7 @@ } for df in self.df: for x, p in zip(self.values, probs[df]): - self.assertFloatEqual(chi_low(x, df), p) + assert_allclose(chi_low(x, df), p, rtol=1e-6) def test_chi_high(self): """chi_high should match R's pchisq(lower.tail=FALSE) function""" @@ -258,7 +263,7 @@ for df in self.df: for x, p in zip(self.values, probs[df]): - self.assertFloatEqual(chi_high(x, df), p) + assert_allclose(chi_high(x, df), p, rtol=1e-6) def test_t_low(self): """t_low should match R's pt() function""" @@ -354,9 +359,9 @@ for df in self.df: for x, p in zip(self.values, probs[df]): - self.assertFloatEqualRel(t_low(x, df), p, eps=1e-4) + assert_almost_equal(t_low(x, df), p, decimal=4) for x, p in zip(self.negvalues, negprobs[df]): - self.assertFloatEqualRel(t_low(x, df), p, eps=1e-4) + assert_almost_equal(t_low(x, df), p, decimal=4) def test_t_high(self): """t_high should match R's pt(lower.tail=FALSE) function""" @@ -452,9 +457,9 @@ for df in self.df: for x, p in zip(self.values, probs[df]): - self.assertFloatEqualRel(t_high(x, df), p, eps=1e-4) + assert_almost_equal(t_high(x, df), p, decimal=4) for x, p in zip(self.negvalues, negprobs[df]): - self.assertFloatEqualRel(t_high(x, df), p, eps=1e-4) + assert_almost_equal(t_high(x, df), p, decimal=4) def test_tprob(self): """tprob should match twice the t_high probability for abs(t)""" @@ -514,7 +519,7 @@ } for df in self.df: for x, p in zip(self.values, probs[df]): - self.assertFloatEqualRel(tprob(x, df), p, eps=1e-4) + assert_almost_equal(tprob(x, df), p, decimal=4) def test_poisson_low(self): """Lower 
tail of poisson should match R for integer successes""" @@ -536,7 +541,7 @@ (180, 1024): 8.266457e-233, } for (key, value) in list(expected.items()): - self.assertFloatEqual(poisson_low(*key), value) + assert_allclose(poisson_low(*key), value, rtol=1e-6) def test_poisson_high(self): """Upper tail of poisson should match R for integer successes""" @@ -558,7 +563,7 @@ (180, 1024): 1, } for (key, value) in list(expected.items()): - self.assertFloatEqual(poisson_high(*key), value) + assert_allclose(poisson_high(*key), value) def test_poisson_exact(self): """Poisson exact should match expected values from R""" @@ -578,7 +583,7 @@ (180, 1024): 6.815085e-233, } for (key, value) in list(expected.items()): - self.assertFloatEqual(poisson_exact(*key), value) + assert_allclose(poisson_exact(*key), value, rtol=1e-6) def test_binomial_high(self): """Binomial high should match values from R for integer successes""" @@ -597,7 +602,7 @@ (-0.5, 3, 0.1): 1, } for (key, value) in list(expected.items()): - self.assertFloatEqualRel(binomial_high(*key), value, 1e-4) + assert_almost_equal(binomial_high(*key), value, 1e-4) # should reject if successes > trials or successes < -1 self.assertRaises(ValueError, binomial_high, 7, 5, 0.5) @@ -617,7 +622,7 @@ (1032, 2050, 0.5): 0.6297845, } for (key, value) in list(expected.items()): - self.assertFloatEqualRel(binomial_low(*key), value, 1e-4) + assert_almost_equal(binomial_low(*key), value, 1e-4) def test_binomial_series(self): """binomial_exact should match values from R on a whole series""" @@ -629,7 +634,7 @@ ) for i in range(len(expected)): - self.assertFloatEqual(binomial_exact(i, 10, 0.3), expected[i]) + assert_allclose(binomial_exact(i, 10, 0.3), expected[i]) def test_binomial_exact(self): """binomial_exact should match values from R for integer successes""" @@ -646,11 +651,10 @@ (1032, 2050, 0.5): 0.01679804, } for (key, value) in list(expected.items()): - self.assertFloatEqualRel(binomial_exact(*key), value, 1e-4) + 
assert_almost_equal(binomial_exact(*key), value, 1e-4) def test_binomial_exact_floats(self): - """binomial_exact should be within limits for floating point numbers - """ + """binomial_exact should be within limits for floating point numbers""" expected = { (18.3, 100, 0.2): (0.09089812, 0.09807429), (2.7, 1050, 0.006): (0.03615498, 0.07623827), @@ -664,7 +668,7 @@ for (key, value) in list(expected.items()): min_val, max_val = value assert min_val < binomial_exact(*key) < max_val - # self.assertFloatEqualRel(binomial_exact(*key), value, 1e-4) + # assert_almost_equal(binomial_exact(*key), value, 1e-4) def test_binomial_exact_errors(self): """binomial_exact should raise errors on invalid input""" @@ -716,7 +720,7 @@ e = list(expected.items()) e.sort() for (key, value) in e: - self.assertFloatEqualRel(f_high(*key), value) + assert_almost_equal(f_high(*key), value) def test_f_low(self): """F low should match values from R for integer successes""" @@ -737,15 +741,15 @@ (1032, 2050, 0.1): 1.70204e-278, } for (key, value) in list(expected.items()): - self.assertFloatEqualRel(f_low(*key), value) + assert_almost_equal(f_low(*key), value) def test_fprob(self): """fprob should return twice the tail on a particular side""" error = 1e-4 # right-hand side - self.assertFloatEqualAbs(fprob(10, 10, 1.2), 0.7788, eps=error) + assert_allclose(fprob(10, 10, 1.2), 0.7788, rtol=error) # left-hand side - self.assertFloatEqualAbs(fprob(10, 10, 1.2, side="left"), 1.2212, eps=error) + assert_allclose(fprob(10, 10, 1.2, side="left"), 1.2212, rtol=error) self.assertRaises(ValueError, fprob, 10, 10, -3) self.assertRaises(ValueError, fprob, 10, 10, 1, "non_valid_side") @@ -782,7 +786,7 @@ index = 0 for i in t: for j in k: - self.assertFloatEqual(stdtr(j, i), exp[index]) + assert_allclose(stdtr(j, i), exp[index]) index += 1 def test_bdtr(self): @@ -871,7 +875,7 @@ for k in k_s: for n in n_s: for p in p_s: - self.assertFloatEqual(bdtr(k, n, p), exp[index]) + assert_allclose(bdtr(k, n, p), 
exp[index]) index += 1 def test_bdtrc(self): @@ -961,7 +965,7 @@ for k in k_s: for n in n_s: for p in p_s: - self.assertFloatEqual(bdtrc(k, n, p), exp[index]) + assert_allclose(bdtrc(k, n, p), exp[index]) index += 1 def test_pdtr(self): @@ -1003,7 +1007,7 @@ index = 0 for k in k_s: for m in m_s: - self.assertFloatEqual(pdtr(k, m), exp[index]) + assert_allclose(pdtr(k, m), exp[index]) index += 1 def test_pdtrc(self): @@ -1045,7 +1049,7 @@ index = 0 for k in k_s: for m in m_s: - self.assertFloatEqual(pdtrc(k, m), exp[index]) + assert_allclose(pdtrc(k, m), exp[index]) index += 1 def test_fdtr(self): @@ -1139,7 +1143,7 @@ for a in a_s: for b in b_s: for x in x_s: - self.assertFloatEqual(fdtr(a, b, x), exp[index]) + assert_allclose(fdtr(a, b, x), exp[index]) index += 1 def test_fdtrc(self): @@ -1233,7 +1237,7 @@ for a in a_s: for b in b_s: for x in x_s: - self.assertFloatEqual(fdtrc(a, b, x), exp[index]) + assert_allclose(fdtrc(a, b, x), exp[index]) index += 1 def test_gdtr(self): @@ -1327,7 +1331,7 @@ for a in a_s: for b in b_s: for x in x_s: - self.assertFloatEqual(gdtr(a, b, x), exp[index]) + assert_allclose(gdtr(a, b, x), exp[index]) index += 1 def test_gdtrc(self): @@ -1421,7 +1425,7 @@ for a in a_s: for b in b_s: for x in x_s: - self.assertFloatEqual(gdtrc(a, b, x), exp[index]) + assert_allclose(gdtrc(a, b, x), exp[index]) index += 1 def test_chdtri(self): @@ -1463,7 +1467,7 @@ index = 0 for k in k_s: for p in p_s: - self.assertFloatEqual(chdtri(k, p), exp[index]) + assert_allclose(chdtri(k, p), exp[index]) index += 1 def test_stdtri(self): @@ -1505,7 +1509,7 @@ index = 0 for k in k_s: for p in p_s: - self.assertFloatEqual(stdtri(k, p), exp[index]) + assert_allclose(stdtri(k, p), exp[index], rtol=1e-6, atol=1e-6) index += 1 def test_pdtri(self): @@ -1547,7 +1551,7 @@ index = 0 for k in k_s: for p in p_s: - self.assertFloatEqual(pdtri(k, p), exp[index]) + assert_allclose(pdtri(k, p), exp[index]) index += 1 def test_bdtri(self): @@ -1621,7 +1625,7 @@ for k in k_s: 
for n in n_s: for p in p_s: - self.assertFloatEqual(bdtri(k, n, p), exp[index]) + assert_allclose(bdtri(k, n, p), exp[index]) index += 1 def test_gdtri(self): @@ -1760,7 +1764,7 @@ for k in k_s: for n in n_s: for p in p_s: - self.assertFloatEqual(gdtri(k, n, p), exp[index]) + assert_allclose(gdtri(k, n, p), exp[index], rtol=1e-6) index += 1 def test_fdtri(self): @@ -1924,9 +1928,71 @@ for k in k_s: for n in n_s: for p in p_s: - self.assertFloatEqual(fdtri(k, n, p), exp[index]) + assert_allclose(fdtri(k, n, p), exp[index]) index += 1 + def test_probability_points(self): + """generates evenly spaced probabilities""" + expect = ( + 0.1190476190476190, + 0.3095238095238095, + 0.5000000000000000, + 0.6904761904761905, + 0.8809523809523809, + ) + got = probability_points(5) + assert_almost_equal(got, expect) + expect = ( + 0.04545454545454546, + 0.13636363636363635, + 0.22727272727272727, + 0.31818181818181818, + 0.40909090909090912, + 0.50000000000000000, + 0.59090909090909094, + 0.68181818181818177, + 0.77272727272727271, + 0.86363636363636365, + 0.95454545454545459, + ) + got = probability_points(11) + assert_almost_equal(got, expect) + + def test_theoretical_quantiles(self): + """correctly produce theoretical quantiles""" + expect = probability_points(4) + got = theoretical_quantiles(4, dist="uniform") + assert_almost_equal(got, expect) + dists = ["normal", "chisq", "t", "poisson", "binomial", "F", "gamma"] + expect = ( + -1.049131397963971, + -0.299306910465667, + 0.299306910465667, + 1.049131397963971, + ) + p = probability_points(4) + got = theoretical_quantiles(len(expect), dist="normal") + assert_almost_equal(got, expect) + + # for gamma with shape 2, scale 1/3 + expect = [ + 3.833845224364122, + 1.922822334309249, + 0.9636761737854768, + 0.3181293892593747, + ] + got = theoretical_quantiles(4, "chisq", 2) + assert_almost_equal(got, expect) + + expect = ( + -1.2064470985524887, + -0.3203979544794824, + 0.3203979544794824, + 1.2064470985524887, + ) + got = 
theoretical_quantiles(4, "t", 4) + assert_almost_equal(got, expect) + if __name__ == "__main__": main() diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_stats/test_information_criteria.py python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_information_criteria.py --- python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_stats/test_information_criteria.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_information_criteria.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,34 +1,37 @@ #!/usr/bin/env python +from unittest import TestCase, main + from cogent3.maths.stats.information_criteria import aic, bic -from cogent3.util.unit_test import TestCase, main __author__ = "Gavin Huttley" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" +from numpy.testing import assert_allclose + class InformationCriteria(TestCase): """Tests calculation of AIC and BIC measures.""" def test_aic(self): """correctly compute AIC from Burnham & Anderson 2002, p102""" - self.assertFloatEqual(aic(-9.7039, 4), 27.4078) + assert_allclose(aic(-9.7039, 4), 27.4078) def test_aic_corrected(self): """correctly compute AIC corrected for small sample size""" # from Burnham & Anderson 2002, p102 - self.assertFloatEqual(aic(-9.7039, 4, sample_size=13), 32.4078) + assert_allclose(aic(-9.7039, 4, sample_size=13), 32.4078) def test_bic(self): """correctly compute BIC""" # against hand calculated - self.assertFloatEqual(bic(-9.7039, 4, 13), 29.6675974298) + assert_allclose(bic(-9.7039, 4, 13), 29.6675974298) if __name__ == "__main__": diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_stats/test_jackknife.py python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_jackknife.py --- 
python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_stats/test_jackknife.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_jackknife.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,18 +1,21 @@ +from unittest import TestCase, main + import numpy as np from cogent3.maths.stats.jackknife import JackknifeStats -from cogent3.util.unit_test import TestCase, main __author__ = "Anuj Pahwa, Gavin Huttley" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Anuj Pahwa", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" +from numpy.testing import assert_allclose + def pmcc(data, axis=1): """Compute the Product-moment correlation coefficient. @@ -94,8 +97,8 @@ # Scalar pmcc_stat = stat_maker(pmcc, data, 1) test_knife = JackknifeStats(data.shape[1], pmcc_stat) - self.assertFloatEqual(test_knife.jackknifed_stat, 1.2905845) - self.assertFloatEqual(test_knife.standard_error, 0.2884490) + assert_allclose(test_knife.jackknifed_stat, 1.2905845) + assert_allclose(test_knife.standard_error, 0.2884490) self.assertTrue(test_knife._jackknifed_stat != None) # Vector @@ -107,8 +110,8 @@ got_standard_err = test_knife.standard_error for index in [0, 1]: - self.assertFloatEqual(got_jk_stat[index], expected_jk_stat[index]) - self.assertFloatEqual(got_standard_err[index], expected_standard_err[index]) + assert_allclose(got_jk_stat[index], expected_jk_stat[index]) + assert_allclose(got_standard_err[index], expected_standard_err[index]) def test_tables(self): """jackknife should work for calculators return scalars or vectors""" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_stats/test_ks.py python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_ks.py --- python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_stats/test_ks.py 2020-06-30 05:30:11.000000000 
+0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_ks.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,4 +1,6 @@ #!/usr/bin/env python +from unittest import TestCase, main + from cogent3.maths.stats.ks import ( pkolmogorov1x, pkolmogorov2x, @@ -6,18 +8,19 @@ psmirnov2x, ) from cogent3.maths.stats.test import ks_boot, ks_test -from cogent3.util.unit_test import TestCase, main __author__ = "Gavin Huttley" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" +from numpy.testing import assert_allclose + class KSTests(TestCase): """Tests Kolmogorov-Smirnov.""" @@ -130,50 +133,50 @@ def test_pk1x(self): """1 sample 1-sided should match answers from R""" - self.assertFloatEqual(pkolmogorov1x(0.06, 30), 0.2248113) + assert_allclose(pkolmogorov1x(0.06, 30), 0.2248113) def test_pk2x(self): """1 sample 2-sided should match answers from R""" - self.assertFloatEqual(pkolmogorov2x(0.7199, 50), (1 - 6.661e-16)) - self.assertFloatEqual(pkolmogorov2x(0.08, 30), 0.01754027) - self.assertFloatEqual(pkolmogorov2x(0.03, 300), 0.05753413) + assert_allclose(pkolmogorov2x(0.7199, 50), (1 - 6.661e-16), rtol=1e-5) + assert_allclose(pkolmogorov2x(0.08, 30), 0.01754027, rtol=1e-5) + assert_allclose(pkolmogorov2x(0.03, 300), 0.05753413, rtol=1e-5) def test_ps2x(self): """2 sample 2-sided smirnov should match answers from R""" - self.assertFloatEqual(psmirnov2x(0.48, 20, 50), 0.9982277) - self.assertFloatEqual(psmirnov2x(0.28, 20, 50), 0.8161612) - self.assertFloatEqual(psmirnov2x(0.28, 50, 20), 0.8161612) + assert_allclose(psmirnov2x(0.48, 20, 50), 0.9982277) + assert_allclose(psmirnov2x(0.28, 20, 50), 0.8161612) + assert_allclose(psmirnov2x(0.28, 50, 20), 0.8161612) def tes_pk2x(self): """2 sample 2-sided kolmogorov should match answers from R""" - 
self.assertFloatEqual(pkolmogorov1x(0.058, 50), 0.007530237) - self.assertFloatEqual(pkolmogorov1x(0.018, 50), 4.887356e-26) - self.assertFloatEqual(pkolmogorov1x(0.018, 5000), 0.922618) + assert_allclose(pkolmogorov1x(0.058, 50), 0.007530237) + assert_allclose(pkolmogorov1x(0.018, 50), 4.887356e-26) + assert_allclose(pkolmogorov1x(0.018, 5000), 0.922618) def test_pkstwo(self): """kolmogorov asymptotic should match answers from R""" - self.assertFloatEqual(pkstwo(2.3), [1 - 5.084e-05], eps=1e-5) + assert_allclose(pkstwo(2.3), [1 - 5.084e-05], rtol=1e-5) def test_ks2x(self): """KS two-sample, 2-sided should match answers from R""" D, Pval = ks_test(self.x1, self.x2) - self.assertFloatEqual((D, Pval), (0.46, 3.801e-05), eps=1e-4) + assert_allclose((D, Pval), (0.46, 3.801e-05), rtol=1e-4) D, Pval = ks_test(self.x1, self.x2, exact=False) - self.assertFloatEqual((D, Pval), (0.46, 5.084e-05), eps=1e-4) + assert_allclose((D, Pval), (0.46, 5.084e-05), rtol=1e-4) D, Pval = ks_test(self.x1, self.x2[:20]) - self.assertFloatEqual((D, Pval), (0.53, 0.0003576), eps=1e-4) + assert_allclose((D, Pval), (0.53, 0.0003576), rtol=1e-4) D, Pval = ks_test(self.x2[:20], self.x1) - self.assertFloatEqual((D, Pval), (0.53, 0.0003576), eps=1e-4) + assert_allclose((D, Pval), (0.53, 0.0003576), rtol=1e-4) D, Pval = ks_test(self.x1[:20], self.x2) - self.assertFloatEqual((D, Pval), (0.48, 0.001772), eps=1e-4) + assert_allclose((D, Pval), (0.48, 0.001772), rtol=1e-3) D, Pval = ks_test(self.x1, self.x2, alt="greater") - self.assertFloatEqual((D, Pval), (0.46, 2.542e-05), eps=1e-4) + assert_allclose((D, Pval), (0.46, 2.542e-05), rtol=1e-4) D, Pval = ks_test(self.x1, self.x2, alt="g") - self.assertFloatEqual((D, Pval), (0.46, 2.542e-05), eps=1e-4) + assert_allclose((D, Pval), (0.46, 2.542e-05), rtol=1e-4) D, Pval = ks_test(self.x1, self.x2, alt="less") - self.assertFloatEqual((D, Pval), (6.9388939039072284e-18, 1.0), eps=1e-4) + assert_allclose((D, Pval), (6.9388939039072284e-18, 1.0), rtol=1e-4) D, 
Pval = ks_test(self.x2, self.x1, alt="l") - self.assertFloatEqual((D, Pval), (0.46, 2.542e-05), eps=1e-4) + assert_allclose((D, Pval), (0.46, 2.542e-05), rtol=1e-4) def test_ks_boot(self): """excercising the bootstrapped version of KS""" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_stats/test_number.py python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_number.py --- python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_stats/test_number.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_number.py 2020-12-20 23:35:03.000000000 +0000 @@ -12,7 +12,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -303,6 +303,59 @@ self.assertEqual(i02[("C", "G")], 3) self.assertEqual(i02[("G", "G")], 6) + def test_to_dictarray(self): + """correctly constructs dict arrays""" + d1 = {"T": 87, "C": 81, "A": 142, "expect": [142, 81, 87]} + d2 = { + ("T", "G"): 87, + ("C", "C"): 81, + ("A", "G"): 142, + ("T", "T"): 58, + "expect": [[0, 142, 0], [81, 0, 0], [0, 87, 58]], + } + d3 = { + ("T", "G", "A"): 87, + ("C", "C", "C"): 81, + ("A", "G", "A"): 142, + ("T", "T", "C"): 58, + "expect": [ + [[0, 0], [142, 0], [0, 0]], + [[0, 81], [0, 0], [0, 0]], + [[0, 0], [87, 0], [0, 58]], + ], + } + for d in (d1, d2, d3): + expect = d.pop("expect") + cat_count = number.CategoryCounter(d) + darr = cat_count.to_dictarray() + assert_allclose(darr.array, expect) + + def test_to_categorical(self): + """correctly constructs categorical data""" + d1 = {"T": 87, "C": 81, "A": 142, "expect": [142, 81, 87]} + d2 = { + ("T", "G"): 87, + ("C", "C"): 81, + ("A", "G"): 142, + ("T", "T"): 58, + "expect": [[0, 142, 0], [81, 0, 0], [0, 87, 58]], + } + d3 = { + ("T", "G", "A"): 87, + ("C", "C", "C"): 81, + ("A", "G", 
"A"): 142, + ("T", "T", "C"): 58, + } + for d in (d1, d2): + expect = d.pop("expect") + cats = number.CategoryCounter(d) + cat_count = cats.to_categorical() + assert_allclose(cat_count.observed.array, expect, err_msg=d) + + with self.assertRaises(NotImplementedError): + cats = number.CategoryCounter(d3) + cats.to_categorical() + if __name__ == "__main__": main() diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_stats/test_period.py python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_period.py --- python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_stats/test_period.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_period.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,13 +1,8 @@ +from unittest import TestCase, main + import numpy -from cogent3.maths.period import ( - AutoCorrelation, - Hybrid, - Ipdft, - auto_corr, - hybrid, - ipdft, -) +from cogent3.maths.period import AutoCorrelation, Hybrid, Ipdft, ipdft from cogent3.maths.stats.period import ( SeqToSymbols, blockwise_bootstrap, @@ -17,18 +12,19 @@ g_statistic, seq_to_symbols, ) -from cogent3.util.unit_test import TestCase, main __author__ = "Hua Ying, Julien Epps and Gavin Huttley" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Julien Epps", "Hua Ying", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" +from numpy.testing import assert_allclose, assert_equal + class TestPeriodStat(TestCase): def setUp(self): @@ -202,8 +198,8 @@ """calc g-stat correctly""" X, periods = ipdft(self.sig, llim=2, ulim=39) g_obs, p_val = g_statistic(X) - self.assertFloatEqual(p_val, 0.9997, eps=1e-3) - self.assertFloatEqual(g_obs, 0.0577, eps=1e-3) + assert_allclose(p_val, 0.9997, rtol=1e-3) + assert_allclose(g_obs, 0.0577, rtol=1e-3) def test_circular_indices(self): v = list(range(10)) 
@@ -215,20 +211,18 @@ """both py and pyx seq_to_symbol versions correctly convert a sequence""" motifs = [b"AA", b"AT", b"TT"] symbols = seq_to_symbols(b"AATGGTTA", motifs, 2) - self.assertEqual(symbols, numpy.array([1, 1, 0, 0, 0, 1, 0, 0])) + assert_equal(symbols, numpy.array([1, 1, 0, 0, 0, 1, 0, 0])) symbols = seq_to_symbols(b"AAGATT", motifs, 2, numpy.zeros(6, numpy.uint8)) - self.assertEqual(symbols, numpy.array([1, 0, 0, 1, 1, 0])) + assert_equal(symbols, numpy.array([1, 0, 0, 1, 1, 0])) def test_seq_to_symbol_factory(self): """checks factory function for conversion works""" motifs = ["AA", "AT", "TT"] seq_to_symbols = SeqToSymbols(motifs) got = seq_to_symbols("AATGGTTA") - self.assertEqual(got, numpy.array([1, 1, 0, 0, 0, 1, 0, 0])) + assert_equal(got, numpy.array([1, 1, 0, 0, 0, 1, 0, 0])) got = seq_to_symbols("AAGATT") - self.assertEqual( - seq_to_symbols("AAGATT"), numpy.array([1, 0, 0, 1, 1, 0], numpy.uint8) - ) + assert_equal(got, numpy.array([1, 0, 0, 1, 1, 0], numpy.uint8)) def test_permutation(self): s = ( diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_stats/test_special.py python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_special.py --- python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_stats/test_special.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_special.py 2020-12-20 23:35:03.000000000 +0000 @@ -3,6 +3,8 @@ """ import math +from unittest import TestCase, main + from cogent3.maths.stats.special import ( combinations, combinations_exact, @@ -18,18 +20,19 @@ permutations, permutations_exact, ) -from cogent3.util.unit_test import TestCase, main __author__ = "Rob Knight" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Rob Knight", "Sandra Smit"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin 
Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" +from numpy.testing import assert_allclose, assert_almost_equal + class SpecialTests(TestCase): """Tests miscellaneous functions.""" @@ -43,7 +46,7 @@ self.assertEqual(permutations(4, 2), 12) self.assertEqual(permutations(4, 3), 24) self.assertEqual(permutations(4, 4), 24) - self.assertFloatEqual(permutations(300, 100), 3.8807387193009318e239) + assert_allclose(permutations(300, 100), 3.8807387193009318e239) def test_permutations_errors(self): """permutations should raise errors on invalid input""" @@ -53,14 +56,14 @@ def test_permutations_float(self): """permutations should use gamma function when floats as input""" - self.assertFloatEqual(permutations(1.0, 1), 1) - self.assertFloatEqual(permutations(2, 1.0), 2) - self.assertFloatEqual(permutations(3.0, 1.0), 3) - self.assertFloatEqual(permutations(4.0, 1), 4) - self.assertFloatEqual(permutations(4.0, 2.0), 12) - self.assertFloatEqual(permutations(4.0, 3.0), 24) - self.assertFloatEqual(permutations(4, 4.0), 24) - self.assertFloatEqual(permutations(300, 100), 3.8807387193009318e239) + assert_allclose(permutations(1.0, 1), 1) + assert_allclose(permutations(2, 1.0), 2) + assert_allclose(permutations(3.0, 1.0), 3) + assert_allclose(permutations(4.0, 1), 4) + assert_allclose(permutations(4.0, 2.0), 12) + assert_allclose(permutations(4.0, 3.0), 24) + assert_allclose(permutations(4, 4.0), 24) + assert_allclose(permutations(300, 100), 3.8807387193009318e239) def test_permutations_range(self): """permutations should increase gradually with increasing k""" @@ -79,27 +82,25 @@ def test_permutations_exact(self): """permutations_exact should return expected results""" - self.assertFloatEqual(permutations_exact(1, 1), 1) - self.assertFloatEqual(permutations_exact(2, 1), 2) - self.assertFloatEqual(permutations_exact(3, 1), 3) - self.assertFloatEqual(permutations_exact(4, 1), 4) - self.assertFloatEqual(permutations_exact(4, 2), 12) - 
self.assertFloatEqual(permutations_exact(4, 3), 24) - self.assertFloatEqual(permutations_exact(4, 4), 24) - self.assertFloatEqual(permutations_exact(300, 100), 3.8807387193009318e239) + assert_allclose(permutations_exact(1, 1), 1) + assert_allclose(permutations_exact(2, 1), 2) + assert_allclose(permutations_exact(3, 1), 3) + assert_allclose(permutations_exact(4, 1), 4) + assert_allclose(permutations_exact(4, 2), 12) + assert_allclose(permutations_exact(4, 3), 24) + assert_allclose(permutations_exact(4, 4), 24) + assert_allclose(permutations_exact(300, 100) / 3.8807387193009318e239, 1.0) def test_ln_permutations(self): """ln_permutations should return expected results""" - self.assertFloatEqual(ln_permutations(1, 1), math.log(1)) - self.assertFloatEqual(ln_permutations(2, 1), math.log(2)) - self.assertFloatEqual(ln_permutations(3, 1.0), math.log(3)) - self.assertFloatEqual(ln_permutations(4, 1), math.log(4)) - self.assertFloatEqual(ln_permutations(4.0, 2), math.log(12)) - self.assertFloatEqual(ln_permutations(4, 3.0), math.log(24)) - self.assertFloatEqual(ln_permutations(4, 4), math.log(24)) - self.assertFloatEqual( - ln_permutations(300.0, 100), math.log(3.8807387193009318e239) - ) + assert_allclose(ln_permutations(1, 1), math.log(1)) + assert_allclose(ln_permutations(2, 1), math.log(2)) + assert_allclose(ln_permutations(3, 1.0), math.log(3)) + assert_allclose(ln_permutations(4, 1), math.log(4)) + assert_allclose(ln_permutations(4.0, 2), math.log(12)) + assert_allclose(ln_permutations(4, 3.0), math.log(24)) + assert_allclose(ln_permutations(4, 4), math.log(24)) + assert_allclose(ln_permutations(300.0, 100), math.log(3.8807387193009318e239)) def test_combinations(self): """combinations should return expected results when int as input""" @@ -111,7 +112,7 @@ self.assertEqual(combinations(4, 3), 4) self.assertEqual(combinations(4, 4), 1) self.assertEqual(combinations(20, 4), 19 * 17 * 15) - self.assertFloatEqual(combinations(300, 100), 4.1582514632578812e81) + 
assert_allclose(combinations(300, 100), 4.1582514632578812e81) def test_combinations_errors(self): """combinations should raise errors on invalid input""" @@ -121,15 +122,15 @@ def test_combinations_float(self): """combinations should use gamma function when floats as input""" - self.assertFloatEqual(combinations(1.0, 1.0), 1) - self.assertFloatEqual(combinations(2.0, 1.0), 2) - self.assertFloatEqual(combinations(3.0, 1.0), 3) - self.assertFloatEqual(combinations(4.0, 1.0), 4) - self.assertFloatEqual(combinations(4.0, 2), 6) - self.assertFloatEqual(combinations(4, 3.0), 4) - self.assertFloatEqual(combinations(4.0, 4.0), 1) - self.assertFloatEqual(combinations(20.0, 4.0), 19 * 17 * 15) - self.assertFloatEqual(combinations(300, 100.0), 4.1582514632578812e81) + assert_allclose(combinations(1.0, 1.0), 1) + assert_allclose(combinations(2.0, 1.0), 2) + assert_allclose(combinations(3.0, 1.0), 3) + assert_allclose(combinations(4.0, 1.0), 4) + assert_allclose(combinations(4.0, 2), 6) + assert_allclose(combinations(4, 3.0), 4) + assert_allclose(combinations(4.0, 4.0), 1) + assert_allclose(combinations(20.0, 4.0), 19 * 17 * 15) + assert_allclose(combinations(300, 100.0), 4.1582514632578812e81) def test_combinations_range(self): """combinations should decrease gradually with increasing k""" @@ -156,21 +157,19 @@ self.assertEqual(combinations_exact(4, 3), 4) self.assertEqual(combinations_exact(4, 4), 1) self.assertEqual(combinations_exact(20, 4), 19 * 17 * 15) - self.assertFloatEqual(combinations_exact(300, 100), 4.1582514632578812e81) + assert_allclose(combinations_exact(300, 100), 4.1582514632578812e81) def test_ln_combinations(self): """ln_combinations should return expected results""" - self.assertFloatEqual(ln_combinations(1, 1), math.log(1)) - self.assertFloatEqual(ln_combinations(2, 1), math.log(2)) - self.assertFloatEqual(ln_combinations(3, 1), math.log(3)) - self.assertFloatEqual(ln_combinations(4.0, 1), math.log(4)) - self.assertFloatEqual(ln_combinations(4, 2.0), 
math.log(6)) - self.assertFloatEqual(ln_combinations(4, 3), math.log(4)) - self.assertFloatEqual(ln_combinations(4, 4.0), math.log(1)) - self.assertFloatEqual(ln_combinations(20, 4), math.log(19 * 17 * 15)) - self.assertFloatEqual( - ln_combinations(300, 100), math.log(4.1582514632578812e81) - ) + assert_allclose(ln_combinations(1, 1), math.log(1)) + assert_allclose(ln_combinations(2, 1), math.log(2)) + assert_allclose(ln_combinations(3, 1), math.log(3)) + assert_allclose(ln_combinations(4.0, 1), math.log(4)) + assert_allclose(ln_combinations(4, 2.0), math.log(6)) + assert_allclose(ln_combinations(4, 3), math.log(4)) + assert_allclose(ln_combinations(4, 4.0), math.log(1)) + assert_allclose(ln_combinations(20, 4), math.log(19 * 17 * 15)) + assert_allclose(ln_combinations(300, 100), math.log(4.1582514632578812e81)) def test_ln_binomial_integer(self): """ln_binomial should match R results for integer values""" @@ -187,7 +186,7 @@ (1032, 2050, 0.5): math.log(0.01679804), } for (key, value) in list(expected.items()): - self.assertFloatEqualRel(ln_binomial(*key), value, 1e-4) + assert_allclose(ln_binomial(*key), value, 1e-4) def test_ln_binomial_floats(self): """Binomial exact should match values from R for integer successes""" @@ -204,11 +203,10 @@ for (key, value) in list(expected.items()): min_val, max_val = value assert min_val < ln_binomial(*key) < max_val - # self.assertFloatEqualRel(binomial_exact(*key), value, 1e-4) + # self.assert_allclose(binomial_exact(*key), value, 1e-4) def test_ln_binomial_range(self): - """ln_binomial should increase in a monotonically increasing region. 
- """ + """ln_binomial should increase in a monotonically increasing region.""" start = 0 end = 1 step = 0.1 @@ -224,19 +222,19 @@ def test_log_one_minus_large(self): """log_one_minus_x should return math.log(1-x) if x is large""" - self.assertFloatEqual(log_one_minus(0.2), math.log(1 - 0.2)) + assert_allclose(log_one_minus(0.2), math.log(1 - 0.2)) def test_log_one_minus_small(self): """log_one_minus_x should return -x if x is small""" - self.assertFloatEqualRel(log_one_minus(1e-30), 1e-30) + assert_allclose(log_one_minus(1e-30), -1e-30, rtol=1e-30, atol=1e-30) def test_one_minus_exp_large(self): """one_minus_exp_x should return 1 - math.exp(x) if x is large""" - self.assertFloatEqual(one_minus_exp(0.2), 1 - (math.exp(0.2))) + assert_allclose(one_minus_exp(0.2), 1 - (math.exp(0.2))) def test_one_minus_exp_small(self): """one_minus_exp_x should return -x if x is small""" - self.assertFloatEqual(one_minus_exp(1e-30), -1e-30) + assert_allclose(one_minus_exp(1e-30), -1e-30) def test_log1p(self): """log1p should give same results as cephes""" @@ -269,7 +267,7 @@ 1.09861228867, ] for p, e in zip(p_s, exp): - self.assertFloatEqual(log1p(p), e) + assert_allclose(log1p(p), e) def test_igami(self): """igami should give same result as cephes implementation""" @@ -309,7 +307,7 @@ 188.010915412, ] for o, e in zip(obs, exp): - self.assertFloatEqual(o, e) + assert_allclose(o, e) def test_ndtri(self): """ndtri should give same result as implementation in cephes""" @@ -416,7 +414,7 @@ 2.32634787404, ] obs = [ndtri(i / 100.0) for i in range(100)] - self.assertFloatEqual(obs, exp) + assert_allclose(obs, exp) def test_incbi(self): """incbi results should match cephes libraries""" @@ -575,10 +573,10 @@ for y in yy_range: result = incbi(a, b, y) e = exp[i] - self.assertFloatEqual(e, result) + assert_allclose(e, result) i += 1 # specific cases that failed elsewhere - self.assertFloatEqual(incbi(999, 2, 1e-10), 0.97399698104554944) + assert_allclose(incbi(999, 2, 1e-10), 
0.97399698104554944) # execute tests if called from command line diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_stats/test_test.py python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_test.py --- python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_stats/test_test.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_maths/test_stats/test_test.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,22 +1,29 @@ """Unit tests for statistical tests and utility functions. """ -import math + +from unittest import TestCase, main from numpy import ( arange, array, + asarray, concatenate, - cov, fill_diagonal, + isfinite, + logical_and, ones, + ravel, reshape, - sqrt, testing, tril, + zeros, ) from cogent3.maths.stats.number import NumberCounter from cogent3.maths.stats.test import ( + ALT_HIGH, + ALT_LOW, + ALT_TWO_SIDED, ANOVA_one_way, G_2_by_2, G_fit, @@ -24,10 +31,10 @@ MonteCarloP, ZeroExpectedError, _flatten_lower_triangle, + _get_alternate, _get_rank, _permute_observations, bayes_updates, - calc_contingency_expected, combinations, correlation, correlation_matrix, @@ -51,6 +58,7 @@ mw_boot, mw_test, pearson, + pearson_correlation, permute_2d, posteriors, regress, @@ -67,13 +75,10 @@ t_one_observation, t_one_sample, t_paired, - t_tailed_prob, t_two_sample, tail, - z_tailed_prob, z_test, ) -from cogent3.util.unit_test import TestCase, main __author__ = "Rob Knight" @@ -88,11 +93,41 @@ "Michael Dwan", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" +from numpy.testing import assert_allclose, assert_equal + + +def is_prob(value): + """helper function to establish a 0 <= value <= 1""" + value = asarray(value) + return logical_and(value >= 0, value <= 1.0).all() + + +def similar_means(observed, expected, pvalue=0.01): + """False 
if observed p is lower than pvalue""" + + observed, expected = asarray(observed), asarray(expected) + + t, p = t_two_sample(observed, expected) + + # handle case where all elements were the same + if p is None or not isfinite(p): + + if not observed.shape: + observed = observed.reshape((1,)) + + if not expected.shape: + expected = expected.reshape((1,)) + + if observed[0] == expected[0]: + return True + + return p > pvalue + class TestsHelper(TestCase): """Class with utility methods useful for other tests.""" @@ -137,7 +172,7 @@ except TypeError: p_val = obs[p_val_idx] - self.assertIsProb(p_val) + self.assertTrue(is_prob(p_val)) if p_val >= exp_min and p_val <= exp_max: found_match = True break @@ -150,7 +185,7 @@ def test_std(self): """Should produce a standard deviation of 1.0 for a std normal dist""" expected = 1.58113883008 - self.assertFloatEqual(std(array([1, 2, 3, 4, 5])), expected) + assert_allclose(std(array([1, 2, 3, 4, 5])), expected) expected_a = array([expected, expected, expected, expected, expected]) a = array( @@ -162,8 +197,8 @@ [2, 3, 4, 5, 1], ] ) - self.assertFloatEqual(std(a, axis=0), expected_a) - self.assertFloatEqual(std(a, axis=1), expected_a) + assert_allclose(std(a, axis=0), expected_a) + assert_allclose(std(a, axis=1), expected_a) self.assertRaises(ValueError, std, a, 5) def test_std_2d(self): @@ -210,117 +245,124 @@ expected = array([5.5, 6.5, 7.5]) observed = median(m, axis=0) - self.assertEqual(observed, expected) + assert_equal(observed, expected) expected = array([2.0, 5.0, 8.0, 11.0]) observed = median(m, axis=1) - self.assertEqual(observed, expected) + assert_equal(observed, expected) self.assertRaises(ValueError, median, m, 10) def test_tail(self): """tail should return x/2 if test is true; 1-(x/2) otherwise""" - self.assertFloatEqual(tail(0.25, "a" == "a"), 0.25 / 2) - self.assertFloatEqual(tail(0.25, "a" != "a"), 1 - (0.25 / 2)) + assert_allclose(tail(0.25, "a" == "a"), 0.25 / 2) + assert_allclose(tail(0.25, "a" != "a"), 1 - 
(0.25 / 2)) def test_combinations(self): """combinations should return correct binomial coefficient""" - self.assertFloatEqual(combinations(5, 3), 10) - self.assertFloatEqual(combinations(5, 2), 10) + assert_allclose(combinations(5, 3), 10) + assert_allclose(combinations(5, 2), 10) # only one way to pick no items or the same number of items - self.assertFloatEqual(combinations(123456789, 0), 1) - self.assertFloatEqual(combinations(123456789, 123456789), 1) + assert_allclose(combinations(123456789, 0), 1) + assert_allclose(combinations(123456789, 123456789), 1) # n ways to pick one item - self.assertFloatEqual(combinations(123456789, 1), 123456789) + assert_allclose(combinations(123456789, 1), 123456789, rtol=1e-6) # n(n-1)/2 ways to pick 2 items - self.assertFloatEqual(combinations(123456789, 2), 123456789 * 123456788 / 2) + assert_allclose(combinations(123456789, 2), 123456789 * 123456788 / 2) # check an arbitrary value in R - self.assertFloatEqual(combinations(1234567, 12), 2.617073e64) + assert_allclose(combinations(1234567, 12), 2.617073e64, rtol=1e-6) def test_multiple_comparisons(self): """multiple_comparisons should match values from R""" - self.assertFloatEqual(multiple_comparisons(1e-7, 10000), 1 - 0.9990005) - self.assertFloatEqual(multiple_comparisons(0.05, 10), 0.4012631) - self.assertFloatEqual(multiple_comparisons(1e-20, 1), 1e-20) - self.assertFloatEqual(multiple_comparisons(1e-300, 1), 1e-300) - self.assertFloatEqual(multiple_comparisons(0.95, 3), 0.99987499999999996) - self.assertFloatEqual(multiple_comparisons(0.75, 100), 0.999999999999679) - self.assertFloatEqual(multiple_comparisons(0.5, 1000), 1) - self.assertFloatEqual(multiple_comparisons(0.01, 1000), 0.99995682875259) - self.assertFloatEqual(multiple_comparisons(0.5, 5), 0.96875) - self.assertFloatEqual(multiple_comparisons(1e-20, 10), 1e-19) + assert_allclose( + multiple_comparisons(1e-7, 10000), 1 - 0.9990005, rtol=1e-6, atol=1e-6 + ) + assert_allclose(multiple_comparisons(0.05, 10), 
0.4012631) + assert_allclose(multiple_comparisons(1e-20, 1), 1e-20) + assert_allclose(multiple_comparisons(1e-300, 1), 1e-300) + assert_allclose(multiple_comparisons(0.95, 3), 0.99987499999999996) + assert_allclose(multiple_comparisons(0.75, 100), 0.999999999999679) + assert_allclose(multiple_comparisons(0.5, 1000), 1) + assert_allclose(multiple_comparisons(0.01, 1000), 0.99995682875259) + assert_allclose(multiple_comparisons(0.5, 5), 0.96875) + assert_allclose(multiple_comparisons(1e-20, 10), 1e-19) def test_multiple_inverse(self): """multiple_inverse should invert multiple_comparisons results""" # NOTE: multiple_inverse not very accurate close to 1 - self.assertFloatEqual(multiple_inverse(1 - 0.9990005, 10000), 1e-7) - self.assertFloatEqual(multiple_inverse(0.4012631, 10), 0.05) - self.assertFloatEqual(multiple_inverse(1e-20, 1), 1e-20) - self.assertFloatEqual(multiple_inverse(1e-300, 1), 1e-300) - self.assertFloatEqual(multiple_inverse(0.96875, 5), 0.5) - self.assertFloatEqual(multiple_inverse(1e-19, 10), 1e-20) + assert_allclose( + multiple_inverse(1 - 0.9990005, 10000), 1e-7, rtol=1e-6, atol=1e-6 + ) + assert_allclose(multiple_inverse(0.4012631, 10), 0.05, rtol=1e-6, atol=1e-6) + assert_allclose(multiple_inverse(1e-20, 1), 1e-20) + assert_allclose(multiple_inverse(1e-300, 1), 1e-300) + assert_allclose(multiple_inverse(0.96875, 5), 0.5) + assert_allclose(multiple_inverse(1e-19, 10), 1e-20) def test_multiple_n(self): """multiple_n should swap parameters in multiple_comparisons""" - self.assertFloatEqual(multiple_n(1e-7, 1 - 0.9990005), 10000) - self.assertFloatEqual(multiple_n(0.05, 0.4012631), 10) - self.assertFloatEqual(multiple_n(1e-20, 1e-20), 1) - self.assertFloatEqual(multiple_n(1e-300, 1e-300), 1) - self.assertFloatEqual(multiple_n(0.95, 0.99987499999999996), 3) - self.assertFloatEqual(multiple_n(0.5, 0.96875), 5) - self.assertFloatEqual(multiple_n(1e-20, 1e-19), 10) + assert_allclose(multiple_n(1e-7, 1 - 0.9990005), 10000, rtol=1e-6, atol=1e-6) + 
assert_allclose(multiple_n(0.05, 0.4012631), 10, rtol=1e-6, atol=1e-6) + assert_allclose(multiple_n(1e-20, 1e-20), 1) + assert_allclose(multiple_n(1e-300, 1e-300), 1) + assert_allclose(multiple_n(0.95, 0.99987499999999996), 3) + assert_allclose(multiple_n(0.5, 0.96875), 5) + assert_allclose(multiple_n(1e-20, 1e-19), 10) def test_fisher(self): """fisher results should match p 795 Sokal and Rohlf""" - self.assertFloatEqual( - fisher([0.073, 0.086, 0.10, 0.080, 0.060]), 0.0045957946540917905 + assert_allclose( + fisher([0.073, 0.086, 0.10, 0.080, 0.060]), + 0.0045957946540917905, + rtol=1e-6, + atol=1e-6, ) def test_regress(self): """regression slope, intercept should match p 459 Sokal and Rohlf""" x = [0, 12, 29.5, 43, 53, 62.5, 75.5, 85, 93] y = [8.98, 8.14, 6.67, 6.08, 5.90, 5.83, 4.68, 4.20, 3.72] - self.assertFloatEqual(regress(x, y), (-0.05322, 8.7038), 0.001) + assert_allclose(regress(x, y), (-0.05322, 8.7038), 0.001) # higher precision from OpenOffice - self.assertFloatEqual(regress(x, y), (-0.05322215, 8.70402730)) + assert_allclose(regress(x, y), (-0.05322215, 8.70402730)) # add test to confirm no overflow error with large numbers x = [32119, 33831] y = [2.28, 2.43] exp = (8.761682243e-05, -5.341209112e-01) - self.assertFloatEqual(regress(x, y), exp, 0.001) + assert_allclose(regress(x, y), exp, 0.001) def test_regress_origin(self): """regression slope constrained through origin should match Excel""" x = array([1, 2, 3, 4]) y = array([4, 2, 6, 8]) - self.assertFloatEqual(regress_origin(x, y), (1.9333333, 0)) + assert_allclose(regress_origin(x, y), (1.9333333, 0)) # add test to confirm no overflow error with large numbers x = [32119, 33831] y = [2.28, 2.43] exp = (7.1428649481939822e-05, 0) - self.assertFloatEqual(regress_origin(x, y), exp, 0.001) + assert_allclose(regress_origin(x, y), exp, 0.001) def test_regress_R2(self): """regress_R2 returns the R^2 value of a regression""" x = [1.0, 2.0, 3.0, 4.0, 5.0] y = [2.1, 4.2, 5.9, 8.4, 9.6] result = regress_R2(x, 
y) - self.assertFloatEqual(result, 0.99171419347896) + assert_allclose(result, 0.99171419347896) def test_regress_residuals(self): """regress_residuals reprts error for points in linear regression""" x = [1.0, 2.0, 3.0, 4.0, 5.0] y = [2.1, 4.2, 5.9, 8.4, 9.6] result = regress_residuals(x, y) - self.assertFloatEqual(result, [-0.1, 0.08, -0.14, 0.44, -0.28]) + assert_allclose(result, [-0.1, 0.08, -0.14, 0.44, -0.28]) def test_stdev_from_mean(self): """stdev_from_mean returns num std devs from mean for each val in x""" x = [2.1, 4.2, 5.9, 8.4, 9.6] result = stdev_from_mean(x) - self.assertFloatEqual( + assert_allclose( result, [ -1.292463399014413, @@ -351,10 +393,12 @@ 17.25, 9.52, ] - self.assertFloatEqual(regress_major(x, y), (18.93633, -32.55208)) + assert_allclose(regress_major(x, y), (18.93633, -32.55208), rtol=1e-6) def test_sign_test(self): """sign_test, should match values from R""" + import numpy + v = [ ("two sided", 26, 50, 0.88772482734078251), ("less", 26, 50, 0.6641), @@ -364,21 +408,29 @@ ("2", 30, 50, 0.20263875106454063), ("h", 49, 50, 4.5297099404706387e-14), ("h", 50, 50, 8.8817841970012543e-16), + ("2", numpy.int64(95), 124, 2.204644901720111e-09), ] for alt, success, trials, p in v: result = sign_test(success, trials, alt=alt) - self.assertFloatEqual(result, p, eps=1e-5) + assert_allclose(result, p, rtol=1e-5) def test_permute_2d(self): """permute_2d permutes rows and cols of a matrix.""" a = reshape(arange(9), (3, 3)) - self.assertEqual(permute_2d(a, [0, 1, 2]), a) - self.assertEqual( - permute_2d(a, [2, 1, 0]), array([[8, 7, 6], [5, 4, 3], [2, 1, 0]]) - ) - self.assertEqual( - permute_2d(a, [1, 2, 0]), array([[4, 5, 3], [7, 8, 6], [1, 2, 0]]) - ) + assert_equal(permute_2d(a, [0, 1, 2]), a) + assert_equal(permute_2d(a, [2, 1, 0]), array([[8, 7, 6], [5, 4, 3], [2, 1, 0]])) + assert_equal(permute_2d(a, [1, 2, 0]), array([[4, 5, 3], [7, 8, 6], [1, 2, 0]])) + + def test_get_alternate(self): + """correctly identifies the specified alternate 
hypothesis""" + alt = _get_alternate("lo") + self.assertEqual(alt, ALT_LOW) + alt = _get_alternate("hi") + self.assertEqual(alt, ALT_HIGH) + alt = _get_alternate("2") + self.assertEqual(alt, ALT_TWO_SIDED) + with self.assertRaises(ValueError): + _get_alternate("22") class GTests(TestCase): @@ -386,9 +438,9 @@ def test_G_2_by_2_2tailed_equal(self): """G_2_by_2 should return 0 if all cell counts are equal""" - self.assertFloatEqual(0, G_2_by_2(1, 1, 1, 1, False, False)[0]) - self.assertFloatEqual(0, G_2_by_2(100, 100, 100, 100, False, False)[0]) - self.assertFloatEqual(0, G_2_by_2(100, 100, 100, 100, True, False)[0]) + assert_allclose(0, G_2_by_2(1, 1, 1, 1, False, False)[0]) + assert_allclose(0, G_2_by_2(100, 100, 100, 100, False, False)[0]) + assert_allclose(0, G_2_by_2(100, 100, 100, 100, True, False)[0]) def test_G_2_by_2_bad_data(self): """G_2_by_2 should raise ValueError if any counts are negative""" @@ -398,29 +450,27 @@ """G_2_by_2 values should match examples in Sokal & Rohlf""" # example from p 731, Sokal and Rohlf (1995) # without correction - self.assertFloatEqual( - G_2_by_2(12, 22, 16, 50, False, False)[0], 1.33249, 0.0001 - ) - self.assertFloatEqual( - G_2_by_2(12, 22, 16, 50, False, False)[1], 0.24836, 0.0001 - ) + assert_allclose(G_2_by_2(12, 22, 16, 50, False, False)[0], 1.33249, 0.0001) + assert_allclose(G_2_by_2(12, 22, 16, 50, False, False)[1], 0.24836, 0.0001) # with correction - self.assertFloatEqual(G_2_by_2(12, 22, 16, 50, True, False)[0], 1.30277, 0.0001) - self.assertFloatEqual(G_2_by_2(12, 22, 16, 50, True, False)[1], 0.25371, 0.0001) + assert_allclose(G_2_by_2(12, 22, 16, 50, True, False)[0], 1.30277, 0.0001) + assert_allclose(G_2_by_2(12, 22, 16, 50, True, False)[1], 0.25371, 0.0001) def test_G_2_by_2_1tailed_examples(self): """G_2_by_2 values should match values from codon_binding program""" # first up...the famous arginine case - self.assertFloatEqualAbs(G_2_by_2(36, 16, 38, 106), (29.111609, 0), 0.00001) + assert_allclose( + 
G_2_by_2(36, 16, 38, 106), (29.111609, 0), rtol=0.00001, atol=1e-6 + ) # then some other miscellaneous positive and negative values - self.assertFloatEqualAbs( - G_2_by_2(0, 52, 12, 132), (-7.259930, 0.996474), 0.00001 + assert_allclose( + G_2_by_2(0, 52, 12, 132), (-7.259930, 0.996474), rtol=0.00001, atol=1e-6 ) - self.assertFloatEqualAbs( - G_2_by_2(5, 47, 14, 130), (-0.000481, 0.508751), 0.00001 + assert_allclose( + G_2_by_2(5, 47, 14, 130), (-0.000481, 0.508751), rtol=0.00001, atol=1e-6 ) - self.assertFloatEqualAbs( - G_2_by_2(5, 47, 36, 108), (-6.065167, 0.993106), 0.00001 + assert_allclose( + G_2_by_2(5, 47, 36, 108), (-6.065167, 0.993106), rtol=0.00001, atol=1e-6 ) def test_Gfit_unequal_lists(self): @@ -452,21 +502,21 @@ 15.06250, ] # without correction - self.assertFloatEqualAbs(G_fit(obs, exp, False)[0], 8.82397, 0.00002) - self.assertFloatEqualAbs(G_fit(obs, exp, False)[1], 0.26554, 0.00002) + assert_allclose(G_fit(obs, exp, False)[0], 8.82397, 0.00002) + assert_allclose(G_fit(obs, exp, False)[1], 0.26554, 0.00002) # with correction - self.assertFloatEqualAbs(G_fit(obs, exp)[0], 8.76938, 0.00002) - self.assertFloatEqualAbs(G_fit(obs, exp)[1], 0.26964, 0.00002) + assert_allclose(G_fit(obs, exp)[0], 8.76938, 0.00002) + assert_allclose(G_fit(obs, exp)[1], 0.26964, 0.00002) # example from p. 
700, Sokal and Rohlf (1995) obs = [130, 46] exp = [132, 44] # without correction - self.assertFloatEqualAbs(G_fit(obs, exp, False)[0], 0.12002, 0.00002) - self.assertFloatEqualAbs(G_fit(obs, exp, False)[1], 0.72901, 0.00002) + assert_allclose(G_fit(obs, exp, False)[0], 0.12002, 0.00002) + assert_allclose(G_fit(obs, exp, False)[1], 0.72901, 0.00002) # with correction - self.assertFloatEqualAbs(G_fit(obs, exp)[0], 0.11968, 0.00002) - self.assertFloatEqualAbs(G_fit(obs, exp)[1], 0.72938, 0.00002) + assert_allclose(G_fit(obs, exp)[0], 0.11968, 0.00002) + assert_allclose(G_fit(obs, exp)[1], 0.72938, 0.00002) def test_safe_sum_p_log_p(self): """safe_sum_p_log_p should ignore zero elements, not raise error""" @@ -476,8 +526,8 @@ def test_G_ind(self): """G test for independence should match Sokal and Rohlf p 738 values""" a = array([[29, 11], [273, 191], [8, 31], [64, 64]]) - self.assertFloatEqual(G_ind(a)[0], 28.59642) - self.assertFloatEqual(G_ind(a, True)[0], 28.31244) + assert_allclose(G_ind(a)[0], 28.59642) + assert_allclose(G_ind(a, True)[0], 28.31244, rtol=1e-5) class LikelihoodTests(TestCase): @@ -494,10 +544,10 @@ equal_answer = [1, 1, 1, 1] unequal_answer = [2, 1, 0.5, 0.5] for obs, exp in zip(likelihoods(equal, equal), equal_answer): - self.assertFloatEqual(obs, exp) + assert_allclose(obs, exp) for obs, exp in zip(likelihoods(unequal, equal), unequal_answer): - self.assertFloatEqual(obs, exp) + assert_allclose(obs, exp) def test_likelihoods_equal_evidence(self): """likelihoods should return vector of 1's if evidence equal for all""" @@ -508,11 +558,11 @@ not_unity = [0.7, 0.7, 0.7, 0.7] for obs, exp in zip(likelihoods(equal, unequal), equal_answer): - self.assertFloatEqual(obs, exp) + assert_allclose(obs, exp) # should be the same if evidences don't sum to 1 for obs, exp in zip(likelihoods(not_unity, unequal), equal_answer): - self.assertFloatEqual(obs, exp) + assert_allclose(obs, exp) def test_likelihoods_unequal_evidence(self): """likelihoods should update 
based on weighted sum if evidence unequal""" @@ -523,7 +573,7 @@ # if priors and evidence both unequal, likelihoods should change # (calculated using StarCalc) for obs, exp in zip(likelihoods(not_unity, unequal), products): - self.assertFloatEqual(obs, exp) + assert_allclose(obs, exp) def test_posteriors_unequal_lists(self): """posteriors should raise ValueError if input lists unequal lengths""" @@ -535,7 +585,7 @@ second = [0.25, 0.5, 0, 0.1, 1] product = [0, 0.125, 0, 0.1, 0.25] for obs, exp in zip(posteriors(first, second), product): - self.assertFloatEqual(obs, exp) + assert_allclose(obs, exp) class BayesUpdateTests(TestCase): @@ -565,21 +615,21 @@ """bayes_updates should match hand calculations of probability updates""" # result for first -> fourth calculated by hand for obs, exp in zip(bayes_updates(self.test), self.result): - self.assertFloatEqualAbs(obs, exp, 1e-11) + assert_allclose(obs, exp, rtol=1e-11, atol=1e-6) def test_bayes_updates_permuted(self): """bayes_updates should not be affected by order of inputs""" for obs, exp in zip(bayes_updates(self.permuted), self.result): - self.assertFloatEqualAbs(obs, exp, 1e-11) + assert_allclose(obs, exp, rtol=1e-11, atol=1e-6) def test_bayes_update_nondiscriminating(self): """bayes_updates should be unaffected by extra nondiscriminating data""" # deletion of non-discriminating evidence should not affect result for obs, exp in zip(bayes_updates(self.deleted), self.result): - self.assertFloatEqualAbs(obs, exp, 1e-11) + assert_allclose(obs, exp, rtol=1e-5, atol=1e-6) # additional non-discriminating evidence should not affect result for obs, exp in zip(bayes_updates(self.extra), self.result): - self.assertFloatEqualAbs(obs, exp, 1e-11) + assert_allclose(obs, exp, rtol=1e-5, atol=1e-6) class StatTests(TestsHelper): @@ -628,8 +678,8 @@ """t_paired should match values from Sokal & Rohlf p 353""" x, y = self.x, self.y # check value of t and the probability for 2-tailed - self.assertFloatEqual(t_paired(y, x)[0], 19.7203, 
1e-4) - self.assertFloatEqual(t_paired(y, x)[1], 1.301439e-11, 1e-4) + assert_allclose(t_paired(y, x)[0], 19.7203, 1e-4) + assert_allclose(t_paired(y, x)[1], 1.301439e-11, 1e-4) def test_t_paired_no_variance(self): """t_paired should return None if lists are invariant""" @@ -642,10 +692,10 @@ """t_paired should match pre-calculated 1-tailed values""" x, y = self.x, self.y # check probability for 1-tailed low and high - self.assertFloatEqual(t_paired(y, x, "low")[1], 1 - (1.301439e-11 / 2), 1e-4) - self.assertFloatEqual(t_paired(x, y, "high")[1], 1 - (1.301439e-11 / 2), 1e-4) - self.assertFloatEqual(t_paired(y, x, "high")[1], 1.301439e-11 / 2, 1e-4) - self.assertFloatEqual(t_paired(x, y, "low")[1], 1.301439e-11 / 2, 1e-4) + assert_allclose(t_paired(y, x, "low")[1], 1 - (1.301439e-11 / 2), 1e-4) + assert_allclose(t_paired(x, y, "high")[1], 1 - (1.301439e-11 / 2), 1e-4) + assert_allclose(t_paired(y, x, "high")[1], 1.301439e-11 / 2, 1e-4) + assert_allclose(t_paired(x, y, "low")[1], 1.301439e-11 / 2, 1e-4) def test_t_paired_specific_difference(self): """t_paired should allow a specific difference to be passed""" @@ -655,7 +705,7 @@ # same, except that reversing list order reverses sign of difference self.assertFalse(t_paired(x, y, exp_diff=-0.2)[0] > 1e-10) # check that there's no significant difference from the true mean - self.assertFloatEqual(t_paired(y, x, exp_diff=0.2)[1], 1, 1e-4) + assert_allclose(t_paired(y, x, exp_diff=0.2)[1], 1, 1e-4) def test_t_paired_bad_data(self): """t_paired should raise ValueError on lists of different lengths""" @@ -665,7 +715,7 @@ """t_two_sample should match example on p.225 of Sokal and Rohlf""" I = array([7.2, 7.1, 9.1, 7.2, 7.3, 7.2, 7.5]) II = array([8.8, 7.5, 7.7, 7.6, 7.4, 6.7, 7.2]) - self.assertFloatEqual(t_two_sample(I, II), (-0.1184, 0.45385 * 2), 0.001) + assert_allclose(t_two_sample(I, II), (-0.1184, 0.45385 * 2), rtol=0.01) def test_t_two_sample_no_variance(self): """t_two_sample should properly handle lists that are 
invariant""" @@ -676,7 +726,7 @@ self.assertEqual(t_two_sample(x, y), (None, None)) # Test none_on_zero_variance=False on various tail types. We use - # self.assertEqual instead of self.assertFloatEqual because the latter + # self.assertEqual instead of assert_allclose because the latter # sees inf and -inf as being equal. # Two tailed: a < b @@ -728,25 +778,25 @@ """t_one_sample results should match those from R""" x = array(list(range(-5, 5))) y = array(list(range(-1, 10))) - self.assertFloatEqualAbs(t_one_sample(x), (-0.5222, 0.6141), 1e-4) - self.assertFloatEqualAbs(t_one_sample(y), (4, 0.002518), 1e-4) + assert_allclose(t_one_sample(x), (-0.5222, 0.6141), 1e-4) + assert_allclose(t_one_sample(y), (4, 0.002518), rtol=1e-3) # do some one-tailed tests as well - self.assertFloatEqualAbs(t_one_sample(y, tails="low"), (4, 0.9987), 1e-4) - self.assertFloatEqualAbs(t_one_sample(y, tails="high"), (4, 0.001259), 1e-4) + assert_allclose(t_one_sample(y, tails="low"), (4, 0.9987), rtol=1e-3) + assert_allclose(t_one_sample(y, tails="high"), (4, 0.001259), rtol=1e-3) def test_t_two_sample_switch(self): """t_two_sample should call t_one_observation if 1 item in sample.""" sample = array([4.02, 3.88, 3.34, 3.87, 3.18]) x = array([3.02]) - self.assertFloatEqual(t_two_sample(x, sample), (-1.5637254, 0.1929248)) - self.assertFloatEqual(t_two_sample(sample, x), (1.5637254, 0.1929248)) + assert_allclose(t_two_sample(x, sample), (-1.5637254, 0.1929248)) + assert_allclose(t_two_sample(sample, x), (1.5637254, 0.1929248)) # can't do the test if both samples have single item self.assertEqual(t_two_sample(x, x), (None, None)) # Test special case if t=0. - self.assertFloatEqual(t_two_sample([2], [1, 2, 3]), (0.0, 1.0)) - self.assertFloatEqual(t_two_sample([1, 2, 3], [2]), (0.0, 1.0)) + assert_allclose(t_two_sample([2], [1, 2, 3]), (0.0, 1.0)) + assert_allclose(t_two_sample([1, 2, 3], [2]), (0.0, 1.0)) def test_t_one_observation(self): """t_one_observation should match p. 
228 of Sokal and Rohlf""" @@ -754,7 +804,7 @@ x = 3.02 # note that this differs after the 3rd decimal place from what's in the # book, because Sokal and Rohlf round their intermediate steps... - self.assertFloatEqual(t_one_observation(x, sample), (-1.5637254, 0.1929248)) + assert_allclose(t_one_observation(x, sample), (-1.5637254, 0.1929248)) def test_t_one_observation_no_variance(self): """t_one_observation should correctly handle an invariant list.""" @@ -787,7 +837,7 @@ I = array([7.2, 7.1, 9.1, 7.2, 7.3, 7.2, 7.5]) II = array([8.8, 7.5, 7.7, 7.6, 7.4, 6.7, 7.2]) obs = mc_t_two_sample(I, II) - self.assertFloatEqual(obs[:2], exp) + assert_allclose(obs[:2], exp) self.assertEqual(len(obs[2]), 999) self.assertCorrectPValue(0.8, 0.9, mc_t_two_sample, [I, II], p_val_idx=3) @@ -796,13 +846,13 @@ I = [7.2, 7.1, 9.1, 7.2, 7.3, 7.2, 7.5] II = [8.8, 7.5, 7.7, 7.6, 7.4, 6.7, 7.2] obs = mc_t_two_sample(I, II) - self.assertFloatEqual(obs[:2], exp) + assert_allclose(obs[:2], exp) self.assertEqual(len(obs[2]), 999) self.assertCorrectPValue(0.8, 0.9, mc_t_two_sample, [I, II], p_val_idx=3) exp = (-0.11858541225631833, 0.45378289658933718) obs = mc_t_two_sample(I, II, tails="low") - self.assertFloatEqual(obs[:2], exp) + assert_allclose(obs[:2], exp) self.assertEqual(len(obs[2]), 999) self.assertCorrectPValue( 0.4, 0.47, mc_t_two_sample, [I, II], {"tails": "low"}, p_val_idx=3 @@ -810,7 +860,7 @@ exp = (-0.11858541225631833, 0.54621710341066287) obs = mc_t_two_sample(I, II, tails="high", permutations=99) - self.assertFloatEqual(obs[:2], exp) + assert_allclose(obs[:2], exp) self.assertEqual(len(obs[2]), 99) self.assertCorrectPValue( 0.4, @@ -823,7 +873,7 @@ exp = (-2.8855783649036986, 0.99315596652421401) obs = mc_t_two_sample(I, II, tails="high", permutations=99, exp_diff=1) - self.assertFloatEqual(obs[:2], exp) + assert_allclose(obs[:2], exp) self.assertEqual(len(obs[2]), 99) self.assertCorrectPValue( 0.55, @@ -841,7 +891,7 @@ I = array([7.2, 7.1, 9.1, 7.2, 7.3, 7.2]) II = 
array([8.8, 7.5, 7.7, 7.6, 7.4, 6.7, 7.2]) obs = mc_t_two_sample(I, II) - self.assertFloatEqual(obs[:2], exp) + assert_allclose(obs[:2], exp) self.assertEqual(len(obs[2]), 999) self.assertCorrectPValue(0.8, 0.9, mc_t_two_sample, [I, II], p_val_idx=3) @@ -851,24 +901,24 @@ x = array([3.02]) exp = (-1.5637254, 0.1929248) obs = mc_t_two_sample(x, sample) - self.assertFloatEqual(obs[:2], exp) - self.assertFloatEqual(len(obs[2]), 999) - self.assertIsProb(obs[3]) + assert_allclose(obs[:2], exp) + assert_allclose(len(obs[2]), 999) + self.assertTrue(is_prob(obs[3])) exp = (1.5637254, 0.1929248) obs = mc_t_two_sample(sample, x) - self.assertFloatEqual(obs[:2], exp) - self.assertFloatEqual(len(obs[2]), 999) - self.assertIsProb(obs[3]) + assert_allclose(obs[:2], exp) + assert_allclose(len(obs[2]), 999) + self.assertTrue(is_prob(obs[3])) # Test the case where we can have no variance in the permuted lists. x = array([1, 1, 2]) y = array([1]) exp = (0.5, 0.666666666667) obs = mc_t_two_sample(x, y) - self.assertFloatEqual(obs[:2], exp) - self.assertFloatEqual(len(obs[2]), 999) - self.assertIsProb(obs[3]) + assert_allclose(obs[:2], exp) + assert_allclose(len(obs[2]), 999) + self.assertTrue(is_prob(obs[3])) def test_mc_t_two_sample_no_perms(self): """Test gives empty permutation results if no perms are given.""" @@ -876,7 +926,8 @@ I = array([7.2, 7.1, 9.1, 7.2, 7.3, 7.2, 7.5]) II = array([8.8, 7.5, 7.7, 7.6, 7.4, 6.7, 7.2]) obs = mc_t_two_sample(I, II, permutations=0) - self.assertFloatEqual(obs, exp) + assert_allclose(obs[:2], exp[:2]) + assert_equal(obs[2:], exp[2:]) def test_mc_t_two_sample_no_mc(self): """Test no MC stats if initial t-test is bad.""" @@ -944,7 +995,7 @@ exp = (-0.70710678118654791, 0.51851851851851838) obs = mc_t_two_sample(x, y, permutations=10000) - self.assertFloatEqual(obs[:2], exp) + assert_allclose(obs[:2], exp) self.assertEqual(len(obs[2]), 10000) self.assertCorrectPValue( 0.97, 1.0, mc_t_two_sample, [x, y], {"permutations": 10000}, p_val_idx=3 @@ 
-970,44 +1021,41 @@ self.assertEqual(len(obs[1]), 1) self.assertEqual(len(obs[0][0]), len(I)) self.assertEqual(len(obs[1][0]), len(II)) - self.assertFloatEqual( - sorted(concatenate((obs[0][0], obs[1][0]))), sorted(I + II) - ) + assert_allclose(sorted(concatenate((obs[0][0], obs[1][0]))), sorted(I + II)) def test_reverse_tails(self): """reverse_tails should return 'high' if tails was 'low' or vice versa""" self.assertEqual(reverse_tails("high"), "low") self.assertEqual(reverse_tails("low"), "high") - self.assertEqual(reverse_tails(None), None) - self.assertEqual(reverse_tails(3), 3) + self.assertEqual(reverse_tails(None), ALT_TWO_SIDED) def test_tail(self): """tail should return prob/2 if test is true, or 1-(prob/2) if false""" - self.assertFloatEqual(tail(0.25, True), 0.125) - self.assertFloatEqual(tail(0.25, False), 0.875) - self.assertFloatEqual(tail(1, True), 0.5) - self.assertFloatEqual(tail(1, False), 0.5) - self.assertFloatEqual(tail(0, True), 0) - self.assertFloatEqual(tail(0, False), 1) + assert_allclose(tail(0.25, True), 0.125) + assert_allclose(tail(0.25, False), 0.875) + assert_allclose(tail(1, True), 0.5) + assert_allclose(tail(1, False), 0.5) + assert_allclose(tail(0, True), 0) + assert_allclose(tail(0, False), 1) def test_z_test(self): """z_test should give correct values""" sample = array([1, 2, 3, 4, 5]) - self.assertFloatEqual(z_test(sample, 3, 1), (0, 1)) - self.assertFloatEqual(z_test(sample, 3, 2, "high"), (0, 0.5)) - self.assertFloatEqual(z_test(sample, 3, 2, "low"), (0, 0.5)) + assert_allclose(z_test(sample, 3, 1), (0, 1)) + assert_allclose(z_test(sample, 3, 2, "high"), (0, 0.5)) + assert_allclose(z_test(sample, 3, 2, "low"), (0, 0.5)) # check that population mean and variance, and tails, can be set OK. 
- self.assertFloatEqual( + assert_allclose( z_test(sample, 0, 1), (6.7082039324993694, 1.9703444711798951e-11) ) - self.assertFloatEqual( + assert_allclose( z_test(sample, 1, 10), (0.44721359549995793, 0.65472084601857694) ) - self.assertFloatEqual( + assert_allclose( z_test(sample, 1, 10, "high"), (0.44721359549995793, 0.65472084601857694 / 2), ) - self.assertFloatEqual( + assert_allclose( z_test(sample, 1, 10, "low"), (0.44721359549995793, 1 - (0.65472084601857694 / 2)), ) @@ -1060,14 +1108,14 @@ m1 = array([[0, 1, 2], [1, 0, 3], [2, 3, 0]]) m2 = array([[0, 2, 7], [2, 0, 6], [7, 6, 0]]) p, stat, perms = mantel_test(m1, m1, 999, alt="greater") - self.assertFloatEqual(stat, 1.0) + assert_allclose(stat, 1.0) self.assertEqual(len(perms), 999) self.assertCorrectPValue( 0.09, 0.25, mantel_test, (m1, m1, 999), {"alt": "greater"} ) p, stat, perms = mantel_test(m1, m2, 999, alt="greater") - self.assertFloatEqual(stat, 0.755928946018) + assert_allclose(stat, 0.755928946018) self.assertEqual(len(perms), 999) self.assertCorrectPValue( 0.2, 0.5, mantel_test, (m1, m2, 999), {"alt": "greater"} @@ -1082,17 +1130,17 @@ m2 = array([[0, 2, 7], [2, 0, 6], [7, 6, 0]]) m3 = array([[0, 0.5, 0.25], [0.5, 0, 0.1], [0.25, 0.1, 0]]) p, stat, perms = mantel_test(m1, m1, 999, alt="less") - self.assertFloatEqual(p, 1.0) - self.assertFloatEqual(stat, 1.0) + assert_allclose(p, 1.0) + assert_allclose(stat, 1.0) self.assertEqual(len(perms), 999) p, stat, perms = mantel_test(m1, m2, 999, alt="less") - self.assertFloatEqual(stat, 0.755928946018) + assert_allclose(stat, 0.755928946018) self.assertEqual(len(perms), 999) self.assertCorrectPValue(0.6, 1.0, mantel_test, (m1, m2, 999), {"alt": "less"}) p, stat, perms = mantel_test(m1, m3, 999, alt="less") - self.assertFloatEqual(stat, -0.989743318611) + assert_allclose(stat, -0.989743318611) self.assertEqual(len(perms), 999) self.assertCorrectPValue(0.1, 0.25, mantel_test, (m1, m3, 999), {"alt": "less"}) @@ -1105,21 +1153,21 @@ m2 = array([[0, 2, 7], [2, 
0, 6], [7, 6, 0]]) m3 = array([[0, 0.5, 0.25], [0.5, 0, 0.1], [0.25, 0.1, 0]]) p, stat, perms = mantel_test(m1, m1, 999, alt="two sided") - self.assertFloatEqual(stat, 1.0) + assert_allclose(stat, 1.0) self.assertEqual(len(perms), 999) self.assertCorrectPValue( 0.20, 0.45, mantel_test, (m1, m1, 999), {"alt": "two sided"} ) p, stat, perms = mantel_test(m1, m2, 999, alt="two sided") - self.assertFloatEqual(stat, 0.755928946018) + assert_allclose(stat, 0.755928946018) self.assertEqual(len(perms), 999) self.assertCorrectPValue( 0.6, 0.75, mantel_test, (m1, m2, 999), {"alt": "two sided"} ) p, stat, perms = mantel_test(m1, m3, 999, alt="two sided") - self.assertFloatEqual(stat, -0.989743318611) + assert_allclose(stat, -0.989743318611) self.assertEqual(len(perms), 999) self.assertCorrectPValue( 0.2, 0.45, mantel_test, (m1, m3, 999), {"alt": "two sided"} @@ -1176,9 +1224,9 @@ def test_pearson(self): """Test pearson correlation method on valid data.""" # This test output was verified by R. - self.assertFloatEqual(pearson([1, 2], [1, 2]), 1.0) - self.assertFloatEqual(pearson([1, 2, 3], [1, 2, 3]), 1.0) - self.assertFloatEqual(pearson([1, 2, 3], [1, 2, 4]), 0.9819805) + assert_allclose(pearson([1, 2], [1, 2]), 1.0) + assert_allclose(pearson([1, 2, 3], [1, 2, 3]), 1.0) + assert_allclose(pearson([1, 2, 3], [1, 2, 4]), 0.9819805) def test_pearson_invalid_input(self): """Test running pearson on bad input.""" @@ -1190,33 +1238,33 @@ # One vector has no ties. exp = 0.3719581 obs = spearman(self.a, self.b) - self.assertFloatEqual(obs, exp) + assert_allclose(obs, exp) # Both vectors have no ties. exp = 0.2969697 obs = spearman(self.b, self.c) - self.assertFloatEqual(obs, exp) + assert_allclose(obs, exp) # Both vectors have ties. 
exp = 0.388381 obs = spearman(self.a, self.r) - self.assertFloatEqual(obs, exp) + assert_allclose(obs, exp) exp = -0.17575757575757578 obs = spearman(self.data1, self.data2) - self.assertFloatEqual(obs, exp) + assert_allclose(obs, exp) def test_spearman_no_variation(self): """Test the spearman function with a vector having no variation.""" exp = 0.0 obs = spearman([1, 1, 1], [1, 2, 3]) - self.assertFloatEqual(obs, exp) + assert_allclose(obs, exp) def test_spearman_ranked(self): """Test the spearman function with a vector that is already ranked.""" exp = 0.2969697 obs = spearman(self.b_ranked, self.c_ranked) - self.assertFloatEqual(obs, exp) + assert_allclose(obs, exp) def test_spearman_one_obs(self): """Test running spearman on a single observation.""" @@ -1235,23 +1283,28 @@ 4, ) obs = _get_rank(self.x) - self.assertFloatEqual(exp, obs) + assert_allclose(obs[0], exp[0]) + self.assertEqual(obs[1], exp[1]) exp = ([1.5, 3.0, 5.5, 4.0, 1.5, 7.0, 8.0, 9.0, 10.0, 5.5], 2) obs = _get_rank(self.a) - self.assertFloatEqual(exp, obs) + assert_allclose(obs[0], exp[0]) + self.assertEqual(obs[1], exp[1]) exp = ([2, 7, 10, 1, 3, 6, 4, 8, 5, 9], 0) obs = _get_rank(self.b) - self.assertFloatEqual(exp, obs) + assert_allclose(obs[0], exp[0]) + self.assertEqual(obs[1], exp[1]) exp = ([1.5, 7.0, 10.0, 1.5, 3.0, 6.0, 4.0, 8.0, 5.0, 9.0], 1) obs = _get_rank(self.r) - self.assertFloatEqual(exp, obs) + assert_allclose(obs[0], exp[0]) + self.assertEqual(obs[1], exp[1]) exp = ([], 0) obs = _get_rank([]) - self.assertEqual(exp, obs) + assert_allclose(obs[0], exp[0]) + self.assertEqual(obs[1], exp[1]) def test_get_rank_invalid_input(self): """Test the _get_rank function with invalid input.""" @@ -1278,13 +1331,17 @@ bad = [1, 2, 3] # originally gave r = 1.0000000002 - self.assertFloatEqual(correlation(x, x), (1, 0)) - self.assertFloatEqual(correlation(x, y), (0, 1)) - self.assertFloatEqual(correlation(y, z), (0, 1)) - self.assertFloatEqualAbs(correlation(x, a), (0.9827076, 0.01729), 1e-5) - 
self.assertFloatEqualAbs(correlation(x, b), (-0.9621405, 0.03786), 1e-5) - self.assertFloatEqualAbs(correlation(x, c), (0.3779645, 0.622), 1e-3) - self.assertEqual(correlation(bad, bad), (1, 0)) + assert_allclose(pearson_correlation(x, x), (1, 0)) + assert_allclose(pearson_correlation(x, y), (0, 1), rtol=1e-3) + assert_allclose(pearson_correlation(y, z), (0, 1)) + assert_allclose( + pearson_correlation(x, a), (0.9827076, 0.01729), rtol=1e-4, atol=1e-6 + ) + assert_allclose(pearson_correlation(x, b), (-0.9621405, 0.03786), rtol=1e-4) + assert_allclose(pearson_correlation(x, c), (0.3779645, 0.622), 1e-3) + self.assertEqual(pearson_correlation(bad, bad), (1, 0)) + got = pearson_correlation(self.data1, self.data2, tails="low") + assert_allclose(got, (-0.03760147385, 0.4589314864)) def test_correlation_test_pearson(self): """Test correlation_test using pearson on valid input.""" @@ -1298,7 +1355,7 @@ confidence_level=0.90, permutations=990, ) - self.assertFloatEqual(obs[:2], (-0.03760147, 0.91786297277172868)) + assert_allclose(obs[:2], (-0.03760147, 0.91786297277172868), rtol=1e-6) self.assertEqual(len(obs[2]), 990) for r in obs[2]: self.assertTrue(r >= -1.0 and r <= 1.0) @@ -1310,7 +1367,7 @@ {"method": "pearson", "confidence_level": 0.90, "permutations": 990}, p_val_idx=3, ) - self.assertFloatEqual(obs[4], (-0.5779077, 0.5256224)) + assert_allclose(obs[4], (-0.5779077, 0.5256224)) # Test with non-default tail type. 
obs = correlation_test( @@ -1321,7 +1378,7 @@ permutations=990, tails="low", ) - self.assertFloatEqual(obs[:2], (-0.03760147, 0.45893148638586434)) + assert_allclose(obs[:2], (-0.03760147, 0.45893148638586434), rtol=1e-6) self.assertEqual(len(obs[2]), 990) for r in obs[2]: self.assertTrue(r >= -1.0 and r <= 1.0) @@ -1338,14 +1395,14 @@ }, p_val_idx=3, ) - self.assertFloatEqual(obs[4], (-0.5779077, 0.5256224)) + assert_allclose(obs[4], (-0.5779077, 0.5256224)) def test_correlation_test_spearman(self): """Test correlation_test using spearman on valid input.""" # This example taken from Wikipedia page: # http://en.wikipedia.org/wiki/Spearman's_rank_correlation_coefficient obs = correlation_test(self.data1, self.data2, method="spearman", tails="high") - self.assertFloatEqual(obs[:2], (-0.17575757575757578, 0.686405827612)) + assert_allclose(obs[:2], (-0.17575757575757578, 0.686405827612)) self.assertEqual(len(obs[2]), 999) for rho in obs[2]: self.assertTrue(rho >= -1.0 and rho <= 1.0) @@ -1357,7 +1414,7 @@ {"method": "spearman", "tails": "high"}, p_val_idx=3, ) - self.assertFloatEqual(obs[4], (-0.7251388558041697, 0.51034422964834503)) + assert_allclose(obs[4], (-0.7251388558041697, 0.51034422964834503)) # The p-value is off because the example uses a one-tailed test, while # we use a two-tailed test. 
Someone confirms the answer that we get @@ -1365,7 +1422,7 @@ # http://stats.stackexchange.com/questions/22816/calculating-p-value- # for-spearmans-rank-correlation-coefficient-example-on-wikip obs = correlation_test(self.data1, self.data2, method="spearman", tails=None) - self.assertFloatEqual(obs[:2], (-0.17575757575757578, 0.62718834477648433)) + assert_allclose(obs[:2], (-0.17575757575757578, 0.62718834477648433)) self.assertEqual(len(obs[2]), 999) for rho in obs[2]: self.assertTrue(rho >= -1.0 and rho <= 1.0) @@ -1377,7 +1434,7 @@ {"method": "spearman", "tails": None}, p_val_idx=3, ) - self.assertFloatEqual(obs[4], (-0.7251388558041697, 0.51034422964834503)) + assert_allclose(obs[4], (-0.7251388558041697, 0.51034422964834503)) def test_correlation_test_invalid_input(self): """Test correlation_test using invalid input.""" @@ -1420,36 +1477,40 @@ (-0.97687328610475876, 0.93488023560400879), ) obs = correlation_test([1, 2, 3, 4], [1, 2, 1, 1], permutations=0) - self.assertFloatEqual(obs, exp) + for o, e in zip(obs, exp): + if isinstance(e, type(None)): + assert_equal(o, e) + else: + assert_allclose(o, e) def test_correlation_test_perfect_correlation(self): """Test correlation_test with perfectly-correlated input vectors.""" # These results were verified with R. obs = correlation_test([1, 2, 3, 4], [1, 2, 3, 4]) - self.assertFloatEqual(obs[:2], (0.99999999999999978, 2.2204460492503131e-16)) + assert_allclose(obs[:2], (0.99999999999999978, 2.2204460492503131e-16)) self.assertEqual(len(obs[2]), 999) for r in obs[2]: self.assertTrue(r >= -1.0 and r <= 1.0) self.assertCorrectPValue( 0.06, 0.09, correlation_test, ([1, 2, 3, 4], [1, 2, 3, 4]), p_val_idx=3 ) - self.assertFloatEqual(obs[4], (0.99999999999998879, 1.0)) + assert_allclose(obs[4], (0.99999999999998879, 1.0)) def test_correlation_test_small_obs(self): """Test correlation_test with a small number of observations.""" # These results were verified with R. 
obs = correlation_test([1, 2, 3], [1, 2, 3]) - self.assertFloatEqual(obs[:2], (1.0, 0)) + assert_allclose(obs[:2], (1.0, 0)) self.assertEqual(len(obs[2]), 999) for r in obs[2]: self.assertTrue(r >= -1.0 and r <= 1.0) self.assertCorrectPValue( 0.3, 0.4, correlation_test, ([1, 2, 3], [1, 2, 3]), p_val_idx=3 ) - self.assertFloatEqual(obs[4], (None, None)) + self.assertEqual(obs[4], (None, None)) obs = correlation_test([1, 2, 3], [1, 2, 3], method="spearman") - self.assertFloatEqual(obs[:2], (1.0, 0)) + assert_allclose(obs[:2], (1.0, 0)) self.assertEqual(len(obs[2]), 999) for r in obs[2]: self.assertTrue(r >= -1.0 and r <= 1.0) @@ -1461,7 +1522,7 @@ {"method": "spearman"}, p_val_idx=3, ) - self.assertFloatEqual(obs[4], (None, None)) + self.assertEqual(obs[4], (None, None)) def test_correlation_matrix(self): """Correlations in matrix should match values from R""" @@ -1469,9 +1530,9 @@ b = [1.5, 1.4, 1.2, 1.1] c = [15, 10, 5, 20] m = correlation_matrix([a, b, c]) - self.assertFloatEqual(m[0, 0], [1.0]) - self.assertFloatEqual([m[1, 0], m[1, 1]], [correlation(b, a)[0], 1.0]) - self.assertFloatEqual(m[2], [correlation(c, a)[0], correlation(c, b)[0], 1.0]) + assert_allclose(m[0, 0], [1.0]) + assert_allclose([m[1, 0], m[1, 1]], [correlation(b, a)[0], 1.0]) + assert_allclose(m[2], [correlation(c, a)[0], correlation(c, b)[0], 1.0]) class Ftest(TestCase): @@ -1482,7 +1543,7 @@ a = array([1, 3, 5, 7, 9, 8, 6, 4, 2]) b = array([5, 4, 6, 3, 7, 6, 4, 5]) self.assertEqual(f_value(a, b), (8, 7, 4.375)) - self.assertFloatEqual(f_value(b, a), (7, 8, 0.2285714)) + assert_allclose(f_value(b, a), (7, 8, 0.2285714), rtol=1e-6) too_short = array([4]) self.assertRaises(ValueError, f_value, too_short, b) @@ -1706,21 +1767,24 @@ # allowed error. 
This big, because results from R # are rounded at 4 decimals - error = 1e-4 + error = 1e-3 - self.assertFloatEqual(f_two_sample(a, a), (49, 49, 1, 1), eps=error) - self.assertFloatEqual(f_two_sample(a, b), (49, 49, 0.8575, 0.5925), eps=error) - self.assertFloatEqual(f_two_sample(b, a), (49, 49, 1.1662, 0.5925), eps=error) - self.assertFloatEqual( - f_two_sample(a, b, tails="low"), (49, 49, 0.8575, 0.2963), eps=error + assert_allclose(f_two_sample(a, a), (49, 49, 1, 1), rtol=error) + assert_allclose(f_two_sample(a, b), (49, 49, 0.8575, 0.5925), rtol=error) + assert_allclose(f_two_sample(b, a), (49, 49, 1.1662, 0.5925), rtol=error) + assert_allclose( + f_two_sample(a, b, tails="low"), (49, 49, 0.8575, 0.2963), rtol=error ) - self.assertFloatEqual( - f_two_sample(a, b, tails="high"), (49, 49, 0.8575, 0.7037), eps=error + assert_allclose( + f_two_sample(a, b, tails="high"), (49, 49, 0.8575, 0.7037), rtol=error ) - self.assertFloatEqual(f_two_sample(a, c), (49, 59, 0.6587, 0.1345), eps=error) + assert_allclose(f_two_sample(a, c), (49, 59, 0.6587, 0.1345), rtol=error) # p value very small, so first check df's and F value - self.assertFloatEqualAbs( - f_two_sample(d, a, tails="low")[0:3], (29, 49, 0.0028), eps=error + assert_allclose( + f_two_sample(d, a, tails="low")[0:3], + (29, 49, 0.0028), + rtol=error, + atol=1e-4, ) assert f_two_sample(d, a, tails="low")[3] < 2.2e-16 # p value @@ -1753,13 +1817,13 @@ def test_mw_test(self): """mann-whitney test results should match Sokal & Rohlf""" U, p = mw_test(self.x, self.y) - self.assertFloatEqual(U, 123.5) + assert_allclose(U, 123.5) self.assertTrue(0.02 <= p <= 0.05) def test_mw_boot(self): """excercising the Monte-carlo variant of mann-whitney""" U, p = mw_boot(self.x, self.y, 10) - self.assertFloatEqual(U, 123.5) + assert_allclose(U, 123.5) self.assertTrue(0 <= p <= 0.5) @@ -1770,8 +1834,8 @@ """conducts the tests for each alternate hypothesis against expecteds""" for alt, exp_p, exp_tau in alt_expecteds: tau, p_val = 
kendall_correlation(x, y, alt=alt, warn=False) - self.assertFloatEqual(tau, exp_tau, eps=1e-3) - self.assertFloatEqual(p_val, exp_p, eps=1e-3) + assert_allclose(tau, exp_tau, rtol=1e-3) + assert_allclose(p_val, exp_p, rtol=1e-3) def test_exact_calcs(self): """calculations of exact probabilities should match R""" @@ -1956,17 +2020,17 @@ self.assertEqual(other_vals, [4, 13, 15]) def test_distance_matrix_permutation_test_non_symmetric(self): - """ evaluate empirical p-values for a non symmetric matrix + """evaluate empirical p-values for a non symmetric matrix - To test the empirical p-values, we look at a simple 3x3 matrix - b/c it is easy to see what t score every permutation will - generate -- there's only 6 permutations. - Running dist_matrix_test with n=1000, we expect that each - permutation will show up 160 times, so we know how many - times to expect to see more extreme t scores. We therefore - know what the empirical p-values will be. (n=1000 was chosen - empirically -- smaller values seem to lead to much more frequent - random failures.) + To test the empirical p-values, we look at a simple 3x3 matrix + b/c it is easy to see what t score every permutation will + generate -- there's only 6 permutations. + Running dist_matrix_test with n=1000, we expect that each + permutation will show up 160 times, so we know how many + times to expect to see more extreme t scores. We therefore + know what the empirical p-values will be. (n=1000 was chosen + empirically -- smaller values seem to lead to much more frequent + random failures.) 
""" @@ -1981,34 +2045,34 @@ # looks at each possible permutation n times -- # compare first row to rest r = make_result_list(m, [(0, 0), (0, 1), (0, 2)], n=n, is_symmetric=False) - self.assertSimilarMeans(r, 0.0 / 6.0) + self.assertTrue(similar_means(r, 0.0 / 6.0)) r = make_result_list( m, [(0, 0), (0, 1), (0, 2)], n=n, is_symmetric=False, tails="high" ) - self.assertSimilarMeans(r, 4.0 / 6.0) + self.assertTrue(similar_means(r, 4.0 / 6.0)) r = make_result_list( m, [(0, 0), (0, 1), (0, 2)], n=n, is_symmetric=False, tails="low" ) - self.assertSimilarMeans(r, 0.0 / 6.0) + self.assertTrue(similar_means(r, 0.0 / 6.0)) # looks at each possible permutation n times -- # compare last row to rest r = make_result_list(m, [(2, 0), (2, 1), (2, 2)], n=n, is_symmetric=False) - self.assertSimilarMeans(r, 0.0 / 6.0) + self.assertTrue(similar_means(r, 0.0 / 6.0)) r = make_result_list( m, [(2, 0), (2, 1), (2, 2)], n=n, is_symmetric=False, tails="high" ) - self.assertSimilarMeans(r, 0.0 / 6.0) + self.assertTrue(similar_means(r, 0.0 / 6.0)) r = make_result_list( m, [(2, 0), (2, 1), (2, 2)], n=n, is_symmetric=False, tails="low" ) - self.assertSimilarMeans(r, 4.0 / 6.0) + self.assertTrue(similar_means(r, 4.0 / 6.0)) def test_distance_matrix_permutation_test_symmetric(self): - """ evaluate empirical p-values for symmetric matrix + """evaluate empirical p-values for symmetric matrix - See test_distance_matrix_permutation_test_non_symmetric - doc string for a description of how this test works. + See test_distance_matrix_permutation_test_non_symmetric + doc string for a description of how this test works. 
""" @@ -2023,11 +2087,11 @@ # looks at each possible permutation n times -- # compare first row to rest r = make_result_list(m, [(0, 0), (0, 1), (0, 2)], n=n) - self.assertSimilarMeans(r, 2.0 / 6.0) + self.assertTrue(similar_means(r, 2.0 / 6.0)) r = make_result_list(m, [(0, 0), (0, 1), (0, 2)], n=n, tails="high") - self.assertSimilarMeans(r, 0.77281447417149496, 0) + self.assertTrue(similar_means(r, 0.77281447417149496, 0)) r = make_result_list(m, [(0, 0), (0, 1), (0, 2)], n=n, tails="low") - self.assertSimilarMeans(r, 2.0 / 6.0) + self.assertTrue(similar_means(r, 2.0 / 6.0)) # The following lines are not part of the test code, but are useful in # figuring out what t-scores all of the permutations will yield. @@ -2067,8 +2131,7 @@ ) def test_ANOVA_one_way(self): - """ANOVA one way returns same values as ANOVA on a stats package - """ + """ANOVA one way returns same values as ANOVA on a stats package""" g1 = NumberCounter([10.0, 11.0, 10.0, 5.0, 6.0]) g2 = NumberCounter([1.0, 2.0, 3.0, 4.0, 1.0, 2.0]) g3 = NumberCounter([6.0, 7.0, 5.0, 6.0, 7.0]) @@ -2076,13 +2139,13 @@ dfn, dfd, F, between_MS, within_MS, group_means, prob = ANOVA_one_way(i) self.assertEqual(dfn, 2) self.assertEqual(dfd, 13) - self.assertFloatEqual(F, 18.565450643776831) - self.assertFloatEqual(between_MS, 55.458333333333343) - self.assertFloatEqual(within_MS, 2.9871794871794868) - self.assertFloatEqual( + assert_allclose(F, 18.565450643776831) + assert_allclose(between_MS, 55.458333333333343) + assert_allclose(within_MS, 2.9871794871794868) + assert_allclose( group_means, [8.4000000000000004, 2.1666666666666665, 6.2000000000000002] ) - self.assertFloatEqual(prob, 0.00015486238993089464) + assert_allclose(prob, 0.00015486238993089464) # execute tests if called from command line diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_util.py python-cogent-2020.12.21a+dfsg/tests/test_maths/test_util.py --- python-cogent-2020.6.30a0+dfsg/tests/test_maths/test_util.py 2020-06-30 
05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_maths/test_util.py 2020-12-20 23:35:03.000000000 +0000 @@ -4,6 +4,7 @@ # SUPPORT2425 # from __future__ import with_statement +from unittest import TestCase, main from warnings import filterwarnings import numpy @@ -19,6 +20,7 @@ transpose, zeros, ) +from numpy.testing import assert_allclose, assert_equal from cogent3.maths.util import ( column_degeneracy, @@ -28,7 +30,6 @@ safe_log, safe_p_log_p, ) -from cogent3.util.unit_test import TestCase, main # , numpy_err filterwarnings("ignore", "invalid value encountered in", category=RuntimeWarning) @@ -40,54 +41,51 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Jeremy Widmann", "Rob Knight", "Sandra Smit"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" class ArrayMathTests(TestCase): def test_safe_p_log_p(self): - """safe_p_log_p: should handle pos/neg/zero/empty arrays - """ + """safe_p_log_p: should handle pos/neg/zero/empty arrays""" # normal valid array a = array([[4, 0, 8], [2, 16, 4]]) - self.assertEqual(safe_p_log_p(a), array([[-8, 0, -24], [-2, -64, -8]])) + assert_equal(safe_p_log_p(a), array([[-8, 0, -24], [-2, -64, -8]])) # just zeros a = array([[0, 0], [0, 0]]) - self.assertEqual(safe_p_log_p(a), array([[0, 0], [0, 0]])) + assert_equal(safe_p_log_p(a), array([[0, 0], [0, 0]])) # negative number -- throw error with self.assertRaises(FloatingPointError): safe_p_log_p(array([-4])) # integer input, float output - self.assertFloatEqual(safe_p_log_p(array([3])), array([-4.75488750])) + assert_allclose(safe_p_log_p(array([3])), array([-4.75488750])) # empty array - self.assertEqual(safe_p_log_p(array([])), array([])) + assert_equal(safe_p_log_p(array([])), array([])) def test_safe_log(self): - """safe_log: should handle 
pos/neg/zero/empty arrays - """ + """safe_log: should handle pos/neg/zero/empty arrays""" # normal valid array a = array([[4, 0, 8], [2, 16, 4]]) - self.assertEqual(safe_log(a), array([[2, 0, 3], [1, 4, 2]])) + assert_equal(safe_log(a), array([[2, 0, 3], [1, 4, 2]])) # input integers, output floats - self.assertFloatEqual(safe_log(array([1, 2, 3])), array([0, 1, 1.5849625])) + assert_allclose(safe_log(array([1, 2, 3])), array([0, 1, 1.5849625])) # just zeros a = array([[0, 0], [0, 0]]) - self.assertEqual(safe_log(a), array([[0, 0], [0, 0]])) + assert_equal(safe_log(a), array([[0, 0], [0, 0]])) # negative number with self.assertRaises(FloatingPointError): safe_log(array([0, 3, -4])) # empty array - self.assertEqual(safe_log(array([])), array([])) + assert_equal(safe_log(array([])), array([])) # double empty array - self.assertEqual(safe_log(array([[]])), array([[]])) + assert_equal(safe_log(array([[]])), array([[]])) def test_row_uncertainty(self): - """row_uncertainty: should handle pos/neg/zero/empty arrays - """ + """row_uncertainty: should handle pos/neg/zero/empty arrays""" # normal valid array b = transpose( array( @@ -99,21 +97,20 @@ ] ) ) - self.assertFloatEqual(row_uncertainty(b), [2, 1.97, 1.47, 0.81, 0], 1e-3) + assert_allclose(row_uncertainty(b), [2, 1.97, 1.47, 0.81, 0], rtol=1e-2) # one-dimensional array self.assertRaises(ValueError, row_uncertainty, array([0.25, 0.25, 0.25, 0.25])) # zeros - self.assertEqual(row_uncertainty(array([[0, 0]])), array([0])) + assert_equal(row_uncertainty(array([[0, 0]])), array([0])) # empty 2D array - self.assertEqual(row_uncertainty(array([[]])), array([0])) - self.assertEqual(row_uncertainty(array([[], []])), array([0, 0])) + assert_equal(row_uncertainty(array([[]])), array([0])) + assert_equal(row_uncertainty(array([[], []])), array([0, 0])) # negative number -- throw error with self.assertRaises(FloatingPointError): row_uncertainty(array([[-2]])) def test_col_uncertainty(self): - """column_uncertainty: should handle 
pos/neg/zero/empty arrays - """ + """column_uncertainty: should handle pos/neg/zero/empty arrays""" b = array( [ [0.25, 0.2, 0.45, 0.25, 1], @@ -122,49 +119,47 @@ [0.25, 0.3, 0.05, 0, 0], ] ) - self.assertFloatEqual(column_uncertainty(b), [2, 1.97, 1.47, 0.81, 0], 1e-3) + assert_allclose(column_uncertainty(b), [2, 1.97, 1.47, 0.81, 0], rtol=1e-2) # one-dimensional array self.assertRaises( ValueError, column_uncertainty, array([0.25, 0.25, 0.25, 0.25]) ) # zeros - self.assertEqual(column_uncertainty(array([[0, 0]])), array([0, 0])) + assert_equal(column_uncertainty(array([[0, 0]])), array([0, 0])) # empty 2D array - self.assertEqual(column_uncertainty(array([[]])), array([])) - self.assertEqual(column_uncertainty(array([[], []])), array([])) + assert_equal(column_uncertainty(array([[]])), array([])) + assert_equal(column_uncertainty(array([[], []])), array([])) # negative number -- throw error with self.assertRaises(FloatingPointError): column_uncertainty(array([[-2]])) def test_row_degeneracy(self): - """row_degeneracy: should work with different cutoff values and arrays - """ + """row_degeneracy: should work with different cutoff values and arrays""" a = array([[0.1, 0.3, 0.4, 0.2], [0.5, 0.3, 0, 0.2], [0.8, 0, 0.1, 0.1]]) - self.assertEqual(row_degeneracy(a, cutoff=0.75), [3, 2, 1]) - self.assertEqual(row_degeneracy(a, cutoff=0.95), [4, 3, 3]) + assert_equal(row_degeneracy(a, cutoff=0.75), [3, 2, 1]) + assert_equal(row_degeneracy(a, cutoff=0.95), [4, 3, 3]) # one-dimensional array self.assertRaises(ValueError, row_degeneracy, array([0.25, 0.25, 0.25, 0.25])) # if cutoff value is not found, results are clipped to the # number of columns in the array - self.assertEqual(row_degeneracy(a, cutoff=2), [4, 4, 4]) + assert_equal(row_degeneracy(a, cutoff=2), [4, 4, 4]) # same behavior on empty array - self.assertEqual(row_degeneracy(array([[]])), []) + assert_equal(row_degeneracy(array([[]])), []) def test_column_degeneracy(self): - """column_degeneracy: should work with 
different cutoff values - """ + """column_degeneracy: should work with different cutoff values""" a = array([[0.1, 0.8, 0.3], [0.3, 0.2, 0.3], [0.6, 0, 0.4]]) - self.assertEqual(column_degeneracy(a, cutoff=0.75), [2, 1, 3]) - self.assertEqual(column_degeneracy(a, cutoff=0.45), [1, 1, 2]) + assert_equal(column_degeneracy(a, cutoff=0.75), [2, 1, 3]) + assert_equal(column_degeneracy(a, cutoff=0.45), [1, 1, 2]) # one-dimensional array self.assertRaises( ValueError, column_degeneracy, array([0.25, 0.25, 0.25, 0.25]) ) # if cutoff value is not found, results are clipped to the # number of rows in the array - self.assertEqual(column_degeneracy(a, cutoff=2), [3, 3, 3]) + assert_equal(column_degeneracy(a, cutoff=2), [3, 3, 3]) # same behavior on empty array - self.assertEqual(column_degeneracy(array([[]])), []) + assert_equal(column_degeneracy(array([[]])), []) if __name__ == "__main__": diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/__init__.py python-cogent-2020.12.21a+dfsg/tests/test_parse/__init__.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -36,7 +36,7 @@ "Marcin Cieslik", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_blast.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_blast.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_blast.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_blast.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,3 +1,5 @@ +from unittest import TestCase, main + from cogent3.parse.blast import ( FastacmdTaxonomyParser, GenericBlastParser9, @@ -17,18 +19,19 @@ 
make_label, query_finder, ) -from cogent3.util.unit_test import TestCase, main __author__ = "Micah Hamady" __copyright__ = "Copyright 2007-2016, The Cogent Project" __credits__ = ["Micah Hamady", "Rob Knight"] __license__ = "GPL" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Micah Hamady" __email__ = "hamady@colorado.edu" __status__ = "Production" +from numpy.testing import assert_allclose, assert_equal + class BlastTests(TestCase): """Tests of top-level functions""" @@ -281,30 +284,38 @@ def test_QMEBlast9(self): """QMEBlast9 should return expected lines from all iterations""" - self.assertFloatEqual( - QMEBlast9(self.rec3), - [ - ("ece:Z4181", "ece:Z4181", 3e-47), - ("ece:Z4181", "ecs:ECs3717", 3e-47), - ("ece:Z4181", "spt:SPA2730", 1e-5), - ("ece:Z4181", "ecs:ECs3717", 3e-54), # WARNING: allows duplicates - ("ece:Z4181", "cvi:CV2421", 2e-8), - ("ece:Z4182", "ece:Z4182", 3e-47), - ("ece:Z4182", "cvi:CV2422", 2e-6), - ], + expect = list( + zip( + *[ + ("ece:Z4181", "ece:Z4181", 3e-47), + ("ece:Z4181", "ecs:ECs3717", 3e-47), + ("ece:Z4181", "spt:SPA2730", 1e-5), + ("ece:Z4181", "ecs:ECs3717", 3e-54), # WARNING: allows duplicates + ("ece:Z4181", "cvi:CV2421", 2e-8), + ("ece:Z4182", "ece:Z4182", 3e-47), + ("ece:Z4182", "cvi:CV2422", 2e-6), + ], + ) ) + got = list(zip(*QMEBlast9(self.rec3))) + assert_equal(got[:-1], expect[:-1]) + assert_allclose(got[-1], expect[-1]) def test_QMEPsiBlast9(self): """QMEPsiBlast9 should only return items from last iterations""" - self.assertFloatEqual( - QMEPsiBlast9(self.rec3), - [ - ("ece:Z4181", "ecs:ECs3717", 3e-54), - ("ece:Z4181", "cvi:CV2421", 2e-8), - ("ece:Z4182", "ece:Z4182", 3e-47), - ("ece:Z4182", "cvi:CV2422", 2e-6), - ], + expect = list( + zip( + *[ + ("ece:Z4181", "ecs:ECs3717", 3e-54), + ("ece:Z4181", "cvi:CV2421", 2e-8), + ("ece:Z4182", "ece:Z4182", 3e-47), + ("ece:Z4182", "cvi:CV2422", 2e-6), + ] + ) ) + got = list(zip(*QMEPsiBlast9(self.rec3))) + assert_equal(got[:-1], expect[:-1]) + 
assert_allclose(got[-1], expect[-1]) def test_fastacmd_taxonomy_splitter(self): """fastacmd_taxonomy_splitter should split records into groups""" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_blast_xml.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_blast_xml.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_blast_xml.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_blast_xml.py 2020-12-20 23:35:03.000000000 +0000 @@ -8,7 +8,7 @@ __contributors__ = ["Micah Hamady"] __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Kristian Rother" __email__ = "krother@rubor.de" __status__ = "Prototype" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_cigar.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_cigar.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_cigar.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_cigar.py 2020-12-20 23:35:03.000000000 +0000 @@ -17,7 +17,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Hua Ying", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Hua Ying" __email__ = "hua.ying@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_clustal.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_clustal.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_clustal.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_clustal.py 2020-12-20 23:35:03.000000000 +0000 @@ -18,9 +18,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight", "Sandra Smit"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" 
+__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" # Note: the data are all strings and hence immutable, so it's OK to define diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_dialign.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_dialign.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_dialign.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_dialign.py 2020-12-20 23:35:03.000000000 +0000 @@ -10,7 +10,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_ebi.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_ebi.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_ebi.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_ebi.py 2020-12-20 23:35:03.000000000 +0000 @@ -66,7 +66,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Zongzhi Liu", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Zongzhi Liu" __email__ = "zongzhi.liu@gmail.com" __status__ = "Development" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_fasta.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_fasta.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_fasta.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_fasta.py 2020-12-20 23:35:03.000000000 +0000 @@ -25,9 +25,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" 
+__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" base_path = os.path.dirname(os.path.dirname(__file__)) @@ -89,10 +89,10 @@ def test_gt_bracket_in_seq(self): """MinimalFastaParser handles alternate finder function - this test also illustrates how to use the MinimalFastaParser - to handle "sequences" that start with a > symbol, which can - happen when we abuse the MinimalFastaParser to parse - fasta-like sequence quality files. + this test also illustrates how to use the MinimalFastaParser + to handle "sequences" that start with a > symbol, which can + happen when we abuse the MinimalFastaParser to parse + fasta-like sequence quality files. """ oneseq_w_gt = ">abc\n>CAG\n".split("\n") diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_gbseq.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_gbseq.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_gbseq.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_gbseq.py 2020-12-20 23:35:03.000000000 +0000 @@ -10,7 +10,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_genbank.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_genbank.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_genbank.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_genbank.py 2020-12-20 23:35:03.000000000 +0000 @@ -26,9 +26,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ 
= "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -471,6 +471,22 @@ self.assertEqual(str(got), expects[locus]) infile.close() + def test_rich_parser_moltype(self): + """correctly handles moltypes""" + with open("data/annotated_seq.gb") as infile: + parser = RichGenbankParser(infile) + got_1 = [s for _, s in parser][0] + + with open("data/annotated_seq.gb") as infile: + parser = RichGenbankParser(infile, moltype="dna") + got_2 = [s for _, s in parser][0] + + self.assertEqual(len(got_1.annotations), len(got_2.annotations)) + self.assertEqual(got_2.moltype.label, "dna") + # name formed from /product value + got = {f.name for f in got_2.get_annotations_matching("mRNA")} + self.assertEqual(got, {"conserved hypothetical protein", "chaperone, putative"}) + class LocationTests(TestCase): """Tests of the Location class.""" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_gff.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_gff.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_gff.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_gff.py 2020-12-20 23:35:03.000000000 +0000 @@ -14,7 +14,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_greengenes.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_greengenes.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_greengenes.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_greengenes.py 2020-12-20 23:35:03.000000000 +0000 @@ -15,7 +15,7 @@ # remember to add yourself if you make changes __credits__ = ["Daniel McDonald"] __license__ = "BSD-3" 
-__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Daniel McDonald" __email__ = "daniel.mcdonald@colorado.edu" __status__ = "Prototype" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_locuslink.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_locuslink.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_locuslink.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_locuslink.py 2020-12-20 23:35:03.000000000 +0000 @@ -30,9 +30,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_ncbi_taxonomy.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_ncbi_taxonomy.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_ncbi_taxonomy.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_ncbi_taxonomy.py 2020-12-20 23:35:03.000000000 +0000 @@ -2,6 +2,8 @@ """Tests of parsers for dealing with NCBI Taxonomy files. 
""" +from unittest import TestCase, main + from cogent3.parse.ncbi_taxonomy import ( MissingParentError, NcbiName, @@ -9,21 +11,18 @@ NcbiNameParser, NcbiTaxon, NcbiTaxonLookup, - NcbiTaxonNode, - NcbiTaxonomy, NcbiTaxonomyFromFiles, NcbiTaxonParser, ) -from cogent3.util.unit_test import TestCase, main __author__ = "Jason Carnes" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Jason Carnes", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" good_nodes = """1\t|\t1\t|\tno rank\t|\t\t|\t8\t|\t0\t|\t1\t|\t0\t|\t0\t|\t0\t|\t0\t|\t0\t|\t\t| @@ -297,7 +296,7 @@ Note: nested_species is explicitly designed to test the case where the nodes file does _not_ contain the root, and where the id of the de facto - root is not 1, to make sure there's nothing special about a node + root is not 1, to make sure there's nothing special about a node called 'root' or with id 1. 
""" @@ -356,7 +355,7 @@ self.assertEqual(len(dec), 1) assert dec[0] is tx[555] sp = tx["f"].getRankedDescendants("species") - self.assertSameItems(sp, [tx[1010], tx[9999], tx[7777], tx[6666]]) + self.assertCountEqual(sp, [tx[1010], tx[9999], tx[7777], tx[6666]]) empty = tx[11].getRankedDescendants("superclass") self.assertEqual(empty, []) gr = tx[3].getRankedDescendants("group") diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_nexus.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_nexus.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_nexus.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_nexus.py 2020-12-20 23:35:03.000000000 +0000 @@ -22,7 +22,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Catherine Lozupone", "Rob Knight", "Micah Hamady"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Catherine Lozupone" __email__ = "lozupone@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_pamlmatrix.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_pamlmatrix.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_pamlmatrix.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_pamlmatrix.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,20 +1,23 @@ #!/usr/bin/env python from io import StringIO +from unittest import TestCase, main from cogent3.evolve.models import DSO78_freqs, DSO78_matrix from cogent3.parse.paml_matrix import PamlMatrixParser -from cogent3.util.unit_test import TestCase, main __author__ = "Matthew Wakefield" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" +from numpy.testing import 
assert_equal + + data = """ 27 98 32 @@ -57,10 +60,8 @@ class TestParsePamlMatrix(TestCase): def test_parse(self): matrix, freqs = PamlMatrixParser(StringIO(data)) - self.assertEqual(DSO78_matrix, matrix) - self.assertEqual(DSO78_freqs, freqs) - - pass + assert_equal(DSO78_matrix, matrix) + assert_equal(DSO78_freqs, freqs) if __name__ == "__main__": diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_phylip.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_phylip.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_phylip.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_phylip.py 2020-12-20 23:35:03.000000000 +0000 @@ -13,7 +13,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Micah Hamady", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Micah Hamady" __email__ = "hamady@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_psl.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_psl.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_psl.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_psl.py 2020-12-20 23:35:03.000000000 +0000 @@ -12,7 +12,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell", "Gavin Huttley", "Anuj Pahwa"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Development" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_pwm_parsers.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_pwm_parsers.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_pwm_parsers.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_pwm_parsers.py 2020-12-20 
23:35:03.000000000 +0000 @@ -11,7 +11,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_rdb.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_rdb.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_rdb.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_rdb.py 2020-12-20 23:35:03.000000000 +0000 @@ -20,7 +20,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Sandra Smit", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Sandra Smit" __email__ = "sandra.smit@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_record_finder.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_record_finder.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_record_finder.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_record_finder.py 2020-12-20 23:35:03.000000000 +0000 @@ -17,9 +17,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight", "Zongzhi Liu"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_record.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_record.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_record.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_record.py 2020-12-20 
23:35:03.000000000 +0000 @@ -30,9 +30,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_tinyseq.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_tinyseq.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_tinyseq.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_tinyseq.py 2020-12-20 23:35:03.000000000 +0000 @@ -11,7 +11,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_tree.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_tree.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_tree.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_parse/test_tree.py 2020-12-20 23:35:03.000000000 +0000 @@ -19,9 +19,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell", "Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" sample = """ diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_unigene.py python-cogent-2020.12.21a+dfsg/tests/test_parse/test_unigene.py --- python-cogent-2020.6.30a0+dfsg/tests/test_parse/test_unigene.py 2020-06-30 05:30:11.000000000 +0000 +++ 
python-cogent-2020.12.21a+dfsg/tests/test_parse/test_unigene.py 2020-12-20 23:35:03.000000000 +0000 @@ -18,9 +18,9 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_phylo.py python-cogent-2020.12.21a+dfsg/tests/test_phylo.py --- python-cogent-2020.6.30a0+dfsg/tests/test_phylo.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_phylo.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,8 +1,11 @@ #! /usr/bin/env python import os +import pathlib import unittest import warnings +from tempfile import TemporaryDirectory + from numpy import exp, log from cogent3 import get_model, load_aligned_seqs, load_tree, make_tree @@ -23,7 +26,7 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2015, The Cogent Project" +__copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = [ "Peter Maxwell", "Gavin Huttley", @@ -32,7 +35,7 @@ "Ben Kaehler", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -432,6 +435,14 @@ maj_tree = self.rooted_trees_lengths[0][1] self.assertTrue(abs(get_ac(ct).length - get_ac(maj_tree).length) < 1e-9) + def test_scored_trees_collection_write(self): + """writes a tree collection""" + sct = ScoredTreeCollection(self.rooted_trees_lengths) + with TemporaryDirectory(".") as dirname: + dirname = pathlib.Path(dirname) + out = dirname / "collection.trees" + sct.write(out) + def test_consensus_from_scored_trees_collection(self): """tree collection should get same consensus as direct approach""" tree_list = [(i * -1, t) for i, t in 
enumerate(self.trees)] diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_recalculation.py python-cogent-2020.12.21a+dfsg/tests/test_recalculation.py --- python-cogent-2020.6.30a0+dfsg/tests/test_recalculation.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_recalculation.py 2020-12-20 23:35:03.000000000 +0000 @@ -11,7 +11,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_util/__init__.py python-cogent-2020.12.21a+dfsg/tests/test_util/__init__.py --- python-cogent-2020.6.30a0+dfsg/tests/test_util/__init__.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_util/__init__.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,6 +1,5 @@ #!/usr/bin/env python __all__ = [ - "test_unit_test", "test_misc", "test_dictarray", "test_table", @@ -21,7 +20,7 @@ "Greg Caporaso", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_util/test_deserialise.py python-cogent-2020.12.21a+dfsg/tests/test_util/test_deserialise.py --- python-cogent-2020.6.30a0+dfsg/tests/test_util/test_deserialise.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_util/test_deserialise.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,6 +1,8 @@ import json +import os from tempfile import TemporaryDirectory +from unittest import TestCase, main import numpy @@ -16,15 +18,17 @@ from cogent3.app.result import model_collection_result, model_result from cogent3.core import alignment, moltype from 
cogent3.evolve.models import get_model -from cogent3.util.deserialise import deserialise_object -from cogent3.util.unit_test import TestCase, main +from cogent3.util.deserialise import ( + deserialise_likelihood_function, + deserialise_object, +) __author__ = "Gavin Huttley" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -105,8 +109,8 @@ """Tree to_json enables roundtrip""" tree = make_tree(treestring="(c:01,d:0.3,(a:0.05,b:0.08)xx:0.2)") got = deserialise_object(tree.to_json()) - self.assertFloatEqual(got.get_node_matching_name("a").length, 0.05) - self.assertFloatEqual(got.get_node_matching_name("xx").length, 0.2) + assert_allclose(got.get_node_matching_name("a").length, 0.05) + assert_allclose(got.get_node_matching_name("xx").length, 0.2) def test_roundtrip_submod(self): """substitution model to_json enables roundtrip""" @@ -153,7 +157,7 @@ lnL = lf.get_log_likelihood() data = lf.to_json() got_obj = deserialise_object(data) - self.assertFloatEqual(got_obj.get_log_likelihood(), lnL) + assert_allclose(got_obj.get_log_likelihood(), lnL) def test_roundtrip_discrete_time_likelihood_function(self): """discrete time likelihood function.to_json enables roundtrip""" @@ -171,7 +175,7 @@ lnL = lf.get_log_likelihood() data = lf.to_json() got_obj = deserialise_object(data) - self.assertFloatEqual(got_obj.get_log_likelihood(), lnL) + assert_allclose(got_obj.get_log_likelihood(), lnL) def test_roundtrip_het_lf(self): """correctly round trips a site-het model""" @@ -212,7 +216,7 @@ outfile.write(data) got = deserialise_object(outpath) - self.assertFloatEqual(got.get_log_likelihood(), lnL) + assert_allclose(got.get_log_likelihood(), lnL) def test_roundtrip_model_result(self): """mode_result.to_json enables roundtrip and lazy evaluation""" @@ -250,7 +254,9 @@ def 
test_roundtrip_model_result2(self): """model_result of split codon correct type after roundtrip""" from cogent3.app import evo as evo_app - from cogent3.evolve.parameter_controller import AlignmentLikelihoodFunction + from cogent3.evolve.parameter_controller import ( + AlignmentLikelihoodFunction, + ) _data = { "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG", @@ -282,7 +288,9 @@ def test_model_collection_result(self): """round trip of model collection works""" from cogent3.app import evo as evo_app - from cogent3.evolve.parameter_controller import AlignmentLikelihoodFunction + from cogent3.evolve.parameter_controller import ( + AlignmentLikelihoodFunction, + ) _data = { "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG", @@ -321,7 +329,9 @@ def test_roundtrip_hypothesis_result(self): """nested items retain the correct type after roundtrip""" from cogent3.app import evo as evo_app - from cogent3.evolve.parameter_controller import AlignmentLikelihoodFunction + from cogent3.evolve.parameter_controller import ( + AlignmentLikelihoodFunction, + ) _data = { "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG", @@ -439,6 +449,40 @@ got = deserialise_object(jdata) self.assertEqual(got, data) + def test_deserialise_likelihood_function(self): + """correctly deserialise data into likelihood function""" + # tests multiple alignments + data = load_aligned_seqs( + filename=os.path.join(os.getcwd(), "data", "brca1_5.paml") + ) + half = len(data) // 2 + aln1 = data[:half] + aln2 = data[half:] + loci_names = ["1st-half", "2nd-half"] + loci = [aln1, aln2] + tree = make_tree(tip_names=data.names) + model = get_model("HKY85") + lf = model.make_likelihood_function(tree, loci=loci_names) + lf.set_alignment(loci) + lf_rich_dict = lf.to_rich_dict() + got = deserialise_likelihood_function(lf_rich_dict) + self.assertEqual(str(lf.defn_for["mprobs"]), str(got.defn_for["mprobs"])) + self.assertEqual( + str(lf.defn_for["alignment"].assignments), + str(got.defn_for["alignment"].assignments), + ) + # tests single 
alignment + model = get_model("HKY85") + lf = model.make_likelihood_function(tree) + lf.set_alignment(aln1) + lf_rich_dict = lf.to_rich_dict() + got = deserialise_likelihood_function(lf_rich_dict) + self.assertEqual(str(lf.defn_for["mprobs"]), str(got.defn_for["mprobs"])) + self.assertEqual( + str(lf.defn_for["alignment"].assignments), + str(got.defn_for["alignment"].assignments), + ) + if __name__ == "__main__": main() diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_util/test_dictarray.py python-cogent-2020.12.21a+dfsg/tests/test_util/test_dictarray.py --- python-cogent-2020.6.30a0+dfsg/tests/test_util/test_dictarray.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_util/test_dictarray.py 2020-12-20 23:35:03.000000000 +0000 @@ -24,7 +24,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -91,7 +91,8 @@ """convert_1D_dict produces valid template input""" data = dict(a=0, b=35, c=45) vals, keys = convert_1D_dict(data) - b = DictArrayTemplate(keys).wrap(vals) + b = DictArrayTemplate(keys) + b = b.wrap(vals) self.assertEqual(b.array.tolist(), [0, 35, 45]) def test_construct_both_dim_str(self): @@ -116,6 +117,23 @@ self.assertEqual(b[0].keys(), [0, 1, 2]) self.assertEqual(sum(b[0]), 1) + def test_str_labels(self): + """DictArray with str labels or numpy U dtype""" + b = DictArrayTemplate(["Ab", "Bb", "Cb"], ["Db", "Eb", "Fb"]).wrap(self.a) + self.assertEqual(b.keys(), ["Ab", "Bb", "Cb"]) + self.assertEqual(b[0].keys(), ["Db", "Eb", "Fb"]) + self.assertEqual(b["Ab", "Eb"], 0) + self.assertEqual(b["Bb", "Eb"], 1) + + b = DictArrayTemplate( + numpy.array(["Ab", "Bb", "Cb"], dtype="U"), + numpy.array(["Db", "Eb", "Fb"], dtype="U"), + ).wrap(self.a) + self.assertEqual(b.keys(), ["Ab", "Bb", "Cb"]) + 
self.assertEqual(b[0].keys(), ["Db", "Eb", "Fb"]) + self.assertEqual(b["Ab", "Eb"], 0) + self.assertEqual(b["Bb", "Eb"], 1) + def test_with_mixed_label_types(self): """DictArray constructed with mixed label types.""" b = DictArrayTemplate("ABC", 3).wrap(self.a) @@ -309,6 +327,18 @@ self.assertIsInstance(got, str) self.assertTrue(len(got), 100) + # case where 1D array + a = [4, 6, 4, 2] + darr = DictArrayTemplate(["A", "C", "G", "T"]).wrap(a) + got = darr._repr_html_() + self.assertTrue('class="index"' not in got) + + # case of 3D array + d3 = numpy.arange(8).reshape((2, 2, 2)) + darr = DictArrayTemplate(2, 2, 2).wrap(d3) + got = darr._repr_html_() + self.assertIn("3 dimensional", got) + def test_write(self): """exercising write method""" data = [[3, 7], [2, 8], [5, 5]] @@ -342,6 +372,82 @@ with self.assertRaises(ValueError): darr.to_string(format="md"), + def test_to_table(self): + """creates Table when ndim <= 2""" + from cogent3.util.table import Table + + a1D = DictArrayTemplate(["a", "b"]).wrap([0, 1]) + t = a1D.to_table() + self.assertIsInstance(t, Table) + # 1D tables don't get an index_name column + self.assertEqual(t.index_name, None) + a2D = DictArrayTemplate(["a", "b"], ["c", "d"]).wrap( + numpy.array([0, 1, 2, 3]).reshape((2, 2)) + ) + t = a2D.to_table() + self.assertIsInstance(t, Table) + self.assertEqual(t.shape, (2, 3)) # because index_name column added + # make sure the 2D variant has an index_name column, name is empty string + self.assertEqual(t.index_name, "") + self.assertEqual(t.columns[""].tolist(), a2D.template.names[0]) + # which works + self.assertEqual(t["b", "d"], 3) + + a3D = DictArrayTemplate(["a", "b"], ["c", "d"], ["e", "f"]).wrap( + numpy.array([0, 1, 2, 3, 4, 5, 6, 7]).reshape((2, 2, 2)) + ) + with self.assertRaises(ValueError): + _ = a3D.to_table() + + def test_interpret_index(self): + """correctly handles just explicitly defined indices""" + n = ["ab", "dna", "rna"] + a1D = DictArrayTemplate(n) + got = a1D.interpret_index(["ab", 
"rna"]) + self.assertEqual(got[0], ([0, 2],)) + got = a1D.interpret_index([0, 2]) + self.assertEqual(got[0], ([0, 2],)) + + def test_slicing_combos(self): + """different mixtures of slicing should work""" + darr = DictArrayTemplate(list(DNA), list(DNA)).wrap( + [ + [0.7, 0.1, 0.2, 0.3], + [0.1, 0.7, 0.1, 0.3], + [0.3, 0.2, 0.6, 0.3], + [0.4, 0.1, 0.1, 0.7], + ] + ) + got = darr["C":"G", "C":"G"] + assert_allclose(got.array, numpy.array([[0.7, 0.1], [0.2, 0.6]])) + + got = darr[[1, 2], [1, 2]] + assert_allclose(got.array, numpy.array([[0.7, 0.1], [0.2, 0.6]])) + + got = darr[[2, 3], "C"] + assert_allclose(got.array, numpy.array([0.2, 0.1])) + got = darr["C", [2, 3]] + assert_allclose(got.array, numpy.array([0.1, 0.3])) + + got = darr[[1, 2], "T":"A"] + assert_allclose(got.array, numpy.array([[0.1, 0.7], [0.3, 0.2]])) + + got = darr["T":"A", [1, 2]] + assert_allclose(got.array, numpy.array([[0.1, 0.2], [0.7, 0.1]])) + + # make sure we cope with keys that are int's + nums = list(range(1, 5)) + darr = DictArrayTemplate(nums, nums).wrap( + [ + [0.7, 0.1, 0.2, 0.3], + [0.1, 0.7, 0.1, 0.3], + [0.3, 0.2, 0.6, 0.3], + [0.4, 0.1, 0.1, 0.7], + ] + ) + got = darr[[1, 2], [1, 2]] + assert_allclose(got.array, numpy.array([[0.7, 0.1], [0.2, 0.6]])) + if __name__ == "__main__": main() diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_util/test_misc.py python-cogent-2020.12.21a+dfsg/tests/test_util/test_misc.py --- python-cogent-2020.6.30a0+dfsg/tests/test_util/test_misc.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_util/test_misc.py 2020-12-20 23:35:03.000000000 +0000 @@ -2,12 +2,16 @@ """Unit tests for utility functions and classes. 
""" +import os import pathlib import tempfile +import zipfile from copy import copy, deepcopy from os import remove, rmdir from os.path import exists +from tempfile import TemporaryDirectory +from unittest import TestCase, main from numpy.testing import assert_allclose @@ -35,6 +39,7 @@ get_merged_overlapping_coords, get_object_provenance, get_run_start_indices, + get_setting_from_environ, identity, is_char, is_char_or_noniterable, @@ -42,11 +47,11 @@ iterable, list_flatten, not_list_tuple, + open_, path_exists, recursive_flatten, remove_files, ) -from cogent3.util.unit_test import TestCase, main __author__ = "Rob Knight" @@ -60,9 +65,9 @@ "Daniel McDonald", ] __license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" +__version__ = "2020.12.21a" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -108,9 +113,9 @@ l, u = 1e-5, 2 eps = 1e-6 got = adjusted_within_bounds(l - eps, l, u, eps=eps) - self.assertFloatEqual(got, l) + assert_allclose(got, l) got = adjusted_within_bounds(u + eps, l, u, eps=eps) - self.assertFloatEqual(got, u) + assert_allclose(got, u) with self.assertRaises(ValueError): got = adjusted_within_bounds(u + 4, l, u, eps=eps, action="raise") @@ -429,6 +434,19 @@ data = [[20, 21, 0.11], [21, 22, 0.12], [22, 23, 0.13], [23, 24, 0.14]] self.assertEqual(get_merged_by_value_coords(data, digits=1), [[20, 24, 0.1]]) + def test_get_format_suffixes_returns_lower_case(self): + """should always return lower case""" + a, b = get_format_suffixes("suffixes.GZ") + self.assertTrue(a == None and b == "gz") + a, b = get_format_suffixes("suffixes.ABCD") + self.assertTrue(a == "abcd" and b == None) + a, b = get_format_suffixes("suffixes.ABCD.BZ2") + self.assertTrue(a == "abcd" and b == "bz2") + a, b = get_format_suffixes("suffixes.abcd.BZ2") + self.assertTrue(a == "abcd" and b == "bz2") + a, b = get_format_suffixes("suffixes.ABCD.bz2") + self.assertTrue(a == 
"abcd" and b == "bz2") + def test_get_format_suffixes(self): """correctly return suffixes for compressed etc.. formats""" a, b = get_format_suffixes("no_suffixes") @@ -472,6 +490,15 @@ got, "cogent3.evolve.substitution_model." "TimeReversibleNucleotide" ) + # handle a type + from cogent3 import SequenceCollection + + instance = SequenceCollection(dict(a="ACG", b="GGG")) + instance_prov = get_object_provenance(instance) + self.assertEqual(instance_prov, "cogent3.core.alignment.SequenceCollection") + type_prov = get_object_provenance(SequenceCollection) + self.assertEqual(instance_prov, type_prov) + def test_NestedSplitter(self): """NestedSplitter should make a function which return expected list""" # test delimiters, constructor, filter_ @@ -526,9 +553,100 @@ # or string instance self.assertTrue(path_exists(__file__)) + def test_open_reads_zip(self): + """correctly reads a zip compressed file""" + with TemporaryDirectory(dir=".") as dirname: + text_path = os.path.join(dirname, "foo.txt") + with open(text_path, "w") as f: + f.write("any str") + + zip_path = os.path.join(dirname, "foo.zip") + with zipfile.ZipFile(zip_path, "w") as zip: + zip.write(text_path) + + with open_(zip_path) as got: + self.assertEqual(got.readline(), "any str") + + def test_open_writes_zip(self): + """correctly writes a zip compressed file""" + with TemporaryDirectory(dir=".") as dirname: + zip_path = pathlib.Path(dirname) / "foo.txt.zip" + + with open_(zip_path, "w") as f: + f.write("any str") + + with zipfile.ZipFile(zip_path, "r") as zip: + name = zip.namelist()[0] + got = zip.open(name).read() + self.assertEqual(got, b"any str") + + def test_open_zip_multi(self): + """zip with multiple records cannot be opened using open_""" + with TemporaryDirectory(dir=".") as dirname: + text_path1 = os.path.join(dirname, "foo.txt") + with open(text_path1, "w") as f: + f.write("any str") + + text_path2 = os.path.join(dirname, "bar.txt") + with open(text_path2, "w") as f: + f.write("any str") + + zip_path 
= os.path.join(dirname, "foo.zip") + with zipfile.ZipFile(zip_path, "w") as zip: + zip.write(text_path1) + zip.write(text_path2) + + with self.assertRaises(ValueError): + open_(zip_path) + + def test_get_setting_from_environ(self): + """correctly recovers environment variables""" + import os + + def make_env_setting(d): + return ",".join([f"{k}={v}" for k, v in d.items()]) + + env_name = "DUMMY_SETTING" + os.environ.pop(env_name, None) + setting = dict(num_pos=2, num_seq=4, name="blah") + single_setting = dict(num_pos=2) + correct_names_types = dict(num_pos=int, num_seq=int, name=str) + incorrect_names_types = dict(num_pos=int, num_seq=int, name=float) + + for stng in (setting, single_setting): + os.environ[env_name] = make_env_setting(stng) + got = get_setting_from_environ(env_name, correct_names_types) + for key in got: + self.assertEqual(got[key], setting[key]) + + os.environ[env_name] = make_env_setting(setting) + got = get_setting_from_environ(env_name, incorrect_names_types) + assert "name" not in got + for key in got: + self.assertEqual(got[key], setting[key]) + + # malformed env setting + os.environ[env_name] = make_env_setting(setting).replace("=", "") + got = get_setting_from_environ(env_name, correct_names_types) + self.assertEqual(got, {}) + + os.environ.pop(env_name, None) -class Atomic_writeTests(TestCase): - """Unit tests for the Atomic_write class.""" + +class AtomicWriteTests(TestCase): + """testing the atomic_write class.""" + + def test_does_not_write_if_exception(self): + """file does not exist if an exception raised before closing""" + # create temp file directory + with tempfile.TemporaryDirectory(".") as dirname: + dirname = pathlib.Path(dirname) + test_filepath = dirname / "Atomic_write_test" + with self.assertRaises(AssertionError): + with atomic_write(test_filepath, mode="w") as f: + f.write("abc") + raise AssertionError + self.assertFalse(test_filepath.exists()) def test_rename(self): """Renames file as expected """ @@ -544,6 +662,51 @@ 
with atomic_write(test_filepath, mode="w") as f: f.write("abc") + def test_atomic_write_noncontext(self): + """atomic write works as more regular file object""" + with TemporaryDirectory(dir=".") as dirname: + path = pathlib.Path(dirname) / "foo.txt" + zip_path = path.parent / f"{path.name}.zip" + aw = atomic_write(path, in_zip=zip_path, mode="w") + aw.write("some data") + aw.close() + with open_(zip_path) as ifile: + got = ifile.read() + self.assertEqual(got, "some data") + + def test_aw_zip_from_path(self): + """supports inferring zip archive name from path""" + with TemporaryDirectory(dir=".") as dirname: + path = pathlib.Path(dirname) / "foo.txt" + zip_path = path.parent / f"{path.name}.zip" + aw = atomic_write(zip_path, in_zip=True, mode="w") + aw.write("some data") + aw.close() + with open_(zip_path) as ifile: + got = ifile.read() + self.assertEqual(got, "some data") + + path = pathlib.Path(dirname) / "foo2.txt" + zip_path = path.parent / f"{path.name}.zip" + aw = atomic_write(path, in_zip=zip_path, mode="w") + aw.write("some data") + aw.close() + with open_(zip_path) as ifile: + got = ifile.read() + self.assertEqual(got, "some data") + + def test_expanduser(self): + """expands user correctly""" + # create temp file directory + home = os.environ["HOME"] + with tempfile.TemporaryDirectory(dir=home) as dirname: + # create temp filepath + dirname = pathlib.Path(dirname) + test_filepath = dirname / "Atomic_write_test" + test_filepath = str(test_filepath).replace(home, "~") + with atomic_write(test_filepath, mode="w") as f: + f.write("abc") + class _my_dict(dict): """Used for testing subclass behavior of ClassChecker""" @@ -641,8 +804,7 @@ class _list_and_string(list, Delegator): - """Trivial class to demonstrate Delegator. 
- """ + """Trivial class to demonstrate Delegator.""" def __init__(self, items, string): Delegator.__init__(self, string) diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_util/test_parallel.py python-cogent-2020.12.21a+dfsg/tests/test_util/test_parallel.py --- python-cogent-2020.6.30a0+dfsg/tests/test_util/test_parallel.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_util/test_parallel.py 2020-12-20 23:35:03.000000000 +0000 @@ -13,7 +13,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Sheng Han Moses Koh"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_util/test_recode_alignment.py python-cogent-2020.12.21a+dfsg/tests/test_util/test_recode_alignment.py --- python-cogent-2020.6.30a0+dfsg/tests/test_util/test_recode_alignment.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_util/test_recode_alignment.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,46 +1,35 @@ -#!/usr/bin/env python -# Author: Greg Caporaso (gregcaporaso@gmail.com) -# test_recode_alignment.py +from unittest import TestCase, main -""" Description -File created on 19 Jun 2007. 
- -""" - -from numpy import array +from numpy.testing import assert_allclose from cogent3 import make_aligned_seqs from cogent3.core.alignment import ArrayAlignment -from cogent3.core.alphabet import Alphabet from cogent3.evolve.models import DSO78_freqs, DSO78_matrix -from cogent3.evolve.substitution_model import Parametric from cogent3.util.recode_alignment import ( alphabets, build_alphabet_map, - recode_alignment, recode_count_matrix, recode_counts_and_freqs, recode_dense_alignment, recode_freq_vector, ) -from cogent3.util.unit_test import TestCase, main __author__ = "Greg Caporaso" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Greg Caporaso"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Greg Caporaso" __email__ = "gregcaporaso@gmail.com" __status__ = "Beta" class RecodeAlignmentTests(TestCase): - """ Tests of functions in recode_alphabet.py + """Tests of functions in recode_alphabet.py - These functions will probably move at some point, and the unit tests - will move with them. + These functions will probably move at some point, and the unit tests + will move with them. 
""" def setUp(self): @@ -66,8 +55,7 @@ ) def test_build_alphabet_map_w_alphabet_id(self): - """build_alphabet_map: returns correct dict when given alphabet_id - """ + """build_alphabet_map: returns correct dict when given alphabet_id""" expected = dict( [ ("G", "G"), @@ -101,8 +89,7 @@ self.assertEqual(build_alphabet_map("charge_3")["K"], "K") def test_build_alphabet_map_w_alphabet_def(self): - """build_alphabet_map: returns correct dict when given alphabet_def - """ + """build_alphabet_map: returns correct dict when given alphabet_def""" expected = dict( [ ("G", "S"), @@ -162,8 +149,7 @@ raise AssertionError("Failed on def: %s" % str(alphabet_def)) def test_recode_dense_alignment_leaves_original_alignment_intact(self): - """recode_dense_alignment: leaves input alignment intact - """ + """recode_dense_alignment: leaves input alignment intact""" # provided with alphabet_id actual = recode_dense_alignment(self.aln, alphabet_id="charge_2") self.assertNotEqual(actual, self.aln) @@ -172,8 +158,7 @@ self.assertNotEqual(actual, self.aln) def test_recode_dense_alignment(self): - """recode_dense_alignment: recode alignment to charge_2 alpha works - """ + """recode_dense_alignment: recode alignment to charge_2 alpha works""" expected_c2 = ArrayAlignment( data={"1": "AKKAKAK", "2": "AKK-KAK", "3": "AAAAAA-"} ) @@ -211,8 +196,7 @@ self.assertEqual(actual, expected) def test_recode_dense_alignment_to_orig(self): - """recode_dense_alignment: recode aln to orig returns original aln - """ + """recode_dense_alignment: recode aln to orig returns original aln""" # provided with alphabet_id self.assertEqual(recode_dense_alignment(self.aln, alphabet_id="orig"), self.aln) # provided with alphabet_def @@ -220,98 +204,35 @@ recode_dense_alignment(self.aln, alphabet_def=self.orig), self.aln ) - # THE FUNCTION THAT THESE TESTS APPLY TO ONLY EXISTS AS A STUB RIGHT - # NOW -- WILL UNCOMMENT THE TESTS WHEN THE FUNCTIONS IS READY. - # --GREG C. 
(11/19/08) - # def test_recode_alignment(self): - # """recode_alignment: recode alignment works as expected - # """ - # expected_c2 = make_aligned_seqs(data=\ - # {'1':'AKKAKAK','2':'AKK-KAK','3':'AAAAAA-'}) - # expected_h3 = make_aligned_seqs(data=\ - # {'1':'PRRPRPR','2':'PRR-RPR','3':'PPPPYY-'}) - # expected_aa = make_aligned_seqs(data=\ - # {'1':'AAAAAAA','2':'AAA-AAA','3':'AAAAAA-'}) - # - # # provided with alphabet_id - # actual = recode_alignment(self.aln2, alphabet_id='charge_2') - # self.assertEqual(actual,expected_c2) - # # provided with alphabet_def - # actual = recode_alignment(self.aln2, alphabet_def=self.charge_2) - # self.assertEqual(actual,expected_c2) - # - # # different alphabet - # actual = recode_alignment(self.aln2, alphabet_id='hydropathy_3') - # self.assertEqual(actual,expected_h3) - # actual = recode_alignment(self.aln2,\ - # alphabet_def=self.hydropathy_3) - # self.assertEqual(actual,expected_h3) - # - # # different alphabet - # actual = recode_alignment(self.aln2, alphabet_def=self.all_to_a) - # self.assertEqual(actual,expected_aa) - # - # # original charactars which aren't remapped are let in original state - # actual = recode_alignment(self.aln2, alphabet_def=[('a','b')]) - # self.assertEqual(actual,self.aln2) - # - # # non-alphabetic character mapped same as alphabetic characters - # actual = recode_alignment(self.aln2, alphabet_def=[('.','-')]) - # expected = make_aligned_seqs(\ - # data={'1':'CDDFBXZ', '2':'CDD.BXZ', '3':'AAAASS.'}) - # self.assertEqual(actual,expected) - # - # def test_recode_alignment_to_orig(self): - # """recode_alignment: recode aln to orig returns original aln - # """ - # # provided with alphabet_id - # self.assertEqual(recode_alignment(\ - # self.aln2, alphabet_id='orig'), self.aln2) - # # provided with alphabet_def - # self.assertEqual(recode_alignment(\ - # self.aln2, alphabet_def=self.orig), self.aln2) - # - # def test_recode_alignment_leaves_original_alignment_intact(self): - # """recode_alignment: leaves 
input alignment intact - # """ - # # provided with alphabet_id - # actual = recode_alignment(self.aln2, alphabet_id='charge_2') - # self.assertNotEqual(actual,self.aln2) - # # provided with alphabet_def - # actual = recode_alignment(self.aln2, alphabet_def=self.charge_2) - # self.assertNotEqual(actual,self.aln2) - def test_recode_freq_vector(self): - """recode_freq_vector: bg freqs updated to reflect recoded alphabet - """ + """recode_freq_vector: bg freqs updated to reflect recoded alphabet""" freqs = {"A": 0.21, "E": 0.29, "C": 0.05, "D": 0.45} a_def = [("A", "AEC"), ("E", "D")] expected = {"A": 0.55, "E": 0.45} - self.assertFloatEqual(recode_freq_vector(a_def, freqs), expected) + self.assertEqual(recode_freq_vector(a_def, freqs), expected) # reversal of alphabet freqs = {"A": 0.21, "E": 0.29, "C": 0.05, "D": 0.45} a_def = [("A", "D"), ("E", "C"), ("C", "E"), ("D", "A")] expected = {"A": 0.45, "E": 0.05, "C": 0.29, "D": 0.21} - self.assertFloatEqual(recode_freq_vector(a_def, freqs), expected) + self.assertEqual(recode_freq_vector(a_def, freqs), expected) # no change in freqs (old alphabet = new alphabet) freqs = {"A": 0.21, "E": 0.29, "C": 0.05, "D": 0.45} a_def = [("A", "A"), ("E", "E"), ("C", "C"), ("D", "D")] - self.assertFloatEqual(recode_freq_vector(a_def, freqs), freqs) + self.assertEqual(recode_freq_vector(a_def, freqs), freqs) freqs = {"A": 0.21, "E": 0.29, "C": 0.05, "D": 0.45} a_def = [("X", "AEC"), ("Y", "D")] expected = {"X": 0.55, "Y": 0.45} - self.assertFloatEqual(recode_freq_vector(a_def, freqs), expected) + self.assertEqual(recode_freq_vector(a_def, freqs), expected) def test_recode_freq_vector_ignores(self): - """recode_freq_vector: ignored chars are ignored - """ + """recode_freq_vector: ignored chars are ignored""" freqs = {"A": 0.21, "B": 0.29, "C": 0.05, "D": 0.45, "X": 0.22, "Z": 0.5} a_def = [("A", "A"), ("B", "B"), ("C", "C"), ("D", "D"), ("X", "X"), ("Z", "Z")] expected = {"A": 0.21, "C": 0.05, "D": 0.45} - 
self.assertFloatEqual(recode_freq_vector(a_def, freqs), expected) + self.assertEqual(recode_freq_vector(a_def, freqs), expected) freqs = { "D": 0.21, @@ -324,7 +245,7 @@ } a_def = [("D", "DEN"), ("Q", "Q")] expected = {"D": 0.55, "Q": 0.45} - self.assertFloatEqual(recode_freq_vector(a_def, freqs), expected) + self.assertEqual(recode_freq_vector(a_def, freqs), expected) class RecodeMatrixTests(TestCase): @@ -371,8 +292,7 @@ self.alphabet2_w_ambig = [("D", "DEX"), ("L", "LIB"), ("C", "CZ")] def test_recode_counts_and_freqs(self): - """recode_counts_and_freqs: functions as expected - """ + """recode_counts_and_freqs: functions as expected""" alphabet = alphabets["charge_his_3"] aa_order = "ACDEFGHIKLMNPQRSTVWY" actual = recode_counts_and_freqs(alphabet) @@ -382,40 +302,37 @@ expected_freqs = {}.fromkeys(aa_order, 0.0) expected_freqs.update(recode_freq_vector(alphabet, DSO78_freqs)) expected = (expected_matrix, expected_freqs) - self.assertEqual(actual, expected) + assert_allclose(actual[0], expected[0]) + self.assertEqual(actual[1], expected[1]) def test_recode_count_matrix_2_states(self): - """recode_count_matrix: returns correct result with 2-state alphabet - """ + """recode_count_matrix: returns correct result with 2-state alphabet""" actual = recode_count_matrix(self.alphabet1, self.m1, self.aa_order1) expected = self.recoded_m1 - self.assertEqual(actual, expected) + assert_allclose(actual, expected) def test_recode_count_matrix_3_states(self): - """recode_count_matrix: returns correct result with 3-state alphabet - """ + """recode_count_matrix: returns correct result with 3-state alphabet""" actual = recode_count_matrix(self.alphabet2, self.m2, self.aa_order2) expected = self.recoded_m2 - self.assertEqual(actual, expected) + assert_allclose(actual, expected) def test_recode_count_matrix_3_states_ambig_ignored(self): - """recode_count_matrix: correct result w 3-state alphabet w ambig chars - """ + """recode_count_matrix: correct result w 3-state alphabet w ambig 
chars""" actual = recode_count_matrix(self.alphabet2_w_ambig, self.m2, self.aa_order2) expected = self.recoded_m2 - self.assertEqual(actual, expected) + assert_allclose(actual, expected) def test_recode_count_matrix_no_change(self): - """recode_count_matrix: no changes applied when they shouldn't be - """ + """recode_count_matrix: no changes applied when they shouldn't be""" # recoding recoded matrices actual = recode_count_matrix(self.alphabet1, self.recoded_m1, self.aa_order1) expected = self.recoded_m1 - self.assertEqual(actual, expected) + assert_allclose(actual, expected) actual = recode_count_matrix(self.alphabet2, self.recoded_m2, self.aa_order2) expected = self.recoded_m2 - self.assertEqual(actual, expected) + assert_allclose(actual, expected) if __name__ == "__main__": diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_util/test_table.py python-cogent-2020.12.21a+dfsg/tests/test_util/test_table.py --- python-cogent-2020.6.30a0+dfsg/tests/test_util/test_table.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_util/test_table.py 2020-12-20 23:35:03.000000000 +0000 @@ -2,6 +2,8 @@ """Unit tests for table. 
""" +import contextlib +import json import os import pathlib import pickle @@ -16,13 +18,18 @@ from numpy.testing import assert_equal from cogent3 import load_table, make_table +from cogent3.format.table import ( + formatted_array, + get_continuation_tables_headers, + is_html_markup, +) from cogent3.parse.table import FilteringParser +from cogent3.util.misc import get_object_provenance, open_ from cogent3.util.table import ( Table, cast_str_to_array, cast_str_to_numeric, cast_to_array, - formatted_array, ) @@ -37,7 +44,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Thomas La", "Christopher Bradley"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -45,7 +52,7 @@ class TrapOutput: def __call__(self, data, *args, **kwargs): - self.data, _ = data._get_repr_() + self.data, _, _ = data._get_repr_() self.output = repr(data) @@ -135,7 +142,7 @@ def test_index_name(self): """correctly assigns""" - t = Table(header=self.t3_header, data=self.t3_rows, index="foo") + t = Table(header=self.t3_header, data=self.t3_rows, index_name="foo") self.assertEqual(t.index_name, "foo") # fails if not an existing column with self.assertRaises(ValueError): @@ -144,23 +151,36 @@ data = t.columns.to_dict() # correctly handled when provided on construction with self.assertRaises(ValueError): - t = Table(data=data, index="missing") + t = Table(data=data, index_name="missing") - t = Table(data=data, index="foo") + t = Table(data=data, index_name="foo") self.assertEqual(t.index_name, "foo") + # correctly reset when assigned None + t.index_name = None + self.assertEqual(t.index_name, None) + self.assertEqual(t.columns.index_name, None) + self.assertEqual(t._template, None) + # ... 
prior to providing columns - t = Table(index="foo") + t = Table(index_name="foo") for c, v in data.items(): t.columns[c] = v self.assertEqual(t.index_name, "foo") - t = Table(index="missing") + t = Table(index_name="missing") for c, v in data.items(): t.columns[c] = v with self.assertRaises(ValueError): t.index_name + def test_table_data_int_keys(self): + """correctly construct table from dict with int's as keys""" + head = ["", 0, 1] + data = {0: [2, 2], 1: [2, 2], "": [0, 1]} + t = Table(head, data=data) + assert_equal(t.array.tolist(), [[0, 2, 2], [1, 2, 2]]) + def test_table_with_empty_string_index(self): """handle an index of empty string""" d = { @@ -168,7 +188,7 @@ "Chimpanzee": [0.0, 0.19, 0.005], "Galago": [0.19, 0.0, 0.19], } - table = make_table(data=d, index="") + table = make_table(data=d, index_name="") val = table["Galago", "Chimpanzee"] self.assertEqual(val, 0.19) @@ -201,12 +221,13 @@ def test_indexing_rows(self): """works using names or ints""" - t = Table(header=self.t7_header, data=self.t7_rows, index="gene") - self.assertEqual(t["ENSG00000019485", "chrom"], "A") + t = Table(header=self.t7_header, data=self.t7_rows, index_name="gene") + got = t["ENSG00000019485", "chrom"] + self.assertEqual(got, "A") def test_immutability_cells(self): """table cells are immutable""" - t = Table(header=self.t7_header, data=self.t7_rows, index="gene") + t = Table(header=self.t7_header, data=self.t7_rows, index_name="gene") with self.assertRaises(TypeError): t["ENSG00000019485", "chrom"] = "D" @@ -231,6 +252,46 @@ assert_equal(n.header, numpy.array(t.header)[columns]) self.assertEqual(n.shape, (2, 2)) + # column formatting copied on slice + t = Table(header=self.t5_header, data=self.t5_rows) + t.format_column("c", "%.2e") + n = t[:, 1:] + self.assertEqual(n._column_templates, t._column_templates) + + def test_slicing_using_numpy_indexing(self): + """support numpy advanced indexing""" + t = Table(header=self.t5_header, data=self.t5_rows) + indices = t.columns["b"] 
!= 0 + got = t[indices] + expect = t.array[[0, 2], :] + assert_equal(got.array, expect) + got = t[indices, [True, False, True, True]] + expect = expect[:, [0, 2, 3]] + assert_equal(got.array, expect) + + # using numpy arrays for rows and columns + got_np = t[indices, numpy.array([True, False, True, True])] + assert_equal(got_np.array, got.array) + + def test_slicing_with_index(self): + """different slice types work when index_name defined""" + # slicing by int works with index_name too + t = Table(header=self.t8_header, data=self.t8_rows, index_name="edge.name") + got = t[[1]] + self.assertEqual(got.columns["edge.name"], "NineBande") + self.assertEqual(got.shape, (1, t.shape[1])) + for v, dtype in [(1, None), (1, object), ("NineBande", "U")]: + got = t[numpy.array([v], dtype=dtype)] + self.assertEqual(got.columns["edge.name"], "NineBande") + self.assertEqual(got.shape, (1, t.shape[1])) + + # works if, for some reason, the index_name column has floats + t = Table(header=self.t7_header, data=self.t7_rows, index_name="stat") + got = t[[1827.5580]] + self.assertEqual(got.shape, (1, t.shape[1])) + got = t[numpy.array([1827.5580])] + self.assertEqual(got.shape, (1, t.shape[1])) + def test_specifying_space(self): """controls spacing in simple format""" space = " " @@ -273,9 +334,9 @@ index = "A/C" h = ["A/C", "A/G", "A/T", "C/A"] rows = [[0.0425, 0.1424, 0.0226, 0.0391]] - t = Table(header=h, data=rows, max_width=30, index=index) + t = Table(header=h, data=rows, max_width=30, index_name=index) wrapped = str(t) - # index column occurs twice for these conditions + # index_name column occurs twice for these conditions for c in h: expect = 2 if c == index else 1 self.assertEqual(wrapped.count(c), expect) @@ -284,12 +345,30 @@ """correctly wraps table to <= maximum width""" # multi-row table d2D = { - "edge.parent": {"NineBande": "root", "Human": "edge.0",}, - "x": {"NineBande": 1.0, "Human": 1.0,}, - "length": {"NineBande": 4.0, "Human": 4.0,}, - "y": {"NineBande": 3.0, 
"Human": 3.0,}, - "z": {"NineBande": 6.0, "Human": 6.0,}, - "edge.name": {"Human": "Human", "NineBande": "NineBande",}, + "edge.parent": { + "NineBande": "root", + "Human": "edge.0", + }, + "x": { + "NineBande": 1.0, + "Human": 1.0, + }, + "length": { + "NineBande": 4.0, + "Human": 4.0, + }, + "y": { + "NineBande": 3.0, + "Human": 3.0, + }, + "z": { + "NineBande": 6.0, + "Human": 6.0, + }, + "edge.name": { + "Human": "Human", + "NineBande": "NineBande", + }, } row_order = list(d2D["edge.name"]) t = Table( @@ -298,7 +377,7 @@ row_order=row_order, space=8, max_width=50, - index="edge.name", + index_name="edge.name", title="My title", legend="legend: this is a nonsense example.", ) @@ -312,17 +391,32 @@ rows = [[0.0425, 0.1424, 0.0226, 0.0391]] t = Table(header=h, data=rows, max_width=30) wrapped = str(t) - # index column occurs twice for these conditions + # index_name column occurs twice for these conditions for c in h: self.assertEqual(wrapped.count(c), 1) # multi-row table data = { - "edge.parent": {"NineBande": "root", "edge.1": "root",}, - "x": {"NineBande": 1.0, "edge.1": 1.0,}, - "length": {"NineBande": 4.0, "edge.1": 4.0,}, - "y": {"NineBande": 3.0, "edge.1": 3.0,}, - "z": {"NineBande": 6.0, "edge.1": 6.0,}, + "edge.parent": { + "NineBande": "root", + "edge.1": "root", + }, + "x": { + "NineBande": 1.0, + "edge.1": 1.0, + }, + "length": { + "NineBande": 4.0, + "edge.1": 4.0, + }, + "y": { + "NineBande": 3.0, + "edge.1": 3.0, + }, + "z": { + "NineBande": 6.0, + "edge.1": 6.0, + }, } t = Table(data=data, max_width=30) wrapped = str(t) @@ -333,48 +427,48 @@ """correctly format array data""" f = (2.53, 12.426, 9.9, 7.382e-08) # with default format_spec - g, l = formatted_array(numpy.array(f), "LR", precision=2) + g, l, w = formatted_array(numpy.array(f), "LR", precision=2) self.assertTrue(l.endswith("LR")) for e in g: v = e.split(".") self.assertEqual(len(v[-1]), 2, v) # handles bool - g, l = formatted_array(numpy.array([True, False, True]), "LR", precision=2) + 
g, l, w = formatted_array(numpy.array([True, False, True]), "LR", precision=2) self.assertEqual(g[0].strip(), "True") # title is always right aligned - _, l = formatted_array(numpy.array(f), "LR", format_spec=">.1f") - self.assertTrue(l.endswith("LR")) - _, l = formatted_array(numpy.array(f), "LR", format_spec="<.1f") + _, l, _ = formatted_array(numpy.array(f), "LR", format_spec=">.1f") self.assertTrue(l.endswith("LR")) + _, l, _ = formatted_array(numpy.array(f), "LR", format_spec="<.1f") + self.assertTrue(l.startswith("LR")) # using format_spec with right alignment character - g, l = formatted_array(numpy.array(f), " blah", format_spec=">.1f") + g, l, w = formatted_array(numpy.array(f), " blah", format_spec=">.1f") for e in g: # padded with spaces self.assertTrue(e.startswith(" "), e) self.assertFalse(e.endswith(" "), e) # using format_spec with left alignment character - g, l = formatted_array(numpy.array(f), " blah", format_spec="<.1f") + g, l, w = formatted_array(numpy.array(f), " blah", format_spec="<.1f") for e in g: # padded with spaces self.assertTrue(e.endswith(" "), e) self.assertFalse(e.startswith(" "), e) # using format_spec with center alignment character - g, l = formatted_array(numpy.array(f), " blah", format_spec="^.1f") + g, l, w = formatted_array(numpy.array(f), " blah", format_spec="^.1f") for e in g: # padded with spaces self.assertTrue(e.endswith(" "), e) self.assertTrue(e.startswith(" "), e) - g, _ = formatted_array(numpy.array(f), "blah", format_spec=".4f") + g, _, _ = formatted_array(numpy.array(f), "blah", format_spec=".4f") for e in g: v = e.split(".") self.assertEqual(len(v[-1]), 4, v) # cope with C-style format strings - g, _ = formatted_array(numpy.array(f), "blah", format_spec="%.4f") + g, _, _ = formatted_array(numpy.array(f), "blah", format_spec="%.4f") for e in g: v = e.split(".") self.assertEqual(len(v[-1]), 4, v) @@ -388,11 +482,86 @@ return val o = [3, "abc", 3.456789] - g, _ = formatted_array(numpy.array(o, dtype="O"), "blah", 
format_spec=formatcol) + g, _, _ = formatted_array( + numpy.array(o, dtype="O"), "blah", format_spec=formatcol + ) self.assertEqual(g[0], " 3", g[0]) self.assertEqual(g[1], " abc", g[1]) self.assertEqual(g[2], "3.46", g) + # don't pad + g, l, w = formatted_array(numpy.array(f), " blah", format_spec="<.1f") + g, l, w = formatted_array( + numpy.array(f), " blah", format_spec="<.1f", pad=False + ) + self.assertEqual(l, "blah") + for v in g: + self.assertTrue(" " not in v) + + # use the align argument, 'c' + g, l, w = formatted_array( + numpy.array(f), " blah ", precision=1, pad=True, align="c" + ) + for v in g: + self.assertTrue(v.startswith(" ") and v.endswith(" ")) + + # use the align argument, 'l' + g, l, w = formatted_array( + numpy.array(f), " blah ", precision=1, pad=True, align="l" + ) + for v in g: + self.assertTrue(not v.startswith(" ") and v.endswith(" ")) + + # use the align argument, 'r' + col_title = " blah " + g, l, w = formatted_array( + numpy.array(f), col_title, precision=1, pad=True, align="r" + ) + for v in g: + self.assertTrue(v.startswith(" ") and not v.endswith(" ")) + + self.assertEqual(w, len(col_title)) + + # raises error if align invalid value + with self.assertRaises(ValueError): + formatted_array( + numpy.array(f), " blah ", precision=1, pad=True, align="blah" + ) + + def test_get_continuation_tables_headers(self): + """correctly identify the columns for subtables""" + cols_widths = [("", 10), ("b", 5), ("c", 3), ("d", 14), ("e", 15)] + got = get_continuation_tables_headers(cols_widths) + # no subtables, returns list of lists + expect = [[c for c, _ in cols_widths]] + self.assertEqual(got, expect) + # fails if any column has a width < max_width + with self.assertRaises(ValueError): + get_continuation_tables_headers(cols_widths, max_width=5) + + # or if the sum of the index_name width and column is > max_width + with self.assertRaises(ValueError): + get_continuation_tables_headers(cols_widths, index_name="", max_width=24) + + got = 
get_continuation_tables_headers(cols_widths, max_width=25) + expect = [["", "b", "c"], ["d"], ["e"]] + self.assertEqual(got, expect) + + # with an index_name column + got = get_continuation_tables_headers(cols_widths, index_name="", max_width=27) + expect = [["", "b", "c"], ["", "d"], ["", "e"]] + self.assertEqual(got, expect) + + cols_widths = [("a", 10), ("b", 5), ("c", 3), ("d", 14), ("e", 15)] + got = get_continuation_tables_headers(cols_widths, index_name="a", max_width=27) + expect = [["a", "b", "c"], ["a", "d"], ["a", "e"]] + self.assertEqual(got, expect) + + # space has an affect + got = get_continuation_tables_headers(cols_widths, max_width=25, space=4) + expect = [["a", "b"], ["c", "d"], ["e"]] + self.assertEqual(got, expect) + def test_cast_to_array(self): """correctly cast to numpy array""" b = (True, False, True) @@ -434,10 +603,30 @@ "DogFaced": "root", "Human": "edge.0", }, - "x": {"NineBande": 1.0, "edge.1": 1.0, "DogFaced": 1.0, "Human": 1.0,}, - "length": {"NineBande": 4.0, "edge.1": 4.0, "DogFaced": 4.0, "Human": 4.0,}, - "y": {"NineBande": 3.0, "edge.1": 3.0, "DogFaced": 3.0, "Human": 3.0,}, - "z": {"NineBande": 6.0, "edge.1": 6.0, "DogFaced": 6.0, "Human": 6.0,}, + "x": { + "NineBande": 1.0, + "edge.1": 1.0, + "DogFaced": 1.0, + "Human": 1.0, + }, + "length": { + "NineBande": 4.0, + "edge.1": 4.0, + "DogFaced": 4.0, + "Human": 4.0, + }, + "y": { + "NineBande": 3.0, + "edge.1": 3.0, + "DogFaced": 3.0, + "Human": 3.0, + }, + "z": { + "NineBande": 6.0, + "edge.1": 6.0, + "DogFaced": 6.0, + "Human": 6.0, + }, "edge.names": { "NineBande": "NineBande", "edge.1": "edge.1", @@ -447,16 +636,26 @@ } t = make_table(data=data) self.assertEqual(t.shape, (4, 6)) - # if index column not specified + # if index_name column not specified with self.assertRaises(IndexError): _ = t["Human", "edge.parent"] - # use an index - t = make_table(data=data, index="edge.names") - # index col is the first one, and the data can be indexed + # use an index_name + t = 
make_table(data=data, index_name="edge.names") + # index_name col is the first one, and the data can be indexed self.assertEqual(t.columns.order[0], "edge.names") self.assertEqual(t["Human", "edge.parent"], "edge.0") + # providing path raises TypeError + with self.assertRaises(TypeError): + make_table("some_path.tsv") + + with self.assertRaises(TypeError): + make_table(header="some_path.tsv") + + with self.assertRaises(TypeError): + make_table(data="some_path.tsv") + def test_modify_title_legend(self): """reflected in persistent attrs""" rows = ( @@ -511,6 +710,18 @@ append_2 = t2.appended("foo2", [t3, t4]) self.assertEqual(append_2.shape[0], t2.shape[0] + t3.shape[0] + t4.shape[0]) + append_3 = t2.appended("", [t3, t4]) + self.assertEqual(append_3.shape[0], t2.shape[0] + t3.shape[0] + t4.shape[0]) + self.assertEqual(append_3.shape[1], t2.shape[1] + 1) + + def test_appended_mixed_dtypes(self): + """handles table columns with different dtypes""" + t1 = Table(header=["a", "b"], data=dict(a=[1], b=["s"])) + t2 = Table(header=["a", "b"], data=dict(a=[1.2], b=[4])) + appended = t1.appended(None, t2) + self.assertTrue("float" in appended.columns["a"].dtype.name) + self.assertTrue("object" in appended.columns["b"].dtype.name) + def test_count(self): """test the table count method""" t1 = Table(header=self.t1_header, data=self.t1_rows) @@ -527,6 +738,12 @@ self.assertEqual(t2.count("bar % 2 == 0"), 2) self.assertEqual(t2.count("id == 0"), 0) + def test_count_empty(self): + """empty table count method returns 0""" + t1 = Table(header=self.t1_header) + self.assertEqual(t1.count('chrom == "X"'), 0) + self.assertEqual(t1.count(lambda x: x == "X", columns="chrom"), 0) + def test_count_unique(self): """correctly computes unique values""" data = { @@ -577,6 +794,15 @@ self.assertEqual(t2.filtered("bar % 2 == 0").shape[0], 2) self.assertEqual(t2.filtered("id == 0").shape[0], 0) + def test_filtered_empty(self): + """test the table filtered method""" + t1 = 
Table(header=self.t1_header) + self.assertEqual(t1.shape[0], 0) + got = t1.filtered('chrom == "X"') + self.assertEqual(got.shape[0], 0) + got = t1.filtered(lambda x: x == "X", columns="chrom") + self.assertEqual(got.shape[0], 0) + def test_filtered_by_column(self): """test the table filtered_by_column method""" t1 = Table(header=self.t1_header, data=self.t1_rows) @@ -600,10 +826,13 @@ self.assertEqual(t1.get_columns(["chrom", "length"]).shape[0], t1.shape[0]) self.assertEqual(t1.get_columns(["chrom", "length"]).shape[1], 2) - # if name_index, includes that in return - t1 = Table(header=self.t1_header, data=self.t1_rows, index="stableid") + # if index_name, includes that in return + t1 = Table(header=self.t1_header, data=self.t1_rows, index_name="stableid") r = t1.get_columns(["length"]) self.assertEqual(r.header, ("stableid", "length")) + # if index_name, unless excluded + r = t1.get_columns(["length"], with_index=False) + self.assertIs(r.index_name, None) def test_joined(self): """test the table joined method""" @@ -715,6 +944,10 @@ self.assertEqual(table[0, "stableid"], "ENSG00000019102") self.assertEqual(table[last_index, "stableid"], "ENSG00000019144") + # providing reversed argument name raises TypeError + with self.assertRaises(TypeError): + table.sorted(reversed="chrom") + def test_summed(self): """test the table summed method""" t5 = Table(header=self.t5_header, data=self.t5_rows) @@ -774,6 +1007,24 @@ self.assertEqual(got.header, ("", "11", "22", "33", "44", "55")) r = str(got) # this should not fail! 
+ def test_transposed_forgets_index(self): + """transposed table defaults to no row index_name""" + data = { + "": [0, 1, 2, 3, 4, 5, 6], + "T": [2, 10, 1, 6, 1, 5, 0], + "C": [0, 0, 0, 0, 0, 0, 1], + "A": [8, 0, 9, 4, 9, 4, 4], + "G": [0, 0, 0, 0, 0, 1, 5], + } + t = Table(header=["", "T", "C", "A", "G"], data=data, index_name="") + tr = t.transposed("Base", select_as_header="") + self.assertEqual(tr.index_name, None) + + # but you can set a new one + tr = t.transposed("Base", select_as_header="", index_name="Base") + self.assertEqual(tr.index_name, "Base") + self.assertEqual(tr["G", "5"], 1) + def test_del_column(self): """correctly removes the column""" t = Table(header=self.t5_header, data=self.t5_rows) @@ -800,7 +1051,7 @@ t5_row_sum = t5.with_new_column("sum", sum, t5.header) self.assertEqual(t5_row_sum.get_columns("sum").tolist(), [4, 4, 8]) # now using a string expression - t8 = Table(header=self.t8_header, data=self.t8_rows, index="edge.name") + t8 = Table(header=self.t8_header, data=self.t8_rows, index_name="edge.name") n = t8.with_new_column("YZ", callback="y+z") assert_equal(n.columns["YZ"], [9.0, 9.0]) # if the new column alreayb exists, the new table has the newest column @@ -858,6 +1109,17 @@ table = make_table(header=["a"]) self.assertEqual(str(table), "=\na\n-\n-") + def test_str_object_col(self): + """str works when a column has complex object""" + # data has tuples in an array + data = dict( + key=numpy.array([("a", "c"), ("b", "c"), ("a", "d")], dtype="O"), + count=[1, 3, 2], + ) + t = Table(data=data) + got = str(t) + self.assertEqual(len(got.splitlines()), 7) + def test_str_md_format(self): """str() produces markdown table""" md_table = make_table( @@ -927,6 +1189,57 @@ self.assertEqual(tex[2], r"\caption{a title.}") self.assertEqual(tex[3], r"\label{table}") + def test_to_html(self): + """generates html table within c3table div""" + # with no index_name, or title, or legend + import re + + t = Table(header=self.t8_header, data=self.t8_rows) + 
got = t.to_html() + # make sure tags are matched + for tag in ("div", "style", "table", "thead"): + self.assertEqual(len(re.findall(f"<[/]*{tag}.*>", got)), 2) + + self.assertEqual(len(re.findall(f"<[/]*tr>", got)), 4) + # 2 columns should be left aligned, 4 right aligned + # adding 1 for the CSS style definition + self.assertEqual(got.count("c3col_left"), 4 + 1) + self.assertEqual(got.count("c3col_right"), 8 + 1) + self.assertEqual(got.count("cell_title"), 1) # CSS defn only + num_spans = got.count("span") + num_caption = got.count("caption") + + t = Table(header=self.t8_header, data=self.t8_rows, title="a title") + got = t.to_html() + self.assertEqual(got.count("cell_title"), 2) + # number of spans increases by 2 to enclose the title + self.assertEqual(got.count("span"), num_spans + 2) + self.assertEqual(got.count("caption"), num_caption + 2) + # no
element + self.assertNotIn("
", got) + + t = Table(header=self.t8_header, data=self.t8_rows, legend="a legend") + got = t.to_html() + self.assertEqual(got.count("cell_title"), 1) + # cell_legend not actually defined in CSS yet + self.assertEqual(got.count("cell_legend"), 1) + # number of spans increases by 2 to enclose the title + self.assertEqual(got.count("span"), num_spans + 2) + self.assertEqual(got.count("caption"), num_caption + 2) + # no
element + self.assertNotIn("
", got) + + t = Table( + header=self.t8_header, data=self.t8_rows, title="a title", legend="a legend" + ) + got = t.to_html() + self.assertEqual(got.count("cell_title"), 2) + # cell_legend not actually defined in CSS yet + self.assertEqual(got.count("cell_legend"), 1) + self.assertEqual(got.count("caption"), num_caption + 2) + # has
element + self.assertIn("
", got) + def test_invalid_format(self): """should raise value error""" t = make_table(self.t2_header, data=self.t2_rows) @@ -942,7 +1255,7 @@ ["e", 0.44084000179091454, 0.44083999937417828, 0.44084000179090932, ""], ] header = ["seq1/2", "a", "c", "b", "e"] - dist = Table(header=header, data=rows, index="seq1/2") + dist = Table(header=header, data=rows, index_name="seq1/2") r = dist.to_string(format="phylip") r = r.splitlines() self.assertEqual(r[0].strip(), "4") @@ -957,15 +1270,37 @@ def test_pickle_unpickle(self): """roundtrip via pickling""" data = { - "edge.parent": {"NineBande": "root", "edge.1": "root",}, - "x": {"NineBande": 1.0, "edge.1": 1.0,}, - "length": {"NineBande": 4.0, "edge.1": 4.0,}, - "y": {"NineBande": 3.0, "edge.1": 3.0,}, - "z": {"NineBande": 6.0, "edge.1": 6.0,}, - "edge.name": {"NineBande": "NineBande", "edge.1": "edge.1",}, + "edge.parent": { + "NineBande": "root", + "edge.1": "root", + }, + "x": { + "NineBande": 1.0, + "edge.1": 1.0, + }, + "length": { + "NineBande": 4.0, + "edge.1": 4.0, + }, + "y": { + "NineBande": 3.0, + "edge.1": 3.0, + }, + "z": { + "NineBande": 6.0, + "edge.1": 6.0, + }, + "edge.name": { + "NineBande": "NineBande", + "edge.1": "edge.1", + }, } t = Table( - data=data, max_width=50, index="edge.name", title="My title", legend="blah", + data=data, + max_width=50, + index_name="edge.name", + title="My title", + legend="blah", ) # via string s = pickle.dumps(t) @@ -1018,6 +1353,57 @@ with self.assertRaises(ValueError): r = load_table(path, skip_inconsistent=False) + def test_write_to_json(self): + """tests writing to json file""" + t = load_table("data/sample.tsv") + with TemporaryDirectory(".") as dirname: + path = pathlib.Path(dirname) / "table.json" + t.write(path) + with open_(path) as fn: + got = json.loads(fn.read()) + self.assertEqual(got["type"], get_object_provenance(Table)) + data = got["data"] + self.assertEqual(tuple(data["order"]), t.header) + self.assertEqual( + t.shape, + ( + 
len(tuple(data["columns"].items())[0][1]["values"]), + len(data["columns"]), + ), + ) + self.assertEqual( + t.array.T.tolist(), + [v["values"] for v in data["columns"].values()], + ) + + def test_load_table_from_json(self): + """tests loading a Table object from json file""" + with TemporaryDirectory(dir=".") as dirname: + json_path = os.path.join(dirname, "table.json") + t = load_table("data/sample.tsv") + t.write(json_path) + + got = load_table(json_path) + self.assertEqual(got.shape, t.shape) + self.assertEqual(got.header, t.header) + assert_equal(got.array, t.array) + + def test_load_table_invalid_type(self): + """raises TypeError if filename invalid type""" + with self.assertRaises(TypeError): + load_table({"a": [0, 1]}) + + def test_load_table_filename_case(self): + """load_table insensitive to file name case""" + with TemporaryDirectory(".") as dirname: + dirname = pathlib.Path(dirname) + with open(dirname / "temp.CSV", "w") as outfile: + outfile.write("a,b,c\n0,2,abc\n1,3,efg") + + table = load_table(dirname / "temp.CSV") + data = table.columns.to_dict() + self.assertEqual(data, dict(a=[0, 1], b=[2, 3], c=["abc", "efg"])) + def test_load_table_returns_static_columns(self): """for static data, load_table gives same dtypes for static_columns_type=True/False""" t = load_table("data/sample.tsv", sep="\t", static_column_types=False) @@ -1091,7 +1477,7 @@ legend="A legend", ) sv = table.to_csv() - expect = ["id,foo,bar", " 6,abc, 66", " 7,bca, 77"] + expect = ["id,foo,bar", "6,abc,66", "7,bca,77"] self.assertEqual(sv.splitlines(), expect) sv = table.to_csv(with_title=True) self.assertEqual(sv.splitlines(), ["A title"] + expect) @@ -1110,7 +1496,7 @@ ) sv = table.to_tsv() - expect = ["id\tfoo\tbar", " 6\tabc\t 66", " 7\tbca\t 77"] + expect = ["id\tfoo\tbar", "6\tabc\t66", "7\tbca\t77"] self.assertEqual(sv.splitlines(), expect) sv = table.to_tsv(with_title=True) self.assertEqual(sv.splitlines(), ["A title"] + expect) @@ -1119,6 +1505,16 @@ sv = 
table.to_tsv(with_title=True, with_legend=True) self.assertEqual(sv.splitlines(), ["A title"] + expect + ["A legend"]) + def test_to_delim(self): + """successfully create separated format with arbitrary character""" + table = Table( + header=self.t3_header, + data=self.t3_rows, + ) + sv = table.to_string(sep=";") + expect = ["id;foo;bar", "6;abc;66", "7;bca;77"] + self.assertEqual(sv.splitlines(), expect) + def test_to_rst_grid(self): """generates a rst grid table""" table = Table(header=["a", "b"], data=[[1, 2]], title="A title") @@ -1130,7 +1526,10 @@ def test_to_rst_csv(self): """generates a rst csv-table""" table = Table( - header=["a", "b"], data=[[1, 2]], title="A title", legend="A legend", + header=["a", "b"], + data=[[1, 2]], + title="A title", + legend="A legend", ) got = table.to_rst(csv_table=True) self.assertEqual( @@ -1147,7 +1546,12 @@ got = table.to_rst(csv_table=True) self.assertEqual( got.splitlines(), - [".. csv-table::", ' :header: "a", "b"', "", " 1, 2",], + [ + ".. csv-table::", + ' :header: "a", "b"', + "", + " 1, 2", + ], ) def test_get_repr_(self): @@ -1157,14 +1561,30 @@ # the next line was previously failing g = t._get_repr_() + table = Table(header=["a", "b"], data=[[1, 2]]) + table, _, unset_columns = table._get_repr_() + self.assertEqual(table.shape, (1, 2)) + self.assertIsNone(unset_columns) + + table = make_table(header=["a", "b"]) + table.columns["a"] = ["a"] + table, _, unset_columns = table._get_repr_() + self.assertEqual(table.shape, (1, 1)) + self.assertIn("b", unset_columns) + def test_repr_html_(self): """should produce html""" - # no index + # no index_name t = Table(header=self.t8_header, data=self.t8_rows) _ = t._repr_html_() - # with an index - t = Table(header=self.t8_header, data=self.t8_rows, index="edge.name") - _ = t._repr_html_() + + # with an index_name + t = Table(header=self.t8_header, data=self.t8_rows, index_name="edge.name") + got = t._repr_html_() + # and the index_name column should contain "index_name" css 
class + self.assertEqual( + got.count("index"), t.shape[0] + 1 + ) # add 1 for CSS style sheet # data has tuples in an array data = dict( @@ -1174,6 +1594,28 @@ t = Table(data=data) _ = t._repr_html_() + # some columns without data + table = make_table(header=["a", "b"]) + table.columns["a"] = ["a"] + _ = t._repr_html_() + + # single column with a single value should not fail + table = make_table(data={"kappa": [3.2]}, title="a title") + _ = table._repr_html_() + + # set head and tail, introduces ellipsis row class + table = make_table(data={"A": list("abcdefghijk"), "B": list(range(11))}) + table.set_repr_policy(head=8, tail=1) + got = table._repr_html_().splitlines() + num_rows = 0 + for l in got: + if "" in l: + num_rows += 1 + if "ellipsis" in l: + break + + self.assertEqual(num_rows, 9) + def test_array(self): """should produce array""" # data has tuples in an array @@ -1207,7 +1649,7 @@ formatted = [ f for f in writer([l.split(",") for l in comma_sep], has_header=True) ] - expected_format = ["id | foo | bar", " 6 | abc | 66", " 7 | bca | 77"] + expected_format = ["id | foo | bar", "6 | abc | 66", "7 | bca | 77"] self.assertEqual(formatted, expected_format) def test_set_repr_policy(self): @@ -1216,14 +1658,19 @@ t.set_repr_policy(random=2) r = repr(t) self.assertIsInstance(r, str) - r, _ = t._get_repr_() + r, _, _ = t._get_repr_() self.assertEqual(r.shape[0], 2) t.set_repr_policy(head=1) - r, _ = t._get_repr_() + r, _, _ = t._get_repr_() self.assertEqual(r.shape[0], 1) t.set_repr_policy(tail=3) - r, _ = t._get_repr_() + r, _, _ = t._get_repr_() self.assertEqual(r.shape[0], 3) + t.set_repr_policy(show_shape=False) + r = repr(t) + self.assertFalse(f"\n{t.shape[0]:,} rows x {t.shape[1]:,} columns" in r) + r = t._repr_html_() + self.assertFalse(f"\n{t.shape[0]:,} rows x {t.shape[1]:,} columns" in r) def test_head(self): """returns the head of the table!""" @@ -1237,6 +1684,12 @@ self.assertEqual(head.data.shape[0], 3) 
self.assertEqual(len(head.output.splitlines()), 9) self.assertEqual(head.data.tolist(), self.t1_rows[:3]) + # tests when number of rows < default + t = make_table(data=dict(a=["a"], b=["b"])) + t.head() + self.assertEqual(head.data.shape[0], 1) + self.assertEqual(len(head.output.splitlines()), 7) + self.assertEqual(head.data.tolist(), [["a", "b"]]) table.display = display def test_tail(self): @@ -1250,7 +1703,16 @@ t.tail(nrows=3) self.assertEqual(tail.data.shape[0], 3) self.assertEqual(len(tail.output.splitlines()), 9) - self.assertEqual(tail.data.tolist(), self.t1_rows[-3:]) + self.assertEqual( + [int(v) for v in tail.data[:, -1].tolist()], + [r[-1] for r in self.t1_rows[-3:]], + ) + # tests when number of rows < default + t = make_table(data=dict(a=["a"], b=["b"])) + t.tail() + self.assertEqual(tail.data.shape[0], 1) + self.assertEqual(len(tail.output.splitlines()), 7) + self.assertEqual(tail.data.tolist(), [["a", "b"]]) table.display = display @skipIf(DataFrame is None, "pandas not installed") @@ -1368,7 +1830,7 @@ got = load_table(path, reader=reader) self.assertEqual(got.shape, (10, 1)) - # specified by index + # specified by index_name reader = FilteringParser(columns=[0, 2], with_header=True, sep="\t") got = load_table(path, reader=reader) self.assertEqual(got.shape, (10, 2)) @@ -1386,7 +1848,7 @@ with self.assertRaises(ValueError): _ = load_table(path, reader=reader) - # raises IndexError if column index doesn't exist + # raises IndexError if column index_name doesn't exist reader = FilteringParser(columns=[0, 10], with_header=True, sep="\t") with self.assertRaises(IndexError): _ = load_table(path, reader=reader) @@ -1428,6 +1890,51 @@ data = [[230, "acdef", 1.3], [6, "cc", numpy.array([1.9876, 2.34])]] _ = formatted_cells(data, header=head) + def test_to_categorical(self): + """correctly construct contingency table""" + data = {"Ts": [31, 58], "Tv": [36, 138], "": ["syn", "nsyn"]} + table = make_table(header=["", "Ts", "Tv"], data=data) + with 
self.assertRaises(ValueError): + table.to_categorical(columns=["Ts", "Tv"]) + + table.index_name = "" + got = table.to_categorical(columns=["Ts", "Tv"]) + assert_equal(got.observed, table[:, 1:].array) + + got = table.to_categorical(["Ts"]) + mean = got.observed.array.mean() + expected = numpy.array([[mean], [mean]]) + assert_equal(got.expected, expected) + + # works if index_name included + got = table.to_categorical(columns=["Ts", "Tv", ""]) + assert_equal(got.observed, table[:, 1:].array) + + # works if no columns specified + got = table.to_categorical() + assert_equal(got.observed, table[:, 1:].array) + + data = { + "": numpy.array(["syn", "nsyn"], dtype=object), + "Ts": numpy.array([31, 58], dtype=object), + "Tv": numpy.array([36, 138], dtype=object), + } + + table = make_table(header=["", "Ts", "Tv"], data=data, index_name="") + with self.assertRaises(TypeError): + table.to_categorical(columns=["Ts", "Tv"]) + + def test_is_html_markup(self): + """format function confirms correctly specified html""" + self.assertTrue(is_html_markup("blah
")) + self.assertTrue(is_html_markup("blah
blah
")) + self.assertTrue(is_html_markup("blahblah")) + self.assertTrue(is_html_markup("blah\nblah")) + self.assertFalse(is_html_markup("blah")) + self.assertFalse(is_html_markup("
")) + self.assertFalse(is_html_markup("blah < blah")) + self.assertFalse(is_html_markup("blah > blah")) + if __name__ == "__main__": main() diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_util/test_transform.py python-cogent-2020.12.21a+dfsg/tests/test_util/test_transform.py --- python-cogent-2020.6.30a0+dfsg/tests/test_util/test_transform.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_util/test_transform.py 2020-12-20 23:35:03.000000000 +0000 @@ -1,6 +1,8 @@ #!/usr/bin/env python """Tests of transformation and composition functions . """ +from unittest import TestCase, main + from cogent3.util.transform import ( KeepChars, first_index_in_set, @@ -8,18 +10,19 @@ per_longest, per_shortest, ) -from cogent3.util.unit_test import TestCase, main __author__ = "Sandra Smit" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Rob Knight", "Sandra Smit", "Zongzhi Liu"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Sandra Smit" __email__ = "sandra.smit@colorado.edu" __status__ = "Production" +from numpy.testing import assert_allclose + class has_x(object): # convenience class for has_field and related functions @@ -125,50 +128,50 @@ # test behavior with default aggregator and normalizer f = for_seq(is_eq) - self.assertFloatEqual(f(s1, s1), 1.0) - self.assertFloatEqual(f(s1, short), 1.0) - self.assertFloatEqual(f(short, s1), 1.0) - self.assertFloatEqual(f(short, s4), 0.0) - self.assertFloatEqual(f(s4, short), 0.0) - self.assertFloatEqual(f(s1, s2), 0.6) + assert_allclose(f(s1, s1), 1.0) + assert_allclose(f(s1, short), 1.0) + assert_allclose(f(short, s1), 1.0) + assert_allclose(f(short, s4), 0.0) + assert_allclose(f(s4, short), 0.0) + assert_allclose(f(s1, s2), 0.6) f = for_seq(is_ne) - self.assertFloatEqual(f(s1, s1), 0.0) - self.assertFloatEqual(f(s1, short), 0.0) - self.assertFloatEqual(f(short, s1), 0.0) - self.assertFloatEqual(f(short, s4), 1.0) - 
self.assertFloatEqual(f(s4, short), 1.0) - self.assertFloatEqual(f(s1, s2), 0.4) + assert_allclose(f(s1, s1), 0.0) + assert_allclose(f(s1, short), 0.0) + assert_allclose(f(short, s1), 0.0) + assert_allclose(f(short, s4), 1.0) + assert_allclose(f(s4, short), 1.0) + assert_allclose(f(s1, s2), 0.4) f = for_seq(lt_5) - self.assertFloatEqual(f(s3, s3), 1.0) - self.assertFloatEqual(f(s3, s4), 0.0) - self.assertFloatEqual(f(s2, s3), 0.6) + assert_allclose(f(s3, s3), 1.0) + assert_allclose(f(s3, s4), 0.0) + assert_allclose(f(s2, s3), 0.6) f = for_seq(diff) - self.assertFloatEqual(f(s1, s1), 0.0) - self.assertFloatEqual(f(s4, s1), 2.0) - self.assertFloatEqual(f(s1, s4), -2.0) + assert_allclose(f(s1, s1), 0.0) + assert_allclose(f(s4, s1), 2.0) + assert_allclose(f(s1, s4), -2.0) # test behavior with different aggregator f = for_seq(diff) - self.assertFloatEqual(f(s1, s5), 0) + assert_allclose(f(s1, s5), 0) f = for_seq(diff, aggregator=sum) - self.assertFloatEqual(f(s1, s5), 0) + assert_allclose(f(s1, s5), 0) f = for_seq(diff, aggregator=sumsq) - self.assertFloatEqual(f(s1, s5), 2.0) + assert_allclose(f(s1, s5), 2.0) # test behavior with different normalizer f = for_seq(diff, aggregator=sumsq, normalizer=None) - self.assertFloatEqual(f(s1, s5), 10) + assert_allclose(f(s1, s5), 10) f = for_seq(diff, aggregator=sumsq) - self.assertFloatEqual(f(s1, s5), 2.0) + assert_allclose(f(s1, s5), 2.0) f = for_seq(diff, aggregator=sumsq, normalizer=times_two) - self.assertFloatEqual(f(s1, s5), 20) + assert_allclose(f(s1, s5), 20) f = for_seq(diff, aggregator=sumsq) - self.assertFloatEqual(f(s5, short), 4) + assert_allclose(f(s5, short), 4) f = for_seq(diff, aggregator=sumsq, normalizer=long_norm) - self.assertFloatEqual(f(s5, short), 0.8) + assert_allclose(f(s5, short), 0.8) class Filter_Criteria_Tests(TestCase): diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_util/test_union_dict.py python-cogent-2020.12.21a+dfsg/tests/test_util/test_union_dict.py --- 
python-cogent-2020.6.30a0+dfsg/tests/test_util/test_union_dict.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_util/test_union_dict.py 2020-12-20 23:35:03.000000000 +0000 @@ -2,15 +2,16 @@ """Unit tests for union_dict. """ +from unittest import TestCase, main + from cogent3.util.union_dict import UnionDict -from cogent3.util.unit_test import TestCase, main __author__ = "Thomas La" __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2020.6.30a0+dfsg/tests/test_util/test_unit_test.py python-cogent-2020.12.21a+dfsg/tests/test_util/test_unit_test.py --- python-cogent-2020.6.30a0+dfsg/tests/test_util/test_unit_test.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/test_util/test_unit_test.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,998 +0,0 @@ -#!/usr/bin/env python -"""Tests for cogent3.util.unit_test, extension of the built-in PyUnit framework. 
-""" -from sys import exc_info - -import numpy - -from numpy import array, inf, log, zeros - -# SUPPORT2425 -# from __future__ import with_statement -from cogent3.util.unit_test import FakeRandom, TestCase, main - - -__author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2020, The Cogent Project" -__credits__ = ["Rob Knight", "Sandra Smit", "Gavin Huttley", "Daniel McDonald"] -__license__ = "BSD-3" -__version__ = "2020.6.30a" -__maintainer__ = "Rob Knight" -__email__ = "rob@spot.colorado.edu" -__status__ = "Production" - - -class FakeRandomTests(TestCase): - """Tests FakeRandom class.""" - - def test_call_constant(self): - """FakeRandom __call__ should return next item from list if constant""" - const = FakeRandom([1]) - self.assertEqual(const(), 1) - self.assertRaises(IndexError, const) - - def test_call_constant_wrap(self): - """FakeRandom __call__ should wrap for one-item list if specified""" - const = FakeRandom([1], True) - for i in range(10): - self.assertEqual(const(), True) - - def test_call_var(self): - """FakeRandom __call__ should work with a multi-item list""" - f = FakeRandom([1, 2, 3]) - self.assertEqual(f(), 1) - self.assertEqual(f(), 2) - self.assertEqual(f(), 3) - self.assertRaises(IndexError, f) - - def test_call_var_wrap(self): - """FakeRandom __call__ should work with a multi-item wrapped list""" - f = FakeRandom([1, 2, 3], True) - result = [f() for i in range(10)] - self.assertEqual(result, [1, 2, 3, 1, 2, 3, 1, 2, 3, 1]) - - def test_cal_var_args(self): - """FakeRandom __call__ should ignore extra args""" - f = FakeRandom([[1, 2, 3]], True) - for i in range(5): - result = f((5, 5)) # shape parameter ignored - self.assertEqual(result, [1, 2, 3]) - - -class TestCaseTests(TestCase): - """Tests for extension of the built-in unittest framework. - - For each test, includes an example of success and failure. 
- """ - - unequal_pairs = [ - (1, 0), - ([], ()), - (None, 0), - ("", " "), - (1, "1"), - (0, "0"), - ("", None), - (array([1, 2, 3]), array([1, 2, 4])), - (array([[1, 2], [3, 4]]), array([[1.0, 2.0], [3.0, 4.1]])), - (array([1]), array([1, 2])), - (zeros(0), array([1])), - (array([1, 1, 1]), array([1])), - (array([[1, 1], [1, 1]]), array([1, 1, 1, 1])), - (zeros(0), None), - (zeros(3), zeros(5)), - (zeros(0), ""), - ] - - equal_pairs = [ - (1, 1), - (0, 0), - (5, 5), - (5, 5.0), - (0, 0.0), - ("", ""), - (" ", " "), - ("a", "a"), - (None, None), - ([0, 1], [0.0, 1.0]), - (array([1, 2, 3]), array([1, 2, 3])), - (array([[1, 2], [3, 4]]), array([[1.0, 2.0], [3.0, 4.0]])), - (zeros(0), []), - (zeros(0), zeros(0)), - (array([]), zeros(0)), - (zeros(3), zeros(3)), - (array([0, 0, 0]), zeros(3)), - (array([]), []), - ] - - small = 1e-7 - big = 1e-5 - - within_1e6_abs_pairs = [ - (1, 1 + small), - (1 + small, 1), - (1, 1 - small), - (1 - small, 1), - (100000, 100000 - small), - (-100000, -100000 - small), - (-1, -1 + small), - (-1, -1 - small), - (0, small), - (0, -small), - (array([1, 2]), array([1, 2 + small])), - (array([[1, 2], [3, 4]]), array([[1, 2 + small], [3, 4]])), - ] - - within_1e6_rel_pairs = [ - (1, 1 + 1 * small), - (1 + 1 * small, 1), - (1, 1 - 1 * small), - (1 - 1 * small, 1), - (100000, 100000 - 100000 * small), - (-100000, -100000 - 100000 * small), - (-1, -1 + -1 * small), - (-1, -1 - -1 * small), - (array([1, 2]), array([1 + small, 2])), - ( - array([[1000, 1000], [1000, 1000]]), - array([[1000 + 1000 * small, 1000], [1000, 1000]]), - ), - ] - - outside_1e6_abs_pairs = [ - (1, 1 + big), - (1 + big, 1), - (1, 1 - big), - (1 - big, 1), - (100000, 100000 - big), - (-100000, -100000 - big), - (-1, -1 + big), - (-1, -1 - big), - (0, big), - (0, -big), - (1e7, 1e7 + 1), - (array([1, 1]), array([1, 1 + big])), - (array([[1, 1], [1, 1]]), array([[1, 1 + big], [1, 1]])), - ] - - outside_1e6_rel_pairs = [ - (1, 1 + 1 * big), - (1 + 1 * big, 1), - (1, 1 - 1 * 
big), - (1 - 1 * big, 1), - (100000, 100000 - 100000 * big), - (-100000, -100000 - 100000 * big), - (-1, -1 + -1 * big), - (-1, -1 - -1 * big), - (1e-30, 1e-30 + small), - (0, small), - (1e5, 1e5 + 1), - (array([1, 1]), array([1, 1 + 1 * big])), - ] - - def test_assertNotEqual_None(self): - """assertNotEqual should raise exception with two copies of None""" - try: - self.assertNotEqual(None, None) - except: - message = str(exc_info()[1]) - self.assertEqual( - message, "Observed None and expected None: shouldn't test equal" - ) - else: - raise AssertionError( - "unit_test.assertNotEqual failed on input %s and %s" - % (repr(first), repr(second)) - ) - - def test_assertNotEqual_numbers(self): - """assertNotEqual should raise exception with integer and float zero""" - try: - self.assertNotEqual(0, 0.0) - except: - message = str(exc_info()[1]) - self.assertEqual( - message, "Observed 0 and expected 0.0: shouldn't test equal" - ) - else: - raise AssertionError( - "unit_test.assertNotEqual failed on input %s and %s" - % (repr(first), repr(second)) - ) - - def test_assertNotEqual_unequal(self): - """assertNotEqual should not raise exception when values differ""" - for first, second in self.unequal_pairs: - try: - self.assertNotEqual(first, second) - except: - raise AssertionError( - "unit_test.assertNotEqual failed on input %s and %s" - % (repr(first), repr(second)) - ) - - def test_assertNotEqual_equal(self): - """assertNotEqual should raise exception when values differ""" - for first, second in self.equal_pairs: - try: - self.assertNotEqual(first, second) - except: - message = str(exc_info()[1]) - self.assertEqual( - message, - "Observed %s and expected %s: shouldn't test equal" - % (repr(first), repr(second)), - ) - else: - raise AssertionError( - "unit_test.assertNotEqual failed on input %s and %s" - % (repr(first), repr(second)) - ) - - def test_assertEqual_None(self): - """assertEqual should not raise exception with two copies of None""" - try: - 
self.assertEqual(None, None) - except: - raise AssertionError( - "unit_test.assertEqual failed on input %s and %s" - % (repr(first), repr(second)) - ) - - def test_assertEqual_numbers(self): - """assertEqual should not raise exception with integer and float zero""" - try: - self.assertEqual(0, 0.0) - except: - raise AssertionError( - "unit_test.assertEqual failed on input %s and %s" - % (repr(first), repr(second)) - ) - - def test_assertEqual_unequal(self): - """assertEqual should raise exception when values differ""" - for first, second in self.unequal_pairs: - try: - self.assertEqual(first, second) - except: - message = str(exc_info()[1]) - self.assertEqual( - message, "Got %s, but expected %s" % (repr(first), repr(second)) - ) - else: - raise AssertionError( - "unit_test.assertEqual failed on input %s and %s" - % (repr(first), repr(second)) - ) - - def test_assertEqual_equal(self): - """assertEqual should not raise exception when values test equal""" - for first, second in self.equal_pairs: - try: - self.assertEqual(first, second) - except: - raise AssertionError( - "unit_test.assertEqual failed on input %s and %s" - % (repr(first), repr(second)) - ) - - def test_assertEqual_nested_array(self): - self.assertEqual([[1, 0], [0, 1]], [array([1, 0]), array([0, 1])]) - - def test_assertEqual_shape_mismatch(self): - """assertEqual should raise when obs and exp shapes mismatch""" - obs = [1, 2, 3] - exp = [1, 2, 3, 4] - self.assertRaises(AssertionError, self.assertEqual, obs, exp) - - def test_assertFloatEqualAbs_equal(self): - """assertFloatEqualAbs should not raise exception when values within eps""" - for first, second in self.within_1e6_abs_pairs: - try: - self.assertFloatEqualAbs(first, second, eps=1e-6) - except: - raise AssertionError( - "unit_test.assertFloatEqualAbs failed on input %s and %s" - % (repr(first), repr(second)) - ) - - def test_assertFloatEqualAbs_threshold(self): - """assertFloatEqualAbs should raise exception when eps is very small""" - for 
first, second in self.within_1e6_abs_pairs: - try: - self.assertFloatEqualAbs(first, second, 1e-30) - except: - message = str(exc_info()[1]) - diff = first - second - exp = "True is not false : Got %s, but expected %s (diff was %s)" % ( - repr(first), - repr(second), - repr(diff), - ) - self.assertEqual(message, exp) - else: - raise AssertionError( - "unit_test.assertFloatEqualAbs failed on input %s and %s" - % (repr(first), repr(second)) - ) - - def test_assertFloatEqualAbs_unequal(self): - """assertFloatEqualAbs should raise exception when values differ by >eps""" - for first, second in self.outside_1e6_abs_pairs: - try: - self.assertFloatEqualAbs(first, second) - except: - message = str(exc_info()[1]) - diff = first - second - self.assertEqual( - message, - "True is not false : Got %s, but expected %s (diff was %s)" - % (repr(first), repr(second), repr(diff)), - ) - else: - raise AssertionError( - "unit_test.assertFloatEqualAbs failed on input %s and %s" - % (repr(first), repr(second)) - ) - - def test_assertFloatEqualAbs_shape_mismatch(self): - """assertFloatEqualAbs should raise when obs and exp shapes mismatch""" - obs = [1, 2, 3] - exp = [1, 2, 3, 4] - self.assertRaises(AssertionError, self.assertFloatEqualAbs, obs, exp) - - def test_assertFloatEqualRel_equal(self): - """assertFloatEqualRel should not raise exception when values within eps""" - for first, second in self.within_1e6_rel_pairs: - try: - self.assertFloatEqualRel(first, second) - except: - raise AssertionError( - "unit_test.assertFloatEqualRel failed on input %s and %s" - % (repr(first), repr(second)) - ) - - def test_assertFloatEqualRel_unequal(self): - """assertFloatEqualRel should raise exception when eps is very small""" - for first, second in self.within_1e6_rel_pairs: - try: - self.assertFloatEqualRel(first, second, 1e-30) - except: - message = str(exc_info()[1]) - diff = first - second - self.assertEqual( - message, - "True is not false : Got %s, but expected %s (diff was %s)" - % 
(repr(first), repr(second), repr(diff)), - ) - else: - raise AssertionError( - "unit_test.assertFloatEqualRel failed on input %s and %s" - % (repr(first), repr(second)) - ) - - def test_assertFloatEqualRel_unequal(self): - """assertFloatEqualRel should raise exception when values differ by >eps""" - for first, second in self.outside_1e6_rel_pairs: - try: - self.assertFloatEqualRel(first, second) - except: - message = str(exc_info()[1]) - diff = first - second - self.assertEqual( - message, - "True is not false : Got %s, but expected %s (diff was %s)" - % (repr(first), repr(second), repr(diff)), - ) - else: - raise AssertionError( - "unit_test.assertFloatEqualRel failed on input %s and %s" - % (repr(first), repr(second)) - ) - - def test_assertFloatEqualRel_shape_mismatch(self): - """assertFloatEqualRel should raise when obs and exp shapes mismatch""" - obs = [1, 2, 3] - exp = [1, 2, 3, 4] - self.assertRaises(AssertionError, self.assertFloatEqualRel, obs, exp) - - def test_assertFloatEqualList_equal(self): - """assertFloatEqual should work on two lists of similar values""" - originals = [0, 1, -1, 10, -10, 100, -100] - modified = [i + 1e-7 for i in originals] - try: - self.assertFloatEqual(originals, modified) - self.assertFloatEqual([], []) # test empty lists as well - except: - raise AssertionError( - "unit_test.assertFloatEqual failed on lists of similar values" - ) - - def test_assertFloatEqual_shape_mismatch(self): - """assertFloatEqual should raise when obs and exp shapes mismatch""" - obs = [1, 2, 3] - exp = [1, 2, 3, 4] - self.assertRaises(AssertionError, self.assertFloatEqual, obs, exp) - - def test_assertFloatEqualList_unequal(self): - """assertFloatEqual should fail on two lists of dissimilar values""" - originals = [0, 1, -1, 10, -10, 100, -100] - modified = [i + 1e-5 for i in originals] - try: - self.assertFloatEqual(originals, modified) - except: - pass - else: - raise AssertionError( - "unit_test.assertFloatEqual failed on lists of dissimilar values" 
- ) - - def test_assertFloatEqual_mixed(self): - """assertFloatEqual should work on equal lists of mixed types.""" - first = [i[0] for i in self.equal_pairs] - second = [i[1] for i in self.equal_pairs] - self.assertFloatEqual(first, second) - - def test_assertFloatEqualAbs_mixed(self): - first = [i[0] for i in self.equal_pairs] - second = [i[1] for i in self.equal_pairs] - """assertFloatEqualAbs should work on equal lists of mixed types.""" - self.assertFloatEqualAbs(first, second) - - def test_assertFloatEqualRel_mixed(self): - first = [i[0] for i in self.equal_pairs] - second = [i[1] for i in self.equal_pairs] - """assertFloatEqualRel should work on equal lists of mixed types.""" - self.assertFloatEqualRel(first, second) - - def test_assertFloatEqual_mixed_unequal(self): - """assertFloatEqual should work on unequal lists of mixed types.""" - first = [i[0] for i in self.unequal_pairs] - second = [i[1] for i in self.unequal_pairs] - self.assertRaises(AssertionError, self.assertFloatEqual, first, second) - - def test_assertFloatEqualAbs_mixed(self): - """assertFloatEqualAbs should work on lists of mixed types.""" - first = [i[0] for i in self.unequal_pairs] - second = [i[1] for i in self.unequal_pairs] - self.assertRaises(AssertionError, self.assertFloatEqualAbs, first, second) - - def test_assertFloatEqualRel_mixed(self): - """assertFloatEqualRel should work on lists of mixed types.""" - first = [i[0] for i in self.unequal_pairs] - second = [i[1] for i in self.unequal_pairs] - self.assertRaises(AssertionError, self.assertFloatEqualRel, first, second) - - def test_assertEqualItems(self): - """assertEqualItems should raise exception if items not equal""" - self.assertEqualItems("abc", "abc") - self.assertEqualItems("abc", "cba") - self.assertEqualItems("", "") - self.assertEqualItems("abc", ["a", "b", "c"]) - self.assertEqualItems([0], [0.0]) - - try: - self.assertEqualItems("abc", "abcd") - except: - message = str(exc_info()[1]) - self.assertEqual( - message, 
"Observed and expected are different lengths: 3 and 4" - ) - else: - raise AssertionError( - "unit_test.assertEqualItems failed on input %s and %s" - % (repr(first), repr(second)) - ) - - try: - self.assertEqualItems("cab", "acc") - except: - message = str(exc_info()[1]) - self.assertEqual(message, "Observed b and expected c at sorted index 1") - else: - raise AssertionError( - "unit_test.assertEqualItems failed on input %s and %s" - % (repr(first), repr(second)) - ) - try: - self.assertEqualItems("cba", "yzx") - except: - message = str(exc_info()[1]) - self.assertEqual(message, "Observed a and expected x at sorted index 0") - else: - raise AssertionError( - "unit_test.assertEqualItems failed on input %s and %s" - % (repr(first), repr(second)) - ) - - def test_assertSameItems(self): - """assertSameItems should raise exception if items not same""" - x = 0 - y = "abcdef" - z = 3 - y1 = "abc" + "def" - z1 = 3.0 - - y_id = id(y) - z_id = id(z) - y1_id = id(y1) - z1_id = id(z1) - - self.assertSameItems([x, y, z], [x, y, z]) - self.assertSameItems([x, y, z], [z, x, y]) - self.assertSameItems("", "") - self.assertSameItems([x, y, z], (x, y, z)) - - try: - self.assertSameItems([x, y, z], [x, y, z, y]) - except: - message = str(exc_info()[1]) - self.assertEqual( - message, "Observed and expected are different lengths: 3 and 4" - ) - else: - raise AssertionError( - "unit_test.assertSameItems failed on input %s and %s" - % (repr([x, y, z]), repr([x, y, z, y])) - ) - - try: - first_list = [x, y, z] - second_list = [y, x, z1] - self.assertSameItems(first_list, second_list) - except self.failureException: - pass - else: - raise AssertionError( - "unit_test.assertEqualItems failed on input %s and %s" - % (repr([x, y, z]), repr([y, x, z1])) - ) - - # assert y is not y1 - # try: - # self.assertSameItems([y], (y1,)) - # except self.failureException: - # pass - # else: - # raise AssertionError, \ - # "unit_test.assertEqualItems failed on input %s and %s" \ - # % (`[y]`, `(y1,)`) - - 
def test_assertContains(self): - """assertContains should raise exception if item not in test set""" - self.assertContains("abc", "a") - self.assertContains(["a", "b", "c"], "a") - self.assertContains(["a", "b", "c"], "b") - self.assertContains(["a", "b", "c"], "c") - self.assertContains({"a": 1, "b": 2}, "a") - - class _fake_container(object): - def __contains__(self, other): - return True - - fake = _fake_container() - self.assertContains(fake, "x") - self.assertContains(fake, 3) - self.assertContains(fake, {"a": "b"}) - - try: - self.assertContains("", []) - except: - message = str(exc_info()[1]) - self.assertEqual(message, "Item [] not found in ''") - else: - raise AssertionError( - "unit_test.assertContains failed on input %s and %s" - % (repr(""), repr([])) - ) - - try: - self.assertContains("abcd", "x") - except: - message = str(exc_info()[1]) - self.assertEqual(message, "Item 'x' not found in 'abcd'") - else: - raise AssertionError( - "unit_test.assertContains failed on input %s and %s" - % (repr("abcd"), repr("x")) - ) - - def test_assertNotContains(self): - """assertNotContains should raise exception if item in test set""" - self.assertNotContains("abc", "x") - self.assertNotContains(["a", "b", "c"], "x") - self.assertNotContains("abc", None) - self.assertNotContains(["a", "b", "c"], {"x": 1}) - self.assertNotContains({"a": 1, "b": 2}, 3.0) - - class _fake_container(object): - def __contains__(self, other): - return False - - fake = _fake_container() - self.assertNotContains(fake, "x") - self.assertNotContains(fake, 3) - self.assertNotContains(fake, {"a": "b"}) - - try: - self.assertNotContains("", "") - except: - message = str(exc_info()[1]) - self.assertEqual(message, "Item '' should not have been in ''") - else: - raise AssertionError( - "unit_test.assertNotContains failed on input %s and %s" - % (repr(""), repr("")) - ) - - try: - self.assertNotContains("abcd", "a") - except: - message = str(exc_info()[1]) - self.assertEqual(message, "Item 'a' should 
not have been in 'abcd'") - else: - raise AssertionError( - "unit_test.assertNotContains failed on input %s and %s" - % (repr("abcd"), repr("a")) - ) - - try: - self.assertNotContains({"a": 1, "b": 2}, "a") - except: - message = str(exc_info()[1]) - self.assertTrue("Item 'a' should not have been in" in message) - else: - raise AssertionError( - "unit_test.assertNotContains failed on input %s and %s" - % (repr({"a": 1, "b": 2}), repr("a")) - ) - - def test_assertGreaterThan_equal(self): - """assertGreaterThan should raise exception if equal""" - self.assertRaises(AssertionError, self.assertGreaterThan, 5, 5) - self.assertRaises(AssertionError, self.assertGreaterThan, 5.0, 5.0) - self.assertRaises(AssertionError, self.assertGreaterThan, 5.0, 5) - self.assertRaises(AssertionError, self.assertGreaterThan, 5, 5.0) - - def test_assertGreaterThan_None(self): - """assertGreaterThan should raise exception if compared to None""" - self.assertRaises(AssertionError, self.assertGreaterThan, 5, None) - self.assertRaises(AssertionError, self.assertGreaterThan, None, 5) - self.assertRaises(AssertionError, self.assertGreaterThan, 5.0, None) - self.assertRaises(AssertionError, self.assertGreaterThan, None, 5.0) - - def test_assertGreaterThan_numbers_true(self): - """assertGreaterThan should pass when observed > value""" - self.assertGreaterThan(10, 5) - - def test_assertGreaterThan_numbers_false(self): - """assertGreaterThan should raise when observed <= value""" - self.assertRaises(AssertionError, self.assertGreaterThan, 2, 5) - - def test_assertGreaterThan_numbers_list_true(self): - """assertGreaterThan should pass when all elements are > value""" - observed = [1, 2, 3, 4, 3, 2, 3, 4, 6, 3] - self.assertGreaterThan(observed, 0) - - def test_assertGreaterThan_numbers_list_false(self): - """assertGreaterThan should raise when a single element is <= value""" - observed = [2, 3, 4, 3, 2, 1, 3, 4, 6, 3] - self.assertRaises(AssertionError, self.assertGreaterThan, observed, 1) - - def 
test_assertGreaterThan_floats_true(self): - """assertGreaterThan should pass when observed > value""" - self.assertGreaterThan(5.0, 3.0) - - def test_assertGreaterThan_floats_false(self): - """assertGreaterThan should raise when observed <= value""" - self.assertRaises(AssertionError, self.assertGreaterThan, 3.0, 5.0) - - def test_assertGreaterThan_floats_list_true(self): - """assertGreaterThan should pass when all elements are > value""" - observed = [1.0, 2.0, 3.0, 4.0, 6.0, 3.0] - self.assertGreaterThan(observed, 0.0) - - def test_assertGreaterThan_floats_list_false(self): - """assertGreaterThan should raise when any elements are <= value""" - observed = [2.0, 3.0, 4.0, 1.0, 3.0, 3.0] - self.assertRaises(AssertionError, self.assertGreaterThan, observed, 1.0) - - def test_assertGreaterThan_mixed_true(self): - """assertGreaterThan should pass when observed > value""" - self.assertGreaterThan(5.0, 3) - self.assertGreaterThan(5, 3.0) - - def test_assertGreaterThan_mixed_false(self): - """assertGreaterThan should raise when observed <= value""" - self.assertRaises(AssertionError, self.assertGreaterThan, -3, 5.0) - self.assertRaises(AssertionError, self.assertGreaterThan, 3.0, 5) - - def test_assertGreaterThan_mixed_list_true(self): - """assertGreaterThan should pass when all elements are > value""" - observed = [1.0, 2, 3.0, 4.0, 6, 3.0] - self.assertGreaterThan(observed, 0.0) - self.assertGreaterThan(observed, 0) - - def test_assertGreaterThan_mixed_list_false(self): - """assertGreaterThan should raise when a single element is <= value""" - observed = [2.0, 3, 4, 1.0, 3.0, 3.0] - self.assertRaises(AssertionError, self.assertGreaterThan, observed, 1.0) - self.assertRaises(AssertionError, self.assertGreaterThan, observed, 1) - - def test_assertGreaterThan_numpy_array_true(self): - """assertGreaterThan should pass when all elements are > value""" - observed = array([1, 2, 3, 4]) - self.assertGreaterThan(observed, 0) - self.assertGreaterThan(observed, 0.0) - - def 
test_assertGreaterThan_numpy_array_false(self): - """assertGreaterThan should pass when any element is <= value""" - observed = array([1, 2, 3, 4]) - self.assertRaises(AssertionError, self.assertGreaterThan, observed, 3) - self.assertRaises(AssertionError, self.assertGreaterThan, observed, 3.0) - - def test_assertLessThan_equal(self): - """assertLessThan should raise exception if equal""" - self.assertRaises(AssertionError, self.assertLessThan, 5, 5) - self.assertRaises(AssertionError, self.assertLessThan, 5.0, 5.0) - self.assertRaises(AssertionError, self.assertLessThan, 5.0, 5) - self.assertRaises(AssertionError, self.assertLessThan, 5, 5.0) - - def test_assertLessThan_None(self): - """assertLessThan should raise exception if compared to None""" - self.assertRaises(AssertionError, self.assertLessThan, 5, None) - self.assertRaises(AssertionError, self.assertLessThan, None, 5) - self.assertRaises(AssertionError, self.assertLessThan, 5.0, None) - self.assertRaises(AssertionError, self.assertLessThan, None, 5.0) - - def test_assertLessThan_numbers_true(self): - """assertLessThan should pass when observed < value""" - self.assertLessThan(10, 15) - - def test_assertLessThan_numbers_false(self): - """assertLessThan should raise when observed >= value""" - self.assertRaises(AssertionError, self.assertLessThan, 6, 5) - - def test_assertLessThan_numbers_list_true(self): - """assertLessThan should pass when all elements are < value""" - observed = [1, 2, 3, 4, 3, 2, 3, 4, 6, 3] - self.assertLessThan(observed, 8) - - def test_assertLessThan_numbers_list_false(self): - """assertLessThan should raise when a single element is >= value""" - observed = [2, 3, 4, 3, 2, 1, 3, 4, 6, 3] - self.assertRaises(AssertionError, self.assertLessThan, observed, 6) - - def test_assertLessThan_floats_true(self): - """assertLessThan should pass when observed < value""" - self.assertLessThan(-5.0, 3.0) - - def test_assertLessThan_floats_false(self): - """assertLessThan should raise when observed 
>= value""" - self.assertRaises(AssertionError, self.assertLessThan, 3.0, -5.0) - - def test_assertLessThan_floats_list_true(self): - """assertLessThan should pass when all elements are < value""" - observed = [1.0, 2.0, -3.0, 4.0, -6.0, 3.0] - self.assertLessThan(observed, 5.0) - - def test_assertLessThan_floats_list_false(self): - """assertLessThan should raise when a single element is >= value""" - observed = [2.0, 3.0, 4.0, 1.0, 3.0, 3.0] - self.assertRaises(AssertionError, self.assertLessThan, observed, 4.0) - - def test_assertLessThan_mixed_true(self): - """assertLessThan should pass when observed < value""" - self.assertLessThan(2.0, 3) - self.assertLessThan(2, 3.0) - - def test_assertLessThan_mixed_false(self): - """assertLessThan should raise when observed >= value""" - self.assertRaises(AssertionError, self.assertLessThan, 6, 5.0) - self.assertRaises(AssertionError, self.assertLessThan, 6.0, 5) - - def test_assertLessThan_mixed_list_true(self): - """assertLessThan should pass when all elements are < value""" - observed = [1.0, 2, 3.0, 4.0, 6, 3.0] - self.assertLessThan(observed, 7.0) - self.assertLessThan(observed, 7) - - def test_assertLessThan_mixed_list_false(self): - """assertLessThan should raise when a single element is >= value""" - observed = [2.0, 3, 4, 1.0, 3.0, 3.0] - self.assertRaises(AssertionError, self.assertLessThan, observed, 4.0) - self.assertRaises(AssertionError, self.assertLessThan, observed, 4) - - def test_assertLessThan_numpy_array_true(self): - """assertLessThan should pass when all elements are < value""" - observed = array([1, 2, 3, 4]) - self.assertLessThan(observed, 5) - self.assertLessThan(observed, 5.0) - - def test_assertLessThan_numpy_array_false(self): - """assertLessThan should pass when any element is >= value""" - observed = array([1, 2, 3, 4]) - self.assertRaises(AssertionError, self.assertLessThan, observed, 3) - self.assertRaises(AssertionError, self.assertLessThan, observed, 3.0) - - def 
test_assertIsProb_None(self): - """assertIsProb should raise when compared against None""" - self.assertRaises(AssertionError, self.assertIsProb, None) - - def test_assertIsProb_numbers_true(self): - """assertIsProb should pass when compared against valid numbers""" - self.assertIsProb(0) - self.assertIsProb(1) - - def test_assertIsProb_numbers_false(self): - """assertIsProb should raise when compared against invalid numbers""" - self.assertRaises(AssertionError, self.assertIsProb, -1) - self.assertRaises(AssertionError, self.assertIsProb, 2) - - def test_assertIsProb_numbers_list_true(self): - """assertIsProb should pass when all elements are probs""" - observed = [0, 1, 0] - self.assertIsProb(observed) - - def test_assertIsProb_numbers_list_false(self): - """assertIsProb should raise when any element is not a prob""" - observed = [-2, -4, 3] - self.assertRaises(AssertionError, self.assertIsProb, observed) - - def test_assertIsProb_float_true(self): - """assertIsProb should pass when compared against valid numbers""" - self.assertIsProb(0.0) - self.assertIsProb(1.0) - - def test_assertIsProb_float_false(self): - """assertIsProb should raise when compared against invalid numbers""" - self.assertRaises(AssertionError, self.assertIsProb, -1.0) - self.assertRaises(AssertionError, self.assertIsProb, 2.0) - - def test_assertIsProb_float_list_true(self): - """assertIsProb should pass when all elements are probs""" - observed = [0.0, 1.0, 0.0] - self.assertIsProb(observed) - - def test_assertIsProb_float_list_false(self): - """assertIsProb should raise when any element is not a prob""" - observed = [-2.0, -4.0, 3.0] - self.assertRaises(AssertionError, self.assertIsProb, observed) - - def test_assertIsProb_mixed_list_true(self): - """assertIsProb should pass when all elements are probs""" - observed = [0.0, 1, 0.0] - self.assertIsProb(observed) - - def test_assertIsProb_mixed_list_false(self): - """assertIsProb should raise when any element is not a prob""" - observed = 
[-2.0, -4, 3.0] - self.assertRaises(AssertionError, self.assertIsProb, observed) - - def test_assertIsProb_numpy_array_true(self): - """assertIsProb should pass when all elements are probs""" - observed = array([0.0, 0.4, 0.8]) - self.assertIsProb(observed) - - def test_assertIsProb_numpy_array_true(self): - """assertIsProb should pass when all elements are probs""" - observed = array([0.0, -0.4, 0.8]) - self.assertRaises(AssertionError, self.assertIsProb, observed) - - def test_assertSimilarMeans_one_obs_true(self): - """assertSimilarMeans should pass when p > pvalue""" - obs = [5] - expected = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] - self.assertSimilarMeans(obs, expected) - self.assertSimilarMeans(obs, expected, pvalue=0.25) - self._set_suite_pvalue(0.10) - self.assertSimilarMeans(obs, expected) - - def test_assertSimilarMeans_one_obs_false(self): - """assertSimilarMeans should raise when p < pvalue""" - obs = [5] - expected = [0.001, 0.009, 0.00012] - self.assertRaises(AssertionError, self.assertSimilarMeans, obs, expected) - self.assertRaises(AssertionError, self.assertSimilarMeans, obs, expected, 0.1) - self._set_suite_pvalue(0.001) - self.assertRaises(AssertionError, self.assertSimilarMeans, obs, expected) - - def test_assertSimilarMeans_twosample_true(self): - """assertSimilarMeans should pass when p > pvalue""" - obs = [4, 5, 6] - expected = [1, 2, 3, 4, 5, 6, 7, 8, 9] - self.assertSimilarMeans(obs, expected) - self.assertSimilarMeans(obs, expected, pvalue=0.25) - self._set_suite_pvalue(0.10) - self.assertSimilarMeans(obs, expected) - - def test_assertSimilarMeans_twosample_false(self): - """assertSimilarMeans should raise when p < pvalue""" - obs = [1, 2, 3] - expected = [6, 7, 8, 9, 10, 11, 12, 13, 14] - self.assertRaises(AssertionError, self.assertSimilarMeans, obs, expected) - self.assertRaises(AssertionError, self.assertSimilarMeans, obs, expected, 0.1) - self._set_suite_pvalue(0.001) - self.assertRaises(AssertionError, self.assertSimilarMeans, obs, 
expected) - - def test_assertSimilarFreqs_true(self): - """assertSimilarFreqs should pass when p > pvalue""" - observed = [2, 2, 3, 2, 1, 2, 2, 2, 2] - expected = [2, 2, 2, 2, 2, 2, 2, 2, 2] - self.assertSimilarFreqs(observed, expected) - self.assertSimilarFreqs(observed, expected, pvalue=0.25) - self._set_suite_pvalue(0.10) - self.assertSimilarFreqs(observed, expected) - - def test_assertSimilarFreqs_false(self): - """assertSimilarFreqs should raise when p < pvalue""" - observed = [10, 15, 20, 10, 12, 12, 13] - expected = [100, 50, 10, 20, 700, 2, 100] - self.assertRaises(AssertionError, self.assertSimilarFreqs, observed, expected) - self.assertRaises( - AssertionError, self.assertSimilarFreqs, observed, expected, 0.2 - ) - self._set_suite_pvalue(0.001) - self.assertRaises(AssertionError, self.assertSimilarFreqs, observed, expected) - - def test_assertSimilarFreqs_numpy_array_true(self): - """assertSimilarFreqs should pass when p > pvalue""" - observed = array([2, 2, 3, 2, 1, 2, 2, 2, 2]) - expected = array([2, 2, 2, 2, 2, 2, 2, 2, 2]) - self.assertSimilarFreqs(observed, expected) - self.assertSimilarFreqs(observed, expected, pvalue=0.25) - self._set_suite_pvalue(0.10) - self.assertSimilarFreqs(observed, expected) - - def test_assertSimilarFreqs_numpy_array_false(self): - """assertSimilarFreqs should raise when p < pvalue""" - observed = array([10, 15, 20, 10, 12, 12, 13]) - expected = array([100, 50, 10, 20, 700, 2, 100]) - self.assertRaises(AssertionError, self.assertSimilarFreqs, observed, expected) - self.assertRaises( - AssertionError, self.assertSimilarFreqs, observed, expected, 0.2 - ) - self._set_suite_pvalue(0.001) - self.assertRaises(AssertionError, self.assertSimilarFreqs, observed, expected) - - def test_set_suite_pvalue(self): - """Should set the suite pvalue""" - # force stats to fail - self._set_suite_pvalue(0.99) - obs = [2, 5, 6] - exp = [1, 2, 3, 4, 5, 6, 7, 8, 9] - self.assertRaises(AssertionError, self.assertSimilarMeans, obs, exp) - - # force 
stats to pass - self._set_suite_pvalue(0.01) - self.assertSimilarMeans(obs, exp) - - def test_assertSameObj_true(self): - """assertSameObj should pass when 'a is b'""" - self.assertSameObj("foo", "foo") - self.assertSameObj(None, None) - bar = lambda x: 5 - self.assertSameObj(bar, bar) - - def test_assertSameObj_false(self): - """assertSameObj should raise when 'a is not b'""" - self.assertRaises(AssertionError, self.assertSameObj, "foo", "bar") - self.assertRaises(AssertionError, self.assertSameObj, None, "bar") - self.assertRaises(AssertionError, self.assertSameObj, lambda x: 5, lambda y: 6) - - def test_assertNotSameObj_true(self): - """assertNotSameObj should pass when 'a is not b'""" - self.assertNotSameObj("foo", "bar") - self.assertNotSameObj(None, 5) - self.assertNotSameObj(lambda x: 5, lambda y: 6) - - def test_assertNotSameObj_false(self): - """assertSameObj should raise when 'a is b'""" - self.assertRaises(AssertionError, self.assertNotSameObj, "foo", "foo") - self.assertRaises(AssertionError, self.assertNotSameObj, None, None) - bar = lambda x: 5 - self.assertRaises(AssertionError, self.assertNotSameObj, bar, bar) - - -if __name__ == "__main__": - main() diff -Nru python-cogent-2020.6.30a0+dfsg/tests/timetrial.py python-cogent-2020.12.21a+dfsg/tests/timetrial.py --- python-cogent-2020.6.30a0+dfsg/tests/timetrial.py 2020-06-30 05:30:11.000000000 +0000 +++ python-cogent-2020.12.21a+dfsg/tests/timetrial.py 2020-12-20 23:35:03.000000000 +0000 @@ -15,7 +15,7 @@ __copyright__ = "Copyright 2007-2020, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Edward Lang"] __license__ = "BSD-3" -__version__ = "2020.6.30a" +__version__ = "2020.12.21a" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production"