diff -Nru python-cogent-2021.10.12a1+dfsg/c3dev-environment.yml python-cogent-2022.5.25a1+dfsg/c3dev-environment.yml --- python-cogent-2021.10.12a1+dfsg/c3dev-environment.yml 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/c3dev-environment.yml 1970-01-01 00:00:00.000000000 +0000 @@ -1,21 +0,0 @@ -name: c3dev -channels: - - plotly - - conda-forge - - defaults -dependencies: - - matplotlib - - mpi4py - - nb_conda_kernels - - nodejs - - pillow - - pip - - plotly-orca - - psutil - - python>=3.6 - - scipy - - pip: - - --editable ./[dev] - - tox-gh-actions - - sphinx-gallery - - sphinx_bootstrap_theme \ No newline at end of file diff -Nru python-cogent-2021.10.12a1+dfsg/changelog.md python-cogent-2022.5.25a1+dfsg/changelog.md --- python-cogent-2021.10.12a1+dfsg/changelog.md 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/changelog.md 2022-05-24 23:42:33.000000000 +0000 @@ -1,3 +1,83 @@ +# Changes since release 2022.4.20a1 + +## Contributors + +- Gavin Huttley + +## ENH + +- new `cogent3.util.parallel.as_completed()` generator function + - `as_completed()` wraps MPI or `concurrent.futures` executors and delivers results as they are completed. In contrast, `parallel.imap()` / `parallel.map()` deliver results in the same order as the input series. The advantage of `as_completed()` is the interval of result arrival at the parent process is better distributed. +- new function `cogent3.load_seq()` loads a single sequence from a file +- convert substitution model `__str__` to `__repr__`; more useful since `__repr__` is called also by str(). + +## BUG + +- fixes to `annotation_from_gff()` method on annotatable sequence / alignment objects + - method would break if GFF records had no ID. This situation is quite common in some Ensembl gff3 files. We generate a "no-id-#" identifier in those cases. + - features are now added to their parent feature. 
+- improve consistency in setting motif_probs on likelihood function + - only apply a pseudocount if optimising motif probs and at least one state has zero frequency, default pseudocount is 0.5. Thanks to StephenRogers1 for finding this issue! + +## DOC + +- document the API of the new `load_seq()` function + +# Changes since release 2022.4.15a1 + +## DEP + +- added warning that we will drop support for python 3.7 by 2022.10. This means the developer version will switch to python 3.8 from 2022.6. +- discontinued delimiter argument from parse.table.load_delimited + +# Changes since release 2021.10.12a1 + +## Contributors + +- Gavin Huttley +- u6675275 + +## API + +- moved all io related functions and classes from util.misc to util.io, indicating their removal after version 2022.4 +- app.result objects require source instance of str or pathlib.Path +- fail if users set motif prob optimisation via sm_args in app.evo.model as value is overridden by the explicit argument, need to block this as effect is major + +## BUG + +- RichGenbankParser moltype argument now overrides file spec, if provided, this defines the moltype of the returned sequence, otherwise the moltype is determined from the genbank file meta-data +- fix initialise from nested params for codon models +- load_tree now handles pathlib.Path's as input, fixes #991 +- writer composable apps apply_to now handles provided logger +- fixed serialisation of multi-locus likelihood functions with constrained motif probs +- support multiple calls of to_rich_dict() +- solve case where optimiser gets an invalid starting vector +- solved case where optimised parameter values are outside bounds + +## DEP + +- removed deprecated function for median, use numpy.median instead +- removed deprecated index argument from table constructors, use index_name instead +- cogent3.math periodicity classes method names pep8, old names retained, with deprecation warnings + +## ENH + +- Drawable.plotly_figure property returns plotly graph 
object Figure instance +- refactor of cogent3.app.composable.appify so decorated functions can be pickled +- app.evo.model handles sequential fitting of models with mixed process. Sequential fitting now works if lf_args includes specifying edges for using a discrete-time Markov process +- add optimise_motif_probs argument to app.evo.model +- add upper argument to app.evo.model +- now support python 3.10 +- added register_model decorator to cogent3.evolve.models. Used for simplifying discovery of canned substitution models. Users can now use this mechanism too for adding their own custom models. Doing this smooths usage of custom models with cogent3.app.evo.model. A further benefit is the inclusion of a model to the appropriate module attributes is now done automatically. +- generalise Jensen-Shannon calculations to > 2 distributions +- the register_deserialiser class takes a series of strings that serve to uniquely identify the "type" value in a dict to be reconstituted using the decorated function. This enables support for user defined custom json storage. +- add type hint for input paths to most commonly used loaders +- time-heterogeneity support mixed discrete and continuous-time models +- more compact representation of datastore summary_incomplete +- more refinements on summary_logs +- cogent3.app.io.register_datastore_reader enables third party readers / loaders to be developed. Registering a reader class requires decorating it with the filename suffix that will distinguish that content type. Still limited to reading from files only. 
+- improve general stationary model numerical precision tolerance + # Since release 2021.5.7a1 ## Contributors diff -Nru python-cogent-2021.10.12a1+dfsg/debian/changelog python-cogent-2022.5.25a1+dfsg/debian/changelog --- python-cogent-2021.10.12a1+dfsg/debian/changelog 2022-06-25 14:38:12.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/debian/changelog 2022-07-29 14:30:18.000000000 +0000 @@ -1,3 +1,11 @@ +python-cogent (2022.5.25a1+dfsg-1) unstable; urgency=medium + + * New upstream version + * Standards-Version: 4.6.1 (routine-update) + * Build-Depends: python3-plotly + + -- Andreas Tille Fri, 29 Jul 2022 16:30:18 +0200 + python-cogent (2021.10.12a1+dfsg-2) unstable; urgency=medium * Team Upload. diff -Nru python-cogent-2021.10.12a1+dfsg/debian/control python-cogent-2022.5.25a1+dfsg/debian/control --- python-cogent-2021.10.12a1+dfsg/debian/control 2022-02-19 18:33:59.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/debian/control 2022-07-29 14:30:18.000000000 +0000 @@ -22,9 +22,10 @@ python3-sphinx-bootstrap-theme, python3-sphinx-gallery, python3-pytest , + python3-plotly , cython3, pandoc -Standards-Version: 4.6.0 +Standards-Version: 4.6.1 Vcs-Browser: https://salsa.debian.org/med-team/python-cogent Vcs-Git: https://salsa.debian.org/med-team/python-cogent.git Homepage: https://github.com/cogent3/cogent3 diff -Nru python-cogent-2021.10.12a1+dfsg/debian/patches/remove-jupyter-sphinx.patch python-cogent-2022.5.25a1+dfsg/debian/patches/remove-jupyter-sphinx.patch --- python-cogent-2021.10.12a1+dfsg/debian/patches/remove-jupyter-sphinx.patch 2022-02-19 18:36:53.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/debian/patches/remove-jupyter-sphinx.patch 2022-07-29 14:30:18.000000000 +0000 @@ -4,7 +4,7 @@ Last-Update: 2022-02-20 --- a/pyproject.toml +++ b/pyproject.toml -@@ -51,7 +51,6 @@ +@@ -51,7 +51,6 @@ doc = ["click", "ipykernel", "ipython", "ipywidgets", @@ -12,7 +12,7 @@ "jupyter_client", "jupyterlab", "jupytext", -@@ -80,7 +79,6 @@ +@@ -79,7 +78,6 @@ dev 
= ["black", "ipython", "ipywidgets", "isort", diff -Nru python-cogent-2021.10.12a1+dfsg/debian/patches/sphinx.patch python-cogent-2022.5.25a1+dfsg/debian/patches/sphinx.patch --- python-cogent-2021.10.12a1+dfsg/debian/patches/sphinx.patch 2022-02-19 18:33:59.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/debian/patches/sphinx.patch 2022-07-29 14:30:18.000000000 +0000 @@ -5,7 +5,7 @@ --- a/doc/conf.py +++ b/doc/conf.py -@@ -20,18 +20,18 @@ add_module_names = False # don't includ +@@ -30,17 +30,18 @@ add_module_names = False # don't includ numpydoc_class_members_toctree = False extensions = [ @@ -20,22 +20,11 @@ "sphinx.ext.githubpages", "sphinx.ext.mathjax", "sphinx.ext.todo", -- "sphinx_gallery.gen_gallery", +- "sphinx_gallery.load_style", - "sphinx_panels", -- "sphinxcontrib.bibtex", -+# "sphinx_gallery.gen_gallery", ++# "sphinx_gallery.load_style", +# "sphinx_panels", + "sphinx.ext.napoleon", # "sphinxcontrib.spelling", ] ---- a/doc/rtd-environment.yml -+++ b/doc/rtd-environment.yml -@@ -20,7 +20,6 @@ dependencies: - - nbsphinx - - nbformat - - nbconvert!=5.4 -- - sphinxcontrib-bibtex - - sphinx-gallery - - sphinx_bootstrap_theme - - ../ diff -Nru python-cogent-2021.10.12a1+dfsg/doc/api/index.rst python-cogent-2022.5.25a1+dfsg/doc/api/index.rst --- python-cogent-2021.10.12a1+dfsg/doc/api/index.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/api/index.rst 2022-05-24 23:42:33.000000000 +0000 @@ -18,8 +18,9 @@ .. toctree:: :maxdepth: 1 - __init__/cogent3.__init__.load_unaligned_seqs + __init__/cogent3.__init__.load_seq __init__/cogent3.__init__.load_aligned_seqs + __init__/cogent3.__init__.load_unaligned_seqs __init__/cogent3.__init__.load_delimited __init__/cogent3.__init__.load_table __init__/cogent3.__init__.load_tree @@ -36,9 +37,9 @@ .. 
toctree:: :maxdepth: 1 + __init__/cogent3.__init__.make_seq __init__/cogent3.__init__.make_aligned_seqs __init__/cogent3.__init__.make_unaligned_seqs - __init__/cogent3.__init__.make_seq __init__/cogent3.__init__.make_table __init__/cogent3.__init__.make_tree diff -Nru python-cogent-2021.10.12a1+dfsg/doc/api/__init__/cogent3.__init__.load_seq.rst python-cogent-2022.5.25a1+dfsg/doc/api/__init__/cogent3.__init__.load_seq.rst --- python-cogent-2021.10.12a1+dfsg/doc/api/__init__/cogent3.__init__.load_seq.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/api/__init__/cogent3.__init__.load_seq.rst 2022-05-24 23:42:33.000000000 +0000 @@ -0,0 +1,6 @@ +load_seq +======== + +.. currentmodule:: cogent3.__init__ + +.. autofunction:: load_seq diff -Nru python-cogent-2021.10.12a1+dfsg/doc/app/align-codon.rst python-cogent-2022.5.25a1+dfsg/doc/app/align-codon.rst --- python-cogent-2021.10.12a1+dfsg/doc/app/align-codon.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/app/align-codon.rst 2022-05-24 23:42:33.000000000 +0000 @@ -90,4 +90,4 @@ .. jupyter-execute:: - aligned.info \ No newline at end of file + aligned.info diff -Nru python-cogent-2021.10.12a1+dfsg/doc/app/align-nucleotide.rst python-cogent-2022.5.25a1+dfsg/doc/app/align-nucleotide.rst --- python-cogent-2021.10.12a1+dfsg/doc/app/align-nucleotide.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/app/align-nucleotide.rst 2022-05-24 23:42:33.000000000 +0000 @@ -68,4 +68,4 @@ .. jupyter-execute:: - aligned.info \ No newline at end of file + aligned.info diff -Nru python-cogent-2021.10.12a1+dfsg/doc/app/align-protein.rst python-cogent-2022.5.25a1+dfsg/doc/app/align-protein.rst --- python-cogent-2021.10.12a1+dfsg/doc/app/align-protein.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/app/align-protein.rst 2022-05-24 23:42:33.000000000 +0000 @@ -48,4 +48,4 @@ .. 
jupyter-execute:: - aligned.info \ No newline at end of file + aligned.info diff -Nru python-cogent-2021.10.12a1+dfsg/doc/app/dstore.rst python-cogent-2022.5.25a1+dfsg/doc/app/dstore.rst --- python-cogent-2021.10.12a1+dfsg/doc/app/dstore.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/app/dstore.rst 2022-05-24 23:42:33.000000000 +0000 @@ -112,4 +112,4 @@ .. jupyter-execute:: - print(dstore.logs[0].read()[:225]) # truncated for clarity \ No newline at end of file + print(dstore.logs[0].read()[:225]) # truncated for clarity diff -Nru python-cogent-2021.10.12a1+dfsg/doc/app/evo-ancestral-states.rst python-cogent-2022.5.25a1+dfsg/doc/app/evo-ancestral-states.rst --- python-cogent-2021.10.12a1+dfsg/doc/app/evo-ancestral-states.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/app/evo-ancestral-states.rst 2022-05-24 23:42:33.000000000 +0000 @@ -12,7 +12,7 @@ .. jupyter-execute:: - from cogent3.app import io, evo + from cogent3.app import evo, io reader = io.load_aligned(format="fasta") aln = reader("data/primate_brca1.fasta") @@ -38,4 +38,4 @@ .. jupyter-execute:: - result.tree.get_figure(contemporaneous=True).show(width=500, height=500) \ No newline at end of file + result.tree.get_figure(contemporaneous=True).show(width=500, height=500) diff -Nru python-cogent-2021.10.12a1+dfsg/doc/app/evo-dt-nuc-model.rst python-cogent-2022.5.25a1+dfsg/doc/app/evo-dt-nuc-model.rst --- python-cogent-2021.10.12a1+dfsg/doc/app/evo-dt-nuc-model.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/app/evo-dt-nuc-model.rst 2022-05-24 23:42:33.000000000 +0000 @@ -10,7 +10,7 @@ .. jupyter-execute:: - from cogent3.app import io, evo + from cogent3.app import evo, io loader = io.load_aligned(format="fasta", moltype="dna") aln = loader("data/primate_brca1.fasta") @@ -53,4 +53,4 @@ .. 
jupyter-execute:: - stats["edge motif motif2 params"] \ No newline at end of file + stats["edge motif motif2 params"] diff -Nru python-cogent-2021.10.12a1+dfsg/doc/app/evo-extract-model-stats.rst python-cogent-2022.5.25a1+dfsg/doc/app/evo-extract-model-stats.rst --- python-cogent-2021.10.12a1+dfsg/doc/app/evo-extract-model-stats.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/app/evo-extract-model-stats.rst 2022-05-24 23:42:33.000000000 +0000 @@ -12,7 +12,7 @@ .. jupyter-execute:: - from cogent3.app import io, evo + from cogent3.app import evo, io loader = io.load_aligned(format="fasta", moltype="dna") aln = loader("data/primate_brca1.fasta") @@ -57,4 +57,4 @@ .. jupyter-execute:: - tabulated["motif params"] \ No newline at end of file + tabulated["motif params"] diff -Nru python-cogent-2021.10.12a1+dfsg/doc/app/evo-hypothesis.rst python-cogent-2022.5.25a1+dfsg/doc/app/evo-hypothesis.rst --- python-cogent-2021.10.12a1+dfsg/doc/app/evo-hypothesis.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/app/evo-hypothesis.rst 2022-05-24 23:42:33.000000000 +0000 @@ -10,7 +10,7 @@ .. jupyter-execute:: - from cogent3.app import io, evo, sample + from cogent3.app import evo, io, sample loader = io.load_aligned(format="fasta", moltype="dna") aln = loader("data/primate_brca1.fasta") @@ -18,10 +18,9 @@ .. 
jupyter-execute:: tree = "data/primate_brca1.tree" - sm_args = dict(optimise_motif_probs=True) - null = evo.model("GTR", tree=tree, sm_args=sm_args) - alt = evo.model("GN", tree=tree, sm_args=sm_args) + null = evo.model("GTR", tree=tree, optimise_motif_probs=True) + alt = evo.model("GN", tree=tree, optimise_motif_probs=True) hyp = evo.hypothesis(null, alt) result = hyp(aln) type(result) @@ -76,4 +75,4 @@ from cogent3.app.io import write_db writer = write_db("path/to/myresults.tinydb", create=True, if_exists="overwrite") - writer(result) \ No newline at end of file + writer(result) diff -Nru python-cogent-2021.10.12a1+dfsg/doc/app/evo-model.rst python-cogent-2022.5.25a1+dfsg/doc/app/evo-model.rst --- python-cogent-2021.10.12a1+dfsg/doc/app/evo-model.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/app/evo-model.rst 2022-05-24 23:42:33.000000000 +0000 @@ -94,4 +94,4 @@ .. jupyter-execute:: - fitted[3] \ No newline at end of file + fitted[3] diff -Nru python-cogent-2021.10.12a1+dfsg/doc/app/evo-model-timehet.rst python-cogent-2022.5.25a1+dfsg/doc/app/evo-model-timehet.rst --- python-cogent-2021.10.12a1+dfsg/doc/app/evo-model-timehet.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/app/evo-model-timehet.rst 2022-05-24 23:42:33.000000000 +0000 @@ -13,7 +13,7 @@ .. jupyter-execute:: from cogent3 import load_tree - from cogent3.app import io, evo + from cogent3.app import evo, io tree = load_tree("data/primate_brca1.tree") fig = tree.get_figure(contemporaneous=True) @@ -49,4 +49,4 @@ .. jupyter-execute:: - result.lf \ No newline at end of file + result.lf diff -Nru python-cogent-2021.10.12a1+dfsg/doc/app/evo-model-with-tree.rst python-cogent-2022.5.25a1+dfsg/doc/app/evo-model-with-tree.rst --- python-cogent-2021.10.12a1+dfsg/doc/app/evo-model-with-tree.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/app/evo-model-with-tree.rst 2022-05-24 23:42:33.000000000 +0000 @@ -50,4 +50,4 @@ .. 
jupyter-execute:: - fitted.lf \ No newline at end of file + fitted.lf diff -Nru python-cogent-2021.10.12a1+dfsg/doc/app/evo-natsel_neutral.rst python-cogent-2022.5.25a1+dfsg/doc/app/evo-natsel_neutral.rst --- python-cogent-2021.10.12a1+dfsg/doc/app/evo-natsel_neutral.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/app/evo-natsel_neutral.rst 2022-05-24 23:42:33.000000000 +0000 @@ -12,7 +12,7 @@ .. jupyter-execute:: - from cogent3.app import io, evo + from cogent3.app import evo, io loader = io.load_aligned(format="fasta", moltype="dna") aln = loader("data/primate_brca1.fasta") @@ -29,4 +29,4 @@ .. jupyter-execute:: - result.alt.lf \ No newline at end of file + result.alt.lf diff -Nru python-cogent-2021.10.12a1+dfsg/doc/app/evo-natsel_sitehet.rst python-cogent-2022.5.25a1+dfsg/doc/app/evo-natsel_sitehet.rst --- python-cogent-2021.10.12a1+dfsg/doc/app/evo-natsel_sitehet.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/app/evo-natsel_sitehet.rst 2022-05-24 23:42:33.000000000 +0000 @@ -12,7 +12,7 @@ .. jupyter-execute:: - from cogent3.app import io, evo + from cogent3.app import evo, io loader = io.load_aligned(format="fasta", moltype="dna") aln = loader("data/primate_brca1.fasta") @@ -38,4 +38,4 @@ .. jupyter-execute:: bprobs = result.alt.lf.get_bin_probs() - bprobs[:, :20] \ No newline at end of file + bprobs[:, :20] diff -Nru python-cogent-2021.10.12a1+dfsg/doc/app/evo-natsel_timehet.rst python-cogent-2022.5.25a1+dfsg/doc/app/evo-natsel_timehet.rst --- python-cogent-2021.10.12a1+dfsg/doc/app/evo-natsel_timehet.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/app/evo-natsel_timehet.rst 2022-05-24 23:42:33.000000000 +0000 @@ -10,7 +10,7 @@ .. jupyter-execute:: - from cogent3.app import io, evo + from cogent3.app import evo, io loader = io.load_aligned(format="fasta", moltype="dna") aln = loader("data/primate_brca1.fasta") @@ -27,4 +27,4 @@ .. 
jupyter-execute:: - result.alt.lf \ No newline at end of file + result.alt.lf diff -Nru python-cogent-2021.10.12a1+dfsg/doc/app/evo-natsel_zhang.rst python-cogent-2022.5.25a1+dfsg/doc/app/evo-natsel_zhang.rst --- python-cogent-2021.10.12a1+dfsg/doc/app/evo-natsel_zhang.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/app/evo-natsel_zhang.rst 2022-05-24 23:42:33.000000000 +0000 @@ -13,9 +13,10 @@ .. jupyter-execute:: :hide-code: + from IPython.core.display import HTML from numpy import array + from cogent3 import make_table - from IPython.core.display import HTML header = ['Site Class', 'Proportion', 'Background Edges', 'Foreground Edges'] data = {'Site Class': array(['0', '1', '2a', '2b'], dtype='gi|10047090|ref|NP_055147.1| small muscle protein, X-linked [Homo sapiens]", @@ -190,4 +190,4 @@ for name, seq in MinimalFastaParser(fasta_data, label_to_name=label_to_name): print(name) print(name.info.gi) - print(name.info.description) \ No newline at end of file + print(name.info.description) diff -Nru python-cogent-2021.10.12a1+dfsg/doc/cookbook/moltypes.rst python-cogent-2022.5.25a1+dfsg/doc/cookbook/moltypes.rst --- python-cogent-2021.10.12a1+dfsg/doc/cookbook/moltypes.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/cookbook/moltypes.rst 2022-05-24 23:42:33.000000000 +0000 @@ -106,4 +106,4 @@ from cogent3 import DNA from cogent3.core.sequence import DnaSequence - DnaSequence.moltype = DNA \ No newline at end of file + DnaSequence.moltype = DNA diff -Nru python-cogent-2021.10.12a1+dfsg/doc/cookbook/protein_sequences.rst python-cogent-2022.5.25a1+dfsg/doc/cookbook/protein_sequences.rst --- python-cogent-2021.10.12a1+dfsg/doc/cookbook/protein_sequences.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/cookbook/protein_sequences.rst 2022-05-24 23:42:33.000000000 +0000 @@ -37,4 +37,4 @@ from cogent3 import load_aligned_seqs - seq = load_aligned_seqs("data/abglobin_aa.phylip", 
moltype="protein") \ No newline at end of file + seq = load_aligned_seqs("data/abglobin_aa.phylip", moltype="protein") diff -Nru python-cogent-2021.10.12a1+dfsg/doc/cookbook/simple_trees.rst python-cogent-2022.5.25a1+dfsg/doc/cookbook/simple_trees.rst --- python-cogent-2021.10.12a1+dfsg/doc/cookbook/simple_trees.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/cookbook/simple_trees.rst 2022-05-24 23:42:33.000000000 +0000 @@ -348,7 +348,7 @@ .. jupyter-execute:: :hide-code: - from cogent3.util.misc import remove_files + from cogent3.util.io import remove_files remove_files(["data/temp.tree", "data/temp.pdf"], error_on_missing=False) @@ -508,4 +508,4 @@ tr1 = make_tree("(B:2,(C:3,D:4)F:5)G;") tr2 = make_tree("(C:2,(B:3,D:4)F:5)G;") - tr1.compare_by_tip_distances(tr2) \ No newline at end of file + tr1.compare_by_tip_distances(tr2) diff -Nru python-cogent-2021.10.12a1+dfsg/doc/cookbook/tables.rst python-cogent-2022.5.25a1+dfsg/doc/cookbook/tables.rst --- python-cogent-2021.10.12a1+dfsg/doc/cookbook/tables.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/cookbook/tables.rst 2022-05-24 23:42:33.000000000 +0000 @@ -817,4 +817,4 @@ for name in ("stats_tab.txt", "stats_tab.tex"): p = pathlib.Path(name) if p.exists(): - p.unlink() \ No newline at end of file + p.unlink() diff -Nru python-cogent-2021.10.12a1+dfsg/doc/cookbook/union_dict.rst python-cogent-2022.5.25a1+dfsg/doc/cookbook/union_dict.rst --- python-cogent-2021.10.12a1+dfsg/doc/cookbook/union_dict.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/cookbook/union_dict.rst 2022-05-24 23:42:33.000000000 +0000 @@ -65,4 +65,4 @@ .. 
jupyter-execute:: :raises: AttributeError - data.k \ No newline at end of file + data.k diff -Nru python-cogent-2021.10.12a1+dfsg/doc/cookbook/useful_utilities.rst python-cogent-2022.5.25a1+dfsg/doc/cookbook/useful_utilities.rst --- python-cogent-2021.10.12a1+dfsg/doc/cookbook/useful_utilities.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/cookbook/useful_utilities.rst 2022-05-24 23:42:33.000000000 +0000 @@ -31,7 +31,7 @@ .. jupyter-execute:: - from cogent3.maths.optimisers import minimise, maximise + from cogent3.maths.optimisers import maximise, minimise S = minimise( f, # the function @@ -176,9 +176,10 @@ .. jupyter-execute:: - from cogent3.util.misc import DistanceFromMatrix from numpy import array + from cogent3.util.misc import DistanceFromMatrix + m = array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) f = DistanceFromMatrix(m) f(0, 0) @@ -296,4 +297,4 @@ .. jupyter-execute:: :raises: ConstraintError - d["d"] = 5 \ No newline at end of file + d["d"] = 5 diff -Nru python-cogent-2021.10.12a1+dfsg/doc/cookbook/what_codes.rst python-cogent-2022.5.25a1+dfsg/doc/cookbook/what_codes.rst --- python-cogent-2021.10.12a1+dfsg/doc/cookbook/what_codes.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/cookbook/what_codes.rst 2022-05-24 23:42:33.000000000 +0000 @@ -41,4 +41,4 @@ from cogent3 import get_code gc = get_code(4) - gc \ No newline at end of file + gc diff -Nru python-cogent-2021.10.12a1+dfsg/doc/doctest2script.py python-cogent-2022.5.25a1+dfsg/doc/doctest2script.py --- python-cogent-2021.10.12a1+dfsg/doc/doctest2script.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/doctest2script.py 2022-05-24 23:42:33.000000000 +0000 @@ -8,7 +8,7 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __contributors__ = ["Gavin Huttley", "Peter Maxwell"] __license__ = "BSD-3" __version__ = "2020.2.7a" diff 
-Nru python-cogent-2021.10.12a1+dfsg/doc/doctest_rsts.py python-cogent-2022.5.25a1+dfsg/doc/doctest_rsts.py --- python-cogent-2021.10.12a1+dfsg/doc/doctest_rsts.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/doctest_rsts.py 2022-05-24 23:42:33.000000000 +0000 @@ -11,11 +11,10 @@ from nbconvert.preprocessors import CellExecutionError, ExecutePreprocessor -from cogent3.util.misc import atomic_write - +from cogent3.util.io import atomic_write __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" __version__ = "2020.2.7a" diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw/aln/plot_aln-coevolution.rst python-cogent-2022.5.25a1+dfsg/doc/draw/aln/plot_aln-coevolution.rst --- python-cogent-2021.10.12a1+dfsg/doc/draw/aln/plot_aln-coevolution.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw/aln/plot_aln-coevolution.rst 2022-05-24 23:42:33.000000000 +0000 @@ -0,0 +1,50 @@ +.. jupyter-execute:: + :hide-code: + + import set_working_directory + +Coevolution analysis +==================== + +A method on the alignment provides an interface to the simpler (and yet robust and fast) methods for estimating coevolution. The default measure is normalised mutual information (NMI). + +.. todo:: add citation for NMI + +Display coevolution as a heatmap +-------------------------------- + +Using the ``drawable`` argument causes the returned object to have a ``drawable`` attribute (type ``Drawable`` which has ``show()`` and ``write()`` methods), for the corresponding plot types -- a heatmap in this case. + +.. 
jupyter-execute:: + + from cogent3 import load_aligned_seqs + + aln = load_aligned_seqs("data/brca1.fasta", moltype="dna") + aln = aln.no_degenerates(motif_length=3) + aln = aln.get_translation() + aln = aln[:100] # for compute speed in testing the documentation + coevo = aln.coevolution(show_progress=False, drawable="heatmap") + coevo.drawable.show() + +.. jupyter-execute:: + :hide-code: + + outpath = set_working_directory.get_thumbnail_dir() / "plot_aln-coevolution.png" + + coevo.drawable.write(outpath) + +Display coevolution scores as a Violin plot +------------------------------------------- + +.. jupyter-execute:: + + coevo = aln.coevolution(show_progress=False, drawable="violin") + coevo.drawable.show(width=300) + +Display coevolution scores as a Boxplot +--------------------------------------- + +.. jupyter-execute:: + + coevo = aln.coevolution(show_progress=False, drawable="box") + coevo.drawable.show(width=300) diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw/aln/plot_aln-dotplot-1.rst python-cogent-2022.5.25a1+dfsg/doc/draw/aln/plot_aln-dotplot-1.rst --- python-cogent-2021.10.12a1+dfsg/doc/draw/aln/plot_aln-dotplot-1.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw/aln/plot_aln-dotplot-1.rst 2022-05-24 23:42:33.000000000 +0000 @@ -0,0 +1,62 @@ +.. jupyter-execute:: + :hide-code: + + import set_working_directory + +Dotplot basics +============== + +A technique (`Gibbs and McIntyre `_) for comparing sequences. All ``cogent3`` sequence collections classes (``SequenceCollection``, ``Alignment`` and ``ArrayAlignment``) have a dotplot method. + +.. todo:: Change dotplot ref to a citation + +The method returns a drawable, as demonstrated below between unaligned sequences. + +.. jupyter-execute:: + + import os + + from cogent3 import load_unaligned_seqs + + seqs = load_unaligned_seqs("data/SCA1-cds.fasta", moltype="dna") + draw = seqs.dotplot() + draw.show() + +.. 
jupyter-execute:: + :hide-code: + + outpath = set_working_directory.get_thumbnail_dir() / "plot_aln-dotplot-1.png" + + draw.write(outpath) + +If sequence names are not provided, two randomly chosen sequences are selected (see below). The plot title reflects the parameter values for defining a match. ``window`` is the size of the sequence segments being compared. ``threshold`` is the number of exact matches within ``window`` required for the two sequence segments to be considered a match. ``gap`` is the size of a gap between adjacent matches before merging. + +Modifying the matching parameters +--------------------------------- + +If we set window and threshold to be equal, this is equivalent to an exact match approach. + +.. jupyter-execute:: + + draw = seqs.dotplot(name1="Human", name2="Mouse", window=8, threshold=8) + draw.show() + +Displaying dotplot for the reverse complement +--------------------------------------------- + +.. jupyter-execute:: + + draw = seqs.dotplot(name1="Human", name2="Mouse", rc=True) + draw.show() + +.. note:: clicking on an entry in the legend turns it off + +Setting plot attributes +----------------------- + +I'll modify the title and figure width. + +.. jupyter-execute:: + + draw = seqs.dotplot(name1="Human", name2="Mouse", rc=True, title="SCA1", width=400) + draw.show() diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw/aln/plot_aln-dotplot-2.rst python-cogent-2022.5.25a1+dfsg/doc/draw/aln/plot_aln-dotplot-2.rst --- python-cogent-2021.10.12a1+dfsg/doc/draw/aln/plot_aln-dotplot-2.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw/aln/plot_aln-dotplot-2.rst 2022-05-24 23:42:33.000000000 +0000 @@ -0,0 +1,44 @@ +.. jupyter-execute:: + :hide-code: + + import set_working_directory + +Dotplot with annotated sequences +================================ + +If sequences in a dotplot have been annotated, the ``dotplot()`` method returns an ``AnnotatedDrawable``. 
+ +Reloading from json +------------------- + +The data file, ``tp53.json``, was created from a query of ensembl for one-to-one orthologs of human TP53 between Human, Macaque, Orangutan and Marmoset. The resulting sequences were annotated with the location of the CDS for the canonical transcript, then the ``SequenceCollection`` was saved as json using ``cogent3.app.write_json``. + +.. jupyter-execute:: + + from cogent3.app.io import get_data_store, load_json + + loader = load_json() + seqs = loader("data/tp53.json") + dp = seqs.dotplot(name1="Macaque", name2="Marmoset", width=600) + dp.show() + +.. jupyter-execute:: + :hide-code: + + outpath = set_working_directory.get_thumbnail_dir() / "plot_aln-dotplot-2.png" + + dp.write(outpath) + +Removing annotation tracks +-------------------------- + +.. jupyter-execute:: + + help(dp.remove_track) + +Thus we could remove the left annotation track, for instance with + +.. jupyter-execute:: + + dp.remove_track(left_track=True) + dp.show() diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw/aln/plot_aln-gaps-per-seq.rst python-cogent-2022.5.25a1+dfsg/doc/draw/aln/plot_aln-gaps-per-seq.rst --- python-cogent-2021.10.12a1+dfsg/doc/draw/aln/plot_aln-gaps-per-seq.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw/aln/plot_aln-gaps-per-seq.rst 2022-05-24 23:42:33.000000000 +0000 @@ -0,0 +1,54 @@ +.. jupyter-execute:: + :hide-code: + + import set_working_directory + +Counting gaps per sequence +========================== + +We have several different ways of counting sequence gaps, and of visualising the results. By default, the ``count_gaps_per_seq()`` method returns a matrix of counts without the ability to visualise the results. When setting the argument ``unique=True``, the counts are for gaps uniquely induced by each sequence. This can be a useful indicator of highly divergent sequences. + +.. 
jupyter-execute:: + + from cogent3 import load_aligned_seqs + + aln = load_aligned_seqs("data/brca1.fasta", moltype="dna") + + counts = aln.count_gaps_per_seq(unique=True) + counts[10: 20] # limiting the width of the displayed output + +Plotting counts of unique gaps +------------------------------ + +Using the ``drawable`` argument causes the returned object to have a ``drawable`` attribute (type ``Drawable`` which has ``show()`` and ``write()`` methods), for the corresponding plot type. The three plot types supported are shown below. In all cases, placing the mouse pointer over a data point will show hover text with the number of unique gaps and the sequence name. + +Displaying unique gaps as a bar chart +------------------------------------- + +.. jupyter-execute:: + + counts = aln.count_gaps_per_seq(unique=True, drawable="bar") + counts.drawable.show(width=500) + +.. jupyter-execute:: + :hide-code: + + outpath = set_working_directory.get_thumbnail_dir() / "plot_aln-gaps-per-seq.png" + + counts.drawable.write(outpath) + +Displaying unique gaps as a violin plot +--------------------------------------- + +.. jupyter-execute:: + + counts = aln.count_gaps_per_seq(unique=True, drawable="violin") + counts.drawable.show(width=300, height=500) + +Displaying unique gaps as a box plot +------------------------------------ + +.. jupyter-execute:: + + counts = aln.count_gaps_per_seq(unique=True, drawable="box") + counts.drawable.show(width=300, height=500) diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw/aln/plot_aln-info-plot.rst python-cogent-2022.5.25a1+dfsg/doc/draw/aln/plot_aln-info-plot.rst --- python-cogent-2021.10.12a1+dfsg/doc/draw/aln/plot_aln-info-plot.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw/aln/plot_aln-info-plot.rst 2022-05-24 23:42:33.000000000 +0000 @@ -0,0 +1,42 @@ +.. 
jupyter-execute:: + :hide-code: + + import set_working_directory + +Information analysis of an alignment +==================================== + +Information here is in the formal sense -- maximum entropy minus the entropy at a position. This is fast to compute and is an indicator of the variability at a position. + +Illustrated with a simple example +--------------------------------- + +.. jupyter-execute:: + + from cogent3 import load_aligned_seqs, make_aligned_seqs, make_seq + + s1 = make_seq("TGATGTAAGGTAGTT", name="s1", moltype="dna") + s2 = make_seq("--CTGGAAGGGT---", name="s2", moltype="dna") + + seqs = make_aligned_seqs(data=[s1, s2], array_align=False, moltype="dna") + draw = seqs.information_plot(window=2, include_gap=True) + draw.show(width=500, height=400) + +On a sample data set +-------------------- + +Clicking on any of the legend items causes that to disappear from the plot. + +.. jupyter-execute:: + + aln = load_aligned_seqs("data/brca1.fasta", moltype="protein") + + fig = aln.information_plot(stat="median") + fig.show(width=500, height=400) + +.. jupyter-execute:: + :hide-code: + + outpath = set_working_directory.get_thumbnail_dir() / "plot_aln-info-plot.png" + + fig.write(outpath) diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw/aln/plot_aln-seqlogo.rst python-cogent-2022.5.25a1+dfsg/doc/draw/aln/plot_aln-seqlogo.rst --- python-cogent-2021.10.12a1+dfsg/doc/draw/aln/plot_aln-seqlogo.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw/aln/plot_aln-seqlogo.rst 2022-05-24 23:42:33.000000000 +0000 @@ -0,0 +1,57 @@ +.. jupyter-execute:: + :hide-code: + + import set_working_directory + +Sequence logos +============== + +Sequence logos display sequence information. They're extensively applied to transcription factor binding site (TFBS) display. They can also be applied to sequence alignments more generally. + +Drawing logo for a TFBS +----------------------- + +We use the TFBS for the TATA box binding protein. + +.. 
jupyter-execute:: + + from cogent3 import load_aligned_seqs + from cogent3.parse import jaspar + + _, pwm = jaspar.read("data/tbp.jaspar") + freqarr = pwm.to_freq_array() + freqarr[:5] # illustrating the contents of the MotifFreqsArray + +.. jupyter-execute:: + + logo = freqarr.logo() + logo.show(height=250, width=500) + +.. jupyter-execute:: + :hide-code: + + outpath = set_working_directory.get_thumbnail_dir() / "plot_aln-seqlogo.png" + + logo.write(outpath) + +Drawing a sequence logo from a multiple sequence alignment +---------------------------------------------------------- + +This can be done for an entire alignment, but bear in mind it can take some time to render. Note that we include gap characters in the display. + +.. jupyter-execute:: + + aln = load_aligned_seqs("data/brca1-bats.fasta", moltype="dna") + l = aln[:311].seqlogo(height=300, width=500, wrap=60, vspace=0.05) + l.show() + +Sequence logo of protein alignment +---------------------------------- + +No difference here except it uses the built-in colour scheme from the protein ``MolType``. + +.. jupyter-execute:: + + aa = aln.get_translation(incomplete_ok=True)[:120] + logo = aa.seqlogo(width=500, height=300, wrap=50, vspace=0.1) + logo.show() diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw/index.rst python-cogent-2022.5.25a1+dfsg/doc/draw/index.rst --- python-cogent-2021.10.12a1+dfsg/doc/draw/index.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw/index.rst 2022-05-24 23:42:33.000000000 +0000 @@ -0,0 +1,28 @@ +############# +Image Gallery +############# + +We use `Plotly `_ as our backend for visualisation. It provides excellent graph interactivity in Jupyter notebooks. + +********************** +Alignments & Sequences +********************** + +.. nbgallery:: + :name: rst-aln-gallery + :glob: + :reversed: + + aln/* + +****************** +Phylogenetic Trees +****************** + +.. 
nbgallery:: + :name: rst-tree-gallery + :glob: + :reversed: + + tree/* + diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw/tree/plot_tree-angular.rst python-cogent-2022.5.25a1+dfsg/doc/draw/tree/plot_tree-angular.rst --- python-cogent-2021.10.12a1+dfsg/doc/draw/tree/plot_tree-angular.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw/tree/plot_tree-angular.rst 2022-05-24 23:42:33.000000000 +0000 @@ -0,0 +1,34 @@ +.. jupyter-execute:: + :hide-code: + + import set_working_directory + +Angular Dendrogram Style +======================== + +This is a left-right style. You'll note that there's overlap of edges at the bottom -- a known issue with this display style. + +.. jupyter-execute:: + + from cogent3.app import io + + reader = io.load_json() + + ens_tree = reader("data/GN-tree.json") + fig = ens_tree.get_figure(style="angular", width=600, height=600) + fig.show() + +With Contemporaneous Tips +------------------------- + +.. jupyter-execute:: + + fig.contemporaneous = True + fig.show() + +.. jupyter-execute:: + :hide-code: + + outpath = set_working_directory.get_thumbnail_dir() / "plot_tree-angular.png" + + fig.write(outpath) diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw/tree/plot_tree-circular.rst python-cogent-2022.5.25a1+dfsg/doc/draw/tree/plot_tree-circular.rst --- python-cogent-2021.10.12a1+dfsg/doc/draw/tree/plot_tree-circular.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw/tree/plot_tree-circular.rst 2022-05-24 23:42:33.000000000 +0000 @@ -0,0 +1,50 @@ +.. jupyter-execute:: + :hide-code: + + import set_working_directory + +Circular Dendrogram Style +========================= + +I modify values for the ``scale_bar`` placement and for ``label_pad``. The latter controls the spacing between the tip ends and the label text. + +.. 
jupyter-execute:: + + from cogent3.app import io + + reader = io.load_json() + + ens_tree = reader("data/GN-tree.json") + fig = ens_tree.get_figure("circular", width=600, height=600) + fig.scale_bar = "top right" + fig.label_pad = 0.1 + fig.show() + +Colouring a set of edges +------------------------ + +.. jupyter-execute:: + + fig.style_edges( + "AfricanEl", + tip2="Manatee", + legendgroup="Afrotheria", + line=dict(color="magenta", width=2), + ) + fig.show(width=650, height=600) + +With Contemporaneous Tips +------------------------- + +.. jupyter-execute:: + + fig.contemporaneous = True + fig.label_pad = 0.3 + fig.show(width=650, height=600) + +.. jupyter-execute:: + :hide-code: + + outpath = set_working_directory.get_thumbnail_dir() / "plot_tree-circular.png" + + fig.write(outpath) diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw/tree/plot_tree-radial.rst python-cogent-2022.5.25a1+dfsg/doc/draw/tree/plot_tree-radial.rst --- python-cogent-2021.10.12a1+dfsg/doc/draw/tree/plot_tree-radial.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw/tree/plot_tree-radial.rst 2022-05-24 23:42:33.000000000 +0000 @@ -0,0 +1,33 @@ +.. jupyter-execute:: + :hide-code: + + import set_working_directory + +Radial Dendrogram Style +======================= + +.. jupyter-execute:: + + from cogent3.app import io + + reader = io.load_json() + + ens_tree = reader("data/GN-tree.json") + fig = ens_tree.get_figure("radial", width=600, height=600) + fig.show() + +.. jupyter-execute:: + :hide-code: + + outpath = set_working_directory.get_thumbnail_dir() / "plot_tree-radial.png" + + fig.write(outpath) + +With Contemporaneous Tips +------------------------- + +.. 
jupyter-execute:: + + fig.contemporaneous = True + fig.label_pad = 0.23 + fig.show() diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw/tree/plot_tree-square.rst python-cogent-2022.5.25a1+dfsg/doc/draw/tree/plot_tree-square.rst --- python-cogent-2021.10.12a1+dfsg/doc/draw/tree/plot_tree-square.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw/tree/plot_tree-square.rst 2022-05-24 23:42:33.000000000 +0000 @@ -0,0 +1,50 @@ +.. jupyter-execute:: + :hide-code: + + import set_working_directory + +Square Dendrogram Style +======================= + +We use a tree saved in ``json`` format from a likelihood function analysis of a non-stationary model. The tree was derived such that the branch lengths are now "ENS". + +.. note:: I change the scale bar placement. Valid values are ``"top left"``, ``"top right"``, ``"bottom left"`` (default), or ``"bottom right"``. + +.. jupyter-execute:: + + from cogent3.app import io + + reader = io.load_json() + + ens_tree = reader("data/GN-tree.json") + fig = ens_tree.get_figure(width=600, height=600) + fig.scale_bar = "top right" + fig.show() + +.. jupyter-execute:: + :hide-code: + + outpath = set_working_directory.get_thumbnail_dir() / "plot_tree-square.png" + + fig.write(outpath) + +Colouring a set of edges +------------------------ + +.. jupyter-execute:: + + fig.style_edges( + "AfricanEl", + tip2="Manatee", + legendgroup="Afrotheria", + line=dict(color="magenta"), + ) + fig.show() + +With Contemporaneous Tips +------------------------- + +.. jupyter-execute:: + + fig.contemporaneous = True + fig.show() diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw/tree/plot_tree-support.rst python-cogent-2022.5.25a1+dfsg/doc/draw/tree/plot_tree-support.rst --- python-cogent-2021.10.12a1+dfsg/doc/draw/tree/plot_tree-support.rst 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw/tree/plot_tree-support.rst 2022-05-24 23:42:33.000000000 +0000 @@ -0,0 +1,40 @@ +.. 
jupyter-execute:: + :hide-code: + + import set_working_directory + +Showing Bootstrap Support +========================= + +We use a tree saved in ``json`` format from a 100 replicate bootstrap resamplings. The ``show_support=True`` argument controls whether or not to display support. The ``threshold=0.8`` argument indicates only nodes with a support level ≤0.8 will have support text displayed. + +.. jupyter-execute:: + + from cogent3.app import io + + reader = io.load_json() + + tree = reader("data/tree-with-support.json") + fig = tree.get_figure(show_support=True, threshold=0.8) + fig.scale_bar = None + fig.show(width=500, height=400) + +Change the placement of support text +------------------------------------ + +The support text is positioned relative to the ``x``, ``y`` coordinates of the tree node. Control over support text placement is achieved using the ``support_xshift`` and ``support_yshift`` attributes. These are expressed in terms of pixels. + +To place the support text internal to the node, we set the ``yshift=0`` (so at the same y-value of the node) and xshift it to the right using a positive integer. + +.. jupyter-execute:: + + fig.support_xshift = 15 + fig.support_yshift = 0 + fig.show(width=500, height=400) + +.. jupyter-execute:: + :hide-code: + + outpath = set_working_directory.get_thumbnail_dir() / "plot_tree-support.png" + + fig.write(outpath) diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw_examples/aln/plot_aln-coevolution.py python-cogent-2022.5.25a1+dfsg/doc/draw_examples/aln/plot_aln-coevolution.py --- python-cogent-2021.10.12a1+dfsg/doc/draw_examples/aln/plot_aln-coevolution.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw_examples/aln/plot_aln-coevolution.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,33 +0,0 @@ -""" -Evaluating coevolution -====================== - -A method on the alignment provides an interface to the simpler (and yet robust and fast) methods for estimating coevolution. 
The default measure is normalised mutual information (NMI). -""" -#%% -# Display coevolution as a heatmap -# ################################ - -from cogent3 import load_aligned_seqs - - -aln = load_aligned_seqs("../../data/brca1.fasta", moltype="dna") -aln = aln.no_degenerates(motif_length=3) -aln = aln.get_translation() -aln = aln[:100] # for compute speed in testing the documentation -coevo = aln.coevolution(show_progress=False, drawable="heatmap") -coevo.show() - -#%% -# Display coevolution scores as a Violin plot -# ########################################### - -coevo = aln.coevolution(show_progress=False, drawable="violin") -coevo.show(width=300) - -#%% -# Display coevolution scores as a Boxplot -# ####################################### - -coevo = aln.coevolution(show_progress=False, drawable="box") -coevo.show(width=300) diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw_examples/aln/plot_aln-dotplot-1.py python-cogent-2022.5.25a1+dfsg/doc/draw_examples/aln/plot_aln-dotplot-1.py --- python-cogent-2021.10.12a1+dfsg/doc/draw_examples/aln/plot_aln-dotplot-1.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw_examples/aln/plot_aln-dotplot-1.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,53 +0,0 @@ -""" -Dotplot -======= - -A technique (`Gibbs and McIntyre `_) for comparing sequences. All ``cogent3`` sequence collections classes (``SequenceCollection``, ``Alignment`` and ``ArrayAlignment``) have a dotplot method. - -The method returns a drawable, as demonstrated below between unaligned sequences. -""" - -# %% -import os - -from cogent3 import load_unaligned_seqs - - -seqs = load_unaligned_seqs("../../data/SCA1-cds.fasta", moltype="dna") -draw = seqs.dotplot() -draw.show() - -#%% -# If sequence names are not provided, two randomly chosen sequences are selected (see below). The plot title reflects the parameter values for defining a match. ``window`` is the size of the sequence segments being compared. 
``threshold`` is the number of exact matches within ``window`` required for the two sequence segments to be considered a match. ``gap`` is the size of a gap between adjacent matches before merging. -# -# Modifying the matching parameters -# ################################# -# -# If we set window and threshold to be equal, this is equivalent to an exact match approach. - -draw = seqs.dotplot(name1="Human", name2="Mouse", window=8, threshold=8) -draw.show() - -#%% -# Displaying dotplot for the reverse complement -# ############################################# - -draw = seqs.dotplot(name1="Human", name2="Mouse", rc=True) -draw.show() - -#%% -# .. note:: clicking on an entry in the legend turns it off -# -# Setting plot attributes -# ####################### -# -# I'll modify the title and figure width. - -draw = seqs.dotplot(name1="Human", name2="Mouse", rc=True, title="SCA1", width=400) -draw.show() - -#%% -# All options -# ########### - -help(seqs.dotplot) diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw_examples/aln/plot_aln-dotplot-2.py python-cogent-2022.5.25a1+dfsg/doc/draw_examples/aln/plot_aln-dotplot-2.py --- python-cogent-2021.10.12a1+dfsg/doc/draw_examples/aln/plot_aln-dotplot-2.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw_examples/aln/plot_aln-dotplot-2.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,34 +0,0 @@ -""" -Dotplot with annotated sequences -================================ - -If sequences in a dotplot have been annotated, the `dotplot()` method returns an `AnnotatedDrawable`. -""" -#%% -# Reloading from json -# ################### -# -# The data file, `tp53.json`, was created from a query of ensembl for one-to-one orthologs of human TP53 between Human, Macaque, Orangutan and Marmoset. The resulting sequences were annotated with the location of the CDS for the canonical transcript, then the `SequenceCollection` was saved as json using `cogent3.app.write_json`. 
- -from cogent3.app.io import get_data_store, load_json - - -loader = load_json() -seqs = loader("../../data/tp53.json") -dp = seqs.dotplot(name1="Macaque", name2="Marmoset", width=600) -dp.show() - -#%% -# Removing annotation tracks -# ########################## - -help(dp.remove_track) - -#%% -# Thus we could remove the left annotation track, for instance with -# -# ```python -# dp.remove_track(left_track=True) -# ``` -# -# For some reason, the display of this result is broken on RTD so we don't do it here. diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw_examples/aln/plot_aln-gaps-per-seq.py python-cogent-2022.5.25a1+dfsg/doc/draw_examples/aln/plot_aln-gaps-per-seq.py --- python-cogent-2021.10.12a1+dfsg/doc/draw_examples/aln/plot_aln-gaps-per-seq.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw_examples/aln/plot_aln-gaps-per-seq.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,40 +0,0 @@ -""" -Counting gaps per sequence -========================== - -We have several different ways of counting sequence gaps, and of visualising the results. By default, the `count_gaps_per_seq()` method returns a matrix of counts without the ability to visualise the results. When setting the argument `unique=True`, the counts are for gaps uniquely induced by each sequence. This can be a useful indicator of highly divergent sequences. -""" - -#%% -from cogent3 import load_aligned_seqs - - -aln = load_aligned_seqs('../../data/brca1.fasta', moltype='dna') - -counts = aln.count_gaps_per_seq(unique=True) -counts - -#%% -# Plotting counts of unique gaps -# ############################## -# -# There are three plot types supported. In all cases, placing the mouse pointer over a data point will show hover text with the sequence name. 
- -#%% -# Displaying unique gaps as a bar chart -# ************************************* - -counts = aln.count_gaps_per_seq(unique=True, drawable='bar') -counts.show(width=500) -#%% -# Displaying unique gaps as a violin plot -# *************************************** - -counts = aln.count_gaps_per_seq(unique=True, drawable='violin') -counts.show(width=300, height=500) -#%% -# Displaying unique gaps as a box plot -# ************************************ - -counts = aln.count_gaps_per_seq(unique=True, drawable='box') -counts.show(width=300, height=500) diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw_examples/aln/plot_aln-info-plot.py python-cogent-2022.5.25a1+dfsg/doc/draw_examples/aln/plot_aln-info-plot.py --- python-cogent-2021.10.12a1+dfsg/doc/draw_examples/aln/plot_aln-info-plot.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw_examples/aln/plot_aln-info-plot.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,32 +0,0 @@ -""" -Information analysis of an alignment -==================================== - -Information here is in the formal sense -- maximum entropy minus the entropy at a position. This is fast to compute and is an indicator of the variability at a position. -""" -#%% -# Illustrated with a simple example -# ################################# - -from cogent3 import load_aligned_seqs, make_aligned_seqs, make_seq - - -s1 = make_seq('TGATGTAAGGTAGTT', name='s1', moltype="dna") -s2 = make_seq('--CTGGAAGGGT---', name='s2', moltype="dna") - -seqs = make_aligned_seqs(data=[s1, s2], array_align=False, - moltype='dna') -draw = seqs.information_plot(window=2, include_gap=True) -draw.show(width=500, height=400) - -#%% -# On a sample data set -# ******************** -# -# Clicking on any of the legend items causes that to disappear from the plot. 
- -aln = load_aligned_seqs('../../data/brca1.fasta', moltype='protein') - -fig = aln.information_plot(stat='median') -# sphinx_gallery_thumbnail_number = 2 -fig.show(width=500, height=400) diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw_examples/aln/plot_aln-seqlogo.py python-cogent-2022.5.25a1+dfsg/doc/draw_examples/aln/plot_aln-seqlogo.py --- python-cogent-2021.10.12a1+dfsg/doc/draw_examples/aln/plot_aln-seqlogo.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw_examples/aln/plot_aln-seqlogo.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,45 +0,0 @@ -""" -Draw sequence logos -=================== - -Sequence logo's display sequence information. They're extensively applied to transcription factor binding site (TFBS) display. They can also be applied to sequence alignments more generally. -""" -#%% -# Drawing logo for a TFBS -# ####################### -# -# We use the TFBS for the TAT box binding protein. - -from cogent3 import load_aligned_seqs -from cogent3.parse import jaspar - - -_, pwm = jaspar.read("../../data/tbp.jaspar") -freqarr = pwm.to_freq_array() -freqarr[:5] # illustrating the contents of the MotifFreqsArray - - -# %% -logo = freqarr.logo() -logo.show(height=250, width=500) - -#%% -# Drawing a sequence logo from a multiple sequence alignment -# ########################################################## -# -# This can be done for an entire alignment, but bear in mind it can take some time to render. Note that we include gap characters in the display. - - -aln = load_aligned_seqs("../../data/brca1-bats.fasta", moltype="dna") -l = aln[:311].seqlogo(height=300, width=500, wrap=60, vspace=0.05) -l.show() - -#%% -# Sequence logo of protein alignment -# ################################## -# -# No difference here except it uses the built-in colour scheme from the protein `MolType`. 
- -aa = aln.get_translation(incomplete_ok=True)[:120] -logo = aa.seqlogo(width=500, height=300, wrap=50, vspace=0.1) -logo.show() diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw_examples/aln/README.rst python-cogent-2022.5.25a1+dfsg/doc/draw_examples/aln/README.rst --- python-cogent-2021.10.12a1+dfsg/doc/draw_examples/aln/README.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw_examples/aln/README.rst 1970-01-01 00:00:00.000000000 +0000 @@ -1,4 +0,0 @@ -********************** -Alignments & Sequences -********************** - diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw_examples/README.rst python-cogent-2022.5.25a1+dfsg/doc/draw_examples/README.rst --- python-cogent-2021.10.12a1+dfsg/doc/draw_examples/README.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw_examples/README.rst 1970-01-01 00:00:00.000000000 +0000 @@ -1,5 +0,0 @@ -############# -Image Gallery -############# - -We use `Plotly `_ as our backend for visualisation. It provides excellent graph interactivity in Jupyter notebooks. diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw_examples/tree/plot_tree-angular.py python-cogent-2022.5.25a1+dfsg/doc/draw_examples/tree/plot_tree-angular.py --- python-cogent-2021.10.12a1+dfsg/doc/draw_examples/tree/plot_tree-angular.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw_examples/tree/plot_tree-angular.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,22 +0,0 @@ -""" -Display a Phylogenetic Tree with a Angular Dendrogram Style -=========================================================== - -This is a left-right style. You'll note that there's overlap of edges at the bottom -- a known issue with this display style. 
-""" -# %% -from cogent3.app import io - - -reader = io.load_json() - -ens_tree = reader("../../data/GN-tree.json") -fig = ens_tree.get_figure(style="angular", width=600, height=600) -fig.show() - -#%% -# With Contemporaneous Tips -# ######################### - -fig.contemporaneous = True -fig.show() diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw_examples/tree/plot_tree-circular.py python-cogent-2022.5.25a1+dfsg/doc/draw_examples/tree/plot_tree-circular.py --- python-cogent-2021.10.12a1+dfsg/doc/draw_examples/tree/plot_tree-circular.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw_examples/tree/plot_tree-circular.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,30 +0,0 @@ -""" -Display a Phylogenetic Tree with a Circular Dendrogram Style -============================================================ -""" -# %% -from cogent3.app import io - - -reader = io.load_json() - -ens_tree = reader("../../data/GN-tree.json") -fig = ens_tree.get_figure("circular", width=600, height=600) -fig.show() - -#%% -# Colouring a set of edges -# ######################## - -fig.style_edges("AfricanEl", tip2="Manatee", legendgroup="Afrotheria", - line=dict(color="magenta", width=2)) -fig.show() - -#%% -# With Contemporaneous Tips -# ######################### - -fig.contemporaneous = True -fig.label_pad = 0.23 -# sphinx_gallery_thumbnail_number = 3 -fig.show(width=550, height=500) diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw_examples/tree/plot_tree-radial.py python-cogent-2022.5.25a1+dfsg/doc/draw_examples/tree/plot_tree-radial.py --- python-cogent-2021.10.12a1+dfsg/doc/draw_examples/tree/plot_tree-radial.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw_examples/tree/plot_tree-radial.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,21 +0,0 @@ -""" -Display a Phylogenetic Tree with a Radial Dendrogram Style -========================================================== -""" -# %% -from cogent3.app import io - - -reader = 
io.load_json() - -ens_tree = reader("../../data/GN-tree.json") -fig = ens_tree.get_figure("radial", width=600, height=600) -fig.show() - -#%% -# With Contemporaneous Tips -# ######################### - -fig.contemporaneous = True -fig.label_pad = 0.23 -fig.show() diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw_examples/tree/plot_tree-square.py python-cogent-2022.5.25a1+dfsg/doc/draw_examples/tree/plot_tree-square.py --- python-cogent-2021.10.12a1+dfsg/doc/draw_examples/tree/plot_tree-square.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw_examples/tree/plot_tree-square.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,37 +0,0 @@ -""" -Display a Phylogenetic Tree with a Square Dendrogram Style -========================================================== - -We use a tree saved in `json` format from a likelihood function analysis of a non-stationary model. The tree was derived such that the the branch lengths are now "ENS". -""" -# %% -from cogent3.app import io - - -reader = io.load_json() - -ens_tree = reader("../../data/GN-tree.json") -fig = ens_tree.get_figure(width=600, height=600) -fig.show() - -#%% -# Changing scale bar placement -# ############################ - -fig.scale_bar = "top right" -fig.show() - -#%% -# Colouring a set of edges -# ######################## - -fig.style_edges("AfricanEl", tip2="Manatee", legendgroup="Afrotheria", - line=dict(color="magenta")) -fig.show() - -#%% -# With Contemporaneous Tips -# ######################### - -fig.contemporaneous = True -fig.show() diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw_examples/tree/plot_tree-support.py python-cogent-2022.5.25a1+dfsg/doc/draw_examples/tree/plot_tree-support.py --- python-cogent-2021.10.12a1+dfsg/doc/draw_examples/tree/plot_tree-support.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw_examples/tree/plot_tree-support.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,28 +0,0 @@ -""" -Display a Phylogenetic Tree Showing 
Bootstrap Support -===================================================== - -We use a tree saved in `json` format from a 100 replicate bootstrap resamplings. The `show_support=True` argument controls whether or not to display support. The `threshold=0.8` argument indicates only nodes with a support level ≤0.8 will have support text displayed. -""" -# %% -from cogent3.app import io - - -reader = io.load_json() - -tree = reader("../../data/tree-with-support.json") -fig = tree.get_figure(show_support=True, threshold=0.8) -fig.scale_bar = None -fig.show(width=500, height=400) - -#%% -# Change the placement of support text -# #################################### -# -# The support text is positioned relative to the `x`, `y` coordinates of the tree node. Control over support text placement is achieved using the `support_xshift` and `support_yshift` attributes. These are expressed in terms of pixels. -# -# To place the support text internal to the node, we set the yshift=0 (so at the same y-value of the node) and xshift it to the right using a positive integer. - -fig.support_xshift = 15 -fig.support_yshift = 0 -fig.show(width=500, height=400) diff -Nru python-cogent-2021.10.12a1+dfsg/doc/draw_examples/tree/README.rst python-cogent-2022.5.25a1+dfsg/doc/draw_examples/tree/README.rst --- python-cogent-2021.10.12a1+dfsg/doc/draw_examples/tree/README.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/draw_examples/tree/README.rst 1970-01-01 00:00:00.000000000 +0000 @@ -1,4 +0,0 @@ -****************** -Phylogenetic Trees -****************** - diff -Nru python-cogent-2021.10.12a1+dfsg/doc/examples/align_codons_to_protein.rst python-cogent-2022.5.25a1+dfsg/doc/examples/align_codons_to_protein.rst --- python-cogent-2021.10.12a1+dfsg/doc/examples/align_codons_to_protein.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/examples/align_codons_to_protein.rst 2022-05-24 23:42:33.000000000 +0000 @@ -7,7 +7,7 @@ .. 
jupyter-execute:: - from cogent3 import make_unaligned_seqs, make_aligned_seqs + from cogent3 import make_aligned_seqs, make_unaligned_seqs First I'm going to construct an artificial example, using the seqs dict as a means to get the data into the Alignment object. The basic idea, however, is that you should already have a set of DNA sequences that are in frame (i.e. position 0 is the 1st codon position), you've translated those sequences and aligned these translated sequences. The result is an alignment of aa sequences and a set of unaligned DNA sequences from which the aa seqs were derived. If your sequences are not in frame you can adjust it by either slicing, or adding N's to the beginning of the raw string. @@ -39,4 +39,4 @@ } aligned_aa = make_aligned_seqs(aligned_aa_seqs, moltype="protein") aligned_DNA = aligned_aa.replace_seqs(unaligned_DNA) - aligned_DNA \ No newline at end of file + aligned_DNA diff -Nru python-cogent-2021.10.12a1+dfsg/doc/examples/calculate_neigbourjoining_tree.rst python-cogent-2022.5.25a1+dfsg/doc/examples/calculate_neigbourjoining_tree.rst --- python-cogent-2021.10.12a1+dfsg/doc/examples/calculate_neigbourjoining_tree.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/examples/calculate_neigbourjoining_tree.rst 2022-05-24 23:42:33.000000000 +0000 @@ -55,4 +55,4 @@ import os - os.remove("test_nj.tree") \ No newline at end of file + os.remove("test_nj.tree") diff -Nru python-cogent-2021.10.12a1+dfsg/doc/examples/calculate_pairwise_distances.rst python-cogent-2022.5.25a1+dfsg/doc/examples/calculate_pairwise_distances.rst --- python-cogent-2021.10.12a1+dfsg/doc/examples/calculate_pairwise_distances.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/examples/calculate_pairwise_distances.rst 2022-05-24 23:42:33.000000000 +0000 @@ -64,4 +64,4 @@ import os for file_name in "dists_for_phylo.phylip", "dists_for_phylo.pickle": - os.remove(file_name) \ No newline at end of file + 
os.remove(file_name) diff -Nru python-cogent-2021.10.12a1+dfsg/doc/examples/calculate_UPGMA_cluster.rst python-cogent-2022.5.25a1+dfsg/doc/examples/calculate_UPGMA_cluster.rst --- python-cogent-2021.10.12a1+dfsg/doc/examples/calculate_UPGMA_cluster.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/examples/calculate_UPGMA_cluster.rst 2022-05-24 23:42:33.000000000 +0000 @@ -15,8 +15,8 @@ .. jupyter-execute:: from cogent3 import load_aligned_seqs - from cogent3.evolve import distance from cogent3.cluster.UPGMA import upgma + from cogent3.evolve import distance Import a substitution model (or create your own) @@ -57,4 +57,4 @@ import os - os.remove("test_upgma.tree") \ No newline at end of file + os.remove("test_upgma.tree") diff -Nru python-cogent-2021.10.12a1+dfsg/doc/examples/codon_models.rst python-cogent-2022.5.25a1+dfsg/doc/examples/codon_models.rst --- python-cogent-2021.10.12a1+dfsg/doc/examples/codon_models.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/examples/codon_models.rst 2022-05-24 23:42:33.000000000 +0000 @@ -212,9 +212,10 @@ .. jupyter-execute:: :hide-code: + from IPython.core.display import HTML from numpy import array + from cogent3 import make_table - from IPython.core.display import HTML header = ['Site Class', 'Proportion', 'Background Edges', 'Foreground Edges'] data = {'Site Class': array(['0', '1', '2a', '2b'], dtype=' 0. .. [2] This environment variable is created by the PBS system on executing the job script. .. [3] You can check your execution of the script is correct by validating you get all the ranks up to one minus the number of CPUs you requested. 
- diff -Nru python-cogent-2021.10.12a1+dfsg/doc/examples/period_estimation.rst python-cogent-2022.5.25a1+dfsg/doc/examples/period_estimation.rst --- python-cogent-2021.10.12a1+dfsg/doc/examples/period_estimation.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/examples/period_estimation.rst 2022-05-24 23:42:33.000000000 +0000 @@ -257,4 +257,4 @@ seq_to_symbols=seq_to_symbols, ) print(stat) - p < 0.1 \ No newline at end of file + p < 0.1 diff -Nru python-cogent-2021.10.12a1+dfsg/doc/examples/phylo_by_ls.rst python-cogent-2022.5.25a1+dfsg/doc/examples/phylo_by_ls.rst --- python-cogent-2021.10.12a1+dfsg/doc/examples/phylo_by_ls.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/examples/phylo_by_ls.rst 2022-05-24 23:42:33.000000000 +0000 @@ -16,10 +16,11 @@ :hide-code: import pickle + from cogent3 import load_aligned_seqs from cogent3.evolve import distance - from cogent3.evolve.models import HKY85 from cogent3.evolve.fast_distance import DistanceMatrix + from cogent3.evolve.models import HKY85 al = load_aligned_seqs("data/long_testseqs.fasta") d = distance.EstimateDistances(al, submodel=HKY85()) @@ -31,6 +32,7 @@ .. jupyter-execute:: import pickle + from cogent3.phylo import least_squares Now load the distance data. @@ -148,4 +150,4 @@ import os - os.remove("dists_for_phylo.pickle") \ No newline at end of file + os.remove("dists_for_phylo.pickle") diff -Nru python-cogent-2021.10.12a1+dfsg/doc/examples/rate_heterogeneity.rst python-cogent-2022.5.25a1+dfsg/doc/examples/rate_heterogeneity.rst --- python-cogent-2021.10.12a1+dfsg/doc/examples/rate_heterogeneity.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/examples/rate_heterogeneity.rst 2022-05-24 23:42:33.000000000 +0000 @@ -14,8 +14,8 @@ .. 
jupyter-execute:: - from cogent3.evolve.substitution_model import TimeReversibleNucleotide from cogent3 import load_tree + from cogent3.evolve.substitution_model import TimeReversibleNucleotide Make an alignment with equal split between rates 0.6 and 0.2, and then concatenate them to create a new alignment. @@ -58,4 +58,4 @@ lf = model.make_likelihood_function(tree, bins=4) lf.set_param_rule("bprobs", is_constant=True) lf.set_alignment(aln3) - lf.optimise(local=True, max_restarts=2, show_progress=False) \ No newline at end of file + lf.optimise(local=True, max_restarts=2, show_progress=False) diff -Nru python-cogent-2021.10.12a1+dfsg/doc/examples/relative_rate.rst python-cogent-2022.5.25a1+dfsg/doc/examples/relative_rate.rst --- python-cogent-2021.10.12a1+dfsg/doc/examples/relative_rate.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/examples/relative_rate.rst 2022-05-24 23:42:33.000000000 +0000 @@ -100,4 +100,4 @@ print("Likelihood ratio statistic = ", LR) print("degrees-of-freedom = ", df) - print("probability = ", P) \ No newline at end of file + print("probability = ", P) diff -Nru python-cogent-2021.10.12a1+dfsg/doc/examples/scope_model_params_on_trees.rst python-cogent-2022.5.25a1+dfsg/doc/examples/scope_model_params_on_trees.rst --- python-cogent-2021.10.12a1+dfsg/doc/examples/scope_model_params_on_trees.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/examples/scope_model_params_on_trees.rst 2022-05-24 23:42:33.000000000 +0000 @@ -209,4 +209,4 @@ lf.set_param_rule("omega", is_independent=True) lf.optimise(local=True, show_progress=False) - lf \ No newline at end of file + lf diff -Nru python-cogent-2021.10.12a1+dfsg/doc/examples/seq_features.rst python-cogent-2022.5.25a1+dfsg/doc/examples/seq_features.rst --- python-cogent-2021.10.12a1+dfsg/doc/examples/seq_features.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/examples/seq_features.rst 2022-05-24 
23:42:33.000000000 +0000 @@ -62,4 +62,4 @@ .. jupyter-execute:: - exon1[0:3].get_slice() \ No newline at end of file + exon1[0:3].get_slice() diff -Nru python-cogent-2021.10.12a1+dfsg/doc/examples/simple.rst python-cogent-2022.5.25a1+dfsg/doc/examples/simple.rst --- python-cogent-2021.10.12a1+dfsg/doc/examples/simple.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/examples/simple.rst 2022-05-24 23:42:33.000000000 +0000 @@ -12,8 +12,8 @@ .. jupyter-execute:: - from cogent3.evolve.models import get_model from cogent3 import load_aligned_seqs, make_tree + from cogent3.evolve.models import get_model model = get_model("HKY85") aln = load_aligned_seqs("data/primate_cdx2_promoter.fasta") @@ -21,4 +21,4 @@ lf = model.make_likelihood_function(tree) lf.set_alignment(aln) lf.optimise(show_progress=False) - lf \ No newline at end of file + lf diff -Nru python-cogent-2021.10.12a1+dfsg/doc/examples/simulate_alignment.rst python-cogent-2022.5.25a1+dfsg/doc/examples/simulate_alignment.rst --- python-cogent-2021.10.12a1+dfsg/doc/examples/simulate_alignment.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/examples/simulate_alignment.rst 2022-05-24 23:42:33.000000000 +0000 @@ -8,6 +8,7 @@ .. jupyter-execute:: import sys + from cogent3 import make_tree from cogent3.evolve.models import get_model @@ -32,4 +33,4 @@ .. jupyter-execute:: simulated = lf.simulate_alignment(sequence_length=1000) - simulated \ No newline at end of file + simulated diff -Nru python-cogent-2021.10.12a1+dfsg/doc/examples/testing_multi_loci.rst python-cogent-2022.5.25a1+dfsg/doc/examples/testing_multi_loci.rst --- python-cogent-2021.10.12a1+dfsg/doc/examples/testing_multi_loci.rst 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/examples/testing_multi_loci.rst 2022-05-24 23:42:33.000000000 +0000 @@ -12,10 +12,10 @@ .. 
jupyter-execute:: - from cogent3 import load_aligned_seqs, make_tree, make_table + from cogent3 import load_aligned_seqs, make_table, make_tree from cogent3.evolve.models import HKY85 - from cogent3.recalculation.scope import EACH, ALL from cogent3.maths.stats import chisqprob + from cogent3.recalculation.scope import ALL, EACH aln = load_aligned_seqs("data/long_testseqs.fasta") half = len(aln) // 2 @@ -63,4 +63,4 @@ make_table( header=["LR", "df", "p"], rows=[[LR, df, chisqprob(LR, df)]], digits=2, space=3, - ) \ No newline at end of file + ) diff -Nru python-cogent-2021.10.12a1+dfsg/doc/Makefile python-cogent-2022.5.25a1+dfsg/doc/Makefile --- python-cogent-2021.10.12a1+dfsg/doc/Makefile 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/Makefile 2022-05-24 23:42:33.000000000 +0000 @@ -34,13 +34,11 @@ clean: -rm -rf _build/* - -rm -rf draw/ html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) _build/html @echo @echo "Build finished. The HTML pages are in _build/html." - -rm -rf draw/ github: @make html diff -Nru python-cogent-2021.10.12a1+dfsg/doc/rtd-environment.yml python-cogent-2022.5.25a1+dfsg/doc/rtd-environment.yml --- python-cogent-2021.10.12a1+dfsg/doc/rtd-environment.yml 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/doc/rtd-environment.yml 1970-01-01 00:00:00.000000000 +0000 @@ -1,26 +0,0 @@ -name: cogent3 -channels: - - plotly - - conda-forge -dependencies: - - pandoc - - python=3.7 - - matplotlib - - plotly - - plotly-orca - - pip - - pip: - - numpydoc - - psutil - - pillow - - ipython - - ipykernel - - ipywidgets - - sphinx>=1.6 - - nbsphinx - - nbformat - - nbconvert!=5.4 - - sphinxcontrib-bibtex - - sphinx-gallery - - sphinx_bootstrap_theme - - ../ diff -Nru python-cogent-2021.10.12a1+dfsg/doc/set_working_directory.py python-cogent-2022.5.25a1+dfsg/doc/set_working_directory.py --- python-cogent-2021.10.12a1+dfsg/doc/set_working_directory.py 2021-10-12 00:17:34.000000000 +0000 +++ 
python-cogent-2022.5.25a1+dfsg/doc/set_working_directory.py 2022-05-24 23:42:33.000000000 +0000 @@ -6,7 +6,7 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" __version__ = "2020.2.7a" @@ -28,5 +28,12 @@ raise RuntimeError(f"could not find data dir from {current}") +def get_thumbnail_dir(): + """returns path to directory for writing html thumbnail images""" + thumbdir = pathlib.Path(__file__).parent / "_build" / "html" / "_images" + thumbdir.mkdir(exist_ok=True, parents=True) + return thumbdir + + data_dir = get_data_dir() os.chdir(data_dir) diff -Nru python-cogent-2021.10.12a1+dfsg/.github/dependabot.yml python-cogent-2022.5.25a1+dfsg/.github/dependabot.yml --- python-cogent-2021.10.12a1+dfsg/.github/dependabot.yml 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/.github/dependabot.yml 2022-05-24 23:42:33.000000000 +0000 @@ -1,8 +1,8 @@ version: 2 updates: -- package-ecosystem: pip - directory: "/" - schedule: - interval: daily - time: "19:00" - open-pull-requests-limit: 10 + - package-ecosystem: pip + directory: "/" + schedule: + interval: weekly + time: "19:00" + open-pull-requests-limit: 10 diff -Nru python-cogent-2021.10.12a1+dfsg/.github/workflows/testing_develop.yml python-cogent-2022.5.25a1+dfsg/.github/workflows/testing_develop.yml --- python-cogent-2021.10.12a1+dfsg/.github/workflows/testing_develop.yml 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/.github/workflows/testing_develop.yml 2022-05-24 23:42:33.000000000 +0000 @@ -14,7 +14,7 @@ strategy: matrix: os: [macos-latest, ubuntu-latest, windows-latest] - python-version: [3.7, 3.8, 3.9] + python-version: ["3.7", "3.8", "3.9", "3.10"] steps: - uses: "actions/checkout@v2" @@ -29,15 +29,12 @@ - name: "Installs for ${{ matrix.python-version }}" run: | python --version - python -m pip install --upgrade pip 
wheel setuptools flit - python -m pip install --upgrade tox tox-gh-actions - python -m pip install -r requirements.txt + pip install --upgrade pip wheel setuptools flit + pip install --upgrade nox + + - name: "Run nox for ${{ matrix.python-version }}" + run: "nox -s test-${{ matrix.python-version }}" - - name: "Run tox targets for ${{ matrix.python-version }}" - run: tox - env: - PLATFORM: ${{ matrix.platform }} - - name: Upload coverage to Codecov uses: codecov/codecov-action@v1 with: diff -Nru python-cogent-2021.10.12a1+dfsg/.gitignore python-cogent-2022.5.25a1+dfsg/.gitignore --- python-cogent-2021.10.12a1+dfsg/.gitignore 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/.gitignore 2022-05-24 23:42:33.000000000 +0000 @@ -25,6 +25,7 @@ # Unit test / coverage reports .coverage* .tox +.nox coverage.xml junit-*.xml nosetests.xml @@ -42,6 +43,7 @@ __pycache__ *.code-workspace *.wpu +.vscode/* # vi .*.swp diff -Nru python-cogent-2021.10.12a1+dfsg/.hgignore python-cogent-2022.5.25a1+dfsg/.hgignore --- python-cogent-2021.10.12a1+dfsg/.hgignore 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/.hgignore 2022-05-24 23:42:33.000000000 +0000 @@ -25,6 +25,7 @@ *.patch *.pytest_cache *.tox +*.nox *.vscode *.code-workspace coverage.xml diff -Nru python-cogent-2021.10.12a1+dfsg/.hgtags python-cogent-2022.5.25a1+dfsg/.hgtags --- python-cogent-2021.10.12a1+dfsg/.hgtags 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/.hgtags 2022-05-24 23:42:33.000000000 +0000 @@ -20,3 +20,8 @@ b306abbf306afd5b63932244b816862210bdc371 2021.04.20a 9b3797a262b6c880803b32f7633ae3b9ff831f6c 2021.5.7a a206748ce4632ae95320494f55612def65d2055c 2021.10.12a +f0d15f22c57c71f0d8dc5d4b75e49c210914a11f 2021.10.12a1 +0000000000000000000000000000000000000000 2021.10.12a1 +5bb2691a5c8921d2e2a49704e8269b1a6b9c00a0 2021.10.12a1 +f75363e6bab1ebaee7eeb8ae6802ab6ee59008e7 2022.4.15a1 +131b41f6b6fe4996cde1ea995db49d144b201b5e 2022.4.20a1 diff -Nru 
python-cogent-2021.10.12a1+dfsg/noxfile.py python-cogent-2022.5.25a1+dfsg/noxfile.py --- python-cogent-2021.10.12a1+dfsg/noxfile.py 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/noxfile.py 2022-05-24 23:42:33.000000000 +0000 @@ -0,0 +1,61 @@ +import pathlib + +import nox + + +dependencies = ( + "numba>0.54", + "chardet", + "numpy", + "tinydb", + "tqdm", + "click", + "pytest", + "scitrack", + "pandas", + "plotly", + "pytest-cov", +) + +_py_versions = range(7, 11) + + +@nox.session(python=[f"3.{v}" for v in _py_versions]) +def test(session): + py_version = session.python.replace(".", "") + session.install(*dependencies) + session.install(".") + session.chdir("tests") + session.run( + "pytest", + "-x", + "--junitxml", + f"junit-{py_version}.xml", + "--cov-report", + "xml", + "--cov", + "cogent3", + "--ignore", + "test_app_mpi.py", + ) + + +@nox.session(python=[f"3.{v}" for v in _py_versions]) +def testmpi(session): + session.install(*dependencies + ("mpi4py",)) + py = pathlib.Path(session.bin_paths[0]) / "python" + session.install(".") + session.chdir("tests") + session.run( + "mpiexec", + "-n", + "2", + str(py), + "-m", + "mpi4py.futures", + "-m", + "pytest", + "-x", + "test_app/test_app_mpi.py", + external=True, + ) diff -Nru python-cogent-2021.10.12a1+dfsg/pyproject.toml python-cogent-2022.5.25a1+dfsg/pyproject.toml --- python-cogent-2021.10.12a1+dfsg/pyproject.toml 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/pyproject.toml 2022-05-24 23:42:33.000000000 +0000 @@ -1,20 +1,23 @@ [build-system] -requires = ["flit_core >=2,<4"] +requires = ["flit_core >=3.2,<4"] build-backend = "flit_core.buildapi" -[tool.flit.metadata] -module = "cogent3" -author = "Gavin Huttley" -author-email = "Gavin.Huttley@anu.edu.au" -home-page = "https://cogent3.org" -keywords = "biology, genomics, statistics, phylogeny, evolution, bioinformatics" -description-file = "README.md" -license = "BSD-3" -requires-python = ">=3.7" -requires = 
["chardet", +[project] +name = "cogent3" +description = """COmparative GENomics Toolkit 3: genomic sequence analysis within notebooks or on compute systems with 1000s of CPUs.""" +dynamic = ["version"] +authors = [ + { name = "Gavin Huttley", email = "Gavin.Huttley@anu.edu.au"}, +] +keywords = ["biology", "genomics", "statistics", "phylogeny", "evolution", "bioinformatics"] +readme = "README.md" +license = { file = "LICENSE" } +requires-python = ">=3.7,<3.11" +dependencies = ["chardet", "numpy", "numba>0.48.0;python_version<'3.9'", "numba>0.53; python_version>='3.9'", + "numba>0.54; python_version>='3.10'", "scitrack", "tqdm", "tinydb"] @@ -28,25 +31,22 @@ "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", ] -[tool.flit.metadata.urls] +[project.urls] Documentation = "https://www.cogent3.org/" "Bug Tracker" = "https://github.com/cogent3/cogent3/issues" "Source Code" = "https://github.com/cogent3/cogent3" Changelog = "https://github.com/cogent3/cogent3/blob/develop/changelog.md" -[tool.flit.sdist] -include = ["doc/", "requirements.txt", "src/*", "pyproject.toml"] -exclude = ["doc/*.html"] - -[tool.flit.metadata.requires-extra] +[project.optional-dependencies] test = [ "black", "isort", "pytest", "pytest-cov", - "tox"] + "nox"] doc = ["click", "ipykernel", "ipython", @@ -57,8 +57,7 @@ "jupytext", "kaleido", "matplotlib", - "nbconvert", - "nbconvert!=5.4", + "nbconvert>5.4", "nbformat", "nbsphinx", "numpydoc", @@ -67,11 +66,11 @@ "psutil", "sphinx", "sphinx-autobuild", - "sphinx-gallery==0.7", + "sphinx-gallery", "sphinx>=1.6", "sphinx_bootstrap_theme", "sphinx_panels", - "sphinxcontrib-bibtex<2.0.0"] + "sphinxcontrib-bibtex"] extra = ["pandas", "plotly", "psutil", "kaleido"] dev = ["black", "click", @@ -86,10 +85,10 @@ "jupytext", "kaleido", "matplotlib", - "nbconvert", - "nbconvert!=5.4", + "nbconvert>5.4", "nbformat", "nbsphinx", + "nox", "numpydoc", 
"pandas", "pillow", @@ -99,29 +98,29 @@ "pytest-cov", "sphinx", "sphinx-autobuild", - "sphinx-gallery==0.7", - "sphinx>=1.6", + "sphinx-gallery", "sphinx_bootstrap_theme", "sphinx_panels", - "sphinxcontrib-bibtex", - "sphinxcontrib-bibtex<2.0.0", - "tox"] + "sphinxcontrib-bibtex"] + +[tool.flit.sdist] +include = ["doc/", "requirements.txt", "src/*", "pyproject.toml"] +exclude = ["doc/*.html"] [tool.pytest.ini_options] -testpaths = [ - "tests", -] +addopts = ["--strict-config", "-ra"] +testpaths = "tests" [tool.black] line-length = 88 -target-version = ['py37', 'py38', 'py39'] +target-version = ["py37", "py38", "py39", "py310"] exclude = ''' /( \.eggs | \.git | \.hg | \.mypy_cache - | \.tox + | \.nox | \.venv | _build | build diff -Nru python-cogent-2021.10.12a1+dfsg/README.md python-cogent-2022.5.25a1+dfsg/README.md --- python-cogent-2021.10.12a1+dfsg/README.md 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/README.md 2022-05-24 23:42:33.000000000 +0000 @@ -1,9 +1,13 @@ +[![PyPI version](https://badge.fury.io/py/cogent3.svg)](https://badge.fury.io/py/cogent3) +[![Downloads](https://pepy.tech/badge/cogent3/month)](https://pepy.tech/project/cogent3) + [![Build Status](https://github.com/cogent3/cogent3/workflows/CI/badge.svg?branch=develop)](https://github.com/cogent3/cogent3/actions?workflow=CI) [![codecov](https://codecov.io/gh/cogent3/cogent3/branch/master/graph/badge.svg)](https://codecov.io/gh/cogent3/cogent3) -![Using Black Formatting](https://img.shields.io/badge/code%20style-black-000000.svg) -[![Python 3.7+](https://img.shields.io/badge/python-3.7+-blue.svg)](https://www.python.org/downloads/release/python-370/) [![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/cogent3/cogent3.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/cogent3/cogent3/context:python) +[![Python 3.7+](https://img.shields.io/badge/python-3.7+-blue.svg)](https://www.python.org/downloads/release/python-370/) +![Using Black 
Formatting](https://img.shields.io/badge/code%20style-black-000000.svg) + ## `cogent3` `cogent3` is a mature python library for analysis of genomic sequence data. We endeavour to provide a first-class experience within Jupyter notebooks, but the algorithms also support parallel execution on compute systems with 1000's of processors. diff -Nru python-cogent-2021.10.12a1+dfsg/.readthedocs.yml python-cogent-2022.5.25a1+dfsg/.readthedocs.yml --- python-cogent-2021.10.12a1+dfsg/.readthedocs.yml 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/.readthedocs.yml 1970-01-01 00:00:00.000000000 +0000 @@ -1,4 +0,0 @@ -version: 2 - -conda: - environment: doc/rtd-environment.yml diff -Nru python-cogent-2021.10.12a1+dfsg/requirements.txt python-cogent-2022.5.25a1+dfsg/requirements.txt --- python-cogent-2021.10.12a1+dfsg/requirements.txt 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/requirements.txt 2022-05-24 23:42:33.000000000 +0000 @@ -1,8 +1,7 @@ chardet==4.0.0 -pillow>=8.3.2 -psutil==5.8.0 -scipy==1.5.4; python_version < '3.7' -scipy==1.6.1; python_version > '3.6' -tox==3.23.1 -tox-gh-actions==2.5.0 +pillow==9.0.1 +psutil==5.9.0 +scipy==1.7.3 +nox==2022.1.7 +plotly==5.5.0 .[dev] diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/align/align.py python-cogent-2022.5.25a1+dfsg/src/cogent3/align/align.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/align/align.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/align/align.py 2022-05-24 23:42:33.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru 
python-cogent-2021.10.12a1+dfsg/src/cogent3/align/compare_numba.py python-cogent-2022.5.25a1+dfsg/src/cogent3/align/compare_numba.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/align/compare_numba.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/align/compare_numba.py 2022-05-24 23:42:33.000000000 +0000 @@ -2,10 +2,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/align/dp_calculation.py python-cogent-2022.5.25a1+dfsg/src/cogent3/align/dp_calculation.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/align/dp_calculation.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/align/dp_calculation.py 2022-05-24 23:42:33.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Gavin Huttley and Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttleuy" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -78,7 +78,7 @@ self.rate = rate def __repr__(self): - return "Bin(Pi, Qd, %s, %s)" % (self.rate, vars(self.indel)) + return f"Bin(Pi, Qd, {self.rate}, {vars(self.indel)})" class AnnotateFloatDefn(CalculationDefn): diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/align/indel_model.py python-cogent-2022.5.25a1+dfsg/src/cogent3/align/indel_model.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/align/indel_model.py 2021-10-12 00:17:34.000000000 +0000 +++ 
python-cogent-2022.5.25a1+dfsg/src/cogent3/align/indel_model.py 2022-05-24 23:42:33.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/align/indel_positions.py python-cogent-2022.5.25a1+dfsg/src/cogent3/align/indel_positions.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/align/indel_positions.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/align/indel_positions.py 2022-05-24 23:42:33.000000000 +0000 @@ -1,8 +1,8 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -205,14 +205,14 @@ for (i, preds) in enumerate(pred_sets): # print i, preds for pred in preds: - print(" ", ("node%s -> node%s" % (pred, i)), file=dot) + print(" ", (f"node{pred} -> node{i}"), file=dot) if i == 0: label = "START" elif i == len(pred_sets) - 1: label = "END" else: label = str(i) - print(" ", ("node%s" % i), '[label="%s"]' % label, file=dot) + print(" ", (f"node{i}"), f'[label="{label}"]', file=dot) print("}", file=dot) print("", file=dot) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/align/__init__.py python-cogent-2022.5.25a1+dfsg/src/cogent3/align/__init__.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/align/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/align/__init__.py 2022-05-24 
23:42:33.000000000 +0000 @@ -22,10 +22,10 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Jeremy Widmann", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/align/pairwise_pogs_numba.py python-cogent-2022.5.25a1+dfsg/src/cogent3/align/pairwise_pogs_numba.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/align/pairwise_pogs_numba.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/align/pairwise_pogs_numba.py 2022-05-24 23:42:33.000000000 +0000 @@ -5,10 +5,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/align/pairwise.py python-cogent-2022.5.25a1+dfsg/src/cogent3/align/pairwise.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/align/pairwise.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/align/pairwise.py 2022-05-24 23:42:33.000000000 +0000 @@ -29,10 +29,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -182,7 +182,7 @@ def 
__str__(self): return "".join( - "(%s,%s)%s" % (x, y, ".xym"[dx + 2 * dy]) + f"({x},{y}){'.xym'[dx + 2 * dy]}" for (state, (x, y), (dx, dy)) in self.tlist ) @@ -474,7 +474,7 @@ self.leaf = leaf def __repr__(self): - return "AlPOG(%s,%s)" % (self.pog.all_jumps, repr(self.leaf)) + return f"AlPOG({self.pog.all_jumps},{repr(self.leaf)})" def get_alignment(self): from cogent3 import make_aligned_seqs @@ -542,7 +542,7 @@ self._pog = None def __repr__(self): - return "AlSeq(%s)" % (getattr(self, "seq", "?")) + return f"AlSeq({getattr(self, 'seq', '?')})" def get_pog(self): if self._pog is None: @@ -825,7 +825,7 @@ else: msg = "dp" if memory > 500: - warnings.warn("%s will use > %sMb." % (msg, memory)) + warnings.warn(f"{msg} will use > {memory}Mb.") track = encoder.get_empty_array(problem_dimensions) else: track = encoder = None @@ -1024,7 +1024,7 @@ ) def __repr__(self): - return "%s(%s)" % (type(self).__name__, ", ".join(self.as_tuple)) + return f"{type(self).__name__}({', '.join(self.as_tuple)})" def __hash__(self): return hash(self.as_tuple) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/align/pairwise_seqs_numba.py python-cogent-2022.5.25a1+dfsg/src/cogent3/align/pairwise_seqs_numba.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/align/pairwise_seqs_numba.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/align/pairwise_seqs_numba.py 2022-05-24 23:42:33.000000000 +0000 @@ -5,10 +5,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/align/progressive.py python-cogent-2022.5.25a1+dfsg/src/cogent3/align/progressive.py --- 
python-cogent-2021.10.12a1+dfsg/src/cogent3/align/progressive.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/align/progressive.py 2022-05-24 23:42:33.000000000 +0000 @@ -5,10 +5,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -25,14 +25,7 @@ ests_from_pairwise=True, param_vals=None, ): - """Returns a multiple alignment and tree. - - Uses the provided substitution model and a tree for determining the - progressive order. If a tree is not provided a Neighbour Joining tree is - constructed from pairwise distances estimated from pairwise aligning the - sequences. If running in parallel, only the distance estimation is - parallelised and only the master CPU returns the alignment and tree, other - CPU's return None, None. + """Returns a multiple sequence alignment and tree. Parameters ---------- @@ -49,6 +42,14 @@ named key, value pairs for model parameters. These override ests_from_pairwise. + Notes + ----- + Uses a tree for determining the progressive order. If a tree is not + provided, a Neighbour Joining tree is constructed from pairwise + distances estimated (using the provided substitution model) from pairwise + aligning the sequences. + + Parameters and tree are added to ``.info["align_params"]``. 
""" from cogent3 import get_model @@ -99,7 +100,7 @@ numbers = dcalc.get_param_values(param) param_vals[param] = numbers.median - ui.display("Doing %s alignment" % ["progressive", "pairwise"][two_seqs]) + ui.display(f"Doing {['progressive', 'pairwise'][two_seqs]} alignment") with LF.updates_postponed(): for param, val in list(param_vals.items()): LF.set_param_rule(param, value=val, is_constant=True) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/align/pycompare.py python-cogent-2022.5.25a1+dfsg/src/cogent3/align/pycompare.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/align/pycompare.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/align/pycompare.py 2022-05-24 23:42:33.000000000 +0000 @@ -8,10 +8,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/align/traceback.py python-cogent-2022.5.25a1+dfsg/src/cogent3/align/traceback.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/align/traceback.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/align/traceback.py 2022-05-24 23:42:33.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Rob Knight", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/app/align.py python-cogent-2022.5.25a1+dfsg/src/cogent3/app/align.py 
--- python-cogent-2021.10.12a1+dfsg/src/cogent3/app/align.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/app/align.py 2022-05-24 23:42:33.000000000 +0000 @@ -26,10 +26,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/app/composable.py python-cogent-2022.5.25a1+dfsg/src/cogent3/app/composable.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/app/composable.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/app/composable.py 2022-05-24 23:42:33.000000000 +0000 @@ -8,16 +8,19 @@ import traceback from copy import deepcopy +from functools import wraps -import scitrack +from scitrack import CachingLogger from cogent3 import make_aligned_seqs, make_unaligned_seqs from cogent3.core.alignment import SequenceCollection +from cogent3.util import parallel as PAR from cogent3.util import progress_display as UI +from cogent3.util.io import open_ from cogent3.util.misc import ( extend_docstring_from, get_object_provenance, - open_, + in_jupyter, ) from .data_store import ( @@ -26,20 +29,22 @@ RAISE, SKIP, DataStoreMember, - SingleReadDataStore, WritableDirectoryDataStore, + get_data_source, ) __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" +from ..util.warning import discontinued + def _make_logfile_name(process): text = str(process) @@ -51,21 +56,6 
@@ return result -def _get_source(source): - if isinstance(source, str): - return str(source) - - # todo maybe a dict? see about getting keys - try: - result = source.source - except AttributeError: - try: - result = source.info.source - except AttributeError: - result = None - return result - - def _get_origin(origin): return origin if type(origin) == str else origin.__class__.__name__ @@ -89,7 +79,7 @@ # todo this approach to caching persistent arguments for reconstruction # is fragile. Need an inspect module based approach origin = _get_origin(origin) - source = _get_source(source) + source = get_data_source(source) d = locals() d = {k: v for k, v in d.items() if k != "cls"} result = int.__new__(cls, False) @@ -203,13 +193,13 @@ # rules operating on result but not part of a chain self._checkpointable = False self._load_checkpoint = None - self._formatted = ["type='%s'" % self._type] + self._formatted = [f"type='{self._type}'"] def __str__(self): txt = "" if not self.input else str(self.input) if txt: txt += " + " - txt += "%s(%s)" % (self.__class__.__name__, ", ".join(self._formatted)) + txt += f"{self.__class__.__name__}({', '.join(self._formatted)})" txt = textwrap.fill( txt, width=80, break_long_words=False, break_on_hyphens=False ) @@ -240,13 +230,11 @@ v = v.name except AttributeError: pass - try: - get_ipython() + + if in_jupyter(): if p == "kwargs" and v == {"store_history": True, "silent": False}: continue - except NameError: - pass - formatted.append("%s=%r" % (p, v)) + formatted.append(f"{p}={v!r}") self._formatted += formatted def __add__(self, other): @@ -376,11 +364,11 @@ self, dstore, parallel=False, - mininterval=2, par_kw=None, - logger=True, + logger=None, cleanup=False, ui=None, + **kwargs, ): """invokes self composable function on the provided data store @@ -397,15 +385,12 @@ par_kw dict of values for configuring parallel execution. logger - Argument ignored if not an io.writer. A scitrack logger, a logfile - name or True. 
If True, a scitrack logger is created with a name that - defaults to the composable function names and the process ID, - e.g. load_unaligned-progressive_align-write_seqs-pid6962.log. - If string, that name is used as the logfile name. Otherwise the - logger is used as is. + Argument ignored if not an io.writer. If a scitrack logger not provided, + one is created with a name that defaults to the composable function names + and the process ID. cleanup : bool - after copying of log files into the data store, they are deleted - from their original location + after copying of log files into the data store, it is deleted + from the original location Returns ------- @@ -416,6 +401,9 @@ If run in parallel, this instance serves as the master object and aggregates results. """ + if "mininterval" in kwargs: + discontinued("argument", "mininterval", "2022.10", stack_level=1) + if isinstance(dstore, str): dstore = [dstore] @@ -423,29 +411,24 @@ if not dstore: raise ValueError("dstore is empty") - start = time.time() - loggable = hasattr(self, "data_store") - if ( - not loggable - or type(logger) != scitrack.CachingLogger - and type(logger) != str - and logger != True - ): - LOGGER = None - elif type(logger) == scitrack.CachingLogger: - LOGGER = logger - elif type(logger) == str: - LOGGER = scitrack.CachingLogger - LOGGER.log_file_path = logger - else: - log_file_path = pathlib.Path(_make_logfile_name(self)) - src = pathlib.Path(self.data_store.source) - log_file_path = src.parent / log_file_path - LOGGER = scitrack.CachingLogger() - LOGGER.log_file_path = str(log_file_path) - if LOGGER: - LOGGER.log_message(str(self), label="composable function") - LOGGER.log_versions(["cogent3"]) + am_writer = hasattr(self, "data_store") + if am_writer: + start = time.time() + if logger is None: + logger = CachingLogger() + elif not isinstance(logger, CachingLogger): + raise TypeError( + f"logger must be scitrack.CachingLogger, not {type(logger)}" + ) + + if not logger.log_file_path: + src = 
pathlib.Path(self.data_store.source) + logger.log_file_path = str(src.parent / _make_logfile_name(self)) + + log_file_path = str(logger.log_file_path) + logger.log_message(str(self), label="composable function") + logger.log_versions(["cogent3"]) + results = [] process = self.input or self if self.input: @@ -457,52 +440,83 @@ self.input = None # with a tinydb dstore, this also excludes data that failed to complete - todo = [m for m in dstore if not self.job_done(m)] - - for i, result in enumerate( - ui.imap( - process, todo, parallel=parallel, par_kw=par_kw, mininterval=mininterval + # todo update to consider different database backends + # we want a dict mapping input member names to their md5 sums so these can + # be logged + inputs = {} + for m in dstore: + input_id = pathlib.Path( + m if isinstance(m, DataStoreMember) else get_data_source(m) ) - ): - outcome = result if process is self else self(result) - results.append(outcome) - if LOGGER: - member = todo[i] - # ensure member is a DataStoreMember instance - if not isinstance(member, DataStoreMember): - member = SingleReadDataStore(member)[0] - - mem_id = self.data_store.make_relative_identifier(member.name) - src = self.data_store.make_relative_identifier(result) - assert ( - src == mem_id - ), f"mismatched input data and result identifiers: {src} != {mem_id}" - - LOGGER.log_message(member, label="input") - if member.md5: - LOGGER.log_message(member.md5, label="input md5sum") - - if outcome: - member = self.data_store.get_member(mem_id) - LOGGER.log_message(member, label="output") - LOGGER.log_message(member.md5, label="output md5sum") + suffixes = input_id.suffixes + input_id = input_id.name.replace("".join(suffixes), "") + inputs[input_id] = m + + if len(inputs) < len(dstore): + diff = len(dstore) - len(inputs) + raise ValueError( + f"could not construct unique identifiers for {diff} records, " + "avoid using '.' as a delimiter in names." 
+ ) + + if parallel: + par_kw = par_kw or {} + to_do = PAR.as_completed(process, inputs.values(), **par_kw) + else: + to_do = map(process, inputs.values()) + + for result in ui.series(to_do, count=len(inputs)): + if process is not self and am_writer: + # if result is NotCompleted, it will be written as incomplete + # by data store backend. The outcome is just the + # associated db identifier for tracking steps below we need to + # know it's NotCompleted. + # Note: we directly call .write() so NotCompleted's don't + # get blocked from being written by __call__() + outcome = self.write(data=result) + if result and isinstance(outcome, DataStoreMember): + input_id = outcome.name else: - # we have a NotCompletedResult - try: - # tinydb supports storage - self.data_store.write_incomplete(mem_id, outcome.to_rich_dict()) - except AttributeError: - pass - LOGGER.log_message( + input_id = get_data_source(result) + outcome = result + input_id = pathlib.Path(pathlib.Path(input_id)) + suffixes = input_id.suffixes + input_id = input_id.name.replace("".join(suffixes), "") + elif process is not self: + outcome = self(result) + else: + outcome = result + + results.append(outcome) + + if am_writer: + # now need to search for the source member + m = inputs[input_id] + input_md5 = getattr(m, "md5", None) + logger.log_message(input_id, label="input") + if input_md5: + logger.log_message(input_md5, label="input md5sum") + + if isinstance(outcome, NotCompleted): + # log error/fail details + logger.log_message( f"{outcome.origin} : {outcome.message}", label=outcome.type ) + continue + elif not outcome: + # other cases where outcome is Falsy (e.g. 
None) + logger.log_message(f"unexpected value {outcome!r}", label="FAIL") + continue + + logger.log_message(outcome, label="output") + logger.log_message(outcome.md5, label="output md5sum") - finish = time.time() - taken = finish - start - if LOGGER: - LOGGER.log_message(f"{taken}", label="TIME TAKEN") - LOGGER.shutdown() - log_file_path = str(log_file_path) + if am_writer: + finish = time.time() + taken = finish - start + + logger.log_message(f"{taken}", label="TIME TAKEN") + logger.shutdown() self.data_store.add_file(log_file_path, cleanup=cleanup, keep_suffix=True) self.data_store.close() @@ -656,14 +670,14 @@ identifier = self._make_output_identifier(data) exists = identifier in self.data_store if exists and self._if_exists == RAISE: - msg = "'%s' already exists" % identifier + msg = f"'{identifier}' already exists" raise RuntimeError(msg) if self._if_exists == OVERWRITE: exists = False return exists - def write(self, data): + def write(self, data) -> DataStoreMember: # over-ride in subclass raise NotImplementedError @@ -724,7 +738,10 @@ args = self._args + args kwargs_ = deepcopy(self._kwargs) kwargs_.update(kwargs) - return self._user_func(*args, **kwargs_) + # the following enables a decorated user function (via @appify()) + # or directly passed user function + func = getattr(self._user_func, "__wrapped__", self._user_func) + return func(*args, **kwargs_) def __str__(self): txt = "" if not self.input else str(self.input) @@ -740,45 +757,45 @@ return str(self) -class appify: +@extend_docstring_from(ComposableType.__init__, pre=True) +def appify(input_types, output_types, data_types=None): """function decorator for generating user apps. Simplifies creation of - user_function() instancese, e.g. + user_function() instances, e.g. >>> @appify(SEQUENCE_TYPE, SEQUENCE_TYPE, data_types="SequenceCollection") ... def omit_seqs(seqs, quantile=None, gap_fraction=1, moltype="dna"): ... 
return seqs.omit_bad_seqs(quantile=quantile, gap_fraction=gap_fraction, moltype="dna") ... - `omit_seqs()` is now an app factory, allowing creating variants of the app. + ``omit_seqs()`` is now an app factory, allowing creating variants of the app. >>> omit_bad = omit_seqs(quantile=0.95) - omit_bad is now a composable user_function app. Calling with different + ``omit_bad`` is now a composable ``user_function`` app. Calling with different args/kwargs values returns a variant app, as per the behaviour of builtin apps. + """ + # the 3 nested functions are required to allow setting decorator arguments + # to allow using functools.wraps so the decorated function has the correct + # docstring, name etc... And, the final inner one gets to pass the + # reference to the wrapped function (wrapped_ref) to user_function. This + # latter is required to enable pickling of the user_function instance. + def enclosed(func): + @wraps(func) + def maker(*args, **kwargs): + # construct the user_function app + return user_function( + wrapped_ref, + input_types, + output_types, + *args, + data_types=data_types, + **kwargs, + ) - @extend_docstring_from(ComposableType.__init__) - def __init__(self, input_types, output_types, data_types=None) -> None: - self._it = input_types - self._ot = output_types - self._dt = data_types - self._func = None - - def __call__(self, func): - # executed on use as decorator - self._func = func - # makes the returned reference have the name, docs etc. 
- # of original function - self._make_app.__func__.__doc__ = f"appify: {func.__doc__}" - self._make_app.__func__.__repr__ = lambda x: repr(func) - self._make_app.__func__.__name__ = func.__name__ - self._make_app.__func__.__module__ = func.__module__ - - return self._make_app - - def _make_app(self, *args, **kwargs): - # construct the user_function app - return user_function( - self._func, self._it, self._ot, *args, data_types=self._dt, **kwargs - ) + wrapped_ref = maker + + return maker + + return enclosed diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/app/data_store.py python-cogent-2022.5.25a1+dfsg/src/cogent3/app/data_store.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/app/data_store.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/app/data_store.py 2022-05-24 23:42:33.000000000 +0000 @@ -3,6 +3,7 @@ import os import pathlib import re +import reprlib import shutil import weakref import zipfile @@ -21,22 +22,18 @@ from tinydb.storages import JSONStorage from cogent3.util.deserialise import deserialise_not_completed -from cogent3.util.misc import ( - atomic_write, - extend_docstring_from, - get_format_suffixes, - open_, -) +from cogent3.util.io import atomic_write, get_format_suffixes, open_ +from cogent3.util.misc import extend_docstring_from from cogent3.util.parallel import is_master_process from cogent3.util.table import Table from cogent3.util.union_dict import UnionDict __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -49,6 +46,29 @@ IGNORE = "ignore" +def get_data_source(data) -> str: + """identifies attribute of data named 'source' + + Notes + ----- + Alignment objects have a source element in their info 
dict + """ + if isinstance(data, (str, pathlib.Path)): + return str(data) + + if hasattr(data, "source"): + return str(data.source) + + if hasattr(data, "info"): + return get_data_source(data.info) + + if isinstance(data, dict): + value = data.get("source") + return str(value) if value else None + + return None + + def make_record_for_json(identifier, data, completed): """returns a dict for storage as json""" try: @@ -310,7 +330,7 @@ @property def members(self): if not self._members: - pattern = "%s/**/*.%s" % (self.source, self.suffix) + pattern = f"{self.source}/**/*.{self.suffix}" paths = glob.iglob(pattern, recursive=True) members = [] for i, path in enumerate(paths): @@ -357,7 +377,7 @@ def members(self): if os.path.exists(self.source) and not self._members: source_path = self.source.replace(Path(self.source).suffix, "") - pattern = "*.%s" % self.suffix + pattern = f"*.{self.suffix}" members = [] with zipfile.ZipFile(self.source) as archive: names = archive.namelist() @@ -411,12 +431,11 @@ def make_relative_identifier(self, data): """returns identifier for a new member relative to source""" - from cogent3.app.composable import _get_source if isinstance(data, DataStoreMember): data = data.name elif type(data) != str: - data = _get_source(data) + data = get_data_source(data) if data is None: raise ValueError( "objects for storage require either a " @@ -484,7 +503,7 @@ return relativeid - def write_incomplete(self, identifier, not_completed): + def write_incomplete(self, identifier, not_completed) -> DataStoreMember: """ Parameters @@ -505,9 +524,9 @@ record = make_record_for_json(identifier, not_completed, False) record = json.dumps(record) - self.write(identifier, record) + return self.write(identifier, record) - def write(self, identifier, data, *args, **kwargs): + def write(self, identifier, data, *args, **kwargs) -> DataStoreMember: """ Parameters ---------- @@ -596,6 +615,9 @@ @extend_docstring_from(WritableDataStoreBase.write) def write(self, identifier, 
data): + if not data: + return data + super().write(identifier, data) id_suffix = identifier.split(".")[-1] if id_suffix not in (self.suffix, "log"): @@ -742,41 +764,43 @@ @property def summary_incomplete(self): """returns a table summarising incomplete results""" + # detect last exception line + err_pat = re.compile(r"[A-Z][a-z]+[A-Z][a-z]+\:.+") types = defaultdict(list) indices = "type", "origin" for member in self.incomplete: record = member.read() record = deserialise_not_completed(record) key = tuple(getattr(record, k, None) for k in indices) - types[key].append([record.message, record.source]) + match = err_pat.findall(record.message) + types[key].append([match[-1] if match else record.message, record.source]) header = list(indices) + ["message", "num", "source"] rows = [] + maxtring = reprlib.aRepr.maxstring + reprlib.aRepr.maxstring = 45 + for record in types: messages, sources = list(zip(*types[record])) - messages = list(sorted(set(messages))) - if len(messages) > 3: - messages = messages[:3] + ["..."] - - if len(sources) > 3: - sources = sources[:3] + ("...",) - + messages = reprlib.repr( + ", ".join(m.splitlines()[-1] for m in set(messages)) + ) + sources = reprlib.repr(", ".join(s.splitlines()[-1] for s in sources)) row = list(record) + [ - ", ".join(messages), + messages, len(types[record]), - ", ".join(sources), + sources, ] rows.append(row) + reprlib.aRepr.maxstring = maxtring # restoring original val + return Table(header=header, data=rows, title="incomplete records") @property def members(self): if not self._members: - if self.suffix: - pattern = translate("*.%s" % self.suffix) - else: - pattern = translate("*") + pattern = translate(f"*.{self.suffix}") if self.suffix else translate("*") members = [] query = Query() query = (query.identifier.matches(pattern)) & (query.completed == True) @@ -955,6 +979,11 @@ def write(self, identifier, data): # writing into a tinydb has its own logic for conversion to json # so we don't validate data is a string 
for this case + from cogent3.app.composable import NotCompleted + + if isinstance(data, NotCompleted): + return self.write_incomplete(identifier, data) + super().write(identifier, "") id_suffix = identifier.split(".")[-1] if id_suffix not in (self.suffix, "log"): @@ -978,7 +1007,6 @@ def write_incomplete(self, identifier, not_completed): """stores an incomplete result object""" - from .composable import NotCompleted matches = self.filtered(identifier) if matches: diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/app/dist.py python-cogent-2022.5.25a1+dfsg/src/cogent3/app/dist.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/app/dist.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/app/dist.py 2022-05-24 23:42:33.000000000 +0000 @@ -17,10 +17,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/app/evo.py python-cogent-2022.5.25a1+dfsg/src/cogent3/app/evo.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/app/evo.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/app/evo.py 2022-05-24 23:42:33.000000000 +0000 @@ -1,9 +1,11 @@ -import warnings +from copy import deepcopy + +import cogent3.util.io from cogent3 import load_tree, make_tree from cogent3.core.tree import TreeNode from cogent3.evolve.models import get_model -from cogent3.util import misc, parallel +from cogent3.util import parallel from .composable import ( ALIGNED_TYPE, @@ -28,10 +30,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ 
= "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -51,11 +53,13 @@ tree=None, unique_trees=False, name=None, + optimise_motif_probs=False, sm_args=None, lf_args=None, time_het=None, param_rules=None, opt_args=None, + upper=50, split_codons=False, show_progress=False, verbose=False, @@ -69,14 +73,18 @@ if None, assumes a star phylogeny (only valid for 3 taxa). Can be a newick formatted tree, a path to a file containing one, or a Tree instance. - unique_trees: bool + unique_trees : bool whether to specify a unique tree per alignment. Only applies if number of sequences equals 3. - name + name : str name of the model + optimise_motif_probs : bool + whether the motif probabilities are free parameters. If False, + takes the average of frequencies from the alignment. Overrides + the setting of a sub model instance, or any value provided in + sm_args. sm_args : dict - arguments to be passed to the substitution model constructor, e.g. - dict(optimise_motif_probs=True) + arguments to be passed to the substitution model constructor lf_args : dict arguments to be passed to the likelihood function constructor time_het @@ -84,13 +92,16 @@ [dict(edges=['Human', 'Chimp'], is_independent=False, upper=10)]. Passed to the likelihood function .set_time_heterogeneity() method. - param_rules + param_rules : list other parameter rules, passed to the likelihood function set_param_rule() method opt_args : dict arguments for the numerical optimiser, e.g. dict(max_restarts=5, tolerance=1e-6, max_evaluations=1000, limit_action='ignore') + upper + Upper bound for all rate and length parameters. Overrides + values defined in ``time_het`` or ``param_rules``. 
split_codons : bool if True, incoming alignments are split into the 3 frames and each frame is fit separately @@ -103,7 +114,7 @@ ------- Calling an instance with an alignment returns a model_result instance with the optimised likelihood function. In the case of split_codons, - the result object has a separate entry for each. + the result object has a separate entry for each codon position. """ super(model, self).__init__( input_types=self._input_types, @@ -111,19 +122,28 @@ data_types=self._data_types, ) self._verbose = verbose + self._upper = upper self._formatted_params() assert not ( tree and unique_trees ), "cannot provide a tree when unique_trees is True" self._unique_trees = unique_trees - sm_args = sm_args or {} + sm_args = deepcopy(sm_args or {}) + if "optimise_motif_probs" in sm_args: + raise ValueError( + "'optimise_motif_probs' value in sm_args is IGNORED, use explicit argument instead", + ) + + sm_args["optimise_motif_probs"] = optimise_motif_probs if type(sm) == str: sm = get_model(sm, **sm_args) + else: + sm._optimise_motif_probs = optimise_motif_probs self._sm = sm if len(sm.get_motifs()[0]) > 1: split_codons = False - if misc.path_exists(tree): + if cogent3.util.io.path_exists(tree): tree = load_tree(filename=tree, underscore_unmunge=True) elif type(tree) == str: tree = make_tree(treestring=tree, underscore_unmunge=True) @@ -132,22 +152,24 @@ raise TypeError(f"invalid tree type {type(tree)}") self._tree = tree - self._lf_args = lf_args or {} + self._lf_args = deepcopy(lf_args or {}) if not name: name = sm.name or "unnamed model" self.name = name - self._opt_args = opt_args or dict(max_restarts=5, show_progress=show_progress) + self._opt_args = deepcopy( + opt_args or dict(max_restarts=5, show_progress=show_progress) + ) self._opt_args["show_progress"] = self._opt_args.get( "show_progress", show_progress ) - param_rules = param_rules or {} + param_rules = deepcopy(param_rules or []) if param_rules: for rule in param_rules: if 
rule.get("is_constant"): continue - rule["upper"] = rule.get("upper", 50) # default upper bound + rule["upper"] = rule.get("upper", upper) # default upper bound self._param_rules = param_rules - self._time_het = time_het + self._time_het = deepcopy(time_het) self._split_codons = split_codons self.func = self.fit @@ -170,16 +192,26 @@ if self._verbose: print(lf) if self._time_het == "max": - lf.set_time_heterogeneity(is_independent=True, upper=50) + lf.set_time_heterogeneity(is_independent=True, upper=self._upper) else: - lf.set_time_heterogeneity(edge_sets=self._time_het) + lf.set_time_heterogeneity(edge_sets=self._time_het, upper=self._upper) else: rules = lf.get_param_rules() for rule in rules: - if rule["par_name"] not in ("mprobs", "psubs"): - rule["upper"] = rule.get("upper", 50) + if ( + rule["par_name"] + in ( + "mprobs", + "psubs", + "bprobs", + "dpsubs", + ) + or rule.get("is_constant") + ): + continue + rule["upper"] = min(rule.get("upper") or self._upper + 1, self._upper) - lf.apply_param_rules([rule]) + lf.apply_param_rules(rules) if initialise: lf = initialise(lf, identifier) @@ -299,7 +331,7 @@ A callback function for initialising the alternate model likelihood function prior to optimisation. It must take 2 input arguments and return the modified alternate likelihood function. - Default is to use MLEs from the null model. + Default is to use MLEs from the null model. Overrides sequential. 
Notes ----- @@ -313,7 +345,6 @@ ) self._formatted_params() if sequential and init_alt: - warnings.warn("init_alt is specified, ignoring sequential") sequential = False self.null = null @@ -357,7 +388,7 @@ return null try: - alts = [alt for alt in self._initialised_alt(null, aln)] + alts = list(self._initialised_alt(null, aln)) except ValueError: msg = f"Hypothesis alt had bounds error {aln.info.source}" return NotCompleted("ERROR", self, msg, source=aln) @@ -368,7 +399,7 @@ return alt results = {alt.name: alt for alt in alts} - results.update({null.name: null}) + results[null.name] = null result = self._make_result(aln) result.update(results) @@ -417,6 +448,8 @@ result = bootstrap_result(aln.info.source) try: obs = self._hyp(aln) + if not obs: + return obs except ValueError as err: result = NotCompleted("ERROR", str(self._hyp), err.args[0]) return result @@ -424,7 +457,7 @@ self._null = obs.null self._inpath = aln.info.source - map_fun = map if not self._parallel else parallel.imap + map_fun = parallel.imap if self._parallel else map sym_results = [r for r in map_fun(self._fit_sim, range(self._num_reps)) if r] for sym_result in sym_results: if not sym_result: @@ -534,13 +567,12 @@ newick formatted tree, a path to a file containing one, or a Tree instance. sm_args - arguments to be passed to the substitution model constructor, e.g. - dict(optimise_motif_probs=True) + arguments to be passed to the substitution model constructor gc genetic code, either name or number (see cogent3.available_codes) optimise_motif_probs : bool If True, motif probabilities are free parameters. If False (default) - they are estimated frokm the alignment. + they are estimated from the alignment. 
lf_args arguments to be passed to the likelihood function constructor opt_args @@ -561,7 +593,7 @@ if not is_codon_model(sm): raise ValueError(f"{sm} is not a codon model") - if misc.path_exists(tree): + if cogent3.util.io.path_exists(tree): tree = load_tree(filename=tree, underscore_unmunge=True) elif type(tree) == str: tree = make_tree(treestring=tree, underscore_unmunge=True) @@ -572,7 +604,6 @@ # instantiate model, ensuring genetic code setting passed on sm_args = sm_args or {} sm_args["gc"] = sm_args.get("gc", gc) - sm_args["optimise_motif_probs"] = optimise_motif_probs if type(sm) == str: sm = get_model(sm, **sm_args) @@ -583,11 +614,12 @@ sm, tree, name=f"{model_name}-null", - sm_args=sm_args, + optimise_motif_probs=optimise_motif_probs, + sm_args=deepcopy(sm_args), opt_args=opt_args, show_progress=show_progress, param_rules=[dict(par_name="omega", is_constant=True, value=1.0)], - lf_args=lf_args, + lf_args=deepcopy(lf_args), verbose=verbose, ) @@ -596,10 +628,11 @@ sm, tree, name=f"{model_name}-alt", - sm_args=sm_args, + optimise_motif_probs=optimise_motif_probs, + sm_args=deepcopy(sm_args), opt_args=opt_args, show_progress=show_progress, - lf_args=lf_args, + lf_args=deepcopy(lf_args), verbose=verbose, ) hyp = hypothesis(null, alt) @@ -648,13 +681,12 @@ newick formatted tree, a path to a file containing one, or a Tree instance. sm_args - arguments to be passed to the substitution model constructor, e.g. - dict(optimise_motif_probs=True) + arguments to be passed to the substitution model constructor gc genetic code, either name or number (see cogent3.available_codes) optimise_motif_probs : bool If True, motif probabilities are free parameters. If False (default) - they are estimated frokm the alignment. + they are estimated from the alignment. 
tip1 : str name of tip 1 tip2 : str @@ -697,7 +729,7 @@ if not any([tip1, tip2]): raise ValueError("must provide at least a single tip name") - if misc.path_exists(tree): + if cogent3.util.io.path_exists(tree): tree = load_tree(filename=tree, underscore_unmunge=True) elif type(tree) == str: tree = make_tree(treestring=tree, underscore_unmunge=True) @@ -721,7 +753,6 @@ # instantiate model, ensuring genetic code setting passed on sm_args = sm_args or {} sm_args["gc"] = sm_args.get("gc", gc) - sm_args["optimise_motif_probs"] = optimise_motif_probs if type(sm) == str: sm = get_model(sm, **sm_args) @@ -733,13 +764,14 @@ dict(par_name="omega", bins="1", is_constant=True, value=1.0), ] lf_args = lf_args or {} - null_lf_args = lf_args.copy() + null_lf_args = deepcopy(lf_args) null_lf_args.update(dict(bins=("0", "1"))) self.null = model( sm, tree, name=f"{model_name}-null", - sm_args=sm_args, + optimise_motif_probs=optimise_motif_probs, + sm_args=deepcopy(sm_args), param_rules=null_param_rules, lf_args=null_lf_args, opt_args=opt_args, @@ -748,13 +780,14 @@ ) # defining the alternate model, param rules to be completed each call - alt_lf_args = lf_args.copy() + alt_lf_args = lf_args alt_lf_args.update(dict(bins=("0", "1", "2a", "2b"))) self.alt_args = dict( sm=sm, tree=tree, name=f"{model_name}-alt", - sm_args=sm_args, + optimise_motif_probs=optimise_motif_probs, + sm_args=deepcopy(sm_args), edges=edges, lf_args=alt_lf_args, opt_args=opt_args, @@ -782,7 +815,7 @@ r["bins"] = [bin_id, "2a"] if bin_id == "0" else [bin_id, "2b"] # set the starting values for 2a/b - alt_args = self.alt_args.copy() + alt_args = deepcopy(self.alt_args) edges = alt_args.pop("edges") upper_omega = alt_args.pop("upper_omega") rules.append( @@ -848,8 +881,7 @@ newick formatted tree, a path to a file containing one, or a Tree instance. sm_args - arguments to be passed to the substitution model constructor, e.g. 
- dict(optimise_motif_probs=True) + arguments to be passed to the substitution model constructor gc genetic code, either name or number (see cogent3.available_codes) optimise_motif_probs : bool @@ -877,7 +909,7 @@ if not is_codon_model(sm): raise ValueError(f"{sm} is not a codon model") - if misc.path_exists(tree): + if cogent3.util.io.path_exists(tree): tree = load_tree(filename=tree, underscore_unmunge=True) elif type(tree) == str: tree = make_tree(treestring=tree, underscore_unmunge=True) @@ -888,7 +920,6 @@ # instantiate model, ensuring genetic code setting passed on sm_args = sm_args or {} sm_args["gc"] = sm_args.get("gc", gc) - sm_args["optimise_motif_probs"] = optimise_motif_probs if type(sm) == str: sm = get_model(sm, **sm_args) @@ -900,13 +931,14 @@ dict(par_name="omega", bins="neutral", is_constant=True, value=1.0), ] lf_args = lf_args or {} - null_lf_args = lf_args.copy() + null_lf_args = deepcopy(lf_args) null_lf_args.update(dict(bins=("-ve", "neutral"))) self.null = model( sm, tree, name=f"{model_name}-null", - sm_args=sm_args, + optimise_motif_probs=optimise_motif_probs, + sm_args=deepcopy(sm_args), param_rules=null_param_rules, lf_args=null_lf_args, opt_args=opt_args, @@ -915,13 +947,14 @@ ) # defining the alternate model, param rules to be completed each call - alt_lf_args = lf_args.copy() + alt_lf_args = deepcopy(lf_args) alt_lf_args.update(dict(bins=("-ve", "neutral", "+ve"))) self.alt_args = dict( sm=sm, tree=tree, name=f"{model_name}-alt", - sm_args=sm_args, + optimise_motif_probs=optimise_motif_probs, + sm_args=deepcopy(sm_args), lf_args=alt_lf_args, opt_args=opt_args, show_progress=show_progress, @@ -943,7 +976,7 @@ break # set the starting value for +ve bin - alt_args = self.alt_args.copy() + alt_args = deepcopy(self.alt_args) upper_omega = alt_args.pop("upper_omega") rules.append( dict( @@ -1015,8 +1048,7 @@ newick formatted tree, a path to a file containing one, or a Tree instance. 
sm_args - arguments to be passed to the substitution model constructor, e.g. - dict(optimise_motif_probs=True) + arguments to be passed to the substitution model constructor gc genetic code, either name or number (see cogent3.available_codes) optimise_motif_probs : bool @@ -1063,7 +1095,7 @@ if not any([tip1, tip2]): raise ValueError("must provide at least a single tip name") - if misc.path_exists(tree): + if cogent3.util.io.path_exists(tree): tree = load_tree(filename=tree, underscore_unmunge=True) elif type(tree) == str: tree = make_tree(treestring=tree, underscore_unmunge=True) @@ -1087,19 +1119,19 @@ # instantiate model, ensuring genetic code setting passed on sm_args = sm_args or {} sm_args["gc"] = sm_args.get("gc", gc) - sm_args["optimise_motif_probs"] = optimise_motif_probs if type(sm) == str: sm = get_model(sm, **sm_args) model_name = sm.name # defining the null model lf_args = lf_args or {} - null_lf_args = lf_args.copy() + null_lf_args = deepcopy(lf_args) null = model( sm, tree, name=f"{model_name}-null", - sm_args=sm_args, + optimise_motif_probs=optimise_motif_probs, + sm_args=deepcopy(sm_args), lf_args=null_lf_args, opt_args=opt_args, show_progress=show_progress, @@ -1119,11 +1151,12 @@ sm, tree, name=f"{model_name}-alt", - sm_args=sm_args, + optimise_motif_probs=optimise_motif_probs, + sm_args=deepcopy(sm_args), opt_args=opt_args, show_progress=show_progress, param_rules=param_rules, - lf_args=lf_args, + lf_args=deepcopy(lf_args), verbose=verbose, ) hyp = hypothesis(null, alt) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/app/__init__.py python-cogent-2022.5.25a1+dfsg/src/cogent3/app/__init__.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/app/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/app/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -3,10 +3,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent 
Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/app/io.py python-cogent-2022.5.25a1+dfsg/src/cogent3/app/io.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/app/io.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/app/io.py 2022-05-24 23:42:33.000000000 +0000 @@ -3,6 +3,8 @@ import pathlib import zipfile +from typing import Union + import numpy from cogent3.core.alignment import ArrayAlignment, SequenceCollection @@ -32,7 +34,6 @@ ComposableTabular, NotCompleted, _checkpointable, - _get_source, ) from .data_store import ( IGNORE, @@ -44,21 +45,76 @@ ReadOnlyZippedDataStore, SingleReadDataStore, WritableTinyDbDataStore, + get_data_source, load_record_from_json, make_record_for_json, ) __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" +_datastore_reader_map = {} + + +class register_datastore_reader: + """ + registration decorator for read only data store classes + + The registration key must be a string that of the file format suffix + (more than one suffix can be registered at a time). + + Parameters + ---------- + args: str or sequence of str + must be unique, a preceding '.' 
will be added if not already present + """ + + def __init__(self, *args): + args = list(args) + for i, suffix in enumerate(args): + if suffix is None: + assert ( + suffix not in _datastore_reader_map + ), f"{suffix!r} already in {list(_datastore_reader_map)}" + continue + + if not isinstance(suffix, str): + raise TypeError(f"{suffix!r} is not a string") + + if suffix.strip() == suffix and not suffix: + raise ValueError("cannot have white-space suffix") + + suffix = suffix.strip() + if suffix: + suffix = suffix if suffix[0] == "." else f".{suffix}" + + assert ( + suffix not in _datastore_reader_map + ), f"{suffix!r} already in {list(_datastore_reader_map)}" + args[i] = suffix + + self._type_str = tuple(args) + + def __call__(self, func): + for type_str in self._type_str: + _datastore_reader_map[type_str] = func + return func + + +# register the main readers +register_datastore_reader("zip")(ReadOnlyZippedDataStore) +register_datastore_reader("tinydb")(ReadOnlyTinyDbDataStore) +register_datastore_reader(None)(ReadOnlyDirectoryDataStore) + + def findall(base_path, suffix="fa", limit=None, verbose=False): """returns glob match to suffix, path is relative to base_path @@ -75,17 +131,19 @@ raise ValueError(f"'{base_path}' does not exist") zipped = zipfile.is_zipfile(base_path) - klass = ReadOnlyZippedDataStore if zipped else ReadOnlyDirectoryDataStore + klass = _datastore_reader_map.get(".zip" if zipped else None) data_store = klass(base_path, suffix=suffix, limit=limit, verbose=verbose) return data_store.members -def get_data_store(base_path, suffix=None, limit=None, verbose=False): +def get_data_store( + base_path: Union[str, pathlib.Path], suffix=None, limit=None, verbose=False +): """returns DataStore containing glob matches to suffix in base_path Parameters ---------- - base_path : str + base_path : str or Path path to directory or zipped archive suffix : str suffix of filenames @@ -97,7 +155,7 @@ """ base_path = pathlib.Path(base_path) base_path = 
base_path.expanduser().absolute() - if base_path.suffix == ".tinydb": + if base_path.suffix in (".tinydb", ".sqlitedb"): suffix = "json" if suffix is None: @@ -105,18 +163,18 @@ if not base_path.exists(): raise ValueError(f"'{base_path}' does not exist") - if not type(suffix) == str: + + if type(suffix) != str: raise ValueError(f"{suffix} is not a string") - zipped = zipfile.is_zipfile(base_path) - if base_path.suffix == ".tinydb": - klass = ReadOnlyTinyDbDataStore - elif zipped: - klass = ReadOnlyZippedDataStore + if zipfile.is_zipfile(base_path): + ds_suffix = ".zip" + elif base_path.suffix: + ds_suffix = base_path.suffix else: - klass = ReadOnlyDirectoryDataStore - data_store = klass(base_path, suffix=suffix, limit=limit, verbose=verbose) - return data_store + ds_suffix = None + klass = _datastore_reader_map[ds_suffix] + return klass(base_path, suffix=suffix, limit=limit) class _seq_loader: @@ -370,11 +428,21 @@ self._format = format def write(self, data, identifier=None): + from cogent3.app.composable import NotCompleted + + if isinstance(data, NotCompleted): + return self.data_store.write_incomplete(identifier, data) + + if not data: + msg = f"{self.__class__.__name__!r} does not support writing {data!r}" + raise NotImplementedError(msg) + if identifier is None: identifier = self._make_output_identifier(data) + output = data.to_string(format=self._format) - self.data_store.write(identifier, output) - return identifier + stored = self.data_store.write(identifier, output) + return stored class write_seqs(_checkpointable): @@ -432,10 +500,28 @@ self._load_checkpoint = loader def write(self, data, identifier=None): + from cogent3.app.composable import NotCompleted + + if isinstance(data, NotCompleted): + return self.data_store.write_incomplete(identifier, data) + + if not data: + msg = f"{self.__class__.__name__!r} does not support writing {data!r}" + raise NotImplementedError(msg) + if identifier is None: identifier = self._make_output_identifier(data) - 
data.info.stored = self.data_store.write(identifier, data.to_fasta()) - return identifier + + stored = self.data_store.write(identifier, self._formatter(data.to_dict())) + if hasattr(data, "info"): + data.info["stored"] = stored + else: + try: + data.stored = stored + except AttributeError: + pass + + return stored class load_json(Composable): @@ -499,12 +585,22 @@ self._load_checkpoint = self def write(self, data, identifier=None): + from cogent3.app.composable import NotCompleted + + if isinstance(data, NotCompleted): + return self.data_store.write_incomplete(identifier, data) + + if not data: + msg = f"{self.__class__.__name__!r} does not support writing {data!r}" + raise NotImplementedError(msg) + if identifier is None: identifier = self._make_output_identifier(data) + out = make_record_for_json(os.path.basename(identifier), data, True) out = json.dumps(out) stored = self.data_store.write(identifier, out) - # todo is anything actually using this stored attriubte? if not, delete this + # todo is anything actually using this stored attribute? 
if not, delete this # code and all other cases if hasattr(data, "info"): data.info["stored"] = stored @@ -513,7 +609,7 @@ data.stored = stored except AttributeError: pass - return identifier + return stored class load_db(Composable): @@ -594,21 +690,27 @@ ------- identifier """ - data_source = _get_source(data) - if (data_source and identifier is not None) and str(data_source) != str( - identifier - ): - raise ValueError(f"identifier {identifier} != data source {data_source}") + from cogent3.app.composable import NotCompleted - if identifier is None: - identifier = self._make_output_identifier(data) + identifier = identifier or get_data_source(data) + identifier = self._make_output_identifier(identifier) + + if isinstance(data, NotCompleted): + return self.data_store.write_incomplete(identifier, data) + + if not data: + msg = f"{self.__class__.__name__!r} does not support writing {data!r}" + raise NotImplementedError(msg) + + identifier = identifier or get_data_source(data) + identifier = self._make_output_identifier(identifier) # todo revisit this when we establish immutability behaviour of database try: out = data.to_json() except AttributeError: out = json.dumps(data) stored = self.data_store.write(identifier, out) - # todo is anything actually using this stored attriubte? if not, delete this + # todo is anything actually using this stored attribute? 
if not, delete this # code and all other cases if hasattr(data, "info"): data.info["stored"] = stored @@ -617,4 +719,4 @@ data.stored = stored except AttributeError: pass - return identifier + return stored diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/app/result.py python-cogent-2022.5.25a1+dfsg/src/cogent3/app/result.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/app/result.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/app/result.py 2022-05-24 23:42:33.000000000 +0000 @@ -3,19 +3,21 @@ from collections import OrderedDict from collections.abc import MutableMapping from functools import total_ordering +from pathlib import Path import numpy +from cogent3.app.data_store import get_data_source from cogent3.maths.stats import chisqprob from cogent3.util.misc import extend_docstring_from, get_object_provenance from cogent3.util.table import Table __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -28,6 +30,10 @@ _item_types = () def __init__(self, source): + source = get_data_source(source) + if not isinstance(source, (str, Path)): + raise ValueError(f"Cannot infer source from type {type(source)}") + self._store = {} self._construction_kwargs = dict(source=source) self.source = source @@ -44,7 +50,11 @@ break else: if self._item_types: - msg = f"{type_name} not in supported types {self._item_types}" + class_name = self.__class__.__name__ + msg = ( + f"{type_name!r} not a supported value type for {class_name!r}, " + f"supported value types are {self._item_types}" + ) raise TypeError(msg) if not hasattr(val, "to_json"): @@ -138,12 +148,13 @@ if type(stat) == str: stat = eval(stat) - self._construction_kwargs = dict( - 
name=name, - stat=stat.__name__, - source=source, - elapsed_time=elapsed_time, - num_evaluations=num_evaluations, + self._construction_kwargs.update( + dict( + name=name, + stat=stat.__name__, + elapsed_time=elapsed_time, + num_evaluations=num_evaluations, + ) ) self._store = {} self._name = name @@ -522,7 +533,7 @@ key for the null hypothesis """ super(hypothesis_result, self).__init__(name=name, source=source) - self._construction_kwargs = dict(name_of_null=name_of_null, source=source) + self._construction_kwargs.update(dict(name_of_null=name_of_null)) self._name_of_null = name_of_null @@ -616,7 +627,6 @@ def __init__(self, source=None): super(bootstrap_result, self).__init__(source) - self._construction_kwargs = dict(source=source) @property def observed(self): diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/app/sample.py python-cogent-2022.5.25a1+dfsg/src/cogent3/app/sample.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/app/sample.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/app/sample.py 2022-05-24 23:42:33.000000000 +0000 @@ -19,10 +19,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/app/translate.py python-cogent-2022.5.25a1+dfsg/src/cogent3/app/translate.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/app/translate.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/app/translate.py 2022-05-24 23:42:33.000000000 +0000 @@ -13,10 +13,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] 
__license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -66,21 +66,20 @@ min_stops, frame = stops_in_frame[0] # if min_stops > 1, cannot be translated if min_stops > 1: - raise ValueError("%s cannot be robustly translated" % seq.name) + raise ValueError(f"{seq.name!r} cannot be robustly translated") elif min_stops == 0 and require_stop: # find seq with 1 stop min_stops = 20 # nonsense value - for idx, (n, fr) in enumerate(stops_in_frame): + for n, fr in stops_in_frame: if n == 1: min_stops, frame = n, fr break - if 0 <= min_stops <= 1: - if min_stops == 1 and not translations[frame].endswith("*"): - raise ValueError("%s cannot be robustly translated" % seq.name) - else: - raise ValueError("%s cannot be robustly translated" % seq.name) + if not 0 <= min_stops <= 1: + raise ValueError(f"{seq.name!r} cannot be robustly translated") + if min_stops == 1 and not translations[frame].endswith("*"): + raise ValueError(f"{seq.name!r} cannot be robustly translated") frame += 1 if allow_rc and frame > 3: frame = 3 - frame diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/app/tree.py python-cogent-2022.5.25a1+dfsg/src/cogent3/app/tree.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/app/tree.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/app/tree.py 2022-05-24 23:42:33.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/cluster/__init__.py python-cogent-2022.5.25a1+dfsg/src/cogent3/cluster/__init__.py --- 
python-cogent-2021.10.12a1+dfsg/src/cogent3/cluster/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/cluster/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -5,10 +5,10 @@ __all__ = ["UPGMA"] __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Catherine Lozuopone", "Rob Knight", "Peter Maxwell", "Justin Kuczynski"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Catherine Lozupone" __email__ = "lozupone@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/cluster/UPGMA.py python-cogent-2022.5.25a1+dfsg/src/cogent3/cluster/UPGMA.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/cluster/UPGMA.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/cluster/UPGMA.py 2022-05-24 23:42:33.000000000 +0000 @@ -19,10 +19,10 @@ __author__ = "Catherine Lozupone" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Catherine Lozuopone", "Rob Knight", "Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Catherine Lozupone" __email__ = "lozupone@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/core/alignment.py python-cogent-2022.5.25a1+dfsg/src/cogent3/core/alignment.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/core/alignment.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/core/alignment.py 2022-05-24 23:42:33.000000000 +0000 @@ -66,11 +66,10 @@ from cogent3.parse.gff import gff_parser from cogent3.util import progress_display as UI from cogent3.util.dict_array import DictArrayTemplate +from cogent3.util.io import atomic_write, get_format_suffixes from cogent3.util.misc import ( - 
atomic_write, bytes_to_string, extend_docstring_from, - get_format_suffixes, get_object_provenance, get_setting_from_environ, ) @@ -78,7 +77,7 @@ __author__ = "Peter Maxwell and Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Peter Maxwell", "Rob Knight", @@ -91,7 +90,7 @@ "Jan Kosinski", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -232,7 +231,7 @@ First argument is ignored; expect this to be set as a class attribute. """ - return ["%s_%s" % (base_name, i) for i in range(start_at, start_at + num_seqs)] + return [f"{base_name}_{i}" for i in range(start_at, start_at + num_seqs)] def coerce_to_string(s): @@ -1112,7 +1111,7 @@ concatenated = [] for name in self.names: if name not in other.names: - raise ValueError("Right alignment doesn't have a '%s'" % name) + raise ValueError(f"Right alignment missing {name!r}") new_seq = self.named_seqs[name] + other.named_seqs[name] concatenated.append(new_seq) @@ -1166,10 +1165,7 @@ for seq in combined: assert ( seq.__class__ == self_seq_class - ), "Seq classes different: Expected %s, Got %s" % ( - seq.__class__, - self_seq_class, - ) + ), f"Seq classes different: Expected {seq.__class__}, Got {self_seq_class}" combined_aln = self.__class__(data=combined, info=self.info) @@ -1180,9 +1176,7 @@ after_name and after_name not in self.names ): name = before_name or after_name - raise ValueError( - "The alignment doesn't have a sequence named '{0}'".format(name) - ) + raise ValueError(f"Alignment missing sequence {name!r}") if before_name is not None: # someone might have seqname of int(0) index = self.names.index(before_name) @@ -1282,7 +1276,7 @@ translated.append((seqname, pep)) return self.__class__(translated, info=self.info, **kwargs) except AttributeError as msg: - raise AttributeError("%s -- 
%s" % (msg, "Did you set a DNA moltype?")) + raise AttributeError(f"{msg} -- {'Did you set a DNA moltype?'}") def get_seq(self, seqname): """Return a sequence object for the specified seqname.""" @@ -2081,7 +2075,7 @@ return self.__class__(new_map, new_seq) def __repr__(self): - return "%s of %s" % (repr(self.map), repr(self.data)) + return f"{self.map!r} of {self.data!r}" def with_termini_unknown(self): return self.__class__(self.map.with_termini_unknown(), self.data) @@ -2620,10 +2614,9 @@ result = darr.wrap(result) if drawable: drawable = drawable.lower() - if self.info.source: - trace_name = os.path.basename(self.info.source) - else: - trace_name = None + trace_name = ( + os.path.basename(self.info.source) if self.info.source else None + ) draw = Drawable("Gaps Per Sequence", showlegend=False) draw.layout |= dict(yaxis=dict(title="Gap counts")) if drawable == "bar": @@ -2643,7 +2636,7 @@ introduced gaps exceeding quantile Uses count_gaps_per_seq(unique=True) to obtain the counts of gaps - uniquely introduced by a sequence. The cutoff is the the quantile of + uniquely introduced by a sequence. The cutoff is the quantile of this distribution. Parameters @@ -2653,7 +2646,7 @@ cutoff are excluded. The default quantile is (num_seqs - 1) / num_seqs """ gap_counts = self.count_gaps_per_seq(unique=True) - quantile = quantile if quantile else (self.num_seqs - 1) / self.num_seqs + quantile = quantile or (self.num_seqs - 1) / self.num_seqs cutoff = numpy.quantile(gap_counts.array, quantile) names = [name for name, count in gap_counts.items() if count <= cutoff] return self.take_seqs(names) @@ -2716,7 +2709,7 @@ return self.gapped_by_map(sample, info=self.info) def sliding_windows(self, window, step, start=None, end=None): - """Generator yielding new Alignments of given length and interval. + """Generator yielding new alignments of given length and interval. 
Parameters ---------- @@ -2729,7 +2722,6 @@ first window start position end last window start position - """ start = [start, 0][start is None] end = [end, len(self) - window + 1][end is None] @@ -2852,8 +2844,8 @@ refname = names[0] refseq = output[refname] seqlen = len(refseq) - start_gap = re.search("^[%s]+" % gaps, "".join(refseq)) - end_gap = re.search("[%s]+$" % gaps, "".join(refseq)) + start_gap = re.search(f"^[{gaps}]+", "".join(refseq)) + end_gap = re.search(f"[{gaps}]+$", "".join(refseq)) start = 0 if start_gap is None else start_gap.end() end = len(refseq) if end_gap is None else end_gap.start() seq_style = [] @@ -2862,7 +2854,7 @@ for i in range(seqlen): char = refseq[i] if i < start or i >= end: - style = "terminal_ambig_%s" % selected.moltype.label + style = f"terminal_ambig_{selected.moltype.label}" else: style = styles[char] @@ -2896,7 +2888,7 @@ for n, s in zip(names, seqblock): s = "".join(s) row = "".join([label_ % n, seq_ % s]) - table.append("%s" % row) + table.append(f"{row}") table.append("") if ( limit @@ -2931,7 +2923,7 @@ "", '
', "\n".join(table), - "

%s

" % summary, + f"

{summary}

", "
", ] return "\n".join(text) @@ -2952,7 +2944,7 @@ display_names = dict([(n, name_template.format(n)) for n in names]) def make_line(label, seq): - return "%s %s" % (label, seq) + return f"{label} {seq}" if wrap is None: result = [make_line(display_names[n], "".join(output[n])) for n in names] @@ -3877,15 +3869,10 @@ elts = list(str(self.named_seqs[name])[: limit + 1]) if len(elts) > limit: elts.append("...") - seqs.append("%s[%s]" % (name, delimiter.join(elts))) + seqs.append(f"{name}[{delimiter.join(elts)}]") seqs = ", ".join(seqs) - return "%s x %s %s alignment: %s" % ( - len(self.names), - self.seq_len, - self._type, - seqs, - ) + return f"{len(self.names)} x {self.seq_len} {self._type} alignment: {seqs}" def iupac_consensus(self, alphabet=None): """Returns string containing IUPAC consensus sequence of the alignment.""" @@ -4049,7 +4036,7 @@ nondegen_index = i + 1 if nondegen_index % 3 != 0 and not allow_partial: raise ValueError( - "'%s' length not divisible by 3" % self.names[seq_num] + f"{self.names[seq_num]!r} length not divisible by 3" ) break @@ -4092,9 +4079,7 @@ """ if name not in self.names: - raise ValueError( - "The alignment doesn't have a sequence named '{0}'".format(name) - ) + raise ValueError(f"Alignment missing sequence named {name!r}") gapindex = self.alphabet.index(self.alphabet.gap) seqindex = self.names.index(name) @@ -4202,9 +4187,7 @@ new[:] = seq_gapindex # reset each time through if scale != 1: if len(orig) % scale != 0: - raise ValueError( - "%s length not divisible by %s" % (name, len(orig)) - ) + raise ValueError(f"{name!r} length not divisible by {len(orig)}") orig.resize((len(orig) // scale, scale)) new.resize(new_dim) @@ -4214,7 +4197,7 @@ new[nongap] = orig except ValueError: if nongap.sum() != orig.shape[0]: - raise ValueError("%s has incorrect length" % name) + raise ValueError(f"{name!r} has incorrect length") raise new.resize((len(self) * scale,)) @@ -4379,7 +4362,7 @@ if lengths and (max(lengths) != min(lengths)): raise 
DataError( "Not all sequences are the same length:\n" - + "max is %s, min is %s" % (max(lengths), min(lengths)) + + f"max is {max(lengths)}, min is {min(lengths)}" ) aligned_seqs = [] for s, n in zip(seqs, names): @@ -4416,15 +4399,10 @@ elts = list(str(self.named_seqs[name])[: limit + 1]) if len(elts) > limit: elts.append("...") - seqs.append("%s[%s]" % (name, delimiter.join(elts))) + seqs.append(f"{name}[{delimiter.join(elts)}]") seqs = ", ".join(seqs) - return "%s x %s %s alignment: %s" % ( - len(self.names), - self.seq_len, - self._type, - seqs, - ) + return f"{len(self.names)} x {self.seq_len} {self._type} alignment: {seqs}" def _mapped(self, slicemap): align = [] @@ -4509,7 +4487,7 @@ length = self.seq_len if length % motif_length != 0 and not drop_remainder: raise ValueError( - "aligned length not divisible by " "motif_length=%d" % motif_length + f"aligned length not divisible by motif_length={motif_length}" ) gv = [] kept = False @@ -4586,7 +4564,7 @@ for name in self.names: seq = self.get_gapped_seq(name) if name not in template.names: - raise ValueError("Template alignment doesn't have a '%s'" % name) + raise ValueError(f"Template alignment doesn't have {name!r}") gsq = template.get_gapped_seq(name) assert len(gsq) == len(seq) combo = [] @@ -4611,9 +4589,7 @@ """ if name not in self.names: - raise ValueError( - "The alignment doesn't have a sequence named '{0}'".format(name) - ) + raise ValueError(f"Alignment missing sequence named {name!r}") gap = self.alphabet.gap non_gap_cols = [ @@ -4677,11 +4653,10 @@ ref_seq_name = ref_aln.names[0] if ref_seq_name not in self.names: + all_names = "\n".join(self.names) raise ValueError( - "The name of reference sequence ({0})" - "not found in the alignment \n(names in the alignment:\n{1}\n)".format( - ref_seq_name, "\n".join(self.names) - ) + f"Reference sequence ({ref_seq_name!r}) " + f"not found in the alignment \n(names in the alignment:\n{all_names}\n)" ) if str(ref_aln.get_gapped_seq(ref_seq_name)) != 
str(self.get_seq(ref_seq_name)): @@ -4694,8 +4669,7 @@ for seq_name in ref_aln.names[1:]: if seq_name in self.names: raise ValueError( - "The name of a sequence being added ({0})" - "is already present".format(seq_name) + f"The name of a sequence being added ({seq_name!r})is already present" ) seq = ref_aln.get_gapped_seq(seq_name) @@ -4748,7 +4722,7 @@ seq = seq.data if len(seq) != len(aligned.data) * scale: - raise ValueError("%s has incorrect length" % label) + raise ValueError(f"{label!r} has incorrect length") new_seqs.append((label, Aligned(aligned.map * scale, seq))) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/core/alphabet.py python-cogent-2022.5.25a1+dfsg/src/cogent3/core/alphabet.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/core/alphabet.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/core/alphabet.py 2022-05-24 23:42:33.000000000 +0000 @@ -11,9 +11,6 @@ has been imported, the alphabets will not know their MolType, which will cause problems. It is often useful to create alphabets and/or Enumerations on the fly, however. - -MolType provides services for resolving ambiguities, or providing the -correct ambiguity for recoding -- will move to its own module. 
""" import json @@ -45,10 +42,10 @@ Int = numpy.core.numerictypes.sctype2char(int) __author__ = "Peter Maxwell, Gavin Huttley and Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Rob Knight", "Andrew Butterfield"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -730,14 +727,13 @@ if hasattr(motif_probs, "keys"): sample = list(motif_probs.keys())[0] if sample not in self: - raise ValueError("Can't find motif %s in alphabet" % sample) + raise ValueError(f"Can't find motif {sample} in alphabet") motif_probs = numpy.array([motif_probs[motif] for motif in self]) else: if len(motif_probs) != len(self): if len(motif_probs) != len(self): raise ValueError( - "Can't match %s probs to %s alphabet" - % (len(motif_probs), len(self)) + f"Can't match {len(motif_probs)} probs to {len(self)} alphabet" ) motif_probs = numpy.asarray(motif_probs) assert abs(sum(motif_probs) - 1.0) < 0.0001, motif_probs diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/core/annotation.py python-cogent-2022.5.25a1+dfsg/src/cogent3/core/annotation.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/core/annotation.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/core/annotation.py 2022-05-24 23:42:33.000000000 +0000 @@ -12,10 +12,10 @@ __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -76,7 +76,7 @@ base = base.parent if base is not self: raise ValueError( - "Can't 
map %s onto %s via %s" % (index, repr(self), containers) + f"Can't map {index} onto {repr(self)} via {containers}" ) for base in containers: feature = feature.remapped_to(base, base.map) @@ -263,7 +263,7 @@ class _Serialisable: def to_rich_dict(self): """returns {'name': name, 'seq': sequence, 'moltype': moltype.label}""" - data = self._serialisable.copy() + data = copy.deepcopy(self._serialisable) # the first constructor argument will be the instance recreating # so we pop out the two possible keys data.pop("parent", None) @@ -340,7 +340,7 @@ self.parent.detach_annotations([self]) def _mapped(self, slicemap): - name = "%s of %s" % (repr(slicemap), self.name) + name = f"{repr(slicemap)} of {self.name}" return self.__class__(self, slicemap, type="slice", name=name) def get_slice(self, complete=True): @@ -378,8 +378,8 @@ def __repr__(self): name = getattr(self, "name", "") if name: - name = ' "%s"' % name - return "%s%s at %s" % (self.type, name, self.map) + name = f' "{name}"' + return f"{self.type}{name} at {self.map}" def _projected_to_base(self, base): if self.parent == base: diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/core/genetic_code.py python-cogent-2022.5.25a1+dfsg/src/cogent3/core/genetic_code.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/core/genetic_code.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/core/genetic_code.py 2022-05-24 23:42:33.000000000 +0000 @@ -13,10 +13,10 @@ __author__ = "Greg Caporaso and Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Greg Caporaso", "Rob Knight", "Peter Maxwell", "Thomas La"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Greg Caporaso" __email__ = "caporaso@colorado.edu" __status__ = "Production" @@ -253,7 +253,7 @@ key = key.replace("U", "T") return self.codons.get(key, "X") else: - raise InvalidCodonError("Codon or 
aa %s has wrong length" % item) + raise InvalidCodonError(f"Codon or aa {item} has wrong length") def translate(self, dna, start=0): """Translates DNA to protein with current GeneticCode. @@ -279,7 +279,7 @@ def get_stop_indices(self, dna, start=0): """returns indexes for stop codons in the specified frame""" stops = self["*"] - stop_pattern = "(%s)" % "|".join(stops) + stop_pattern = f"({'|'.join(stops)})" stop_pattern = re.compile(stop_pattern) seq = str(dna) found = [hit.start() for hit in stop_pattern.finditer(seq)] @@ -485,7 +485,7 @@ code = gc if code is None: - raise ValueError('No genetic code matching "%s"' % code_id) + raise ValueError(f'No genetic code matching "{code_id}"') return code diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/core/info.py python-cogent-2022.5.25a1+dfsg/src/cogent3/core/info.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/core/info.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/core/info.py 2022-05-24 23:42:33.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Prototype" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/core/__init__.py python-cogent-2022.5.25a1+dfsg/src/cogent3/core/__init__.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/core/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/core/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -14,7 +14,7 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Jeremy Widmann", "Gavin Huttley", @@ -25,7 +25,7 @@ "Greg Caporaso", ] __license__ = "BSD-3" 
-__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/core/location.py python-cogent-2022.5.25a1+dfsg/src/cogent3/core/location.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/core/location.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/core/location.py 2022-05-24 23:42:33.000000000 +0000 @@ -53,10 +53,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell", "Matthew Wakefield", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Prototype" @@ -135,7 +135,7 @@ def __str__(self): """Returns string representation of self.""" - return "(%s,%s)" % (self.start, self.end) + return f"({self.start},{self.end})" def __len__(self): """Returns length of self.""" @@ -272,7 +272,7 @@ self.reverse = reverse def to_rich_dict(self): - attribs = self._serialisable.copy() + attribs = copy.deepcopy(self._serialisable) attribs["type"] = get_object_provenance(self) attribs["version"] = __version__ return attribs @@ -294,7 +294,7 @@ (start, end) = (self.start, self.end) if self.reverse: (end, start) = (start, end) - return "%s:%s" % (start, end) + return f"{start}:{end}" def reversed(self): return self.__class__( @@ -444,7 +444,7 @@ def __str__(self): """Returns string representation of self.""" - return "(%s,%s,%s)" % (self.start, self.end, bool(self.reverse)) + return f"({self.start},{self.end},{bool(self.reverse)})" def __len__(self): """Returns length of self.""" @@ -487,7 +487,7 @@ self.value = value def to_rich_dict(self): - attribs = self._serialisable.copy() + attribs = copy.deepcopy(self._serialisable) 
attribs["type"] = get_object_provenance(self) attribs["version"] = __version__ return attribs @@ -502,7 +502,7 @@ return (self.length, self.value) def __repr__(self): - return "-%s-" % (self.length) + return f"-{self.length}-" def where(self, index): return None @@ -547,7 +547,7 @@ terminal = True def __repr__(self): - return "?%s?" % (self.length) + return f"?{self.length}?" class Map(object): @@ -573,13 +573,8 @@ reverse = start > end if max(start, end) < 0 or min(start, end) > parent_length: raise RuntimeError( - "located outside sequence: %s" - % str((start, end, parent_length)) + f"located outside sequence: {str((start, end, parent_length))}" ) - elif max(start, end) < 0: - diff = min(start, end) - start = [start, 0][start < 0] - end = [end, 0][end < 0] elif min(start, end) > parent_length: diff = max(start, end) - parent_length start = [start, parent_length][start > parent_length] @@ -628,7 +623,7 @@ return self.length def __repr__(self): - return repr(self.spans) + "/%s" % self.parent_length + return repr(self.spans) + f"/{self.parent_length}" def __getitem__(self, slice): # A possible shorter map at the same level @@ -781,7 +776,7 @@ if lo > last_hi: new_spans.append(LostSpan(lo - last_hi)) elif lo < last_hi: - raise ValueError("Uninvertable. Overlap: %s < %s" % (lo, last_hi)) + raise ValueError(f"Uninvertable. 
Overlap: {lo} < {last_hi}") new_spans.append(Span(start, end, reverse=start > end)) last_hi = hi if self.parent_length > last_hi: @@ -808,7 +803,7 @@ def to_rich_dict(self): """returns dicts for contained spans [dict(), ..]""" spans = [s.to_rich_dict() for s in self.spans] - data = self._serialisable.copy() + data = copy.deepcopy(self._serialisable) data.pop("locations") data["spans"] = spans data["type"] = get_object_provenance(self) @@ -872,7 +867,7 @@ def __str__(self): """Returns string representation of self.""" - return "(%s)" % ",".join(map(str, self.spans)) + return f"({','.join(map(str, self.spans))})" def __len__(self): """Returns sum of span lengths. diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/core/moltype.py python-cogent-2022.5.25a1+dfsg/src/cogent3/core/moltype.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/core/moltype.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/core/moltype.py 2022-05-24 23:42:33.000000000 +0000 @@ -12,10 +12,10 @@ """ __author__ = "Peter Maxwell, Gavin Huttley and Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Rob Knight", "Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -25,6 +25,7 @@ import re from collections import defaultdict +from copy import deepcopy from random import choice from string import ascii_letters as letters @@ -708,14 +709,14 @@ WARNING: This doesn't allow you to reconstruct the object in its present incarnation. 
""" - return "MolType(%s)" % (self.alphabet,) + return f"MolType({self.alphabet})" def __getnewargs_ex__(self, *args, **kw): data = self.to_rich_dict(for_pickle=True) return (), data def to_rich_dict(self, for_pickle=False): - data = self._serialisable.copy() + data = deepcopy(self._serialisable) if not for_pickle: # we rely on reconstruction from label data = dict(type=get_object_provenance(self), moltype=self.label) data["version"] = __version__ @@ -783,7 +784,7 @@ if wildcards_allowed: alpha = alpha.union(self.missing) try: - nonalpha = re.compile("[^%s]" % re.escape("".join(alpha))) + nonalpha = re.compile(f"[^{re.escape(''.join(alpha))}]") badchar = nonalpha.search(seq) if badchar: motif = badchar.group() @@ -1036,7 +1037,7 @@ else: return sequence.__class__(result) else: - raise NotImplementedError("Got unknown method %s" % method) + raise NotImplementedError(f"Got unknown method {method}") def degap(self, sequence): """Deletes all gap characters from sequence.""" @@ -1259,7 +1260,7 @@ return inv_degens[lengths[sorted[0]]] # if we got here, nothing worked - raise TypeError("Cannot find degenerate char for symbols: %s" % symbols) + raise TypeError(f"Cannot find degenerate char for symbols: {symbols}") def get_css_style(self, colors=None, font_size=12, font_family="Lucida Console"): """returns string of CSS classes and {character: , ...} @@ -1365,7 +1366,7 @@ # the None value catches cases where a moltype has no label attribute _style_defaults = { getattr(mt, "label", ""): defaultdict( - _DefaultValue("ambig_%s" % getattr(mt, "label", "")) + _DefaultValue(f"ambig_{getattr(mt, 'label', '')}") ) for mt in (ASCII, BYTES, DNA, RNA, PROTEIN, PROTEIN_WITH_STOP, None) } @@ -1469,7 +1470,7 @@ return name name = name.lower() if name not in moltypes: - raise ValueError('unknown moltype "%s"' % name) + raise ValueError(f"unknown moltype {name!r}") return moltypes[name] diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/core/profile.py 
python-cogent-2022.5.25a1+dfsg/src/cogent3/core/profile.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/core/profile.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/core/profile.py 2022-05-24 23:42:33.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/core/sequence.py python-cogent-2022.5.25a1+dfsg/src/cogent3/core/sequence.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/core/sequence.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/core/sequence.py 2022-05-24 23:42:33.000000000 +0000 @@ -15,6 +15,7 @@ import re import warnings +from collections import defaultdict from functools import total_ordering from operator import eq, ne from random import shuffle @@ -53,7 +54,7 @@ __author__ = "Rob Knight, Gavin Huttley, and Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Rob Knight", "Peter Maxwell", @@ -62,7 +63,7 @@ "Daniel McDonald", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -192,8 +193,7 @@ else: if len(data) % motif_length != 0: warnings.warn( - "%s length not divisible by %s, truncating" - % (self.name, motif_length) + f"{self.name} length not divisible by {motif_length}, truncating" ) limit = (len(data) // motif_length) * motif_length data = data[:limit] @@ -695,8 +695,8 @@ seq = seq if limit is None else seq[:limit] gaps = "".join(self.moltype.gaps) seqlen = 
len(seq) - start_gap = re.search("^[%s]+" % gaps, "".join(seq)) - end_gap = re.search("[%s]+$" % gaps, "".join(seq)) + start_gap = re.search(f"^[{gaps}]+", "".join(seq)) + end_gap = re.search(f"[{gaps}]+$", "".join(seq)) start = 0 if start_gap is None else start_gap.end() end = len(seq) if end_gap is None else end_gap.start() @@ -706,7 +706,7 @@ for i in range(seqlen): char = seq[i] if i < start or i >= end: - style = "terminal_ambig_%s" % self.moltype.label + style = f"terminal_ambig_{self.moltype.label}" else: style = styles[char] @@ -724,7 +724,7 @@ seqblock = seq[i : i + wrap].tolist() seqblock = "".join(seqblock) row = "".join([label_ % self.name, seq_ % seqblock]) - table.append("%s" % row) + table.append(f"{row}") table.append("") class_name = self.__class__.__name__ if limit and limit < len(self): @@ -744,11 +744,29 @@ "", '
', "\n".join(table), - "

%s

" % summary, + f"

{summary}

", "
", ] return "\n".join(text) + def __add__(self, other): + """Adds two sequences (other can be a string as well).""" + if hasattr(other, "moltype"): + if self.moltype != other.moltype: + raise ValueError( + f"MolTypes don't match: ({self.moltype},{other.moltype})" + ) + other_seq = str(other) + + # If two sequences with the same name are being added together the name should not be None + if type(other) == type(self): + name = self.name if self.name == other.name else None + else: + name = None + + new_seq = self.__class__(str(self) + other_seq, name=name) + return new_seq + @total_ordering class Sequence(_Annotatable, SequenceI): @@ -854,79 +872,68 @@ annot.copy_annotations_to(new) return new + def _get_feature_start(self, feature): + """returns feature offset relative to parent feature(s)""" + start = feature.map.start + offset = 0 + while feature.parent is not self: + feature = feature.parent + if feature.map.start - start: + offset += feature.map.start + + return offset + start + def annotate_from_gff(self, f, pre_parsed=False): """annotates a Sequence from a gff file where each entry has the same SeqID""" - first_seqname = None # only features with parent features included in the 'features' dict - features = dict() - fake_id = 0 - if pre_parsed: - gff_contents = f - else: - gff_contents = gff.gff_parser(f) + gff_contents = f if pre_parsed else gff.gff_parser(f) + top_level = defaultdict(list) + grouped = defaultdict(list) + num_no_id = 0 for gff_dict in gff_contents: - if first_seqname is None: - first_seqname = gff_dict["SeqID"] - else: - assert gff_dict["SeqID"] == first_seqname, ( - gff_dict["SeqID"], - first_seqname, - ) - # ensure the ID is unique + if gff_dict["SeqID"] != self.name: + # we can only handle features for this sequence + continue + id_ = gff_dict["Attributes"]["ID"] - if id_ in features.keys(): - id_ = f"{id_}:{gff_dict['Type']}:{gff_dict['Start']}-{gff_dict['End']}:{fake_id}" - fake_id = fake_id + 1 - if "Parent" not in 
gff_dict["Attributes"].keys(): + parents = gff_dict["Attributes"].get("Parent", None) + if parents is None and id_: + assert id_ not in top_level, f"non-unique id {id_}" + top_level[id_].append( + self.add_feature( + gff_dict["Type"], id_, [(gff_dict["Start"], gff_dict["End"])] + ) + ) + elif parents is None: + id_ = f"no-id-{num_no_id}" + num_no_id += 1 self.add_feature( gff_dict["Type"], id_, [(gff_dict["Start"], gff_dict["End"])] ) - continue - features[id_] = gff_dict - if features: - parents = {} - for id_ in features.keys(): - parents[id_] = features[id_]["Attributes"]["Parent"] - sorted_features = self._sort_parents( - parents, [], next(iter(features.keys())) - ) - for id_ in sorted_features: - matches = [] - for parent in features[id_]["Attributes"]["Parent"]: - # If a feature has multiple parents, a separate instance is added to each parent - matches.extend( - self.get_annotations_matching( - "*", name=parent, extend_query=True - ) - ) - for parent in matches: - # Start and end are relative to the parent's absolute starting position - if parent.name not in features.keys(): - parent_min = 0 - else: - parent_min = min( - features[parent.name]["Start"], features[parent.name]["End"] - ) - start = features[id_]["Start"] - parent_min - end = features[id_]["End"] - parent_min - parent.add_feature( - features[id_]["Type"], - features[id_]["Attributes"]["ID"], - [(start, end)], - ) + else: + for parent in parents: + grouped[parent].append(gff_dict) - def _sort_parents(self, parents, ordered, key): - """returns a list of feature id's with parents before children""" - keys = parents.keys() - if key in keys: - for parent in parents[key]: - if parent in keys: - return self._sort_parents(parents, ordered, parent) - ordered.append(key) - parents.pop(key) - if not parents: - return ordered - return self._sort_parents(parents, ordered, next(iter(keys))) + # we annotate the annotations + while grouped: + for key, features in top_level.items(): + child_features = 
grouped.pop(key, []) + if child_features: + break + + for feature in features: + feature_start = self._get_feature_start(feature) + for gff_dict in child_features: + id_ = gff_dict["Attributes"]["ID"] + b = gff_dict["Start"] + e = gff_dict["End"] + type_ = gff_dict["Type"] + sub_feat = feature.add_feature( + type_, + id_, + [(b - feature_start, e - feature_start)], + ) + top_level[gff_dict["Attributes"]["ID"]].append(sub_feat) def with_masked_annotations( self, annot_types, mask_char=None, shadow=False, extend_query=False @@ -952,7 +959,7 @@ ambigs = [(len(v), c) for c, v in list(self.moltype.ambiguities.items())] ambigs.sort() mask_char = ambigs[-1][1] - assert mask_char in self.moltype, "Invalid mask_char %s" % mask_char + assert mask_char in self.moltype, f"Invalid mask_char {mask_char}" annotations = [] annot_types = [annot_types, [annot_types]][isinstance(annot_types, str)] @@ -968,8 +975,7 @@ i = 0 segments = [] for b, e in region.get_coordinates(): - segments.append(self._seq[i:b]) - segments.append(mask_char * (e - b)) + segments.extend((self._seq[i:b], mask_char * (e - b))) i = e segments.append(self._seq[i:]) @@ -986,7 +992,7 @@ unknown = span.terminal or recode_gaps seg = "-?"[unknown] * span.length else: - raise ValueError("gap(s) in map %s" % map) + raise ValueError(f"gap(s) in map {map}") else: seg = self._seq[span.start : span.end] if span.reverse: @@ -997,8 +1003,7 @@ def gapped_by_map_motif_iter(self, map): for segment in self.gapped_by_map_segment_iter(map): - for motif in segment: - yield motif + yield from segment def gapped_by_map(self, map, recode_gaps=False): segments = self.gapped_by_map_segment_iter(map, True, recode_gaps) @@ -1014,47 +1019,17 @@ segments = self.gapped_by_map_segment_iter(map, allow_gaps=False) return self.__class__("".join(segments), self.name, info=self.info) - def __add__(self, other): - """Adds two sequences (other can be a string as well).""" - if hasattr(other, "moltype"): - if self.moltype != other.moltype: - raise 
ValueError( - "MolTypes don't match: (%s,%s)" % (self.moltype, other.moltype) - ) - other_seq = other._seq - else: - other_seq = other - new_seq = self.__class__(self._seq + other_seq) - # Annotations which extend past the right end of the left sequence - # or past the left end of the right sequence are dropped because - # otherwise they will annotate the wrong part of the constructed - # sequence. - left = [ - a for a in self._shifted_annotations(new_seq, 0) if a.map.end <= len(self) - ] - if hasattr(other, "_shifted_annotations"): - right = [ - a - for a in other._shifted_annotations(new_seq, len(self)) - if a.map.start >= len(self) - ] - new_seq.annotations = left + right - else: - new_seq.annotations = left - return new_seq - def __repr__(self): - myclass = "%s" % self.__class__.__name__ + myclass = f"{self.__class__.__name__}" myclass = myclass.split(".")[-1] if len(self) > 10: - seq = str(self._seq[:7]) + "... %s" % len(self) + seq = f"{str(self._seq[:7])}... {len(self)}" else: seq = str(self._seq) - return "%s(%s)" % (myclass, seq) + return f"{myclass}({seq})" def get_name(self): """Return the sequence name -- should just use name instead.""" - return self.name def __len__(self): @@ -1110,22 +1085,22 @@ seq = self._seq if motif_length == 1: return seq - else: - length = len(seq) - remainder = length % motif_length - if remainder and log_warnings: - warnings.warn( - 'Dropped remainder "%s" from end of sequence' % seq[-remainder:] - ) - return [ - seq[i : i + motif_length] - for i in range(0, length - remainder, motif_length) - ] + + length = len(seq) + remainder = length % motif_length + if remainder and log_warnings: + warnings.warn( + f'Dropped remainder "{seq[-remainder:]}" from end of sequence' + ) + return [ + seq[i : i + motif_length] + for i in range(0, length - remainder, motif_length) + ] def parse_out_gaps(self): gapless = [] segments = [] - nongap = re.compile("([^%s]+)" % re.escape("-")) + nongap = re.compile(f"([^{re.escape('-')}]+)") for match 
in nongap.finditer(self._seq): segments.append(match.span()) gapless.append(match.group()) @@ -1184,6 +1159,28 @@ for i in range(num_match) ] + def __add__(self, other): + """Adds two sequences (other can be a string as well)""" + new_seq = super(Sequence, self).__add__(other) + # Annotations which extend past the right end of the left sequence + # or past the left end of the right sequence are dropped because + # otherwise they will annotate the wrong part of the constructed + # sequence. + left = [ + a for a in self._shifted_annotations(new_seq, 0) if a.map.end <= len(self) + ] + if hasattr(other, "_shifted_annotations"): + right = [ + a + for a in other._shifted_annotations(new_seq, len(self)) + if a.map.start >= len(self) + ] + new_seq.annotations = left + right + else: + new_seq.annotations = left + + return new_seq + class ProteinSequence(Sequence): """Holds the standard Protein sequence.""" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/core/tree.py python-cogent-2022.5.25a1+dfsg/src/cogent3/core/tree.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/core/tree.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/core/tree.py 2022-05-24 23:42:33.000000000 +0000 @@ -37,15 +37,12 @@ from numpy import argsort, ceil, log, zeros from cogent3.maths.stats.test import correlation -from cogent3.util.misc import ( - atomic_write, - get_format_suffixes, - get_object_provenance, -) +from cogent3.util.io import atomic_write, get_format_suffixes +from cogent3.util.misc import get_object_provenance __author__ = "Gavin Huttley, Peter Maxwell and Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Gavin Huttley", "Peter Maxwell", @@ -59,7 +56,7 @@ "Justin Kuczynski", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = 
"Production" @@ -119,7 +116,7 @@ WARNING: Does not currently set the class to the right type. """ - return 'Tree("%s")' % self.get_newick() + return f'Tree("{self.get_newick()}")' def __str__(self): """Returns Newick-format string representation of tree.""" @@ -880,7 +877,7 @@ subtrees.append(nwk) if subtrees: - newick.append("(%s)" % ",".join(subtrees)) + newick.append(f"({','.join(subtrees)})") if self.name_loaded or with_node_names: if self.name is None or with_node_names and self.is_root(): @@ -896,7 +893,7 @@ if isinstance(self, PhyloNode): if with_distances and self.length is not None: - newick.append(":%s" % self.length) + newick.append(f":{self.length}") if semicolon: newick.append(";") @@ -967,7 +964,7 @@ if isinstance(self, PhyloNode): if with_distances and top_node.length is not None: # result.append(":%s" % top_node.length) - result[-1] = "%s:%s" % (result[-1], top_node.length) + result[-1] = f"{result[-1]}:{top_node.length}" result.append(",") @@ -979,7 +976,7 @@ return "" elif len_result == 3: # single node with name if semicolon: - return "%s;" % result[1] + return f"{result[1]};" else: return result[1] else: @@ -1039,7 +1036,7 @@ if outgroup_name is not None: outgroup = self.get_node_matching_name(outgroup_name) if outgroup.children: - raise TreeError("Outgroup (%s) must be a tip" % outgroup_name) + raise TreeError(f"Outgroup ({outgroup_name!r}) must be a tip") self = outgroup.unrooted_deepcopy() join_edge = self.get_connecting_node(tip1name, tip2name) @@ -1049,7 +1046,7 @@ if stem: if join_edge.isroot(): raise TreeError( - "LCA(%s,%s) is the root and so has no stem" % (tip1name, tip2name) + f"LCA({tip1name},{tip2name}) is the root and so has no stem" ) else: edge_names.append(join_edge.name) @@ -1283,7 +1280,7 @@ # this may take a long time for name in name_list: if name not in edge_names: - raise ValueError("edge %s not found in tree" % name) + raise ValueError(f"edge {name!r} not found in tree") new_tree = self._get_sub_tree(name_list, 
keep_root=keep_root, tipsonly=tipsonly) if new_tree is None: @@ -1418,15 +1415,13 @@ if parent_params is not None: params.update(parent_params) pad = " " * indent - xml = ["%s" % pad] + xml = [f"{pad}"] if self.name_loaded: - xml.append("%s %s" % (pad, self.name)) + xml.append(f"{pad} {self.name}") for (n, v) in list(self.params.items()): if v == params.get(n, None): continue - xml.append( - "%s %s%s" % (pad, n, v) - ) + xml.append(f"{pad} {n}{v}") params[n] = v for child in self.children: xml.extend(child._getXmlLines(indent + 1, params)) @@ -1450,10 +1445,16 @@ format default is newick, xml and json are alternate. Argument overrides the filename suffix. All attributes are saved in the xml format. + Value overrides the file name suffix. + + Notes + ----- + Only the cogent3 json and xml tree formats are supported. """ file_format, _ = get_format_suffixes(filename) - if file_format == "json": + format = format or file_format + if format == "json": with atomic_write(filename, mode="wt") as f: f.write(self.to_json()) return @@ -1516,7 +1517,7 @@ def get_node_matching_name(self, name): node = self._get_node_matching_name(name) if node is None: - raise TreeError("No node named '%s' in %s" % (name, self.get_tip_names())) + raise TreeError(f"No node named '{name}' in {self.get_tip_names()}") return node def get_connecting_node(self, name1, name2): @@ -1525,7 +1526,7 @@ edge2 = self.get_node_matching_name(name2) lca = edge1.last_common_ancestor(edge2) if lca is None: - raise TreeError("No LCA found for %s and %s" % (name1, name2)) + raise TreeError(f"No LCA found for {name1} and {name2}") return lca def get_connecting_edges(self, name1, name2): @@ -2017,7 +2018,7 @@ """ newroot = self.get_node_matching_name(edge_name) if not newroot.children: - raise TreeError("Can't use a tip (%s) as the root" % repr(edge_name)) + raise TreeError(f"Can't use a tip ({repr(edge_name)}) as the root") return newroot.unrooted_deepcopy() def rooted_with_tip(self, outgroup_name): diff -Nru 
python-cogent-2021.10.12a1+dfsg/src/cogent3/data/energy_params.py python-cogent-2022.5.25a1+dfsg/src/cogent3/data/energy_params.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/data/energy_params.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/data/energy_params.py 2022-05-24 23:42:33.000000000 +0000 @@ -15,10 +15,10 @@ """ __author__ = "Amanda Birmingham" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Amanda Birmingham", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Amanda Birmingham" __email__ = "amanda.birmingham@thermofisher.com" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/data/__init__.py python-cogent-2022.5.25a1+dfsg/src/cogent3/data/__init__.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/data/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/data/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -3,10 +3,10 @@ __all__ = ["energy_params", "molecular_weight"] __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight", "Amanda Birmingham"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/data/molecular_weight.py python-cogent-2022.5.25a1+dfsg/src/cogent3/data/molecular_weight.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/data/molecular_weight.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/data/molecular_weight.py 2022-05-24 23:42:33.000000000 +0000 @@ -2,10 +2,10 @@ """Data for molecular weight calculations on proteins and nucleotides.""" __author__ = "Rob 
Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/dendrogram.py python-cogent-2022.5.25a1+dfsg/src/cogent3/draw/dendrogram.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/dendrogram.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/draw/dendrogram.py 2022-05-24 23:42:33.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Rahul Ghangas, Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Rahul Ghangas"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" @@ -655,7 +655,7 @@ x = self.tree.min_x if "left" in self.scale_bar else self.tree.max_x y = self.tree.min_y if "bottom" in self.scale_bar else self.tree.max_y scale = 0.1 * self.tree.max_x - text = "{:.1e}".format(scale) if scale < 1e-2 else "{:.2f}".format(scale) + text = f"{scale:.1e}" if scale < 1e-2 else f"{scale:.2f}" shape = { "type": "line", "x0": x, diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/dotplot.py python-cogent-2022.5.25a1+dfsg/src/cogent3/draw/dotplot.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/dotplot.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/draw/dotplot.py 2022-05-24 23:42:33.000000000 +0000 @@ -5,10 +5,10 @@ __author__ = "Rahul Ghangas, Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent 
Project" __credits__ = ["Gavin Huttley", "Peter Maxwell", "Rahul Ghangas"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" @@ -274,16 +274,14 @@ self.layout |= dict(title=title) trace = UnionDict( - dict( - type="scatter", - x=fwd[0], - y=fwd[1], - name="+ strand", - mode="lines", - line=dict(color="blue"), - xaxis=xaxis, - yaxis=yaxis, - ) + type="scatter", + x=fwd[0], + y=fwd[1], + name="+ strand", + mode="lines", + line=dict(color="blue"), + xaxis=xaxis, + yaxis=yaxis, ) self.add_trace(trace) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/drawable.py python-cogent-2022.5.25a1+dfsg/src/cogent3/draw/drawable.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/drawable.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/draw/drawable.py 2022-05-24 23:42:33.000000000 +0000 @@ -8,10 +8,10 @@ __author__ = "Rahul Ghangas and Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rahul Ghangas", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" @@ -53,45 +53,6 @@ return domains[element] -def _customise_sphinx_gallery_renderer(): - # this is an ugly hack to get around plotly's NOT robust handling of script path - # for automated file naming - import inspect - - from plotly.io import _base_renderers as base_render - from plotly.io._renderers import renderers - - class SphinxGalleryRenderer(base_render.ExternalRenderer): - def render(self, fig_dict): - # use the environment variable - # DOCUTILSCONFIG to get the location of the sphinx root doc dir - # and select the stack filename whose path is a sibling directory - # based on the maxinum number of matches to the root 
path - sphinx_root = pathlib.Path(os.environ.get("DOCUTILSCONFIG", "")).absolute() - sphinx_root = sphinx_root.resolve() - stack = inspect.stack() - max_match = 0 - for level in stack: - # parent directory - path = pathlib.Path(level.filename).absolute().resolve() - for i, (a, b) in enumerate(zip(path.parts, sphinx_root.parts)): - if a != b: - break - - if i > max_match: - max_match = i - filename = str(path) - - filename_root, _ = os.path.splitext(filename) - filename_html = filename_root + ".html" - filename_png = filename_root + ".png" - figure = base_render.return_figure_from_figure_or_data(fig_dict, True) - _ = base_render.write_html(fig_dict, file=filename_html) - base_render.write_image(figure, filename_png) - - renderers["sphinx_gallery"] = SphinxGalleryRenderer() - - def _show_(cls, renderer=None, **kwargs): """display figure @@ -114,9 +75,6 @@ elif renderer is None: renderer = PLOTLY_RENDERER - if renderer == "sphinx_gallery": - _customise_sphinx_gallery_renderer() - kwargs["renderer"] = renderer drawable = getattr(cls, "drawable", None) or cls fig = getattr(drawable, "figure", None) @@ -226,22 +184,21 @@ if not self.traces and hasattr(self, "_build_fig"): self._build_fig() - traces = self.traces if self.traces else [{}] - - if self.xtitle: - xtitle = self.xtitle - else: - xtitle = self.layout.xaxis.get("title", None) - - if self.ytitle: - ytitle = self.ytitle - else: - ytitle = self.layout.yaxis.get("title", None) + traces = self.traces or [{}] + xtitle = self.xtitle or self.layout.xaxis.get("title", None) + ytitle = self.ytitle or self.layout.yaxis.get("title", None) self.layout.xaxis.title = xtitle self.layout.yaxis.title = ytitle return UnionDict(data=traces, layout=self.layout) + @property + def plotly_figure(self): + """returns a plotly graph object""" + from plotly.graph_objects import Figure + + return Figure(**self.figure) + @extend_docstring_from(_show_) def show(self, renderer=None, **kwargs): _show_(self, renderer, **kwargs) diff -Nru 
python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/__init__.py python-cogent-2022.5.25a1+dfsg/src/cogent3/draw/__init__.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/draw/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -2,7 +2,7 @@ __all__ = ["dendrogram", "dotplot", "drawable", "letter", "logo"] -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __contributors__ = [ "Peter Maxwell", "Gavin Huttley", @@ -14,5 +14,5 @@ "Sheng Han Moses Koh", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/letter.py python-cogent-2022.5.25a1+dfsg/src/cogent3/draw/letter.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/letter.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/draw/letter.py 2022-05-24 23:42:33.000000000 +0000 @@ -21,10 +21,10 @@ __author__ = "Sheng Han Moses Koh" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Sheng Han Moses Koh", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/logo.py python-cogent-2022.5.25a1+dfsg/src/cogent3/draw/logo.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/draw/logo.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/draw/logo.py 2022-05-24 23:42:33.000000000 +0000 @@ -5,10 +5,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" 
-__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/best_likelihood.py python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/best_likelihood.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/best_likelihood.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/best_likelihood.py 2022-05-24 23:42:33.000000000 +0000 @@ -13,11 +13,11 @@ __author__ = "Helen Lindsay, Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Helen Lindsay", "Gavin Huttley", "Daniel McDonald"] cite = "Goldman, N. (1993). Statistical tests of models of DNA substitution. J Mol Evol, 36: 182-98" __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/bootstrap.py python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/bootstrap.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/bootstrap.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/bootstrap.py 2022-05-24 23:42:33.000000000 +0000 @@ -28,7 +28,7 @@ __author__ = "Gavin Huttley, Andrew Butterfield and Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Gavin Huttley", "Andrew Butterfield", @@ -37,7 +37,7 @@ "Peter Maxwell", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -75,7 +75,7 @@ elif pcs == 2: model_label = ["null", "alt "] else: - model_label = ["null"] + ["alt%s" % i for i in range(1, 
pcs)] + model_label = ["null"] + [f"alt{i}" for i in range(1, pcs)] @UI.display_wrap def each_model(alignment, ui): diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/coevolution.py python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/coevolution.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/coevolution.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/coevolution.py 2022-05-24 23:42:33.000000000 +0000 @@ -78,7 +78,7 @@ __author__ = "Greg Caporaso" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Greg Caporaso", "Gavin Huttley", @@ -87,7 +87,7 @@ "Rob Knight", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Greg Caporaso" __email__ = "gregcaporaso@gmail.com" __status__ = "Beta" @@ -1482,7 +1482,7 @@ bad_seqs.append(name) if bad_seqs: raise ValueError( - "Ambiguous characters in sequences: %s" % "; ".join(map(str, bad_seqs)) + f"Ambiguous characters in sequences: {'; '.join(map(str, bad_seqs))}" ) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/discrete_markov.py python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/discrete_markov.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/discrete_markov.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/discrete_markov.py 2022-05-24 23:42:33.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -58,7 +58,7 @@ all_cells = [] for (i, v) in enumerate(self.uniq): if v is None: - raise ValueError("input %s not set" % self.name) + raise 
ValueError(f"input {self.name} not set") assert hasattr(v, "get_default_value"), v value = v.get_default_value() assert hasattr(value, "shape"), value diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/distance.py python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/distance.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/distance.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/distance.py 2022-05-24 23:42:33.000000000 +0000 @@ -12,10 +12,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Peter Maxwell", "Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -260,7 +260,7 @@ """ pairwise_stats = {} - assert param in self._est_params + ["length"], "unrecognised param %s" % param + assert param in self._est_params + ["length"], f"unrecognised param {param}" if not self._param_ests: return None @@ -355,7 +355,7 @@ for comp_names, param_vals in list(self._param_ests.items()): tips = [] for name in comp_names: - tips.append(repr(name) + ":%s" % param_vals[name]) + tips.append(repr(name) + f":{param_vals[name]}") trees.append("(" + ",".join(tips) + ");") return trees diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/fast_distance.py python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/fast_distance.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/fast_distance.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/fast_distance.py 2022-05-24 23:42:33.000000000 +0000 @@ -15,10 +15,10 @@ __author__ = "Gavin Huttley, Yicheng Zhu and Ben Kaehler" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = 
["Gavin Huttley", "Yicheng Zhu", "Ben Kaehler"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" # pending addition of protein distance metrics @@ -395,7 +395,7 @@ continue name_2 = names[j] - ui.display("%s vs %s" % (name_1, name_2), done / to_do) + ui.display(f"{name_1} vs {name_2}", done / to_do) done += 1 matrix.fill(0) s2 = self.indexed_seqs[j] @@ -670,7 +670,7 @@ if "moltype" in kwargs and kwargs.get("moltype") is None: kwargs.pop("moltype") if name not in _calculators: - raise ValueError('Unknown pairwise distance calculator "%s"' % name) + raise ValueError(f'Unknown pairwise distance calculator "{name}"') calc = _calculators[name] return calc(*args, **kwargs) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/__init__.py python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/__init__.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -17,7 +17,7 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Gavin Huttley", "Peter Maxwell", @@ -29,7 +29,7 @@ "Greg Caporaso", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/likelihood_calculation.py python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/likelihood_calculation.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/likelihood_calculation.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/likelihood_calculation.py 2022-05-24 23:42:33.000000000 +0000 @@ -27,10 +27,10 @@ __author__ = "Peter 
Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/likelihood_function.py python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/likelihood_function.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/likelihood_function.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/likelihood_function.py 2022-05-24 23:42:33.000000000 +0000 @@ -22,7 +22,7 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Gavin Huttley", "Andrew Butterfield", @@ -34,7 +34,7 @@ "Ananias Iliadis", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -147,24 +147,20 @@ simple_to_rich[simple_param].add(rich_param) rich_to_simple[rich_param].add(simple_param) - for rich_param in rich_to_simple: - simple_counterparts = rich_to_simple[rich_param] + for rich_param, simple_counterparts in rich_to_simple.items(): if len(simple_counterparts) == 1: continue sized_simple = [(len(simple[param]), param) for param in simple_counterparts] sized_simple.sort() if sized_simple[0][0] == sized_simple[1][0]: - msg = "%s and %s tied for matrix space" % ( - sized_simple[0][1], - sized_simple[1][1], - ) + msg = f"{sized_simple[0][1]} and {sized_simple[1][1]} tied for matrix space" raise ValueError(msg) _, chosen = sized_simple.pop(0) rich_to_simple[rich_param] = [chosen] for _, simple_param in sized_simple: - simple_to_rich[simple_param].pop(rich_param) + 
simple_to_rich[simple_param].remove(rich_param) return simple_to_rich @@ -553,7 +549,7 @@ def _repr_html_(self): """for jupyter notebook display""" try: - lnL = "

log-likelihood = %.4f

" % self.get_log_likelihood() + lnL = f"

log-likelihood = {self.get_log_likelihood():.4f}

" except ValueError: # alignment probably not yet set lnL = "" @@ -564,7 +560,7 @@ table_.title = table_.title.capitalize() table_.set_repr_policy(show_shape=False) results[i] = table_._repr_html_() - results = ["

%s

" % title, lnL, nfp] + results + results = [f"

{title}

", lnL, nfp] + results return "\n".join(results) def __repr__(self): @@ -574,7 +570,7 @@ title, results = self._for_display() try: - lnL = "log-likelihood = %.4f" % self.get_log_likelihood() + lnL = f"log-likelihood = {self.get_log_likelihood():.4f}" except ValueError: # alignment probably not yet set lnL = None @@ -758,8 +754,7 @@ """ if motif_probs is None: motif_probs = self.get_motif_probs_by_node() - node_names = self.tree.get_node_names() - node_names.remove("root") + node_names = [n for n in self.tree.get_node_names() if n != "root"] lengths = {e: self.get_param_value("length", edge=e) for e in node_names} if not isinstance(self.model, substitution_model.Stationary): ens = {} @@ -854,7 +849,7 @@ row = [row[k] for k in heading_names] list_table.append(row) if table_dims: - title = ["", "%s params" % " ".join(table_dims)][with_titles] + title = ["", f"{' '.join(table_dims)} params"][with_titles] else: title = ["", "global params"][with_titles] row_ids = None @@ -903,7 +898,7 @@ def to_rich_dict(self): """returns detailed info on object, used by to_json""" - data = self._serialisable.copy() + data = deepcopy(self._serialisable) for key in ("model", "tree"): del data[key] @@ -925,11 +920,19 @@ alignment = self.get_param_value("alignment").to_rich_dict() mprobs = self.get_motif_probs().to_dict() else: + # this is a multi-locus likelihood function alignment = {a["locus"]: a["value"] for a in aln_defn.get_param_rules()} - mprobs = self.get_motif_probs() for k in alignment: alignment[k] = alignment[k].to_rich_dict() - mprobs[k] = mprobs[k].to_dict() + + mprobs = self.get_motif_probs() + if isinstance(mprobs, dict): + # separate mprobs per locus + for k in alignment: + mprobs[k] = mprobs[k].to_dict() + else: + # motif probs are constrained to be the same between loci + mprobs = self.get_motif_probs().to_dict() DLC = self.all_psubs_DLC() try: diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/likelihood_tree_numba.py 
python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/likelihood_tree_numba.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/likelihood_tree_numba.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/likelihood_tree_numba.py 2022-05-24 23:42:33.000000000 +0000 @@ -7,7 +7,7 @@ __copyright__ = "Copyright 2007-2019, The Cogent Project" __credits__ = ["Peter Maxwell", "Rob Knight", "Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/likelihood_tree.py python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/likelihood_tree.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/likelihood_tree.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/likelihood_tree.py 2022-05-24 23:42:33.000000000 +0000 @@ -13,10 +13,10 @@ numerictypes = numpy.core.numerictypes.sctype2char __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -251,7 +251,7 @@ except alphabet.AlphabetError as detail: motif = str(detail) posn = list(sequence2).index(motif) * motif_len - raise ValueError("%s at %s:%s not in alphabet" % (repr(motif), seq_name, posn)) + raise ValueError(f"{motif!r} at {seq_name!r}:{posn} not in alphabet") return LikelihoodTreeLeaf( uniq_motifs, likelihoods, counts, index, seq_name, alphabet, sequence diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/models.py python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/models.py --- 
python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/models.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/models.py 2022-05-24 23:42:33.000000000 +0000 @@ -15,84 +15,88 @@ from cogent3 import DNA from cogent3.evolve import ns_substitution_model, substitution_model -from cogent3.evolve.predicate import MotifChange, omega, replacement -from cogent3.evolve.solved_models import F81, HKY85, TN93 +from cogent3.evolve.predicate import MotifChange, omega +from cogent3.evolve.solved_models import _solved_nucleotide from cogent3.evolve.substitution_model import _SubstitutionModel from cogent3.util.table import Table __author__ = "Matthew Wakefield" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Matthew Wakefield", "Peter Maxwell", "Gavin Huttley", "James Kondilios"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" -nucleotide_models = [ - "JC69", - "K80", - "F81", - "HKY85", - "TN93", - "GTR", - "ssGN", - "GN", - "BH", - "DT", -] - -codon_models = [ - "CNFGTR", - "CNFHKY", - "MG94HKY", - "MG94GTR", - "GY94", - "Y98", - "H04G", - "H04GK", - "H04GGK", - "GNC", -] -protein_models = ["DSO78", "AH96", "AH96_mtmammals", "JTT92", "WG01"] +nucleotide_models = [] +codon_models = [] +protein_models = [] +models = [] + + +_model_types = { + "nucleotide": nucleotide_models, + "codon": codon_models, + "protein": protein_models, +} +_all_models = {} + + +class register_model: + """ + decorator for registering functions that construct substitution models. 
+ + The name of the wrapped function becomes the model abbreviation used + for selecting the model with ``get_models()`` + + Parameters + ---------- + model_type: str + valid values are 'codon', 'nucleotide', 'protein' + """ + + def __init__(self, model_type: str): + assert model_type in _model_types, f"{model_type!r} not in {list(_model_types)}" + self._model_type = model_type + + def __call__(self, func): + series = _model_types[self._model_type] + name = func.__name__ + if name in models: + raise ValueError(f"{name!r} already in models") + + series.append(name) + models.append(name) + _all_models[name] = func + return func -models = nucleotide_models + codon_models + protein_models # Substitution model rate matrix predicates + _gtr_preds = [MotifChange(x, y) for x, y in ["AC", "AG", "AT", "CG", "CT"]] -_kappa = (~MotifChange("R", "Y")).aliased("kappa") _omega = omega +_kappa_y = MotifChange("T", "C").aliased("kappa_y") +_kappa_r = MotifChange("A", "G").aliased("kappa_r") +_kappa = (_kappa_y | _kappa_r).aliased("kappa") _cg = MotifChange("CG").aliased("G") _cg_k = (_cg & _kappa).aliased("G.K") def _make_gn_preds(): - _general_preds = [] - for f, t in permutations("ACTG", 2): - if f != "T" or t != "G": # Match GTR's reference cell - _general_preds.append(MotifChange(f, t, forward_only=True)) - return _general_preds + return [ + MotifChange(f, t, forward_only=True) + for f, t in permutations("ACTG", 2) + if f != "T" or t != "G" + ] _general_preds = _make_gn_preds() -def _make_symn_preds(): - pair = {"A": "T", "T": "A", "G": "C", "C": "G"} - sym_preds = [] - for f, t in "AG", "AT", "CG", "CT", "GT": - sym_preds.append( - MotifChange(f, t, forward_only=True) - | MotifChange(pair[f], pair[t], forward_only=True) - ) - return sym_preds - - -_sym_preds = _make_symn_preds() - - +@register_model("nucleotide") def BH(optimise_motif_probs=True, **kw): """Barry and Hartigan Discrete Time substitution model @@ -103,6 +107,7 @@ ) +@register_model("nucleotide") def 
DT(optimise_motif_probs=True, motif_length=1, **kw): """ Discrete Time substitution model (non-stationary, non-reversible). @@ -116,6 +121,7 @@ return ns_substitution_model.DiscreteSubstitutionModel(alpha, **kw) +@register_model("nucleotide") def GN(optimise_motif_probs=True, **kw): """General Markov Nucleotide (non-stationary, non-reversible). @@ -129,35 +135,40 @@ return ns_substitution_model.NonReversibleNucleotide(**kwargs) +@register_model("nucleotide") def ssGN(optimise_motif_probs=True, **kw): """strand-symmetric general Markov nucleotide (non-stationary, non-reversible). Kaehler, 2017, Journal of Theoretical Biology 420: 144–51""" # note the StrandSymmetric class predefines the predicates and name - sm = ns_substitution_model.StrandSymmetric( + return ns_substitution_model.StrandSymmetric( optimise_motif_probs=optimise_motif_probs, name="ssGN", **kw ) - return sm +@register_model("nucleotide") def K80(**kw): """Kimura 1980""" required = dict(name="K80", equal_motif_probs=True, optimise_motif_probs=False) + kw["recode_gaps"] = kw.get("recode_gaps", True) kwargs = {} kwargs.update(kw) kwargs.update(required) return HKY85(**kwargs) +@register_model("nucleotide") def JC69(**kw): """Jukes and Cantor's 1969 model""" required = dict(name="JC69", equal_motif_probs=True, optimise_motif_probs=False) + kw["recode_gaps"] = kw.get("recode_gaps", True) kwargs = {} kwargs.update(kw) kwargs.update(required) return F81(**kwargs) +@register_model("nucleotide") def GTR(**kw): """General Time Reversible nucleotide substitution model.""" required = dict( @@ -169,7 +180,34 @@ return substitution_model.TimeReversibleNucleotide(**kwargs) +@register_model("nucleotide") +def TN93(**kw): + """Tamura and Nei 1993 model""" + kw["recode_gaps"] = kw.get("recode_gaps", True) + kw["name"] = "TN93" + return _solved_nucleotide([_kappa_y, _kappa_r], **kw) + + +@register_model("nucleotide") +def HKY85(**kw): + """Hasegawa, Kishino and Yano 1985 model""" + kw["recode_gaps"] = 
kw.get("recode_gaps", True) + # this function called by others, so we don't overwrite name if it exists + kw["name"] = kw.get("name", "HKY85") + return _solved_nucleotide([_kappa], **kw) + + +@register_model("nucleotide") +def F81(**kw): + """Felsenstein's 1981 model""" + kw["recode_gaps"] = kw.get("recode_gaps", True) + # this function called by others, so we don't overwrite name if it exists + kw["name"] = kw.get("name", "F81") + return _solved_nucleotide([], **kw) + + # Codon Models +@register_model("codon") def CNFGTR(**kw): """Conditional nucleotide frequency codon substitution model, GTR variant (with params analagous to the nucleotide GTR model). @@ -187,6 +225,7 @@ return substitution_model.TimeReversibleCodon(**kwargs) +@register_model("codon") def CNFHKY(**kw): """Conditional nucleotide frequency codon substitution model, HKY variant (with kappa, the ratio of transitions to transversions) @@ -204,6 +243,7 @@ return substitution_model.TimeReversibleCodon(**kwargs) +@register_model("codon") def MG94HKY(**kw): """Muse and Gaut 1994 codon substitution model, HKY variant (with kappa, the ratio of transitions to transversions) @@ -221,6 +261,7 @@ return substitution_model.TimeReversibleCodon(**kwargs) +@register_model("codon") def MG94GTR(**kw): """Muse and Gaut 1994 codon substitution model, GTR variant (with params analagous to the nucleotide GTR model) @@ -238,6 +279,7 @@ return substitution_model.TimeReversibleCodon(**kwargs) +@register_model("codon") def GY94(**kw): """Goldman and Yang 1994 codon substitution model. @@ -249,6 +291,7 @@ return Y98(**kwargs) +@register_model("codon") def Y98(**kw): """Yang's 1998 substitution model, a derivative of the GY94. @@ -266,6 +309,7 @@ return substitution_model.TimeReversibleCodon(**kwargs) +@register_model("codon") def H04G(**kw): """Huttley 2004 CpG substitution model. Includes a term for substitutions to or from CpG's. 
@@ -283,6 +327,7 @@ return substitution_model.TimeReversibleCodon(**kwargs) +@register_model("codon") def H04GK(**kw): """Huttley 2004 CpG substitution model. Includes a term for transition substitutions to or from CpG's. @@ -300,6 +345,7 @@ return substitution_model.TimeReversibleCodon(**kwargs) +@register_model("codon") def H04GGK(**kw): """Huttley 2004 CpG substitution model. Includes a general term for substitutions to or from CpG's and an adjustment for CpG transitions. @@ -317,6 +363,7 @@ return substitution_model.TimeReversibleCodon(**kwargs) +@register_model("codon") def GNC(optimise_motif_probs=True, **kw): """General Nucleotide Codon, a non-reversible codon model. @@ -2679,6 +2726,7 @@ } +@register_model("protein") def DSO78(**kw): """Dayhoff et al 1978 empirical protein model Dayhoff, MO, Schwartz RM, and Orcutt, BC. 1978 @@ -2691,6 +2739,7 @@ ) +@register_model("protein") def JTT92(**kw): """Jones, Taylor and Thornton 1992 empirical protein model Jones DT, Taylor WR, Thornton JM. @@ -2702,6 +2751,7 @@ ) +@register_model("protein") def AH96(**kw): """Adachi and Hasegawa 1996 empirical model for mitochondrial proteins. Adachi J, Hasegawa M. @@ -2734,17 +2784,17 @@ # already a substitution model return name if name not in models: - msg = 'Unknown model "%s". Model names are case sensitive!' % name + msg = f'Unknown model "{name}". Model names are case sensitive!' raise ValueError(msg) - g = globals() - return g[name](**kw) + return _all_models[name](**kw) def mtREV(**kw): return AH96(**kw) +@register_model("protein") def AH96_mtmammals(**kw): """Adachi and Hasegawa 1996 empirical model for mammalian mitochondrial proteins. @@ -2761,6 +2811,7 @@ return AH96_mtmammals(**kw) +@register_model("protein") def WG01(**kw): """Whelan and Goldman 2001 empirical model for globular proteins. Whelan S, Goldman N. 
@@ -2776,11 +2827,6 @@ def available_models(model_types=None): """returns Table listing the pre-defined substitution models""" column_headings = ["Model Type", "Abbreviation", "Description"] - _model_types = { - "nucleotide": nucleotide_models, - "codon": codon_models, - "protein": protein_models, - } if model_types is not None: model_types = model_types if not isinstance(model_types, str) else [model_types] else: @@ -2788,12 +2834,12 @@ rows = [] for mod_type in model_types: - for abbreviation in _model_types[mod_type]: - if eval(abbreviation).__doc__: - description = " ".join(eval(abbreviation).__doc__.split()) + for abbrev in _model_types[mod_type]: + if _all_models[abbrev].__doc__: + description = " ".join(_all_models[abbrev].__doc__.split()) else: description = "" - rows.append([mod_type, abbreviation, description]) + rows.append([mod_type, abbrev, description]) return Table( header=column_headings, diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/motif_prob_model.py python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/motif_prob_model.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/motif_prob_model.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/motif_prob_model.py 2022-05-24 23:42:33.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -28,7 +28,7 @@ elif mprob_model in ["word", "tuple", None]: return SimpleMotifProbModel(tuple_alphabet) else: - raise ValueError("Unknown mprob model '%s'" % str(mprob_model)) + raise ValueError(f"Unknown mprob model '{str(mprob_model)}'") class MotifProbModel(object): diff -Nru 
python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/ns_substitution_model.py python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/ns_substitution_model.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/ns_substitution_model.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/ns_substitution_model.py 2022-05-24 23:42:33.000000000 +0000 @@ -15,10 +15,10 @@ __author__ = "Peter Maxwell, Gavin Huttley and Andrew Butterfield" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __contributors__ = ["Gavin Huttley", "Peter Maxwell", "Ben Kaeheler", "Ananias Iliadis"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -58,7 +58,7 @@ for (i, x) in enumerate(alphabet): for j in numpy.flatnonzero(mask[i]): y = alphabet[j] - self.parameter_order.append("%s/%s" % (x, y)) + self.parameter_order.append(f"{x}/{y}") self.param_pick[i, j] = len(self.parameter_order) _ = self.parameter_order.pop() self.symmetric = False @@ -122,6 +122,7 @@ row_total = numpy.dot(mprobs, R[j]) col_total = numpy.dot(mprobs, R[:, j]) required = row_total - col_total + required = abs(required) if numpy.allclose(required, 0.0) else required if required < 0.0: raise ParameterOutOfBoundsError R[i, j] = required / mprobs[i] diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/pairwise_distance_numba.py python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/pairwise_distance_numba.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/pairwise_distance_numba.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/pairwise_distance_numba.py 2022-05-24 23:42:33.000000000 +0000 @@ -5,7 +5,7 @@ __copyright__ = "Copyright 2007-2019, The Cogent Project" __credits__ = ["Gavin Huttley", "Yicheng Zhu", "Ben Kaehler", "Stephen Ma"] 
__license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/parameter_controller.py python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/parameter_controller.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/parameter_controller.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/parameter_controller.py 2022-05-24 23:42:33.000000000 +0000 @@ -20,10 +20,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Andrew Butterfield", "Peter Maxwell", "Gavin Huttley", "Helen Lindsay"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.ed.au" __status__ = "Production" @@ -31,11 +31,11 @@ def _category_names(dimension, specified): if type(specified) is int: - cats = ["%s%s" % (dimension, i) for i in range(specified)] + cats = [f"{dimension}{i}" for i in range(specified)] else: cats = tuple(specified) assert len(cats) >= 1, cats - assert len(set(cats)) == len(cats), "%s names must be unique" % dimension + assert len(set(cats)) == len(cats), f"{dimension} names must be unique" return list(cats) @@ -56,7 +56,7 @@ For usage see the set_param_rule method. """ - # Basically wrapper around the more generic recalulation.ParameterController + # Basically wrapper around the more generic recalculation.ParameterController # class, which doesn't know about trees. 
def __init__( @@ -148,15 +148,11 @@ pseudocount=None, **kwargs, ): - counts = self.model.count_motifs(align, include_ambiguity=include_ambiguity) if is_constant is None: is_constant = not self.optimise_motif_probs - if pseudocount is None: - if is_constant: - pseudocount = 0.0 - else: - pseudocount = 0.5 + + pseudocount = 0.0 if is_constant or (counts != 0).all() else pseudocount or 0.5 counts += pseudocount mprobs = counts / (1.0 * sum(counts)) self.set_motif_probs( @@ -254,9 +250,7 @@ init=None, upper=None, ): - """modifes the scope of all submodel rate, aside from excluded params, - by constructing a list of parameter rules and using the - apply_param_rules method + """modifies the scope of substitution model rate params Parameters ---------- @@ -264,7 +258,7 @@ name(s) of substitution model predicate(s) to be excluded edge_sets series of dicts with an 'edges' key. Can also specify - is_independent, is_contstant etc.. If those are not provided, the + is_independent, is_constant etc.. If those are not provided, the method argument values are applied is_independent : bool whether edges in all edge sets are to be considered independent. @@ -304,8 +298,19 @@ if edge_sets is None: # this just makes the following algorithm consistent + # but we need to exclude any edges assigned to discrete psubs + dpsubs = self.defn_for.get("dpsubs", None) + exclude_edges = set() + if dpsubs: + dims = dpsubs.valid_dimensions + index = dims.index("edge") + for k in dpsubs.assignments: + exclude_edges.add(k[index]) + edge_sets = [ - dict(edges=[n]) for n in self.tree.get_node_names(includeself=False) + dict(edges=[n]) + for n in self.tree.get_node_names(includeself=False) + if n not in exclude_edges ] elif type(edge_sets) == dict: edge_sets = [edge_sets] @@ -389,7 +394,7 @@ if single in scope_info: v = scope_info.pop(single) if v: - assert isinstance(v, str), "%s=, maybe?" % plural + assert isinstance(v, str), f"{plural}=, maybe?" 
assert plural not in scope_info scopes[single] = [v] elif plural in scope_info: @@ -514,7 +519,7 @@ tip_names = set(self.tree.get_tip_names()) for index, aln in enumerate(aligns): if len(aligns) > 1: - locus_name = "for locus '%s'" % self.locus_names[index] + locus_name = f"for locus '{self.locus_names[index]}'" else: locus_name = "" assert not set(aln.names).symmetric_difference( diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/predicate.py python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/predicate.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/predicate.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/predicate.py 2022-05-24 23:42:33.000000000 +0000 @@ -15,10 +15,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -110,9 +110,9 @@ def __repr__(self): if hasattr(self, "_op_repr"): - return "%s(%s)" % (self._op_repr, self.subpredicate) + return f"{self._op_repr}({self.subpredicate})" else: - return "%s(%s)" % (self.__class__.__name__, self.subpredicate) + return f"{self.__class__.__name__}({self.subpredicate})" class _GenericPredicate(predicate): @@ -206,7 +206,7 @@ diff = "[%d]" % self.diff_at else: diff = "" - return "%s>%s%s" % (self.from_motif, self.to_motif, diff) + return f"{self.from_motif}>{self.to_motif}{diff}" def test_motif(self, motifs, query): """positions where motif pattern is found in query""" @@ -262,7 +262,7 @@ diff = "[%d]" % self.diff_at else: diff = "" - return "%s/%s%s" % (self.from_motif, self.to_motif, diff) + return f"{self.from_motif}/{self.to_motif}{diff}" def test_motifs(self, from_motifs, to_motifs, x, y): preF = self.test_motif(from_motifs, x) diff 
-Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/simulate.py python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/simulate.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/simulate.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/simulate.py 2022-05-24 23:42:33.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/solved_models_numba.py python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/solved_models_numba.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/solved_models_numba.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/solved_models_numba.py 2022-05-24 23:42:33.000000000 +0000 @@ -9,7 +9,7 @@ __copyright__ = "Copyright 2007-2019, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/solved_models.py python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/solved_models.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/solved_models.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/solved_models.py 2022-05-24 23:42:33.000000000 +0000 @@ -6,7 +6,6 @@ from numpy.testing import assert_allclose -from cogent3.evolve.predicate import MotifChange from cogent3.evolve.substitution_model import ( CalcDefn, TimeReversibleNucleotide, @@ -17,10 +16,10 @@ __author__ = "Peter 
Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -71,31 +70,3 @@ klass = TimeReversibleNucleotide kw["model_gaps"] = False return klass(predicates=predicates, **kw) - - -kappa_y = MotifChange("T", "C").aliased("kappa_y") -kappa_r = MotifChange("A", "G").aliased("kappa_r") -kappa = (kappa_y | kappa_r).aliased("kappa") - - -def TN93(**kw): - """Tamura and Nei 1993 model""" - kw["recode_gaps"] = True - kw["name"] = "TN93" - return _solved_nucleotide([kappa_y, kappa_r], **kw) - - -def HKY85(**kw): - """Hasegawa, Kishino and Yano 1985 model""" - kw["recode_gaps"] = True - # this function called by others, so we don't overwrite name if it exists - kw["name"] = kw.get("name", "HKY85") - return _solved_nucleotide([kappa], **kw) - - -def F81(**kw): - """Felsenstein's 1981 model""" - kw["recode_gaps"] = True - # this function called by others, so we don't overwrite name if it exists - kw["name"] = kw.get("name", "F81") - return _solved_nucleotide([], **kw) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/substitution_calculation.py python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/substitution_calculation.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/substitution_calculation.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/substitution_calculation.py 2022-05-24 23:42:33.000000000 +0000 @@ -16,10 +16,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" 
__email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -67,7 +67,7 @@ return self.eigen(Q) except (ArithmeticError, LinAlgError) as detail: if not self.given_expm_warning: - warnings.warn("using slow exponentiator because '%s'" % str(detail)) + warnings.warn(f"using slow exponentiator because '{str(detail)}'") self.given_expm_warning = True return PadeExponentiator(Q) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/substitution_model.py python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/substitution_model.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/evolve/substitution_model.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/evolve/substitution_model.py 2022-05-24 23:42:33.000000000 +0000 @@ -34,6 +34,7 @@ import warnings from collections.abc import Callable +from copy import deepcopy import numpy @@ -66,7 +67,7 @@ __author__ = "Peter Maxwell, Gavin Huttley and Andrew Butterfield" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __contributors__ = [ "Gavin Huttley", "Andrew Butterfield", @@ -77,7 +78,7 @@ "Von Bing Yap", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -150,7 +151,7 @@ motif_probs Dictionary of probabilities. optimise_motif_probs: bool - Treat like other free parameters. Any values set by the other + Treat like other free parameters. Any values set by the other motif_prob options will be used as initial values. equal_motif_probs: bool Flag to set alignment motif probs equal. 
@@ -254,7 +255,7 @@ return (), data def to_rich_dict(self, for_pickle=False): - data = self._serialisable.copy() + data = deepcopy(self._serialisable) if not for_pickle: for key, value in data.items(): type_ = get_object_provenance(value) @@ -279,19 +280,16 @@ def get_param_list(self): return [] - def __str__(self): - s = ["\n%s (" % self.__class__.__name__] - s.append( - "name = '%s'; type = '%s';" - % (getattr(self, "name", None), getattr(self, "type", None)) - ) + def __repr__(self): + s = [] + s.append(f"name={getattr(self, 'name', None)!r};") if hasattr(self, "predicate_masks"): parlist = list(self.predicate_masks.keys()) - s.append("params = %s;" % parlist) + s.append(f"params={parlist};") motifs = self.get_motifs() - s.append("number of motifs = %s;" % len(motifs)) - s.append("motifs = %s)\n" % motifs) - return " ".join(s) + s.append(f"num_motifs={len(motifs)};") + s.append(f"motifs={motifs})") + return f"{self.__class__.__name__}({' '.join(s)})" def get_alphabet(self): return self.alphabet @@ -505,7 +503,7 @@ elif distribution in [None, "free"]: distribution = MonotonicDefn elif isinstance(distribution, str): - raise ValueError('Unknown distribution "%s"' % distribution) + raise ValueError(f'Unknown distribution "{distribution}"') self.distrib_class = distribution if not partitioned_params: @@ -533,7 +531,7 @@ for param in self.partitioned_params: if param not in self.parameter_order and param != "rate": desc = ["partitioned", "ordered"][param == self.ordered_param] - raise ValueError('%s param "%s" unknown' % (desc, param)) + raise ValueError(f'{desc} param "{param}" unknown') def _is_instantaneous(self, x, y): diffs = sum([X != Y for (X, Y) in zip(x, y)]) @@ -732,12 +730,12 @@ # Give a clearer error in simple cases like always false or true. for (name, matrix) in list(predicate_masks.items()): if numpy.alltrue((matrix == 0).flat): - raise ValueError("Predicate %s is always false." 
% name) + raise ValueError(f"Predicate {name} is always false.") predicates_plus_scale = predicate_masks.copy() predicates_plus_scale[None] = self._instantaneous_mask for (name, matrix) in list(predicate_masks.items()): if numpy.alltrue((matrix == self._instantaneous_mask).flat): - raise ValueError("Predicate %s is always true." % name) + raise ValueError(f"Predicate {name} is always true.") if redundancy_in_predicate_masks(predicate_masks): raise ValueError("Redundancy in predicates.") if redundancy_in_predicate_masks(predicates_plus_scale): @@ -783,7 +781,7 @@ labels.insert(0, r"From\To") if self.name: - title = "%s rate matrix" % self.name + title = f"{self.name} rate matrix" else: title = "rate matrix" @@ -870,7 +868,7 @@ for (key, pred) in rules: (label, mask) = self.adapt_predicate(pred, key) if label in predicate_masks: - raise KeyError('Duplicate predicate name "%s"' % label) + raise KeyError(f'Duplicate predicate name "{label}"') predicate_masks[label] = mask order.append(label) return predicate_masks, order diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/format/alignment.py python-cogent-2022.5.25a1+dfsg/src/cogent3/format/alignment.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/format/alignment.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/format/alignment.py 2022-05-24 23:42:33.000000000 +0000 @@ -7,14 +7,14 @@ from cogent3.format.paml import alignment_to_paml from cogent3.format.phylip import alignment_to_phylip from cogent3.parse.record import FileFormatError -from cogent3.util.misc import atomic_write +from cogent3.util.io import atomic_write __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = 
"gavin.huttley@anu.edu.au" __status__ = "Production" @@ -49,7 +49,7 @@ def write_alignment_to_file(f, alignment, format, **kw): format = format.lower() if format not in FORMATTERS: - raise FileFormatError("Unsupported file format %s" % format) + raise FileFormatError(f"Unsupported file format {format}") contents = FORMATTERS[format](alignment, **kw) f.write(contents) f.close() diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/format/bedgraph.py python-cogent-2022.5.25a1+dfsg/src/cogent3/format/bedgraph.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/format/bedgraph.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/format/bedgraph.py 2022-05-24 23:42:33.000000000 +0000 @@ -2,10 +2,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "alpha" @@ -63,7 +63,7 @@ if not str(val) in valid_values[key]: raise AssertionError( "Invalid bedgraph key/val pair: " - + "got %s=%s; valid values are %s" % (key, val, valid_values[key]) + + f"got {key}={val}; valid values are {valid_values[key]}" ) @@ -93,7 +93,7 @@ if not set(kwargs) <= set(bedgraph_fields): not_allowed = set(kwargs) - set(bedgraph_fields) raise RuntimeError( - "incorrect arguments provided to bedgraph %s" % str(list(not_allowed)) + f"incorrect arguments provided to bedgraph {str(list(not_allowed))}" ) if "altColor" in kwargs: @@ -105,7 +105,7 @@ kwargs[key] = booleans(key, kwargs[key]) raise_invalid_vals(key, kwargs[key]) - header_suffix.append("%s=%s" % (key, kwargs[key])) + header_suffix.append(f"{key}={kwargs[key]}") header += header_suffix @@ -136,9 +136,7 @@ header = get_header(name=name, description=description, color=color, **kwargs) - make_data_row = lambda x: "\t".join( - 
list(map(str, x[:3])) + ["{0:.2f}".format(x[-1])] - ) + make_data_row = lambda x: "\t".join(list(map(str, x[:3])) + [f"{x[-1]:.2f}"]) # get independent spans for each chromosome bedgraph_data = [] data = [] diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/format/clustal.py python-cogent-2022.5.25a1+dfsg/src/cogent3/format/clustal.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/format/clustal.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/format/clustal.py 2022-05-24 23:42:33.000000000 +0000 @@ -8,10 +8,10 @@ __author__ = "Jeremy Widmann" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Jeremy Widmann"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Jeremy Widmann" __email__ = "jeremy.widmann@colorado.edu" __status__ = "Development" @@ -67,12 +67,7 @@ while curr_ix < aln_len: clustal_list.extend( [ - "%s%s%s" - % ( - x, - " " * (max_spaces - len(x)), - y[curr_ix : curr_ix + wrap], - ) + f"{x}{' ' * (max_spaces - len(x))}{y[curr_ix:curr_ix + wrap]}" for x, y in zip(order, ordered_seqs) ] ) @@ -81,7 +76,7 @@ else: clustal_list.extend( [ - "%s%s%s" % (x, " " * (max_spaces - len(x)), y) + f"{x}{' ' * (max_spaces - len(x))}{y}" for x, y in zip(order, ordered_seqs) ] ) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/format/fasta.py python-cogent-2022.5.25a1+dfsg/src/cogent3/format/fasta.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/format/fasta.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/format/fasta.py 2022-05-24 23:42:33.000000000 +0000 @@ -6,10 +6,10 @@ __author__ = "Jeremy Widmann" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Jeremy Widmann", "Rob Knight", "Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" 
+__version__ = "2022.5.25a1" __maintainer__ = "Jeremy Widmann" __email__ = "jeremy.widmann@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/format/gde.py python-cogent-2022.5.25a1+dfsg/src/cogent3/format/gde.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/format/gde.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/format/gde.py 2022-05-24 23:42:33.000000000 +0000 @@ -6,10 +6,10 @@ __author__ = "Thomas La" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Jeremy Widmann", "Rob Knight", "Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Thomas La" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/format/__init__.py python-cogent-2022.5.25a1+dfsg/src/cogent3/format/__init__.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/format/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/format/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -18,7 +18,7 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Jeremy Widmann", "Gavin Huttley", @@ -29,7 +29,7 @@ "Marcin Cieslik", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/format/nexus.py python-cogent-2022.5.25a1+dfsg/src/cogent3/format/nexus.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/format/nexus.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/format/nexus.py 2022-05-24 23:42:33.000000000 +0000 @@ -1,8 +1,8 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" 
+__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -30,14 +30,14 @@ nexus_out = ["#NEXUS\n\nbegin data;"] nexus_out.append(" dimensions ntax=%d nchar=%d;" % (num_seq, aln_len)) nexus_out.append( - " format datatype=%s interleave=yes missing=? " % seq_type + "gap=-;" + f" format datatype={seq_type} interleave=yes missing=? " + "gap=-;" ) nexus_out.append(" matrix") cur_ix = 0 names_seqs = sorted(aln.named_seqs.items()) while cur_ix < aln_len: nexus_out.extend( - [" %s %s" % (x, y[cur_ix : cur_ix + wrap]) for x, y in names_seqs] + [f" {x} {y[cur_ix:cur_ix + wrap]}" for x, y in names_seqs] ) nexus_out.append("") cur_ix += wrap diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/format/paml.py python-cogent-2022.5.25a1+dfsg/src/cogent3/format/paml.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/format/paml.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/format/paml.py 2022-05-24 23:42:33.000000000 +0000 @@ -6,10 +6,10 @@ __author__ = "Thomas La" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Jeremy Widmann", "Rob Knight", "Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Thomas La" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/format/phylip.py python-cogent-2022.5.25a1+dfsg/src/cogent3/format/phylip.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/format/phylip.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/format/phylip.py 2022-05-24 23:42:33.000000000 +0000 @@ -6,10 +6,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 
2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -69,6 +69,6 @@ else: to = block + self.block_size - seqs.append("%s%s\n" % (prefix, seq[block:to])) + seqs.append(f"{prefix}{seq[block:to]}\n") return header + "".join(seqs) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/format/table.py python-cogent-2022.5.25a1+dfsg/src/cogent3/format/table.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/format/table.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/format/table.py 2022-05-24 23:42:33.000000000 +0000 @@ -15,10 +15,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Peter Maxwell", "Matthew Wakefield", "Jeremy Widmann"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -68,7 +68,7 @@ wrapped = textwrap.wrap( text, width=max_line_width, initial_indent=buffer, subsequent_indent=buffer ) - wrapped = ["%s" % line.ljust(max_line_width + 2 * space) for line in wrapped] + wrapped = [f"{line.ljust(max_line_width + 2 * space)}" for line in wrapped] return wrapped @@ -139,12 +139,12 @@ if row_cell_func is None: def row_cell_func(v, r, c): - return "%s" % v + return f"{v}" if header_cell_func is None: def header_cell_func(v, c): - return "%s" % v + return f"{v}" if merge_identical: row_iterator = _merge_cells @@ -216,7 +216,7 @@ justify = "{ %s }" % " ".join(list(justify)) if header: header = "%s \\\\" % " & ".join([r"\bf{%s}" % head.strip() for head in header]) - rows = ["%s \\\\" % " & ".join(row) for row in rows] + rows = [f"{' & '.join(row)} \\\\" for row in 
rows] position = position or "htp!" table_format = [ r"\begin{table}[%s]" % position, @@ -442,16 +442,16 @@ for i in range(len(divider)): d = divider[i] if justify[i] == "c": - d = ":%s:" % d[:-2] + d = f":{d[:-2]}:" elif justify[i] == "r": - d = "%s:" % d[:-1] + d = f"{d[:-1]}:" elif justify[i] == "l": - d = ":%s" % d[:-1] + d = f":{d[:-1]}" else: - raise ValueError("invalid justfication character '%s'" % justify[i]) + raise ValueError(f"invalid justfication character '{justify[i]}'") divider[i] = d - divider = "|%s|" % "|".join(divider) + divider = f"|{'|'.join(divider)}|" rows = [row_template % sep.join(header), divider] + [ row_template % sep.join(r) for r in formatted_table ] @@ -602,7 +602,7 @@ for row in formatted_table: for cdex, cell in enumerate(row): if sep in cell: - row[cdex] = '"%s"' % cell + row[cdex] = f'"{cell}"' new_table += [sep.join(row) for row in formatted_table] @@ -663,7 +663,7 @@ header_done = None for line in lines: if has_header and not header_done: - formatted = sep.join(["%s" % field for field in line]) + formatted = sep.join([f"{field}" for field in line]) header_done = True else: formatted = sep.join(formatter(line)) @@ -711,14 +711,14 @@ try: entry = row[cdex] except IndexError: - entry = "%s" % missing_data + entry = f"{missing_data}" else: not_missing = True if isinstance(entry, numpy.ndarray) else entry if not not_missing: try: float(entry) # could numerically be 0, so not missing except (ValueError, TypeError): - entry = "%s" % missing_data + entry = f"{missing_data}" # attempt formatting if col_head in column_templates: @@ -729,7 +729,7 @@ elif isinstance(entry, float): entry = float_template.format(float(entry)) else: # for any other python object - entry = "%s" % str(entry) + entry = f"{str(entry)}" formatted.append(entry) col_widths[cdex] = max(col_widths[cdex], len(entry)) @@ -761,19 +761,19 @@ # int as its end portion num = len(names) max_num_digits = len(str(num)) - assert max_num_digits < 10, "can't create a unique name 
for %s" % oldname + assert max_num_digits < 10, f"can't create a unique name for {oldname}" name_base = oldname[: 10 - max_num_digits] newname = None for i in range(max_num_digits): - trial_name = "%s%s" % (name_base, i) + trial_name = f"{name_base}{i}" if trial_name not in names: newname = trial_name break if not newname: - raise RuntimeError("Can't create a unique name for %s" % oldname) + raise RuntimeError(f"Can't create a unique name for {oldname}") else: - print("WARN: Seqname %s changed to %s" % (oldname, newname)) + print(f"WARN: Seqname {oldname} changed to {newname}") return newname def append_species(name, formatted_dists, mat_breaks): @@ -790,9 +790,9 @@ except IndexError: end = len(formatted_dists) prefix = ["", " "][i > 0] - rows.append("%s%s" % (prefix, " ".join(formatted_dists[start:end]))) + rows.append(f"{prefix}{' '.join(formatted_dists[start:end])}") # mod first row of formatted_dists - rows[0] = "%s%s" % (name.ljust(12), rows[0]) + rows[0] = f"{name.ljust(12)}{rows[0]}" return rows # number of seqs diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/format/util.py python-cogent-2022.5.25a1+dfsg/src/cogent3/format/util.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/format/util.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/format/util.py 2022-05-24 23:42:33.000000000 +0000 @@ -3,10 +3,10 @@ """ __author__ = "Thomas La" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Jeremy Widmann", "Rob Knight", "Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/__init__.py python-cogent-2022.5.25a1+dfsg/src/cogent3/__init__.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/__init__.py 2021-10-12 00:17:34.000000000 +0000 
+++ python-cogent-2022.5.25a1+dfsg/src/cogent3/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -1,20 +1,20 @@ -"""The most commonly used constructors are available from this toplevel module. -The rest are in the subpackages: core, draw, evolve, format, maths, parse -and phylo. -""" +"""COmparative GENomics Toolkit 3: providing a first-class genomic sequence +analysis experience within Jupyter notebooks plus supporting parallel +execution on compute systems with 1000s of CPUs.""" import os +import pathlib import pickle -import re import sys import warnings -import numpy +from typing import Callable, Optional, Union from cogent3.app import available_apps from cogent3.core.alignment import ( Alignment, ArrayAlignment, + Sequence, SequenceCollection, ) from cogent3.core.genetic_code import available_codes, get_code @@ -41,14 +41,13 @@ from cogent3.parse.sequence import FromFilenameParser from cogent3.parse.table import load_delimited from cogent3.parse.tree_xml import parse_string as tree_xml_parse_string -from cogent3.util.misc import get_format_suffixes, open_ +from cogent3.util.io import get_format_suffixes, open_ from cogent3.util.table import Table as _Table from cogent3.util.table import cast_str_to_array -from cogent3.util.warning import deprecated __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Gavin Huttley", "Rob Knight", @@ -72,22 +71,24 @@ "Daniel McDonald", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" -if sys.version_info < (3, 6): +_min_version = (3, 7) +if (sys.version_info.major, sys.version_info.minor) < _min_version: PY_VERSION = ".".join([str(n) for n in sys.version_info]) + _min_version = ".".join(str(e) for e in _min_version) raise RuntimeError( - "Python-3.6 or greater is required, Python-%s used." 
% PY_VERSION + f"Python-{_min_version} or greater is required, Python-{PY_VERSION} used." + ) +elif (sys.version_info.major, sys.version_info.minor) == (3, 7): + warnings.warn( + "The minimum supported python version will change to 3.8 at 2022.10", + category=DeprecationWarning, ) - -NUMPY_VERSION = re.split(r"[^\d]", numpy.__version__) -numpy_version_info = tuple([int(i) for i in NUMPY_VERSION if i.isdigit()]) -if numpy_version_info < (1, 3): - raise RuntimeError("Numpy-1.3 is required, %s found." % NUMPY_VERSION) version = __version__ version_info = tuple([int(v) for v in version.split(".") if v.isdigit()]) @@ -122,6 +123,26 @@ return seq +def _make_seq_container( + klass, data, moltype=None, label_to_name=None, info=None, source=None, **kw +): + """utility function for creating the different sequence collection/alignment instances""" + if moltype is not None: + moltype = get_moltype(moltype) + + info = info or {} + for other_kw in ("constructor_kw", "kw"): + other_kw = kw.pop(other_kw, None) or {} + kw.update(other_kw) + assert isinstance(info, dict), "info must be a dict" + source = source or info.get("source", "unknown") + info["source"] = str(source) + + return klass( + data=data, moltype=moltype, label_to_name=label_to_name, info=info, **kw + ) + + def make_unaligned_seqs( data, moltype=None, label_to_name=None, info=None, source=None, **kw ): @@ -138,23 +159,20 @@ info a dict from which to make an info object source - origins of this data, defaults to 'unknown' + origins of this data, defaults to 'unknown'. Converted to a string + and added to info["source"]. 
**kw other keyword arguments passed to SequenceCollection """ - if moltype is not None: - moltype = get_moltype(moltype) - - info = info or {} - for other_kw in ("constructor_kw", "kw"): - other_kw = kw.pop(other_kw, None) or {} - kw.update(other_kw) - assert isinstance(info, dict), "info must be a dict" - info["source"] = source or "unknown" - - return SequenceCollection( - data=data, moltype=moltype, label_to_name=label_to_name, info=info, **kw + return _make_seq_container( + SequenceCollection, + data, + moltype=moltype, + label_to_name=label_to_name, + info=info, + source=source, + **kw, ) @@ -182,27 +200,92 @@ info a dict from which to make an info object source - origins of this data, defaults to 'unknown' + origins of this data, defaults to 'unknown'. Converted to a string + and added to info["source"]. **kw - other keyword arguments passed to SequenceCollection + other keyword arguments passed to alignment class """ - if moltype is not None: - moltype = get_moltype(moltype) + klass = ArrayAlignment if array_align else Alignment + return _make_seq_container( + klass, + data, + moltype=moltype, + label_to_name=label_to_name, + info=info, + source=source, + **kw, + ) - info = info or {} + +def _load_seqs(file_format, filename, fmt, kw, parser_kw): + """utility function for loading sequences""" + fmt = fmt or file_format + if not fmt: + msg = "could not determined file format, set using the format argument" + raise ValueError(msg) + parser_kw = parser_kw or {} for other_kw in ("constructor_kw", "kw"): other_kw = kw.pop(other_kw, None) or {} kw.update(other_kw) - assert isinstance(info, dict), "info must be a dict" - info["source"] = source or "unknown" - klass = ArrayAlignment if array_align else Alignment - return klass( - data=data, moltype=moltype, label_to_name=label_to_name, info=info, **kw - ) + return list(FromFilenameParser(filename, fmt, **parser_kw)) + + +def load_seq( + filename: Union[str, pathlib.Path], + format: Optional[str] = None, + moltype: 
Optional[str] = None, + label_to_name: Optional[Callable] = None, + parser_kw: dict = None, + info: dict = None, + **kw, +) -> Sequence: + """ + loads unaligned sequences from file + + Parameters + ---------- + filename : str + path to sequence file + format : str + sequence file format, if not specified tries to guess from the path suffix + moltype : str + the moltype, eg DNA, PROTEIN, 'dna', 'protein' + label_to_name : callable + function for converting original name into another name. + parser_kw : dict + optional arguments for the parser + info : dict + a dict from which to make an info object + **kw + other keyword arguments passed to SequenceCollection + + Notes + ----- + Returns **one** sequence from a file. Use load_aligned_seqs or + load_unaligned_seqs to get a collection. + + Returns + ------- + ``Sequence`` + """ + info = info or {} + info["source"] = str(filename) + file_format, _ = get_format_suffixes(filename) + if file_format == "json": + seq = load_from_json(filename, (Sequence,)) + seq.name = label_to_name(seq.name) if label_to_name else seq.name + return seq + + data = _load_seqs(file_format, filename, format, kw, parser_kw) + name, seq = data[0] + name = label_to_name(name) if label_to_name else name + result = make_seq(seq, name, moltype=moltype) + result.info.update(info) + return result def load_unaligned_seqs( - filename, + filename: Union[str, pathlib.Path], format=None, moltype=None, label_to_name=None, @@ -225,6 +308,10 @@ function for converting original name into another name. 
parser_kw : dict optional arguments for the parser + info + a dict from which to make an info object + **kw + other keyword arguments passed to SequenceCollection Returns ------- @@ -234,16 +321,7 @@ if file_format == "json": return load_from_json(filename, (SequenceCollection,)) - format = format or file_format - if not format: - msg = "could not determined file format, set using the format argument" - raise ValueError(msg) - - parser_kw = parser_kw or {} - for other_kw in ("constructor_kw", "kw"): - other_kw = kw.pop(other_kw, None) or {} - kw.update(other_kw) - data = list(FromFilenameParser(filename, format, **parser_kw)) + data = _load_seqs(file_format, filename, format, kw, parser_kw) return make_unaligned_seqs( data, label_to_name=label_to_name, @@ -255,7 +333,7 @@ def load_aligned_seqs( - filename, + filename: Union[str, pathlib.Path], format=None, array_align=True, moltype=None, @@ -290,16 +368,7 @@ if file_format == "json": return load_from_json(filename, (Alignment, ArrayAlignment)) - format = format or file_format - if not format: - msg = "could not determined file format, set using the format argument" - raise ValueError(msg) - - parser_kw = parser_kw or {} - for other_kw in ("constructor_kw", "kw"): - other_kw = kw.pop(other_kw, None) or {} - kw.update(other_kw) - data = list(FromFilenameParser(filename, format, **parser_kw)) + data = _load_seqs(file_format, filename, format, kw, parser_kw) return make_aligned_seqs( data, array_align=array_align, @@ -369,10 +438,6 @@ if any(isinstance(a, str) for a in (header, data)): raise TypeError("str type invalid, if it's a path use load_table()") - if "index" in kwargs: - deprecated("argument", "index", "index_name", "2021.11") - index_name = kwargs.pop("index", index_name) - data = kwargs.get("rows", data) if data_frame is not None: from pandas import DataFrame @@ -400,7 +465,7 @@ def load_table( - filename, + filename: Union[str, pathlib.Path], sep=None, reader=None, digits=4, @@ -459,33 +524,27 @@ 
skip_inconsistent skips rows that have different length to header row """ - import pathlib - if not any(isinstance(filename, t) for t in (str, pathlib.PurePath)): raise TypeError( "filename must be string or Path, perhaps you want make_table()" ) - if "index" in kwargs: - deprecated("argument", "index", "index_name", "2021.11") - index_name = kwargs.pop("index", index_name) - sep = sep or kwargs.pop("delimiter", None) file_format, compress_format = get_format_suffixes(filename) if file_format == "json": return load_from_json(filename, (_Table,)) elif file_format in ("pickle", "pkl"): - f = open_(filename, mode="rb") - loaded_table = pickle.load(f) - f.close() + with open_(filename, mode="rb") as f: + loaded_table = pickle.load(f) + r = _Table() r.__setstate__(loaded_table) return r if reader: with open_(filename, newline=None) as f: - data = [row for row in reader(f)] + data = list(reader(f)) header = data[0] data = {column[0]: column[1:] for column in zip(*data)} else: @@ -574,7 +633,9 @@ return tree -def load_tree(filename, format=None, underscore_unmunge=False): +def load_tree( + filename: Union[str, pathlib.Path], format=None, underscore_unmunge=False +): """Constructor for tree. Parameters @@ -582,7 +643,8 @@ filename : str a file path containing a newick or xml formatted tree. format : str - either newick, xml or cogent3 json, default is newick + either xml or json, all other values default to newick. Overrides + file name suffix. underscore_unmunge : bool replace underscores with spaces in all names read, i.e. "sp_name" becomes "sp name". @@ -590,19 +652,19 @@ Notes ----- Underscore unmunging is turned off by default, although it is part - of the Newick format. + of the Newick format. Only the cogent3 json and xml tree formats are + supported. 
Returns ------- PhyloNode """ file_format, _ = get_format_suffixes(filename) - if file_format == "json": + format = format or file_format + if format == "json": return load_from_json(filename, (TreeNode, PhyloNode)) with open_(filename) as tfile: treestring = tfile.read() - if format is None and filename.endswith(".xml"): - format = "xml" return make_tree(treestring, format=format, underscore_unmunge=underscore_unmunge) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/distance_transform.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/distance_transform.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/distance_transform.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/distance_transform.py 2022-05-24 23:42:33.000000000 +0000 @@ -75,7 +75,7 @@ __author__ = "Justin Kuczynski" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Rob Knight", "Micah Hamady", @@ -86,7 +86,7 @@ "Greg Caporaso", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Justin Kuczynski" __email__ = "justinak@gmail.com" __status__ = "Prototype" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/geometry.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/geometry.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/geometry.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/geometry.py 2022-05-24 23:42:33.000000000 +0000 @@ -24,7 +24,7 @@ __author__ = "Sandra Smit" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Sandra Smit", "Gavin Huttley", @@ -34,7 +34,7 @@ "Helmut Simon", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Sandra Smit" __email__ = "sandra.smit@colorado.edu" __status__ = "Production" diff -Nru 
python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/__init__.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/__init__.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -17,7 +17,7 @@ __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Gavin Huttley", "Peter Maxwell", @@ -29,7 +29,7 @@ "Ben Kaehler", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/markov.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/markov.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/markov.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/markov.py 2022-05-24 23:42:33.000000000 +0000 @@ -8,10 +8,10 @@ Float = numpy.core.numerictypes.sctype2char(float) __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -72,7 +72,7 @@ if hasattr(label, "__len__") and not isinstance(label, str): label = ",".join(str(z) for z in label) # Table needs unique labels - label = "%s (%s)" % (label, i) + label = f"{label} ({i})" labels.append(label) heading = [""] + labels a = [[name] + list(row) for (name, row) in zip(labels, self.Matrix)] diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/matrix_exponential_integration.py 
python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/matrix_exponential_integration.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/matrix_exponential_integration.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/matrix_exponential_integration.py 2022-05-24 23:42:33.000000000 +0000 @@ -19,7 +19,7 @@ __copyright__ = "Copyright 2007-2014, The Cogent Project" __credits__ = ["Ben Kaehler", "Von Bing Yap", "Gavin Huttley", "Ananias Iliadis"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Ben Kaehler" __email__ = "benjamin.kaehler@anu.edu.au" __status__ = "Production" @@ -30,7 +30,7 @@ self.Q = Q def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, repr(self.Q)) + return f"{self.__class__.__name__}({repr(self.Q)})" class VanLoanIntegratingExponentiator(_Exponentiator): diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/matrix_exponentiation.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/matrix_exponentiation.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/matrix_exponentiation.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/matrix_exponentiation.py 2022-05-24 23:42:33.000000000 +0000 @@ -17,10 +17,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Zongzhi Liu"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -28,7 +28,7 @@ class _Exponentiator: def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, repr(self.Q)) + return f"{self.__class__.__name__}({repr(self.Q)})" class EigenExponentiator(_Exponentiator): @@ -93,7 +93,7 @@ trm = numpy.dot(trm, A / float(k)) eA += trm if k >= self.q: - 
warnings.warn("Taylor series lengthened from %s to %s" % (self.q, k + 1)) + warnings.warn(f"Taylor series lengthened from {self.q} to {k + 1}") self.q = k + 1 return eA diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/matrix_logarithm.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/matrix_logarithm.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/matrix_logarithm.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/matrix_logarithm.py 2022-05-24 23:42:33.000000000 +0000 @@ -18,7 +18,7 @@ __copyright__ = "Copyright 2007-2014, The Cogent Project" __credits__ = ["Rob Knight", "Gavin Huttley", "Von Bing Yap", "Ben Kaehler"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/measure.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/measure.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/measure.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/measure.py 2022-05-24 23:42:33.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -87,43 +87,39 @@ return pl -def jsd(freqs1, freqs2, validate=False): - """calculate Jensen–Shannon divergence between two probability distributions +def jsd(*vectors, validate=False): + """calculate Jensen–Shannon divergence between two or more probability distributions Parameters ---------- - freqs1 : one dimensional array - row vector frequencies, sum to 1 - freqs2 : one dimensional array - row vector frequencies, 
sum to 1 + *vectors + >= 2 frequency vectors, each vector must sum to 1 validate : bool - + check the consistency of the provided vectors, namely they're + all 1D, have the same number of elements and sum to 1 """ # Convert input arrays into numpy arrays - freqs1 = array(freqs1) - freqs2 = array(freqs2) - + vectors = [array(v) for v in vectors] + num_vectors = len(vectors) if validate: - assert_equal( - freqs1.shape, freqs2.shape, err_msg="freqs1/freqs2 mismatched shape" - ) - assert freqs1.ndim == 1, "freqs1 has incorrect dimension" - assert freqs2.ndim == 1, "freqs2 has incorrect dimension" + assert len({f.shape for f in vectors}) == 1, "mismatched shape" + dims = {v.ndim for v in vectors} + assert dims == {1}, "incorrect dimension" try: - validate_freqs_array(freqs1) - validate_freqs_array(freqs2) + for v in vectors: + validate_freqs_array(v) except ValueError as err: raise AssertionError("freqs not valid") from err - H_mn = fsum(safe_p_log_p(freqs1 / 2 + freqs2 / 2)) - mn_H = fsum([fsum(i) for i in map(safe_p_log_p, [freqs1, freqs2])]) / 2 + H_mn = fsum(safe_p_log_p(array(vectors).mean(axis=0))) + mn_H = fsum([fsum(i) for i in map(safe_p_log_p, vectors)]) / num_vectors jsd_ = H_mn - mn_H - if jsd_ < 0 and isclose(jsd_, 0, atol=1e-10): + if jsd_ < 0: + if not isclose(jsd_, 0, atol=1e-10): + raise ArithmeticError( + f"{jsd_} is negative and below defined precision threshold" + ) jsd_ = 0 - elif jsd_ < 0: - raise ArithmeticError( - f"{jsd_} is negative and below defined precision threshold" - ) return jsd_ diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/optimisers.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/optimisers.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/optimisers.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/optimisers.py 2022-05-24 23:42:33.000000000 +0000 @@ -15,10 +15,10 @@ LocalOptimiser = Powell __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent 
Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Andrew Butterfield", "Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -104,7 +104,7 @@ result = f(x, **kw) if not numpy.isfinite(result): if not acceptable_inf(result): - warnings.warn("Non-finite f %s from %s" % (result, x)) + warnings.warn(f"Non-finite f {result} from {x}") raise ParameterOutOfBoundsError except (ArithmeticError, ParameterOutOfBoundsError): result = out_of_bounds_value @@ -167,13 +167,10 @@ try: fval = f(x) except (ArithmeticError, ParameterOutOfBoundsError) as detail: - raise ValueError( - "Initial parameter values must be valid %s" % repr(detail.args) - ) + raise ValueError(f"Initial parameter values must be valid {repr(detail.args)}") if not numpy.isfinite(fval): raise ValueError( - "Initial parameter values must evaluate to a finite value, not %s. %s" - % (fval, x) + f"Initial parameter values must evaluate to a finite value, not {fval}. 
{x}" ) f = bounds_exception_catching_function(f) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/period_numba.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/period_numba.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/period_numba.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/period_numba.py 2022-05-24 23:42:33.000000000 +0000 @@ -8,7 +8,7 @@ __copyright__ = "Copyright 2007-2019, The Cogent Project" __credits__ = ["Julien Epps", "Hua Ying", "Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/period.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/period.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/period.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/period.py 2022-05-24 23:42:33.000000000 +0000 @@ -16,10 +16,10 @@ __author__ = "Hua Ying, Julien Epps and Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Julien Epps", "Hua Ying", "Gavin Huttley", "Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -87,20 +87,35 @@ self.period = period - def getNumStats(self): + def get_num_stats(self): """returns the number of statistics computed by this calculator""" return 1 class AutoCorrelation(_PeriodEstimator): + """class for repetitive calculation of autocorrelation for series of + fixed length""" + def __init__(self, length, llim=None, ulim=None, period=None): - """class for repetitive calculation of autocorrelation for series of - fixed length + """ + Parameters + ---------- + length : int 
+ the signal length + llim : int + lower limit + ulim : int + upper limit + period : int + a specific period to return the IPDFT power for + + Notes + ----- + If ``x = [1,1,1,1]`, ``xc = [1,2,3,4,3,2,1]`` - e.g. if x = [1,1,1,1], xc = [1,2,3,4,3,2,1] The middle element of xc corresponds to a lag (period) of 0 - xc is always symmetric for real x - N is the length of x""" + ``xc`` is always symmetric for real ``x`` + """ super(AutoCorrelation, self).__init__(length, llim, ulim, period) periods = list(range(-length + 1, length)) @@ -110,7 +125,7 @@ self.periods = array(periods[self.min_idx : self.max_idx + 1]) self.xc = zeros(2 * self.length - 1) - def evaluate(self, x): + def __call__(self, x): x = array(x, float64) self.xc.fill(0.0) autocorr_inner(x, self.xc, self.length) @@ -120,8 +135,6 @@ return xc, self.periods - __call__ = evaluate - def auto_corr(x, llim=None, ulim=None): """returns the autocorrelation of x @@ -139,12 +152,18 @@ """factory function for computing the integer period discrete Fourier transform for repeated application to signals of the same length. 
- Argument: - - length: the signal length - - llim: lower limit - - ulim: upper limit - - period: a specific period to return the IPDFT power for - - abs_ft_sig: if True, returns absolute value of signal + Parameters + ---------- + length : int + the signal length + llim : int + lower limit + ulim : int + upper limit + period : int + a specific period to return the IPDFT power for + abs_ft_sig : bool + if True, returns absolute value of signal """ if period is not None: llim = period @@ -155,7 +174,7 @@ self.X = array([0 + 0j] * self.length) self.abs_ft_sig = abs_ft_sig - def evaluate(self, x): + def __call__(self, x): x = array(x, float64) self.X.fill(0 + 0j) self.X = ipdft_inner(x, self.X, self.W, self.ulim, self.length) @@ -169,22 +188,18 @@ return array(pwr), self.periods - __call__ = evaluate - class Goertzel(_PeriodEstimator): """Computes the power of a signal for a specific period""" - def __init__(self, length=None, llim=None, ulim=None, period=None, abs_ft_sig=True): + def __init__(self, length=None, period=None, **kwargs): assert period is not None, "Goertzel requires a period" super(Goertzel, self).__init__(length=length, period=period) - def evaluate(self, x): + def __call__(self, x): x = array(x, float64) return goertzel_inner(x, self.length, self.period) - __call__ = evaluate - class Hybrid(_PeriodEstimator): """hybrid statistic and corresponding periods for signal x @@ -200,23 +215,33 @@ abs_ft_sig=True, return_all=False, ): - """Arguments: - - length: the length of signals to be encountered - - period: specified period at which to return the signal - - llim, ulim: the smallest, largest periods to evaluate - - return_all: whether to return the hybrid, ipdft, autocorr - statistics as a numpy array, or just the hybrid statistic + """ + Parameters + ---------- + length : int + the signal length + llim : int + lower limit + ulim : int + upper limit + period : int + a specific period to return the IPDFT power for + abs_ft_sig : bool + if True, returns 
absolute value of signal + return_all : bool + whether to return the hybrid, ipdft, autocorr statistics as + a numpy array, or just the hybrid statistic """ super(Hybrid, self).__init__(length, llim, ulim, period) self.ipdft = Ipdft(length, llim, ulim, period, abs_ft_sig) self.auto = AutoCorrelation(length, llim, ulim, period) self._return_all = return_all - def getNumStats(self): + def get_num_stats(self): """the number of stats computed by this calculator""" return [1, 3][self._return_all] - def evaluate(self, x): + def __call__(self, x): if self.period is None: auto_sig, auto_periods = self.auto(x) ft_sig, ft_periods = self.ipdft(x) @@ -227,7 +252,7 @@ result = hybrid, ft_periods else: auto_sig = self.auto(x) - # ft_sig = goertzel(x, period) # performance slower than ipdft! + # note that goertzel(x, period) performance is slower than ipdft! ft_sig = self.ipdft(x) hybrid = auto_sig * ft_sig if self._return_all: @@ -236,8 +261,6 @@ result = abs(hybrid) return result - __call__ = evaluate - def ipdft(x, llim=None, ulim=None, period=None): """returns the integer period discrete Fourier transform of the signal x @@ -246,11 +269,12 @@ ---------- x series of symbols - llim + llim : int lower limit - ulim + ulim : int upper limit - + period : int + a specific period to return the IPDFT power for """ x = array(x, float64) ipdft_calc = Ipdft(len(x), llim, ulim, period) @@ -263,9 +287,17 @@ Parameters ---------- - return_all - whether to return the hybrid, ipdft, autocorr - statistics as a numpy array, or just the hybrid statistic + x + series of symbols + llim : int + lower limit + ulim : int + upper limit + period : int + a specific period to return the IPDFT power for + return_all : bool + whether to return the hybrid, ipdft, autocorr statistics as + a numpy array, or just the hybrid statistic See Epps. 
EURASIP Journal on Bioinformatics and Systems Biology, 2009, 9 """ @@ -287,12 +319,3 @@ pwr.reverse() periods.reverse() return array(pwr), array(periods) - - -if __name__ == "__main__": - from numpy import sin - - x = sin(2 * pi / 5 * arange(1, 9)) - print(x) - print(goertzel(x, 4)) - print(goertzel(x, 8)) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/scipy_optimisers.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/scipy_optimisers.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/scipy_optimisers.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/scipy_optimisers.py 2022-05-24 23:42:33.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/scipy_optimize.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/scipy_optimize.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/scipy_optimize.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/scipy_optimize.py 2022-05-24 23:42:33.000000000 +0000 @@ -50,7 +50,7 @@ pymin = builtins.min pymax = builtins.max -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" _epsilon = sqrt(numpy.finfo(float).eps) @@ -556,7 +556,7 @@ else: if disp: print("Optimization terminated successfully.") - print(" Current function value: %f" % fval) + print(f" Current function value: {fval:f}") print(" Iterations: %d" % iter) print(" Function evaluations: %d" % fcalls[0]) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/simannealingoptimiser.py 
python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/simannealingoptimiser.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/simannealingoptimiser.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/simannealingoptimiser.py 2022-05-24 23:42:33.000000000 +0000 @@ -19,10 +19,10 @@ __author__ = "Andrew Butterfield and Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Andrew Butterfield", "Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -48,7 +48,7 @@ "step_cycles", ]: if getattr(self, attr) != getattr(other, attr): - raise ValueError("Checkpoint file ignored - %s different" % attr) + raise ValueError(f"Checkpoint file ignored - {attr} different") def roundsToReach(self, T): from math import log diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/solve.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/solve.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/solve.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/solve.py 2022-05-24 23:42:33.000000000 +0000 @@ -1,10 +1,10 @@ #!/usr/bin/env python __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/contingency.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/contingency.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/contingency.py 2021-10-12 00:17:34.000000000 
+0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/contingency.py 2022-05-24 23:42:33.000000000 +0000 @@ -8,10 +8,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/distribution.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/distribution.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/distribution.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/distribution.py 2022-05-24 23:42:33.000000000 +0000 @@ -30,10 +30,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight", "Sandra Smit", "Gavin Huttley", "Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -96,9 +96,9 @@ """ x = fix_rounding_error(x) if x < 0: - raise ValueError("chi_low: x must be >= 0 (got %s)." % x) + raise ValueError(f"chi_low: x must be >= 0 (got {x}).") if df < 1: - raise ValueError("chi_low: df must be >= 1 (got %s)." % df) + raise ValueError(f"chi_low: df must be >= 1 (got {df}).") return igam(df / 2, x / 2) @@ -115,9 +115,9 @@ x = fix_rounding_error(x) if x < 0: - raise ValueError("chi_high: x must be >= 0 (got %s)." % x) + raise ValueError(f"chi_high: x must be >= 0 (got {x}).") if df < 1: - raise ValueError("chi_high: df must be >= 1 (got %s)." 
% df) + raise ValueError(f"chi_high: df must be >= 1 (got {df}).") return igamc(df / 2, x / 2) @@ -132,7 +132,7 @@ See Cephes docs for details. """ if df < 1: - raise ValueError("t_low: df must be >= 1 (got %s)." % df) + raise ValueError(f"t_low: df must be >= 1 (got {df}).") return stdtr(df, t) @@ -147,7 +147,7 @@ See Cephes docs for details. """ if df < 1: - raise ValueError("t_high: df must be >= 1 (got %s)." % df) + raise ValueError(f"t_high: df must be >= 1 (got {df}).") return stdtr(df, -t) # distribution is symmetric @@ -247,13 +247,13 @@ Use in case var(a) < var(b) """ if F < 0: - raise ValueError("fprob: F must be >= 0 (got %s)." % F) + raise ValueError(f"fprob: F must be >= 0 (got {F}).") if side == "right": return 2 * f_high(dfn, dfd, F) elif side == "left": return 2 * f_low(dfn, dfd, F) else: - raise ValueError("Not a valid value for side %s" % (side)) + raise ValueError(f"Not a valid value for side {side}") def stdtr(k, t): diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/information_criteria.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/information_criteria.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/information_criteria.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/information_criteria.py 2022-05-24 23:42:33.000000000 +0000 @@ -2,10 +2,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -27,7 +27,7 @@ if sample_size is None: correction = 1 else: - assert sample_size > 0, "Invalid sample_size %s" % sample_size + assert sample_size > 0, f"Invalid sample_size {sample_size}" correction = sample_size / (sample_size - nfp - 1) return -2 * lnL + 2 * 
nfp * correction diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/__init__.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/__init__.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -20,7 +20,7 @@ __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Gavin Huttley", "Rob Knight", @@ -29,7 +29,7 @@ "Micah Hamady", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/jackknife.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/jackknife.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/jackknife.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/jackknife.py 2022-05-24 23:42:33.000000000 +0000 @@ -4,10 +4,10 @@ __author__ = "Anuj Pahwa, Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Anuj Pahwa", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -146,7 +146,7 @@ try: num_datasets = len(subset_stats) for i in range(num_datasets): - header.append("Stat_%s-i" % i) + header.append(f"Stat_{i}-i") except TypeError: header.append("Stat-i") @@ -179,7 +179,7 @@ try: num_datasets = len(pseudovalues) for i in range(num_datasets): - header.append("Pseudovalue_%s-i" % i) + header.append(f"Pseudovalue_{i}-i") except TypeError: header.append("Pseudovalue-i") diff -Nru 
python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/kendall.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/kendall.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/kendall.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/kendall.py 2022-05-24 23:42:33.000000000 +0000 @@ -13,10 +13,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/ks.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/ks.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/ks.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/ks.py 2022-05-24 23:42:33.000000000 +0000 @@ -25,10 +25,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/number.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/number.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/number.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/number.py 2022-05-24 23:42:33.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin 
Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -19,10 +19,7 @@ class SummaryStatBase: @property def mean(self): - stat = 0 - if len(self) > 0: - stat = numpy.mean(self.expanded_values()) - return stat + return numpy.mean(self.expanded_values()) if len(self) > 0 else 0 @property def std(self): @@ -33,23 +30,14 @@ @property def var(self): - stat = 0 - if len(self) > 0: - stat = numpy.var(self.expanded_values(), ddof=1) - return stat + return numpy.var(self.expanded_values(), ddof=1) if len(self) > 0 else 0 def quantile(self, q): - stat = 0 - if len(self) > 0: - stat = numpy.quantile(self.expanded_values(), q=q) - return stat + return numpy.quantile(self.expanded_values(), q=q) if len(self) > 0 else 0 @property def median(self): - stat = 0 - if len(self) > 0: - stat = numpy.median(self.expanded_values()) - return stat + return numpy.median(self.expanded_values()) if len(self) > 0 else 0 @property def mode(self): @@ -58,10 +46,7 @@ @property def sum(self): - stat = 0 - if len(self) > 0: - stat = numpy.sum(self.expanded_values()) - return stat + return numpy.sum(self.expanded_values()) if len(self) > 0 else 0 class CategoryCounter(MutableMapping, SummaryStatBase): @@ -234,8 +219,7 @@ arr[i] = data[key][i] data[key] = arr else: - for key in self: - break + key = next(iter(self)) assert len(key) == len(column_names), "mismatched dimensions" data = defaultdict(list) for key, count in self.items(): @@ -409,6 +393,7 @@ @property def var(self): + """unbiased estimate of the variance""" # we scale the variance contribution of a number by its occurrence mean = self.mean var = sum(self[k] * (k - mean) ** 2 for k in self) @@ -416,8 +401,7 @@ @property def std(self): - var = self.var - return numpy.sqrt(var) + return numpy.sqrt(self.var) def update_from_counts(self, data): """updates values of self using counts dict""" diff -Nru 
python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/period.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/period.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/period.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/period.py 2022-05-24 23:42:33.000000000 +0000 @@ -14,10 +14,10 @@ __author__ = "Hua Ying, Julien Epps and Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Julien Epps", "Hua Ying", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -76,7 +76,8 @@ a list of sequence motifs motif_length length of first motif - + result : ndarray + working array. Contents are reset to zero before encoding """ if result is None: result = numpy.zeros(len(seq), numpy.uint8) @@ -111,19 +112,19 @@ self.motif_length = motif_length or len(motifs[0]) self.working = None if length is not None: - self.setResultArray(length) + self.set_result_array(length) - def setResultArray(self, length): + def set_result_array(self, length): """sets a result array for length""" self.working = numpy.zeros(length, numpy.uint8) self.length = length def __call__(self, seq, result=None): if result is None and self.working is None: - self.setResultArray(len(seq)) + self.set_result_array(len(seq)) elif self.working is not None: if len(seq) != self.working.shape[0]: - self.setResultArray(len(seq)) + self.set_result_array(len(seq)) result = self.working result.fill(0) @@ -136,7 +137,19 @@ def circular_indices(vector, start, length, num): - """docstring for circular_indices""" + """ + + Parameters + ---------- + vector : list[int] + sequential integers + start : int + index to start sampling from + length : int + length of returned vector + num : int + k-mer size to support + """ if 
start > length: start = start - length @@ -155,7 +168,7 @@ num_seg, remainder = divmod(length, block_size) vector = list(range(length)) result = [] - for seg_num in range(num_seg): + for _ in range(num_seg): i = choice(vector) result += circular_indices(vector, i, length, block_size) @@ -194,11 +207,7 @@ """ signal_length = len(signal) - if seq_to_symbols is not None: - dtype = "c" - else: - dtype = None # let numpy guess - + dtype = "c" if seq_to_symbols is not None else None signal = numpy.array(list(signal), dtype=dtype) if seq_to_symbols is not None: @@ -208,24 +217,19 @@ data = signal obs_stat = calc(data) - if seq_to_symbols is not None: - if sum(symbolic) == 0: - p = [numpy.array([1.0, 1.0, 1.0]), 1.0][num_stats == 1] + if seq_to_symbols is not None and sum(data) == 0: + p = [numpy.array([1.0, 1.0, 1.0]), 1.0][num_stats == 1] - return obs_stat, p + return obs_stat, p if num_stats is None: try: - num_stats = calc.getNumStats() + num_stats = calc.get_num_stats() except AttributeError: num_stats = 1 - if num_stats == 1: - count = 0 - else: - count = numpy.zeros(num_stats) - - for rep in range(num_reps): + count = 0 if num_stats == 1 else numpy.zeros(num_stats) + for _ in range(num_reps): # get sample positions sampled_indices = sampled_places(block_size, signal_length) new_signal = signal.take(sampled_indices) @@ -242,10 +246,3 @@ count += 1 return obs_stat, count / num_reps - - -# def percrb4(): -# """Return SNR and CRB for periodicity estimates from symbolic signals""" -# # TODO: complete the function according to Julien's percrb4.m -# pass -# diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/special.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/special.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/special.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/special.py 2022-05-24 23:42:33.000000000 +0000 @@ -6,10 +6,10 @@ __author__ = "Rob Knight" -__copyright__ = 
"Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Rob Knight", "Sandra Smit", "Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -74,7 +74,7 @@ """ # Validation: k must be be between 0 and n (inclusive), and n must be >=0. if k > n: - raise IndexError("Can't choose %s items from %s" % (k, n)) + raise IndexError(f"Can't choose {k} items from {n}") elif k < 0: raise IndexError("Can't choose negative number of items") elif n < 0: @@ -115,7 +115,7 @@ """ # Validation: k must be be between 0 and n (inclusive), and n must be >=0. if k > n: - raise IndexError("Can't choose %s items from %s" % (k, n)) + raise IndexError(f"Can't choose {k} items from {n}") elif k < 0: raise IndexError("Can't choose negative number of items") elif n < 0: diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/test.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/test.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/stats/test.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/stats/test.py 2022-05-24 23:42:33.000000000 +0000 @@ -18,9 +18,11 @@ isnan, log, mean, + nonzero, + ones, + ravel, + sqrt, ) -from numpy import median as _median -from numpy import nonzero, ones, ravel, sqrt from numpy import std as _std from numpy import sum as npsum from numpy import take, tanh, trace, zeros @@ -51,11 +53,10 @@ log_one_minus, one_minus_exp, ) -from cogent3.util.warning import discontinued __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Gavin Huttley", "Rob Knight", @@ -68,7 +69,7 @@ "Michael Dwan", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ 
= "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -152,30 +153,6 @@ return sqrt(sample_variance) -def median(m, axis=None): # pragma: no cover - """Returns medians by axis (similiar to numpy.mean) - - numpy.median does not except an axis parameter. Is safe for substition for - numpy.median - """ - discontinued("function", "median", "2021.11") - median_vals = [] - rows, cols = m.shape - - if axis is None: - return _median(ravel(m)) - elif axis == 0: - for col in range(cols): - median_vals.append(_median(m[:, col])) - elif axis == 1 or axis == -1: - for row in range(rows): - median_vals.append(_median(m[row, :])) - else: - raise ValueError("axis(=%s) out of bounds" % axis) - - return array(median_vals) - - class ZeroExpectedError(ValueError): """Class for handling tests where an expected value was zero.""" @@ -1050,11 +1027,11 @@ corr_fn = spearman else: raise ValueError( - "Invalid method '%s'. Must be either 'pearson' or " "'spearman'." % method + f"Invalid method '{method}'. Must be either 'pearson' or 'spearman'." ) if tails is not None and tails != "high" and tails != "low": raise ValueError( - "Invalid tail type '%s'. Must be either None, " "'high', or 'low'." % tails + f"Invalid tail type '{tails}'. Must be either None, 'high', or 'low'." ) if permutations < 0: raise ValueError( @@ -1108,8 +1085,7 @@ # don't want to return a p-value of 0 if someone passes in a bogus # tail type somehow. raise ValueError( - "Invalid tail type '%s'. Must be either None, " - "'high', or 'low'." % tails + f"Invalid tail type '{tails}'. Must be either None, 'high', or 'low'." 
) if permutations > 0: nonparametric_p_val = (better + 1) / (permutations + 1) @@ -1567,7 +1543,7 @@ elif alt == ALT_HIGH: stat = cumsum.max() else: - raise RuntimeError("Unknown alt: %s" % alt) + raise RuntimeError(f"Unknown alt: {alt}") if exact and alt == ALT_TWO_SIDED and not ties: Pval = 1 - psmirnov2x(stat, num_x, num_y) else: diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/util.py python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/util.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/maths/util.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/maths/util.py 2022-05-24 23:42:33.000000000 +0000 @@ -11,10 +11,10 @@ err = numpy.seterr(divide="raise") __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight", "Sandra Smit", "Thomas La"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Development" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/blast.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/blast.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/blast.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/blast.py 2022-05-24 23:42:33.000000000 +0000 @@ -8,10 +8,10 @@ __author__ = "Micah Hamady" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Micah Hamady", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Micah Hamady" __email__ = "hamady@colorado.edu" __status__ = "Prototype" @@ -42,7 +42,7 @@ return True # blast info line for t_str in t_strs: - if line.startswith("# %s" % t_str): + if line.startswith(f"# {t_str}"): return True return False diff -Nru 
python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/blast_xml.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/blast_xml.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/blast_xml.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/blast_xml.py 2022-05-24 23:42:33.000000000 +0000 @@ -2,11 +2,11 @@ """ __author__ = "Kristian Rother" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __contributors__ = ["Micah Hamady"] __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Kristian Rother" __email__ = "krother@rubor.de" __status__ = "Prototype" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/cigar.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/cigar.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/cigar.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/cigar.py 2022-05-24 23:42:33.000000000 +0000 @@ -22,15 +22,15 @@ __author__ = "Hua Ying" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Hua Ying"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Hua Ying" __email__ = "hua.ying@anu.edu.au" __status__ = "Production" -pattern = re.compile("([0-9]*)([DM])") +_pattern = re.compile("([0-9]*)([DM])") def map_to_cigar(map): @@ -43,10 +43,7 @@ else: num_chars = span.length char = "D" - if num_chars == 1: - cigar += char - else: - cigar += str(num_chars) + char + cigar += char if num_chars == 1 else str(num_chars) + char return cigar @@ -54,19 +51,14 @@ """convert cigar string into Map""" assert "I" not in cigar_text spans, posn = [], 0 - for n, c in pattern.findall(cigar_text): - if n: - n = int(n) - else: - n = 1 - + for n, c in _pattern.findall(cigar_text): + n = int(n) if n else 1 if c == 
"M": spans.append(Span(posn, posn + n)) posn += n else: spans.append(LostSpan(n)) - map = Map(spans=spans, parent_length=posn) - return map + return Map(spans=spans, parent_length=posn) def aligned_from_cigar(cigar_text, seq, moltype=DNA): @@ -79,10 +71,7 @@ def _slice_by_aln(map, left, right): slicemap = map[left:right] - if hasattr(slicemap, "start"): - location = [slicemap.start, slicemap.end] - else: - location = [] + location = [slicemap.start, slicemap.end] if hasattr(slicemap, "start") else [] return slicemap, location @@ -101,14 +90,13 @@ new_map.parent_length = map.end else: spans = [] + length = None for span in map.spans: - if span.lost: - spans.append(span) - else: + if not span.lost: span.start = span.start - start span.end = span.end - start length = span.end - spans.append(span) + spans.append(span) new_map = Map(spans=spans, parent_length=length) return new_map diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/cisbp.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/cisbp.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/cisbp.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/cisbp.py 2022-05-24 23:42:33.000000000 +0000 @@ -8,7 +8,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/clustal.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/clustal.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/clustal.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/clustal.py 2022-05-24 23:42:33.000000000 +0000 @@ -22,10 +22,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" 
__credits__ = ["Rob Knight", "Sandra Smit", "Gavin Huttley", "Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Development" @@ -49,7 +49,7 @@ key, val = splitter(line.rstrip()) except: if strict: - raise RecordError("Failed to extract key and value from line %s" % line) + raise RecordError(f"Failed to extract key and value from line {line}") else: continue # just skip the line if not strict diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/cogent3_json.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/cogent3_json.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/cogent3_json.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/cogent3_json.py 2022-05-24 23:42:33.000000000 +0000 @@ -4,14 +4,15 @@ from cogent3.app.data_store import load_record_from_json from cogent3.util.deserialise import deserialise_object -from cogent3.util.misc import get_object_provenance, open_ +from cogent3.util.io import open_ +from cogent3.util.misc import get_object_provenance __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Development" @@ -22,9 +23,10 @@ Parameters ---------- - filename: name of the json file - classes: A series of the Cogent3 types, for example: (Alignment, ArrayAlignment) - + filename : Union[str,Path] + name of the json file + classes : Sequence[type] + A series of the Cogent3 types, for example: (Alignment, ArrayAlignment) """ assert all( (isinstance(klass, type) for klass in classes) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/dialign.py 
python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/dialign.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/dialign.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/dialign.py 2022-05-24 23:42:33.000000000 +0000 @@ -6,10 +6,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/ebi.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/ebi.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/ebi.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/ebi.py 2022-05-24 23:42:33.000000000 +0000 @@ -16,7 +16,7 @@ __author__ = "Zongzhi Liu and Sandra Smit" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Zongzhi Liu", "Sandra Smit", @@ -25,7 +25,7 @@ "Daniel McDonald", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Zongzhi Liu" __email__ = "zongzhi.liu@gmail.com" __status__ = "Development" @@ -159,14 +159,14 @@ if handler: value = handler(raw_value) else: # no handler found for key - raise ValueError("No handler found for %s" % key) + raise ValueError(f"No handler found for {key}") return key, value # build the result dict. 
result = {} for key, raw_value in key_values: if all_keys and key not in all_keys: - raise ValueError("key: %s not in all_keys: %s" % (repr(key), all_keys)) + raise ValueError(f"key: {repr(key)} not in all_keys: {all_keys}") key, value = handle_value(key, raw_value) add_item(result, key, value) return result @@ -255,7 +255,7 @@ dict(pairs) # catch error for any not splitted pair. except ValueError: # dictionary update sequence element #1 has length 1; if strict: - raise ValueError("e\nFailed to get a dict from pairs: %s" % pairs) + raise ValueError(f"e\nFailed to get a dict from pairs: {pairs}") else: # return the splitted list without constucting return pairs @@ -978,7 +978,7 @@ if strict: for topic in result: if topic not in all_cc_topics: - raise FieldError("Invalid topic: %s" % topic) + raise FieldError(f"Invalid topic: {topic}") return result @@ -1030,7 +1030,7 @@ if lines[-1] == license_border: lines.pop() else: - raise FieldError("No bottom line for license: %s" % lines) + raise FieldError(f"No bottom line for license: {lines}") # normalize license lines to the format of topic lines license_idx = lines.index(license_border) @@ -1201,9 +1201,7 @@ labels["RA/RG"] = True for rlabel in required_ref_labels: if rlabel not in labels: - raise RecordError( - "The reference block lacks required label: " "%s" % rlabel - ) + raise RecordError(f"The reference block lacks required label: {rlabel}") # parse each field with relevant parser parsed_dict = pairs_to_dict(list(raw_dict.items()), handlers=ref_parsers) @@ -1443,7 +1441,7 @@ if strict: for rlabel in required_labels: if rlabel not in raw_dict: - raise RecordError("The record lacks required label: " "%s" % rlabel) + raise RecordError(f"The record lacks required label: {rlabel}") # no sequence found if "" not in raw_dict: @@ -1574,7 +1572,7 @@ lines = open(args[0]) print("Parsing the file") for i, rec in enumerate(EbiParser(lines, strict=True)): - print("\r %s: %s" % (i, rec[1]["ID"]["EntryName"]), end=" ") + 
print(f"\r {i}: {rec[1]['ID']['EntryName']}", end=" ") else: lines = """\ ID Q9U9C5_CAEEL PRELIMINARY; PRT; 218 AA. diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/fasta.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/fasta.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/fasta.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/fasta.py 2022-05-24 23:42:33.000000000 +0000 @@ -10,14 +10,14 @@ from cogent3.core.moltype import ASCII, BYTES from cogent3.parse.record import RecordError from cogent3.parse.record_finder import LabeledRecordFinder -from cogent3.util.misc import open_ +from cogent3.util.io import open_ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Development" @@ -68,12 +68,12 @@ # first line must be a label line if not rec[0][0] in label_characters: if strict: - raise RecordError("Found Fasta record without label line: %s" % rec) + raise RecordError(f"Found Fasta record without label line: {rec}") continue # record must have at least one sequence if len(rec) < 2: if strict: - raise RecordError("Found label line without sequences: %s" % rec) + raise RecordError(f"Found label line without sequences: {rec}") else: continue @@ -104,7 +104,7 @@ (lo, hi) = (hi, lo) else: assert strand == "+" - return "%s:%s:%s-%s" % (sp, contig, lo, hi) + return f"{sp}:{contig}:{lo}-{hi}" def is_xmfa_blank_or_comment(x): @@ -156,7 +156,7 @@ yield name, seq_maker(seq, name=name, info=info) except Exception: raise RecordError( - "Sequence construction failed on record with label %s" % label + f"Sequence construction failed on record with label {label}" ) else: # not strict: just skip any record 
that raises an exception @@ -181,7 +181,7 @@ try: ignore, gi, db, db_ref, description = list(map(strip, line.split("|", 4))) except ValueError: # probably got wrong value - raise RecordError("Unable to parse label line %s" % line) + raise RecordError(f"Unable to parse label line {line}") info.GI = gi info[NcbiLabels[db]] = db_ref info.Description = description @@ -239,8 +239,8 @@ for index, field, converter in field_formatters: if field in display_template: indexed = True - assert indexed, "display_template [%s] does not use a field name" % display_template - sep = re.compile("[%s]" % split_with) + assert indexed, f"display_template [{display_template}] does not use a field name" + sep = re.compile(f"[{split_with}]") def call(label): label = [label, label[1:]][label[0] == ">"] diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/gbseq.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/gbseq.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/gbseq.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/gbseq.py 2022-05-24 23:42:33.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Matthew Wakefield" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Matthew Wakefield", "Peter Maxwell", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/gcg.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/gcg.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/gcg.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/gcg.py 2022-05-24 23:42:33.000000000 +0000 @@ -1,8 +1,8 @@ __author__ = "Matthew Wakefield" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 
2007-2022, The Cogent Project" __credits__ = ["Matthew Wakefield", "Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/genbank.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/genbank.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/genbank.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/genbank.py 2022-05-24 23:42:33.000000000 +0000 @@ -11,10 +11,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell", "Matthew Wakefield", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -351,9 +351,9 @@ if self.IsBetween: # between two bases try: first, last = self._data - curr = "%s^%s" % (first, last) + curr = f"{first}^{last}" except TypeError: # only one base? 
must be this or the next - curr = "%s^%s" % (first, first + 1) + curr = f"{first}^{first + 1}" else: # not self.IsBetween try: data = int(self._data) @@ -367,9 +367,9 @@ # objects first, last = self._data if self.IsBounds: - curr = "(%s%s%s)" % (first, ".", last) + curr = f"({first}{'.'}{last})" else: - curr = "%s%s%s" % (first, "..", last) + curr = f"{first}{'..'}{last}" # check if we need to add on the accession and database if self.Accession: curr = self.Accession + ":" + curr @@ -378,7 +378,7 @@ curr = self.Db + "::" + curr # check if it's complemented if self.Strand == -1: - curr = "complement(%s)" % curr + curr = f"complement({curr})" return curr def first(self): @@ -445,12 +445,12 @@ """Extracts pieces of self from sequence.""" result = [] for i in self: - first, last = i.first(), i.last() + 1 # inclusive, not exclusive + first, last = i.first() - 1, i.last() # inclusive, not exclusive # translate to 0-based indices and check if it wraps around if first < last: - curr = sequence[first - 1 : last - 1] + curr = sequence[first:last] else: - curr = sequence[first - 1 :] + sequence[: last - 1] + curr = sequence[first:] + sequence[:last] # reverse-complement if necessary if i.Strand == -1: curr = curr.translate(trans_table)[::-1] @@ -638,7 +638,7 @@ """ info_excludes = info_excludes or [] - moltype = get_moltype(moltype or "text") + moltype = get_moltype(moltype) if moltype else None for rec in MinimalGenbankParser(handle): info = Info() # populate the info object, excluding the sequence @@ -647,11 +647,17 @@ continue info[label] = value - if rec["mol_type"].lower() in ("dna", "rna", "protein"): - moltype = get_moltype(rec["mol_type"].lower()) + if moltype is None: + rec_moltype = rec["mol_type"].lower() + rec_moltype = ( + rec_moltype if rec_moltype in ("dna", "rna", "protein") else "text" + ) + rec_moltype = get_moltype(rec_moltype) + else: + rec_moltype = moltype try: - seq = moltype.make_seq( + seq = rec_moltype.make_seq( rec["sequence"].upper(), info=info, 
name=rec["locus"] ) except KeyError: diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/gff.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/gff.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/gff.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/gff.py 2022-05-24 23:42:33.000000000 +0000 @@ -1,7 +1,11 @@ #!/usr/bin/env python +from pathlib import Path + +from cogent3.util.io import open_ + __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Peter Maxwell", "Matthew Wakefield", @@ -9,15 +13,11 @@ "Christopher Bradley", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" -from pathlib import Path - -from cogent3.util.misc import open_ - def gff_parser(f): """parses a gff file @@ -51,6 +51,11 @@ gff3 = gff3_header in f.readline() f.seek(0) + if gff3: + attribute_parser = parse_attributes_gff3 + else: + attribute_parser = parse_attributes_gff2 + for line in f: # comments and blank lines if "#" in line: @@ -66,10 +71,10 @@ if len(cols) == 8: cols.append("") assert len(cols) == 9, len(line) - (seqid, source, type_, start, end, score, strand, phase, attributes) = cols + seqid, source, type_, start, end, score, strand, phase, attributes = cols # adjust for 0-based indexing - (start, end) = (int(start) - 1, int(end)) + start, end = int(start) - 1, int(end) # start is always meant to be less than end in GFF # features that extend beyond sequence have negative indices if start < 0 or end < 0: @@ -81,10 +86,6 @@ (start, end) = (end, start) # all attributes have an "ID" but this may not be unique - if gff3: - attribute_parser = parse_attributes_gff3 - else: - attribute_parser = parse_attributes_gff2 attributes = attribute_parser(attributes, (start, end)) rtn = { @@ -114,16 +115,12 @@ 
"""Returns a dictionary containing all the attributes""" attributes = attributes.strip(";") attributes = attributes.split(";") - if attributes[0]: - attributes = dict(t.split("=") for t in attributes) - else: - attributes = {} - if "Parent" in attributes.keys(): + attributes = dict(t.split("=") for t in attributes) if attributes[0] else {} + if "Parent" in attributes: # There may be multiple parents if "," in attributes["Parent"]: attributes["Parent"] = attributes["Parent"].split(",") else: attributes["Parent"] = [attributes["Parent"]] - if "ID" not in attributes.keys(): - attributes["ID"] = "" + attributes["ID"] = attributes.get("ID", "") return attributes diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/greengenes.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/greengenes.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/greengenes.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/greengenes.py 2022-05-24 23:42:33.000000000 +0000 @@ -11,10 +11,10 @@ __author__ = "Daniel McDonald" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Daniel McDonald" __email__ = "daniel.mcdonald@colorado.edu" __status__ = "Prototype" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/__init__.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/__init__.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -29,7 +29,7 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Gavin Huttley", "Peter Maxwell", @@ -48,7 +48,7 @@ "Marcin Cieslik", ] __license__ = "BSD-3" 
-__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/jaspar.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/jaspar.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/jaspar.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/jaspar.py 2022-05-24 23:42:33.000000000 +0000 @@ -10,7 +10,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/locuslink.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/locuslink.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/locuslink.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/locuslink.py 2022-05-24 23:42:33.000000000 +0000 @@ -52,10 +52,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Development" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/ncbi_taxonomy.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/ncbi_taxonomy.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/ncbi_taxonomy.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/ncbi_taxonomy.py 2022-05-24 23:42:33.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Jason Carnes" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" 
__credits__ = ["Jason Carnes", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Jason Carnes" __email__ = "jason.carnes@sbri.org" __status__ = "Development" @@ -235,8 +235,7 @@ except KeyError: # found a child whose parent doesn't exist if strict: raise MissingParentError( - "Node %s has parent %s, which isn't in taxa." - % (t_id, t.ParentId) + f"Node {t_id} has parent {t.ParentId}, which isn't in taxa." ) else: deadbeats[t.ParentId] = t diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/newick.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/newick.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/newick.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/newick.py 2022-05-24 23:42:33.000000000 +0000 @@ -22,10 +22,10 @@ EOT = None __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Andrew Butterfield", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -55,7 +55,7 @@ def error(self, detail=""): if self.token: - msg = 'Unexpected "%s" at ' % self.token + msg = f'Unexpected "{self.token}" at ' else: msg = "At " (line, column) = self.posn @@ -63,9 +63,9 @@ if column > 30: sample = "..." + sample[-20:] if line > 0: - msg += 'line %s:%s "%s"' % (line + 1, column, sample) + msg += f'line {line + 1}:{column} "{sample}"' else: - msg += 'char %s "%s"' % (column, sample) + msg += f'char {column} "{sample}"' return TreeParseError(msg + ". 
" + detail) def tokens(self): @@ -147,7 +147,7 @@ """ if "(" not in text and ";" not in text and text.strip(): # otherwise "filename" is a valid (if small) tree - raise TreeParseError('Not a Newick tree: "%s"' % text[:10]) + raise TreeParseError(f'Not a Newick tree: "{text[:10]}"') sentinals = [";", EOT] stack = [] nodes = [] @@ -160,7 +160,7 @@ try: attributes[attr_name] = attr_cast(token) except ValueError: - raise tokeniser.error("Can't convert %s '%s'" % (attr_name, token)) + raise tokeniser.error(f"Can't convert {attr_name} '{token}'") expected_attribute = None elif token == "(": if children is not None: @@ -192,7 +192,7 @@ ) else: if name is not None: - raise tokeniser.error("Already have a name '%s' for this node." % name) + raise tokeniser.error(f"Already have a name '{name}' for this node.") elif attributes: raise tokeniser.error("name should come before length.") name = token diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/nexus.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/nexus.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/nexus.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/nexus.py 2022-05-24 23:42:33.000000000 +0000 @@ -9,14 +9,14 @@ from collections import defaultdict from cogent3.parse.record import RecordError -from cogent3.util.misc import open_ +from cogent3.util.io import open_ __author__ = "Catherine Lozupone" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Catherine Lozuopone", "Rob Knight", "Micah Hamady", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Catherine Lozupone" __email__ = "lozupone@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/paml_matrix.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/paml_matrix.py --- 
python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/paml_matrix.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/paml_matrix.py 2022-05-24 23:42:33.000000000 +0000 @@ -5,10 +5,10 @@ __author__ = "Matthew Wakefield" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Matthew Wakefield", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/paml.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/paml.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/paml.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/paml.py 2022-05-24 23:42:33.000000000 +0000 @@ -3,10 +3,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/phylip.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/phylip.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/phylip.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/phylip.py 2022-05-24 23:42:33.000000000 +0000 @@ -4,10 +4,10 @@ __author__ = "Micah Hamady" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Micah Hamady", "Peter Maxwell", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Micah Hamady" 
__email__ = "hamady@colorado.edu" __status__ = "Prototype" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/psl.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/psl.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/psl.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/psl.py 2022-05-24 23:42:33.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Gavin Huttley, Anuj Pahwa" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell", "Gavin Huttley", "Anuj Pahwa"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Development" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/rdb.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/rdb.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/rdb.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/rdb.py 2022-05-24 23:42:33.000000000 +0000 @@ -12,10 +12,10 @@ __author__ = "Sandra Smit" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Sandra Smit", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Sandra Smit" __email__ = "sandra.smit@colorado.edu" __status__ = "Development" @@ -87,7 +87,7 @@ if not index: if strict: raise RecordError( - "Found Rdb record without seq label " + "line: %s" % rec[0] + "Found Rdb record without seq label " + f"line: {rec[0]}" ) else: continue @@ -100,7 +100,7 @@ # if there are no sequences throw error or skip if not sequence: if strict: - raise RecordError("Found Rdb record without sequences: %s" % rec[0]) + raise RecordError(f"Found Rdb record without sequences: {rec[0]}") else: continue diff -Nru 
python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/record_finder.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/record_finder.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/record_finder.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/record_finder.py 2022-05-24 23:42:33.000000000 +0000 @@ -16,10 +16,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight", "Gavin Huttley", "Zongzhi Liu"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -79,7 +79,7 @@ curr.append(line) if curr: if strict: - raise RecordError("Found additional data after records: %s" % (curr)) + raise RecordError(f"Found additional data after records: {curr}") else: yield curr @@ -206,6 +206,6 @@ yield curr curr = [] if curr: - raise RecordError("Non-blank lines not even multiple of %s" % num) + raise RecordError(f"Non-blank lines not even multiple of {num}") return parser diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/record.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/record.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/record.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/record.py 2022-05-24 23:42:33.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Development" @@ -51,7 +51,7 @@ assert num >= 1 except: raise ValueError( - "Grouper.NumItems must be positive int, not %s" % 
(self.NumItems) + f"Grouper.NumItems must be positive int, not {self.NumItems}" ) curr = [] for i, item in enumerate(seq): @@ -165,7 +165,7 @@ Note: Fails silently if item absent. """ if item in self.Required: - raise AttributeError("%s is a required item" % (item,)) + raise AttributeError(f"{item} is a required item") try: super(GenericRecord, self).__delitem__(item) except KeyError: @@ -277,7 +277,7 @@ """Deletes attribute, converting name if necessary. Fails silently.""" normal_attr = self.unalias(attr) if normal_attr in self.Required: - raise AttributeError("%s is a required attribute" % (attr,)) + raise AttributeError(f"{attr} is a required attribute") else: try: super(MappedRecord, self).__delattr__(normal_attr) @@ -453,7 +453,7 @@ field, mapper = new_field, fieldmap[new_field] else: if self.Strict: - raise FieldError("Got unrecognized field %s" % (raw_field,)) + raise FieldError(f"Got unrecognized field {raw_field}") else: identity_setter(result, raw_field, val) continue @@ -463,7 +463,7 @@ except: # Warning: this is a catchall for _any_ exception, # and may mask what's actually going wrong. 
if self.Strict: - raise FieldError("Could not handle line %s" % (line,)) + raise FieldError(f"Could not handle line {line}") return result @@ -520,8 +520,7 @@ items = splitter(line) if len(items) != len(fields): raise FieldError( - "Expected %s items but got %s: %s" - % (len(fields), len(items), items) + f"Expected {len(fields)} items but got {len(items)}: {items}" ) return constructor(dict(list(zip(fields, items)))) @@ -531,8 +530,7 @@ items = splitter(line) if len(items) != len(fields): raise FieldError( - "Expected %s items but got %s: %s" - % (len(fields), len(items), items) + f"Expected {len(fields)} items but got {len(items)}: {items}" ) return dict(list(zip(fields, items))) @@ -541,7 +539,7 @@ def raise_unknown_field(field, data): """Raises a FieldError, displaying the offending field and data.""" - raise FieldError("Got unknown field %s with data %s" % (field, data)) + raise FieldError(f"Got unknown field {field} with data {data}") class FieldMorpher(object): diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/sequence.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/sequence.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/sequence.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/sequence.py 2022-05-24 23:42:33.000000000 +0000 @@ -17,11 +17,11 @@ tinyseq, ) from cogent3.parse.record import FileFormatError -from cogent3.util.misc import open_ +from cogent3.util.io import open_ __author__ = "Cath Lawrence" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Cath Lawrence", "Gavin Huttley", @@ -30,7 +30,7 @@ "Rob Knight", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -61,11 +61,11 @@ if doctype is None: doctype = str(dom.doctype.name).lower() if doctype not in XML_PARSERS: - raise 
FileFormatError("Unsupported XML doctype %s" % doctype) + raise FileFormatError(f"Unsupported XML doctype {doctype}") parser = XML_PARSERS[doctype] else: if format not in PARSERS: - raise FileFormatError("Unsupported file format %s" % format) + raise FileFormatError(f"Unsupported file format {format}") parser = PARSERS[format] source = f for (name, seq) in parser(source, **kw): diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/table.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/table.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/table.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/table.py 2022-05-24 23:42:33.000000000 +0000 @@ -1,17 +1,16 @@ import csv import pathlib -from cogent3.util.misc import open_ -from cogent3.util.warning import deprecated +from cogent3.util.io import open_ from .record_finder import is_empty __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -54,11 +53,7 @@ The line elements are strings. 
""" self.with_header = with_header - columns = ( - [columns] - if isinstance(columns, int) or isinstance(columns, str) - else columns - ) + columns = [columns] if isinstance(columns, (int, str)) else columns if columns is not None and isinstance(columns[0], str) and not with_header: raise ValueError("with_header must be True for columns with str values") @@ -95,7 +90,7 @@ Elements within a row are strings """ input_from_path = False - if isinstance(lines, str) or isinstance(lines, pathlib.Path): + if isinstance(lines, (str, pathlib.Path)): path = pathlib.Path(lines).expanduser() input_from_path = path.exists() @@ -139,10 +134,10 @@ filename, header=True, sep=",", - delimiter=None, with_title=False, with_legend=False, limit=None, + **kwargs, ): """ basic processing of tabular data @@ -170,10 +165,6 @@ ----- All row values remain as strings. """ - if delimiter: - sep = delimiter - deprecated("argument", "delimiter", "sep", "2022.1") - if limit is not None and header: limit += 1 # don't count header line diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/tinyseq.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/tinyseq.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/tinyseq.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/tinyseq.py 2022-05-24 23:42:33.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Matthew Wakefield" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Matthew Wakefield", "Peter Maxwell", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/tree.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/tree.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/tree.py 2021-10-12 00:17:34.000000000 
+0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/tree.py 2022-05-24 23:42:33.000000000 +0000 @@ -22,10 +22,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight", "Catherine Lozupone", "Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Development" @@ -105,7 +105,7 @@ right_count = data.count(")") if left_count != right_count: raise RecordError( - "Found %s left parens but %s right parens." % (left_count, right_count) + f"Found {left_count} left parens but {right_count} right parens." ) tokens = DndTokenizer(data) @@ -162,7 +162,7 @@ elif state == "PostColon": # length data for the current node curr_node.length = float(t) else: # can't think of a reason to get here - raise RecordError("Incorrect PhyloNode state? %s" % t) + raise RecordError(f"Incorrect PhyloNode state? 
{t}") state = "PreColon" # get here for any non-colon token state1 = "PreClosed" last_token = t diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/tree_xml.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/tree_xml.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/tree_xml.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/tree_xml.py 2022-05-24 23:42:33.000000000 +0000 @@ -32,10 +32,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -70,7 +70,7 @@ self.current += str(text) def endElement(self, name): - getattr(self, "process_%s" % name)(self.current, **self.data) + getattr(self, f"process_{name}")(self.current, **self.data) (self.data, self.in_clade, self.current) = self.stack.pop() self.parent = self.stack[-1][0] diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/unigene.py python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/unigene.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/parse/unigene.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/parse/unigene.py 2022-05-24 23:42:33.000000000 +0000 @@ -14,10 +14,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Development" @@ -156,4 +156,4 @@ stdout.write(".") stdout.flush() count += 1 - print("read %s records" % count) + print(f"read {count} records") diff -Nru 
python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/consensus.py python-cogent-2022.5.25a1+dfsg/src/cogent3/phylo/consensus.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/consensus.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/phylo/consensus.py 2022-05-24 23:42:33.000000000 +0000 @@ -11,10 +11,10 @@ __author__ = "Matthew Wakefield" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Matthew Wakefield", "Peter Maxwell", "Gavin Huttley", "Ben Kaehler"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" @@ -299,7 +299,7 @@ for filename in sys.argv[1:]: for tree in open(filename): trees.append(make_tree(treestring=tree)) - print("Consensus of %s trees from %s" % (len(trees), sys.argv[1:])) + print(f"Consensus of {len(trees)} trees from {sys.argv[1:]}") outtrees = majority_rule(trees, strict=True) for tree in outtrees: print(tree.ascii_art(compact=True, show_internal=False)) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/__init__.py python-cogent-2022.5.25a1+dfsg/src/cogent3/phylo/__init__.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/phylo/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -8,10 +8,10 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Peter Maxwell", "Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/least_squares.py 
python-cogent-2022.5.25a1+dfsg/src/cogent3/phylo/least_squares.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/least_squares.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/phylo/least_squares.py 2022-05-24 23:42:33.000000000 +0000 @@ -12,10 +12,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/maximum_likelihood.py python-cogent-2022.5.25a1+dfsg/src/cogent3/phylo/maximum_likelihood.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/maximum_likelihood.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/phylo/maximum_likelihood.py 2022-05-24 23:42:33.000000000 +0000 @@ -4,10 +4,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/nj.py python-cogent-2022.5.25a1+dfsg/src/cogent3/phylo/nj.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/nj.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/phylo/nj.py 2022-05-24 23:42:33.000000000 +0000 @@ -21,10 +21,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Peter Maxwell"] __license__ = "BSD-3" -__version__ = 
"2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/tree_collection.py python-cogent-2022.5.25a1+dfsg/src/cogent3/phylo/tree_collection.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/tree_collection.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/phylo/tree_collection.py 2022-05-24 23:42:33.000000000 +0000 @@ -1,15 +1,15 @@ from numpy import exp, log -from cogent3.util.misc import atomic_write +from cogent3.util.io import atomic_write from . import consensus __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Ben Kaehler"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" @@ -138,7 +138,7 @@ line = line.split(None, 1) lnL = float(line[0]) if lnL > 1: - raise ValueError("likelihoods expected, not %s" % lnL) + raise ValueError(f"likelihoods expected, not {lnL}") elif lnL > 0: assert klass in [list, WeightedTreeCollection] klass = WeightedTreeCollection diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/tree_space.py python-cogent-2022.5.25a1+dfsg/src/cogent3/phylo/tree_space.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/tree_space.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/phylo/tree_space.py 2022-05-24 23:42:33.000000000 +0000 @@ -11,10 +11,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = 
"Production" @@ -260,7 +260,7 @@ candidates = ui.imap( grown_tree, specs, - noun=("%s leaf tree" % n), + noun=f"{n} leaf tree", start=work_done[n - 1] / total_work, end=work_done[n] / total_work, ) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/util.py python-cogent-2022.5.25a1+dfsg/src/cogent3/phylo/util.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/phylo/util.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/phylo/util.py 2022-05-24 23:42:33.000000000 +0000 @@ -9,10 +9,10 @@ # reconstruction algorithms. __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "pm67nz@gmail.com" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -46,7 +46,7 @@ elif v1 is None or v2 is None or v1 == v2: return v1 or v2 else: - raise ValueError("d[%s,%s] != d[%s,%s]" % (a, b, b, a)) + raise ValueError(f"d[{a},{b}] != d[{b},{a}]") def distance_dict_to_2D(dists): diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/recalculation/calculation.py python-cogent-2022.5.25a1+dfsg/src/cogent3/recalculation/calculation.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/recalculation/calculation.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/recalculation/calculation.py 2022-05-24 23:42:33.000000000 +0000 @@ -2,6 +2,7 @@ import os import time +import warnings import numpy @@ -16,10 +17,10 @@ TRACE_SCALE = 100000 __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = 
"pm67nz@gmail.com" __status__ = "Production" @@ -84,7 +85,7 @@ return self.order != other.order def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, self.label) + return f"{self.__class__.__name__}({self.label})" def get_optimiser_bounds(self): lower = self.transform_to_optimiser(self.lower) @@ -110,7 +111,7 @@ try: return numpy.log(value) except OverflowError: - raise OverflowError("log(%s)" % value) + raise OverflowError(f"log({value})") class EvaluatedCell(object): @@ -175,7 +176,7 @@ if self.failure_count < 2: print("%s inputs were:", len(self.arg_ranks)) for (i, arg) in enumerate(self.arg_ranks): - print("%s: " % i + repr(data[arg])) + print(f"{i}: " + repr(data[arg])) class ConstCell(object): @@ -236,10 +237,13 @@ except KeyboardInterrupt: raise except Exception: - print(("Failed initial calculation of %s" % cell.name)) + warnings.warn( + f"Failed initial calculation of {cell.name}", + category=UserWarning, + ) raise else: - raise RuntimeError("Unexpected Cell type %s" % type(cell)) + raise RuntimeError(f"Unexpected Cell type {type(cell)}") self._switch = 0 self.recycled_cells = [cell.rank for cell in self._cells if cell.recycled] @@ -279,8 +283,7 @@ for arg in cell.args: if arg is not cell: edges.append( - '"%s":%s -> "%s":%s' - % (arg.name, arg.rank, cell.name, cell.rank) + f'"{arg.name}":{arg.rank} -> "{cell.name}":{cell.rank}' ) for name in evs: all_const = True @@ -289,27 +292,37 @@ for cell in nodes[name]: value = self._get_current_cell_value(cell) if isinstance(value, float): - label = "%5.2e" % value + label = f"{value:5.2e}" else: label = "[]" - label = "<%s> %s" % (cell.rank, label) + label = f"<{cell.rank}> {label}" enodes.append(label) all_const = all_const and cell.is_constant some_const = some_const or cell.is_constant enodes = "|".join(enodes) colour = ["", " fillcolor=gray90, style=filled,"][some_const] colour = [colour, " fillcolor=gray, style=filled,"][all_const] - lines.append( - '"%s" [shape = "record",%s label="%s"];' % 
(name, colour, enodes) - ) + lines.append(f'"{name}" [shape = "record",{colour} label="{enodes}"];') lines.extend(edges) lines.append("}") return "\n".join(lines).replace("edge", "egde").replace("QQQ", "edge") def optimise(self, **kw): x = self.get_value_array() - bounds = self.get_bounds_vectors() - maximise(self, x, bounds, **kw) + low, high = self.get_bounds_vectors() + # due to numerical precision, it occasionally happens that + # a value no longer lies within bounds. The following logic + # catches those cases. + x = numpy.array(x) + # NOTE: numpy.allclose([], []) == True + if numpy.allclose(x[low > x], low[low > x]): + x[low > x] = low[low > x] + if numpy.allclose(x[high < x], high[high < x]): + x[high < x] = high[high < x] + + # We can still get ParameterOutOfBounds exceptions + # if values are further outside the bounds + maximise(self, x, (low, high), **kw) self.optimised = True def set_tracing(self, trace=False): @@ -323,7 +336,7 @@ n_cells = len([c for c in self._cells if not c.is_constant]) print(n_opars, "OptPars and", n_cells - n_opars, "derived values") print("OptPars: ", ", ".join([par.name for par in self.opt_pars])) - print("Times in 1/%sths of a second" % TRACE_SCALE) + print(f"Times in 1/{TRACE_SCALE}ths of a second") groups = [] groupd = {} @@ -543,9 +556,9 @@ for (i, v) in changes: cell = self._cells[i] if isinstance(cell, OptPar): - par_descs.append("%s=%8.6f" % (cell.name, v)) + par_descs.append(f"{cell.name}={v:8.6f}") else: - par_descs.append("%s=?" 
% cell.name) + par_descs.append(f"{cell.name}=?") par_descs = ", ".join(par_descs)[:22].ljust(22) print(" | ".join(tds + [""]), end=" ") if exception: diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/recalculation/definition.py python-cogent-2022.5.25a1+dfsg/src/cogent3/recalculation/definition.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/recalculation/definition.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/recalculation/definition.py 2022-05-24 23:42:33.000000000 +0000 @@ -68,6 +68,7 @@ import numpy +from cogent3.maths.optimisers import ParameterOutOfBoundsError from cogent3.maths.util import proportions_to_ratios, ratios_to_proportions from cogent3.util.dict_array import DictArrayTemplate @@ -83,14 +84,15 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" + DIM_PLURALS = { "bin": "bins", "edge": "edges", @@ -263,6 +265,21 @@ def update_from_calculator(self, calc): outputs = calc.get_current_cell_values_for_defn(self) for (output, setting) in zip(outputs, self.uniq): + # catch cases where parameters fall outside bounds due to precision + if setting.is_constant: + ... 
# block trying other conditions + elif setting.lower and output < setting.lower: + if not numpy.allclose(output, setting.lower): + raise ParameterOutOfBoundsError( + f"calculator value {output} for {self.name!r} is < {setting.lower}" + ) + output = setting.lower + elif setting.upper and output > setting.upper: + if not numpy.allclose(output, setting.upper): + raise ParameterOutOfBoundsError( + f"calculator value {output} for {self.name!r} is > {setting.upper}" + ) + output = setting.upper setting.value = output def get_num_free_params(self): @@ -393,7 +410,7 @@ def check_setting_is_valid(self, setting): if not isinstance(setting, ConstVal): - raise ValueError("%s can only be constant" % self.name) + raise ValueError(f"{self.name} can only be constant") def make_cells(self, input_soup=None, variable=None): input_soup = input_soup or {} @@ -475,22 +492,18 @@ ) for part in value: if part < 0: - raise ValueError("Negative probability in %s" % self.name) + raise ValueError(f"Negative probability in {self.name}") if part > 1: - raise ValueError("Probability > 1 in %s" % self.name) + raise ValueError(f"Probability > 1 in {self.name}") if not is_constant: # 0 or 1 leads to log(0) or log(inf) in optimiser code if part == 0: - raise ValueError( - "Zeros allowed in %s only when constant" % self.name - ) + raise ValueError(f"Zeros allowed in {self.name} only when constant") if part == 1: - raise ValueError( - "Ones allowed in %s only when constant" % self.name - ) + raise ValueError(f"Ones allowed in {self.name} only when constant") if abs(sum(value) - 1.0) > 0.00001: raise ValueError( - "Elements of %s must sum to 1.0, not %s" % (self.name, sum(value)) + f"Elements of {self.name} must sum to 1.0, not {sum(value)}" ) def _make_partition_cell(self, name, scope, value): @@ -510,7 +523,7 @@ all_cells = [] for (i, v) in enumerate(self.uniq): if v is None: - raise ValueError("input %s not set" % self.name) + raise ValueError(f"input {self.name} not set") assert hasattr(v, 
"get_default_value"), v value = v.get_default_value() assert hasattr(value, "shape"), value @@ -541,7 +554,7 @@ def check_setting_is_valid(self, setting): if setting is not None and setting.value is not self.default: - raise ValueError("%s is constant" % self.name) + raise ValueError(f"{self.name} is constant") class SelectForDimension(_Defn): diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/recalculation/__init__.py python-cogent-2022.5.25a1+dfsg/src/cogent3/recalculation/__init__.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/recalculation/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/recalculation/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -2,10 +2,10 @@ __all__ = ["calculation", "definition", "scope", "setting"] __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/recalculation/scope.py python-cogent-2022.5.25a1+dfsg/src/cogent3/recalculation/scope.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/recalculation/scope.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/recalculation/scope.py 2022-05-24 23:42:33.000000000 +0000 @@ -14,10 +14,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -54,7 +54,7 @@ if self.cats is None: return self.__class__.__name__ else: - return "%s(%s)" % 
(self.__class__.__name__, self.cats) + return f"{self.__class__.__name__}({self.cats})" class EACH(_ExistentialQualifier): @@ -95,7 +95,7 @@ if len(s) > maxwidth: s = s[: maxwidth - 4] + "..." else: - template = "%%%ss" % width + template = f"%{width}s" s = "".join([(template % (v,)).replace("\n", " ")[:width] for v in values]) return s @@ -108,7 +108,7 @@ self.name = name def __repr__(self): - return "Undef(%s)" % self.name + return f"Undef({self.name})" def nullor(name, f, recycled=False): @@ -177,7 +177,7 @@ # Defn needs from an input Defn with `arg_dimensions` if not self.activated: assert not self.clients, self.clients - raise RuntimeError('Value at "%s" step never used' % self.name) + raise RuntimeError(f'Value at "{self.name}" step never used') if self.assignments: result = [] for scope_t in self.assignments: @@ -227,16 +227,16 @@ for scope_t in self.interpret_scope(**scope): posns.add(self.index[scope_t]) if len(posns) == 0: - raise InvalidScopeError("no value for %s at %s" % (self.name, scope)) + raise InvalidScopeError(f"no value for {self.name} at {scope}") if len(posns) > 1: raise IncompleteScopeError( - "%s distinct values of %s within %s" % (len(posns), self.name, scope) + f"{len(posns)} distinct values of {self.name} within {scope}" ) return the_one_item_in(posns) def wrap_value(self, value): if isinstance(value, Undefined): - raise ValueError('Input "%s" is not defined' % value.name) + raise ValueError(f'Input "{value.name}" is not defined') if getattr(self, "array_template", None) is not None: value = self.array_template.wrap(value) return value @@ -379,9 +379,9 @@ (d, key) = (d[key], key2) if key in d and value != d[key]: - msg = "Multiple values for %s" % self.name + msg = f"Multiple values for {self.name}" if scope: - msg += " within scope %s" % "/".join(scope) + msg += f" within scope {'/'.join(scope)}" raise IncompleteScopeError(msg) d[key] = value @@ -560,7 +560,7 @@ elif not self.numeric: if lower is not None or upper is not None: raise 
ValueError( - "Non-scalar input '%s' doesn't support bounds" % self.name + f"Non-scalar input '{self.name}' doesn't support bounds" ) setting = Var((None, s_value, None)) else: @@ -575,13 +575,13 @@ elif (s_lower is not None) and s_value < s_lower: s_value = s_lower warnings.warn( - "Value of %s increased to keep within bounds" % self.name, + f"Value of {self.name} increased to keep within bounds", stacklevel=3, ) elif (s_upper is not None) and s_value > s_upper: s_value = s_upper warnings.warn( - "Value of %s decreased to keep within bounds" % self.name, + f"Value of {self.name} decreased to keep within bounds", stacklevel=3, ) setting = Var((s_lower, s_value, s_upper)) @@ -602,7 +602,7 @@ for value in values: if not numpy.isclose(value, s_value).all(): warnings.warn( - "Used mean of %s %s values" % (len(values), self.name), + f"Used mean of {len(values)} {self.name} values", stacklevel=4, ) break @@ -631,7 +631,7 @@ ) def _local_repr(self, col_width, max_width): - template = "%%%s.%sf" % (col_width, (col_width - 1) // 2) + template = f"%{col_width}.{(col_width - 1) // 2}f" assignments = [] for (i, a) in list(self.assignments.items()): if a is None: @@ -808,8 +808,8 @@ def assign_all(self, par_name, *args, **kw): defn = self.defn_for[par_name] if not isinstance(defn, _LeafDefn): - args = " and ".join(['"%s"' % a.name for a in defn.args]) - msg = '"%s" is not settable as it is derived from %s.' % (par_name, args) + args = " and ".join([f'"{a.name}"' for a in defn.args]) + msg = f'"{par_name}" is not settable as it is derived from {args}.' 
raise ValueError(msg) defn.assign_all(*args, **kw) self.update_intermediate_values([defn]) @@ -880,7 +880,7 @@ lc.optimise(**kw) except MaximumEvaluationsReached as detail: evals = detail.args[0] - err_msg = "FORCED EXIT from optimiser after %s evaluations" % evals + err_msg = f"FORCED EXIT from optimiser after {evals} evaluations" if limit_action == "ignore": pass elif limit_action == "warn": diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/recalculation/setting.py python-cogent-2022.5.25a1+dfsg/src/cogent3/recalculation/setting.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/recalculation/setting.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/recalculation/setting.py 2022-05-24 23:42:33.000000000 +0000 @@ -6,10 +6,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -71,7 +71,7 @@ ]: if bound is not None: constraints.append(template % bound) - return "Var(%s)" % " ".join(constraints) + return f"Var({' '.join(constraints)})" class ConstVal(Setting): @@ -88,7 +88,7 @@ return repr(self.value) # short as in table def __repr__(self): - return "ConstVal(%s)" % repr(self.value) + return f"ConstVal({repr(self.value)})" # indep useful sometimes! 
# def __eq__(self, other): diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/util/checkpointing.py python-cogent-2022.5.25a1+dfsg/src/cogent3/util/checkpointing.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/util/checkpointing.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/util/checkpointing.py 2022-05-24 23:42:33.000000000 +0000 @@ -5,10 +5,10 @@ __author__ = ["Peter Maxwell", "Gavin Huttley"] -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -28,7 +28,7 @@ def load(self): assert self.filename is not None, "check .available() first" - print("RESUMING from file '%s'" % self.filename) + print(f"RESUMING from file '{self.filename}'") with open(self.filename, "rb") as f: obj = pickle.load(f) self.last_time = time.time() @@ -41,7 +41,7 @@ elapsed = now - self.last_time if always or elapsed > self.interval: if self.noisy: - print("CHECKPOINTING to file '%s'" % self.filename) + print(f"CHECKPOINTING to file '{self.filename}'") if msg is not None: print(msg) with open(self.filename, "wb") as f: diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/util/deserialise.py python-cogent-2022.5.25a1+dfsg/src/cogent3/util/deserialise.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/util/deserialise.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/util/deserialise.py 2022-05-24 23:42:33.000000000 +0000 @@ -8,18 +8,50 @@ from cogent3.core.alignment import Aligned from cogent3.core.genetic_code import get_code from cogent3.core.moltype import _CodonAlphabet, get_moltype -from cogent3.util.misc import open_, path_exists +from cogent3.util.io import open_, path_exists __author__ = ["Gavin Huttley"] 
-__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" +_deserialise_func_map = {} + + +class register_deserialiser: + """ + registration decorator for functions to inflate objects that were + serialised using json. + + Functions are added to a dict which is used by the deserialise_object() + function. The type string(s) must uniquely identify the appropriate + value for the dict 'type' entry, e.g. 'cogent3.util.table.Table'. + + Parameters + ---------- + args: str or sequence of str + must be unique + """ + + def __init__(self, *args): + for type_str in args: + if not isinstance(type_str, str): + raise TypeError(f"{type_str!r} is not a string") + assert ( + type_str not in _deserialise_func_map + ), f"{type_str!r} already in {list(_deserialise_func_map)}" + self._type_str = args + + def __call__(self, func): + for type_str in self._type_str: + _deserialise_func_map[type_str] = func + return func + def _get_class(provenance): index = provenance.rfind(".") @@ -32,6 +64,11 @@ return klass +@register_deserialiser( + "cogent3.util.table.Table", + "cogent3.util.dict_array.DictArray", + "cogent3.evolve.fast_distance.DistanceMatrix", +) def deserialise_tabular(data): """deserialising DictArray, Table instances""" data.pop("version", None) @@ -61,6 +98,7 @@ return result +@register_deserialiser("cogent3.app.composable.NotCompleted") def deserialise_not_completed(data): """deserialising NotCompletedResult""" data.pop("version", None) @@ -100,17 +138,14 @@ parent.annotations += tuple(annots) +@register_deserialiser("cogent3.app.result") def deserialise_result(data): """returns a result object""" data.pop("version", None) klass = _get_class(data.pop("type")) kwargs = data.pop("result_construction") result = 
klass(**kwargs) - if "items" in data: - items = data.pop("items") - else: - # retain support for the old style result serialisation - items = data.items() + items = data.pop("items") if "items" in data else data.items() for key, value in items: # only deserialise the result object, other attributes loaded as # required @@ -123,6 +158,7 @@ return result +@register_deserialiser("cogent3.core.moltype") def deserialise_moltype(data): """returns a cogent3 MolType instance, or a CodonAlphabet""" data.pop("version", None) @@ -139,6 +175,7 @@ return result +@register_deserialiser("cogent3.core.alphabet") def deserialise_alphabet(data): """returns a cogent3 Alphabet instance""" data.pop("version", None) @@ -155,6 +192,7 @@ return result +@register_deserialiser("cogent3.core.sequence") def deserialise_seq(data, aligned=False): """deserialises sequence and any annotations @@ -193,6 +231,7 @@ return result +@register_deserialiser("cogent3.core.alignment") def deserialise_seq_collections(data): """returns a cogent3 sequence/collection/alignment instance""" # We first try to load moltype/alphabet using get_moltype @@ -219,6 +258,7 @@ return result +@register_deserialiser("cogent3.core.tree") def deserialise_tree(data): """returns a cogent3 PhyloNode instance""" data.pop("version", None) @@ -232,6 +272,9 @@ return tree +@register_deserialiser( + "cogent3.evolve.substitution_model", "cogent3.evolve.ns_substitution_model" +) def deserialise_substitution_model(data): """returns a cogent3 substitution model instance""" from cogent3.evolve.models import get_model @@ -254,6 +297,7 @@ return sm +@register_deserialiser("cogent3.evolve.parameter_controller") def deserialise_likelihood_function(data): """returns a cogent3 likelihood function instance""" data.pop("version", None) @@ -266,13 +310,19 @@ lf = model.make_likelihood_function(tree, **constructor_args) lf.set_name(name) lf = model.make_likelihood_function(tree, **constructor_args) + if isinstance(constructor_args["loci"], list): + 
locus_names = constructor_args["loci"] align = data["alignment"] - aln = [deserialise_seq_collections(align[k]) for k in align] - mprobs = [motif_probs[k] for k in motif_probs] + aln = [deserialise_seq_collections(align[k]) for k in locus_names] + if locus_names[0] in motif_probs: + mprobs = [motif_probs[k] for k in motif_probs] + else: + mprobs = [motif_probs] else: aln = deserialise_seq_collections(data.pop("alignment")) mprobs = [motif_probs] + lf.set_alignment(aln) with lf.updates_postponed(): for motif_probs in mprobs: @@ -294,6 +344,11 @@ ------- If the dict from json.loads does not contain a "type" key, the object will be returned as is. Otherwise, it will be deserialised to a cogent3 object. + + Notes + ----- + The value of the "type" key is used to identify the specific function for recreating + the original instance. """ if path_exists(data): with open_(data) as infile: @@ -306,33 +361,11 @@ if type_ is None: return data - if "core.sequence" in type_: - func = deserialise_seq - elif "core.alignment" in type_: - func = deserialise_seq_collections - elif "core.tree" in type_: - func = deserialise_tree - elif ( - "evolve.substitution_model" in type_ or "evolve.ns_substitution_model" in type_ - ): - func = deserialise_substitution_model - elif "evolve.parameter_controller" in type_: - func = deserialise_likelihood_function - elif "core.moltype" in type_: - func = deserialise_moltype - elif "core.alphabet" in type_: - func = deserialise_alphabet - elif "app.result" in type_: - func = deserialise_result - elif "notcompleted" in type_.lower(): - func = deserialise_not_completed - elif type_.lower().endswith("table"): - func = deserialise_tabular - elif "dictarray" in type_.lower(): - func = deserialise_tabular - elif "distancematrix" in type_.lower(): - func = deserialise_tabular + for type_str, func in _deserialise_func_map.items(): + if type_str in type_: + break else: - msg = "deserialising '%s' from json" % type_ + msg = f"deserialising '{type_}' from json" 
raise NotImplementedError(msg) + return func(data) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/util/dict_array.py python-cogent-2022.5.25a1+dfsg/src/cogent3/util/dict_array.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/util/dict_array.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/util/dict_array.py 2022-05-24 23:42:33.000000000 +0000 @@ -26,14 +26,15 @@ import numpy -from cogent3.util.misc import atomic_write, get_object_provenance +from cogent3.util.io import atomic_write +from cogent3.util.misc import get_object_provenance __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Ben Kaehler"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" @@ -497,7 +498,7 @@ def __repr__(self): if self.array.ndim > 2: - return "%s dimensional %s" % (self.array.ndim, type(self).__name__) + return f"{self.array.ndim} dimensional {type(self).__name__}" t = self.to_table() t.set_repr_policy(show_shape=False) @@ -557,7 +558,7 @@ def _repr_html_(self): if self.array.ndim > 2: - return "%s dimensional %s" % (self.array.ndim, type(self).__name__) + return f"{self.array.ndim} dimensional {type(self).__name__}" t = self.to_table() t.set_repr_policy(show_shape=False) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/util/__init__.py python-cogent-2022.5.25a1+dfsg/src/cogent3/util/__init__.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/util/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/util/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -14,7 +14,7 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Gavin Huttley", 
"Rob Knight", @@ -28,7 +28,7 @@ "Thomas La", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/util/io.py python-cogent-2022.5.25a1+dfsg/src/cogent3/util/io.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/util/io.py 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/util/io.py 2022-05-24 23:42:33.000000000 +0000 @@ -0,0 +1,270 @@ +import shutil +import uuid + +from bz2 import open as bzip_open +from gzip import open as gzip_open +from os import path as os_path +from os import remove +from pathlib import Path +from tempfile import mkdtemp +from typing import Union +from zipfile import ZipFile + +from chardet import detect + +from cogent3.util.misc import _wout_period + + +__author__ = "Gavin Huttley" +__copyright__ = "Copyright 2007-2022, The Cogent Project" +__credits__ = ["Gavin Huttley"] +__license__ = "BSD-3" +__version__ = "2022.5.25a1" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" +__status__ = "Production" + + +def open_zip(filename: Union[str, Path], mode="r", **kwargs): + """open a single member zip-compressed file + + Note + ---- + If mode="r". The function raises ValueError if zip has > 1 record. + The returned object is wrapped by TextIOWrapper with latin encoding + (so it's not a bytes string). + + If mode="w", returns an atomic_write() instance. 
+ """ + # import of standard library io module as some code quality tools + # confuse this with a circular import + from io import TextIOWrapper + + binary_mode = "b" in mode + mode = mode[:1] + + encoding = kwargs.pop("encoding") if "encoding" in kwargs else "latin-1" + if mode.startswith("w"): + return atomic_write(filename, mode=mode, in_zip=True) + + mode = mode.strip("t") + with ZipFile(filename) as zf: + if len(zf.namelist()) != 1: + raise ValueError("Archive is supposed to have only one record.") + + opened = zf.open(zf.namelist()[0], mode=mode, **kwargs) + + if binary_mode: + return opened + + return TextIOWrapper(opened, encoding=encoding) + + +def open_(filename: Union[str, Path], mode="rt", **kwargs): + """open that handles different compression""" + + filename = Path(filename).expanduser().absolute() + op = {".gz": gzip_open, ".bz2": bzip_open, ".zip": open_zip}.get( + filename.suffix, open + ) + + encoding = kwargs.pop("encoding", None) + need_encoding = mode.startswith("r") and "b" not in mode + if need_encoding: + if "encoding" not in kwargs: + with op(filename, mode="rb") as infile: + data = infile.read(100) + + encoding = detect(data) + encoding = encoding["encoding"] + + return op(filename, mode, encoding=encoding, **kwargs) + + +def _path_relative_to_zip_parent(zip_path, member_path): + """returns member_path relative to zip_path + + Parameters + ---------- + zip_path: Path + member_path: Path + + Notes + ----- + with zip_path = "parentdir/named.zip", then member_path="named/member.tsv" + or path="member.tsv" will return "named/member.tsv" + """ + zip_name = zip_path.name.replace(".zip", "") + if zip_name not in member_path.parts: + return Path(zip_name) / member_path + + return Path(*member_path.parts[member_path.parts.index(zip_name) :]) + + +class atomic_write: + """performs atomic write operations, cleans up if fails""" + + def __init__( + self, path: Union[str, Path], tmpdir=None, in_zip=None, mode="w", encoding=None + ): + """ + + 
Parameters + ---------- + path + path to file, or relative to directory specified by in_zip + tmpdir + directory where temporary file will be created + in_zip + path to the zip archive containing path, + e.g. if in_zip="path/to/data.zip", then path="data/seqs.tsv" + Decompressing the archive will produce the "data/seqs.tsv" + mode + file writing mode + encoding + text encoding + """ + path = Path(path).expanduser() + in_zip = Path(in_zip) if isinstance(in_zip, str) else in_zip + _, cmp = get_format_suffixes(path) + if in_zip and cmp == "zip": + in_zip = path if isinstance(in_zip, bool) else in_zip + path = Path(str(path)[: str(path).rfind(".zip")]) + + if in_zip: + path = _path_relative_to_zip_parent(in_zip, path) + + self._path = path + self._cmp = cmp + self._mode = mode + self._file = None + self._encoding = encoding + self._in_zip = in_zip + self._tmppath = self._make_tmppath(tmpdir) + + self.succeeded = None + self._close_func = ( + self._close_rename_zip if in_zip else self._close_rename_standard + ) + + def _make_tmppath(self, tmpdir): + """returns path of temporary file + + Parameters + ---------- + tmpdir: Path + to directory + + Returns + ------- + full path to a temporary file + + Notes + ----- + Uses a random uuid as the file name, adds suffixes from path + """ + suffixes = ( + "".join(self._path.suffixes) + if not self._in_zip + else "".join(self._path.suffixes[:-1]) + ) + parent = self._in_zip.parent if self._in_zip else self._path.parent + name = f"{uuid.uuid4()}{suffixes}" + tmpdir = Path(mkdtemp(dir=parent)) if tmpdir is None else Path(tmpdir) + + if not tmpdir.exists(): + raise FileNotFoundError(f"{tmpdir} directory does not exist") + + tmp_path = tmpdir / name + return tmp_path + + def _get_fileobj(self): + """returns file to be written to""" + if self._file is None: + self._file = open_(self._tmppath, self._mode, encoding=self._encoding) + + return self._file + + def __enter__(self): + return self._get_fileobj() + + def 
_close_rename_standard(self, src): + dest = Path(self._path) + try: + dest.unlink() + except FileNotFoundError: + pass + finally: + src.rename(dest) + + shutil.rmtree(src.parent) + + def _close_rename_zip(self, src): + with ZipFile(self._in_zip, "a") as out: + out.write(str(src), arcname=self._path) + + shutil.rmtree(src.parent) + + def __exit__(self, exc_type, exc_val, exc_tb): + self._file.close() + if exc_type is None: + self._close_func(self._tmppath) + self.succeeded = True + else: + self.succeeded = False + shutil.rmtree(self._tmppath.parent) + + def write(self, text): + """writes text to file""" + fileobj = self._get_fileobj() + fileobj.write(text) + + def close(self): + """closes file""" + self.__exit__(None, None, None) + + +def get_format_suffixes(filename: Union[str, Path]): + """returns file, compression suffixes""" + filename = Path(filename) + if not filename.suffix: + return None, None + + compression_suffixes = ("bz2", "gz", "zip") + suffixes = [_wout_period.sub("", sfx).lower() for sfx in filename.suffixes[-2:]] + if suffixes[-1] in compression_suffixes: + cmp_suffix = suffixes[-1] + else: + cmp_suffix = None + + if len(suffixes) == 2 and cmp_suffix is not None: + suffix = suffixes[0] + elif cmp_suffix is None: + suffix = suffixes[-1] + else: + suffix = None + return suffix, cmp_suffix + + +def remove_files(list_of_filepaths, error_on_missing=True): + """Remove list of filepaths, optionally raising an error if any are missing""" + missing = [] + for fp in list_of_filepaths: + try: + remove(fp) + except OSError: + missing.append(fp) + + if error_on_missing and missing: + raise OSError("Some filepaths were not accessible: %s" % "\t".join(missing)) + + +def path_exists(path): + """whether path is a valid path and it exists""" + if not (isinstance(path, str) or isinstance(path, Path)): + return False + try: + is_path = os_path.exists(str(path)) + except (ValueError, TypeError): + is_path = False + return is_path diff -Nru 
python-cogent-2021.10.12a1+dfsg/src/cogent3/util/misc.py python-cogent-2022.5.25a1+dfsg/src/cogent3/util/misc.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/util/misc.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/util/misc.py 2022-05-24 23:42:33.000000000 +0000 @@ -2,30 +2,18 @@ """ import os import re -import shutil -import uuid import warnings -import zipfile -from bz2 import open as bzip_open -from gzip import open as gzip_open -from io import TextIOWrapper -from os import path as os_path -from os import remove -from pathlib import Path from random import randint -from tempfile import mkdtemp from warnings import warn -from zipfile import ZipFile import numpy -from chardet import detect from numpy import array, finfo, float64 __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Rob Knight", "Peter Maxwell", @@ -37,7 +25,7 @@ "Marcin Cieslik", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -68,7 +56,7 @@ result sums to 1 within machine precision if 2D array, assumes row-order""" - assert 0 <= minprob < 1, "invalid minval %s" % minprob + assert 0 <= minprob < 1, f"invalid minval {minprob}" probs = array(probs, dtype=float64) if (probs > minprob).all(): return probs @@ -103,16 +91,12 @@ if lower <= value <= upper: return value - assert action in ("warn", "raise", "ignore"), "Unknown action %s" % repr(action) + assert action in ("warn", "raise", "ignore"), f"Unknown action {repr(action)}" value = float64(value) eps = float64(eps) + finfo(float64).eps - err_msg = "value[%s] not within lower[%s]/upper[%s] bounds" % (value, lower, upper) - wrn_msg = "value[%s] forced within lower[%s]/upper[%s] bounds" % ( - value, - lower, - upper, - ) + err_msg = f"value[{value}] not within 
lower[{lower}]/upper[{upper}] bounds" + wrn_msg = f"value[{value}] forced within lower[{lower}]/upper[{upper}] bounds" if value < lower and (lower - value) <= eps: value = lower @@ -134,224 +118,9 @@ return data -def open_zip(filename, mode="r", **kwargs): - """open a single member zip-compressed file - - Note - ---- - If mode="r". The function raises ValueError if zip has > 1 record. - The returned object is wrapped by TextIOWrapper with latin encoding - (so it's not a bytes string). - - If mode="w", returns an atomic_write() instance. - """ - binary_mode = "b" in mode - mode = mode[:1] - - encoding = kwargs.pop("encoding") if "encoding" in kwargs else "latin-1" - if mode.startswith("w"): - return atomic_write(filename, mode=mode, in_zip=True) - - mode = mode.strip("t") - with ZipFile(filename) as zf: - if len(zf.namelist()) != 1: - raise ValueError("Archive is supposed to have only one record.") - - opened = zf.open(zf.namelist()[0], mode=mode, **kwargs) - - if binary_mode: - return opened - - return TextIOWrapper(opened, encoding=encoding) - - -def open_(filename, mode="rt", **kwargs): - """open that handles different compression""" - - filename = Path(filename).expanduser().absolute() - op = {".gz": gzip_open, ".bz2": bzip_open, ".zip": open_zip}.get( - filename.suffix, open - ) - - encoding = kwargs.pop("encoding", None) - need_encoding = mode.startswith("r") and "b" not in mode - if need_encoding: - if "encoding" not in kwargs: - with op(filename, mode="rb") as infile: - data = infile.read(100) - - encoding = detect(data) - encoding = encoding["encoding"] - - return op(filename, mode, encoding=encoding, **kwargs) - - -def _path_relative_to_zip_parent(zip_path, member_path): - """returns member_path relative to zip_path - - Parameters - ---------- - zip_path: Path - member_path: Path - - Notes - ----- - with zip_path = "parentdir/named.zip", then member_path="named/member.tsv" - or path="member.tsv" will return "named/member.tsv" - """ - zip_name = 
zip_path.name.replace(".zip", "") - if zip_name not in member_path.parts: - return Path(zip_name) / member_path - - return Path(*member_path.parts[member_path.parts.index(zip_name) :]) - - -class atomic_write: - """performs atomic write operations, cleans up if fails""" - - def __init__(self, path, tmpdir=None, in_zip=None, mode="w", encoding=None): - """ - - Parameters - ---------- - path - path to file, or relative to directory specified by in_zip - tmpdir - directory where temporary file will be created - in_zip - path to the zip archive containing path, - e.g. if in_zip="path/to/data.zip", then path="data/seqs.tsv" - Decompressing the archive will produce the "data/seqs.tsv" - mode - file writing mode - encoding - text encoding - """ - path = Path(path).expanduser() - in_zip = Path(in_zip) if isinstance(in_zip, str) else in_zip - _, cmp = get_format_suffixes(path) - if in_zip and cmp == "zip": - in_zip = path if isinstance(in_zip, bool) else in_zip - path = Path(str(path)[: str(path).rfind(".zip")]) - - if in_zip: - path = _path_relative_to_zip_parent(in_zip, path) - - self._path = path - self._cmp = cmp - self._mode = mode - self._file = None - self._encoding = encoding - self._in_zip = in_zip - self._tmppath = self._make_tmppath(tmpdir) - - self.succeeded = None - self._close_func = ( - self._close_rename_zip if in_zip else self._close_rename_standard - ) - - def _make_tmppath(self, tmpdir): - """returns path of temporary file - - Parameters - ---------- - tmpdir: Path - to directory - - Returns - ------- - full path to a temporary file - - Notes - ----- - Uses a random uuid as the file name, adds suffixes from path - """ - suffixes = ( - "".join(self._path.suffixes) - if not self._in_zip - else "".join(self._path.suffixes[:-1]) - ) - parent = self._in_zip.parent if self._in_zip else self._path.parent - name = f"{uuid.uuid4()}{suffixes}" - tmpdir = Path(mkdtemp(dir=parent)) if tmpdir is None else Path(tmpdir) - - if not tmpdir.exists(): - raise 
FileNotFoundError(f"{tmpdir} directory does not exist") - - tmp_path = tmpdir / name - return tmp_path - - def _get_fileobj(self): - """returns file to be written to""" - if self._file is None: - self._file = open_(self._tmppath, self._mode, encoding=self._encoding) - - return self._file - - def __enter__(self): - return self._get_fileobj() - - def _close_rename_standard(self, src): - dest = Path(self._path) - try: - dest.unlink() - except FileNotFoundError: - pass - finally: - src.rename(dest) - - shutil.rmtree(src.parent) - - def _close_rename_zip(self, src): - with zipfile.ZipFile(self._in_zip, "a") as out: - out.write(str(src), arcname=self._path) - - shutil.rmtree(src.parent) - - def __exit__(self, exc_type, exc_val, exc_tb): - self._file.close() - if exc_type is None: - self._close_func(self._tmppath) - self.succeeded = True - else: - self.succeeded = False - shutil.rmtree(self._tmppath.parent) - - def write(self, text): - """writes text to file""" - fileobj = self._get_fileobj() - fileobj.write(text) - - def close(self): - """closes file""" - self.__exit__(None, None, None) - - _wout_period = re.compile(r"^\.") -def get_format_suffixes(filename): - """returns file, compression suffixes""" - filename = Path(filename) - if not filename.suffix: - return None, None - - compression_suffixes = ("bz2", "gz", "zip") - suffixes = [_wout_period.sub("", sfx).lower() for sfx in filename.suffixes[-2:]] - if suffixes[-1] in compression_suffixes: - cmp_suffix = suffixes[-1] - else: - cmp_suffix = None - - if len(suffixes) == 2 and cmp_suffix is not None: - suffix = suffixes[0] - elif cmp_suffix is None: - suffix = suffixes[-1] - else: - suffix = None - return suffix, cmp_suffix - - def iterable(item): """If item is iterable, returns item. Otherwise, returns [item]. 
@@ -377,7 +146,7 @@ if a: curry_params.extend([e for e in a]) if kw: - curry_params.extend(["%s=%s" % (k, v) for k, v in list(kw.items())]) + curry_params.extend([f"{k}={v}" for k, v in list(kw.items())]) # str it to prevent error in join() curry_params = list(map(str, curry_params)) @@ -513,7 +282,7 @@ for c in Classes: if type(c) != type_type: raise TypeError( - "ClassChecker found non-type object '%s' in parameter list." % c + f"ClassChecker found non-type object '{c}' in parameter list." ) self.Classes = list(Classes) @@ -744,7 +513,7 @@ self._constraint = constraint else: raise ConstraintError( - "Sequence '%s' incompatible with constraint '%s'" % (self, constraint) + f"Sequence '{self}' incompatible with constraint '{constraint}'" ) constraint = property(_get_constraint, _set_constraint) @@ -776,8 +545,7 @@ return list.__iadd__(self, other) else: raise ConstraintError( - "Sequence '%s' has items not in constraint '%s'" - % (other, self.constraint) + f"Sequence '{other}' has items not in constraint '{self.constraint}'" ) def __mul__(self, multiplier): @@ -808,7 +576,7 @@ else: if not self.item_is_valid(item): raise ConstraintError( - "Item '%s' not in constraint '%s'" % (item, self.constraint) + f"Item '{item}' not in constraint '{self.constraint}'" ) item = self.mask(item) list.__setitem__(self, index, item) @@ -819,15 +587,14 @@ list.__setslice__(self, start, end, list(map(self.mask, sequence))) else: raise ConstraintError( - "Sequence '%s' has items not in constraint '%s'" - % (sequence, self.constraint) + f"Sequence '{sequence}' has items not in constraint '{self.constraint}'" ) def append(self, item): """Appends item to self.""" if not self.item_is_valid(item): raise ConstraintError( - "Item '%s' not in constraint '%s'" % (item, self.constraint) + f"Item '{item}' not in constraint '{self.constraint}'" ) list.append(self, self.mask(item)) @@ -837,15 +604,14 @@ list.extend(self, list(map(self.mask, sequence))) else: raise ConstraintError( - "Some items in 
'%s' not in constraint '%s'" - % (sequence, self.constraint) + f"Some items in '{sequence}' not in constraint '{self.constraint}'" ) def insert(self, position, item): """Inserts item at position in self.""" if not self.item_is_valid(item): raise ConstraintError( - "Item '%s' not in constraint '%s'" % (item, self.constraint) + f"Item '{item}' not in constraint '{self.constraint}'" ) list.insert(self, position, self.mask(item)) @@ -923,9 +689,7 @@ def __setitem__(self, key, value): """Sets self[key] to value if value in constraint.""" if not self.item_is_valid(key): - raise ConstraintError( - "Item '%s' not in constraint '%s'" % (key, self.constraint) - ) + raise ConstraintError(f"Item '{key}' not in constraint '{self.constraint}'") key, value = self.mask(key), self.value_mask(value) dict.__setitem__(self, key, value) @@ -1050,19 +814,6 @@ return parser -def remove_files(list_of_filepaths, error_on_missing=True): - """Remove list of filepaths, optionally raising an error if any are missing""" - missing = [] - for fp in list_of_filepaths: - try: - remove(fp) - except OSError: - missing.append(fp) - - if error_on_missing and missing: - raise OSError("Some filepaths were not accessible: %s" % "\t".join(missing)) - - def get_independent_coords(spans, random_tie_breaker=False): """returns non-overlapping spans. spans must have structure [(start, end, ..), (..)]. 
spans can be decorated with arbitrary data @@ -1193,17 +944,6 @@ return result -def path_exists(path): - """whether path is a valid path and it exists""" - if not (isinstance(path, str) or isinstance(path, Path)): - return False - try: - is_path = os_path.exists(str(path)) - except (ValueError, TypeError): - is_path = False - return is_path - - def extend_docstring_from(source, pre=False): def docstring_inheriting_decorator(dest): parts = [source.__doc__, dest.__doc__ or ""] @@ -1270,3 +1010,8 @@ ) return result + + +def in_jupyter() -> bool: + """whether code is being executed within a jupyter notebook""" + return callable(globals().get("get_ipython")) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/util/parallel.py python-cogent-2022.5.25a1+dfsg/src/cogent3/util/parallel.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/util/parallel.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/util/parallel.py 2022-05-24 23:42:33.000000000 +0000 @@ -8,14 +8,14 @@ multiprocessing.set_start_method( - "fork" if sys.platform == "darwin" else "spawn", force=True + "forkserver" if sys.platform == "darwin" else "spawn", force=True ) __author__ = "Sheng Han Moses Koh" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Sheng Han Moses Koh", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" @@ -71,7 +71,7 @@ return False elif sys.version_info[1] >= 7: process_name = multiprocessing.current_process().name - if "ForkProcess" in process_name or "SpawnProcess" in process_name: + if "Fork" in process_name or "Spawn" in process_name: return False else: raise RuntimeError("is_master_process() requires Python 3.7 or greater") @@ -86,7 +86,7 @@ return self.func(*args, **kw) -def set_default_chunksize(s, 
max_workers): +def get_default_chunksize(s, max_workers): chunksize, remainder = divmod(len(s), max_workers * 4) if remainder: chunksize += 1 @@ -110,12 +110,13 @@ values are 'raise', 'ignore', 'warn'. Defaults to 'raise'. chunksize : int or None Size of data chunks executed by worker processes. Defaults to None - where stable chunksize is determined by set_default_chunksize() + where stable chunksize is determined by get_default_chunksize() Returns ------- - imap is a generator yielding result of f(s[i]), map returns the result - series + imap and as_completed are generators yielding result of f(s[i]), map returns the result + series. imap and map return results in the same order as s, as_completed returns results + in the order completed (which can differ from the order in s). Notes ----- @@ -135,15 +136,16 @@ if not USING_MPI: raise RuntimeError("Cannot use MPI") - err_msg = ( - "Execution in serial. For parallel MPI execution, use:\n" - " $ mpiexec -n python3 -m mpi4py.futures " - ) + if COMM.Get_attr(MPI.UNIVERSE_SIZE) == 1: + err_msg = ( + "Execution in serial. 
For parallel MPI execution, use:\n" + " $ mpiexec -n python3 -m mpi4py.futures " + ) - if COMM.Get_attr(MPI.UNIVERSE_SIZE) == 1 and if_serial == "raise": - raise RuntimeError(err_msg) - elif COMM.Get_attr(MPI.UNIVERSE_SIZE) == 1 and if_serial == "warn": - warnings.warn(err_msg, UserWarning) + if if_serial == "raise": + raise RuntimeError(err_msg) + elif if_serial == "warn": + warnings.warn(err_msg, UserWarning) max_workers = max_workers or 1 @@ -154,26 +156,85 @@ max_workers = min(max_workers, COMM.Get_attr(MPI.UNIVERSE_SIZE) - 1) if not chunksize: - chunksize = set_default_chunksize(s, max_workers) + chunksize = get_default_chunksize(s, max_workers) with MPIfutures.MPIPoolExecutor(max_workers=max_workers) as executor: - for result in executor.map(f, s, chunksize=chunksize): - yield result + yield from executor.map(f, s, chunksize=chunksize) else: if not max_workers: max_workers = multiprocessing.cpu_count() - 1 assert max_workers < multiprocessing.cpu_count() if not chunksize: - chunksize = set_default_chunksize(s, max_workers) + chunksize = get_default_chunksize(s, max_workers) f = PicklableAndCallable(f) with concurrentfutures.ProcessPoolExecutor(max_workers) as executor: - for result in executor.map(f, s, chunksize=chunksize): - yield result + yield from executor.map(f, s, chunksize=chunksize) @extend_docstring_from(imap) def map(f, s, max_workers=None, use_mpi=False, if_serial="raise", chunksize=None): return list(imap(f, s, max_workers, use_mpi, if_serial, chunksize)) + + +def _as_completed_mpi(f, s, max_workers, if_serial, chunksize=None): + """MPI version of as_completed""" + if not USING_MPI: + raise RuntimeError("Cannot use MPI") + + if COMM.Get_attr(MPI.UNIVERSE_SIZE) == 1: + err_msg = ( + "Execution in serial. 
For parallel MPI execution, use:\n" + " $ mpiexec -n python3 -m mpi4py.futures " + ) + + if if_serial == "raise": + raise RuntimeError(err_msg) + elif if_serial == "warn": + warnings.warn(err_msg, UserWarning) + + max_workers = max_workers or 1 + + f = PicklableAndCallable(f) + + if max_workers > COMM.Get_attr(MPI.UNIVERSE_SIZE): + warnings.warn("max_workers too large, reducing to UNIVERSE_SIZE-1", UserWarning) + + max_workers = min(max_workers, COMM.Get_attr(MPI.UNIVERSE_SIZE) - 1) + if not chunksize: + chunksize = get_default_chunksize(s, max_workers) + + with MPIfutures.MPIPoolExecutor( + max_workers=max_workers, chunksize=chunksize + ) as executor: + to_do = [executor.submit(f, e) for e in s] + for result in concurrentfutures.as_completed(to_do): + yield result.result() + + +def _as_completed_mproc(f, s, max_workers): + """multiprocess version of as_completed""" + if not max_workers: + max_workers = multiprocessing.cpu_count() - 1 + assert max_workers < multiprocessing.cpu_count() + + f = PicklableAndCallable(f) + + with concurrentfutures.ProcessPoolExecutor(max_workers=max_workers) as executor: + to_do = [executor.submit(f, e) for e in s] + for result in concurrentfutures.as_completed(to_do): + yield result.result() + + +@extend_docstring_from(imap, pre=True) +def as_completed( + f, s, max_workers=None, use_mpi=False, if_serial="raise", chunksize=None +): + if_serial = if_serial.lower() + assert if_serial in ("ignore", "raise", "warn"), f"invalid choice '{if_serial}'" + if use_mpi: + yield from _as_completed_mpi(f, s, max_workers, if_serial, chunksize) + else: + yield from _as_completed_mproc(f, s, max_workers) diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/util/progress_display.py python-cogent-2022.5.25a1+dfsg/src/cogent3/util/progress_display.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/util/progress_display.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/util/progress_display.py 2022-05-24 
23:42:33.000000000 +0000 @@ -10,14 +10,16 @@ __author__ = "Sheng Han Moses Koh" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Sheng Han Moses Koh"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" +from cogent3.util.misc import in_jupyter + class LogFileOutput: """A fake progress bar for when progress bars are impossible""" @@ -37,7 +39,7 @@ def refresh(self): if self.message: - delta = "+%s" % int(time.time() - self.t0) + delta = f"+{int(time.time() - self.t0)}" progress = int(100 * self.n + 0.5) print( "%s %5s %3i%% %s" % (self.lpad, delta, progress, str(self.message)), @@ -116,7 +118,7 @@ else: if noun: noun += " " - template = "%s%%%sd/%s" % (noun, len(str(count)), count) + template = f"{noun}%{len(str(count))}d/{count}" labels = [template % (i + 1) for i in range(0, count)] for (i, item) in enumerate(items): self.display(msg=labels[i], progress=start + step * i) @@ -124,7 +126,7 @@ self.display(progress=end) def write(self, *args, **kw): - if self.progress_bar_type and len(kw) < 3 and not using_notebook(): + if self.progress_bar_type and len(kw) < 3 and not in_jupyter(): self.progress_bar_type.write(*args, **kw) else: print(*args, **kw) @@ -163,14 +165,6 @@ CURRENT.context = None -def using_notebook(): - try: - get_ipython() - return True - except NameError: - return False - - def display_wrap(slow_function): """Decorator which give the function its own UI context. 
The function will receive an extra argument, 'ui', @@ -181,7 +175,7 @@ if getattr(CURRENT, "context", None) is None: if sys.stdout.isatty(): klass = tqdm - elif using_notebook(): + elif in_jupyter(): klass = notebook.tqdm elif isinstance(sys.stdout, io.FileIO): klass = LogFileOutput diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/util/recode_alignment.py python-cogent-2022.5.25a1+dfsg/src/cogent3/util/recode_alignment.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/util/recode_alignment.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/util/recode_alignment.py 2022-05-24 23:42:33.000000000 +0000 @@ -42,10 +42,10 @@ __author__ = "Greg Caporaso" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Greg Caporaso"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Greg Caporaso" __email__ = "gregcaporaso@gmail.com" __status__ = "Beta" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/util/table.py python-cogent-2022.5.25a1+dfsg/src/cogent3/util/table.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/util/table.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/util/table.py 2022-05-24 23:42:33.000000000 +0000 @@ -25,14 +25,9 @@ from cogent3.format import bedgraph from cogent3.format import table as table_format from cogent3.util.dict_array import DictArray, DictArrayTemplate -from cogent3.util.misc import ( - atomic_write, - extend_docstring_from, - get_format_suffixes, - get_object_provenance, -) +from cogent3.util.io import atomic_write, get_format_suffixes +from cogent3.util.misc import extend_docstring_from, get_object_provenance from cogent3.util.union_dict import UnionDict -from cogent3.util.warning import deprecated try: @@ -41,10 +36,10 @@ display = lambda x: print(repr(x)) __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The 
Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Felix Schill", "Sheng Koh"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -492,10 +487,6 @@ if k not in ("self", "__class__", "data", "header", "kwargs") } - if "index" in kwargs: - deprecated("argument", "index", "index_name", "2021.11") - index_name = kwargs.pop("index", index_name) - attrs.update(kwargs) self._persistent_attrs = attrs @@ -712,7 +703,7 @@ shape_info += f"\n{self.shape[0]:,} rows x {self.shape[1]:,} columns" unset_columns = [c for c in self.header if not len(self.columns[c])] unset_columns = ( - "unset columns: %s" % ", ".join(map(repr, unset_columns)) + f"unset columns: {', '.join(map(repr, unset_columns))}" if unset_columns else None ) @@ -1612,10 +1603,6 @@ ---------- concat_title_legend : bool the table caption is formed by concatenating the table title and legend - rows - table data in row orientation - header - table header justify column justification, default is right aligned. label @@ -2090,9 +2077,9 @@ as the header. Defaults to the first column. 
""" select_as_header = select_as_header or self.columns.order[0] - assert select_as_header in self.columns, ( - '"%s" not in table header' % select_as_header - ) + assert ( + select_as_header in self.columns + ), f'"{select_as_header}" not in table header' if len(self.distinct_values(select_as_header)) != len(self): raise ValueError(f"not all '{select_as_header}' values unique") diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/util/transform.py python-cogent-2022.5.25a1+dfsg/src/cogent3/util/transform.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/util/transform.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/util/transform.py 2022-05-24 23:42:33.000000000 +0000 @@ -14,10 +14,10 @@ """ __author__ = "Sandra Smit" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Sandra Smit", "Rob Knight", "Zongzhi Liu"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Sandra Smit" __email__ = "sandra.smit@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/util/union_dict.py python-cogent-2022.5.25a1+dfsg/src/cogent3/util/union_dict.py --- python-cogent-2021.10.12a1+dfsg/src/cogent3/util/union_dict.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/util/union_dict.py 2022-05-24 23:42:33.000000000 +0000 @@ -3,10 +3,10 @@ __author__ = "Thomas La" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/src/cogent3/util/warning.py python-cogent-2022.5.25a1+dfsg/src/cogent3/util/warning.py --- 
python-cogent-2021.10.12a1+dfsg/src/cogent3/util/warning.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/src/cogent3/util/warning.py 2022-05-24 23:42:33.000000000 +0000 @@ -4,10 +4,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Jai Ram Rideout"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/benchmark_aligning.py python-cogent-2022.5.25a1+dfsg/tests/benchmark_aligning.py --- python-cogent-2021.10.12a1+dfsg/tests/benchmark_aligning.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/benchmark_aligning.py 2022-05-24 23:42:33.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Peter Maxwell" __email__ = "pm67nz@gmail.com" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/benchmark.py python-cogent-2022.5.25a1+dfsg/tests/benchmark.py --- python-cogent-2021.10.12a1+dfsg/tests/benchmark.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/benchmark.py 2022-05-24 23:42:33.000000000 +0000 @@ -13,10 +13,10 @@ __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff 
-Nru python-cogent-2021.10.12a1+dfsg/tests/data/ensembl_sample.gff3 python-cogent-2022.5.25a1+dfsg/tests/data/ensembl_sample.gff3 --- python-cogent-2021.10.12a1+dfsg/tests/data/ensembl_sample.gff3 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/data/ensembl_sample.gff3 2022-05-24 23:42:33.000000000 +0000 @@ -0,0 +1,17 @@ +##gff-version 3 +##sequence-region 22 1 40 +#!genome-build Genome Reference Consortium GRCh38.p13 +#!genome-version GRCh38 +#!genome-date 2013-12 +#!genome-build-accession GCA_000001405.28 +#!genebuild-last-updated 2021-11 +22 GRCh38 chromosome 1 40 . . . ID=chromosome:22;Alias=CM000684.2,chr22,NC_000022.11 +### +22 . biological_region 37 38 2.94e+03 . . external_name=oe %3D 0.82;logic_name=cpg +22 . biological_region 39 39 0.999 - . logic_name=eponine +### +22 . biological_region 2 5 2.94e+03 . . external_name=oe %3D 0.82;logic_name=cpg +22 ensembl_havana gene 6 36 . + . ID=gene:ENSG00000215568;Name=GAB4;biotype=protein_coding;description=GRB2 associated binding protein family member 4 [Source:HGNC Symbol%3BAcc:HGNC:18325];gene_id=ENSG00000215568;logic_name=ensembl_havana_gene_homo_sapiens;version=9 +22 havana mRNA 8 30 . + . ID=transcript:ENST00000651146;Parent=gene:ENSG00000215568;Name=GAB4-206;biotype=nonsense_mediated_decay;transcript_id=ENST00000651146;version=1 +22 havana exon 12 20 . + . Parent=transcript:ENST00000651146;Name=ENSE00003848117;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003848117;rank=9;version=1 +22 havana exon 23 27 . + . 
Parent=transcript:ENST00000651146;Name=ENSE00003513343;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003513343;rank=8;version=1 diff -Nru python-cogent-2021.10.12a1+dfsg/tests/__init__.py python-cogent-2022.5.25a1+dfsg/tests/__init__.py --- python-cogent-2021.10.12a1+dfsg/tests/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -11,7 +11,7 @@ exec(f"from {__name__} import {sub_module}") __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Peter Maxwell", "Gavin Huttley", @@ -21,7 +21,7 @@ "Edward Lang", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_align/__init__.py python-cogent-2022.5.25a1+dfsg/tests/test_align/__init__.py --- python-cogent-2021.10.12a1+dfsg/tests/test_align/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_align/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -2,10 +2,10 @@ __all__ = ["test_align"] __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Jeremy Widmann", "Peter Maxwell", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Jeremy Widmann" __email__ = "jeremy.widmann@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_align/test_align.py python-cogent-2022.5.25a1+dfsg/tests/test_align/test_align.py --- python-cogent-2021.10.12a1+dfsg/tests/test_align/test_align.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_align/test_align.py 2022-05-24 23:42:33.000000000 +0000 @@ 
-22,10 +22,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -184,7 +184,7 @@ self._test_aln({"A": "tacagta", "B": "tac-gtc", "C": "ta---ta", "D": "tac-gtc"}) def test_progressive_est_tree(self): - """excercise progressive alignment without a guide tree""" + """exercise progressive alignment without a guide tree""" seqs = make_unaligned_seqs( data={ "A": "TGTGGCACAAATGCTCATGCCAGCTCTTTACAGCATGAGAACA", @@ -197,6 +197,20 @@ ) expect = { + "A": "TGTGGCACAAATGCTCATGCCAGCTCTTTACAGCATGAGAACA-------", + "C": "TGTGGCACAAGTACTCATGCCAGCTCAGTACAGCATGAGAACAGCAGTTT", + "B": "TGTGGCACAGATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTT", + } + self.assertEqual(aln.to_dict(), expect) + + aln, tree = cogent3.align.progressive.TreeAlign( + HKY85(), + seqs, + show_progress=False, + ests_from_pairwise=True, + ) + + expect = { "A": "TGTGGCACAAATGCTCATGCCAGCTCTTTACAGCATGAGAACA-------", "C": "TGTGGCACAAGTACTCATGCCAGCTCAGTACAGCATGAGAACAGCAGTTT", "B": "TGTGGCACAGATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTT", diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_app/test_align.py python-cogent-2022.5.25a1+dfsg/tests/test_app/test_align.py --- python-cogent-2021.10.12a1+dfsg/tests/test_app/test_align.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_app/test_align.py 2022-05-24 23:42:33.000000000 +0000 @@ -24,10 +24,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = 
"Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -153,7 +153,7 @@ # fails if not all sequences same seq = DNA.make_seq("AACCCGTT") all_gaps = dict([(0, 3), (2, 1), (5, 3), (6, 3)]) - final_seq = make_aligned(all_gaps, seq) + make_aligned(all_gaps, seq) gap_sets = [ dict([(5, 1), (6, 3)]), dict([(2, 1), (5, 3)]), @@ -175,7 +175,7 @@ def test_gap_difference(self): """correctly identifies the difference in gaps""" seq = DNA.make_seq("AACCCGTT") - all_gaps = dict([(0, 3), (2, 1), (5, 3), (6, 3)]) + dict([(0, 3), (2, 1), (5, 3), (6, 3)]) gap_sets = [ dict([(5, 1), (6, 3)]), dict([(2, 1), (5, 3)]), diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_app/test_app_mpi.py python-cogent-2022.5.25a1+dfsg/tests/test_app/test_app_mpi.py --- python-cogent-2021.10.12a1+dfsg/tests/test_app/test_app_mpi.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_app/test_app_mpi.py 2022-05-24 23:42:33.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Sheng Han Moses Koh" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Sheng Han Moses Koh"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -35,7 +35,6 @@ r = process.apply_to( members, - logger=False, show_progress=False, parallel=True, par_kw=dict(use_mpi=True), diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_app/test_composable.py python-cogent-2022.5.25a1+dfsg/tests/test_app/test_composable.py --- python-cogent-2021.10.12a1+dfsg/tests/test_app/test_composable.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_app/test_composable.py 2022-05-24 23:42:33.000000000 +0000 @@ -1,10 +1,13 @@ import os import pathlib +from pickle import dumps, loads from tempfile import TemporaryDirectory from unittest import TestCase, main from unittest.mock import 
Mock +from scitrack import CachingLogger + from cogent3.app import io as io_app from cogent3.app import sample as sample_app from cogent3.app.composable import ( @@ -21,10 +24,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -43,7 +46,7 @@ read_write = reader + writer got = read_write(path) # should skip reading and return path - self.assertEqual(got, outpath) + self.assertTrue(got.endswith(outpath)) read_write.disconnect() # allows us to reuse bits read_write_degen = reader + writer + omit_degens # should return an alignment instance @@ -69,14 +72,14 @@ """composables can only be used in a single composition""" aseqfunc1 = ComposableSeq(input_types="sequences", output_types="sequences") aseqfunc2 = ComposableSeq(input_types="sequences", output_types="sequences") - comb = aseqfunc1 + aseqfunc2 + aseqfunc1 + aseqfunc2 with self.assertRaises(AssertionError): aseqfunc3 = ComposableSeq(input_types="sequences", output_types="sequences") - comb2 = aseqfunc1 + aseqfunc3 + aseqfunc1 + aseqfunc3 # the other order with self.assertRaises(AssertionError): aseqfunc3 = ComposableSeq(input_types="sequences", output_types="sequences") - comb2 = aseqfunc3 + aseqfunc2 + aseqfunc3 + aseqfunc2 def test_composable_to_self(self): """this should raise a ValueError""" @@ -96,7 +99,7 @@ self.assertEqual(aseqfunc3.input, None) self.assertEqual(aseqfunc3.output, None) # should be able to compose a new one now - comb2 = aseqfunc1 + aseqfunc3 + aseqfunc1 + aseqfunc3 def test_apply_to(self): """correctly applies iteratively""" @@ -108,7 +111,7 @@ self.assertEqual(len(got), len(dstore)) # should also be able to apply the results to another composable func min_length = 
sample_app.min_length(10) - got = min_length.apply_to(got, show_progress=False, logger=True) + got = min_length.apply_to(got, show_progress=False) self.assertEqual(len(got), len(dstore)) # should work on a chained function proc = reader + min_length @@ -144,6 +147,19 @@ self.assertEqual(len(process.data_store.logs), 1) process.data_store.close() + def test_apply_to_non_unique_identifiers(self): + """should fail if non-unique names""" + dstore = [ + "brca1.bats.fasta", + "brca1.apes.fasta", + ] + with TemporaryDirectory(dir=".") as dirname: + reader = io_app.load_aligned(format="fasta", moltype="dna") + min_length = sample_app.min_length(10) + process = reader + min_length + with self.assertRaises(ValueError): + process.apply_to(dstore) + def test_apply_to_logging(self): """correctly creates log file""" dstore = io_app.get_data_store("data", suffix="fasta", limit=3) @@ -153,21 +169,39 @@ outpath = os.path.join(os.getcwd(), dirname, "delme.tinydb") writer = io_app.write_db(outpath) process = reader + min_length + writer - r = process.apply_to(dstore, show_progress=False, logger=False) - self.assertEqual(len(process.data_store.logs), 0) + r = process.apply_to(dstore, show_progress=False) + # always creates a log + self.assertEqual(len(process.data_store.logs), 1) process.data_store.close() + def test_apply_to_logger(self): + """correctly uses user provided logger""" + dstore = io_app.get_data_store("data", suffix="fasta", limit=3) with TemporaryDirectory(dir=".") as dirname: + LOGGER = CachingLogger() reader = io_app.load_aligned(format="fasta", moltype="dna") - # trigger creation of notcompleted min_length = sample_app.min_length(10) outpath = os.path.join(os.getcwd(), dirname, "delme.tinydb") writer = io_app.write_db(outpath) process = reader + min_length + writer - r = process.apply_to(dstore, show_progress=False, logger=True) + r = process.apply_to(dstore, show_progress=False, logger=LOGGER) self.assertEqual(len(process.data_store.logs), 1) 
process.data_store.close() + def test_apply_to_invalid_logger(self): + """incorrect logger value raises TypeError""" + dstore = io_app.get_data_store("data", suffix="fasta", limit=3) + for logger_val in (True, "somepath.log"): + with TemporaryDirectory(dir=".") as dirname: + reader = io_app.load_aligned(format="fasta", moltype="dna") + min_length = sample_app.min_length(10) + outpath = os.path.join(os.getcwd(), dirname, "delme.tinydb") + writer = io_app.write_db(outpath) + process = reader + min_length + writer + with self.assertRaises(TypeError): + process.apply_to(dstore, show_progress=False, logger=logger_val) + process.data_store.close() + def test_apply_to_not_completed(self): """correctly creates notcompleted""" dstore = io_app.get_data_store("data", suffix="fasta", limit=3) @@ -276,7 +310,6 @@ class TestPicklable(TestCase): def test_composite_pickleable(self): """composable functions should be pickleable""" - from pickle import dumps from cogent3.app import align, evo, io, sample, translate, tree @@ -299,8 +332,6 @@ def test_not_completed_result(self): """should survive roundtripping pickle""" - from pickle import dumps, loads - err = NotCompleted("FAIL", "mytest", "can we roundtrip") p = dumps(err) new = loads(p) @@ -312,7 +343,7 @@ def test_triggers_bugcatcher(self): """a composable that does not trap failures returns NotCompletedResult requesting bug report""" - from cogent3.app import align, evo, io, sample, translate, tree + from cogent3.app import io read = io.load_aligned(moltype="dna") read.func = lambda x: None @@ -325,6 +356,13 @@ return ctx.frame_start == expect +# for testing appify +@appify(SERIALISABLE_TYPE, SERIALISABLE_TYPE) +def slicer(val, index=2): + """my docstring""" + return val[:index] + + class TestUserFunction(TestCase): def foo(self, val, *args, **kwargs): return val[:4] @@ -371,13 +409,7 @@ def test_appify(self): """acts like a decorator should!""" - - @appify(SERIALISABLE_TYPE, SERIALISABLE_TYPE) - def slicer(val, index=2): - 
"""my docstring""" - return val[:index] - - self.assertEqual(slicer.__doc__, "appify: my docstring") + self.assertEqual(slicer.__doc__, "my docstring") self.assertEqual(slicer.__name__, "slicer") app = slicer() self.assertTrue(SERIALISABLE_TYPE in app._input_types) @@ -386,6 +418,13 @@ app2 = slicer(index=3) self.assertEqual(app2(list(range(4))), [0, 1, 2]) + def test_appify_pickle(self): + """appified function should be pickleable""" + app = slicer(index=6) + dumped = dumps(app) + loaded = loads(dumped) + self.assertEqual(loaded(list(range(10))), list(range(6))) + def test_user_function_repr(self): u_function_1 = user_function(self.foo, "aligned", "aligned") u_function_2 = user_function(self.bar, "aligned", "pairwise_distances") diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_app/test_data_store.py python-cogent-2022.5.25a1+dfsg/tests/test_app/test_data_store.py --- python-cogent-2021.10.12a1+dfsg/tests/test_app/test_data_store.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_app/test_data_store.py 2022-05-24 23:42:33.000000000 +0000 @@ -20,16 +20,17 @@ SingleReadDataStore, WritableDirectoryDataStore, WritableTinyDbDataStore, + get_data_source, load_record_from_json, ) from cogent3.parse.fasta import MinimalFastaParser __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -81,7 +82,6 @@ def test_get_member(self): """returns a matching member""" - basedir = self.basedir.split(".")[0] dstore = self.ReadClass(self.basedir, suffix=".fasta") member = dstore.get_member("brca1.fasta") self.assertNotEqual(member, None) @@ -142,7 +142,6 @@ dstore = self.ReadClass(self.basedir, suffix="*") re_dstore = loads(dumps(dstore)) - got = 
re_dstore[0].read() self.assertEqual(str(dstore), str(re_dstore)) self.assertEqual(dstore[0].read(), re_dstore[0].read()) @@ -448,8 +447,18 @@ with self.assertRaises(FileNotFoundError): self.WriteClass(path, suffix=".json", create=False) - # correctly creates tinydb when full path does not exist - _ = self.WriteClass(path, suffix=".json", create=True) + def test_write_not_completed(self): + """directory data store ignores""" + with TemporaryDirectory(dir=".") as dirname: + # tests the case when the directory has the file with the same suffix to self.suffix + from cogent3.app.composable import NotCompleted + + with TemporaryDirectory(dir=".") as dirname: + path = Path(dirname) / "subdir" + writer = self.WriteClass(path, suffix=".fasta", create=True) + nc = NotCompleted("FAIL", "test", "dummy fail", source="blah.json") + got = writer.write(nc.source, nc) + assert got is nc class ZippedDataStoreReadTests(TestCase, DataStoreBaseReadTests): @@ -523,7 +532,7 @@ path = os.path.join(dirname, self.basedir) dstore = self.WriteClass(path, if_exists="overwrite") identifier = dstore.make_relative_identifier(keys[0]) - got = dstore.write(identifier, self.data[keys[0]]) + dstore.write(identifier, self.data[keys[0]]) path = dstore.add_file(log_path, keep_suffix=True, cleanup=False) self.assertTrue("some.log" in dstore) dstore.close() @@ -636,7 +645,8 @@ path = os.path.join(dirname, self.basedir) dstore = self.WriteClass(path, if_exists="overwrite") id_ = dstore.make_relative_identifier(incomplete[0]) - dstore.write_incomplete(id_, incomplete[1]) + got = dstore.write(id_, incomplete[1]) + self.assertIsInstance(got, DataStoreMember) for k in keys: id_ = dstore.make_relative_identifier(k) dstore.write(id_, self.data[k]) @@ -798,7 +808,9 @@ path = dirname / f"{self.basedir}.tinydb" dstore = self.WriteClass(path, create=True) with self.assertRaises(ValueError): - dstore.write("1", dict(a=24, b="some text")) + got = dstore.write("1", dict(a=24, b="some text")) + # validate return type + 
self.assertIsInstance(got, DataStoreMember) dstore.write("1.json", dict(a=24, b="some text")) dstore.close() @@ -856,6 +868,55 @@ self.assertEqual(data_, expected) self.assertEqual(compl, True) + def test_get_data_source_str_pathlib(self): + """handles case where input is string object or pathlib object""" + for val_klass in (str, pathlib.Path): + value = val_klass("some/path.txt") + got = get_data_source(value) + self.assertEqual(got, str(value)) + + def test_get_data_source_seqcoll(self): + """handles case where input is sequence collection object""" + from cogent3 import make_unaligned_seqs + + for val_klass in (str, pathlib.Path): + value = val_klass("some/path.txt") + obj = make_unaligned_seqs( + data=dict(seq1="ACGG"), info=dict(source=value, random_key=1234) + ) + got = get_data_source(obj) + self.assertEqual(got, str(value)) + + def test_get_data_source_attr(self): + """handles case where input has source attribute string object or pathlib object""" + + class dummy: + source = None + + for val_klass in (str, pathlib.Path): + obj = dummy() + value = val_klass("some/path.txt") + obj.source = value + got = get_data_source(obj) + self.assertEqual(got, str(value)) + + def test_get_data_source_dict(self): + """handles case where input is dict (sub)class instance with top level source key""" + from cogent3.util.union_dict import UnionDict + + for klass in (dict, UnionDict): + for val_klass in (str, pathlib.Path): + value = val_klass("some/path.txt") + data = klass(source=value) + got = get_data_source(data) + self.assertEqual(got, str(value)) + + def test_get_data_source_none(self): + """handles case where input does not have a source attribute or key""" + for data in (None, dict(), set(), dict(info=dict())): + got = get_data_source(data) + self.assertIsNone(got) + if __name__ == "__main__": main() diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_app/test_dist.py python-cogent-2022.5.25a1+dfsg/tests/test_app/test_dist.py --- 
python-cogent-2021.10.12a1+dfsg/tests/test_app/test_dist.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_app/test_dist.py 2022-05-24 23:42:33.000000000 +0000 @@ -13,10 +13,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Stephen Ma"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_app/test_evo.py python-cogent-2022.5.25a1+dfsg/tests/test_app/test_evo.py --- python-cogent-2021.10.12a1+dfsg/tests/test_app/test_evo.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_app/test_evo.py 2022-05-24 23:42:33.000000000 +0000 @@ -19,10 +19,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -39,9 +39,9 @@ got = " ".join(str(model).splitlines()) expect = ( "model(type='model', sm='HKY85', tree=None, unique_trees=False, " - "name=None, sm_args=None, lf_args=None, " + "name=None, optimise_motif_probs=False, sm_args=None, lf_args=None, " "time_het='max', param_rules=None, " - "opt_args=None, split_codons=False, " + "opt_args=None, upper=50, split_codons=False, " "show_progress=False, verbose=False)" ) self.assertEqual( @@ -49,6 +49,29 @@ expect, ) + def test_model_opt_mprob_arg(self): + """argument controls optimisability of motif prob settings""" + for mn in ("HKY85", "GN", "CNFGTR"): + for value in (True, False): + # check setting via sm_args is overridden + with self.assertRaises(ValueError): + 
model = evo_app.model( + mn, + optimise_motif_probs=value, + sm_args=dict(optimise_motif_probs=not value), + ) + model = evo_app.model( + mn, + optimise_motif_probs=value, + ) + self.assertEqual(model._sm._optimise_motif_probs, value) + # check picking a different value for constructor get's overriden + model = evo_app.model( + get_model(mn, optimise_motif_probs=not value), + optimise_motif_probs=value, + ) + self.assertEqual(model._sm._optimise_motif_probs, value) + def test_model_tree(self): """allows tree to be string, None or tree""" treestring = "(a,b,c)" @@ -62,7 +85,7 @@ model1 = evo_app.model("HKY85") model2 = evo_app.model("HKY85", time_het="max") with self.assertRaises(ValueError): - hyp = evo_app.hypothesis(model1, model2) + evo_app.hypothesis(model1, model2) def test_hyp_init(self): """uses user specified init_alt function, or not""" @@ -112,7 +135,7 @@ ) def test_model_collection_init_sequential(self): - """modelc collection uses preceding model to initialise function""" + """model collection uses preceding model to initialise function""" opt_args = dict(max_evaluations=15, limit_action="ignore") model1 = evo_app.model("F81", opt_args=opt_args) model2 = evo_app.model("HKY85", opt_args=opt_args) @@ -139,6 +162,15 @@ self.assertIsInstance(result, model_collection_result) + # now with a single discrete edge + lf_args = dict(discrete_edges=["Opossum"]) + model2 = evo_app.model("HKY85", opt_args=opt_args, lf_args=lf_args) + model3 = evo_app.model("GTR", opt_args=opt_args, lf_args=lf_args) + # defaults to initialise model3 from model 2 from model1 + mod_coll = evo_app.model_collection(model2, model3, sequential=True) + result = mod_coll(aln) + self.assertIsInstance(result, model_collection_result) + def test_model_time_het(self): """support lf time-het argument edge_sets""" _data = { @@ -149,6 +181,7 @@ aln = make_aligned_seqs(data=_data, moltype="dna") mod = evo_app.model( "GN", + optimise_motif_probs=True, time_het=[dict(edges=["Mouse", "Human"], 
is_independent=False)], opt_args=dict(max_evaluations=25, limit_action="ignore"), ) @@ -214,10 +247,14 @@ aln = load_aligned_seqs("data/primate_brca1.fasta", moltype="dna") aln = aln.take_seqs(["Human", "Rhesus", "Galago"])[2::3].omit_gap_pos() model1 = evo_app.model( - "F81", opt_args=dict(max_evaluations=25, limit_action="ignore") + "F81", + optimise_motif_probs=False, + opt_args=dict(max_evaluations=25, limit_action="ignore"), ) model2 = evo_app.model( - "HKY85", opt_args=dict(max_evaluations=100, limit_action="ignore") + "HKY85", + optimise_motif_probs=False, + opt_args=dict(max_evaluations=100, limit_action="ignore"), ) hyp = evo_app.hypothesis(model1, model2) result = hyp(aln) @@ -233,8 +270,8 @@ expect = ( "hypothesis(type='hypothesis', null='HKY85', " "alternates=(model(type='model', sm='HKY85', tree=None, unique_trees=False, " - "name='hky85-max-het', sm_args=None, lf_args=None, " - "time_het='max', param_rules=None, opt_args=None," + "name='hky85-max-het', optimise_motif_probs=False, sm_args=None, lf_args=None, " + "time_het='max', param_rules=None, opt_args=None, upper=50," " split_codons=False, show_progress=False, verbose=False),)," " sequential=True, init_alt=None)" ) @@ -465,8 +502,12 @@ } aln = make_aligned_seqs(data=_data, moltype="dna") opt_args = dict(max_evaluations=10, limit_action="ignore") - m1 = evo_app.model("F81", split_codons=True, opt_args=opt_args) - m2 = evo_app.model("GTR", split_codons=True, opt_args=opt_args) + m1 = evo_app.model( + "F81", optimise_motif_probs=False, split_codons=True, opt_args=opt_args + ) + m2 = evo_app.model( + "GTR", optimise_motif_probs=False, split_codons=True, opt_args=opt_args + ) hyp = evo_app.hypothesis(m1, m2) r = hyp(aln) bm = r.select_models() @@ -784,11 +825,25 @@ strapper = evo_app.bootstrap(hyp, num_reps=2, parallel=False) result = strapper(aln) nd = result.null_dist - self.assertTrue(set(type(v) for v in nd), {float}) + self.assertTrue({type(v) for v in nd}, {float}) json = result.to_json() got = 
deserialise_object(json) self.assertIsInstance(got, evo_app.bootstrap_result) + def test_bstrap_fail(self): + """invalid data returns meaningful error""" + aln = load_aligned_seqs(join(data_dir, "brca1.fasta"), moltype="dna") + aln = aln.take_seqs(aln.names[:3]) + opt_args = dict(max_evaluations=20, limit_action="ignore") + m1 = evo_app.model("F81", opt_args=opt_args) + # we've retained gaps, so this should fail at first call as incompatible with model + m2 = evo_app.model("GTR", opt_args=opt_args, sm_args=dict(recode_gaps=False)) + hyp = evo_app.hypothesis(m1, m2) + strapper = evo_app.bootstrap(hyp, num_reps=2, parallel=False) + result = strapper(aln) + # correct message being relayed + self.assertTrue("ValueError: '-' at" in result.message) + def test_bstrap_parallel(self): """exercising bootstrap with parallel""" aln = load_aligned_seqs(join(data_dir, "brca1.fasta"), moltype="dna") diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_app/test_init.py python-cogent-2022.5.25a1+dfsg/tests/test_app/test_init.py --- python-cogent-2021.10.12a1+dfsg/tests/test_app/test_init.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_app/test_init.py 2022-05-24 23:42:33.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -83,7 +83,7 @@ composable_application_tuple[0].disconnect() composable_application_tuple[1].disconnect() # Compose two composable applications, there should not be exceptions. 
- res = composable_application_tuple[0] + composable_application_tuple[1] + composable_application_tuple[0] + composable_application_tuple[1] for app in applications: if hasattr(app, "data_store"): diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_app/test_io.py python-cogent-2022.5.25a1+dfsg/tests/test_app/test_io.py --- python-cogent-2021.10.12a1+dfsg/tests/test_app/test_io.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_app/test_io.py 2022-05-24 23:42:33.000000000 +0000 @@ -16,6 +16,7 @@ from cogent3.app import align as align_app from cogent3.app import io as io_app from cogent3.app.composable import NotCompleted +from cogent3.app.data_store import DataStoreMember from cogent3.app.io import write_db from cogent3.app.result import generic_result from cogent3.core.alignment import ArrayAlignment, SequenceCollection @@ -23,14 +24,13 @@ from cogent3.evolve.fast_distance import DistanceMatrix from cogent3.maths.util import safe_log from cogent3.util.table import Table -from cogent3.util.union_dict import UnionDict __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -174,6 +174,7 @@ with TemporaryDirectory(dir=".") as dirname: writer = io_app.write_seqs(dirname, if_exists="ignore") wrote = list(map(writer, alns)) + self.assertIsInstance(wrote[0], DataStoreMember) written = list(io_app.findall(dirname, suffix="fasta")) for i, wrote in enumerate(written): self.assertEqual(alns[i].info.stored, join(dirname, wrote)) @@ -216,6 +217,7 @@ writer = write_db(outpath, create=True, if_exists="ignore") gr = _get_generic_result(join("blah", "delme.json")) got = writer(gr) + self.assertIsInstance(got, DataStoreMember) 
writer.data_store.db.close() dstore = io_app.get_data_store(f"{outpath}.tinydb", suffix="json") reader = io_app.load_db() @@ -239,16 +241,6 @@ dstore.close() self.assertEqual(got, data) - def test_write_db_invalid(self): - """value error if identifier does not match data.info.source""" - with TemporaryDirectory(dir=".") as dirname: - outpath = join(dirname, "delme") - writer = write_db(outpath, create=True, if_exists="ignore") - data = UnionDict(a=[1, 2], b="string", source="delme2.json") - got = writer(data, identifier=join("blah", "delme.json")) - self.assertTrue("ValueError" in got.message) - writer.data_store.db.close() - def test_load_db_failure_json_file(self): """informative load_db error message when given a json file path""" # todo this test has a trapped exception about being unable to delete @@ -299,7 +291,8 @@ with TemporaryDirectory(dir=".") as dirname: writer = io_app.write_tabular(data_path=dirname, format="tsv") outpath = join(dirname, "delme.tsv") - writer.write(mca, identifier=outpath) + got = writer.write(mca, identifier=outpath) + self.assertIsInstance(got, DataStoreMember) new = loader(outpath) # when written to file in tabular form # the loaded table will have dim-1 dim-2 as column labels @@ -478,7 +471,8 @@ obj = generic_result(source=join("blah", "delme.json")) obj["dna"] = DNA writer = io_app.write_json(outdir, create=True) - _ = writer(obj) + got = writer(obj) + self.assertIsInstance(got, DataStoreMember) reader = io_app.load_json() got = reader(join(outdir, "delme.json")) got.deserialised_values() @@ -522,7 +516,7 @@ writer = write_db("delme.tinydb", create=True, if_exists="overwrite") process = reader + aligner + writer - r = process.apply_to(members, logger=False, show_progress=False, parallel=True) + r = process.apply_to(members, show_progress=False, parallel=True) expect = [str(m) for m in process.data_store] process.data_store.close() diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_app/test_result.py 
python-cogent-2022.5.25a1+dfsg/tests/test_app/test_result.py --- python-cogent-2021.10.12a1+dfsg/tests/test_app/test_result.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_app/test_result.py 2022-05-24 23:42:33.000000000 +0000 @@ -1,7 +1,10 @@ +import pathlib + from unittest import TestCase, main from cogent3 import make_aligned_seqs, make_table from cogent3.app import evo as evo_app +from cogent3.app.data_store import DataStoreMember from cogent3.app.result import ( generic_result, hypothesis_result, @@ -10,19 +13,18 @@ tabular_result, ) from cogent3.util.deserialise import deserialise_object +from cogent3.util.dict_array import DictArray __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" -from cogent3.util.dict_array import DictArray - class TestGenericResult(TestCase): def test_deserialised_values(self): @@ -55,8 +57,8 @@ data = {"type": "cogent3.core.moltype.MolType", "moltype": "dna"} result = generic_result(source="blah.json") result["key"] = data - r = repr(result) - s = str(result) + repr(result) + str(result) def test_keys(self): """it works""" @@ -72,6 +74,30 @@ with self.assertRaises(TypeError): gr["null"] = {0, 23} + def test_infers_source(self): + """flexible handling of data source""" + # works for string + source = "path/blah.fasta" + aln = make_aligned_seqs( + {"A": "ACGT"}, info=dict(source=source, random_key=1234) + ) + gr = generic_result(aln) + self.assertEqual(gr.source, "path/blah.fasta") + + # or Path + aln.info.source = pathlib.Path(source) + gr = generic_result(aln) + self.assertEqual(str(gr.source), str(pathlib.Path("path/blah.fasta"))) + + # or DataStoreMember + aln.info.source = DataStoreMember(source) + 
gr = generic_result(aln) + self.assertEqual(str(gr.source), "path/blah.fasta") + + aln.info = {} + with self.assertRaises(ValueError): + generic_result(aln) + class TestModelResult(TestCase): def test_repr(self): @@ -161,7 +187,7 @@ opt_args=dict(max_evaluations=55, limit_action="ignore"), ) result = mod(aln) - s = repr(result) + repr(result) def test_model_result_tree_split_pos_model(self): """returns tree from lf with split codon positions""" @@ -246,7 +272,7 @@ def test_model_result_invalid_setitem(self): """model_result raise TypeError if trying to set incorrect item type""" - mr = model_result() + mr = model_result(source="blah") with self.assertRaises(TypeError): mr["null"] = 23 @@ -283,7 +309,7 @@ def test_get_best_model(self): """should correctly identify the best model""" - coll = model_collection_result(None) + coll = model_collection_result(source="blah") coll.update(self._model_results) got = coll.get_best_model() # we ensure a model_result instance is returned from the possible set @@ -292,7 +318,7 @@ def test_select_model(self): """correctly select models""" # we ensure a series of model_result instances is returned - coll = model_collection_result(None) + coll = model_collection_result(source="blah") coll.update(self._model_results) got = coll.select_models() self.assertTrue(len(got) > 0) @@ -302,8 +328,8 @@ def test_model_collection_result_repr(self): """constructed result can do the different repr""" - result = model_collection_result(None) - coll = model_collection_result(None) + result = model_collection_result(source="blah") + coll = model_collection_result(source="blah") coll.update(self._model_results) got = result.__repr__() self.assertIsInstance(got, str) @@ -342,7 +368,7 @@ def test_model_collection_result_invalid_setitem(self): """model_collection_result raise TypeError if trying to set incorrect item type""" - mcr = model_collection_result() + mcr = model_collection_result(source="blah") with self.assertRaises(TypeError): mcr["null"] = 23 
@@ -373,7 +399,7 @@ def test_invalid_setitem(self): """hypothesis_result raise TypeError if trying to set incorrect item type""" - hr = hypothesis_result("null") + hr = hypothesis_result(name_of_null="null", source="blah") with self.assertRaises(TypeError): hr["null"] = {0, 23} diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_app/test_sample.py python-cogent-2022.5.25a1+dfsg/tests/test_app/test_sample.py --- python-cogent-2021.10.12a1+dfsg/tests/test_app/test_sample.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_app/test_sample.py 2022-05-24 23:42:33.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_app/test_translate.py python-cogent-2022.5.25a1+dfsg/tests/test_app/test_translate.py --- python-cogent-2021.10.12a1+dfsg/tests/test_app/test_translate.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_app/test_translate.py 2022-05-24 23:42:33.000000000 +0000 @@ -13,10 +13,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_app/test_tree.py python-cogent-2022.5.25a1+dfsg/tests/test_app/test_tree.py --- python-cogent-2021.10.12a1+dfsg/tests/test_app/test_tree.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_app/test_tree.py 
2022-05-24 23:42:33.000000000 +0000 @@ -1,29 +1,20 @@ -import json import os -from tempfile import TemporaryDirectory from unittest import TestCase, main -from cogent3 import ( - DNA, - load_aligned_seqs, - load_tree, - make_aligned_seqs, - make_tree, -) +from cogent3 import DNA, load_aligned_seqs, make_aligned_seqs, make_tree from cogent3.app import dist from cogent3.app import tree as tree_app from cogent3.app.composable import NotCompleted from cogent3.core.tree import PhyloNode from cogent3.evolve.fast_distance import DistanceMatrix -from cogent3.util.misc import get_object_provenance, open_ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_cluster/__init__.py python-cogent-2022.5.25a1+dfsg/tests/test_cluster/__init__.py --- python-cogent-2021.10.12a1+dfsg/tests/test_cluster/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_cluster/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -2,10 +2,10 @@ __all__ = ["test_UPGMA"] __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Catherine Lozuopone", "Peter Maxwell", "Rob Knight", "Justin Kuczynski"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_cluster/test_UPGMA.py python-cogent-2022.5.25a1+dfsg/tests/test_cluster/test_UPGMA.py --- python-cogent-2021.10.12a1+dfsg/tests/test_cluster/test_UPGMA.py 2021-10-12 00:17:34.000000000 +0000 +++ 
python-cogent-2022.5.25a1+dfsg/tests/test_cluster/test_UPGMA.py 2022-05-24 23:42:33.000000000 +0000 @@ -1,5 +1,4 @@ #!/usr/bin/env python -from collections import defaultdict from unittest import TestCase, main import numpy @@ -17,17 +16,17 @@ upgma, ) from cogent3.core.tree import PhyloNode -from cogent3.util.dict_array import DictArray, DictArrayTemplate, convert2DDict +from cogent3.util.dict_array import DictArray Float = numpy.core.numerictypes.sctype2char(float) __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_core/__init__.py python-cogent-2022.5.25a1+dfsg/tests/test_core/__init__.py --- python-cogent-2021.10.12a1+dfsg/tests/test_core/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_core/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -17,7 +17,7 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Catherine Lozupone", "Peter Maxwell", @@ -29,7 +29,7 @@ "Justin Kuczynski", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_core/test_alignment.py python-cogent-2022.5.25a1+dfsg/tests/test_core/test_alignment.py --- python-cogent-2021.10.12a1+dfsg/tests/test_core/test_alignment.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_core/test_alignment.py 2022-05-24 23:42:33.000000000 +0000 @@ -3,26 +3,17 @@ import os import pathlib import 
re -import sys -import unittest -import warnings from os import remove from tempfile import TemporaryDirectory, mktemp from unittest import TestCase, main import numpy -import pytest -from numpy import arange, array, log2, nan, transpose +from numpy import array, log2, nan, transpose from numpy.testing import assert_allclose, assert_equal -from cogent3 import ( - load_aligned_seqs, - load_unaligned_seqs, - make_aligned_seqs, - make_seq, -) +from cogent3 import load_aligned_seqs, load_unaligned_seqs, make_seq, open_ from cogent3.core.alignment import ( Aligned, Alignment, @@ -37,33 +28,26 @@ aln_from_empty, aln_from_fasta, aln_from_generic, - coerce_to_string, make_gap_filter, seqs_from_aln, seqs_from_array, seqs_from_array_seqs, - seqs_from_dict, seqs_from_empty, seqs_from_fasta, seqs_from_generic, seqs_from_kv_pairs, ) from cogent3.core.alphabet import AlphabetError -from cogent3.core.annotation import Feature, _Annotatable +from cogent3.core.annotation import Feature from cogent3.core.moltype import AB, ASCII, BYTES, DNA, PROTEIN, RNA -from cogent3.core.sequence import ( - ArraySequence, - RnaSequence, - Sequence, - frac_same, -) +from cogent3.core.sequence import ArraySequence, RnaSequence, Sequence from cogent3.maths.util import safe_p_log_p from cogent3.parse.fasta import MinimalFastaParser -from cogent3.util.misc import get_object_provenance, open_ +from cogent3.util.misc import get_object_provenance __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Jeremy Widmann", "Catherine Lozuopone", @@ -73,7 +57,7 @@ "Jan Kosinski", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -422,7 +406,7 @@ s2_ORIG = ">x\nCA\n>b\nAA\n>>xx\nGG" s2 = ">aa\nAC\n>bb\nAA\n>c\nGG\n" d = ArrayAlignment(MinimalFastaParser(s2.splitlines())) - da = 
d.to_fasta() + d.to_fasta() self.assertEqual(d.to_fasta(), aln.to_fasta()) def test_aln_from_fasta(self): @@ -642,8 +626,8 @@ def test_get_similar(self): """SequenceCollection get_similar should get all sequences close to target seq""" aln = self.many - x = RnaSequence("GGGGGGGGGG") - y = RnaSequence("----------") + RnaSequence("GGGGGGGGGG") + RnaSequence("----------") # test min and max similarity ranges result = aln.get_similar( aln.named_seqs["a"], min_similarity=0.4, max_similarity=0.7 @@ -1062,7 +1046,7 @@ # check for a failure when no moltype specified alignment = self.Class(data=seqs) try: - peps = alignment.get_translation() + alignment.get_translation() except AttributeError: pass @@ -1220,7 +1204,7 @@ # be gone too raw_seq = "---??-??TC-GGCG-GCA-G-GC-?-C-TAN-GCGC-CCTC-AGGA?-???-??--" raw_ungapped = re.sub("[-?]", "", raw_seq) - raw_no_ambigs = re.sub("[N?]+", "", raw_seq) + re.sub("[N?]+", "", raw_seq) dna = DNA.make_seq(raw_seq) aln = self.Class(data=[("a", dna), ("b", dna)]) @@ -1335,7 +1319,7 @@ def test_apply_pssm(self): """should successfully produce pssm scores""" - from cogent3.parse import cisbp, jaspar + from cogent3.parse import jaspar _, pwm = jaspar.read("data/sample.jaspar") data = { @@ -1526,7 +1510,7 @@ # no longer applicable in new implementation with self.assertRaises(ValueError): - r = align_rag.to_phylip() + align_rag.to_phylip() def test_pad_seqs_ragged(self): """SequenceCollection pad_seqs should work on ragged alignment.""" @@ -2635,11 +2619,11 @@ logo = aln.seqlogo(wrap=20) # should work for protein too aa = aln.get_translation() - logo = aa.seqlogo() + aa.seqlogo() # without a defined moltype aln = self.Class(data) - logo = aln.seqlogo() + aln.seqlogo() class ArrayAlignmentTests(AlignmentBaseTests, TestCase): diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_core/test_alphabet.py python-cogent-2022.5.25a1+dfsg/tests/test_core/test_alphabet.py --- python-cogent-2021.10.12a1+dfsg/tests/test_core/test_alphabet.py 2021-10-12 
00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_core/test_alphabet.py 2022-05-24 23:42:33.000000000 +0000 @@ -29,10 +29,10 @@ AminoAcids = CharAlphabet("ACDEFGHIKLMNPQRSTVWY") __author__ = "Rob Knight, Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Rob Knight", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_core/test_annotation.py python-cogent-2022.5.25a1+dfsg/tests/test_core/test_annotation.py --- python-cogent-2021.10.12a1+dfsg/tests/test_core/test_annotation.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_core/test_annotation.py 2022-05-24 23:42:33.000000000 +0000 @@ -3,16 +3,16 @@ import unittest from cogent3 import DNA, make_aligned_seqs -from cogent3.core.annotation import Feature, Variable, _Feature +from cogent3.core.annotation import Feature, _Feature from cogent3.core.location import Map, Span, as_map from cogent3.core.sequence import DnaSequence, RnaSequence __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -225,7 +225,6 @@ manipulation.""" def test_span(self): - length = 100 forward = Span(20, 30) reverse = Span(70, 80, reverse=True) assert forward.reversed_relative_to(100) == reverse diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_core/test_core_standalone.py python-cogent-2022.5.25a1+dfsg/tests/test_core/test_core_standalone.py --- 
python-cogent-2021.10.12a1+dfsg/tests/test_core/test_core_standalone.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_core/test_core_standalone.py 2022-05-24 23:42:33.000000000 +0000 @@ -2,17 +2,18 @@ import json import os import pathlib -import re import tempfile import unittest from tempfile import TemporaryDirectory -from cogent3 import DNA, PROTEIN, RNA -from cogent3 import STANDARD_CODON as CODON from cogent3 import ( + DNA, + PROTEIN, + RNA, get_format_suffixes, load_aligned_seqs, + load_seq, load_unaligned_seqs, make_aligned_seqs, make_seq, @@ -29,10 +30,10 @@ __author__ = "Peter Maxwell, Gavin Huttley and Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -68,8 +69,9 @@ _ = make_unaligned_seqs(data, info=2) # source works - got = make_unaligned_seqs(data, source="somewhere") - self.assertEqual(got.info["source"], "somewhere") + for src in ("somewhere", pathlib.Path("somewhere")): + got = make_unaligned_seqs(data, source=src) + self.assertEqual(got.info["source"], str(src)) def test_make_aligned_seqs(self): """test Alignment/ArrayAlignment constructor utility function""" @@ -88,8 +90,9 @@ _ = make_unaligned_seqs(data, info=2) # source works - got = make_aligned_seqs(data, source="somewhere") - self.assertEqual(got.info["source"], "somewhere") + for src in ("somewhere", pathlib.Path("somewhere")): + got = make_aligned_seqs(data, source=src) + self.assertEqual(got.info["source"], str(src)) # array_align works got = make_aligned_seqs(data, array_align=False) @@ -97,6 +100,32 @@ self.assertEqual(got.to_dict(), data) self.assertEqual(got.info["source"], "unknown") + def test_load_seq(self): + """load single 
sequence""" + from cogent3 import Sequence + + paths = ( + "c_elegans_WS199_dna_shortened.fasta", + "annotated_seq.gb", + "brca1_5.250.paml", + ) + seq_names = ("I", "AE017341", "NineBande") + data_dir = pathlib.Path(data_path) + for i, path in enumerate(paths): + got = load_seq(data_dir / path) + assert isinstance(got, Sequence) + assert got.info.source == str(data_dir / path) + assert got.name == seq_names[i] + + # try json + seq = got + with TemporaryDirectory(dir=".") as dirname: + outpath = pathlib.Path(dirname) / "seq.json" + outpath.write_text(seq.to_json()) + got = load_seq(outpath) + assert str(got) == str(seq) + assert got.name == seq.name + def test_load_unaligned_seqs(self): """test loading unaligned from file""" path = os.path.join(data_path, "brca1_5.paml") @@ -108,7 +137,7 @@ def test_load_unaligned_seqs_no_format(self): """test loading unaligned from file""" with self.assertRaises(ValueError): - got = load_unaligned_seqs("somepath") + load_unaligned_seqs("somepath") def test_load_aligned_seqs(self): """test loading aligned from file""" @@ -126,7 +155,7 @@ def test_load_aligned_seqs_no_format(self): """test loading unaligned from file""" with self.assertRaises(ValueError): - got = load_aligned_seqs("somepath") + load_aligned_seqs("somepath") def test_load_unaligned_seqs_from_json(self): """test loading an unaligned object from json file""" @@ -290,7 +319,7 @@ # in Py3 for reasons that are not clear. 
This needs to be looked # more closely dmp = pickle.dumps(aln, protocol=1) - aln2 = pickle.loads(dmp) + pickle.loads(dmp) def test_empty_seq(self): """test creation of an alignment from scratch, with one sequence pure gap""" @@ -316,7 +345,6 @@ def test_get_sub_alignment(self): """test slicing otus, and return of new alignment""" - fullset = ["DogFaced", "Human", "HowlerMon", "Mouse", "NineBande"] subset = ["DogFaced", "Human", "HowlerMon", "Mouse"] subset.sort() sub_align = self.alignment.take_seqs(subset) @@ -616,7 +644,7 @@ data={"seq1": "ABCDEFGHIJKLMNOP", "seq2": "ABCDEFGHIJKLMNOP"} ) # effectively permute columns, preserving length - shuffled = alignment.sample() + alignment.sample() # ensure length correct sample = alignment.sample(10) self.assertEqual(len(sample), 10) @@ -661,7 +689,7 @@ # check for a failure when no moltype specified alignment = make_aligned_seqs(data=seqs) try: - peps = alignment.get_translation() + alignment.get_translation() except AttributeError: pass diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_core/test_features.py python-cogent-2022.5.25a1+dfsg/tests/test_core/test_features.py --- python-cogent-2021.10.12a1+dfsg/tests/test_core/test_features.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_core/test_features.py 2022-05-24 23:42:33.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -255,7 +255,7 @@ data=[["x", "-AAAA"], ["y", "TTTTT"]], array_align=False ) seq = DNA.make_seq("CCCCCCCCCCCCCCCCCCCC", "x") - exon = seq.add_feature("exon", "A", [(5, 8)]) + seq.add_feature("exon", "A", [(5, 8)]) aln.get_seq("x").copy_annotations(seq) copied = 
list(aln.get_annotations_from_seq("x", "exon")) self.assertEqual(str(copied), '[exon "A" at [5:5, -4-]/5]') @@ -271,7 +271,7 @@ data=[["x", "-AAAAAAAAA"], ["y", "TTTT--TTTT"]], array_align=False ) seq = DNA.make_seq("CCCCCCCCCCCCCCCCCCCC", "x") - match_exon = seq.add_feature("exon", "A", [(5, 8)]) + seq.add_feature("exon", "A", [(5, 8)]) aln.get_seq("y").copy_annotations(seq) copied = list(aln.get_annotations_from_seq("y", "exon")) self.assertEqual(str(copied), '[exon "A" at [7:10]/10]') @@ -285,7 +285,7 @@ data=[["x", "-AAAAAAAAA"], ["y", "TTTT--TTTT"]], array_align=False ) diff_len_seq = DNA.make_seq("CCCCCCCCCCCCCCCCCCCCCCCCCCCC", "x") - nonmatch = diff_len_seq.add_feature("repeat", "A", [(12, 14)]) + diff_len_seq.add_feature("repeat", "A", [(12, 14)]) aln.get_seq("y").copy_annotations(diff_len_seq) copied = list(aln.get_annotations_from_seq("y", "repeat")) self.assertEqual(str(copied), '[repeat "A" at [10:10, -6-]/10]') @@ -298,14 +298,14 @@ aln = make_aligned_seqs( data=[["x", "-AAAAAAAAA"], ["y", "------TTTT"]], array_align=False ) - exon = aln.get_seq("x").add_feature("exon", "fred", [(3, 8)]) + aln.get_seq("x").add_feature("exon", "fred", [(3, 8)]) aln_exons = list(aln.get_annotations_from_seq("x", "exon")) self.assertEqual(str(aln_exons), '[exon "fred" at [4:9]/10]') self.assertEqual(str(aln_exons[0].get_slice()), ">x\nAAAAA\n>y\n--TTT\n") aln = make_aligned_seqs( data=[["x", "-AAAAAAAAA"], ["y", "TTTT--T---"]], array_align=False ) - exon = aln.get_seq("x").add_feature("exon", "fred", [(3, 8)]) + aln.get_seq("x").add_feature("exon", "fred", [(3, 8)]) aln_exons = list(aln.get_annotations_from_seq("x", "exon")) self.assertEqual(str(aln_exons[0].get_slice()), ">x\nAAAAA\n>y\n--T--\n") @@ -581,7 +581,6 @@ def test_roundtrip_json(self): """features can roundtrip from json""" - from cogent3.util.deserialise import deserialise_seq seq = DNA.make_seq("AAAAATATTATTGGGT") seq.add_annotation(Feature, "exon", "myname", [(0, 5)]) @@ -652,12 +651,12 @@ aln = 
make_aligned_seqs( data=[["x", "-AAAGGGGGAACCCT"], ["y", "TTTT--TTTTAGGGA"]], array_align=False ) - of1 = aln.get_seq("x").add_annotation(Feature, "exon", "E1", [(3, 8)]) - of2 = aln.get_seq("x").add_annotation(Feature, "exon", "E2", [(10, 13)]) + aln.get_seq("x").add_annotation(Feature, "exon", "E1", [(3, 8)]) + aln.get_seq("x").add_annotation(Feature, "exon", "E2", [(10, 13)]) # at the alignment level sub_aln = aln[:-3] s = sub_aln.named_seqs["x"] - e2 = s.data.get_annotations_matching("exon", "E2")[0] + s.data.get_annotations_matching("exon", "E2")[0] d = s.data[:11] json = s.to_json() new = deserialise_object(json) @@ -679,8 +678,8 @@ array_align=False, moltype="dna", ) - of1 = aln.get_seq("x").add_annotation(Feature, "exon", "E1", [(3, 8)]) - of2 = aln.get_seq("x").add_annotation(Feature, "exon", "E2", [(10, 13)]) + aln.get_seq("x").add_annotation(Feature, "exon", "E1", [(3, 8)]) + aln.get_seq("x").add_annotation(Feature, "exon", "E2", [(10, 13)]) raln = aln.rc() json = raln.to_json() diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_core/test_genetic_code.py python-cogent-2022.5.25a1+dfsg/tests/test_core/test_genetic_code.py --- python-cogent-2021.10.12a1+dfsg/tests/test_core/test_genetic_code.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_core/test_genetic_code.py 2022-05-24 23:42:33.000000000 +0000 @@ -16,10 +16,10 @@ __author__ = "Greg Caporaso" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Greg Caporaso", "Rob Knight", "Peter Maxwell", "Thomas La"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Greg Caporaso" __email__ = "caporaso@colorado.edu" __status__ = "Production" @@ -176,8 +176,7 @@ obs_ile = sgc["I"] self.assertEqual(obs_ile, exp_ile) - exp_arg = ["AGA", "AGG", "CGT", "CGC", "CGA", "CGG"] - obs_arg = sgc["R"] + sgc["R"] self.assertEqual(obs_ile, exp_ile) exp_leu = 
["TTA", "TTG", "CTT", "CTC", "CTA", "CTG"] diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_core/test_info.py python-cogent-2022.5.25a1+dfsg/tests/test_core/test_info.py --- python-cogent-2021.10.12a1+dfsg/tests/test_core/test_info.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_core/test_info.py 2022-05-24 23:42:33.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -152,7 +152,7 @@ with warnings.catch_warnings(record=True) as w: d1 = Info({"key1": "value1", "key2": "value2", "key3": "value3"}) d2 = Info({"key2": "value2", "key3": "value3", "key4": "value4"}) - d3 = d1.update(d2) + d1.update(d2) self.assertEqual(len(w), 1) diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_core/test_location.py python-cogent-2022.5.25a1+dfsg/tests/test_core/test_location.py --- python-cogent-2021.10.12a1+dfsg/tests/test_core/test_location.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_core/test_location.py 2022-05-24 23:42:33.000000000 +0000 @@ -15,10 +15,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -152,8 +152,6 @@ self.overlapping, ) - n = Span(30, 36) - expected_order = [s, e] first = expected_order[:] first.sort() diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_core/test_maps.py python-cogent-2022.5.25a1+dfsg/tests/test_core/test_maps.py --- 
python-cogent-2021.10.12a1+dfsg/tests/test_core/test_maps.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_core/test_maps.py 2022-05-24 23:42:33.000000000 +0000 @@ -3,15 +3,15 @@ import unittest from cogent3 import DNA, make_aligned_seqs -from cogent3.core.annotation import Feature, _Annotatable, _Feature +from cogent3.core.annotation import Feature, _Feature from cogent3.core.location import Map, Span __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Rob Knight", "Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -61,8 +61,8 @@ def test_maps_on_maps(self): seq = DNA.make_seq("ATCGATCGAT" * 5, name="base") feat1 = annotate(seq, 10, 20, "fake") - feat2 = annotate(feat1, 3, 5, "fake2") - feat3 = annotate(seq, 1, 3, "left") + annotate(feat1, 3, 5, "fake2") + annotate(seq, 1, 3, "left") seq2 = seq[5:] self.assertEqual( diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_core/test_moltype.py python-cogent-2022.5.25a1+dfsg/tests/test_core/test_moltype.py --- python-cogent-2021.10.12a1+dfsg/tests/test_core/test_moltype.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_core/test_moltype.py 2022-05-24 23:42:33.000000000 +0000 @@ -31,10 +31,10 @@ __author__ = "Gavin Huttley, Peter Maxwell, and Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight", "Gavin Huttley", "Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru 
python-cogent-2021.10.12a1+dfsg/tests/test_core/test_profile.py python-cogent-2022.5.25a1+dfsg/tests/test_core/test_profile.py --- python-cogent-2021.10.12a1+dfsg/tests/test_core/test_profile.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_core/test_profile.py 2022-05-24 23:42:33.000000000 +0000 @@ -11,10 +11,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Sandra Smit", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -28,7 +28,7 @@ states = "ACGT" rows = [CategoryCounter([b] * 20) for b in "ACGT"] rows = [r.tolist(states) for r in rows] - pwm = MotifCountsArray(rows, states) + MotifCountsArray(rows, states) data = [[2, 4], [3, 5], [4, 8]] got = MotifCountsArray(array(data), "AB") @@ -42,22 +42,22 @@ # can't use a string data = [["A", "A"], ["A", "A"], ["A", "A"]] with self.assertRaises(ValueError): - got = MotifCountsArray(data, "AB") + MotifCountsArray(data, "AB") # or a float data = [[1.1, 2.1], [0.0, 2.1], [3.0, 4.5]] with self.assertRaises(ValueError): - got = MotifCountsArray(data, "AB") + MotifCountsArray(data, "AB") # or be empty with self.assertRaises(ValueError): - got = MotifCountsArray([], "AB") + MotifCountsArray([], "AB") with self.assertRaises(ValueError): - got = MotifCountsArray([[], []], "AB") + MotifCountsArray([[], []], "AB") data = [[2, 4], [3, 5], [4, 8]] with self.assertRaises(ValueError): - pssm = PSSM(data, "ACGT") + PSSM(data, "ACGT") def test_str_repr(self): """exercise str and repr""" @@ -168,7 +168,6 @@ ] ) marr = MotifCountsArray(array(data), "ACGT") - adj = data + 1 got = marr.to_pssm(pseudocount=1) freqs = marr._to_freqs(pseudocount=1) expect = log2(freqs / 0.25) @@ -222,30 +221,30 @@ def test_construct_succeeds(self): """construct 
from float array or list""" data = [[2 / 6, 4 / 6], [3 / 8, 5 / 8], [4 / 12, 8 / 12]] - got = MotifFreqsArray(array(data), "AB") + MotifFreqsArray(array(data), "AB") data = [[2 / 6, 4 / 6], [3 / 8, 5 / 8], [4 / 12, 8 / 12]] - got = MotifFreqsArray(data, "AB") + MotifFreqsArray(data, "AB") def test_construct_fails(self): """valid freqs only""" # no negatives data = [[-2 / 6, 4 / 6], [3 / 8, 5 / 8], [4 / 12, 8 / 12]] with self.assertRaises(ValueError): - got = MotifFreqsArray(data, "AB") + MotifFreqsArray(data, "AB") # must sum to 1 on axis=1 data = [[2 / 5, 4 / 6], [3 / 8, 5 / 8], [4 / 12, 8 / 12]] with self.assertRaises(ValueError): - got = MotifFreqsArray(data, "AB") + MotifFreqsArray(data, "AB") data = [["A", "A"], ["A", "A"], ["A", "A"]] with self.assertRaises(ValueError): - got = MotifFreqsArray(data, "AB") + MotifFreqsArray(data, "AB") # int's not allowed data = [[2, 4], [3, 5], [4, 8]] with self.assertRaises(ValueError): - got = MotifFreqsArray(data, "AB") + MotifFreqsArray(data, "AB") def test_entropy_terms(self): """Checks entropy_terms works correctly""" @@ -473,7 +472,7 @@ [0.0, 0.0, 0.0, 0.0], ] with self.assertRaises(ValueError): - pssm = PSSM(data_all_zero, "ACTG") + PSSM(data_all_zero, "ACTG") # fails for numpy.nan data_nan = [ @@ -483,7 +482,7 @@ [-1.263, -0.737, -2.322, -0.322], ] with self.assertRaises(ValueError): - pssm = PSSM(data_nan, "ACTG") + PSSM(data_nan, "ACTG") # fails for entries all negative numbers data = [ @@ -493,7 +492,7 @@ [-1.263, -0.737, -2.322, -0.322], ] with self.assertRaises(ValueError): - pssm = PSSM(data, "ACTG") + PSSM(data, "ACTG") def test_score_indices(self): """produce correct score from indexed seq""" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_core/test_seq_aln_integration.py python-cogent-2022.5.25a1+dfsg/tests/test_core/test_seq_aln_integration.py --- python-cogent-2021.10.12a1+dfsg/tests/test_core/test_seq_aln_integration.py 2021-10-12 00:17:34.000000000 +0000 +++ 
python-cogent-2022.5.25a1+dfsg/tests/test_core/test_seq_aln_integration.py 2022-05-24 23:42:33.000000000 +0000 @@ -11,10 +11,10 @@ __author__ = "Sandra Smit" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Sandra Smit", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Sandra Smit" __email__ = "sandra.smit@colorado.edu" __status__ = "Production" @@ -95,7 +95,7 @@ def test_seqFreqs(self): """seqFreqs should work the same on Alignment and ArrayAlignment""" - get_index = RNA.alphabets.degen_gapped.index + RNA.alphabets.degen_gapped.index # 'UCAGGG' # 'YCU-RG' # 'CAA-NR' diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_core/test_sequence.py python-cogent-2022.5.25a1+dfsg/tests/test_core/test_sequence.py --- python-cogent-2021.10.12a1+dfsg/tests/test_core/test_sequence.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_core/test_sequence.py 2022-05-24 23:42:33.000000000 +0000 @@ -40,10 +40,10 @@ __author__ = "Rob Knight, Gavin Huttley and Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight", "Gavin Huttley", "Peter Maxwell", "Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -248,6 +248,48 @@ # 13 features with one having 2 parents, so 14 instances should be found self.assertEqual(len(matches), 14) + def test_annotate_gff_nested_features(self): + """correctly annotate a sequence with nested features""" + # the synthetic example + # 1111111111222222222333333333334 + # 1234567890123456789012345678901234567890 + # **** biological_region + # ** biological_region + # * biological_region + # ******************************* gene + # 
********************* mRNA + # ********* exon + # ***** exon + # ACCCCGGAAAATTTTTTTTTAAGGGGGAAAAAAAAACCCCCCC... + seq = DNA.make_seq("ACCCCGGAAAATTTTTTTTTAAGGGGGAAAAAAAAACCCCCCC", name="22") + gff3_path = os.path.join("data/ensembl_sample.gff3") + seq.annotate_from_gff(gff3_path) + # we have 1 "full chromosome" annotation, 3 generic regions and 1 gene + self.assertEqual(len(seq.annotations), 5) + + # get the gene and check it has a single annotation and that + # its slice is correct + ann = seq.get_annotations_matching("gene") + self.assertEqual(len(ann), 1) + self.assertEqual(len(ann[0].annotations), 1) + seq = ann[0].get_slice() + self.assertEqual(str(seq), "GGAAAATTTTTTTTTAAGGGGGAAAAAAAAA") + + # the gene has 1 transcript + ann = seq.get_annotations_matching("mRNA", extend_query=True) + self.assertEqual(len(ann), 1) + self.assertEqual(len(ann[0].annotations), 2) # 2 exons + seq = ann[0].get_slice() + self.assertEqual(str(seq), "AAAATTTTTTTTTAAGGGGGAAA") + + # the transcript has 2 exons + ann = seq.get_annotations_matching("exon", extend_query=True) + self.assertEqual(len(ann), 2) + exon_seqs = ("TTTTTTTTT", "GGGGG") + for x in ann: + self.assertEqual(len(x.annotations), 0) + self.assertTrue(str(x.get_slice()) in exon_seqs, msg=x.get_slice()) + def test_strip_degenerate(self): """Sequence strip_degenerate should remove any degenerate bases""" self.assertEqual(self.RNA("UCAG-").strip_degenerate(), "UCAG-") @@ -992,6 +1034,54 @@ self.assertEqual(got_num, 2) os.environ.pop(env_name, None) + def test_add(self): + """Test for the add method within sequence""" + + even = "TCAGAT" + odd = even + "AAA" + original_sequence = self.SEQ(even, name="even") + duplicate_sequence = self.SEQ(even, name="even") + name_only_duplicate = self.SEQ(even, name="odd") + different_sequence = self.SEQ(odd, name="odd") + + added_duplicates = original_sequence + duplicate_sequence + added_name_only_duplicate = original_sequence + name_only_duplicate + different_sequences = original_sequence 
+ different_sequence + + self.assertIsNone(different_sequences.name) + self.assertIsNotNone(added_duplicates.name) + self.assertIsNotNone(added_name_only_duplicate) + + self.assertEqual(original_sequence.name, added_duplicates.name) + self.assertNotEqual(original_sequence.name, added_name_only_duplicate.name) + self.assertNotEqual(original_sequence.name, different_sequences.name) + + def test_add2(self): + """name property correctly handled in sequence add""" + a1 = self.SEQ("AAA", name="1") + a2 = self.SEQ("CC", name="1") + a = a1 + a2 + self.assertEqual(a.name, "1") + self.assertEqual(a, "AAACC") + + b = self.SEQ("GGGG", name="2") + self._check_mix_add(a1, b) + c = self.SEQ("TT") + self._check_mix_add(a1, c) + + e = "AA" + be = b + e + self.assertIsNone(be.name) + self.assertEqual(be, str(b) + e) + + def _check_mix_add(self, s1, s2): + s1s2 = s1 + s2 + s2s1 = s2 + s1 + self.assertIsNone(s1s2.name) + self.assertIsNone(s2s1.name) + self.assertEqual(s1s2, str(s1) + str(s2)) + self.assertEqual(s2s1, str(s2) + str(s1)) + class SequenceSubclassTests(TestCase): """Only one general set of tests, since the subclasses are very thin.""" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_core/test_tree.py python-cogent-2022.5.25a1+dfsg/tests/test_core/test_tree.py --- python-cogent-2021.10.12a1+dfsg/tests/test_core/test_tree.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_core/test_tree.py 2022-05-24 23:42:33.000000000 +0000 @@ -3,6 +3,7 @@ """ import json import os +import pathlib from copy import copy, deepcopy from tempfile import TemporaryDirectory @@ -11,15 +12,15 @@ from numpy import array from numpy.testing import assert_allclose, assert_equal -from cogent3 import load_tree, make_tree +from cogent3 import load_tree, make_tree, open_ from cogent3.core.tree import PhyloNode, TreeError, TreeNode from cogent3.maths.stats.test import correlation from cogent3.parse.tree import DndParser -from cogent3.util.misc import 
get_object_provenance, open_ +from cogent3.util.misc import get_object_provenance __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Rob Knight", "Catherine Lozupone", @@ -33,7 +34,7 @@ "Jose Carlos Clemente Litran", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -319,7 +320,7 @@ # can't break up easily... sorry 80char exp_str = "((a:1.0,(b:2.0,c:3.0):0.0)d:4.0,((e:5.0,(f:6.0,g:7.0):0.0)h:8.0,(i:9.0,(j:10.0,k:11.0):0.0)l:12.0):0.0)m:14.0;" - obs = t.bifurcating() + t.bifurcating() def test_eq(self): """TreeNode comparison should compare using id""" @@ -1127,7 +1128,6 @@ b = nodes["b"] c = nodes["c"] d = nodes["d"] - e = nodes["e"] f = nodes["f"] g = nodes["g"] h = nodes["h"] @@ -1176,7 +1176,7 @@ self.assertTrue(tree.name not in names) names = tree.get_node_names(includeself=True, tipsonly=False) self.assertTrue(tree.name in names) - a = tree.get_node_matching_name("a") + tree.get_node_matching_name("a") def test_reassign_names(self): """reassign_names should rename node names based on dict mapping""" @@ -1505,12 +1505,8 @@ """PhyloNode str should give expected results""" nodes, tree = self.TreeNode, self.TreeRoot a = nodes["a"] - b = nodes["b"] c = nodes["c"] - d = nodes["d"] - e = nodes["e"] f = nodes["f"] - g = nodes["g"] h = nodes["h"] self.assertEqual(str(h), "h:2;") @@ -1590,8 +1586,8 @@ tree = DndParser("(a:1,((c:1,d:2.5)n3:1,b:1)n2:1)rt;") tmid = tree.root_at_midpoint() self.assertEqual(tmid.get_distances(), tree.get_distances()) - tipnames = tree.get_tip_names() - nontipnames = [t.name for t in tree.nontips()] + tree.get_tip_names() + [t.name for t in tree.nontips()] self.assertTrue(tmid.is_root()) self.assertEqual(tmid.distance(tmid.get_node_matching_name("d")), 2.75) @@ -1600,8 +1596,8 @@ tree = 
DndParser("(a:1,((c:1,d:3)n3:1,b:1)n2:1)rt;") tmid = tree.root_at_midpoint() self.assertEqual(tmid.get_distances(), tree.get_distances()) - tipnames = tree.get_tip_names() - nontipnames = [t.name for t in tree.nontips()] + tree.get_tip_names() + [t.name for t in tree.nontips()] # for tipname in tipnames: # tmid_tip = tmid.get_node_matching_name(tipname) # orig_tip = tree.get_node_matching_name(tipname) @@ -1620,8 +1616,8 @@ tree = DndParser("""(BLO_1:0.649351,BLO_2:0.649351):0.0;""") tmid = tree.root_at_midpoint() self.assertEqual(tmid.get_distances(), tree.get_distances()) - tipnames = tree.get_tip_names() - nontipnames = [t.name for t in tree.nontips()] + tree.get_tip_names() + [t.name for t in tree.nontips()] self.assertTrue(tmid.is_root()) assert_allclose(tmid.distance(tmid.get_node_matching_name("BLO_2")), 0.649351) @@ -2047,7 +2043,7 @@ ) # note c,j is len 0 node orig_dists = t1.get_distances() subtree = t1.get_sub_tree(set(["a", "b", "d", "e", "c"])) - sub_dists = subtree.get_distances() + subtree.get_distances() # for pair, dist in sub_dists.items(): # self.assertEqual((pair,dist), (pair,orig_dists[pair])) t2 = DndParser( @@ -2204,6 +2200,28 @@ self.tree.get_newick(with_node_names=True), ) self.assertEqual(got.get_node_names(), self.tree.get_node_names()) + # now try using non json suffix + json_path = os.path.join(dirname, "tree.txt") + self.tree.write(json_path, format="json") + got = load_tree(json_path, format="json") + self.assertIsInstance(got, PhyloNode) + + def test_load_tree(self): + """tests loading a newick formatted Tree""" + with TemporaryDirectory(dir=".") as dirname: + tree_path = os.path.join(dirname, "tree.tree") + self.tree.write(tree_path) + got = load_tree(tree_path) + self.assertIsInstance(got, PhyloNode) + self.assertEqual( + got.get_newick(), + self.tree.get_newick(), + ) + self.assertEqual(got.get_node_names(), self.tree.get_node_names()) + # now try specifying path as pathlib.Path + tree_path = pathlib.Path(tree_path) + got = 
load_tree(tree_path) + self.assertIsInstance(got, PhyloNode) def test_ascii(self): self.tree.ascii_art() @@ -2257,7 +2275,7 @@ # Fell over on small tree because "stem descended from root # joiner was a tip" a, b = self.otu_names[:2] - clade = self.tree.get_edge_names(a, b, True, False) + self.tree.get_edge_names(a, b, True, False) def test_get_tip_names(self): """testing (well, exercising at least), get_tip_names""" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_data/__init__.py python-cogent-2022.5.25a1+dfsg/tests/test_data/__init__.py --- python-cogent-2021.10.12a1+dfsg/tests/test_data/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_data/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -2,10 +2,10 @@ __all__ = ["test_molecular_weight"] __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_data/test_molecular_weight.py python-cogent-2022.5.25a1+dfsg/tests/test_data/test_molecular_weight.py --- python-cogent-2021.10.12a1+dfsg/tests/test_data/test_molecular_weight.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_data/test_molecular_weight.py 2022-05-24 23:42:33.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_draw/__init__.py 
python-cogent-2022.5.25a1+dfsg/tests/test_draw/__init__.py --- python-cogent-2021.10.12a1+dfsg/tests/test_draw/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_draw/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -7,10 +7,10 @@ ] __author__ = "Gavin Huttley and Rahul Ghangas" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_draw/test_dendrogram.py python-cogent-2022.5.25a1+dfsg/tests/test_draw/test_dendrogram.py --- python-cogent-2021.10.12a1+dfsg/tests/test_draw/test_dendrogram.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_draw/test_dendrogram.py 2022-05-24 23:42:33.000000000 +0000 @@ -16,7 +16,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley", "Rahul Ghangas"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_draw/test_dotplot.py python-cogent-2022.5.25a1+dfsg/tests/test_draw/test_dotplot.py --- python-cogent-2021.10.12a1+dfsg/tests/test_draw/test_dotplot.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_draw/test_dotplot.py 2022-05-24 23:42:33.000000000 +0000 @@ -1,6 +1,6 @@ from unittest import TestCase, main -from cogent3 import DNA, load_aligned_seqs, make_unaligned_seqs +from cogent3 import DNA, make_unaligned_seqs from cogent3.core.alignment import Aligned, ArrayAlignment from cogent3.draw.dotplot import ( Dotplot, @@ -16,7 +16,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" 
__credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_draw/test_draw_integration.py python-cogent-2022.5.25a1+dfsg/tests/test_draw/test_draw_integration.py --- python-cogent-2021.10.12a1+dfsg/tests/test_draw/test_draw_integration.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_draw/test_draw_integration.py 2022-05-24 23:42:33.000000000 +0000 @@ -1,4 +1,3 @@ -import os import pathlib import unittest @@ -13,7 +12,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" @@ -25,11 +24,11 @@ aln = load_aligned_seqs(path, array_align=False, moltype="dna") aln = aln.omit_gap_pos() if annotate1: - x1 = aln.get_seq(aln.names[0]).add_feature("gene", "abcde1", [(20, 50)]) - x2 = aln.get_seq(aln.names[0]).add_feature("variation", "one", [(11, 12)]) + aln.get_seq(aln.names[0]).add_feature("gene", "abcde1", [(20, 50)]) + aln.get_seq(aln.names[0]).add_feature("variation", "one", [(11, 12)]) if annotate2: - y1 = aln.get_seq(aln.names[1]).add_feature("gene", "abcde2", [(20, 50)]) - y2 = aln.get_seq(aln.names[1]).add_feature("domain", "abcde2", [(10, 15)]) + aln.get_seq(aln.names[1]).add_feature("gene", "abcde2", [(20, 50)]) + aln.get_seq(aln.names[1]).add_feature("domain", "abcde2", [(10, 15)]) return aln @@ -123,6 +122,15 @@ self.assertEqual(f.data, d.traces) self.assertEqual(f.layout, d.layout) + def test_plotly_figure(self): + """is a plotly graph object Figure instance""" + from plotly.graph_objects import Figure + + trace = dict(type="scatter", x=[0, 1], y=[0, 1]) + layout = dict(title="layout", width=20) + d = 
Drawable(traces=[trace], layout=layout) + self.assertIsInstance(d.plotly_figure, Figure) + class AnnotatedDrawableObjectTests(unittest.TestCase): """testing AnnotatedDrawable object methods and properties""" @@ -143,6 +151,17 @@ f = ad._build_fig() self.assertEqual(f["data"][0]["yaxis"], "y3") + def test_plotly_figure(self): + """is a plotly graph object Figure instance""" + from plotly.graph_objects import Figure + + trace = dict(type="scatter", x=[0, 1], y=[0, 1], xaxis="x", yaxis="y") + layout = dict(title="layout", width=20, yaxis2=dict(overlaying="free")) + cd = Drawable(traces=[trace]) + + ad = AnnotatedDrawable(cd, layout=layout) + self.assertIsInstance(ad.plotly_figure, Figure) + class BaseDrawablesTests(unittest.TestCase): """methods for checking drawables""" @@ -367,7 +386,7 @@ _ = aln.get_seq("b").add_feature("variation", "1", [(1, 5)]) _ = aln.get_seq("b").add_feature("gene", "1", [(1, 5)]) _ = aln.get_seq("b").add_feature("gene", "1", [(5, 1)]) - drawable = aln.get_drawable() + aln.get_drawable() class TableDrawablesTest(BaseDrawablesTests): diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_draw/test_logo.py python-cogent-2022.5.25a1+dfsg/tests/test_draw/test_logo.py --- python-cogent-2021.10.12a1+dfsg/tests/test_draw/test_logo.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_draw/test_logo.py 2022-05-24 23:42:33.000000000 +0000 @@ -9,7 +9,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" @@ -28,7 +28,7 @@ [0.6, 0.15, 0.05, 0.2], ] data = DictArrayTemplate(5, "ACGT").wrap(data) - d = get_logo(data) + get_logo(data) def test_get_logo_missing(self): """copes with positions with no values""" @@ -40,7 +40,7 @@ [0.6, 0.15, 0.05, 0.2], ] data = DictArrayTemplate(5, "ACGT").wrap(data) - d = get_logo(data) 
+ get_logo(data) def test_get_logo_alt_input_type(self): """copes with positions with no values""" @@ -51,10 +51,10 @@ {"A": 0.7, "C": 0.1, "G": 0.1, "T": 0.1}, {"A": 0.6, "C": 0.15, "G": 0.05, "T": 0.2}, ] - d = get_logo(data) + get_logo(data) data[-2] = {} - d = get_logo(data) + get_logo(data) def test_letter_methods(self): """exercising some Letter methods""" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_draw/test_shapes.py python-cogent-2022.5.25a1+dfsg/tests/test_draw/test_shapes.py --- python-cogent-2021.10.12a1+dfsg/tests/test_draw/test_shapes.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_draw/test_shapes.py 2022-05-24 23:42:33.000000000 +0000 @@ -9,7 +9,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley", "Rahul Ghangas"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_evolve/__init__.py python-cogent-2022.5.25a1+dfsg/tests/test_evolve/__init__.py --- python-cogent-2021.10.12a1+dfsg/tests/test_evolve/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_evolve/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -14,10 +14,10 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell", "Greg Caporaso"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_best_likelihood.py python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_best_likelihood.py --- python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_best_likelihood.py 2021-10-12 
00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_best_likelihood.py 2022-05-24 23:42:33.000000000 +0000 @@ -17,10 +17,10 @@ __author__ = "Helen Lindsay" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Helen Lindsay"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Helen Lindsay" __email__ = "helen.lindsay@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_bootstrap.py python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_bootstrap.py --- python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_bootstrap.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_bootstrap.py 2022-05-24 23:42:33.000000000 +0000 @@ -8,7 +8,7 @@ __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Peter Maxwell", "Gavin Huttley", @@ -17,7 +17,7 @@ "Andrew Butterfield", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -117,7 +117,6 @@ def test_prob(self): """testing estimation of probability.""" - import sys alignobj = self.getalignmentobj() prob_bstrap = bootstrap.EstimateProbability( diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_coevolution.py python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_coevolution.py --- python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_coevolution.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_coevolution.py 2022-05-24 23:42:33.000000000 +0000 @@ -94,10 +94,10 @@ __author__ = "Greg Caporaso" -__copyright__ = "Copyright 2007-2021, The Cogent Project" 
+__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Greg Caporaso"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Greg Caporaso" __email__ = "gregcaporaso@gmail.com" __status__ = "Beta" @@ -593,7 +593,6 @@ aln2 = ArrayAlignment(data={"1": "EFW", "2": "EGY"}, moltype=PROTEIN) combined_aln = ArrayAlignment(data={"1": "ACEFW", "2": "AUEGY"}) t = make_tree(treestring="(1:0.5,2:0.5);") - cutoff = 0.50 # MI m = mi_alignment(combined_aln) expected = array([[m[2, 0], m[2, 1]], [m[3, 0], m[3, 1]], [m[4, 0], m[4, 1]]]) diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_distance.py python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_distance.py --- python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_distance.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_distance.py 2022-05-24 23:42:33.000000000 +0000 @@ -29,7 +29,6 @@ _fill_diversity_matrix, _hamming, _jc69_from_matrix, - _tn93_from_matrix, available_distances, get_distance_calculator, get_moltype_index_array, @@ -49,10 +48,10 @@ __author__ = "Gavin Huttley, Yicheng Zhu and Ben Kaehler" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Yicheng Zhu", "Ben Kaehler"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -303,7 +302,7 @@ aln = aln.get_translation() logdet_calc = LogDetPair(moltype=PROTEIN, alignment=aln) logdet_calc.run(use_tk_adjustment=True, show_progress=False) - dists = logdet_calc.get_pairwise_distances() + logdet_calc.get_pairwise_distances() def test_logdet_missing_states(self): """should calculate logdet measurement with missing states""" @@ -361,7 +360,7 @@ var /= 16 * len(data[0][1]) logdet_calc.run(use_tk_adjustment=False, 
show_progress=False) - dists = logdet_calc.get_pairwise_distances() + logdet_calc.get_pairwise_distances() assert_allclose(logdet_calc.variances[1, 1], var, atol=1e-3) def test_logdet_for_determinant_lte_zero(self): @@ -391,7 +390,7 @@ aln = aln.get_translation() paralinear_calc = ParalinearPair(moltype=PROTEIN, alignment=aln) paralinear_calc.run(show_progress=False) - dists = paralinear_calc.get_pairwise_distances() + paralinear_calc.get_pairwise_distances() def test_paralinear_distance(self): """calculate paralinear variance consistent with hand calculation""" @@ -417,7 +416,7 @@ if J[i, i] == 0: J[i, i] += 0.5 J /= J.sum() - M = numpy.linalg.inv(J) + numpy.linalg.inv(J) f = J.sum(1), J.sum(0) dist = -0.25 * numpy.log( numpy.linalg.det(J) / numpy.sqrt(f[0].prod() * f[1].prod()) diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_likelihood_function.py python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_likelihood_function.py --- python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_likelihood_function.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_likelihood_function.py 2022-05-24 23:42:33.000000000 +0000 @@ -52,7 +52,7 @@ MotifChange = predicate.MotifChange __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Peter Maxwell", "Gavin Huttley", @@ -62,7 +62,7 @@ "Ananias Iliadis", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -214,7 +214,7 @@ obs = round(sum(values) / len(values), 6) self.assertEqual(obs, 1.0) self.assertEqual(len(values), 3) - shape = lf.get_param_value("rate_shape") + lf.get_param_value("rate_shape") def test_binned_gamma_ordered_param(self): """rate is gamma distributed omega follows""" @@ -229,7 +229,7 @@ values = 
list(lf.get_param_value_dict(["bin"])["omega_factor"].values()) self.assertEqual(round(sum(values) / len(values), 6), 1.0) self.assertEqual(len(values), 3) - shape = lf.get_param_value("rate_shape") + lf.get_param_value("rate_shape") def test_binned_partition(self): submod = substitution_model.TimeReversibleCodon( @@ -472,7 +472,7 @@ """excercising the most likely ancestral sequences""" likelihood_function = self._makeLikelihoodFunction() self._setLengthsAndBetas(likelihood_function) - result = likelihood_function.likely_ancestral_seqs() + likelihood_function.likely_ancestral_seqs() def test_simulate_alignment(self): "Simulate DNA alignment" @@ -489,7 +489,7 @@ lf = self.submodel.make_likelihood_function(self.tree, bins=["low", "high"]) lf.set_param_rule("beta", bin="low", value=0.1) lf.set_param_rule("beta", bin="high", value=10.0) - simulated_alignment = lf.simulate_alignment(100) + lf.simulate_alignment(100) def test_simulatePatchyHetergeneousAlignment(self): "Simulate patchy substitution-heterogeneous DNA alignment" @@ -498,7 +498,7 @@ ) lf.set_param_rule("beta", bin="low", value=0.1) lf.set_param_rule("beta", bin="high", value=10.0) - simulated_alignment = lf.simulate_alignment(100) + lf.simulate_alignment(100) def test_simulate_alignment1(self): "Simulate alignment when no alignment set" @@ -608,6 +608,16 @@ ------------------------------------""", ) + def test_set_param_rule_adjust_bounds(self): + """check behaviour when modify bound and reset param rule""" + lf = self._makeLikelihoodFunction() + lf.set_param_rule( + "beta", init=4.0, is_independent=True, edges=["DogFaced", "NineBande"] + ) + lf.set_param_rule("beta", upper=2) + val = lf.get_param_value("beta", edge="DogFaced") + self.assertLess(val, 4) # it will be the average of default and set values + def test_get_motif_probs(self): likelihood_function = self._makeLikelihoodFunction() mprobs = likelihood_function.get_motif_probs() @@ -737,7 +747,7 @@ sm = get_model("BH") lf = 
sm.make_likelihood_function(self.tree) lf.set_alignment(self.data) - psubs = lf.get_all_psubs() + lf.get_all_psubs() def test_get_all_rate_matrices(self): """return matrices when just a pair""" @@ -782,7 +792,7 @@ """lf ignores tree lengths if a discrete Markov model""" t = make_tree(treestring="(a:0.4,b:0.3,(c:0.15,d:0.2)edge.0:0.1)root;") dm = ns_substitution_model.DiscreteSubstitutionModel(DNA.alphabet) - lf = dm.make_likelihood_function(t) + dm.make_likelihood_function(t) def test_exercise_set_align(self): "lf.set_align should work for different models" @@ -823,6 +833,27 @@ new_lnL = lf.get_log_likelihood() assert_allclose(new_lnL, lnL) + def test_get_param_rules_multilocus(self): + """correctly return rules from multilocus lf""" + data = load_aligned_seqs( + filename=os.path.join(os.getcwd(), "data", "brca1_5.paml") + ) + half = len(data) // 2 + aln1 = data[:half] + aln2 = data[half:] + loci_names = ["1st-half", "2nd-half"] + loci = [aln1, aln2] + tree = make_tree(tip_names=data.names) + model = get_model("HKY85", optimise_motif_probs=True) + lf = model.make_likelihood_function(tree, loci=loci_names) + lf.set_alignment(loci) + lf.set_param_rule("mprobs", is_independent=False) + rules = lf.get_param_rules() + lf2 = model.make_likelihood_function(tree, loci=loci_names) + lf2.set_alignment(loci) + lf2.apply_param_rules(rules=rules) + assert_allclose(lf.lnL, lf2.lnL) + def test_get_param_rules_discrete(self): """discrete time models produce valid rules""" sm = get_model("BH") @@ -959,6 +990,35 @@ nfp2 = lf.nfp self.assertEqual(nfp2 - nfp1, 1) + def test_set_time_heterogeneity_multilocus(self): + """apply time heterogeneity for multilocus function""" + half = len(self.data) // 2 + aln1 = self.data[:half] + aln2 = self.data[half:] + loci_names = ["1st-half", "2nd-half"] + loci = [aln1, aln2] + model = get_model("GN", optimise_motif_probs=True) + # should not fail + lf = model.make_likelihood_function(self.tree, loci=loci_names) + assert lf.locus_names == loci_names 
+ lf.set_alignment(loci) + edges = ["Human", "HowlerMon"] + lf = model.make_likelihood_function( + self.tree, + loci=loci_names, + discrete_edges=edges, + ) + lf.set_time_heterogeneity(upper=100, is_independent=True) + lf.set_alignment(loci) + lf.optimise(max_evaluations=10, limit_action="ignore", show_progress=False) + stats = lf.get_statistics() + timehet_edge_names = set( + n for n in self.tree.get_node_names(includeself=False) if n not in edges + ) + for t in stats: + if t.title == "edge locus params": + assert set(t.columns["edge"]) == timehet_edge_names + def test_getting_pprobs(self): """posterior bin probs same length as aln for phylo-HMM model""" with open("data/site-het-param-rules.json") as infile: @@ -1281,6 +1341,31 @@ glf.initialise_from_nested(slf) assert_allclose(glf.get_log_likelihood(), slf.get_log_likelihood()) + def test_initialise_from_nested_codon_scoped(self): + """scoped non-reversible likelihood initialised from nested scoped, non-reversible""" + simple = get_model("H04GK") + tree = make_tree(tip_names=["Human", "Mouse", "Opossum"]) + slf = simple.make_likelihood_function(tree) + slf.set_alignment(_aln) + slf.set_time_heterogeneity( + edge_sets=[ + dict(edges=["Opossum"], is_independent=True), + ], + exclude_params=["kappa", "omega"], + ) + slf.optimise(max_evaluations=50, limit_action="ignore", show_progress=False) + glf = simple.make_likelihood_function(tree) + glf.set_alignment(_aln) + glf.set_time_heterogeneity( + edge_sets=[ + dict(edges=["Opossum"], is_independent=True), + dict(edges=["Human", "Mouse"], is_independent=True), + ], + exclude_params=["kappa", "omega"], + ) + glf.initialise_from_nested(slf) + assert_allclose(glf.lnL, slf.lnL) + def test_get_lengths_as_ens_equal(self): """lengths equals ENS for a time-reversible model""" moprobs = numpy.array([0.1, 0.2, 0.3, 0.4]) diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_models.py python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_models.py --- 
python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_models.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_models.py 2022-05-24 23:42:33.000000000 +0000 @@ -21,10 +21,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -47,7 +47,7 @@ """excercising nucleotide model construction""" self._make_model_cache() # just checking present - for name in ["JC69", "F81", "HKY85", "GTR", "GN", "ssGN"]: + for name in ["JC69", "F81", "HKY85", "GTR", "GN", "ssGN", "BH"]: self.assertIn(name, self._cached_models) def test_codon_models(self): @@ -76,8 +76,8 @@ def test_bin_options(self): kwargs = dict(with_rate=True, distribution="gamma") - model = WG01(**kwargs) - model = GTR(**kwargs) + WG01(**kwargs) + GTR(**kwargs) def test_empirical_values_roundtrip(self): model = WG01() diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_motifchange.py python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_motifchange.py --- python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_motifchange.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_motifchange.py 2022-05-24 23:42:33.000000000 +0000 @@ -7,7 +7,7 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Peter Maxwell", "Gavin Huttley", @@ -16,7 +16,7 @@ "Brett Easton", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru 
python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_newq.py python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_newq.py --- python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_newq.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_newq.py 2022-05-24 23:42:33.000000000 +0000 @@ -31,10 +31,10 @@ __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -133,7 +133,7 @@ di = TimeReversibleNucleotide(motif_length=2, mprob_model=model) di.adapt_motif_probs(self.cond_root_probs, auto=True) lf = di.make_likelihood_function(self.tree) - s = str(lf) + str(lf) def test_get_statistics(self): """get statistics should correctly apply arguments""" @@ -173,7 +173,7 @@ lf = di.make_likelihood_function(self.tree) lf.set_param_rule("length", is_independent=False, init=0.4) lf.set_alignment(self.aln) - sim = lf.simulate_alignment() + lf.simulate_alignment() def test_reconstruct_ancestor(self): """should be able to reconstruct ancestral sequences under all @@ -184,7 +184,7 @@ lf = di.make_likelihood_function(self.tree) lf.set_param_rule("length", is_independent=False, init=0.4) lf.set_alignment(self.aln) - ancestor = lf.reconstruct_ancestral_seqs() + lf.reconstruct_ancestral_seqs() def test_results_different(self): for (i, (mprobs, dummy)) in enumerate(self.ordered_by_complexity): @@ -240,7 +240,7 @@ sm = TimeReversibleNucleotide(motif_length=2, mprob_model="monomers") lf = sm.make_likelihood_function(self.tree) lf.set_alignment(self.aln) - mprobs = lf.get_motif_probs() + lf.get_motif_probs() posn12_lnL = lf.get_log_likelihood() assert_allclose(posn12_lnL, expect_lnL, rtol=1e-4) diff -Nru 
python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_ns_substitution_model.py python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_ns_substitution_model.py --- python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_ns_substitution_model.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_ns_substitution_model.py 2022-05-24 23:42:33.000000000 +0000 @@ -4,10 +4,10 @@ import numpy -from numpy import array, dot, empty, ones +from numpy import array, dot, empty from numpy.testing import assert_allclose -from cogent3 import DNA, get_model, make_aligned_seqs, make_tree +from cogent3 import DNA, make_aligned_seqs, make_tree from cogent3.evolve.ns_substitution_model import ( DiscreteSubstitutionModel, General, @@ -28,10 +28,10 @@ __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Ananias Iliadis"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -274,7 +274,7 @@ "optimise_motif_probs": True, "recode_gaps": True, } - sm = StrandSymmetric(**kw) + StrandSymmetric(**kw) def test_nr_nucleotide(self): """This is exercising a NonReversibleNucleotide""" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_parameter_controller.py python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_parameter_controller.py --- python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_parameter_controller.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_parameter_controller.py 2022-05-24 23:42:33.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin 
Huttley", "Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -66,6 +66,28 @@ lf.set_param_rule(par_name="kappa", is_independent=True, edges=["b", "d"]) self.assertEqual(null + 2, lf.get_num_free_params()) + def test_set_get_motif_probs_nstat(self): + from cogent3 import get_model + + aln = make_aligned_seqs( + data=dict( + a="AACGAAGCAGAGTCACGGCA", + b="ACGGAAGTTGAGTCACCCCA", + c="TGCATCGAAAAGTCACGCTG", + ), + moltype="dna", + ) + bases = "ACGT" + expect = aln.get_motif_probs() + expect = [expect[b] for b in bases] + tree = make_tree("(a,b,c)") + gn = get_model("GN") + lf = gn.make_likelihood_function(tree) + lf.set_alignment(aln) + got = lf.get_motif_probs().to_dict() + got = [got[b] for b in bases] + assert_allclose(got, expect) + def test_set_motif_probs(self): """Mprobs supplied to the parameter controller""" @@ -109,7 +131,7 @@ compare_mprobs(motif_probs, correct_probs) assert_allclose(sum(motif_probs.values()), 1.0) - def test_setMultiLocus(self): + def test_set_multilocus(self): """2 loci each with own mprobs""" model = cogent3.evolve.substitution_model.TimeReversibleNucleotide( motif_probs=None @@ -123,12 +145,9 @@ for is_constant in [False, True]: lf.set_motif_probs(mprobs_a, is_constant=is_constant) - s = str(lf) lf.set_motif_probs(mprobs_b, locus="b") self.assertEqual(lf.get_motif_probs(locus="a"), mprobs_a) self.assertEqual(lf.get_motif_probs(locus="b"), mprobs_b) - s = str(lf) - # lf.set_param_rule('mprobs', is_independent=False) def test_set_param_rules(self): lf = self.model.make_likelihood_function(self.tree) @@ -146,9 +165,6 @@ (KeyError, TypeError, AssertionError, ValueError), do_rules, rule_set ) - def test_set_local_clock(self): - pass - def test_set_constant_lengths(self): t = make_tree(treestring="((a:1,b:2):3,(c:4,d:5):6,e:7);") lf = self.model.make_likelihood_function(t) # self.tree) diff 
-Nru python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_scale_rules.py python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_scale_rules.py --- python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_scale_rules.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_scale_rules.py 2022-05-24 23:42:33.000000000 +0000 @@ -12,10 +12,10 @@ __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_simulation.py python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_simulation.py --- python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_simulation.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_simulation.py 2022-05-24 23:42:33.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_substitution_model.py python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_substitution_model.py --- python-cogent-2021.10.12a1+dfsg/tests/test_evolve/test_substitution_model.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_evolve/test_substitution_model.py 2022-05-24 23:42:33.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Gavin Huttley" 
-__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -91,9 +91,9 @@ def test_to_rich_dict(self): """returns complete dict of attributes""" - f81 = F81().to_rich_dict() - hky85 = HKY85().to_rich_dict() - gn = GN().to_rich_dict() + F81().to_rich_dict() + HKY85().to_rich_dict() + GN().to_rich_dict() # TODO need to assess ability to reconstruct from this @@ -380,8 +380,8 @@ def test_str_(self): """str() and repr() of a substitution model""" - s = str(self.standardcode) - r = repr(self.standardcode) + str(self.standardcode) + repr(self.standardcode) class ModelDataInteractionTestMethods(TestCase): @@ -397,7 +397,7 @@ def test_getMotifs(self): """testing return of motifs""" - model_motifs = substitution_model.TimeReversibleNucleotide().get_motifs() + substitution_model.TimeReversibleNucleotide().get_motifs() def test_get_param_list(self): """testing getting the parameter list""" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_format/__init__.py python-cogent-2022.5.25a1+dfsg/tests/test_format/__init__.py --- python-cogent-2021.10.12a1+dfsg/tests/test_format/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_format/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -2,7 +2,7 @@ __all__ = ["test_bedgraph", "test_clustal", "test_fasta"] __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Rob Knight", "Gavin Huttley", @@ -11,7 +11,7 @@ "Jeremy Widmann", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru 
python-cogent-2021.10.12a1+dfsg/tests/test_format/test_bedgraph.py python-cogent-2022.5.25a1+dfsg/tests/test_format/test_bedgraph.py --- python-cogent-2021.10.12a1+dfsg/tests/test_format/test_bedgraph.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_format/test_bedgraph.py 2022-05-24 23:42:33.000000000 +0000 @@ -2,15 +2,14 @@ from unittest import TestCase, main -from cogent3.format.bedgraph import get_header from cogent3.util.table import Table __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_format/test_clustal.py python-cogent-2022.5.25a1+dfsg/tests/test_format/test_clustal.py --- python-cogent-2021.10.12a1+dfsg/tests/test_format/test_clustal.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_format/test_clustal.py 2022-05-24 23:42:33.000000000 +0000 @@ -4,16 +4,14 @@ from unittest import TestCase, main from cogent3.core.alignment import Alignment -from cogent3.core.info import Info -from cogent3.core.sequence import Sequence from cogent3.format.clustal import clustal_from_alignment __author__ = "Jeremy Widmann" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Jeremy Widmann"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Jeremy Widmann" __email__ = "jeremy.widmann@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_format/test_fasta.py python-cogent-2022.5.25a1+dfsg/tests/test_format/test_fasta.py --- 
python-cogent-2021.10.12a1+dfsg/tests/test_format/test_fasta.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_format/test_fasta.py 2022-05-24 23:42:33.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Jeremy Widmann" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Jeremy Widmann", "Gavin Huttley", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Jeremy Widmann" __email__ = "jeremy.widmann@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_maths/__init__.py python-cogent-2022.5.25a1+dfsg/tests/test_maths/__init__.py --- python-cogent-2021.10.12a1+dfsg/tests/test_maths/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_maths/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -9,7 +9,7 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Rob Knight", "Peter Maxwell", @@ -20,7 +20,7 @@ "Antonio Gonzalez Pena", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_distance_transform.py python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_distance_transform.py --- python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_distance_transform.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_distance_transform.py 2022-05-24 23:42:33.000000000 +0000 @@ -44,10 +44,10 @@ __author__ = "Justin Kuczynski" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __contributors__ = ["Justin Kuczynski", "Zongzhi Liu", "Greg 
Caporaso"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Justin Kuczynski" __email__ = "justinak@gmail.com" __status__ = "Prototype" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_geometry.py python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_geometry.py --- python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_geometry.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_geometry.py 2022-05-24 23:42:33.000000000 +0000 @@ -3,19 +3,8 @@ from math import sqrt from unittest import TestCase, main -from numpy import ( - all, - allclose, - arange, - array, - insert, - isclose, - mean, - ones, - sum, - take, -) -from numpy.linalg import inv, norm +from numpy import allclose, arange, array, insert, isclose, sum, take +from numpy.linalg import norm from numpy.random import choice, dirichlet from numpy.testing import assert_allclose, assert_equal @@ -35,10 +24,10 @@ __author__ = "Sandra Smit" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Sandra Smit", "Rob Knight", "Helmut Simon"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Sandra Smit" __email__ = "sandra.smit@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_matrix_exponential_integration.py python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_matrix_exponential_integration.py --- python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_matrix_exponential_integration.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_matrix_exponential_integration.py 2022-05-24 23:42:33.000000000 +0000 @@ -13,7 +13,7 @@ __copyright__ = "Copyright 2007-2014, The Cogent Project" __credits__ = ["Ben Kaehler", "Ananias Iliadis", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" 
+__version__ = "2022.5.25a1" __maintainer__ = "Ben Kaehler" __email__ = "benjamin.kaehler@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_matrix_logarithm.py python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_matrix_logarithm.py --- python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_matrix_logarithm.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_matrix_logarithm.py 2022-05-24 23:42:33.000000000 +0000 @@ -12,10 +12,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight", "Gavin Huttley", "Ben Kaehler"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_measure.py python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_measure.py --- python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_measure.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_measure.py 2022-05-24 23:42:33.000000000 +0000 @@ -1,6 +1,6 @@ from unittest import TestCase, main -from numpy import diag_indices, dot, finfo, float64 +from numpy import arange, array, diag_indices, dot, finfo, float64 from numpy.random import random from numpy.testing import assert_allclose @@ -15,10 +15,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Stephen Ka-Wah Ma"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -238,6 +238,18 @@ result = jsd(pi_0, pi_1) self.assertTrue(result >= 0) + def 
test_general_jsd(self): + """check correctness of JSD for > 2 distributions""" + freqs = (0.1, 0.2, 0.3, 0.4), (0.4, 0.3, 0.2, 0.1), (0.1, 0.4, 0.2, 0.3) + got = jsd(*freqs, validate=True) + # expected value from the R-package philentropy gJSD implementation + assert_allclose(got, 0.1374318, atol=1e-7) + + # with invalid freqs + freqs = (0.1, 0.2, 0.3, 0.4), (0.4, 0.3, 0.1, 0.2), (0.1, 0.4, 0.4, 0.3) + with self.assertRaises(AssertionError): + jsd(*freqs, validate=True) + def test_jsm(self): """evaluate jsm between identical, and non-identical distributions""" case1 = [ diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_optimisers.py python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_optimisers.py --- python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_optimisers.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_optimisers.py 2022-05-24 23:42:33.000000000 +0000 @@ -3,20 +3,17 @@ import os import sys -import time from unittest import TestCase, main -import numpy - from cogent3.maths.optimisers import MaximumEvaluationsReached, maximise __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -64,9 +61,6 @@ class OptimiserTestCase(TestCase): def _test_optimisation(self, target=-4, xinit=1.0, bounds=None, **kw): bounds = bounds or ([-10, 10]) - local = kw.get("local", None) - max_evaluations = kw.get("max_evaluations", None) - f, last, evals = MakeF() x = quiet(maximise, f, [xinit], bounds, **kw) diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_period.py python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_period.py --- 
python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_period.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_period.py 2022-05-24 23:42:33.000000000 +0000 @@ -12,10 +12,10 @@ __author__ = "Hua Ying, Julien Epps and Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Julien Epps", "Hua Ying", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/__init__.py python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_stats/__init__.py --- python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_stats/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -2,10 +2,10 @@ __all__ = ["test_distribution", "test_special", "test_ks", "test_test"] __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight", "Catherine Lozupone", "Gavin Huttley", "Sandra Smit"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_contingency.py python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_stats/test_contingency.py --- python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_contingency.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_stats/test_contingency.py 2022-05-24 23:42:33.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The 
Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_distribution.py python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_stats/test_distribution.py --- python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_distribution.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_stats/test_distribution.py 2022-05-24 23:42:33.000000000 +0000 @@ -45,10 +45,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Rob Knight", "Sandra Smit"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -1963,14 +1963,13 @@ expect = probability_points(4) got = theoretical_quantiles(4, dist="uniform") assert_almost_equal(got, expect) - dists = ["normal", "chisq", "t", "poisson", "binomial", "F", "gamma"] expect = ( -1.049131397963971, -0.299306910465667, 0.299306910465667, 1.049131397963971, ) - p = probability_points(4) + probability_points(4) got = theoretical_quantiles(len(expect), dist="normal") assert_almost_equal(got, expect) diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_information_criteria.py python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_stats/test_information_criteria.py --- python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_information_criteria.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_stats/test_information_criteria.py 2022-05-24 23:42:33.000000000 +0000 @@ -5,10 +5,10 @@ 
__author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_jackknife.py python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_stats/test_jackknife.py --- python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_jackknife.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_stats/test_jackknife.py 2022-05-24 23:42:33.000000000 +0000 @@ -6,10 +6,10 @@ __author__ = "Anuj Pahwa, Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Anuj Pahwa", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_ks.py python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_stats/test_ks.py --- python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_ks.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_stats/test_ks.py 2022-05-24 23:42:33.000000000 +0000 @@ -11,10 +11,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_number.py 
python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_stats/test_number.py --- python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_number.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_stats/test_number.py 2022-05-24 23:42:33.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_period.py python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_stats/test_period.py --- python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_period.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_stats/test_period.py 2022-05-24 23:42:33.000000000 +0000 @@ -15,10 +15,10 @@ __author__ = "Hua Ying, Julien Epps and Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Julien Epps", "Hua Ying", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -259,11 +259,11 @@ hybrid_calc = Hybrid(150, llim=2, period=4) ipdft_calc = Ipdft(150, llim=2, period=4) autocorr_calc = AutoCorrelation(150, llim=2, period=4) - self.assertEqual(hybrid_calc.getNumStats(), 1) - self.assertEqual(ipdft_calc.getNumStats(), 1) - self.assertEqual(autocorr_calc.getNumStats(), 1) + self.assertEqual(hybrid_calc.get_num_stats(), 1) + self.assertEqual(ipdft_calc.get_num_stats(), 1) + self.assertEqual(autocorr_calc.get_num_stats(), 1) hybrid_calc = Hybrid(150, llim=2, 
period=4, return_all=True) - self.assertEqual(hybrid_calc.getNumStats(), 3) + self.assertEqual(hybrid_calc.get_num_stats(), 3) def test_permutation_skips(self): """permutation test correctly handles data without symbols""" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_special.py python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_stats/test_special.py --- python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_special.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_stats/test_special.py 2022-05-24 23:42:33.000000000 +0000 @@ -23,15 +23,15 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Rob Knight", "Sandra Smit"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" -from numpy.testing import assert_allclose, assert_almost_equal +from numpy.testing import assert_allclose class SpecialTests(TestCase): diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_test.py python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_stats/test_test.py --- python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_stats/test_test.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_stats/test_test.py 2022-05-24 23:42:33.000000000 +0000 @@ -12,11 +12,9 @@ isfinite, logical_and, ones, - ravel, reshape, testing, tril, - zeros, ) from cogent3.maths.stats.number import NumberCounter @@ -51,7 +49,6 @@ mantel, mantel_test, mc_t_two_sample, - median, multiple_comparisons, multiple_inverse, multiple_n, @@ -82,7 +79,7 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Rob Knight", 
"Catherine Lozupone", @@ -93,7 +90,7 @@ "Michael Dwan", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -236,23 +233,6 @@ for obs, exp in zip(res, exp3d): testing.assert_almost_equal(obs, exp) - def test_median(self): - """_median should work similarly to numpy.mean (in terms of axis)""" - m = array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]) - expected = 6.5 - observed = median(m, axis=None) - self.assertEqual(observed, expected) - - expected = array([5.5, 6.5, 7.5]) - observed = median(m, axis=0) - assert_equal(observed, expected) - - expected = array([2.0, 5.0, 8.0, 11.0]) - observed = median(m, axis=1) - assert_equal(observed, expected) - - self.assertRaises(ValueError, median, m, 10) - def test_tail(self): """tail should return x/2 if test is true; 1-(x/2) otherwise""" assert_allclose(tail(0.25, "a" == "a"), 0.25 / 2) @@ -554,7 +534,6 @@ equal = [0.25, 0.25, 0.25, 0.25] unequal = [0.5, 0.25, 0.125, 0.125] equal_answer = [1, 1, 1, 1] - unequal_answer = [2, 1, 0.5, 0.5] not_unity = [0.7, 0.7, 0.7, 0.7] for obs, exp in zip(likelihoods(equal, unequal), equal_answer): @@ -932,7 +911,6 @@ def test_mc_t_two_sample_no_mc(self): """Test no MC stats if initial t-test is bad.""" x = array([1, 1, 1]) - y = array([0, 0, 0]) self.assertEqual(mc_t_two_sample(x, x), (None, None, [], None)) def test_mc_t_two_sample_no_variance(self): diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_util.py python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_util.py --- python-cogent-2021.10.12a1+dfsg/tests/test_maths/test_util.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_maths/test_util.py 2022-05-24 23:42:33.000000000 +0000 @@ -9,17 +9,7 @@ import numpy -from numpy import ( - arange, - array, - ones, - ravel, - reshape, - sqrt, - trace, - transpose, - zeros, -) +from numpy import array, transpose 
from numpy.testing import assert_allclose, assert_equal from cogent3.maths.util import ( @@ -38,10 +28,10 @@ Float = numpy.core.numerictypes.sctype2char(float) __author__ = "Rob Knight and Jeremy Widmann" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Jeremy Widmann", "Rob Knight", "Sandra Smit"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/__init__.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/__init__.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -19,7 +19,7 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Jeremy Widmann", "Catherine Lozuopone", @@ -36,7 +36,7 @@ "Marcin Cieslik", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_blast.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_blast.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_blast.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_blast.py 2022-05-24 23:42:33.000000000 +0000 @@ -25,7 +25,7 @@ __copyright__ = "Copyright 2007-2016, The Cogent Project" __credits__ = ["Micah Hamady", "Rob Knight"] __license__ = "GPL" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Micah Hamady" __email__ = "hamady@colorado.edu" __status__ = "Production" diff -Nru 
python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_blast_xml.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_blast_xml.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_blast_xml.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_blast_xml.py 2022-05-24 23:42:33.000000000 +0000 @@ -4,11 +4,11 @@ # __author__ = "Kristian Rother" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __contributors__ = ["Micah Hamady"] __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Kristian Rother" __email__ = "krother@rubor.de" __status__ = "Prototype" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_cigar.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_cigar.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_cigar.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_cigar.py 2022-05-24 23:42:33.000000000 +0000 @@ -1,6 +1,4 @@ #!/usr/bin/env python -import os -import sys import unittest from cogent3 import DNA, make_aligned_seqs @@ -14,10 +12,10 @@ __author__ = "Hua Ying" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Hua Ying", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Hua Ying" __email__ = "hua.ying@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_clustal.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_clustal.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_clustal.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_clustal.py 2022-05-24 23:42:33.000000000 +0000 @@ -3,9 +3,7 @@ """ from unittest import TestCase, main -from 
cogent3.core.alignment import Alignment from cogent3.parse.clustal import ( - LabelLineParser, MinimalClustalParser, delete_trailing_number, is_clustal_seq_line, @@ -15,10 +13,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight", "Sandra Smit"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_dialign.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_dialign.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_dialign.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_dialign.py 2022-05-24 23:42:33.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_ebi.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_ebi.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_ebi.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_ebi.py 2022-05-24 23:42:33.000000000 +0000 @@ -4,8 +4,6 @@ from unittest import TestCase, main -from cogent3.core.info import Info -from cogent3.core.sequence import Sequence from cogent3.parse.ebi import ( EbiFinder, EbiParser, @@ -63,10 +61,10 @@ __author__ = "Zongzhi Liu" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Zongzhi Liu", "Rob Knight"] 
__license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Zongzhi Liu" __email__ = "zongzhi.liu@gmail.com" __status__ = "Development" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_fasta.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_fasta.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_fasta.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_fasta.py 2022-05-24 23:42:33.000000000 +0000 @@ -22,10 +22,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -210,7 +210,7 @@ def dnastrict(x, **kwargs): try: return Dna(x, check=True, **kwargs) - except Exception as e: + except Exception: raise RecordError("Could not convert sequence") self.assertRaises(RecordError, list, FastaParser(self.oneX, dnastrict)) diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_gbseq.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_gbseq.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_gbseq.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_gbseq.py 2022-05-24 23:42:33.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Matthew Wakefield" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_genbank.py 
python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_genbank.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_genbank.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_genbank.py 2022-05-24 23:42:33.000000000 +0000 @@ -23,10 +23,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight", "Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -477,16 +477,26 @@ parser = RichGenbankParser(infile) got_1 = [s for _, s in parser][0] - with open("data/annotated_seq.gb") as infile: - parser = RichGenbankParser(infile, moltype="dna") - got_2 = [s for _, s in parser][0] - - self.assertEqual(len(got_1.annotations), len(got_2.annotations)) - self.assertEqual(got_2.moltype.label, "dna") # name formed from /product value - got = {f.name for f in got_2.get_annotations_matching("mRNA")} + got = {f.name for f in got_1.get_annotations_matching("mRNA")} self.assertEqual(got, {"conserved hypothetical protein", "chaperone, putative"}) + # the file defines itself as DNA + self.assertEqual(got_1.moltype.label, "dna") + + # but that is overridden by user setting moltype explicitly + for moltype in ("dna", "rna", "text"): + with open("data/annotated_seq.gb") as infile: + parser = RichGenbankParser(infile, moltype=moltype) + got_2 = [s for _, s in parser][0] + + self.assertEqual(len(got_1.annotations), len(got_2.annotations)) + self.assertEqual(got_2.moltype.label, moltype) + got = {f.name for f in got_1.get_annotations_matching("mRNA")} + self.assertEqual( + got, {"conserved hypothetical protein", "chaperone, putative"} + ) + class LocationTests(TestCase): """Tests of the Location class.""" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_gff.py 
python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_gff.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_gff.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_gff.py 2022-05-24 23:42:33.000000000 +0000 @@ -11,10 +11,10 @@ __author__ = "Matthew Wakefield" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_greengenes.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_greengenes.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_greengenes.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_greengenes.py 2022-05-24 23:42:33.000000000 +0000 @@ -3,7 +3,6 @@ from unittest import TestCase, main from cogent3.parse.greengenes import ( - DefaultDelimitedSplitter, MinimalGreengenesParser, SpecificGreengenesParser, make_ignore_f, @@ -11,11 +10,11 @@ __author__ = "Daniel McDonald" -__copyright__ = "Copyright 2007-2021, The Cogent Project" # consider project name +__copyright__ = "Copyright 2007-2022, The Cogent Project" # consider project name # remember to add yourself if you make changes __credits__ = ["Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Daniel McDonald" __email__ = "daniel.mcdonald@colorado.edu" __status__ = "Prototype" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_locuslink.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_locuslink.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_locuslink.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_locuslink.py 
2022-05-24 23:42:33.000000000 +0000 @@ -6,7 +6,6 @@ from cogent3.parse.locuslink import ( LinesToLocusLink, LLFinder, - LocusLink, _read_accession, _read_accnum, _read_cdd, @@ -19,18 +18,14 @@ _read_pmid, _read_rell, _read_sts, - commas, - first_pipe, - ll_start, - pipes, ) __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_ncbi_taxonomy.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_ncbi_taxonomy.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_ncbi_taxonomy.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_ncbi_taxonomy.py 2022-05-24 23:42:33.000000000 +0000 @@ -17,10 +17,10 @@ __author__ = "Jason Carnes" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Jason Carnes", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -102,7 +102,7 @@ node = NcbiTaxon(good) self.assertEqual(str(node), good) root = """1\t|\t1\t|\tno rank\t|\t\t|\t8\t|\t0\t|\t1\t|\t0\t|\t0\t|\t0\t|\t0\t|\t0\t|\t\t|""" - root_node = NcbiTaxon(root) + NcbiTaxon(root) self.assertEqual(str(root), root) def test_bad_input(self): diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_nexus.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_nexus.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_nexus.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_nexus.py 2022-05-24 
23:42:33.000000000 +0000 @@ -19,10 +19,10 @@ __author__ = "Catherine Lozupone" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Catherine Lozupone", "Rob Knight", "Micah Hamady"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Catherine Lozupone" __email__ = "lozupone@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_pamlmatrix.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_pamlmatrix.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_pamlmatrix.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_pamlmatrix.py 2022-05-24 23:42:33.000000000 +0000 @@ -7,10 +7,10 @@ __author__ = "Matthew Wakefield" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_phylip.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_phylip.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_phylip.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_phylip.py 2022-05-24 23:42:33.000000000 +0000 @@ -6,14 +6,13 @@ from unittest import TestCase, main from cogent3.parse.phylip import MinimalPhylipParser, get_align_for_phylip -from cogent3.parse.record import RecordError __author__ = "Micah Hamady" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Micah Hamady", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = 
"Micah Hamady" __email__ = "hamady@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_psl.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_psl.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_psl.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_psl.py 2022-05-24 23:42:33.000000000 +0000 @@ -9,10 +9,10 @@ __author__ = "Gavin Huttley, Anuj Pahwa" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight", "Peter Maxwell", "Gavin Huttley", "Anuj Pahwa"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Development" @@ -48,13 +48,13 @@ ] infile = open(fname) parser = MinimalPslParser(infile) - version = next(parser) + next(parser) header = next(parser) infile.close() self.assertEqual(header, expect) def test_psl_to_table(self): - table = PslToTable(fname) + PslToTable(fname) def test_getting_seq_coords(self): """get correct sequence coordinates to produce a trimmed sequence""" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_pwm_parsers.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_pwm_parsers.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_pwm_parsers.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_pwm_parsers.py 2022-05-24 23:42:33.000000000 +0000 @@ -11,7 +11,7 @@ __copyright__ = "Copyright 2007-2012, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Alpha" @@ -24,7 +24,7 @@ mid, pwm = jaspar.read(path) assert mid == ["PSSMid", "HGNCsymbol"], "ID line wrong" # note state indices are ordered by 
moltype - base_order = list(get_moltype("dna")) + list(get_moltype("dna")) expect = [ [35, 374, 30, 121, 6, 121, 33], [0, 10, 0, 0, 3, 2, 44], diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_rdb.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_rdb.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_rdb.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_rdb.py 2022-05-24 23:42:33.000000000 +0000 @@ -17,10 +17,10 @@ __author__ = "Sandra Smit" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Sandra Smit", "Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Sandra Smit" __email__ = "sandra.smit@colorado.edu" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_record_finder.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_record_finder.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_record_finder.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_record_finder.py 2022-05-24 23:42:33.000000000 +0000 @@ -14,10 +14,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight", "Zongzhi Liu"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_record.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_record.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_record.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_record.py 2022-05-24 23:42:33.000000000 +0000 @@ -12,7 +12,6 @@ Grouper, 
LineOrientedConstructor, MappedRecord, - RecordError, StrictFieldWrapper, TypeSetter, bool_setter, @@ -21,16 +20,15 @@ list_adder, list_extender, raise_unknown_field, - str_setter, string_and_strip, ) __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_tinyseq.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_tinyseq.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_tinyseq.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_tinyseq.py 2022-05-24 23:42:33.000000000 +0000 @@ -1,17 +1,16 @@ #!/usr/bin/env python import xml.dom.minidom -from io import StringIO from unittest import TestCase, main from cogent3.parse.tinyseq import TinyseqParser __author__ = "Matthew Wakefield" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Matthew Wakefield"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Matthew Wakefield" __email__ = "wakefield@wehi.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_tree.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_tree.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_tree.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_tree.py 2022-05-24 23:42:33.000000000 +0000 @@ -16,10 +16,10 @@ # return parse_string(data, constructor) __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = 
["Rob Knight", "Peter Maxwell", "Daniel McDonald"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_unigene.py python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_unigene.py --- python-cogent-2021.10.12a1+dfsg/tests/test_parse/test_unigene.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_parse/test_unigene.py 2022-05-24 23:42:33.000000000 +0000 @@ -15,10 +15,10 @@ __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_phylo.py python-cogent-2022.5.25a1+dfsg/tests/test_phylo.py --- python-cogent-2021.10.12a1+dfsg/tests/test_phylo.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_phylo.py 2022-05-24 23:42:33.000000000 +0000 @@ -19,14 +19,14 @@ WeightedTreeCollection, make_trees, ) -from cogent3.util.misc import remove_files +from cogent3.util.io import remove_files warnings.filterwarnings("ignore", "Not using MPI as mpi4py not found") __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Peter Maxwell", "Gavin Huttley", @@ -35,7 +35,7 @@ "Ben Kaehler", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_recalculation.py 
python-cogent-2022.5.25a1+dfsg/tests/test_recalculation.py --- python-cogent-2021.10.12a1+dfsg/tests/test_recalculation.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_recalculation.py 2022-05-24 23:42:33.000000000 +0000 @@ -8,10 +8,10 @@ __author__ = "Peter Maxwell" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -159,7 +159,7 @@ # For likelihood functions it is more convenient to provide 'p' rather than # 'dropoff', dropoff = chdtri(1, p) / 2.0. Also in general you won't need ultra precise answers, # so don't use 'xtol=0.0', that's just to make the doctest work. - gz = pc.graphviz() + pc.graphviz() if __name__ == "__main__": diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_util/__init__.py python-cogent-2022.5.25a1+dfsg/tests/test_util/__init__.py --- python-cogent-2021.10.12a1+dfsg/tests/test_util/__init__.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_util/__init__.py 2022-05-24 23:42:33.000000000 +0000 @@ -9,7 +9,7 @@ ] __author__ = "" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Jeremy Widmann", "Sandra Smit", @@ -20,7 +20,7 @@ "Greg Caporaso", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_util/test_deserialise.py python-cogent-2022.5.25a1+dfsg/tests/test_util/test_deserialise.py --- python-cogent-2021.10.12a1+dfsg/tests/test_util/test_deserialise.py 2021-10-12 00:17:34.000000000 +0000 +++ 
python-cogent-2022.5.25a1+dfsg/tests/test_util/test_deserialise.py 2022-05-24 23:42:33.000000000 +0000 @@ -25,10 +25,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -233,7 +233,7 @@ edge_vals = zip(aln.names, (2, 3, 4)) for edge, val in edge_vals: lf.set_param_rule("kappa", edge=edge, init=val) - result = model_result(name="test") + result = model_result(name="test", source="blah") result[1] = lf self.assertIs(result[1], lf) self.assertEqual(result.nfp, lf.nfp) @@ -449,8 +449,26 @@ got = deserialise_object(jdata) self.assertEqual(got, data) - def test_deserialise_likelihood_function(self): + def test_deserialise_likelihood_function1(self): """correctly deserialise data into likelihood function""" + # tests single alignment + aln = load_aligned_seqs( + filename=os.path.join(os.getcwd(), "data", "brca1_5.paml") + ) + tree = make_tree(tip_names=aln.names) + model = get_model("HKY85") + lf = model.make_likelihood_function(tree) + lf.set_alignment(aln) + lf_rich_dict = lf.to_rich_dict() + got = deserialise_likelihood_function(lf_rich_dict) + self.assertEqual(str(lf.defn_for["mprobs"]), str(got.defn_for["mprobs"])) + self.assertEqual( + str(lf.defn_for["alignment"].assignments), + str(got.defn_for["alignment"].assignments), + ) + + def test_deserialise_likelihood_function_multilocus(self): + """correctly deserialise data of multilocus likelihood function""" # tests multiple alignments data = load_aligned_seqs( filename=os.path.join(os.getcwd(), "data", "brca1_5.paml") @@ -461,7 +479,7 @@ loci_names = ["1st-half", "2nd-half"] loci = [aln1, aln2] tree = make_tree(tip_names=data.names) - model = get_model("HKY85") + model = get_model("HKY85", 
optimise_motif_probs=True) lf = model.make_likelihood_function(tree, loci=loci_names) lf.set_alignment(loci) lf_rich_dict = lf.to_rich_dict() @@ -471,10 +489,8 @@ str(lf.defn_for["alignment"].assignments), str(got.defn_for["alignment"].assignments), ) - # tests single alignment - model = get_model("HKY85") - lf = model.make_likelihood_function(tree) - lf.set_alignment(aln1) + # now constrain mprobs to be the same + lf.set_param_rule("mprobs", is_independent=False) lf_rich_dict = lf.to_rich_dict() got = deserialise_likelihood_function(lf_rich_dict) self.assertEqual(str(lf.defn_for["mprobs"]), str(got.defn_for["mprobs"])) @@ -483,6 +499,28 @@ str(got.defn_for["alignment"].assignments), ) + def test_custom_deserialiser(self): + """correctly registers a function to inflate a custom object""" + from cogent3.util.deserialise import register_deserialiser + + @register_deserialiser("myfunkydata") + def astuple(data): + data.pop("type") + return tuple(data["data"]) + + orig = {"type": "myfunkydata", "data": (1, 2, 3)} + txt = json.dumps(orig) + got = deserialise_object(txt) + self.assertEqual(got, (1, 2, 3)) + self.assertIsInstance(got, tuple) + + with self.assertRaises(TypeError): + + @register_deserialiser + def astupled(data): + data.pop("type") + return tuple(data["data"]) + if __name__ == "__main__": main() diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_util/test_dictarray.py python-cogent-2022.5.25a1+dfsg/tests/test_util/test_dictarray.py --- python-cogent-2021.10.12a1+dfsg/tests/test_util/test_dictarray.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_util/test_dictarray.py 2022-05-24 23:42:33.000000000 +0000 @@ -22,10 +22,10 @@ __author__ = "Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ 
= "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -279,7 +279,6 @@ d1 = dict(a=dict(k=1, l=2, m=3), b=dict(k=4, l=5, m=6)) darr1 = DictArray(d1) d2 = darr1.to_dict() - darr2 = DictArray(d2) self.assertEqual(d1, d2) d3 = DictArray(d2) self.assertEqual(d1, d3) @@ -318,8 +317,9 @@ dict(a=0, b=35, c=45), b, ) + # exercising construction for data in data_types: - g = DictArray(data) + _ = DictArray(data) def test_getitem(self): """correctly slices""" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_util/test_io.py python-cogent-2022.5.25a1+dfsg/tests/test_util/test_io.py --- python-cogent-2021.10.12a1+dfsg/tests/test_util/test_io.py 1970-01-01 00:00:00.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_util/test_io.py 2022-05-24 23:42:33.000000000 +0000 @@ -0,0 +1,307 @@ +import bz2 +import gzip +import os +import pathlib +import tempfile +import zipfile + +from os import remove, rmdir +from os.path import exists +from tempfile import TemporaryDirectory +from unittest import TestCase, main + +from cogent3.util.io import ( + _path_relative_to_zip_parent, + atomic_write, + get_format_suffixes, + open_, + path_exists, + remove_files, +) + + +__author__ = "Gavin Huttley" +__copyright__ = "Copyright 2007-2022, The Cogent Project" +__credits__ = ["Gavin Huttley"] +__license__ = "BSD-3" +__version__ = "2022.5.25a1" +__maintainer__ = "Gavin Huttley" +__email__ = "Gavin.Huttley@anu.edu.au" +__status__ = "Production" + + +class AtomicWriteTests(TestCase): + """testing the atomic_write class.""" + + def test_does_not_write_if_exception(self): + """file does not exist if an exception raised before closing""" + # create temp file directory + with tempfile.TemporaryDirectory(".") as dirname: + dirname = pathlib.Path(dirname) + test_filepath = dirname / "Atomic_write_test" + with self.assertRaises(AssertionError): + with atomic_write(test_filepath, mode="w") as f: + f.write("abc") + raise AssertionError + self.assertFalse(test_filepath.exists()) + + def 
test_writes_compressed_formats(self): + """correctly writes / reads different compression formats""" + fpath = pathlib.Path("data/sample.tsv") + with open(fpath) as infile: + expect = infile.read() + + with tempfile.TemporaryDirectory(".") as dirname: + dirname = pathlib.Path(dirname) + for suffix in ["gz", "bz2", "zip"]: + outpath = dirname / f"{fpath.name}.{suffix}" + with atomic_write(outpath, mode="wt") as f: + f.write(expect) + + with open_(outpath) as infile: + got = infile.read() + + self.assertEqual(got, expect, msg=f"write failed for {suffix}") + + def test_rename(self): + """Renames file as expected""" + # create temp file directory + with tempfile.TemporaryDirectory(".") as dirname: + # create temp filepath + dirname = pathlib.Path(dirname) + test_filepath = dirname / "Atomic_write_test" + # touch the filepath so it exists + f = open(test_filepath, "w").close() + self.assertTrue(exists(test_filepath)) + # file should overwrite file if file already exists + with atomic_write(test_filepath, mode="w") as f: + f.write("abc") + + def test_atomic_write_noncontext(self): + """atomic write works as more regular file object""" + with TemporaryDirectory(dir=".") as dirname: + path = pathlib.Path(dirname) / "foo.txt" + zip_path = path.parent / f"{path.name}.zip" + aw = atomic_write(path, in_zip=zip_path, mode="w") + aw.write("some data") + aw.close() + with open_(zip_path) as ifile: + got = ifile.read() + self.assertEqual(got, "some data") + + def test_open_handles_bom(self): + """handle files with a byte order mark""" + with TemporaryDirectory(dir=".") as dirname: + # create the different file types + dirname = pathlib.Path(dirname) + + text = "some text" + + # plain text + textfile = dirname / "sample.txt" + textfile.write_text(text, encoding="utf-8-sig") + + # gzipped + gzip_file = dirname / "sample.txt.gz" + with gzip.open(gzip_file, "wt", encoding="utf-8-sig") as outfile: + outfile.write(text) + + # bzipped + bzip_file = dirname / "sample.txt.bz2" + with 
bz2.open(bzip_file, "wt", encoding="utf-8-sig") as outfile: + outfile.write(text) + + # zipped + zip_file = dirname / "sample.zip" + with zipfile.ZipFile(zip_file, "w") as outfile: + outfile.write(textfile, "sample.txt") + + for path in (bzip_file, gzip_file, textfile, zip_file): + with open_(path) as infile: + got = infile.read() + self.assertEqual(got, text, msg=f"failed reading {path}") + + def test_aw_zip_from_path(self): + """supports inferring zip archive name from path""" + with TemporaryDirectory(dir=".") as dirname: + path = pathlib.Path(dirname) / "foo.txt" + zip_path = path.parent / f"{path.name}.zip" + aw = atomic_write(zip_path, in_zip=True, mode="w") + aw.write("some data") + aw.close() + with open_(zip_path) as ifile: + got = ifile.read() + self.assertEqual(got, "some data") + + path = pathlib.Path(dirname) / "foo2.txt" + zip_path = path.parent / f"{path.name}.zip" + aw = atomic_write(path, in_zip=zip_path, mode="w") + aw.write("some data") + aw.close() + with open_(zip_path) as ifile: + got = ifile.read() + self.assertEqual(got, "some data") + + def test_expanduser(self): + """expands user correctly""" + # create temp file directory + home = pathlib.Path("~").expanduser() + with tempfile.TemporaryDirectory(dir=home) as dirname: + # create temp filepath + dirname = pathlib.Path(dirname) + test_filepath = dirname / "Atomic_write_test" + test_filepath = str(test_filepath).replace(str(home), "~") + with atomic_write(test_filepath, mode="w") as f: + f.write("abc") + + def test_path_relative_to_zip_parent(self): + """correctly generates member paths for a zip archive""" + zip_path = pathlib.Path("some/path/to/a/data.zip") + for member in ("data/member.txt", "member.txt", "a/b/c/member.txt"): + got = _path_relative_to_zip_parent(zip_path, pathlib.Path(member)) + self.assertEqual(got.parts[0], "data") + + +class UtilsTests(TestCase): + """Tests of individual functions in utils""" + + def setUp(self): + """ """ + self.files_to_remove = [] + 
self.dirs_to_remove = [] + + def tearDown(self): + """ """ + list(map(remove, self.files_to_remove)) + list(map(rmdir, self.dirs_to_remove)) + + def test_remove_files(self): + """Remove files functions as expected""" + # create list of temp file paths + test_filepaths = [ + tempfile.NamedTemporaryFile(prefix="remove_files_test").name + for i in range(5) + ] + + # try to remove them with remove_files and verify that an IOError is + # raises + self.assertRaises(OSError, remove_files, test_filepaths) + # now get no error when error_on_missing=False + remove_files(test_filepaths, error_on_missing=False) + + # touch one of the filepaths so it exists + open(test_filepaths[2], "w").close() + # check that an error is raised on trying to remove the files... + self.assertRaises(OSError, remove_files, test_filepaths) + # ... but that the existing file was still removed + self.assertFalse(exists(test_filepaths[2])) + + # touch one of the filepaths so it exists + open(test_filepaths[2], "w").close() + # no error is raised on trying to remove the files + # (although 4 don't exist)... + remove_files(test_filepaths, error_on_missing=False) + # ... and the existing file was removed + self.assertFalse(exists(test_filepaths[2])) + + def test_get_format_suffixes_returns_lower_case(self): + """should always return lower case""" + a, b = get_format_suffixes("suffixes.GZ") + self.assertTrue(a == None and b == "gz") + a, b = get_format_suffixes("suffixes.ABCD") + self.assertTrue(a == "abcd" and b == None) + a, b = get_format_suffixes("suffixes.ABCD.BZ2") + self.assertTrue(a == "abcd" and b == "bz2") + a, b = get_format_suffixes("suffixes.abcd.BZ2") + self.assertTrue(a == "abcd" and b == "bz2") + a, b = get_format_suffixes("suffixes.ABCD.bz2") + self.assertTrue(a == "abcd" and b == "bz2") + + def test_get_format_suffixes(self): + """correctly return suffixes for compressed etc.. 
formats""" + a, b = get_format_suffixes("no_suffixes") + self.assertTrue(a == b == None) + a, b = get_format_suffixes("suffixes.gz") + self.assertTrue(a == None and b == "gz") + a, b = get_format_suffixes("suffixes.abcd") + self.assertTrue(a == "abcd" and b == None) + a, b = get_format_suffixes("suffixes.abcd.bz2") + self.assertTrue(a == "abcd" and b == "bz2") + a, b = get_format_suffixes("suffixes.zip") + self.assertTrue(a == None and b == "zip") + + def test_get_format_suffixes_pathlib(self): + """correctly return suffixes for compressed etc.. formats from pathlib""" + Path = pathlib.Path + a, b = get_format_suffixes(Path("no_suffixes")) + self.assertTrue(a == b == None) + a, b = get_format_suffixes(Path("suffixes.gz")) + self.assertTrue(a == None and b == "gz") + a, b = get_format_suffixes(Path("suffixes.abcd")) + self.assertTrue(a == "abcd" and b == None) + a, b = get_format_suffixes(Path("suffixes.abcd.bz2")) + self.assertTrue(a == "abcd" and b == "bz2") + a, b = get_format_suffixes(Path("suffixes.zip")) + self.assertTrue(a == None and b == "zip") + + def test_path_exists(self): + """robustly identifies whether an object is a valid path and exists""" + self.assertFalse(path_exists({})) + self.assertFalse(path_exists("not an existing path")) + self.assertFalse(path_exists("(a,b,(c,d))")) + self.assertFalse(path_exists("(a:0.1,b:0.1,(c:0.1,d:0.1):0.1)")) + # works for a Path instance + p = pathlib.Path(__file__) + self.assertTrue(path_exists(p)) + # or string instance + self.assertTrue(path_exists(__file__)) + + def test_open_reads_zip(self): + """correctly reads a zip compressed file""" + with TemporaryDirectory(dir=".") as dirname: + text_path = os.path.join(dirname, "foo.txt") + with open(text_path, "w") as f: + f.write("any str") + + zip_path = os.path.join(dirname, "foo.zip") + with zipfile.ZipFile(zip_path, "w") as zip: + zip.write(text_path) + + with open_(zip_path) as got: + self.assertEqual(got.readline(), "any str") + + def test_open_writes_zip(self): 
+ """correctly writes a zip compressed file""" + with TemporaryDirectory(dir=".") as dirname: + zip_path = pathlib.Path(dirname) / "foo.txt.zip" + + with open_(zip_path, "w") as f: + f.write("any str") + + with zipfile.ZipFile(zip_path, "r") as zip: + name = zip.namelist()[0] + got = zip.open(name).read() + self.assertEqual(got, b"any str") + + def test_open_zip_multi(self): + """zip with multiple records cannot be opened using open_""" + with TemporaryDirectory(dir=".") as dirname: + text_path1 = os.path.join(dirname, "foo.txt") + with open(text_path1, "w") as f: + f.write("any str") + + text_path2 = os.path.join(dirname, "bar.txt") + with open(text_path2, "w") as f: + f.write("any str") + + zip_path = os.path.join(dirname, "foo.zip") + with zipfile.ZipFile(zip_path, "w") as zip: + zip.write(text_path1) + zip.write(text_path2) + + with self.assertRaises(ValueError): + open_(zip_path) + + +if __name__ == "__main__": + main() diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_util/test_misc.py python-cogent-2022.5.25a1+dfsg/tests/test_util/test_misc.py --- python-cogent-2021.10.12a1+dfsg/tests/test_util/test_misc.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_util/test_misc.py 2022-05-24 23:42:33.000000000 +0000 @@ -2,17 +2,9 @@ """Unit tests for utility functions and classes. 
""" -import bz2 -import gzip -import os -import pathlib -import tempfile -import zipfile from copy import copy, deepcopy from os import remove, rmdir -from os.path import exists -from tempfile import TemporaryDirectory from unittest import TestCase, main from numpy.testing import assert_allclose @@ -29,14 +21,11 @@ MappedDict, MappedList, NestedSplitter, - _path_relative_to_zip_parent, add_lowercase, adjusted_gt_minprob, adjusted_within_bounds, - atomic_write, curry, extend_docstring_from, - get_format_suffixes, get_independent_coords, get_merged_by_value_coords, get_merged_overlapping_coords, @@ -50,16 +39,12 @@ iterable, list_flatten, not_list_tuple, - open_, - open_zip, - path_exists, recursive_flatten, - remove_files, ) __author__ = "Rob Knight" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = [ "Rob Knight", "Amanda Birmingham", @@ -69,7 +54,7 @@ "Daniel McDonald", ] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Production" @@ -284,35 +269,6 @@ self.assertRaises(KeyError, d, "c", 1) self.assertRaises(KeyError, d, "b", 3) - def test_remove_files(self): - """Remove files functions as expected""" - # create list of temp file paths - test_filepaths = [ - tempfile.NamedTemporaryFile(prefix="remove_files_test").name - for i in range(5) - ] - - # try to remove them with remove_files and verify that an IOError is - # raises - self.assertRaises(OSError, remove_files, test_filepaths) - # now get no error when error_on_missing=False - remove_files(test_filepaths, error_on_missing=False) - - # touch one of the filepaths so it exists - open(test_filepaths[2], "w").close() - # check that an error is raised on trying to remove the files... - self.assertRaises(OSError, remove_files, test_filepaths) - # ... 
but that the existing file was still removed - self.assertFalse(exists(test_filepaths[2])) - - # touch one of the filepaths so it exists - open(test_filepaths[2], "w").close() - # no error is raised on trying to remove the files - # (although 4 don't exist)... - remove_files(test_filepaths, error_on_missing=False) - # ... and the existing file was removed - self.assertFalse(exists(test_filepaths[2])) - def test_independent_spans(self): """get_independent_coords returns truly non-overlapping (decorated) spans""" # single span is returned @@ -438,46 +394,6 @@ data = [[20, 21, 0.11], [21, 22, 0.12], [22, 23, 0.13], [23, 24, 0.14]] self.assertEqual(get_merged_by_value_coords(data, digits=1), [[20, 24, 0.1]]) - def test_get_format_suffixes_returns_lower_case(self): - """should always return lower case""" - a, b = get_format_suffixes("suffixes.GZ") - self.assertTrue(a == None and b == "gz") - a, b = get_format_suffixes("suffixes.ABCD") - self.assertTrue(a == "abcd" and b == None) - a, b = get_format_suffixes("suffixes.ABCD.BZ2") - self.assertTrue(a == "abcd" and b == "bz2") - a, b = get_format_suffixes("suffixes.abcd.BZ2") - self.assertTrue(a == "abcd" and b == "bz2") - a, b = get_format_suffixes("suffixes.ABCD.bz2") - self.assertTrue(a == "abcd" and b == "bz2") - - def test_get_format_suffixes(self): - """correctly return suffixes for compressed etc.. formats""" - a, b = get_format_suffixes("no_suffixes") - self.assertTrue(a == b == None) - a, b = get_format_suffixes("suffixes.gz") - self.assertTrue(a == None and b == "gz") - a, b = get_format_suffixes("suffixes.abcd") - self.assertTrue(a == "abcd" and b == None) - a, b = get_format_suffixes("suffixes.abcd.bz2") - self.assertTrue(a == "abcd" and b == "bz2") - a, b = get_format_suffixes("suffixes.zip") - self.assertTrue(a == None and b == "zip") - - def test_get_format_suffixes_pathlib(self): - """correctly return suffixes for compressed etc.. 
formats from pathlib""" - Path = pathlib.Path - a, b = get_format_suffixes(Path("no_suffixes")) - self.assertTrue(a == b == None) - a, b = get_format_suffixes(Path("suffixes.gz")) - self.assertTrue(a == None and b == "gz") - a, b = get_format_suffixes(Path("suffixes.abcd")) - self.assertTrue(a == "abcd" and b == None) - a, b = get_format_suffixes(Path("suffixes.abcd.bz2")) - self.assertTrue(a == "abcd" and b == "bz2") - a, b = get_format_suffixes(Path("suffixes.zip")) - self.assertTrue(a == None and b == "zip") - def test_get_object_provenance(self): """correctly deduce object provenance""" result = get_object_provenance("abncd") @@ -545,64 +461,6 @@ for arg2, result in knowns: self.assertEqual(curry_test(arg2), result) - def test_path_exists(self): - """robustly identifies whether an object is a valid path and exists""" - self.assertFalse(path_exists({})) - self.assertFalse(path_exists("not an existing path")) - self.assertFalse(path_exists("(a,b,(c,d))")) - self.assertFalse(path_exists("(a:0.1,b:0.1,(c:0.1,d:0.1):0.1)")) - # works for a Path instance - p = pathlib.Path(__file__) - self.assertTrue(path_exists(p)) - # or string instance - self.assertTrue(path_exists(__file__)) - - def test_open_reads_zip(self): - """correctly reads a zip compressed file""" - with TemporaryDirectory(dir=".") as dirname: - text_path = os.path.join(dirname, "foo.txt") - with open(text_path, "w") as f: - f.write("any str") - - zip_path = os.path.join(dirname, "foo.zip") - with zipfile.ZipFile(zip_path, "w") as zip: - zip.write(text_path) - - with open_(zip_path) as got: - self.assertEqual(got.readline(), "any str") - - def test_open_writes_zip(self): - """correctly writes a zip compressed file""" - with TemporaryDirectory(dir=".") as dirname: - zip_path = pathlib.Path(dirname) / "foo.txt.zip" - - with open_(zip_path, "w") as f: - f.write("any str") - - with zipfile.ZipFile(zip_path, "r") as zip: - name = zip.namelist()[0] - got = zip.open(name).read() - self.assertEqual(got, b"any 
str") - - def test_open_zip_multi(self): - """zip with multiple records cannot be opened using open_""" - with TemporaryDirectory(dir=".") as dirname: - text_path1 = os.path.join(dirname, "foo.txt") - with open(text_path1, "w") as f: - f.write("any str") - - text_path2 = os.path.join(dirname, "bar.txt") - with open(text_path2, "w") as f: - f.write("any str") - - zip_path = os.path.join(dirname, "foo.zip") - with zipfile.ZipFile(zip_path, "w") as zip: - zip.write(text_path1) - zip.write(text_path2) - - with self.assertRaises(ValueError): - open_(zip_path) - def test_get_setting_from_environ(self): """correctly recovers environment variables""" import os @@ -637,138 +495,6 @@ os.environ.pop(env_name, None) -class AtomicWriteTests(TestCase): - """testing the atomic_write class.""" - - def test_does_not_write_if_exception(self): - """file does not exist if an exception raised before closing""" - # create temp file directory - with tempfile.TemporaryDirectory(".") as dirname: - dirname = pathlib.Path(dirname) - test_filepath = dirname / "Atomic_write_test" - with self.assertRaises(AssertionError): - with atomic_write(test_filepath, mode="w") as f: - f.write("abc") - raise AssertionError - self.assertFalse(test_filepath.exists()) - - def test_writes_compressed_formats(self): - """correctly writes / reads different compression formats""" - fpath = pathlib.Path("data/sample.tsv") - with open(fpath) as infile: - expect = infile.read() - - with tempfile.TemporaryDirectory(".") as dirname: - dirname = pathlib.Path(dirname) - for suffix in ["gz", "bz2", "zip"]: - outpath = dirname / f"{fpath.name}.{suffix}" - with atomic_write(outpath, mode="wt") as f: - f.write(expect) - - with open_(outpath) as infile: - got = infile.read() - - self.assertEqual(got, expect, msg=f"write failed for {suffix}") - - def test_rename(self): - """Renames file as expected""" - # create temp file directory - with tempfile.TemporaryDirectory(".") as dirname: - # create temp filepath - dirname = 
pathlib.Path(dirname) - test_filepath = dirname / "Atomic_write_test" - # touch the filepath so it exists - f = open(test_filepath, "w").close() - self.assertTrue(exists(test_filepath)) - # file should overwrite file if file already exists - with atomic_write(test_filepath, mode="w") as f: - f.write("abc") - - def test_atomic_write_noncontext(self): - """atomic write works as more regular file object""" - with TemporaryDirectory(dir=".") as dirname: - path = pathlib.Path(dirname) / "foo.txt" - zip_path = path.parent / f"{path.name}.zip" - aw = atomic_write(path, in_zip=zip_path, mode="w") - aw.write("some data") - aw.close() - with open_(zip_path) as ifile: - got = ifile.read() - self.assertEqual(got, "some data") - - def test_open_handles_bom(self): - """handle files with a byte order mark""" - with TemporaryDirectory(dir=".") as dirname: - # create the different file types - dirname = pathlib.Path(dirname) - - text = "some text" - - # plain text - textfile = dirname / "sample.txt" - textfile.write_text(text, encoding="utf-8-sig") - - # gzipped - gzip_file = dirname / "sample.txt.gz" - with gzip.open(gzip_file, "wt", encoding="utf-8-sig") as outfile: - outfile.write(text) - - # bzipped - bzip_file = dirname / "sample.txt.bz2" - with bz2.open(bzip_file, "wt", encoding="utf-8-sig") as outfile: - outfile.write(text) - - # zipped - zip_file = dirname / "sample.zip" - with zipfile.ZipFile(zip_file, "w") as outfile: - outfile.write(textfile, "sample.txt") - - for path in (bzip_file, gzip_file, textfile, zip_file): - with open_(path) as infile: - got = infile.read() - self.assertEqual(got, text, msg=f"failed reading {path}") - - def test_aw_zip_from_path(self): - """supports inferring zip archive name from path""" - with TemporaryDirectory(dir=".") as dirname: - path = pathlib.Path(dirname) / "foo.txt" - zip_path = path.parent / f"{path.name}.zip" - aw = atomic_write(zip_path, in_zip=True, mode="w") - aw.write("some data") - aw.close() - with open_(zip_path) as ifile: - 
got = ifile.read() - self.assertEqual(got, "some data") - - path = pathlib.Path(dirname) / "foo2.txt" - zip_path = path.parent / f"{path.name}.zip" - aw = atomic_write(path, in_zip=zip_path, mode="w") - aw.write("some data") - aw.close() - with open_(zip_path) as ifile: - got = ifile.read() - self.assertEqual(got, "some data") - - def test_expanduser(self): - """expands user correctly""" - # create temp file directory - home = pathlib.Path("~").expanduser() - with tempfile.TemporaryDirectory(dir=home) as dirname: - # create temp filepath - dirname = pathlib.Path(dirname) - test_filepath = dirname / "Atomic_write_test" - test_filepath = str(test_filepath).replace(str(home), "~") - with atomic_write(test_filepath, mode="w") as f: - f.write("abc") - - def test_path_relative_to_zip_parent(self): - """correctly generates member paths for a zip archive""" - zip_path = pathlib.Path("some/path/to/a/data.zip") - for member in ("data/member.txt", "member.txt", "a/b/c/member.txt"): - got = _path_relative_to_zip_parent(zip_path, pathlib.Path(member)) - self.assertEqual(got.parts[0], "data") - - class _my_dict(dict): """Used for testing subclass behavior of ClassChecker""" @@ -894,7 +620,7 @@ def test_init(self): """Delegator should init OK when data supplied""" - ls = _list_and_string([1, 2, 3], "abc") + _list_and_string([1, 2, 3], "abc") self.assertRaises(TypeError, _list_and_string, [123]) def test_getattr(self): diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_util/test_parallel.py python-cogent-2022.5.25a1+dfsg/tests/test_util/test_parallel.py --- python-cogent-2021.10.12a1+dfsg/tests/test_util/test_parallel.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_util/test_parallel.py 2022-05-24 23:42:33.000000000 +0000 @@ -10,10 +10,10 @@ __author__ = "Sheng Han Moses Koh" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Sheng Han Moses 
Koh"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "Gavin.Huttley@anu.edu.au" __status__ = "Alpha" @@ -38,7 +38,7 @@ def test_create_processes(self): """Procressor pool should create multiple distingue processes""" max_worker_count = multiprocessing.cpu_count() - 1 - index = [index for index in range(max_worker_count)] + index = list(range(max_worker_count)) result = parallel.map(get_process_value, index, max_workers=None, use_mpi=False) result_processes = [v[0] for v in result] result_values = [v[1] for v in result] @@ -71,14 +71,6 @@ master_processes += 1 self.assertEqual(master_processes, 0) - @skipIf(sys.version_info[1] >= 7, "exception test for Python 3.6") - def test_is_master_process_version_exception(self): - """ - is_master_process() should throw an exception - for Python versions below 3.7 - """ - self.assertRaises(RuntimeError, parallel.is_master_process) - if __name__ == "__main__": main() diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_util/test_recode_alignment.py python-cogent-2022.5.25a1+dfsg/tests/test_util/test_recode_alignment.py --- python-cogent-2021.10.12a1+dfsg/tests/test_util/test_recode_alignment.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_util/test_recode_alignment.py 2022-05-24 23:42:33.000000000 +0000 @@ -16,10 +16,10 @@ __author__ = "Greg Caporaso" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Greg Caporaso"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Greg Caporaso" __email__ = "gregcaporaso@gmail.com" __status__ = "Beta" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_util/test_table.py python-cogent-2022.5.25a1+dfsg/tests/test_util/test_table.py --- python-cogent-2021.10.12a1+dfsg/tests/test_util/test_table.py 2021-10-12 00:17:34.000000000 +0000 +++ 
python-cogent-2022.5.25a1+dfsg/tests/test_util/test_table.py 2022-05-24 23:42:33.000000000 +0000 @@ -2,7 +2,6 @@ """Unit tests for table. """ -import contextlib import json import os import pathlib @@ -17,14 +16,14 @@ from numpy import arange from numpy.testing import assert_equal -from cogent3 import load_table, make_table +from cogent3 import load_table, make_table, open_ from cogent3.format.table import ( formatted_array, get_continuation_tables_headers, is_html_markup, ) from cogent3.parse.table import FilteringParser -from cogent3.util.misc import get_object_provenance, open_ +from cogent3.util.misc import get_object_provenance from cogent3.util.table import ( Table, cast_str_to_array, @@ -41,10 +40,10 @@ TEST_ROOT = pathlib.Path(__file__).parent.parent __author__ = "Thomas La" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Thomas La", "Christopher Bradley"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" @@ -1436,6 +1435,11 @@ data = table.columns.to_dict() self.assertEqual(data, dict(a=[0, 1], b=[2, 3], c=["abc", "efg"])) + def test_load_table_limit(self): + """limit argument to function works""" + t = load_table("data/sample.tsv", limit=2) + self.assertEqual(t.shape[0], 2) + def test_load_table_returns_static_columns(self): """for static data, load_table gives same dtypes for static_columns_type=True/False""" t = load_table("data/sample.tsv", sep="\t", static_column_types=False) @@ -1591,7 +1595,7 @@ t = make_table(self.t2_header, data=self.t2_rows) t = t[:, 0] # the next line was previously failing - g = t._get_repr_() + t._get_repr_() table = Table(header=["a", "b"], data=[[1, 2]]) table, _, unset_columns = table._get_repr_() @@ -1769,20 +1773,23 @@ r = cast_str_to_numeric(d) assert_equal(d, r) - for d_type in [numpy.int, 
numpy.complex, numpy.float64]: - d = d.astype(d_type) - r = cast_str_to_numeric(d) - self.assertIsInstance(r[0], type(d[0])) + with numpy.testing.suppress_warnings() as sup: + # we know that converting to real loses imaginary + sup.filter(numpy.ComplexWarning) + for d_type in [numpy.int64, numpy.complex128, numpy.float64]: + d = d.astype(d_type) + r = cast_str_to_numeric(d) + self.assertIsInstance(r[0], type(d[0])) d = d.astype(str) r = cast_str_to_numeric(d) self.assertIsInstance(r[0], numpy.float64) d = numpy.array(d, dtype="U") r = cast_str_to_numeric(d) - self.assertIsInstance(r[0], numpy.float) + self.assertIsInstance(r[0], numpy.float64) d = numpy.array(d, dtype="S") r = cast_str_to_numeric(d) - self.assertIsInstance(r[0], numpy.float) + self.assertIsInstance(r[0], numpy.float64) def test_cast_str_to_array(self): """handle processing string series""" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_util/test_transform.py python-cogent-2022.5.25a1+dfsg/tests/test_util/test_transform.py --- python-cogent-2021.10.12a1+dfsg/tests/test_util/test_transform.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_util/test_transform.py 2022-05-24 23:42:33.000000000 +0000 @@ -13,10 +13,10 @@ __author__ = "Sandra Smit" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Rob Knight", "Sandra Smit", "Zongzhi Liu"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Sandra Smit" __email__ = "sandra.smit@colorado.edu" __status__ = "Production" @@ -118,7 +118,6 @@ long_norm = lambda s, x, y: (s + 0.0) / max(len(x), len(y)) times_two = lambda s, x, y: 2 * s - empty = [] s1 = [1, 2, 3, 4, 5] s2 = [1, 3, 2, 4, 5] s3 = [1, 1, 1, 1, 1] diff -Nru python-cogent-2021.10.12a1+dfsg/tests/test_util/test_union_dict.py python-cogent-2022.5.25a1+dfsg/tests/test_util/test_union_dict.py --- 
python-cogent-2021.10.12a1+dfsg/tests/test_util/test_union_dict.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/test_util/test_union_dict.py 2022-05-24 23:42:33.000000000 +0000 @@ -8,10 +8,10 @@ __author__ = "Thomas La" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Gavin Huttley", "Thomas La"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tests/timetrial.py python-cogent-2022.5.25a1+dfsg/tests/timetrial.py --- python-cogent-2021.10.12a1+dfsg/tests/timetrial.py 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tests/timetrial.py 2022-05-24 23:42:33.000000000 +0000 @@ -12,10 +12,10 @@ __author__ = "Peter Maxwell and Gavin Huttley" -__copyright__ = "Copyright 2007-2021, The Cogent Project" +__copyright__ = "Copyright 2007-2022, The Cogent Project" __credits__ = ["Peter Maxwell", "Gavin Huttley", "Edward Lang"] __license__ = "BSD-3" -__version__ = "2021.10.12a1" +__version__ = "2022.5.25a1" __maintainer__ = "Gavin Huttley" __email__ = "gavin.huttley@anu.edu.au" __status__ = "Production" diff -Nru python-cogent-2021.10.12a1+dfsg/tox.ini python-cogent-2022.5.25a1+dfsg/tox.ini --- python-cogent-2021.10.12a1+dfsg/tox.ini 2021-10-12 00:17:34.000000000 +0000 +++ python-cogent-2022.5.25a1+dfsg/tox.ini 1970-01-01 00:00:00.000000000 +0000 @@ -1,58 +0,0 @@ -[tox] -envlist = py37, py38, py39, py37mpi, py38mpi, py39mpi -isolated_build = True - -[testenv] -passenv = * -changedir = tests -deps = py{37,38}: numba>0.48.0 - py39: numba>0.52 - chardet - numpy - tinydb - tqdm - click - pytest - scitrack - pandas - pytest-cov - py{37mpi,38mpi,39mpi}: mpi4py - -[testenv:py39] -basepython = python3.9 -commands = - pytest -x --junitxml=junit-{envname}.xml --cov-report xml 
--cov=cogent3 ./ --ignore=test_app_mpi.py - -[testenv:py38] -basepython = python3.8 -commands = - pytest -x --junitxml=junit-{envname}.xml --cov-report xml --cov=cogent3 ./ --ignore=test_app_mpi.py - -[testenv:py37] -basepython = python3.7 -commands = - pytest -x --junitxml=junit-{envname}.xml --cov-report xml --cov=cogent3 ./ --ignore=test_app_mpi.py - -[testenv:py37mpi] -basepython = python3.7 -whitelist_externals = mpiexec -commands = - mpiexec -n 1 {envpython} -m mpi4py.futures -m pytest -x --junitxml=junit-{envname}.xml --cov-report xml --cov=cogent3 test_app/test_app_mpi.py - -[testenv:py38mpi] -basepython = python3.8 -whitelist_externals = mpiexec -commands = - mpiexec -n 1 {envpython} -m mpi4py.futures -m pytest -x --junitxml=junit-{envname}.xml --cov-report xml --cov=cogent3 test_app/test_app_mpi.py - -[testenv:py39mpi] -basepython = python3.9 -whitelist_externals = mpiexec -commands = - mpiexec -n 1 {envpython} -m mpi4py.futures -m pytest -x --junitxml=junit-{envname}.xml --cov-report xml --cov=cogent3 test_app/test_app_mpi.py - -[gh-actions] -python = - 3.7: py37 - 3.8: py38 - 3.9: py39