diff -Nru tkp-3.1.1/alembic/env.py tkp-4.0/alembic/env.py --- tkp-3.1.1/alembic/env.py 1970-01-01 00:00:00.000000000 +0000 +++ tkp-4.0/alembic/env.py 2017-02-03 10:06:41.000000000 +0000 @@ -0,0 +1,84 @@ +from __future__ import with_statement +from alembic import context +from sqlalchemy import engine_from_config, pool +from logging.config import fileConfig +import os +import getpass +from tkp.db.model import Base + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +target_metadata = Base.metadata + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def get_url(): + user = getpass.getuser() + engine = os.environ.get('TKP_DBENGINE', 'postgresql') + name = os.environ.get('TKP_DBNAME', user) + user = os.environ.get('TKP_DBUSER', user) + password = os.environ.get('TKP_DBPASSWORD', user) + host = os.environ.get('TKP_DBHOST', 'localhost') + port = os.environ.get('TKP_DBPORT', '5432') + return '%s://%s:%s@%s:%s/%s' % (engine, user, password, host, port, name) + + +def run_migrations_offline(): + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = get_url() + context.configure( + url=url, target_metadata=target_metadata, literal_binds=True) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online(): + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. + + """ + connectable = engine_from_config( + config.get_section(config.config_ini_section), + prefix='sqlalchemy.', + poolclass=pool.NullPool) + + with connectable.connect() as connection: + context.configure( + connection=connection, + target_metadata=target_metadata + ) + + with context.begin_transaction(): + context.run_migrations() + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff -Nru tkp-3.1.1/alembic/README tkp-4.0/alembic/README --- tkp-3.1.1/alembic/README 1970-01-01 00:00:00.000000000 +0000 +++ tkp-4.0/alembic/README 2017-02-03 10:06:41.000000000 +0000 @@ -0,0 +1,31 @@ +To find out how to work with migrations, read the docs: + +https://alembic.readthedocs.io/en/latest/autogenerate.html + +You can work on a specific TKP database by setting these +environment variables: + + - TKP_DBENGINE + - TKP_DBNAME + - TKP_DBUSER + - TKP_DBPASSWORD + - TKP_DBHOST + - TKP_DBPORT + +This is similar to how we configure the test suite: + +http://tkp.readthedocs.io/en/latest/devref/procedures/testing.html#database + +An annoying thing is that, for now, you also need to set the database config +in the alembic.ini file (sqlalchemy.url). + +Basically, to create a migration you should modify the model and then run: + +$ alembic revision --autogenerate -m "description" + +and then check the generated migration. 
+ +An end user can then upgrade to the latest version using: + +$ alembic upgrade head + diff -Nru tkp-3.1.1/alembic/script.py.mako tkp-4.0/alembic/script.py.mako --- tkp-3.1.1/alembic/script.py.mako 1970-01-01 00:00:00.000000000 +0000 +++ tkp-4.0/alembic/script.py.mako 2017-02-03 10:06:41.000000000 +0000 @@ -0,0 +1,24 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" + +# revision identifiers, used by Alembic. +revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +def upgrade(): + ${upgrades if upgrades else "pass"} + + +def downgrade(): + ${downgrades if downgrades else "pass"} diff -Nru tkp-3.1.1/alembic/versions/8f0577e411f0_40rc2.py tkp-4.0/alembic/versions/8f0577e411f0_40rc2.py --- tkp-3.1.1/alembic/versions/8f0577e411f0_40rc2.py 1970-01-01 00:00:00.000000000 +0000 +++ tkp-4.0/alembic/versions/8f0577e411f0_40rc2.py 2017-02-03 10:06:41.000000000 +0000 @@ -0,0 +1,43 @@ +"""upgrade to 4.0rc2 + +Revision ID: 8f0577e411f0 +Revises: e3b50f666fbb +Create Date: 2016-10-13 13:42:05.605723 + +""" + +# revision identifiers, used by Alembic. +revision = '8f0577e411f0' +down_revision = 'e3b50f666fbb' +branch_labels = None +depends_on = None + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + op.create_table('imagedata', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('image', sa.Integer(), nullable=False), + sa.Column('fits_header', sa.String(), nullable=True), + sa.Column('fits_data', sa.LargeBinary(), nullable=True), + sa.ForeignKeyConstraint(['image'], ['image.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_imagedata_image'), 'imagedata', ['image'], unique=False) + op.add_column(u'frequencyband', sa.Column('dataset', sa.Integer(), nullable=False)) + op.create_index(op.f('ix_frequencyband_dataset'), 'frequencyband', ['dataset'], unique=False) + op.create_foreign_key(None, 'frequencyband', 'dataset', ['dataset'], ['id']) + op.drop_index('ix_varmetric_runcat', table_name='varmetric') + op.create_index(op.f('ix_varmetric_runcat'), 'varmetric', ['runcat'], unique=True) + + +def downgrade(): + op.drop_index(op.f('ix_varmetric_runcat'), table_name='varmetric') + op.create_index('ix_varmetric_runcat', 'varmetric', ['runcat'], unique=False) + op.drop_constraint(None, 'frequencyband', type_='foreignkey') + op.drop_index(op.f('ix_frequencyband_dataset'), table_name='frequencyband') + op.drop_column(u'frequencyband', 'dataset') + op.drop_index(op.f('ix_imagedata_image'), table_name='imagedata') + op.drop_table('imagedata') diff -Nru tkp-3.1.1/alembic/versions/e3b50f666fbb_the_first_alembic_revision.py tkp-4.0/alembic/versions/e3b50f666fbb_the_first_alembic_revision.py --- tkp-3.1.1/alembic/versions/e3b50f666fbb_the_first_alembic_revision.py 1970-01-01 00:00:00.000000000 +0000 +++ tkp-4.0/alembic/versions/e3b50f666fbb_the_first_alembic_revision.py 2017-02-03 10:06:41.000000000 +0000 @@ -0,0 +1,24 @@ +"""the first alembic revision + +Revision ID: e3b50f666fbb +Revises: +Create Date: 2016-05-30 11:32:22.585690 + +""" + +# revision identifiers, used by Alembic. 
+revision = 'e3b50f666fbb' +down_revision = None +branch_labels = None +depends_on = None + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + pass + + +def downgrade(): + pass diff -Nru tkp-3.1.1/alembic.ini tkp-4.0/alembic.ini --- tkp-3.1.1/alembic.ini 1970-01-01 00:00:00.000000000 +0000 +++ tkp-4.0/alembic.ini 2017-02-03 10:06:41.000000000 +0000 @@ -0,0 +1,67 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +script_location = alembic + +# template used to generate migration files +# file_template = %%(rev)s_%%(slug)s + +# max length of characters to apply to the +# "slug" field +#truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; this defaults +# to alembic/versions. When using multiple version +# directories, initial revisions must be specified with --version-path +# version_locations = %(here)s/bar %(here)s/bat alembic/versions + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +sqlalchemy.url = postgresql://user:pass@localhost/dbname + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff -Nru tkp-3.1.1/CHANGES.md tkp-4.0/CHANGES.md --- tkp-3.1.1/CHANGES.md 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/CHANGES.md 2017-02-03 10:06:41.000000000 +0000 @@ -1,5 +1,135 @@ # Changelog ----------- + +## 4.0 + +No changes since 4.0rc1 + +## 4.0 release candidate 1 + +### RMS based image quality check added for all images + +A quality check of each image is now performed ([#512][]). An image is +rejected if the image data contains NaNs, or if the background RMS of the +image falls outside the user-set range in job_params: + + [persistence] + rms_est_max = 100 ; global maximum acceptable rms + rms_est_min = 0.0 ; global minimum acceptable rms + +or if, after a number of images, the image RMS is more than rms_est_sigma away +from the mean RMS of those previous images, where that number is set in +job_params: + + [persistence] + rms_est_history = 100 ; how many images used for calculating rms histogram + +[#512]: https://github.com/transientskp/tkp/issues/512 + +### Frequency band logic change + +The band determination logic has changed. Previously, all bands were split +into 1 MHz intervals and associated as such. With this release, images +are put in the same band if their bandwidths overlap. + +We added an option to limit the bandwidth used for band association +([#492][]). Limiting the bandwidth for an image is done by +setting `bandwidth_max` in *job_params.cfg* under the +`persistence` section. E.g.:: + + [persistence] + bandwidth_max = 0.0 + +Setting the value to 0.0 will use the bandwidth defined in the image +headers; a non-zero value will override this. 
+ +[#492]: https://github.com/transientskp/tkp/issues/492 + + +### Added streaming telescope support + +The internals of TraP have been rewritten to support streaming AARTFAAC +data ([#483][]). There is now a new section in the job_params.cfg file +with a mode setting. Setting this to batch will keep the old TraP +behaviour, while setting mode to stream will enable the new behaviour: +TraP will connect to a network port and process the incoming images until +terminated. +The hosts and ports to connect to are controlled with the hosts +and ports settings:: + + [pipeline] + mode = 'stream' + hosts = 'struis.science.uva.nl,struis.science.uva.nl' + ports = '6666,6667' + +The batch mode should be mostly unaffected; only the order of actions +has changed. TraP now processes the full dataset in chunks grouped by +timestamp. The storing of images, quality checks and metadata +extraction now run together with the source extraction and association +cycle, whereas before this was all done at the start of a TraP run. +This makes it more similar to how we process streaming data and enables +other optimisations in the future. + +[#483]: https://github.com/transientskp/tkp/pull/483 + + +### Removal of MongoDB image store + +If you enable the ``copy_images`` setting in your pipeline.cfg file, +the images are now stored in the SQL database ([#534][]). This makes it +much easier to manage the files, for example to delete them. The +images also load faster in banana. This makes setting up and configuring +MongoDB obsolete. + + +[#534]: https://github.com/transientskp/tkp/pull/534 + + +### Add command line option to delete a dataset + +It is now possible to delete a dataset ([#533][]):: + + + $ trap-manage.py deldataset 5 -y + + dataset 5 has been deleted! + + +[#533]: https://github.com/transientskp/tkp/pull/533 + + +### Make TraP more resilient against faulty data + +TraP often crashed on faulty image data. By popular request, TraP will +now try to continue and give a warning instead ([#522][]). + +[#522]: https://github.com/transientskp/tkp/issues/522 + + +### Various other changes and bugfixes + +* Fix Numpy 1.9+ compatibility ([#509][]) +* Fix TraP sourcefinder error on updated AARTFAAC images ([#505][]) +* Fix: forced fits is not parallelised ([#526][]) +* Restructure logging and make it less verbose. Multiproc workers now + log to stdout. +* Fix multiprocess job cancelling problem (ctrl-c) + +[#509]: https://github.com/transientskp/tkp/issues/509 +[#505]: https://github.com/transientskp/tkp/issues/505 +[#526]: https://github.com/transientskp/tkp/issues/526 + + +### Known issues + +* Streaming mode gives a harmless error ([#536][]) +* Alembic upgrade is not working yet ([#535][]) + + +[#535]: https://github.com/transientskp/tkp/issues/535 +[#536]: https://github.com/transientskp/tkp/issues/536 + + ## R3.1.1 (2016-05-20) Adds a 'generic' (i.e. 
not telescope-specific) quality check for flat images, diff -Nru tkp-3.1.1/debian/changelog tkp-4.0/debian/changelog --- tkp-3.1.1/debian/changelog 2016-09-20 14:01:19.000000000 +0000 +++ tkp-4.0/debian/changelog 2017-02-03 10:14:37.000000000 +0000 @@ -1,3 +1,9 @@ +tkp (4.0-1) xenial; urgency=medium + + * new upstream release + + -- Gijs Molenaar (launchpad ppa build key) Fri, 03 Feb 2017 10:14:20 +0000 + tkp (3.1.1-1kern1) xenial; urgency=medium * add missing python-setuptools dependency diff -Nru tkp-3.1.1/debian/watch tkp-4.0/debian/watch --- tkp-3.1.1/debian/watch 1970-01-01 00:00:00.000000000 +0000 +++ tkp-4.0/debian/watch 2017-02-03 10:11:57.000000000 +0000 @@ -0,0 +1,3 @@ +version=3 +opts=filenamemangle=s/.+\/v?(\d\S+)\.tar\.gz/tkp-$1\.tar\.gz/ \ + https://github.com/transientskp/tkp/tags .*/r?(\d\S+)\.tar\.gz diff -Nru tkp-3.1.1/documentation/conf.py tkp-4.0/documentation/conf.py --- tkp-3.1.1/documentation/conf.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/documentation/conf.py 2017-02-03 10:06:41.000000000 +0000 @@ -80,10 +80,11 @@ # rst_prolog is included in all our rst files. Use it to define common # substitutions, like the database version number. -from tkp.db.database import DB_VERSION +from tkp.db.model import SCHEMA_VERSION + rst_prolog = """ .. |db_version| replace:: %d -""" % (DB_VERSION) +""" % (SCHEMA_VERSION) # The encoding of source files. #source_encoding = 'utf-8-sig' @@ -284,27 +285,25 @@ # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { 'python': ('http://docs.python.org/', None), -# 'numpy': ('http://docs.scipy.org/doc/numpy/', None), -# 'scipy': ('http://docs.scipy.org/doc/scipy/reference/', None), + 'astropy': ('http://docs.astropy.org/en/stable/', None), + 'sqlalchemy': ('http://docs.sqlalchemy.org/en/rel_1_0/', None), + 'casacore': ('http://casacore.github.io/python-casacore/', None), + 'numpy': ('http://docs.scipy.org/doc/numpy/', None), + 'scipy': ('http://docs.scipy.org/doc/scipy/reference/', None), } # Warn when references cannot be resolved: nitpicky = True nitpick_ignore = [ - ("py:obj", "numpy.ndarray"), - ("py:obj", "numpy.ma.MaskedArray"), ("py:obj", "dict/list/tuple"), ("py:obj", "3-tuple of float"), ("py:obj", "3-tuple"), - ("py:obj", "casacore.images.image"), - ("py:obj", "casacore.tables.table"), ("py:obj", "list of (RA, Dec) tuples"), ("py:obj", "list of tuples"), ("py:obj", "ExtractedSourceTuple"), ("py:obj", "list of MockSource"), ("py:obj", "casacore measure"), ("py:obj", "lambda"), - ("py:obj", "sqlalchemy.orm.Session"), # These result from incorrect docstrings, should really be fixed # but we'll just suppress the errors for now. ("py:obj", "-"), diff -Nru tkp-3.1.1/documentation/devref/tkp/distribute/index.rst tkp-4.0/documentation/devref/tkp/distribute/index.rst --- tkp-3.1.1/documentation/devref/tkp/distribute/index.rst 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/documentation/devref/tkp/distribute/index.rst 2017-02-03 10:06:41.000000000 +0000 @@ -4,3 +4,15 @@ .. automodule:: tkp.distribute :members: + :undoc-members: + :show-inheritance: + +.. automodule:: tkp.distribute.serial + :members: + :undoc-members: + :show-inheritance: + +.. 
automodule:: tkp.distribute.multiproc + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff -Nru tkp-3.1.1/documentation/devref/tkp/index.rst tkp-4.0/documentation/devref/tkp/index.rst --- tkp-3.1.1/documentation/devref/tkp/index.rst 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/documentation/devref/tkp/index.rst 2017-02-03 10:06:41.000000000 +0000 @@ -27,4 +27,4 @@ :maxdepth: 1 main - + stream diff -Nru tkp-3.1.1/documentation/devref/tkp/quality/index.rst tkp-4.0/documentation/devref/tkp/quality/index.rst --- tkp-3.1.1/documentation/devref/tkp/quality/index.rst 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/documentation/devref/tkp/quality/index.rst 2017-02-03 10:06:41.000000000 +0000 @@ -30,10 +30,3 @@ .. automodule:: tkp.quality.rms :members: - -:mod:`tkp.quality.statistics` -============================= - -.. automodule:: tkp.quality.statistics - :members: - diff -Nru tkp-3.1.1/documentation/devref/tkp/stream.rst tkp-4.0/documentation/devref/tkp/stream.rst --- tkp-3.1.1/documentation/devref/tkp/stream.rst 1970-01-01 00:00:00.000000000 +0000 +++ tkp-4.0/documentation/devref/tkp/stream.rst 2017-02-03 10:06:41.000000000 +0000 @@ -0,0 +1,10 @@ +++++++++++++++++++++++++++++++++++++++++++ +``tkp.stream`` -- Streaming telescope data +++++++++++++++++++++++++++++++++++++++++++ + +:mod:`tkp.stream` +========================== + +.. automodule:: tkp.stream + :members: + :undoc-members: \ No newline at end of file diff -Nru tkp-3.1.1/documentation/userref/config/job_params_cfg.rst tkp-4.0/documentation/userref/config/job_params_cfg.rst --- tkp-3.1.1/documentation/userref/config/job_params_cfg.rst 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/documentation/userref/config/job_params_cfg.rst 2017-02-03 10:06:41.000000000 +0000 @@ -40,6 +40,10 @@ Integer. Determines the size of the subsection used for RMS measurement: the central ``1/f`` of the image will be used (where f=rms_est_fraction). +``bandwidth_max`` + Float. Limit the maximum bandwidth used when determining if two images + belong to the same 'band' grouping. + .. _job_params_quality: ``quality_lofar`` Section diff -Nru tkp-3.1.1/documentation/userref/config/pipeline_cfg.rst tkp-4.0/documentation/userref/config/pipeline_cfg.rst --- tkp-3.1.1/documentation/userref/config/pipeline_cfg.rst 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/documentation/userref/config/pipeline_cfg.rst 2017-02-03 10:06:41.000000000 +0000 @@ -95,22 +95,12 @@ ``image_cache`` Section ======================= -This section configures the -:ref:`image caching or 'pixel store' ` functionality. +This section configures the image caching functionality. -See also: the 'optional dependencies' section of your relevant -:ref:`installation ` guide. ``copy_images`` Boolean. If ``True``, image pixel data will be stored to a MongoDB database. -``mongo_host``, ``mongo_port`` - String, integer. Network hostname and port to use to connect to MongoDB. - Only used if ``copy_images`` is ``True``. - -``mongo_db`` - String. Name of MongoDB database in which to store image pixel data. Only - used if ``copy_images`` is ``True``. .. 
_pipeline_cfg_parallelise: diff -Nru tkp-3.1.1/documentation/userref/structure/stages/persistence.rst tkp-4.0/documentation/userref/structure/stages/persistence.rst --- tkp-3.1.1/documentation/userref/structure/stages/persistence.rst 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/documentation/userref/structure/stages/persistence.rst 2017-02-03 10:06:41.000000000 +0000 @@ -12,9 +12,9 @@ between images in a single databset, for instance. All images being processed are added to the same dataset. -Optionally, a copy of the image pixel data may be stored to a :ref:`MongoDB -` instance at the same time. This is configured in -the :ref:`image_cache section ` of the pipeline config. +Optionally, a copy of the image pixel data may be stored to the database. This +is configured in the :ref:`image_cache section ` of +the pipeline config. Note that only images which meet the :ref:`data accessor ` requirements are stored in the database. Any other data provided to the diff -Nru tkp-3.1.1/.gitignore tkp-4.0/.gitignore --- tkp-3.1.1/.gitignore 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/.gitignore 2017-02-03 10:06:41.000000000 +0000 @@ -3,3 +3,5 @@ build dist/ tkp.egg-info/ +.virtualenv/ +.virtualenv3/ diff -Nru tkp-3.1.1/setup.py tkp-4.0/setup.py --- tkp-3.1.1/setup.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/setup.py 2017-02-03 10:06:41.000000000 +0000 @@ -1,6 +1,5 @@ #!/usr/bin/env python from setuptools import setup, find_packages -from tkp import __version__ as tkp_version install_requires = """ astropy @@ -10,14 +9,14 @@ python-casacore python-dateutil>=1.4.1 pytz - pywcs>=1.12 scipy>=0.7.0 sqlalchemy>=1.0.0 + alembic + monotonic """.split() extras_require = { - 'pixelstore': ['pymongo>=3.0'], - 'monetdb': ['python-monetdb>=11.11.11', 'sqlalchemy_monetdb>=0.9.1'], + 'monetdb': ['sqlalchemy_monetdb>=0.9.1'], } tkp_scripts = [ @@ -35,15 +34,15 @@ package_list = find_packages(where='.', exclude=['tests']) setup( - name = "tkp", - version = tkp_version, - packages = package_list, - scripts = tkp_scripts, + name="tkp", + version="4.0", + packages=package_list, + scripts=tkp_scripts, package_data=package_data, - description = "LOFAR Transients Key Project (TKP)", - author = "TKP Discovery WG", - author_email = "discovery@transientskp.org", - url = "http://docs.transientskp.org/", + description="LOFAR Transients Key Project (TKP)", + author="TKP Discovery WG", + author_email="discovery@transientskp.org", + url="http://docs.transientskp.org/", install_requires=install_requires, extras_require=extras_require ) diff -Nru tkp-3.1.1/tests/test_accessors/test_fitsblob.py tkp-4.0/tests/test_accessors/test_fitsblob.py --- tkp-3.1.1/tests/test_accessors/test_fitsblob.py 1970-01-01 00:00:00.000000000 +0000 +++ tkp-4.0/tests/test_accessors/test_fitsblob.py 2017-02-03 10:06:41.000000000 +0000 @@ -0,0 +1,26 @@ +""" +Try the in memory fits stream Accessor +""" + +import os +import unittest +from astropy.io.fits import open as fitsopen +from tkp.accessors import open as tkpopen +from tkp.testutil.data import DATAPATH +from tkp.testutil.decorators import requires_data +from tkp.accessors.fitsimageblob import FitsImageBlob + +FITS_FILE = os.path.join(DATAPATH, 'accessors/aartfaac.fits') + + +@requires_data(FITS_FILE) +class PyfitsFitsImage(unittest.TestCase): + + def setUp(self): + self.hudelist = fitsopen(FITS_FILE) + + def test_tkp_open(self): + accessor = tkpopen(FITS_FILE) + + def test_fits_blob_accessor(self): + accessor = FitsImageBlob(self.hudelist) diff -Nru 
tkp-3.1.1/tests/test_accessors/test_lofarcasatable.py tkp-4.0/tests/test_accessors/test_lofarcasatable.py --- tkp-3.1.1/tests/test_accessors/test_lofarcasatable.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_accessors/test_lofarcasatable.py 2017-02-03 10:06:41.000000000 +0000 @@ -80,5 +80,5 @@ def col(self, name): if name == 'START': return [100, 100, 200] elif name == 'END': return [150, 175, 300] - self.accessor.subtables = {'LOFAR_ORIGIN': MockOriginTable()} - self.assertEqual(self.accessor.parse_tautime(), 175) + subtables = {'LOFAR_ORIGIN': MockOriginTable()} + self.assertEqual(self.accessor.parse_tautime(subtables), 175) diff -Nru tkp-3.1.1/tests/test_accessors/test_pickle.py tkp-4.0/tests/test_accessors/test_pickle.py --- tkp-3.1.1/tests/test_accessors/test_pickle.py 1970-01-01 00:00:00.000000000 +0000 +++ tkp-4.0/tests/test_accessors/test_pickle.py 2017-02-03 10:06:41.000000000 +0000 @@ -0,0 +1,44 @@ +""" +Makes sure the accessors are pickleable +""" + +from os import path +import unittest +import cPickle +from astropy.io.fits import open as open_fits +from tkp.testutil.data import DATAPATH +from tkp.accessors.fitsimage import FitsImage +from tkp.accessors.fitsimageblob import FitsImageBlob +from tkp.accessors.aartfaaccasaimage import AartfaacCasaImage +from tkp.accessors.lofarcasaimage import LofarCasaImage + +AARTFAAC_FITS = path.join(DATAPATH, 'accessors/aartfaac.fits') +CASA_TABLE = path.join(DATAPATH, 'casatable/L55596_000TO009_skymodellsc_wmax6000_noise_mult10_cell40_npix512_wplanes215.img.restored.corr') +AARTFAAC_TABLE = path.join(DATAPATH, 'accessors/aartfaac.table') + + +class TestAccessorPickle(unittest.TestCase): + def test_fits_pickle(self): + accessor = FitsImage(AARTFAAC_FITS) + pickled = cPickle.dumps(accessor) + unpickled = cPickle.loads(pickled) + self.assertEqual(type(unpickled), type(accessor)) + + def test_fitsblob_pickle(self): + fits = open_fits(AARTFAAC_FITS) + accessor = FitsImageBlob(fits) + pickled = cPickle.dumps(accessor) + unpickled = cPickle.loads(pickled) + self.assertEqual(type(unpickled), type(accessor)) + + def test_lofar_casatable_pickle(self): + accessor = LofarCasaImage(CASA_TABLE) + pickled = cPickle.dumps(accessor) + unpickled = cPickle.loads(pickled) + self.assertEqual(type(unpickled), type(accessor)) + + def test_aartfaac_casatable_pickle(self): + accessor = AartfaacCasaImage(AARTFAAC_TABLE) + pickled = cPickle.dumps(accessor) + unpickled = cPickle.loads(pickled) + self.assertEqual(type(unpickled), type(accessor)) diff -Nru tkp-3.1.1/tests/test_database/test_alchemy.py tkp-4.0/tests/test_database/test_alchemy.py --- tkp-3.1.1/tests/test_database/test_alchemy.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_database/test_alchemy.py 2017-02-03 10:06:41.000000000 +0000 @@ -3,8 +3,9 @@ from datetime import datetime, timedelta import tkp.db +import tkp.db.alchemy.varmetric import tkp.db.model -import tkp.db.alchemy + from tkp.testutil.alchemy import gen_band, gen_dataset, gen_skyregion, \ gen_lightcurve @@ -13,8 +14,6 @@ logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING) - - class TestApi(unittest.TestCase): @classmethod def setUpClass(cls): @@ -26,95 +25,98 @@ # make 2 datasets with 2 lightcurves each. 
Lightcurves have different # band - band1 = gen_band(central=150**6) - band2 = gen_band(central=160**6) self.dataset1 = gen_dataset('sqlalchemy test') self.dataset2 = gen_dataset('sqlalchemy test') + d1_b1 = gen_band(dataset=self.dataset1, central=150**6) + d1_b2 = gen_band(dataset=self.dataset1, central=160**6) + d2_b1 = gen_band(dataset=self.dataset2, central=150**6) + d2_b2 = gen_band(dataset=self.dataset2, central=160**6) + skyregion1 = gen_skyregion(self.dataset1) skyregion2 = gen_skyregion(self.dataset2) - lightcurve1 = gen_lightcurve(band1, self.dataset1, skyregion1) - lightcurve2 = gen_lightcurve(band2, self.dataset1, skyregion1) - lightcurve3 = gen_lightcurve(band1, self.dataset2, skyregion2) - lightcurve4 = gen_lightcurve(band2, self.dataset2, skyregion2) + lightcurve1 = gen_lightcurve(d1_b1, self.dataset1, skyregion1) + lightcurve2 = gen_lightcurve(d1_b2, self.dataset1, skyregion1) + lightcurve3 = gen_lightcurve(d2_b1, self.dataset2, skyregion2) + lightcurve4 = gen_lightcurve(d2_b2, self.dataset2, skyregion2) db_objecsts = lightcurve1 + lightcurve2 + lightcurve3 + lightcurve4 self.session.add_all(db_objecsts) self.session.flush() self.session.commit() def test_last_assoc_timestamps(self): - q = tkp.db.alchemy._last_assoc_timestamps(self.session, self.dataset1) + q = tkp.db.alchemy.varmetric._last_assoc_timestamps(self.session, self.dataset1) r = self.session.query(q).all() self.assertEqual(len(r), 2) # we have two bands def test_last_assoc_per_band(self): - q = tkp.db.alchemy._last_assoc_per_band(self.session, self.dataset1) + q = tkp.db.alchemy.varmetric._last_assoc_per_band(self.session, self.dataset1) r = self.session.query(q).all() self.assertEqual(len(r), 2) # we have two bands def test_last_ts_fmax(self): - q = tkp.db.alchemy._last_ts_fmax(self.session, self.dataset1) + q = tkp.db.alchemy.varmetric._last_ts_fmax(self.session, self.dataset1) r = self.session.query(q).all()[0] self.assertEqual(r.max_flux, 0.01) def test_newsrc_trigger(self): - q = tkp.db.alchemy._newsrc_trigger(self.session, self.dataset1) + q = tkp.db.alchemy.varmetric._newsrc_trigger(self.session, self.dataset1) self.session.query(q).all() def test_combined(self): - q = tkp.db.alchemy._combined(self.session, self.dataset1) + q = tkp.db.alchemy.varmetric._combined(self.session, self.dataset1) r = list(self.session.query(q).all()[0]) r = [item for i, item in enumerate(r) if i not in (0, 5, 6, 10, 11, 16)] shouldbe = [1.0, 1.0, 1.0, 1.0, 1, 0.0, 0.0, None, None, 0.01, 0.01] self.assertEqual(r, shouldbe) def test_calculate_varmetric(self): - r = tkp.db.alchemy.calculate_varmetric(self.session, self.dataset1).all() + r = tkp.db.alchemy.varmetric.calculate_varmetric(self.session, self.dataset1).all() self.assertEqual(len(r), 2) def test_calculate_varmetric_region(self): """ Ra & Decl filtering """ - r = tkp.db.alchemy.calculate_varmetric(self.session, self.dataset1, - ra_range=(0, 2), - decl_range=(0, 2)).all() + r = tkp.db.alchemy.varmetric.calculate_varmetric(self.session, self.dataset1, + ra_range=(0, 2), + decl_range=(0, 2)).all() self.assertEqual(len(r), 2) - r = tkp.db.alchemy.calculate_varmetric(self.session, self.dataset1, - ra_range=(20, 22), - decl_range=(20, 22)).all() + r = tkp.db.alchemy.varmetric.calculate_varmetric(self.session, self.dataset1, + ra_range=(20, 22), + decl_range=(20, 22)).all() self.assertEqual(len(r), 0) def test_calculate_varmetric_cutoff(self): """ V_int & eta_int filtering """ - r = tkp.db.alchemy.calculate_varmetric(self.session, self.dataset1, - v_int_min=0, - 
eta_int_min=0).all() + r = tkp.db.alchemy.varmetric.calculate_varmetric(self.session, self.dataset1, + v_int_min=0, + eta_int_min=0).all() self.assertEqual(len(r), 2) - q = tkp.db.alchemy.calculate_varmetric(self.session, self.dataset1, - v_int_min=1000, - eta_int_min=0) + q = tkp.db.alchemy.varmetric.calculate_varmetric(self.session, self.dataset1, + v_int_min=1000, + eta_int_min=0) r = q.all() self.assertEqual(len(r), 0) - r = tkp.db.alchemy.calculate_varmetric(self.session, self.dataset1, - v_int_min=0, - eta_int_min=1000).all() + r = tkp.db.alchemy.varmetric.calculate_varmetric(self.session, self.dataset1, + v_int_min=0, + eta_int_min=1000).all() self.assertEqual(len(r), 0) def test_calculate_varmetric_newsource(self): """ Ra & Decl filtering """ - r = tkp.db.alchemy.calculate_varmetric(self.session, self.dataset1, - new_src_only=True).all() + r = tkp.db.alchemy.varmetric.calculate_varmetric(self.session, self.dataset1, + new_src_only=True).all() self.assertEqual(len(r), 2) def test_store_varmetric(self): - q = tkp.db.alchemy.store_varmetric(self.session, self.dataset1) + q = tkp.db.alchemy.varmetric.store_varmetric(self.session, self.dataset1) self.session.execute(q) r = self.session.query(tkp.db.model.Varmetric).\ join(tkp.db.model.Varmetric.runcat).\ diff -Nru tkp-3.1.1/tests/test_database/test_algorithms.py tkp-4.0/tests/test_database/test_algorithms.py --- tkp-3.1.1/tests/test_database/test_algorithms.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_database/test_algorithms.py 2017-02-03 10:06:41.000000000 +0000 @@ -2,6 +2,8 @@ from tkp.db.orm import DataSet, Image import tkp.db import tkp.db.database +from tkp.db.associations import associate_extracted_sources +from tkp.db.general import insert_extracted_sources from tkp.testutil.decorators import requires_database from tkp.testutil import db_subs from tkp.db.generic import columns_from_table @@ -10,6 +12,7 @@ deRuiter_r = 3.7 new_source_sigma_margin = 3 + class TestSourceAssociation(unittest.TestCase): @requires_database() def setUp(self): @@ -24,7 +27,6 @@ def tearDown(self): tkp.db.rollback() - def test_null_case_sequential(self): """test_null_case_sequential @@ -33,20 +35,15 @@ """ for im in self.im_params: - self.db_imgs.append(Image( data=im, dataset=self.dataset)) - self.db_imgs[-1].insert_extracted_sources([],'blind') - self.db_imgs[-1].associate_extracted_sources(deRuiter_r, - new_source_sigma_margin) + self.db_imgs.append(Image(data=im, dataset=self.dataset)) + insert_extracted_sources(self.db_imgs[-1]._id, [],'blind') + associate_extracted_sources(self.db_imgs[-1]._id, deRuiter_r, + new_source_sigma_margin) running_cat = columns_from_table(table="runningcatalog", - keywords="*", - where={"dataset":self.dataset.id}) + keywords="*", + where={"dataset":self.dataset.id}) self.assertEqual(len(running_cat), 0) -# def test_null_case_post_insert(self): -# for im in self.im_params: -# self.db_imgs.append( tkp.db.Image( data=im, dataset=self.dataset) ) -# pass - def test_only_first_epoch_source(self): """test_only_first_epoch_source @@ -56,33 +53,30 @@ - Check runcat and assocxtrsource are correct. 
""" - - first_epoch = True - extracted_source_ids=[] + extracted_source_ids = [] for im in self.im_params: - self.db_imgs.append( Image( data=im, dataset=self.dataset) ) - last_img =self.db_imgs[-1] + self.db_imgs.append(Image( data=im, dataset=self.dataset)) + last_img = self.db_imgs[-1] if first_epoch: - last_img.insert_extracted_sources( - [db_subs.example_extractedsource_tuple()],'blind') + insert_extracted_sources(last_img._id, + [db_subs.example_extractedsource_tuple()], 'blind') - last_img.associate_extracted_sources(deRuiter_r, - new_source_sigma_margin) + associate_extracted_sources(last_img._id, deRuiter_r, + new_source_sigma_margin) - #First, check the runcat has been updated correctly: + # First, check the runcat has been updated correctly running_cat = columns_from_table(table="runningcatalog", - keywords=['datapoints'], - where={"dataset":self.dataset.id}) + keywords=['datapoints'], + where={"dataset": self.dataset.id}) self.assertEqual(len(running_cat), 1) self.assertEqual(running_cat[0]['datapoints'], 1) last_img.update() last_img.update_sources() img_xtrsrc_ids = [src.id for src in last_img.sources] -# print "ImageID:", last_img.id -# print "Imgs sources:", img_xtrsrc_ids + if first_epoch: self.assertEqual(len(img_xtrsrc_ids),1) extracted_source_ids.extend(img_xtrsrc_ids) @@ -107,7 +101,6 @@ self.assertEqual(assocxtrsrcs_rows[0]['xtrsrc'], extracted_source_ids[0], "Runcat xtrsrc entry must match the only extracted source") - def test_single_fixed_source(self): """test_single_fixed_source @@ -119,12 +112,12 @@ fixed_src_runcat_id = None for img_idx, im in enumerate(self.im_params): - self.db_imgs.append( Image( data=im, dataset=self.dataset) ) - last_img =self.db_imgs[-1] - last_img.insert_extracted_sources( + self.db_imgs.append( Image(data=im, dataset=self.dataset)) + last_img = self.db_imgs[-1] + insert_extracted_sources(last_img._id, [db_subs.example_extractedsource_tuple()],'blind') - last_img.associate_extracted_sources(deRuiter_r, - new_source_sigma_margin) + associate_extracted_sources(last_img._id, deRuiter_r, + new_source_sigma_margin) running_cat = columns_from_table(table="runningcatalog", keywords=['id', 'datapoints'], @@ -132,7 +125,7 @@ self.assertEqual(len(running_cat), 1) self.assertEqual(running_cat[0]['datapoints'], img_idx+1) - #Check runcat ID does not change for a steady single source + # Check runcat ID does not change for a steady single source if img_idx == 0: fixed_src_runcat_id = running_cat[0]['id'] self.assertIsNotNone(fixed_src_runcat_id, "No runcat id assigned to source") diff -Nru tkp-3.1.1/tests/test_database/test_associations.py tkp-4.0/tests/test_database/test_associations.py --- tkp-3.1.1/tests/test_database/test_associations.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_database/test_associations.py 2017-02-03 10:06:41.000000000 +0000 @@ -11,6 +11,7 @@ from tkp.db.orm import DataSet import tkp.db.associations as assoc_subs from tkp.db.associations import associate_extracted_sources +from tkp.db.general import insert_extracted_sources from tkp.db.generic import columns_from_table, get_db_rows_as_dicts from tkp.testutil import db_subs from tkp.testutil.decorators import requires_database @@ -175,7 +176,7 @@ for im_param in im_params: image = tkp.db.Image(dataset=dataset, data=im_param) - image.insert_extracted_sources([extracted_source]) + insert_extracted_sources(image._id, [extracted_source]) # NB choice of De Ruiter radius is arbitrary. 
associate_extracted_sources(image.id, deRuiter_r=1.0) @@ -333,7 +334,7 @@ for im in im_list: image = tkp.db.Image(dataset=dataset, data=im) - image.insert_extracted_sources(srcs) + insert_extracted_sources(image._id, srcs) associate_extracted_sources(image.id, deRuiter_r=3.717) runcat = columns_from_table('runningcatalog', ['wm_ra'], @@ -367,7 +368,7 @@ for im in imgs_input_data: image = tkp.db.Image(dataset=dataset, data=im) - image.insert_extracted_sources([src]) + insert_extracted_sources(image._id, [src]) associate_extracted_sources(image.id, deRuiter_r=3.717) runcat = columns_from_table('runningcatalog', ['wm_ra'], @@ -395,7 +396,7 @@ for im in im_list: image = tkp.db.Image(dataset=dataset, data=im) - image.insert_extracted_sources(sources) + insert_extracted_sources(image._id, sources) associate_extracted_sources(image.id, deRuiter_r=3.717) runcat = columns_from_table('runningcatalog', ['wm_ra'], @@ -406,7 +407,6 @@ self.assertAlmostEqual(wm_ras[ctr], ra) -#@unittest.skip @requires_database() class TestMeridianOne2One(unittest.TestCase): """ @@ -441,7 +441,7 @@ source = db_subs.example_extractedsource_tuple(ra=0.0, dec=0.0) for im in im_list: image = tkp.db.Image(dataset=dataset, data=im) - image.insert_extracted_sources([source]) + insert_extracted_sources(image._id, [source]) associate_extracted_sources(image.id, deRuiter_r=3.717) runcat = columns_from_table('runningcatalog', ['wm_ra'], @@ -481,10 +481,10 @@ for ra, dec in zip(ras, decs)] image1 = tkp.db.Image(dataset=dataset, data=im1) - image1.insert_extracted_sources(im1_srcs) + insert_extracted_sources(image1._id, im1_srcs) associate_extracted_sources(image1.id, deRuiter_r=3.717) image2 = tkp.db.Image(dataset=dataset, data=im2) - image2.insert_extracted_sources(im2_srcs) + insert_extracted_sources(image2._id, im2_srcs) associate_extracted_sources(image2.id, deRuiter_r=3.717) runcat = columns_from_table('runningcatalog', ['wm_ra', 'wm_decl'], @@ -520,16 +520,15 @@ for idx, im in enumerate(im_params): im['centre_ra'] = 359.9 image = tkp.db.Image(dataset=dataset, data=im) - image.insert_extracted_sources([src_list[idx]]) + insert_extracted_sources(image._id, [src_list[idx]]) associate_extracted_sources(image.id, deRuiter_r=3.717) runcat = columns_from_table('runningcatalog', ['datapoints', 'wm_ra'], - where={'dataset':dataset.id}) -# print "***\nRESULTS:", runcat, "\n*****" + where={'dataset': dataset.id}) self.assertEqual(len(runcat), 1) self.assertEqual(runcat[0]['datapoints'], 3) - avg_ra = ((src0.ra+180)%360 + (src1.ra+180)%360 + (src2.ra+180)%360)/3 - 180 - #Ensure our Python calculation is wrapped to positive: - avg_ra = (avg_ra + 360.0)%360.0 + avg_ra = ((src0.ra+180) % 360 + (src1.ra+180) % 360 + (src2.ra+180) % 360)/3 - 180 + # Ensure our Python calculation is wrapped to positive + avg_ra = (avg_ra + 360.0) % 360.0 self.assertAlmostEqual(runcat[0]['wm_ra'], avg_ra) def TestMeridianCrossHighLowEdgeCase(self): @@ -551,16 +550,15 @@ for idx, im in enumerate(im_params): im['centre_ra'] = 359.9 image = tkp.db.Image(dataset=dataset, data=im) - image.insert_extracted_sources([src_list[idx]]) + insert_extracted_sources(image._id, [src_list[idx]]) associate_extracted_sources(image.id, deRuiter_r=3.717) runcat = columns_from_table( 'runningcatalog', ['datapoints', 'wm_ra'], - where={'dataset':dataset.id}) -# print "***\nRESULTS:", runcat, "\n*****" + where={'dataset': dataset.id}) self.assertEqual(len(runcat), 1) self.assertEqual(runcat[0]['datapoints'], 3) - avg_ra = ((src0.ra+180)%360 + (src1.ra+180)%360 + (src2.ra+180)%360)/3 - 
180 - #Ensure our Python calculation is wrapped to positive: + avg_ra = ((src0.ra+180)%360 + (src1.ra+180) % 360 + (src2.ra+180) % 360)/3 - 180 + # Ensure our Python calculation is wrapped to positive avg_ra = (avg_ra + 360.0)%360.0 self.assertAlmostEqual(runcat[0]['wm_ra'], avg_ra) @@ -583,16 +581,15 @@ for idx, im in enumerate(im_params): im['centre_ra'] = 359.9 image = tkp.db.Image(dataset=dataset, data=im) - image.insert_extracted_sources([src_list[idx]]) + insert_extracted_sources(image._id, [src_list[idx]]) associate_extracted_sources(image.id, deRuiter_r=3.717) runcat = columns_from_table('runningcatalog', ['datapoints', 'wm_ra'], - where={'dataset':dataset.id}) -# print "***\nRESULTS:", runcat, "\n*****" + where={'dataset': dataset.id}) self.assertEqual(len(runcat), 1) self.assertEqual(runcat[0]['datapoints'], 3) avg_ra = (src0.ra + src1.ra +src2.ra)/3 - #Ensure our Python calculation is wrapped to positive: - avg_ra = (avg_ra + 360.0)%360.0 + # Ensure our Python calculation is wrapped to positive + avg_ra = (avg_ra + 360.0) % 360.0 self.assertAlmostEqual(runcat[0]['wm_ra'], avg_ra) def TestMeridianLowerEdgeCase(self): @@ -618,16 +615,15 @@ for idx, im in enumerate(im_params): im['centre_ra'] = 359.9 image = tkp.db.Image(dataset=dataset, data=im) - image.insert_extracted_sources([src_list[idx]]) + insert_extracted_sources(image._id, [src_list[idx]]) associate_extracted_sources(image.id, deRuiter_r=3.717) runcat = columns_from_table('runningcatalog', ['datapoints', 'wm_ra'], where={'dataset':dataset.id}) -# print "***\nRESULTS:", runcat, "\n*****" self.assertEqual(len(runcat), 1) self.assertEqual(runcat[0]['datapoints'], 3) avg_ra = (src0.ra + src1.ra +src2.ra)/3 - #Ensure our Python calculation is wrapped to positive: - avg_ra = (avg_ra + 360.0)%360.0 + # Ensure our Python calculation is wrapped to positive + avg_ra = (avg_ra + 360.0) % 360.0 self.assertAlmostEqual(runcat[0]['wm_ra'], avg_ra) def TestDeRuiterCalculation(self): @@ -637,12 +633,11 @@ im_params = db_subs.generate_timespaced_dbimages_data(n_images, centre_ra=10, centre_decl=0) - - #Note ra / ra_fit_err are in degrees. + # Note ra / ra_fit_err are in degrees. # ew_sys_err is in arcseconds, but we set it = 0 so doesn't matter. - #ra_fit_err cannot be zero or we get div by zero errors. - #Also, there is a hard limit on association radii: - #currently this defaults to 0.03 degrees== 108 arcseconds + # ra_fit_err cannot be zero or we get div by zero errors. + # Also, there is a hard limit on association radii: + # currently this defaults to 0.03 degrees== 108 arcseconds src0 = db_subs.example_extractedsource_tuple(ra=10.00, dec=0.0, error_radius=10.0, ew_sys_err=0.0, ns_sys_err=0.0) @@ -658,17 +653,17 @@ for idx in [0, 1]: image = tkp.db.Image(dataset=dataset, - data=im_params[idx]) - image.insert_extracted_sources([src_list[idx]]) - #Peform very loose association since we just want to store DR value. + data=im_params[idx]) + insert_extracted_sources(image._id, [src_list[idx]]) + # Perform very loose association since we just want to store DR value. 
associate_extracted_sources(image.id, deRuiter_r=100) runcat = columns_from_table('runningcatalog', ['id'], - where={'dataset':dataset.id}) -# print "***\nRESULTS:", runcat, "\n*****" + where={'dataset': dataset.id}) + self.assertEqual(len(runcat), 1) assoc = columns_from_table('assocxtrsource', ['r'], - where={'runcat':runcat[0]['id']}) -# print "Got assocs:", assoc + where={'runcat': runcat[0]['id']}) + self.assertEqual(len(assoc), 2) self.assertAlmostEqual(assoc[1]['r'], expected_DR_radius) @@ -688,7 +683,7 @@ for p in positions] image = tkp.db.Image(dataset=dataset, data=im_list[0]) - image.insert_extracted_sources(sources1) + insert_extracted_sources(image._id, sources1) associate_extracted_sources(image.id, deRuiter_r=3.717) # Now shift the positions to be associated in both RA and Dec, @@ -700,14 +695,14 @@ sources2 = [db_subs.example_extractedsource_tuple(ra=p[0], dec=p[1]) for p in positions] - + expected_dr1 = db_subs.deRuiter_radius(sources1[0], sources2[0]) expected_dr2 = db_subs.deRuiter_radius(sources1[1], sources2[1]) image = tkp.db.Image(dataset=dataset, data=im_list[1]) - image.insert_extracted_sources(sources2) + insert_extracted_sources(image._id, sources2) associate_extracted_sources(image.id, deRuiter_r=1e6) - + # Now inspect the contents of assocxtrsource: # Order results by runningcatalog id, then DR radius. query = """\ @@ -729,7 +724,7 @@ runcat = dr_result[0] xtrsrc = dr_result[1] dr_radius = dr_result[2] - + self.assertEqual(len(runcat), 4) # Ordered by image and position, since we cannot rely on the # generated ids by the db. @@ -740,7 +735,7 @@ self.assertAlmostEqual(dr_radius[1], 0) self.assertAlmostEqual(dr_radius[2], expected_dr1) self.assertAlmostEqual(dr_radius[3], expected_dr2) - + class TestOne2Many(unittest.TestCase): """ @@ -931,7 +926,6 @@ self.assertEqual(count[0][0], 0) - class TestMany2One(unittest.TestCase): """ These tests will check the many-to-1 source associations, i.e. one extractedsource @@ -1137,7 +1131,7 @@ def tearDown(self): """remove all stuff after the test has been run""" tkp.db.rollback() - + def insert_many_to_many_sources(self, dataset, im_params, image1_srcs, image2_srcs, dr_limit): diff -Nru tkp-3.1.1/tests/test_database/test_band.py tkp-4.0/tests/test_database/test_band.py --- tkp-3.1.1/tests/test_database/test_band.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_database/test_band.py 2017-02-03 10:06:41.000000000 +0000 @@ -1,13 +1,36 @@ import unittest -import datetime from tkp.testutil.decorators import requires_database from tkp.testutil import db_subs import tkp.db from tkp.db.orm import DataSet, Image -from tkp.db.database import Database from copy import copy + +def get_band_for_image(image): + """ + Returns the band number corresponding to a particular image + """ + return tkp.db.execute(""" + SELECT band + FROM image + WHERE image.id = %(id)s + """, {"id": image.id}).fetchone()[0] + + +def get_freq_for_image(image): + """ + Returns the stored frequency corresponding to a particular image + """ + return tkp.db.execute(""" + SELECT freq_central + FROM image + ,frequencyband + WHERE image.id = %(id)s + AND image.band = frequencyband.id + """, {"id": image.id}).fetchone()[0] + + class TestBand(unittest.TestCase): def setUp(self): @@ -25,14 +48,6 @@ # same band (unless they fall over a band boundary). See #4801. # Bands are 1 MHz wide and centred on the MHz. - def get_band_for_image(image): - # Returns the band number corresponding to a particular image. 
- return tkp.db.execute(""" - SELECT band - FROM image - WHERE image.id = %(id)s - """, {"id": image.id}).fetchone()[0] - data = copy(self.image_data) dataset1 = DataSet(data={'description': self._testMethodName}, database=self.database) @@ -47,16 +62,16 @@ image2 = Image(dataset=dataset1, data=data) self.assertEqual(get_band_for_image(image1), get_band_for_image(image2)) - # Another image at a frequency 1 MHz different should be in + # Another image at a frequency 10 MHz different should be in a different band... - data['freq_eff'] = data['freq_eff'] - 1e6 + data['freq_eff'] -= 1e7 image3 = Image(dataset=dataset1, data=data) self.assertNotEqual(get_band_for_image(image1), get_band_for_image(image3)) - # ...even if it has a huge bandwidth. + # but in the same band if the bandwidths overlap data['freq_bw'] *= 100 image4 = Image(dataset=dataset1, data=data) - self.assertNotEqual(get_band_for_image(image1), get_band_for_image(image4)) + self.assertEqual(get_band_for_image(image1), get_band_for_image(image4)) # Finally, this image should be in the same band, since it's at an only # slightly different frequency. @@ -70,35 +85,49 @@ """ Determine range of frequencies supported by DB schema. """ - - def get_freq_for_image(image): - # Returns the stored frequency corresponding to a particular image. - return tkp.db.execute(""" - SELECT freq_central - FROM image - ,frequencyband - WHERE image.id = %(id)s - AND image.band = frequencyband.id - """, {"id": image.id}).fetchone()[0] - dataset = DataSet(data={'description': self._testMethodName}, - database=self.database) + database=self.database) data = copy(self.image_data) data['freq_eff'] = 1e6 # 1MHz - data['freq_bw'] = 1e3 # 1KHz + data['freq_bw'] = 1e3 # 1KHz mhz_freq_image = Image(dataset=dataset, data=data) self.assertEqual(data['freq_eff'], get_freq_for_image(mhz_freq_image)) data['freq_eff'] = 100e9 # 100 GHz (e.g. CARMA) - data['freq_bw'] = 5e9 # 5GHz + data['freq_bw'] = 5e9 # 5GHz ghz_freq_image = Image(dataset=dataset, data=data) self.assertEqual(data['freq_eff'], get_freq_for_image(ghz_freq_image)) data['freq_eff'] = 5e15 # 5 PHz (e.g. 
UV obs) - data['freq_bw'] = 1e14 # 5GHz + data['freq_bw'] = 1e14 # 100 THz phz_freq_image = Image(dataset=dataset, data=data) self.assertEqual(data['freq_eff'], get_freq_for_image(phz_freq_image)) -if __name__ == "__main__": - unittest.main() + def test_max_bandwidth(self): + """ + Test if setting max bandwidth correctly affects the band of images + """ + data = copy(self.image_data) + dataset_data = {'description': self._testMethodName} + dataset = DataSet(data=dataset_data, database=self.database) + + data['freq_eff'] = 50e6 # 50 MHz + data['freq_bw'] = 2e6 # 2 MHz + data['freq_bw_max'] = 0.0 # no band association limiting + first_image = Image(dataset=dataset, data=data) + + # this image should be assigned the same band, since it is within the bandwidth + data['freq_eff'] = 51e6 # 51 MHz + data['freq_bw'] = 2e6 # 2 MHz + data['freq_bw_max'] = 0.0 # no band association limiting + associated_image = Image(dataset=dataset, data=data) + self.assertEqual(get_band_for_image(first_image), get_band_for_image(associated_image)) + + # this image should *not* be assigned the same band, since bandwidth is + # limited + data['freq_eff'] = 47e6 # 47 MHz + data['freq_bw'] = 5e6 # 5 MHz + data['freq_bw_max'] = 0.5e5 # limit bandwidth to 0.05 MHz + associated_image = Image(dataset=dataset, data=data) + self.assertNotEqual(get_band_for_image(first_image), get_band_for_image(associated_image)) diff -Nru tkp-3.1.1/tests/test_database/test_delete.py tkp-4.0/tests/test_database/test_delete.py --- tkp-3.1.1/tests/test_database/test_delete.py 1970-01-01 00:00:00.000000000 +0000 +++ tkp-4.0/tests/test_database/test_delete.py 2017-02-03 10:06:41.000000000 +0000 @@ -0,0 +1,53 @@ +import unittest +from tkp.db.database import Database +from tkp.testutil.alchemy import gen_dataset, gen_image, gen_band,\ + gen_skyregion, gen_runningcatalog, gen_extractedsource +from tkp.db.model import Image + +import logging + +logging.basicConfig() +logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) + + +class TestDeleteDataset(unittest.TestCase): + def test_delete_dataset(self): + db = Database() + dataset = gen_dataset('delete test') + band = gen_band(dataset) + skyregion = gen_skyregion(dataset) + image = gen_image(band, dataset, skyregion) + extractedsource = gen_extractedsource(image) + runningcatalog = gen_runningcatalog(extractedsource, dataset) + db.session.add_all(( + dataset, band, skyregion, image, extractedsource, runningcatalog, + )) + db.session.flush() + + db.session.delete(dataset) + db.session.flush() + + images = db.session.query(Image).filter(Image.dataset==dataset).all() + self.assertEqual(len(images), 0) + +# this depends on store_in_db PR +""" +class TestDeleteImageData(unittest.TestCase): + def test_delete_dataset(self): + db = Database() + dataset = gen_dataset('delete test') + band = gen_band(dataset) + skyregion = gen_skyregion(dataset) + image = gen_image(band, dataset, skyregion) + image.fits_data = 'bigdata' + db.session.add_all((dataset, band, skyregion, image)) + db.session.flush() + + db.session.query(Image).\ + filter(Image.dataset==dataset).\ + update({Image.fits_data: None}) + db.session.flush() + + for i in db.session.query(Image).filter(Image.dataset==dataset).all(): + self.assertEqual(i.fits_data, None) +""" \ No newline at end of file diff -Nru tkp-3.1.1/tests/test_database/test_expire.py tkp-4.0/tests/test_database/test_expire.py --- tkp-3.1.1/tests/test_database/test_expire.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_database/test_expire.py 2017-02-03 10:06:41.000000000 +0000 @@ 
-40,8 +40,8 @@ make a dataset with 10 images with 2 ligthcurves + one empty image """ self.session = self.database.Session() - self.band = gen_band(central=150**6) self.dataset = gen_dataset('expiring runningcatalog test') + self.band = gen_band(dataset=self.dataset, central=150**6) skyregion = gen_skyregion(self.dataset) datapoints = 10 diff -Nru tkp-3.1.1/tests/test_database/test_fluxes.py tkp-4.0/tests/test_database/test_fluxes.py --- tkp-3.1.1/tests/test_database/test_fluxes.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_database/test_fluxes.py 2017-02-03 10:06:41.000000000 +0000 @@ -3,6 +3,7 @@ import tkp.db.general as dbgen from tkp.db.generic import get_db_rows_as_dicts, columns_from_table from tkp.db.associations import associate_extracted_sources +from tkp.db.general import insert_extracted_sources from tkp.testutil import db_subs from tkp.testutil import db_queries from tkp.testutil.decorators import requires_database @@ -37,7 +38,7 @@ for idx, im in enumerate(im_params): image = tkp.db.Image(database=self.database, dataset=dataset, data=im) - image.insert_extracted_sources([src_list[idx]]) + insert_extracted_sources(image._id, [src_list[idx]]) associate_extracted_sources(image.id, deRuiter_r=3.717) query = """\ @@ -526,7 +527,7 @@ for idx, im in enumerate(im_params): image = tkp.db.Image(database=self.database, dataset=dataset, data=im) - image.insert_extracted_sources([src_list[idx]]) + insert_extracted_sources(image._id, [src_list[idx]]) associate_extracted_sources(image.id, deRuiter_r=3.717) query = """\ diff -Nru tkp-3.1.1/tests/test_database/test_image_store.py tkp-4.0/tests/test_database/test_image_store.py --- tkp-3.1.1/tests/test_database/test_image_store.py 1970-01-01 00:00:00.000000000 +0000 +++ tkp-4.0/tests/test_database/test_image_store.py 2017-02-03 10:06:41.000000000 +0000 @@ -0,0 +1,32 @@ +from os import path +import unittest +import cPickle +from astropy.io import fits +from astropy.io.fits.header import Header +from tkp.db.image_store import store_fits +from tkp.db.model import Image +from tkp.db.database import Database +from tkp.testutil.data import DATAPATH +from tkp.testutil.alchemy import gen_image + +FITS_FILE = path.join(DATAPATH, 'accessors/aartfaac.fits') + + + +class TestImageStore(unittest.TestCase): + def setUp(self): + self.db = Database() + self.image = gen_image() + self.db.session.add(self.image) + self.db.session.flush() + + def test_image_store(self): + fits_object = fits.open(FITS_FILE) + expected_data = cPickle.dumps(fits_object[0].data) + expected_header = fits_object[0].header + store_fits([self.image], [expected_data], [str(expected_header)]) + fetched_image = self.db.session.query(Image).filter(Image.id==self.image.id).first() + returned_data = cPickle.loads(fetched_image.data.fits_data) + returned_header = Header.fromstring(fetched_image.data.fits_header) + self.assertTrue((returned_data, expected_data)) + self.assertEqual(returned_header, expected_header) diff -Nru tkp-3.1.1/tests/test_database/test_lightcurve.py tkp-4.0/tests/test_database/test_lightcurve.py --- tkp-3.1.1/tests/test_database/test_lightcurve.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_database/test_lightcurve.py 2017-02-03 10:06:41.000000000 +0000 @@ -1,17 +1,16 @@ -from operator import attrgetter, itemgetter -from collections import namedtuple - +from operator import attrgetter import unittest - import datetime from tkp.testutil.decorators import requires_database -from tkp.testutil import db_queries from tkp.testutil import db_subs 
from tkp.testutil import db_queries from tkp.db.orm import DataSet from tkp.db.orm import Image import tkp.db -from tkp.db.generic import get_db_rows_as_dicts, columns_from_table +from tkp.db.associations import associate_extracted_sources +from tkp.db.general import insert_extracted_sources +from tkp.db.general import lightcurve as ligtcurve_func +from tkp.db.generic import get_db_rows_as_dicts, columns_from_table class TestLightCurve(unittest.TestCase): @@ -58,9 +57,9 @@ ) lightcurves_sorted_by_ra[src_idx].append(src) img_sources.append(src) - image.insert_extracted_sources(img_sources) - image.associate_extracted_sources(deRuiter_r=3.7, - new_source_sigma_margin=3) + insert_extracted_sources(image._id, img_sources) + associate_extracted_sources(image._id, deRuiter_r=3.7, + new_source_sigma_margin=3) # updates the dataset and its set of images self.dataset.update() @@ -75,7 +74,7 @@ # and extract its light curve sources = self.dataset.images[-1].sources sources = sorted(sources, key=attrgetter('ra')) - lightcurve = sources[0].lightcurve() + lightcurve = ligtcurve_func(sources[0]._id) # check if the sources are associated in all images self.assertEqual(len(images), len(lightcurve)) diff -Nru tkp-3.1.1/tests/test_database/test_lightsurface.py tkp-4.0/tests/test_database/test_lightsurface.py --- tkp-3.1.1/tests/test_database/test_lightsurface.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_database/test_lightsurface.py 2017-02-03 10:06:41.000000000 +0000 @@ -6,6 +6,8 @@ from tkp.testutil.decorators import requires_database from tkp.db.orm import DataSet from tkp.db.orm import Image +from tkp.db.associations import associate_extracted_sources +from tkp.db.general import insert_extracted_sources import tkp.db from tkp.testutil import db_subs @@ -64,11 +66,11 @@ sources.append(source) # Insert the sources - image.insert_extracted_sources(sources) + insert_extracted_sources(image._id, sources) # Run the association for each list of source for an image - image.associate_extracted_sources(deRuiter_r=3.7, - new_source_sigma_margin=3) + associate_extracted_sources(image._id, deRuiter_r=3.7, + new_source_sigma_margin=3) # updates the dataset and its set of images self.dataset.update() diff -Nru tkp-3.1.1/tests/test_database/test_orm.py tkp-4.0/tests/test_database/test_orm.py --- tkp-3.1.1/tests/test_database/test_orm.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_database/test_orm.py 2017-02-03 10:06:41.000000000 +0000 @@ -9,6 +9,7 @@ from tkp.db.orm import ExtractedSource from tkp.testutil import db_subs from tkp.db.generic import columns_from_table +from tkp.db.general import insert_extracted_sources # We're cheating here: a unit test shouldn't really depend on an # external dependency like the database being up and running @@ -195,7 +196,7 @@ # Inserting a standard example extractedsource should be fine extracted_source = db_subs.example_extractedsource_tuple() - image.insert_extracted_sources([extracted_source]) + insert_extracted_sources(image._id, [extracted_source]) inserted = columns_from_table('extractedsource', where= {'image' : image.id}) self.assertEqual(len(inserted), 1) @@ -211,7 +212,7 @@ hdlr = logging.StreamHandler(iostream) logging.getLogger().addHandler(hdlr) - image.insert_extracted_sources([extracted_source]) + insert_extracted_sources(image._id, [extracted_source]) logging.getLogger().removeHandler(hdlr) # We want to be sure that the error has been appropriately logged. 
diff -Nru tkp-3.1.1/tests/test_database/test_reject.py tkp-4.0/tests/test_database/test_reject.py --- tkp-3.1.1/tests/test_database/test_reject.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_database/test_reject.py 2017-02-03 10:06:41.000000000 +0000 @@ -4,6 +4,8 @@ from tkp.testutil.decorators import requires_database import tkp.db.quality as db_quality from tkp.db.associations import associate_extracted_sources +from tkp.db.general import insert_extracted_sources + @requires_database() class TestRejection(unittest.TestCase): @@ -40,8 +42,8 @@ im_params in db_subs.generate_timespaced_dbimages_data(n_images) ] - # The first image is rejected for an arbitrary reason - # (for the sake of argument, we use an unacceptable RMS). + # The first image is rejected for an arbitrary reason + # (for the sake of argument, we use an unacceptable RMS). db_quality.reject( imageid=db_imgs[0].id, reason=db_quality.reject_reasons['rms'], @@ -54,7 +56,7 @@ # Since we rejected the first image, we only find a source in the # second. source = db_subs.example_extractedsource_tuple() - db_imgs[1].insert_extracted_sources([source]) + insert_extracted_sources(db_imgs[1]._id, [source]) # Standard source association procedure etc. associate_extracted_sources(db_imgs[1].id, deRuiter_r=3.7, diff -Nru tkp-3.1.1/tests/test_database/test_skyregion.py tkp-4.0/tests/test_database/test_skyregion.py --- tkp-3.1.1/tests/test_database/test_skyregion.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_database/test_skyregion.py 2017-02-03 10:06:41.000000000 +0000 @@ -4,6 +4,8 @@ from tkp.testutil import db_subs from tkp.testutil.decorators import requires_database, duration from tkp.db.generic import columns_from_table, get_db_rows_as_dicts +from tkp.db.associations import associate_extracted_sources +from tkp.db.general import insert_extracted_sources # Convenient default values deRuiter_r = 3.7 @@ -98,10 +100,10 @@ ra=im_params[0]['centre_ra'], dec=im_params[0]['centre_decl'],) - ##First image: + # First image image0 = tkp.db.Image(dataset=self.dataset, data=im_params[0]) - image0.insert_extracted_sources([src_in_img0]) - image0.associate_extracted_sources(deRuiter_r, new_source_sigma_margin) + insert_extracted_sources(image0._id, [src_in_img0]) + associate_extracted_sources(image0._id, deRuiter_r, new_source_sigma_margin) image0.update() runcats = columns_from_table('runningcatalog', @@ -159,11 +161,11 @@ dec=im_params[1]['centre_decl'] + im_params[1]['xtr_radius'] * 0.5) - ##First insert new sources in img1 and check association to parent field: - ## (This is always asserted without calculation, for efficiency) + # First insert new sources in img1 and check association to parent field + # This is always asserted without calculation, for efficiency image1 = tkp.db.Image(dataset=self.dataset, data=im_params[1]) - image1.insert_extracted_sources([src_in_imgs_0_1, src_in_img_1_only]) - image1.associate_extracted_sources(deRuiter_r, new_source_sigma_margin) + insert_extracted_sources(image1._id, [src_in_imgs_0_1, src_in_img_1_only]) + associate_extracted_sources(image1._id, deRuiter_r, new_source_sigma_margin) image1.update() runcats = columns_from_table('runningcatalog', @@ -219,13 +221,13 @@ src_b = src_a._replace(ra=src_a.ra + 1. / 60.) 
# 1 arcminute offset imgs = [] imgs.append(tkp.db.Image(dataset=self.dataset, data=im_params[idx])) - imgs[idx].insert_extracted_sources([src_a]) - imgs[idx].associate_extracted_sources(deRuiter_r, new_source_sigma_margin) + insert_extracted_sources(imgs[idx]._id, [src_a]) + associate_extracted_sources(imgs[idx]._id, deRuiter_r, new_source_sigma_margin) idx = 1 imgs.append(tkp.db.Image(dataset=self.dataset, data=im_params[idx])) - imgs[idx].insert_extracted_sources([src_a, src_b]) - imgs[idx].associate_extracted_sources(deRuiter_r, new_source_sigma_margin) + insert_extracted_sources(imgs[idx]._id, [src_a, src_b]) + associate_extracted_sources(imgs[idx]._id, deRuiter_r, new_source_sigma_margin) imgs[idx].update() runcats = columns_from_table('runningcatalog', where={'dataset':self.dataset.id}) @@ -267,8 +269,8 @@ dec=im_params[idx]['centre_decl']) imgs.append(tkp.db.Image(dataset=self.dataset, data=im_params[idx])) - imgs[idx].insert_extracted_sources([central_src]) - imgs[idx].associate_extracted_sources(deRuiter_r, new_source_sigma_margin) + insert_extracted_sources(imgs[idx]._id, [central_src]) + associate_extracted_sources(imgs[idx]._id, deRuiter_r, new_source_sigma_margin) runcats = columns_from_table('runningcatalog', where={'dataset':self.dataset.id}) @@ -317,16 +319,16 @@ imgs.append(tkp.db.Image(dataset=self.dataset, data=im_params[0])) imgs.append(tkp.db.Image(dataset=self.dataset, data=im_params[1])) - imgs[0].insert_extracted_sources([lower_steady_src, overlap_steady_src]) - imgs[0].associate_extracted_sources(deRuiter_r=0.1, - new_source_sigma_margin=new_source_sigma_margin) + insert_extracted_sources(imgs[0]._id, [lower_steady_src, overlap_steady_src]) + associate_extracted_sources(imgs[0]._id, deRuiter_r=0.1, + new_source_sigma_margin=new_source_sigma_margin) nd_posns = dbnd.get_nulldetections(imgs[0].id) self.assertEqual(len(nd_posns), 0) - imgs[1].insert_extracted_sources([upper_steady_src, overlap_steady_src, - overlap_transient]) - imgs[1].associate_extracted_sources(deRuiter_r=0.1, - new_source_sigma_margin=new_source_sigma_margin) + insert_extracted_sources(imgs[1]._id, [upper_steady_src, overlap_steady_src, + overlap_transient]) + associate_extracted_sources(imgs[1]._id, deRuiter_r=0.1, + new_source_sigma_margin=new_source_sigma_margin) nd_posns = dbnd.get_nulldetections(imgs[1].id) self.assertEqual(len(nd_posns), 0) @@ -377,16 +379,16 @@ imgs.append(tkp.db.Image(dataset=self.dataset, data=im_params[0])) imgs.append(tkp.db.Image(dataset=self.dataset, data=im_params[1])) - imgs[0].insert_extracted_sources([lower_steady_src, overlap_steady_src, - overlap_transient]) - imgs[0].associate_extracted_sources(deRuiter_r=0.1, - new_source_sigma_margin=new_source_sigma_margin) + insert_extracted_sources(imgs[0]._id, [lower_steady_src, overlap_steady_src, + overlap_transient]) + associate_extracted_sources(imgs[0]._id, deRuiter_r=0.1, + new_source_sigma_margin=new_source_sigma_margin) nd_posns = dbnd.get_nulldetections(imgs[0].id) self.assertEqual(len(nd_posns), 0) - imgs[1].insert_extracted_sources([upper_steady_src, overlap_steady_src]) - imgs[1].associate_extracted_sources(deRuiter_r=0.1, - new_source_sigma_margin=new_source_sigma_margin) + insert_extracted_sources(imgs[1]._id, [upper_steady_src, overlap_steady_src]) + associate_extracted_sources(imgs[1]._id, deRuiter_r=0.1, + new_source_sigma_margin=new_source_sigma_margin) #This time we don't expect to get an immediate transient detection, #but we *do* expect to get a null-source forced extraction request: nd_posns = 
dbnd.get_nulldetections(imgs[1].id) diff -Nru tkp-3.1.1/tests/test_database/test_sql/test_view.py tkp-4.0/tests/test_database/test_sql/test_view.py --- tkp-3.1.1/tests/test_database/test_sql/test_view.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_database/test_sql/test_view.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,98 +0,0 @@ -import unittest -import tkp.db -from tkp.testutil import db_subs - - -class TestAugmentedRunningcatalog(unittest.TestCase): - def setUp(self): - """ - create a fake transient. Taken from the transient test. - :return: - """ - self.database = tkp.db.Database() - self.dataset = tkp.db.DataSet(data={'description': - "Augmented Runningcatalog test"}, - database=self.database) - - self.n_images = 4 - self.new_source_sigma_margin = 3 - image_rms = 1e-3 - detection_thresh = 10 - - self.search_params = {'eta_min': 1, 'v_min': 0.1} - - self.barely_detectable_flux = 1.01 * image_rms * detection_thresh - self.reliably_detectable_flux = 1.01 * image_rms * (detection_thresh + - self.new_source_sigma_margin) - - # 1mJy image RMS, 10-sigma detection threshold = 10mJy threshold. - test_specific_img_params = {'rms_qc': image_rms, 'rms_min': image_rms, - 'rms_max': image_rms, - 'detection_thresh': detection_thresh} - - self.im_params = db_subs.generate_timespaced_dbimages_data( - self.n_images, **test_specific_img_params) - - im_params = self.im_params - src_tuple = db_subs.example_extractedsource_tuple(ra=im_params[0]['centre_ra'], - dec=im_params[0]['centre_decl'],) - transient_src = db_subs.MockSource( - template_extractedsource=src_tuple, - lightcurve={im_params[2]['taustart_ts']: - self.reliably_detectable_flux} - ) - - for img_pars in im_params: - db_subs.insert_image_and_simulated_sources(self.dataset, img_pars, - [transient_src], - self.new_source_sigma_margin) - - def tearDown(self): - tkp.db.rollback() - - def test_extra_columns(self): - query = """ - SELECT - v_int, eta_int, - sigma_rms_max, sigma_rms_min, - lightcurve_max, lightcurve_avg - FROM - augmented_runningcatalog - WHERE - dataset = %s - ORDER BY - id - """ % self.dataset.id - - cursor = tkp.db.execute(query) - rows = cursor.fetchall() - self.assertEqual(len(rows), 1) - v_int, eta_int, sigma_max, sigma_min, lightcurve_max, lightcurve_avg = rows[0] - self.assertAlmostEqual(v_int, 1.41421356237309) - self.assertAlmostEqual(eta_int, 344.7938) - self.assertAlmostEqual(sigma_max, 13.13) - self.assertAlmostEqual(sigma_min, 13.13) - self.assertAlmostEqual(lightcurve_max, 0.01313) - self.assertAlmostEqual(lightcurve_avg, 0.006565) - - @unittest.skip( - """ - This test fails when we mix the old "augmented runningcatalog" and - the new SQLAlchemy code. It's unclear why it's suddenly borked, but - since the relevant query is about to be reimplemented we skip it for - now and will debug the new version. - """) - def test_count(self): - """ - make sure the augmented view has a reasonable number of rows. 
- """ - n_runcats_qry = "select count(id) from runningcatalog" - n_runcat_flux_qry = "select count(id) from runningcatalog_flux" - n_in_view_qry = "select count(id) from augmented_runningcatalog" - - n_runcats = tkp.db.execute(n_runcats_qry).fetchall()[0][0] - n_runcat_flux = tkp.db.execute(n_runcat_flux_qry).fetchall()[0][0] - n_in_view = tkp.db.execute(n_in_view_qry).fetchall()[0][0] - - self.assertGreaterEqual(n_in_view, n_runcats) - self.assertGreaterEqual(n_runcat_flux, n_in_view) \ No newline at end of file diff -Nru tkp-3.1.1/tests/test_database/test_transients.py tkp-4.0/tests/test_database/test_transients.py --- tkp-3.1.1/tests/test_database/test_transients.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_database/test_transients.py 2017-02-03 10:06:41.000000000 +0000 @@ -1,9 +1,8 @@ import unittest from collections import defaultdict import tkp.db - -from tkp.db.generic import get_db_rows_as_dicts -from tkp.db import nulldetections +from tkp.db.associations import associate_extracted_sources +from tkp.db.general import insert_extracted_sources, frequency_bands, runcat_entries from tkp.testutil import db_subs from tkp.testutil.db_subs import (example_extractedsource_tuple, MockSource, @@ -15,6 +14,7 @@ # Convenient default values deRuiter_r = 3.7 + class TestSimplestCases(unittest.TestCase): """ Various basic test-cases of the transient-detection logic. @@ -121,26 +121,26 @@ self.assertEqual(len(transients), 1) newsrc_properties = transients[0] # Check that the bands for the images are the same as the transient's band - freq_bands = self.dataset.frequency_bands() + freq_bands = frequency_bands(self.dataset._id) self.assertEqual(len(freq_bands), 1) self.assertEqual(freq_bands[0], newsrc_properties['band']) - #Sanity check that the runcat is correctly matched - runcats = self.dataset.runcat_entries() + # Sanity check that the runcat is correctly matched + runcats = runcat_entries(self.dataset._id) self.assertEqual(len(runcats), 1) self.assertEqual(runcats[0]['runcat'], newsrc_properties['runcat_id']) - #Since it is a single-epoch source, variability indices default to 0: + # Since it is a single-epoch source, variability indices default to 0: self.assertEqual(newsrc_properties['v_int'],0) self.assertEqual(newsrc_properties['eta_int'],0) - #Bright 'new-source' / single-epoch transient; should have high sigmas: + # Bright 'new-source' / single-epoch transient; should have high sigmas: self.assertTrue( newsrc_properties['low_thresh_sigma']> self.new_source_sigma_margin) self.assertEqual(newsrc_properties['low_thresh_sigma'], newsrc_properties['high_thresh_sigma']) - #Check the correct trigger xtrsrc was identified: + # Check the correct trigger xtrsrc was identified: self.assertEqual(newsrc_properties['taustart_ts'], transient_src.lightcurve.keys()[0]) @@ -157,13 +157,10 @@ self.assertEqual(len(transients), 1) transient_properties = transients[0] - # - #And now we should have a non-zero variability value: - # print "\n",transient_properties - self.assertNotAlmostEqual(transient_properties['v_int'],0) - self.assertNotAlmostEqual(transient_properties['eta_int'],0) - + # And now we should have a non-zero variability value + self.assertNotAlmostEqual(transient_properties['v_int'], 0) + self.assertNotAlmostEqual(transient_properties['eta_int'], 0) def test_single_epoch_weak_transient(self): """ @@ -185,50 +182,45 @@ self.barely_detectable_flux} ) - inserted_sources = [] - for img_pars in im_params[:3]: - image, _,forced_fits = insert_image_and_simulated_sources( - 
self.dataset,img_pars,[transient_src], + image, _, forced_fits = insert_image_and_simulated_sources( + self.dataset, img_pars, [transient_src], self.new_source_sigma_margin) self.assertEqual(forced_fits, []) newsources = get_newsources_for_dataset(self.dataset.id) self.assertEqual(len(newsources), 0) transients = get_sources_filtered_by_final_variability( dataset_id=self.dataset.id, **self.search_params) - #No variability yet + # No variability yet self.assertEqual(len(transients), 0) - #Now, the final, empty image: + # Now, the final, empty image: image, blind_extractions, forced_fits = insert_image_and_simulated_sources( - self.dataset,im_params[3],[transient_src], + self.dataset, im_params[3], [transient_src], self.new_source_sigma_margin) - self.assertEqual(len(blind_extractions),0) + self.assertEqual(len(blind_extractions), 0) self.assertEqual(len(forced_fits), 1) - #No changes to newsource table + # No changes to newsource table newsources = get_newsources_for_dataset(self.dataset.id) self.assertEqual(len(newsources), 0) - #But it now has high variability + # But it now has high variability transients = get_sources_filtered_by_final_variability( dataset_id=self.dataset.id, **self.search_params) self.assertEqual(len(transients), 1) transient_properties = transients[0] # Check that the bands for the images are the same as the transient's band - freq_bands = self.dataset.frequency_bands() + freq_bands = frequency_bands(self.dataset._id) self.assertEqual(len(freq_bands), 1) self.assertEqual(freq_bands[0], transient_properties['band']) - #Sanity check that the runcat is correctly matched - runcats = self.dataset.runcat_entries() + # Sanity check that the runcat is correctly matched + runcats = runcat_entries(self.dataset._id) self.assertEqual(len(runcats), 1) self.assertEqual(runcats[0]['runcat'], transient_properties['runcat_id']) - - - def test_multi_epoch_source_flare_and_fade(self): """ A steady source (i.e. 
detected in first image) flares up, @@ -266,12 +258,12 @@ self.assertEqual(len(transients), 1) transient_properties = transients[0] # Check that the bands for the images are the same as the transient's band - freq_bands = self.dataset.frequency_bands() + freq_bands = frequency_bands(self.dataset._id) self.assertEqual(len(freq_bands), 1) self.assertEqual(freq_bands[0], transient_properties['band']) #Sanity check that the runcat is correctly matched - runcats = self.dataset.runcat_entries() + runcats = runcat_entries(self.dataset._id) self.assertEqual(len(runcats), 1) self.assertEqual(runcats[0]['runcat'], transient_properties['runcat_id']) @@ -398,8 +390,8 @@ xtr = bright_transient.simulate_extraction(img, extraction_type='blind') if xtr is not None: - img.insert_extracted_sources([xtr],'blind') - img.associate_extracted_sources(deRuiter_r, self.new_source_sigma_margin) + insert_extracted_sources(img._id, [xtr], 'blind') + associate_extracted_sources(img._id, deRuiter_r, self.new_source_sigma_margin) newsources = get_newsources_for_dataset(self.dataset.id) #Should have one 'definite' transient @@ -446,29 +438,27 @@ lightcurve={img_params[1]['taustart_ts'] : marginal_transient_flux} ) - - #First, check that we've set up the test correctly: + # First, check that we've set up the test correctly rms_min0 = img_params[0]['rms_min'] rms_max0 = img_params[0]['rms_max'] det0 = img_params[0]['detection_thresh'] self.assertTrue(marginal_transient_flux < - rms_max0*(det0 + self.new_source_sigma_margin ) ) + rms_max0 * (det0 + self.new_source_sigma_margin)) self.assertTrue(marginal_transient_flux > - rms_min0*(det0 + self.new_source_sigma_margin ) ) + rms_min0 * (det0 + self.new_source_sigma_margin)) for pars in self.img_params: - img = tkp.db.Image(data=pars,dataset=self.dataset) + img = tkp.db.Image(data=pars, dataset=self.dataset) xtr = marginal_transient.simulate_extraction(img, - extraction_type='blind') + extraction_type='blind') if xtr is not None: - img.insert_extracted_sources([xtr],'blind') - img.associate_extracted_sources(deRuiter_r, self.new_source_sigma_margin) - + insert_extracted_sources(img._id, [xtr], 'blind') + associate_extracted_sources(img._id, deRuiter_r, self.new_source_sigma_margin) newsources = get_newsources_for_dataset(self.dataset.id) - #Should have one 'possible' transient - self.assertEqual(len(newsources),1) + # Should have one 'possible' transient + self.assertEqual(len(newsources), 1) self.assertTrue( newsources[0]['low_thresh_sigma'] > self.new_source_sigma_margin) self.assertTrue( @@ -502,23 +492,23 @@ lightcurve=defaultdict(lambda :marginal_steady_src_flux) ) - #First, check that we've set up the test correctly: + # First, check that we've set up the test correctly rms_min0 = img_params[0]['rms_min'] det0 = img_params[0]['detection_thresh'] self.assertTrue(marginal_steady_src_flux < - rms_min0*(det0 + self.new_source_sigma_margin ) ) + rms_min0 * (det0 + self.new_source_sigma_margin)) - #Insert first image, no sources. - tkp.db.Image(data=img_params[0],dataset=self.dataset) - #Now set up second image. - img1 = tkp.db.Image(data=img_params[1],dataset=self.dataset) + # Insert first image, no sources. + tkp.db.Image(data=img_params[0], dataset=self.dataset) + # Now set up second image. 
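The transient tests above show the same shift for dataset-level queries: DataSet.frequency_bands() and DataSet.runcat_entries() become functions in tkp.db.general that take the dataset id. A small sketch of the replacement calls:

    from tkp.db.general import frequency_bands, runcat_entries

    def dataset_summary(dataset_id):
        bands = frequency_bands(dataset_id)    # band ids used by the dataset's images
        runcats = runcat_entries(dataset_id)   # dicts carrying at least a 'runcat' key
        return bands, runcats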
+ img1 = tkp.db.Image(data=img_params[1], dataset=self.dataset) xtr = marginal_steady_src.simulate_extraction(img1, extraction_type='blind') - img1.insert_extracted_sources([xtr],'blind') - img1.associate_extracted_sources(deRuiter_r, self.new_source_sigma_margin) + insert_extracted_sources(img1._id, [xtr], 'blind') + associate_extracted_sources(img1._id, deRuiter_r, self.new_source_sigma_margin) newsources = get_newsources_for_dataset(self.dataset.id) - #Should have no flagged new sources - self.assertEqual(len(newsources),0) + # Should have no flagged new sources + self.assertEqual(len(newsources), 0) class TestIncreasingImageRMS(unittest.TestCase): @@ -668,18 +658,18 @@ lightcurve=defaultdict(lambda :self.always_detectable_flux) ) - #Insert first image, no sources. + # Insert first image, no sources. tkp.db.Image(data=img_params[0],dataset=self.dataset) - #Now set up second image. + # Now set up second image. img1 = tkp.db.Image(data=img_params[1],dataset=self.dataset) xtr = steady_low_freq_src.simulate_extraction(img1, extraction_type='blind') - img1.insert_extracted_sources([xtr],'blind') - img1.associate_extracted_sources(deRuiter_r, self.new_source_sigma_margin) + insert_extracted_sources(img1._id, [xtr], 'blind') + associate_extracted_sources(img1._id, deRuiter_r, self.new_source_sigma_margin) transients = get_newsources_for_dataset(self.dataset.id) - #Should have no marked transients - self.assertEqual(len(transients),0) + # Should have no marked transients + self.assertEqual(len(transients), 0) class TestPreviousLimitsImageId(unittest.TestCase): @@ -901,7 +891,7 @@ self.n_transients_after_image[img_idx]) #Sanity check that everything went into one band - bands = self.dataset.frequency_bands() + bands = frequency_bands(self.dataset._id) self.assertEqual(len(bands), 1) all_transients = get_sources_filtered_by_final_variability( diff -Nru tkp-3.1.1/tests/test_database/test_version.py tkp-4.0/tests/test_database/test_version.py --- tkp-3.1.1/tests/test_database/test_version.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_database/test_version.py 2017-02-03 10:06:41.000000000 +0000 @@ -11,4 +11,4 @@ def test_version(self): session = self.database.Session() v = session.query(Version).filter(Version.name == 'revision').one() - self.assertEqual(v.value, SCHEMA_VERSION) \ No newline at end of file + self.assertEqual(v.value, SCHEMA_VERSION) diff -Nru tkp-3.1.1/tests/test_distribute/test_runner.py tkp-4.0/tests/test_distribute/test_runner.py --- tkp-3.1.1/tests/test_distribute/test_runner.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_distribute/test_runner.py 2017-02-03 10:06:41.000000000 +0000 @@ -9,7 +9,7 @@ for method in 'serial', 'multiproc': runner = tkp.distribute.Runner(method) - runner.map("persistence_node_step", []) + runner.map("get_accessors", []) def test_set_cores(self): cores = 10 diff -Nru tkp-3.1.1/tests/test_inject.py tkp-4.0/tests/test_inject.py --- tkp-3.1.1/tests/test_inject.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_inject.py 2017-02-03 10:06:41.000000000 +0000 @@ -28,11 +28,6 @@ os.chdir(cls.start_dir) shutil.rmtree(cls.temp_dir) - def test_no_injection(self): - # Without injection the file cannot be opened because the frequency - # isn't specified. 
- self.assertRaises(TypeError, tkp.accessors.open, fits_file) - def test_injection(self): c = SafeConfigParser() c.read(default_header_inject_config) diff -Nru tkp-3.1.1/tests/test_quality/test_rms.py tkp-4.0/tests/test_quality/test_rms.py --- tkp-3.1.1/tests/test_quality/test_rms.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_quality/test_rms.py 2017-02-03 10:06:41.000000000 +0000 @@ -1,12 +1,11 @@ import os import numpy +import tkp.quality.rms from numpy.testing import assert_array_equal, assert_array_almost_equal import unittest from tkp.quality.rms import rms_invalid -from tkp import accessors import tkp.quality -from tkp.quality import statistics import tkp.telescope.lofar as lofar from tkp.testutil.decorators import requires_data from tkp.testutil.data import DATAPATH @@ -17,25 +16,25 @@ class TestRms(unittest.TestCase): def test_subrgion(self): - sub = statistics.subregion(numpy.ones((800, 800))) + sub = tkp.quality.rms.subregion(numpy.ones((800, 800))) self.assertEqual(sub.shape, (400, 400)) def test_rms(self): - self.assertEquals(statistics.rms(numpy.ones([4,4])*4), 0) + self.assertEquals(tkp.quality.rms.rms(numpy.ones([4, 4]) * 4), 0) def test_clip(self): a = numpy.ones([50, 50]) * 10 a[20, 20] = 20 - clipped = statistics.clip(a) + clipped = tkp.quality.rms.clip(a) check = numpy.array([10] * (50*50-1)) assert_array_equal(clipped, check) def test_rmsclippedsubregion(self): o = numpy.ones((800, 800)) - sub = statistics.subregion(o) - clip = statistics.clip(sub) - rms = statistics.rms(clip) - self.assertEqual(rms, statistics.rms_with_clipped_subregion(o)) + sub = tkp.quality.rms.subregion(o) + clip = tkp.quality.rms.clip(sub) + rms = tkp.quality.rms.rms(clip) + self.assertEqual(rms, tkp.quality.rms.rms_with_clipped_subregion(o)) def test_calculate_theoreticalnoise(self): # Sample data from a LOFAR image header. 
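The quality helpers exercised above have moved from tkp.quality.statistics into tkp.quality.rms, so the old statistics import no longer applies. A short sketch of the clipped-RMS estimate these tests cover:

    import numpy
    from tkp.quality.rms import subregion, clip, rms, rms_with_clipped_subregion

    data = numpy.ones((800, 800))
    sub = subregion(data)     # central subsection of the image (here 400x400)
    clipped = clip(sub)       # iteratively sigma-clip outlying pixels
    noise = rms(clipped)      # RMS of the remaining pixels, 0 for constant data
    # the convenience wrapper chains the three steps above:
    assert noise == rms_with_clipped_subregion(data)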
diff -Nru tkp-3.1.1/tests/test_sourcefinder/test_image.py tkp-4.0/tests/test_sourcefinder/test_image.py --- tkp-3.1.1/tests/test_sourcefinder/test_image.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_sourcefinder/test_image.py 2017-02-03 10:06:41.000000000 +0000 @@ -228,8 +228,8 @@ boxsize_proportion = 0.5 nanbox_radius *= boxsize_proportion - nandata[x0-nanbox_radius:x0+nanbox_radius+1, - y0-nanbox_radius:y0+nanbox_radius+1] = float('nan') + nandata[int(x0 - nanbox_radius):int(x0 + nanbox_radius+1), + int(y0 - nanbox_radius):int(y0 + nanbox_radius+1)] = float('nan') # Dump image data for manual inspection: # import astropy.io.fits as fits diff -Nru tkp-3.1.1/tests/test_steps/test_persistence.py tkp-4.0/tests/test_steps/test_persistence.py --- tkp-3.1.1/tests/test_steps/test_persistence.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_steps/test_persistence.py 2017-02-03 10:06:41.000000000 +0000 @@ -1,11 +1,11 @@ import os import unittest import tkp.steps.persistence -from tkp.testutil.decorators import requires_mongodb import tkp.testutil.data as testdata from tkp.testutil.decorators import requires_database, requires_data import tkp.db import tkp.db.generic +import tkp.accessors from ConfigParser import SafeConfigParser from tkp.config import parse_to_dict, initialize_pipeline_config @@ -13,6 +13,7 @@ datafile = os.path.join(testdata.DATAPATH, "sourcefinder/NCP_sample_image_1.fits") + class TestPersistence(unittest.TestCase): def tearDown(self): @@ -25,6 +26,7 @@ dataset = tkp.db.DataSet(data={'description': "Test persistence"}) cls.dataset_id = dataset.id cls.images = [datafile] + cls.accessors = [tkp.accessors.open(datafile)] cls.extraction_radius = 256 job_config = SafeConfigParser() job_config.read(default_job_config) @@ -41,15 +43,17 @@ tkp.steps.persistence.create_dataset(dataset_id, "test") def test_extract_metadatas(self): - tkp.steps.persistence.extract_metadatas(self.images, - rms_est_sigma=4, rms_est_fraction=8) + tkp.steps.persistence.extract_metadatas(self.accessors, + rms_est_sigma=4, + rms_est_fraction=8) def test_store_images(self): images_metadata = tkp.steps.persistence.extract_metadatas( - self.images, rms_est_sigma=4, rms_est_fraction=8) - img_ids = tkp.steps.persistence.store_images(images_metadata, - self.extraction_radius, - self.dataset_id) + self.accessors, rms_est_sigma=4, rms_est_fraction=8) + img_ids = tkp.steps.persistence.store_images_in_db(images_metadata, + self.extraction_radius, + self.dataset_id, + 0.0) # xtr_radius >=0 is a Postgres constraint, but we should also test # manually, in case running MonetDB: for id in img_ids: @@ -58,18 +62,4 @@ where=dict(id=image.skyrgn)) self.assertGreaterEqual(skyrgn[0]['xtr_radius'], 0) - def test_node_steps(self): - tkp.steps.persistence.node_steps(self.images, self.image_cache_pars, - rms_est_sigma=4, rms_est_fraction=8) - - -@requires_mongodb() -class TestMongoDb(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.images = [datafile] - @unittest.skip("disabled for now since no proper way to configure (yet)") - def test_image_to_mongodb(self): - self.assertTrue(tkp.steps.persistence.image_to_mongodb(self.images[0], - hostname, port, database)) diff -Nru tkp-3.1.1/tests/test_steps/test_quality.py tkp-4.0/tests/test_steps/test_quality.py --- tkp-3.1.1/tests/test_steps/test_quality.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_steps/test_quality.py 2017-02-03 10:06:41.000000000 +0000 @@ -18,7 +18,7 @@ self.job_config = parse_to_dict(config) def test_check(self): - 
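The persistence tests above reflect two API changes: the step functions now receive accessor objects instead of file paths, and store_images has become store_images_in_db with an extra trailing argument (0.0 in the test). A sketch of the updated flow, with the paths, dataset id and extraction radius as assumed inputs:

    import tkp.accessors
    import tkp.steps.persistence

    def persist(paths, dataset_id, extraction_radius):
        accessors = [tkp.accessors.open(p) for p in paths]
        metadata = tkp.steps.persistence.extract_metadatas(accessors,
                                                           rms_est_sigma=4,
                                                           rms_est_fraction=8)
        # the final argument mirrors the value used in the test above
        return tkp.steps.persistence.store_images_in_db(metadata, extraction_radius,
                                                        dataset_id, 0.0)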
tkp.steps.quality.reject_check(self.accessor.url, self.job_config) + tkp.steps.quality.reject_check(self.accessor, self.job_config) def test_zero_integration(self): self.accessor._tau_time = 0 diff -Nru tkp-3.1.1/tests/test_steps/test_source_extraction.py tkp-4.0/tests/test_steps/test_source_extraction.py --- tkp-3.1.1/tests/test_steps/test_source_extraction.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_steps/test_source_extraction.py 2017-02-03 10:06:41.000000000 +0000 @@ -32,7 +32,8 @@ @requires_data(fits_file) def test_extract_sources(self): image_path = fits_file - tkp.steps.source_extraction.extract_sources(image_path, self.parset) + accessor = tkp.accessors.open(image_path) + tkp.steps.source_extraction.extract_sources(accessor, self.parset) @requires_data(fits_file) def test_for_appropriate_arguments(self): @@ -45,7 +46,8 @@ mock_method = Mock(MockImage([])) orig_method = tkp.steps.source_extraction.sourcefinder_image_from_accessor tkp.steps.source_extraction.sourcefinder_image_from_accessor = mock_method - tkp.steps.source_extraction.extract_sources(image_path, self.parset) + accessor = tkp.accessors.open(image_path) + tkp.steps.source_extraction.extract_sources(accessor, self.parset) tkp.steps.source_extraction.sourcefinder_image_from_accessor = orig_method # Arguments to sourcefinder_image_from_accessor() diff -Nru tkp-3.1.1/tests/test_steps/test_varmetric.py tkp-4.0/tests/test_steps/test_varmetric.py --- tkp-3.1.1/tests/test_steps/test_varmetric.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_steps/test_varmetric.py 2017-02-03 10:06:41.000000000 +0000 @@ -7,7 +7,6 @@ gen_lightcurve import tkp.db -import tkp.db.alchemy from tkp.steps.varmetric import execute_store_varmetric @@ -25,8 +24,8 @@ def setUp(self): self.session = self.db.Session() - band = gen_band(central=150**6) self.dataset = gen_dataset('test varmetric step') + band = gen_band(dataset=self.dataset, central=150**6) skyregion = gen_skyregion(self.dataset) lightcurve = gen_lightcurve(band, self.dataset, skyregion) self.session.add_all(lightcurve) @@ -36,3 +35,11 @@ def test_execute_store_varmetric(self): session = self.db.Session() execute_store_varmetric(session=session, dataset_id=self.dataset.id) + self.session.flush() + + def test_execute_store_varmetric_twice(self): + session = self.db.Session() + execute_store_varmetric(session=session, dataset_id=self.dataset.id) + self.session.flush() + execute_store_varmetric(session=session, dataset_id=self.dataset.id) + self.session.flush() diff -Nru tkp-3.1.1/tests/test_stream.py tkp-4.0/tests/test_stream.py --- tkp-3.1.1/tests/test_stream.py 1970-01-01 00:00:00.000000000 +0000 +++ tkp-4.0/tests/test_stream.py 2017-02-03 10:06:41.000000000 +0000 @@ -0,0 +1,54 @@ +import unittest +from datetime import datetime +import dateutil +import tkp.stream +from tkp.testutil.stream_emu import create_fits_hdu, serialize_hdu, make_window + + +class TestStream(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.hdu = create_fits_hdu() + now = datetime.now() + cls.hdu.header['date-obs'] = now.isoformat() + + def test_reconstruct(self): + data, header = serialize_hdu(self.hdu) + hdulist = tkp.stream.reconstruct_fits(header, data) + dateutil.parser.parse(hdulist[0].header['date-obs']) + + def test_get_bytes(self): + class MockSocket(object): + def recv(self, count): + return count * "s" + + x = tkp.stream.getbytes(MockSocket(), 10) + self.assertEqual(x, 10 * "s") + + def test_get_bytes_closed(self): + class MockSocket(object): + def recv(self, _): + 
return "" + self.assertRaises(Exception, tkp.stream.getbytes, MockSocket(), 10) + + def test_read_window(self): + window = make_window(self.hdu) + data, header = serialize_hdu(self.hdu) + + class MockSocket(object): + counter = 0 + + def recv(self2, bytes): + data = window[self2.counter:self2.counter+bytes] + self2.counter += bytes + return data + + fits_bytes, image_bytes = tkp.stream.read_window(MockSocket()) + self.assertEqual(header, fits_bytes) + self.assertEqual(data, image_bytes) + + def test_reconstruct_fits(self): + data, header = serialize_hdu(self.hdu) + hdulist = tkp.stream.reconstruct_fits(header, data) + self.assertEqual(self.hdu.data.all(), hdulist[0].data.all()) + self.assertEqual(self.hdu.header, hdulist[0].header) diff -Nru tkp-3.1.1/tests/test_utility/test_sorting.py tkp-4.0/tests/test_utility/test_sorting.py --- tkp-3.1.1/tests/test_utility/test_sorting.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tests/test_utility/test_sorting.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,49 +0,0 @@ -import unittest -from collections import namedtuple -from datetime import datetime, timedelta -import random -from tkp.steps.misc import group_per_timestep - -MockOrmImage = namedtuple('MockOrmImage', ['taustart_ts', 'freq_eff', 'stokes']) - -now = datetime.now() - -def create_input(): - """ - returns a list of mock orm images with taustart_ts, freq_eff and stokes set. - """ - mockimages = [] - for hours in 1, 2, 3: - taustart_ts = now - timedelta(hours=hours) - for freq_eff in 100, 150, 200: - for stokes in 1, 2, 3, 4: - mockimages.append(MockOrmImage(taustart_ts=taustart_ts, - freq_eff=freq_eff ** 6, - stokes=stokes)) - - # when we seed the RNG with a constant the shuffle will be deterministic - random.seed(1) - random.shuffle(mockimages) - return mockimages - - -def create_output(): - mockimages = [] - for hours in 3, 2, 1: - taustart_ts = now - timedelta(hours=hours) - group = [] - for freq_eff in 100, 150, 200: - for stokes in 1, 2, 3, 4: - group.append(MockOrmImage(taustart_ts=taustart_ts, - freq_eff=freq_eff ** 6, - stokes=stokes)) - mockimages.append((taustart_ts, group)) - return mockimages - - -class TestSorting(unittest.TestCase): - def test_sorting(self): - input = create_input() - should_be = create_output() - evaluated = group_per_timestep(input) - self.assertEqual(should_be, evaluated) diff -Nru tkp-3.1.1/tkp/accessors/aartfaaccasaimage.py tkp-4.0/tkp/accessors/aartfaaccasaimage.py --- tkp-3.1.1/tkp/accessors/aartfaaccasaimage.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/accessors/aartfaaccasaimage.py 2017-02-03 10:06:41.000000000 +0000 @@ -1,5 +1,6 @@ import logging from tkp.accessors import CasaImage +from casacore.tables import table as casacore_table logger = logging.getLogger(__name__) @@ -8,15 +9,15 @@ def __init__(self, url, plane=0, beam=None): super(AartfaacCasaImage, self).__init__(url, plane=0, beam=None) - - self.taustart_ts = self.parse_taustartts() - self.telescope = self.table.getkeyword('coords')['telescope'] + table = casacore_table(self.url.encode(), ack=False) + self.taustart_ts = self.parse_taustartts(table) + self.telescope = table.getkeyword('coords')['telescope'] # TODO: header does't contain integration time # aartfaac imaginig pipeline issue #25 self.tau_time = 1 - def parse_frequency(self): + def parse_frequency(self, table): """ Extract frequency related information from headers @@ -24,7 +25,7 @@ from the 'spectral2' sub-table.) 
""" - keywords = self.table.getkeywords() + keywords = table.getkeywords() # due to some undocumented casacore feature, the 'spectral' keyword # changes from spectral1 to spectral2 when AARTFAAC imaging developers diff -Nru tkp-3.1.1/tkp/accessors/amicasaimage.py tkp-4.0/tkp/accessors/amicasaimage.py --- tkp-3.1.1/tkp/accessors/amicasaimage.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/accessors/amicasaimage.py 2017-02-03 10:06:41.000000000 +0000 @@ -3,6 +3,7 @@ """ import logging from tkp.accessors.casaimage import CasaImage +from casacore.tables import table as casacore_table logger = logging.getLogger(__name__) @@ -26,5 +27,6 @@ """ def __init__(self, url, plane=0, beam=None): super(AmiCasaImage, self).__init__(url, plane, beam) - self.taustart_ts = self.parse_taustartts() + table = casacore_table(self.url.encode(), ack=False) + self.taustart_ts = self.parse_taustartts(table) self.tau_time = 1 # Placeholder value until properly implemented diff -Nru tkp-3.1.1/tkp/accessors/casaimage.py tkp-4.0/tkp/accessors/casaimage.py --- tkp-3.1.1/tkp/accessors/casaimage.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/accessors/casaimage.py 2017-02-03 10:06:41.000000000 +0000 @@ -8,6 +8,7 @@ logger = logging.getLogger(__name__) + class CasaImage(DataAccessor): """ Provides common functionality for pulling data from the CASA image format @@ -22,24 +23,26 @@ def __init__(self, url, plane=0, beam=None): super(CasaImage, self).__init__() self.url = url - self.table = casacore_table(self.url.encode(), ack=False) - self.data = self.parse_data(plane) - self.wcs = self.parse_coordinates() - self.centre_ra, self.centre_decl = self.parse_phase_centre() - self.freq_eff, self.freq_bw = self.parse_frequency() + + # we don't want the table as a property since it makes the accessor + # not serializable + table = casacore_table(self.url.encode(), ack=False) + self.data = self.parse_data(table, plane) + self.wcs = self.parse_coordinates(table) + self.centre_ra, self.centre_decl = self.parse_phase_centre(table) + self.freq_eff, self.freq_bw = self.parse_frequency(table) self.pixelsize = self.parse_pixelsize() if beam: (bmaj, bmin, bpa) = beam else: - bmaj, bmin, bpa = self.parse_beam() + bmaj, bmin, bpa = self.parse_beam(table) self.beam = self.degrees2pixels( bmaj, bmin, bpa, self.pixelsize[0], self.pixelsize[1]) - - def parse_data(self, plane=0): + def parse_data(self, table, plane=0): """extract and massage data from CASA table""" - data = self.table[0]['map'].squeeze() + data = table[0]['map'].squeeze() planes = len(data.shape) if planes != 2: msg = "received datacube with %s planes, assuming Stokes I and taking plane 0" % planes @@ -49,10 +52,10 @@ data = data.transpose() return data - def parse_coordinates(self): + def parse_coordinates(self, table): """Returns a WCS object""" wcs = WCS() - my_coordinates = self.table.getkeyword('coords')['direction0'] + my_coordinates = table.getkeyword('coords')['direction0'] wcs.crval = my_coordinates['crval'] wcs.crpix = my_coordinates['crpix'] wcs.cdelt = my_coordinates['cdelt'] @@ -66,17 +69,16 @@ wcs.ctype = tuple(ctype) # Rotation, units? We better set a default wcs.crota = (0., 0.) 
- wcs.cunit = self.table.getkeyword('coords')['direction0']['units'] + wcs.cunit = table.getkeyword('coords')['direction0']['units'] return wcs - - def parse_frequency(self): + def parse_frequency(self, table): """extract frequency related information from headers""" - freq_eff = self.table.getkeywords()['coords']['spectral2']['restfreq'] - freq_bw = self.table.getkeywords()['coords']['spectral2']['wcs']['cdelt'] + freq_eff = table.getkeywords()['coords']['spectral2']['restfreq'] + freq_bw = table.getkeywords()['coords']['spectral2']['wcs']['cdelt'] return freq_eff, freq_bw - def parse_beam(self): + def parse_beam(self, table): """ Returns: - Beam parameters, (semimajor, semiminor, parallactic angle) in @@ -92,22 +94,20 @@ else: raise Exception("Beam units (%s) unknown" % quantity['unit']) - restoringbeam = self.table.getkeyword('imageinfo')['restoringbeam'] + restoringbeam = table.getkeyword('imageinfo')['restoringbeam'] bmaj = ensure_degrees(restoringbeam['major']) bmin = ensure_degrees(restoringbeam['minor']) bpa = ensure_degrees(restoringbeam['positionangle']) return bmaj, bmin, bpa - - def parse_phase_centre(self): - # The units for the pointing centre are not given in either the image cube - # itself or in the ICD. Assume radians. + def parse_phase_centre(self, table): + # The units for the pointing centre are not given in either the image + # cubeitself or in the ICD. Assume radians. # Note that we'll return the RA modulo 360 so it's always 0 <= RA < 360 - centre_ra, centre_decl = self.table.getkeyword('coords')['pointingcenter']['value'] + centre_ra, centre_decl = table.getkeyword('coords')['pointingcenter']['value'] return degrees(centre_ra) % 360, degrees(centre_decl) - - def parse_taustartts(self): + def parse_taustartts(self, table): """ Extract integration start-time from CASA table header. @@ -120,7 +120,7 @@ Returns: Time of image start as a instance of ``datetime.datetime`` """ - obsdate = self.table.getkeyword('coords')['obsdate']['m0']['value'] + obsdate = table.getkeyword('coords')['obsdate']['m0']['value'] return mjd2datetime(obsdate) diff -Nru tkp-3.1.1/tkp/accessors/dataaccessor.py tkp-4.0/tkp/accessors/dataaccessor.py --- tkp-3.1.1/tkp/accessors/dataaccessor.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/accessors/dataaccessor.py 2017-02-03 10:06:41.000000000 +0000 @@ -1,5 +1,5 @@ import logging -from tkp.quality.statistics import rms_with_clipped_subregion +from tkp.quality.rms import rms_with_clipped_subregion from tkp.accessors.requiredatts import RequiredAttributesMetaclass from math import degrees, sqrt, sin, pi, cos @@ -123,7 +123,7 @@ else: raise ValueError("Unrecognised WCS co-ordinate system") - #NB. What's a reasonable epsilon here? + # NB. What's a reasonable epsilon here? eps = 1e-7 if abs(abs(deltax) - abs(deltay)) > eps: raise ValueError("Image WCS header suggests non-square pixels." diff -Nru tkp-3.1.1/tkp/accessors/fitsimageblob.py tkp-4.0/tkp/accessors/fitsimageblob.py --- tkp-3.1.1/tkp/accessors/fitsimageblob.py 1970-01-01 00:00:00.000000000 +0000 +++ tkp-4.0/tkp/accessors/fitsimageblob.py 2017-02-03 10:06:41.000000000 +0000 @@ -0,0 +1,56 @@ +import numpy +import logging +from tkp.accessors.fitsimage import FitsImage + +logger = logging.getLogger(__name__) + + +class FitsImageBlob(FitsImage): + """ + A Fits image Blob. Same as ``tkp.accessors.fitsimage.FitsImage`` but + constructed from an in memory fits file, not a fits file on disk. 
+ """ + def __init__(self, hdulist, plane=None, beam=None, hdu_index=0): + # set the URL in case we need it during header parsing for error loggign + self.url = "AARTFAAC streaming image" + super(FitsImage, self).__init__() + + self.header = self._get_header(hdulist, hdu_index) + self.wcs = self.parse_coordinates() + self.data = self.read_data(hdulist, hdu_index, plane) + self.taustart_ts, self.tau_time = self.parse_times() + self.freq_eff, self.freq_bw = self.parse_frequency() + self.pixelsize = self.parse_pixelsize() + + elements = "memory://AARTFAAC", self.taustart_ts, self.tau_time,\ + self.freq_eff, self.freq_bw + self.url = "_".join([str(x) for x in elements]) + + if beam: + (bmaj, bmin, bpa) = beam + else: + (bmaj, bmin, bpa) = self.parse_beam() + self.beam = self.degrees2pixels( + bmaj, bmin, bpa, self.pixelsize[0], self.pixelsize[1] + ) + self.centre_ra, self.centre_decl = self.calculate_phase_centre() + + # Bonus attribute + if 'TELESCOP' in self.header: + self.telescope = self.header['TELESCOP'] + + def _get_header(self, hdulist, hdu_index): + return hdulist[hdu_index].header + + def read_data(self, hdulist, hdu_index, plane): + hdu = hdulist[hdu_index] + data = numpy.float64(hdu.data.squeeze()) + if plane is not None and len(data.shape) > 2: + data = data[plane].squeeze() + n_dim = len(data.shape) + if n_dim != 2: + logger.warn( + "Loaded datacube with %s dimensions, assuming Stokes I and taking plane 0" % n_dim) + data = data[0, :, :] + data = data.transpose() + return data \ No newline at end of file diff -Nru tkp-3.1.1/tkp/accessors/fitsimage.py tkp-4.0/tkp/accessors/fitsimage.py --- tkp-3.1.1/tkp/accessors/fitsimage.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/accessors/fitsimage.py 2017-02-03 10:06:41.000000000 +0000 @@ -48,8 +48,6 @@ hdu = hdulist[hdu_index] return hdu.header.copy() - - def read_data(self, hdu_index, plane): """ Read and store data from our FITS file. @@ -109,7 +107,7 @@ def calculate_phase_centre(self): x, y = self.data.shape centre_ra, centre_decl = self.wcs.p2s((x / 2, y / 2)) - return centre_ra, centre_decl + return float(centre_ra), float(centre_decl) @@ -125,9 +123,15 @@ freq_bw = None try: header = self.header - if header['TELESCOP'] == 'LOFAR': + if header['TELESCOP'] in ('LOFAR', 'AARTFAAC'): freq_eff = header['RESTFRQ'] - freq_bw = header['RESTBW'] + if 'RESTBW' in header: + freq_bw = header['RESTBW'] + + else: + logger.warning("bandwidth header missing in image {}," + " setting to 1 MHz".format(self.url)) + freq_bw = 1e6 else: if header['ctype3'] in ('FREQ', 'VOPT'): freq_eff = header['crval3'] @@ -139,7 +143,7 @@ freq_eff = header['restfreq'] freq_bw = 0.0 except KeyError: - msg = "Frequency not specified in FITS" + msg = "Frequency not specified in headers for {}".format(self.url) logger.error(msg) raise TypeError(msg) @@ -198,7 +202,6 @@ return bmaj, bmin, bpa - def parse_times(self): """Returns: - taustart_ts: tz naive (implicit UTC) datetime at start of observation. 
@@ -216,7 +219,7 @@ try: end = dateutil.parser.parse(self.header['end_utc']) except KeyError: - msg = "End time not specified or unreadable" + msg = "End time not specified in {}, setting to start".format(self.url) logger.warning(msg) end = start diff -Nru tkp-3.1.1/tkp/accessors/__init__.py tkp-4.0/tkp/accessors/__init__.py --- tkp-3.1.1/tkp/accessors/__init__.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/accessors/__init__.py 2017-02-03 10:06:41.000000000 +0000 @@ -6,6 +6,7 @@ """ import os +from astropy.io.fits.hdu.hdulist import HDUList import astropy.io.fits as pyfits from tkp.sourcefinder.image import ImageData from tkp.accessors.dataaccessor import DataAccessor @@ -14,10 +15,10 @@ from tkp.accessors.aartfaaccasaimage import AartfaacCasaImage from tkp.accessors.lofarfitsimage import LofarFitsImage from tkp.accessors.lofarcasaimage import LofarCasaImage +from tkp.accessors.fitsimageblob import FitsImageBlob import tkp.accessors.detection - def sourcefinder_image_from_accessor(image, **args): """Create a source finder ImageData object from an image 'accessor' @@ -61,11 +62,16 @@ Will raise an exception if something went wrong or no matching accessor class is found. """ - if not os.access(path, os.F_OK): - raise IOError("%s does not exist!" % path) - if not os.access(path, os.R_OK): - raise IOError("Don't have permission to read %s!" % path) - Accessor = tkp.accessors.detection.detect(path) - if not Accessor: - raise IOError("no accessor found for %s" % path) - return Accessor(path, *args, **kwargs) + if type(path) == HDUList: + return FitsImageBlob(path, *args, **kwargs) + elif type(path) == str: + if not os.access(path, os.F_OK): + raise IOError("%s does not exist!" % path) + if not os.access(path, os.R_OK): + raise IOError("Don't have permission to read %s!" % path) + Accessor = tkp.accessors.detection.detect(path) + if not Accessor: + raise IOError("no accessor found for %s" % path) + return Accessor(path, *args, **kwargs) + else: + raise Exception("image should be path or HDUlist, got " + str(path)) diff -Nru tkp-3.1.1/tkp/accessors/kat7casaimage.py tkp-4.0/tkp/accessors/kat7casaimage.py --- tkp-3.1.1/tkp/accessors/kat7casaimage.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/accessors/kat7casaimage.py 2017-02-03 10:06:41.000000000 +0000 @@ -8,6 +8,7 @@ logger = logging.getLogger(__name__) + class Kat7CasaImage(CasaImage): """ Use casacore to pull image data out of a CASA table as produced by KAT-7. 
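tkp.accessors.open now dispatches on its argument type, as the rewritten function above shows: a filesystem path goes through the existing accessor detection, an in-memory astropy HDUList is wrapped in a FitsImageBlob, and anything else raises. A short sketch of both entry points, with the file name as an assumption:

    from astropy.io import fits
    import tkp.accessors

    from_disk = tkp.accessors.open('/data/example.fits')              # hypothetical path; detection picks the class
    from_memory = tkp.accessors.open(fits.open('/data/example.fits')) # HDUList -> FitsImageBlob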
@@ -27,6 +28,7 @@ """ def __init__(self, url, plane=0, beam=None): super(Kat7CasaImage, self).__init__(url, plane, beam) - self.taustart_ts = self.parse_taustartts() - self.tau_time = 1 # Placeholder value + table = casacore_table(self.url.encode(), ack=False) + self.taustart_ts = self.parse_taustartts(table) + self.tau_time = 1 # Placeholder value diff -Nru tkp-3.1.1/tkp/accessors/lofaraccessor.py tkp-4.0/tkp/accessors/lofaraccessor.py --- tkp-3.1.1/tkp/accessors/lofaraccessor.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/accessors/lofaraccessor.py 2017-02-03 10:06:41.000000000 +0000 @@ -1,5 +1,6 @@ from tkp.accessors.dataaccessor import RequiredAttributesMetaclass + class LofarAccessor(object): __metaclass__ = RequiredAttributesMetaclass """ diff -Nru tkp-3.1.1/tkp/accessors/lofarcasaimage.py tkp-4.0/tkp/accessors/lofarcasaimage.py --- tkp-3.1.1/tkp/accessors/lofarcasaimage.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/accessors/lofarcasaimage.py 2017-02-03 10:06:41.000000000 +0000 @@ -5,7 +5,6 @@ http://www.lofar.org/operations/lib/exe/fetch.php?media=:public:documents:casa_image_for_lofar_0.03.00.pdf """ import logging -import warnings import numpy import datetime from casacore.tables import table as casacore_table @@ -43,19 +42,18 @@ """ def __init__(self, url, plane=0, beam=None): super(LofarCasaImage, self).__init__(url, plane, beam) - - self.subtables = self.open_subtables() - self.taustart_ts = self.parse_taustartts() - self.tau_time = self.parse_tautime() + table = casacore_table(self.url.encode(), ack=False) + subtables = self.open_subtables(table) + self.taustart_ts = self.parse_taustartts(subtables) + self.tau_time = self.parse_tautime(subtables) # Additional, LOFAR-specific metadata - self.antenna_set = self.parse_antennaset() - self.ncore, self.nremote, self.nintl = self.parse_stations() - self.subbandwidth = self.parse_subbandwidth() - self.subbands = self.parse_subbands() - + self.antenna_set = self.parse_antennaset(subtables) + self.ncore, self.nremote, self.nintl = self.parse_stations(subtables) + self.subbandwidth = self.parse_subbandwidth(subtables) + self.subbands = self.parse_subbands(subtables) - def open_subtables(self): + def open_subtables(self, table): """open all subtables defined in the LOFAR format args: table: a casacore table handler to a LOFAR CASA table @@ -64,17 +62,17 @@ """ subtables = {} for subtable in subtable_names: - subtable_location = self.table.getkeyword("ATTRGROUPS")[subtable] + subtable_location = table.getkeyword("ATTRGROUPS")[subtable] subtables[subtable] = casacore_table(subtable_location, ack=False) return subtables - - def parse_taustartts(self): + def parse_taustartts(self, subtables): """ extract image start time from CASA table header """ - # Note that we sort the table in order of ascending start time then choose - # the first value to ensure we get the earliest possible starting time. - observation_table = self.subtables['LOFAR_OBSERVATION'] + # Note that we sort the table in order of ascending start time then + # choose the first value to ensure we get the earliest possible + # starting time. 
+ observation_table = subtables['LOFAR_OBSERVATION'] julianstart = observation_table.query( sortlist="OBSERVATION_START", limit=1).getcell( "OBSERVATION_START", 0 @@ -83,7 +81,6 @@ taustart_ts = datetime.datetime.fromtimestamp(unixstart) return taustart_ts - @staticmethod def non_overlapping_time(series): """ @@ -104,40 +101,35 @@ start = overlapend return total - overlap - - def parse_tautime(self): + def parse_tautime(self, subtables): """ Returns the total on-sky time for this image. """ - origin_table = self.subtables['LOFAR_ORIGIN'] + origin_table = subtables['LOFAR_ORIGIN'] startcol = origin_table.col('START') endcol = origin_table.col('END') series = [(int(start), int(end)) for start, end in zip(startcol, endcol)] tau_time = LofarCasaImage.non_overlapping_time(series) return tau_time - - - def parse_antennaset(self): - observation_table = self.subtables['LOFAR_OBSERVATION'] - antennasets = CasaImage.unique_column_values(observation_table, "ANTENNA_SET") + def parse_antennaset(self, subtables): + observation_table = subtables['LOFAR_OBSERVATION'] + antennasets = CasaImage.unique_column_values(observation_table, + "ANTENNA_SET") if len(antennasets) == 1: return antennasets[0] else: raise Exception("Cannot handle multiple antenna sets in image") - - - def parse_subbands(self): - origin_table = self.subtables['LOFAR_ORIGIN'] + def parse_subbands(self, subtables): + origin_table = subtables['LOFAR_ORIGIN'] num_chans = CasaImage.unique_column_values(origin_table, "NUM_CHAN") if len(num_chans) == 1: return num_chans[0] else: raise Exception("Cannot handle varying numbers of channels in image") - - def parse_subbandwidth(self): + def parse_subbandwidth(self, subtables): # subband # see http://www.lofar.org/operations/doku.php?id=operator:background_to_observations&s[]=subband&s[]=width&s[]=clock&s[]=frequency freq_units = { @@ -146,9 +138,10 @@ 'MHz': 10 ** 6, 'GHz': 10 ** 9, } - observation_table = self.subtables['LOFAR_OBSERVATION'] + observation_table = subtables['LOFAR_OBSERVATION'] clockcol = observation_table.col('CLOCK_FREQUENCY') - clock_values = CasaImage.unique_column_values(observation_table, "CLOCK_FREQUENCY") + clock_values = CasaImage.unique_column_values(observation_table, + "CLOCK_FREQUENCY") if len(clock_values) == 1: clock = clock_values[0] unit = clockcol.getkeyword('QuantumUnits')[0] @@ -158,16 +151,14 @@ else: raise Exception("Cannot handle varying clocks in image") - - - def parse_stations(self): + def parse_stations(self, subtables): """Extract number of specific LOFAR stations used returns: (number of core stations, remote stations, international stations) """ - observation_table = self.subtables['LOFAR_OBSERVATION'] - antenna_table = self.subtables['LOFAR_ANTENNA'] + observation_table = subtables['LOFAR_OBSERVATION'] + antenna_table = subtables['LOFAR_ANTENNA'] nvis_used = observation_table.getcol('NVIS_USED') names = numpy.array(antenna_table.getcol('NAME')) mask = numpy.sum(nvis_used, axis=2) > 0 diff -Nru tkp-3.1.1/tkp/config/job_template/job_params.cfg tkp-4.0/tkp/config/job_template/job_params.cfg --- tkp-3.1.1/tkp/config/job_template/job_params.cfg 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/config/job_template/job_params.cfg 2017-02-03 10:06:41.000000000 +0000 @@ -1,38 +1,44 @@ [persistence] description = "TRAP dataset" dataset_id = -1 -#Sigma value used for iterative clipping in RMS estimation: -rms_est_sigma = 4 -#Determines size of image subsection used for RMS estimation: -rms_est_fraction = 8 +rms_est_sigma = 4 ; Sigma value used for iterative 
clipping in RMS estimation +rms_est_fraction = 8 ; Determines size of image subsection used for RMS estimation +rms_est_history = 100 ; how many images used for calculating rms histogram +rms_est_max = 100 ; global maximum acceptable rms +rms_est_min = 0.0 ; global minimum acceptable rms +bandwidth_max = 0.0 ; if non zero override bandwidth of image, determines which images fall in same band [quality_lofar] -low_bound = 1 ; multiplied with noise to define lower threshold -high_bound = 80 ; multiplied with noise to define upper threshold -oversampled_x = 30 ; threshold for oversampled check -elliptical_x = 2.0 ; threshold for elliptical check -min_separation = 10 ; minimum distance to a bright source (in degrees) +low_bound = 1 ; multiplied with noise to define lower threshold +high_bound = 80 ; multiplied with noise to define upper threshold +oversampled_x = 30 ; threshold for oversampled check +elliptical_x = 2.0 ; threshold for elliptical check +min_separation = 10 ; minimum distance to a bright source (in degrees) [source_extraction] -# extraction threshold (S/N) -detection_threshold = 8 +detection_threshold = 8 ; extraction threshold (S/N) analysis_threshold = 3 back_size_x = 50 back_size_y = 50 margin = 10 -deblend_nthresh = 0 ; Number of subthresholds for deblending; 0 disables +deblend_nthresh = 0 ; Number of subthresholds for deblending; 0 disables extraction_radius_pix = 250 force_beam = False box_in_beampix = 10 -# ew/ns_sys_err: Systematic errors on ra & decl (units in arcsec) -# See Dario Carbone's presentation at TKP Meeting 2012/12/04 -ew_sys_err = 10 +ew_sys_err = 10 ; Systematic errors on ra & decl (units in arcsec) ns_sys_err = 10 -expiration = 10 ; number of forced fits performed after a blind fit +expiration = 10 ; number of forced fits performed after a blind fit [association] deruiter_radius = 5.68 beamwidths_limit = 1.0 [transient_search] -new_source_sigma_margin = 3 \ No newline at end of file +new_source_sigma_margin = 3 + +[pipeline] +mode = 'batch' ; batch or stream +; below are the hosts and ports defined. Needs to be a string, if multiple +; hosts split by ,. Lengths need to match. +hosts = ',,,,,' ; if stream, the stream server +ports = '6666,6667,6668,6669,6670,6671' ; the port of the stream diff -Nru tkp-3.1.1/tkp/config/project_template/pipeline.cfg tkp-4.0/tkp/config/project_template/pipeline.cfg --- tkp-3.1.1/tkp/config/project_template/pipeline.cfg 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/config/project_template/pipeline.cfg 2017-02-03 10:06:41.000000000 +0000 @@ -9,22 +9,18 @@ colorlog = True [database] -engine = ;(monetdb or postgresql) -database = "" ; e.g. '{% user_name %}' -user = "" ; e.g. '{% user_name %}', or 'postgres' -password = "" ; e.g. '{% user_name %}' -host = "localhost" -port = -passphrase = -dump_backup_copy = False +engine = 'postgresql' ; or monetdb +database = '' ; for example: '{% user_name %}' +user = '' ; for example: '{% user_name %}' +password = '' ; for example: '{% user_name %}' +host = '' ; for example: 'localhost' +port = 5432 +passphrase = '' ; for MonetDB +dump_backup_copy = False ; make database backup for every run? [image_cache] copy_images = True -mongo_host = "localhost" -mongo_port = 27017 -mongo_db = "tkp" - [parallelise] -method = "multiproc" ; or serial -cores = 0 ; the number of cores to use. Set to 0 for autodetect \ No newline at end of file +method = "multiproc" ; or serial +cores = 0 ; number of cores to use. 
0 for auto detect \ No newline at end of file diff -Nru tkp-3.1.1/tkp/db/alchemy/image.py tkp-4.0/tkp/db/alchemy/image.py --- tkp-3.1.1/tkp/db/alchemy/image.py 1970-01-01 00:00:00.000000000 +0000 +++ tkp-4.0/tkp/db/alchemy/image.py 2017-02-03 10:06:41.000000000 +0000 @@ -0,0 +1,198 @@ +import math +from datetime import datetime +from tkp.db.model import Frequencyband, Skyregion, Image, Dataset +from tkp.utility.coordinates import eq_to_cart +from sqlalchemy import func, cast +from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION as Double + + + +def get_band(session, dataset, freq_eff, freq_bw, freq_bw_max=.0): + """ + Returns the frequency band for the given frequency parameters. Will create a new frequency band entry in the + database if no match is found. You can limit the bandwidth of the band association with the freq_bw_max. + + args: + session (sqlalchemy.orm.session.Session): a SQLAlchemy session object + dataset (tkp.db.model.Dataset): the TraP dataset + freq_eff (float): The central frequency of image to get band for + freq_bw (float): The bandwidth of image to get band for + freq_bw_max (float): The maximum bandwith used for band association. Not used if 0.0 (default). + + returns: + tkp.db.model.Frequencyband: a frequency band object + """ + + if freq_bw_max == .0: + bw_half = freq_bw / 2 + low = freq_eff - bw_half + high = freq_eff + bw_half + else: + bw_half = freq_bw_max / 2 + low = freq_eff - bw_half + high = freq_eff + bw_half + + w1 = high - low + w2 = Frequencyband.freq_high - Frequencyband.freq_low + max_ = func.greatest(high, Frequencyband.freq_high) + min_ = func.least(low, Frequencyband.freq_low) + + band = session.query(Frequencyband).filter( + (Frequencyband.dataset == dataset) & (max_ - min_ < w1 + w2) + ).first() + + if not band: + # no match so we create a new band + band = Frequencyband(freq_central=freq_eff, freq_low=low, freq_high=high, dataset=dataset) + session.add(band) + + return band + + +def update_skyregion_members(session, skyregion): + """ + This function performs a simple distance-check against current members of the + runningcatalog to find sources that should be visible in the given skyregion, + and updates the assocskyrgn table accordingly. + + Any previous entries in assocskyrgn relating to this skyregion are + deleted first. + + Note 1. We use the variable 'inter' to cache the extraction_radius as transformed + onto the unit sphere, so this does not have to be recalculated for every + comparison. + + Note 2. (To Do:) This distance check could be made more efficient by + restricting to a range of RA values, as we do with the Dec. + However, this optimization is complicated by the meridian wrap-around issue. + """ + inter = 2. * math.sin(math.radians(skyregion.xtr_radius) / 2.) 
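get_band above matches an image against the existing frequencyband rows with a standard interval-overlap test: two bands overlap exactly when the span of their union is smaller than the sum of their widths (a non-zero freq_bw_max overrides the image bandwidth before the test). A plain-Python illustration of the predicate that the SQL expression (func.greatest/func.least) encodes:

    def bands_overlap(low1, high1, low2, high2):
        # True when the two frequency intervals overlap; this is the get_band criterion
        w1 = high1 - low1
        w2 = high2 - low2
        return max(high1, high2) - min(low1, low2) < w1 + w2

    # a 145-155 MHz image band overlaps an existing 148-152 MHz band...
    assert bands_overlap(145e6, 155e6, 148e6, 152e6)
    # ...but not a 160-170 MHz one, so a new Frequencyband row would be created
    assert not bands_overlap(145e6, 155e6, 160e6, 170e6)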
+ inter_sq = inter * inter + + q = """ + INSERT INTO assocskyrgn + ( + runcat + ,skyrgn + ,distance_deg + ) + SELECT rc.id as runcat + ,sky.id as skyrgn + ,DEGREES(2 * ASIN(SQRT( (rc.x - sky.x) * (rc.x - sky.x) + + (rc.y - sky.y) * (rc.y - sky.y) + + (rc.z - sky.z) * (rc.z - sky.z) + ) / 2 ) + ) + FROM skyregion sky + ,runningcatalog rc + WHERE sky.id = %(skyregion_id)s + AND rc.dataset = sky.dataset + AND rc.wm_decl BETWEEN sky.centre_decl - sky.xtr_radius + AND sky.centre_decl + sky.xtr_radius + AND ( (rc.x - sky.x) * (rc.x - sky.x) + + (rc.y - sky.y) * (rc.y - sky.y) + + (rc.z - sky.z) * (rc.z - sky.z) + ) < %(inter_sq)s + ; + """ % {'inter_sq': inter_sq, 'skyregion_id': skyregion.id} + session.execute(q) + return inter + + +def get_skyregion(session, dataset, centre_ra, centre_decl, xtr_radius): + """ + Gets an id for a skyregion, given a pair of central co-ordinates and a radius. + + If no matching skyregion is found, a new one is inserted. In this case we also trigger execution of + `updateSkyRgnMembers` for the new skyregion - this performs a simple association with current members of the + runningcatalog to find sources that should be visible in the new skyregion, and updates the assocskyrgn table + accordingly. + + args: + session (sqlalchemy.orm.session.Session): a SQLAlchemy session object + dataset (tkp.db.model.Dataset): the TraP dataset + centre_ra (float): center RA coordinate + centre_decl (float): center DECL coordinate + xtr_radius (float): The extraction radius + + returns: + tkp.db.model.Skyregion: a SQLAlchemy skyregion + """ + skyregion = session.query(Skyregion).filter(Skyregion.dataset == dataset, + Skyregion.centre_ra == centre_ra, + Skyregion.centre_decl == centre_decl, + Skyregion.xtr_radius == xtr_radius).one_or_none() + if not skyregion: + x, y, z = eq_to_cart(centre_ra, centre_decl) + skyregion = Skyregion(dataset=dataset, centre_ra=centre_ra, centre_decl=centre_decl, + xtr_radius=xtr_radius, x=x, y=y, z=z) + session.add(skyregion) + session.flush() + update_skyregion_members(session, skyregion) + return skyregion + + +def insert_image(session, dataset, freq_eff, freq_bw, taustart_ts, tau_time, beam_smaj_pix, beam_smin_pix, + beam_pa_rad, deltax, deltay, url, centre_ra, centre_decl, xtr_radius, rms_qc, freq_bw_max=0.0, + rms_min=None, rms_max=None, detection_thresh=None, analysis_thresh=None): + """ + Insert an image for a given dataset. + + Args: + session (sqlalchemy.orm.session.Session): A SQLAlchemy session + dataset (int): ID of parent dataset. + freq_eff: See :ref:`Image table definitions `. + freq_bw: See :ref:`Image table definitions `. + freq_bw_max (float): Optional override for freq_bw, not used if 0. + taustart_ts: See :ref:`Image table definitions `. + tau_time: See :ref:`Image table definitions `. + beam_smaj_pix (float): Restoring beam semimajor axis length in pixels. + (Converted to degrees before storing to database). + beam_smin_pix (float): Restoring beam semiminor axis length in pixels. + (Converted to degrees before storing to database). + beam_pa_rad (float): Restoring beam parallactic angle in radians. + (Converted to degrees before storing to database). + deltax(float): Degrees per pixel increment in x-direction. + deltay(float): Degrees per pixel increment in y-direction. + centre_ra(float): Image central RA co-ord, in degrees. + centre_decl(float): Image central Declination co-ord, in degrees. + xtr_radius(float): Radius in degrees from field centre that will be used + for source extraction.
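        A minimal usage sketch (assuming a configured tkp.db.Database and an
        existing dataset id; every value below is an arbitrary placeholder):

            from datetime import datetime
            from tkp.db import Database
            from tkp.db.alchemy.image import insert_image

            db = Database()
            image = insert_image(
                session=db.session, dataset=1,
                freq_eff=150e6, freq_bw=2e6,
                taustart_ts=datetime(2017, 1, 1), tau_time=300.,
                beam_smaj_pix=4., beam_smin_pix=3., beam_pa_rad=0.1,
                deltax=-0.01, deltay=0.01, url='/data/example.fits',
                centre_ra=123., centre_decl=45., xtr_radius=3.,
                rms_qc=1e-3)
            db.session.commit()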
+ + """ + # this looks a bit weird, but this simplifies backwards compatibility + dataset_id = dataset + dataset = session.query(Dataset).filter(Dataset.id == dataset_id).one() + + skyrgn = get_skyregion(session, dataset, centre_ra, centre_decl, xtr_radius) + band = get_band(session, dataset, freq_eff, freq_bw, freq_bw_max) + rb_smaj = beam_smaj_pix * math.fabs(deltax) + rb_smin = beam_smin_pix * math.fabs(deltay) + rb_pa = 180 * beam_pa_rad / math.pi + + args = ['dataset', 'band', 'tau_time', 'freq_eff', 'freq_bw', 'taustart_ts', 'skyrgn', 'rb_smaj', 'rb_smin', + 'rb_pa', 'deltax', 'deltay', 'url', 'rms_qc', 'rms_min', 'rms_max', 'detection_thresh', 'analysis_thresh'] + + l = locals() + kwargs = {arg: l[arg] for arg in args} + image = Image(**kwargs) + session.add(image) + return image + + +def insert_dataset(session, description): + rerun = session.query(func.max(Dataset.rerun)). \ + select_from(Dataset). \ + filter(Dataset.description == description). \ + one()[0] + + if not rerun: + rerun = 0 + else: + rerun += 1 + + dataset = Dataset(rerun=rerun, + process_start_ts=datetime.now(), + description=description) + session.add(dataset) + return dataset diff -Nru tkp-3.1.1/tkp/db/alchemy/__init__.py tkp-4.0/tkp/db/alchemy/__init__.py --- tkp-3.1.1/tkp/db/alchemy/__init__.py 1970-01-01 00:00:00.000000000 +0000 +++ tkp-4.0/tkp/db/alchemy/__init__.py 2017-02-03 10:06:41.000000000 +0000 @@ -0,0 +1,6 @@ +""" +This is a placeholder for code that uses the SQLAlchemy ORM. It contains +helper functions that should make it easier to query the database +""" + + diff -Nru tkp-3.1.1/tkp/db/alchemy/varmetric.py tkp-4.0/tkp/db/alchemy/varmetric.py --- tkp-3.1.1/tkp/db/alchemy/varmetric.py 1970-01-01 00:00:00.000000000 +0000 +++ tkp-4.0/tkp/db/alchemy/varmetric.py 2017-02-03 10:06:41.000000000 +0000 @@ -0,0 +1,271 @@ +""" +An example of how to use this is shown in an IPython notebook: + +https://github.com/transientskp/notebooks/blob/master/transients.ipynb +""" +from sqlalchemy import func, insert, delete +from sqlalchemy.orm import aliased +from tkp.db.model import Assocxtrsource, Extractedsource, Runningcatalog,\ + Image, Newsource, Varmetric + + +def _last_assoc_timestamps(session, dataset): + """ + Get the timestamps of the latest assocxtrc per runningcatalog and band. + + We can't get the assoc ID's directly, because they are unique and can't + be put in the group by. You can get the eventual assoc ID's by joining + this query again with the assoc table (see last_assoc_per_band func) + + args: + session (session): A SQLAlchemy session + dataset (Dataset): A SQLAlchemy dataset model + + returns: a SQLAlchemy subquery containing runcat id, timestamp, band id + """ + a = aliased(Assocxtrsource, name='a_timestamps') + e = aliased(Extractedsource, name='e_timestamps') + r = aliased(Runningcatalog, name='r_timestamps') + i = aliased(Image, name='i_timestamps') + return session.query(r.id.label('runcat'), + func.max(i.taustart_ts).label('max_time'), + i.band_id.label('band') + ). \ + select_from(r). \ + join(a, r.id == a.runcat_id). \ + join(e, a.xtrsrc_id == e.id). \ + join(i, i.id == e.image_id). \ + group_by(r.id, i.band_id). \ + filter(i.dataset == dataset). \ + subquery(name='last_assoc_timestamps') + + +def _last_assoc_per_band(session, dataset): + """ + Get the ID's of the latest assocxtrc per runningcatalog and band.
+ + Very similar to last_assoc_timestamps, but returns the ID's + + args: + session: SQLalchemy session objects + dataset: tkp.db.model.dataset object + + returns: SQLAlchemy subquery + """ + l = _last_assoc_timestamps(session, dataset) + a = aliased(Assocxtrsource, name='a_laids') + e = aliased(Extractedsource, name='e_laids') + i = aliased(Image, name='i_laids') + + return session.query(a.id.label('assoc_id'), l.c.max_time, + l.c.band, l.c.runcat). \ + select_from(l). \ + join(a, a.runcat_id == l.c.runcat). \ + join(e, a.xtrsrc_id == e.id). \ + join(i, (i.id == e.image_id) & (i.taustart_ts == l.c.max_time)). \ + subquery(name='last_assoc_per_band') + + +def _last_ts_fmax(session, dataset): + """ + Select peak flux per runcat at last timestep (over all bands) + + args: + session: SQLalchemy session objects + dataset: tkp.db.model.dataset object + + returns: SQLAlchemy subquery + """ + a = aliased(Assocxtrsource, name='a_lt') + e = aliased(Extractedsource, name='e_lt') + + subquery = _last_assoc_per_band(session, dataset) + return session.query(a.runcat_id.label('runcat_id'), + func.max(e.f_int).label('max_flux') + ). \ + select_from(subquery). \ + join(a, a.id == subquery.c.assoc_id). \ + join(e, a.xtrsrc_id == e.id). \ + group_by(a.runcat_id). \ + subquery(name='last_ts_fmax') + + +def _newsrc_trigger(session, dataset): + """ + Grab newsource /trigger details where possible + + args: + session: SQLalchemy session objects + + returns: SQLAlchemy subquery + """ + newsource = aliased(Newsource, name='n_ntr') + e = aliased(Extractedsource, name='e_ntr') + i = aliased(Image, name='i_ntr') + return session.query( + newsource.id, + newsource.runcat_id.label('rc_id'), + (e.f_int / i.rms_min).label('sigma_rms_min'), + (e.f_int / i.rms_max).label('sigma_rms_max') + ). \ + select_from(newsource). \ + join(e, e.id == newsource.trigger_xtrsrc_id). \ + join(i, i.id == newsource.previous_limits_image_id). \ + filter(i.dataset == dataset). \ + subquery(name='newsrc_trigger') + + +def _combined(session, dataset): + """ + + args: + session (Session): SQLAlchemy session + runcat (Runningcatalog): Running catalog model object + dataset (Dataset): Dataset model object + + return: a SQLALchemy subquery + """ + runcat = aliased(Runningcatalog, name='r') + match_assoc = aliased(Assocxtrsource, name='match_assoc') + match_ex = aliased(Extractedsource, name='match_ex') + match_img = aliased(Image, name='match_img') + agg_img = aliased(Image, name='agg_img') + agg_assoc = aliased(Assocxtrsource, name='agg_assoc') + agg_ex = aliased(Extractedsource, name='agg_ex') + + newsrc_trigger_query = _newsrc_trigger(session, dataset) + last_ts_fmax_query = _last_ts_fmax(session, dataset) + + return session.query( + runcat.id.label('runcat'), + runcat.wm_ra.label('ra'), + runcat.wm_decl.label('decl'), + runcat.wm_uncertainty_ew, + runcat.wm_uncertainty_ns, + runcat.xtrsrc_id, + runcat.dataset_id.label('dataset_id'), + runcat.datapoints, + match_assoc.v_int, + match_assoc.eta_int, + match_img.band_id, + newsrc_trigger_query.c.id.label('newsource'), + newsrc_trigger_query.c.sigma_rms_max.label('sigma_rms_max'), + newsrc_trigger_query.c.sigma_rms_min.label('sigma_rms_min'), + func.max(agg_ex.f_int).label('lightcurve_max'), + func.avg(agg_ex.f_int).label('lightcurve_avg'), + func.median(agg_ex.f_int).label('lightcurve_median') + ). \ + select_from(last_ts_fmax_query). \ + join(match_assoc, match_assoc.runcat_id == last_ts_fmax_query.c.runcat_id). 
\ + join(match_ex, + (match_assoc.xtrsrc_id == match_ex.id) & + (match_ex.f_int == last_ts_fmax_query.c.max_flux)). \ + join(runcat, runcat.id == last_ts_fmax_query.c.runcat_id). \ + join(match_img, match_ex.image_id == match_img.id). \ + outerjoin(newsrc_trigger_query, newsrc_trigger_query.c.rc_id == runcat.id). \ + join(agg_assoc, runcat.id == agg_assoc.runcat_id). \ + join(agg_ex, agg_assoc.xtrsrc_id == agg_ex.id). \ + join(agg_img, + (agg_ex.image_id == agg_img.id) & (agg_img.band_id == match_img.band_id)). \ + group_by(runcat.id, + runcat.wm_ra, + runcat.wm_decl, + runcat.wm_uncertainty_ew, + runcat.wm_uncertainty_ns, + runcat.xtrsrc_id, + runcat.dataset_id, + runcat.datapoints, + match_assoc.v_int, + match_assoc.eta_int, + match_img.band_id, + newsrc_trigger_query.c.id, + newsrc_trigger_query.c.sigma_rms_max, + newsrc_trigger_query.c.sigma_rms_min, + ). \ + filter(runcat.dataset == dataset). \ + subquery() + + +def del_duplicate_varmetric(session, dataset): + """ + can't figure out how to update in a simple way, for now just delete + the updated rows. This code should be rewritten anyway when we make it + optional to specify a list of runcat entries to update + """ + del_varmetrics = session.query(Varmetric.id).\ + filter(Varmetric.runcat_id == Runningcatalog.id, + Runningcatalog.dataset == dataset).subquery() + return delete(Varmetric).where(Varmetric.id.in_(del_varmetrics)) + + +def store_varmetric(session, dataset): + """ + Stores the augmented runningcatalog values in the varmetric table. + args: + session: A SQLAlchemy session + dataset: a dataset model object + + :return: a SQLAlchemy query + """ + fields = ['runcat', 'v_int', 'eta_int', 'band', 'newsource', + 'sigma_rms_max', 'sigma_rms_min', 'lightcurve_max', + 'lightcurve_avg', 'lightcurve_median'] + + subquery = _combined(session=session, dataset=dataset) + + # only select the columns we are going to insert + filtered = session.query(*fields).select_from(subquery) + + return insert(Varmetric).from_select(names=fields, select=filtered) + + +def calculate_varmetric(session, dataset, ra_range=None, decl_range=None, + v_int_min=None, eta_int_min=None, sigma_rms_min_range=None, + sigma_rms_max_range=None, new_src_only=False): + """ + Calculate sigma_min, sigma_max, v_int, eta_int and the max and avg + values for lightcurves, for all runningcatalogs + + It starts by getting the extracted source from latest image for a runcat. + This is arbitrary, since you have multiple bands. We pick the band with the + max integrated flux. Now we have v_int and eta_int. + The flux is then divided by the RMS_max and RMS_min of the previous image + (stored in newsource.previous_limits_image) to obtain sigma_max and + sigma_min. 
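    A usage sketch (assuming a configured tkp.db.Database and an existing
    dataset; the filter values are arbitrary):

        from tkp.db import Database
        from tkp.db.model import Dataset
        from tkp.db.alchemy.varmetric import calculate_varmetric

        db = Database()
        dataset = db.session.query(Dataset).first()
        query = calculate_varmetric(db.session, dataset,
                                    v_int_min=0.1, eta_int_min=3.,
                                    new_src_only=True)
        for row in query:
            print(row.runcat, row.v_int, row.eta_int)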
+ + args: + dataset (Dataset): SQLAlchemy dataset object + ra_range (tuple): 2 element tuple of ra range + decl_range (tuple): 2 element tuple + v_int_min (float): 2 element tuple + eta_int_min (float): 2 element tuple + sigma_rms_min_range (tuple): 2 element tuple + sigma_rms_max_range (tuple): 2 element tuple + new_src_only (bool): New sources only + + returns: a SQLAlchemy query + """ + + subquery = _combined(session, dataset=dataset) + query = session.query(subquery) + + if ra_range and decl_range: + query = query.filter(subquery.c.ra.between(*ra_range) & + subquery.c.decl.between(*decl_range)) + + if v_int_min != None: + query = query.filter(subquery.c.v_int >= v_int_min) + + if eta_int_min != None: + query = query.filter(subquery.c.eta_int >= eta_int_min) + + if sigma_rms_min_range: + query = query.filter(subquery.c.sigma_rms_min.between(*sigma_rms_min_range)) + + if sigma_rms_max_range: + query = query.filter(subquery.c.sigma_rms_max.between(*sigma_rms_max_range)) + + if new_src_only: + query = query.filter(subquery.c.newsource != None) + + return query \ No newline at end of file diff -Nru tkp-3.1.1/tkp/db/alchemy.py tkp-4.0/tkp/db/alchemy.py --- tkp-3.1.1/tkp/db/alchemy.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/db/alchemy.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,265 +0,0 @@ -""" -This is a placeholder for code that uses the SQLAlchemy ORM. In contains -helper functions that should make it easier to query the database - -An example how to use this is shown in an IPython notebook: - -https://github.com/transientskp/notebooks/blob/master/transients.ipynb - -""" - -from sqlalchemy.orm import aliased -from sqlalchemy.sql import func, insert - -from tkp.db.model import (Assocxtrsource, Extractedsource, Image, Newsource, - Runningcatalog, Varmetric) - - -def _last_assoc_timestamps(session, dataset): - """ - Get the timestamps of the latest assocxtrc per runningcatalog and band. - - We can't get the assoc ID's directly, because they are unique and can't - by put in the group by. You can get the eventual assoc ID's by joining - this query again with the assoc table (see last_assoc_per_band func) - - args: - session (session): A SQLAlchemy session - dataset (Dataset): A SQLALchemy dataset model - - returns: a SQLAlchemy subquery containing runcat id, timestamp, band id - """ - a = aliased(Assocxtrsource, name='a_timestamps') - e = aliased(Extractedsource, name='e_timestamps') - r = aliased(Runningcatalog, name='r_timestamps') - i = aliased(Image, name='i_timestamps') - return session.query(r.id.label('runcat'), - func.max(i.taustart_ts).label('max_time'), - i.band_id.label('band') - ). \ - select_from(r). \ - join(a, r.id == a.runcat_id). \ - join(e, a.xtrsrc_id == e.id). \ - join(i, i.id == e.image_id). \ - group_by(r.id, i.band_id). \ - filter(i.dataset == dataset). \ - subquery(name='last_assoc_timestamps') - - -def _last_assoc_per_band(session, dataset): - """ - Get the ID's of the latest assocxtrc per runningcatalog and band. - - Very similar to last_assoc_timestamps, but returns the ID's - - args: - session: SQLalchemy session objects - dataset: tkp.db.model.dataset object - - returns: SQLAlchemy subquery - """ - l = _last_assoc_timestamps(session, dataset) - a = aliased(Assocxtrsource, name='a_laids') - e = aliased(Extractedsource, name='e_laids') - i = aliased(Image, name='i_laids') - - return session.query(a.id.label('assoc_id'), l.c.max_time, - l.c.band, l.c.runcat). \ - select_from(l). \ - join(a, a.runcat_id == l.c.runcat). \ - join(e, a.xtrsrc_id == e.id). 
\ - join(i, (i.id == e.image_id) & (i.taustart_ts == l.c.max_time)). \ - subquery(name='last_assoc_per_band') - - -def _last_ts_fmax(session, dataset): - """ - Select peak flux per runcat at last timestep (over all bands) - - args: - session: SQLalchemy session objects - dataset: tkp.db.model.dataset object - - returns: SQLAlchemy subquery - """ - a = aliased(Assocxtrsource, name='a_lt') - e = aliased(Extractedsource, name='e_lt') - - subquery = _last_assoc_per_band(session, dataset) - return session.query(a.runcat_id.label('runcat_id'), - func.max(e.f_int).label('max_flux') - ). \ - select_from(subquery). \ - join(a, a.id == subquery.c.assoc_id). \ - join(e, a.xtrsrc_id == e.id). \ - group_by(a.runcat_id). \ - subquery(name='last_ts_fmax') - - -def _newsrc_trigger(session, dataset): - """ - Grab newsource /trigger details where possible - - args: - session: SQLalchemy session objects - - returns: SQLAlchemy subquery - """ - newsource = aliased(Newsource, name='n_ntr') - e = aliased(Extractedsource, name='e_ntr') - i = aliased(Image, name='i_ntr') - return session.query( - newsource.id, - newsource.runcat_id.label('rc_id'), - (e.f_int / i.rms_min).label('sigma_rms_min'), - (e.f_int / i.rms_max).label('sigma_rms_max') - ). \ - select_from(newsource). \ - join(e, e.id == newsource.trigger_xtrsrc_id). \ - join(i, i.id == newsource.previous_limits_image_id). \ - filter(i.dataset == dataset). \ - subquery(name='newsrc_trigger') - - -def _combined(session, dataset): - """ - - args: - session (Session): SQLAlchemy session - runcat (Runningcatalog): Running catalog model object - dataset (Dataset): Dataset model object - - return: a SQLALchemy subquery - """ - runcat = aliased(Runningcatalog, name='r') - match_assoc = aliased(Assocxtrsource, name='match_assoc') - match_ex = aliased(Extractedsource, name='match_ex') - match_img = aliased(Image, name='match_img') - agg_img = aliased(Image, name='agg_img') - agg_assoc = aliased(Assocxtrsource, name='agg_assoc') - agg_ex = aliased(Extractedsource, name='agg_ex') - - newsrc_trigger_query = _newsrc_trigger(session, dataset) - last_ts_fmax_query = _last_ts_fmax(session, dataset) - - return session.query( - runcat.id.label('runcat'), - runcat.wm_ra.label('ra'), - runcat.wm_decl.label('decl'), - runcat.wm_uncertainty_ew, - runcat.wm_uncertainty_ns, - runcat.xtrsrc_id, - runcat.dataset_id.label('dataset_id'), - runcat.datapoints, - match_assoc.v_int, - match_assoc.eta_int, - match_img.band_id, - newsrc_trigger_query.c.id.label('newsource'), - newsrc_trigger_query.c.sigma_rms_max.label('sigma_rms_max'), - newsrc_trigger_query.c.sigma_rms_min.label('sigma_rms_min'), - func.max(agg_ex.f_int).label('lightcurve_max'), - func.avg(agg_ex.f_int).label('lightcurve_avg'), - func.median(agg_ex.f_int).label('lightcurve_median') - ). \ - select_from(last_ts_fmax_query). \ - join(match_assoc, match_assoc.runcat_id == last_ts_fmax_query.c.runcat_id). \ - join(match_ex, - (match_assoc.xtrsrc_id == match_ex.id) & - (match_ex.f_int == last_ts_fmax_query.c.max_flux)). \ - join(runcat, runcat.id == last_ts_fmax_query.c.runcat_id). \ - join(match_img, match_ex.image_id == match_img.id). \ - outerjoin(newsrc_trigger_query, newsrc_trigger_query.c.rc_id == runcat.id). \ - join(agg_assoc, runcat.id == agg_assoc.runcat_id). \ - join(agg_ex, agg_assoc.xtrsrc_id == agg_ex.id). \ - join(agg_img, - (agg_ex.image_id == agg_img.id) & (agg_img.band_id == match_img.band_id)). 
\ - group_by(runcat.id, - runcat.wm_ra, - runcat.wm_decl, - runcat.wm_uncertainty_ew, - runcat.wm_uncertainty_ns, - runcat.xtrsrc_id, - runcat.dataset_id, - runcat.datapoints, - match_assoc.v_int, - match_assoc.eta_int, - match_img.band_id, - newsrc_trigger_query.c.id, - newsrc_trigger_query.c.sigma_rms_max, - newsrc_trigger_query.c.sigma_rms_min, - ). \ - filter(runcat.dataset == dataset). \ - subquery() - - -def store_varmetric(session, dataset): - """ - Stores the augmented runningcatalog values in the varmetric table. - args: - session: A SQLAlchemy session - dataset: a dataset model object - - :return: a SQLAlchemy query - """ - fields = ['runcat', 'v_int', 'eta_int', 'band', 'newsource', - 'sigma_rms_max', 'sigma_rms_min', 'lightcurve_max', - 'lightcurve_avg', 'lightcurve_median'] - - subquery = _combined(session=session, dataset=dataset) - - # only select the columns we are going to insert - filtered = session.query(*fields).select_from(subquery) - - return insert(Varmetric).from_select(names=fields, select=filtered) - - -def calculate_varmetric(session, dataset, ra_range=None, decl_range=None, - v_int_min=None, eta_int_min=None, sigma_rms_min_range=None, - sigma_rms_max_range=None, new_src_only=False): - """ - Calculate sigma_min, sigma_max, v_int, eta_int and the max and avg - values for lightcurves, for all runningcatalogs - - It starts by getting the extracted source from latest image for a runcat. - This is arbitrary, since you have multiple bands. We pick the band with the - max integrated flux. Now we have v_int and eta_int. - The flux is then devided by the RMS_max and RMS_min of the previous image - (stored in newsource.previous_limits_image) to obtain sigma_max and - sigma_min. - - args: - dataset (Dataset): SQLAlchemy dataset object - ra_range (tuple): 2 element tuple of ra range - decl_range (tuple): 2 element tuple - v_int_min (float): 2 element tuple - eta_int_min (float): 2 element tuple - sigma_rms_min_range (tuple): 2 element tuple - sigma_rms_max_range (tuple): 2 element tuple - new_src_only (bool): New sources only - - returns: a SQLAlchemy query - """ - - subquery = _combined(session, dataset=dataset) - query = session.query(subquery) - - if ra_range and decl_range: - query = query.filter(subquery.c.ra.between(*ra_range) & - subquery.c.decl.between(*decl_range)) - - if v_int_min != None: - query = query.filter(subquery.c.v_int >= v_int_min) - - if eta_int_min != None: - query = query.filter(subquery.c.eta_int >= eta_int_min) - - if sigma_rms_min_range: - query = query.filter(subquery.c.sigma_rms_min.between(*sigma_rms_min_range)) - - if sigma_rms_max_range: - query = query.filter(subquery.c.sigma_rms_max.between(*sigma_rms_max_range)) - - if new_src_only: - query = query.filter(subquery.c.newsource != None) - - return query diff -Nru tkp-3.1.1/tkp/db/database.py tkp-4.0/tkp/db/database.py --- tkp-3.1.1/tkp/db/database.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/db/database.py 2017-02-03 10:06:41.000000000 +0000 @@ -1,19 +1,17 @@ import logging import numpy -import tkp.config -from tkp.utility import substitute_inf + from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker +from sqlalchemy.pool import NullPool +import tkp.config +from tkp.utility import substitute_inf +from tkp.db.model import SCHEMA_VERSION logger = logging.getLogger(__name__) -# The version of the TKP DB schema which is assumed by the current tree. -# Increment whenever the schema changes. 
-DB_VERSION = 35 - - def sanitize_db_inputs(params): """ @@ -25,10 +23,10 @@ * Convert infs into the string "Infinity". Args: - params (dict/list/tuple): (Potentially) dirty database inputs + params: (Potentially) dirty database inputs Returns: - cleaned (dict/list/tuple): Sanitized database inputs + Sanitized database inputs """ def sanitize(val): val = substitute_inf(val) @@ -54,6 +52,7 @@ _configured = False transaction = None cursor = None + session = None # this makes this class a singleton _instance = None @@ -91,11 +90,13 @@ self.host, self.port, self.database), - echo=False + echo=False, + poolclass=NullPool, ) self.Session = sessionmaker(bind=self.alchemy_engine) + self.session = self.Session() - def connect(self, check=False): + def connect(self, check=True): """ connect to the configured database @@ -113,9 +114,9 @@ q = "SELECT value FROM version WHERE name='revision'" cursor = self.connection.execute(q) schema_version = cursor.fetchone()[0] - if schema_version != DB_VERSION: + if schema_version != SCHEMA_VERSION: error = ("Database version incompatibility (needed %d, got %d)" % - (DB_VERSION, schema_version)) + (SCHEMA_VERSION, schema_version)) logger.error(error) self._connection.close() self._connection = None @@ -147,8 +148,12 @@ """ close the connection if open """ + if self.session: + self.session.close() + if self._connection: self._connection.close() + self._connection = None def vacuum(self, table): diff -Nru tkp-3.1.1/tkp/db/general.py tkp-4.0/tkp/db/general.py --- tkp-3.1.1/tkp/db/general.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/db/general.py 2017-02-03 10:06:41.000000000 +0000 @@ -5,15 +5,17 @@ that don't fit into a more specific collection. """ -import math -import logging import itertools +import logging +import math import tkp.db -from tkp.utility.coordinates import eq_to_cart -from tkp.utility.coordinates import alpha_inflate +from datetime import datetime +from tkp.db.alchemy.image import insert_dataset as alchemy_insert_dataset +from tkp.db.generic import columns_from_table from tkp.utility import substitute_inf - +from tkp.utility.coordinates import alpha_inflate +from tkp.utility.coordinates import eq_to_cart logger = logging.getLogger(__name__) @@ -43,7 +45,7 @@ update_dataset_process_end_ts_query = """ UPDATE dataset - SET process_end_ts = NOW() + SET process_end_ts = %(end)s WHERE id = %(dataset_id)s """ @@ -51,20 +53,20 @@ """Update dataset start-of-processing timestamp. """ - args = {'dataset_id': dataset_id} + args = {'dataset_id': dataset_id, 'end': datetime.now()} tkp.db.execute(update_dataset_process_end_ts_query, args, commit=True) return dataset_id def insert_dataset(description): - """Insert dataset with description as given by argument. - - DB function insertDataset() sets the necessary default values. """ - query = "SELECT insertDataset(%s)" - arguments = (description,) - cursor = tkp.db.execute(query, arguments, commit=True) - dataset_id = cursor.fetchone()[0] + Insert dataset with description as given by argument. + """ + db = tkp.db.Database() + dataset = alchemy_insert_dataset(db.session, description) + db.session.add(dataset) + db.session.commit() + dataset_id = dataset.id return dataset_id @@ -101,86 +103,7 @@ (insert_num, dataset_id)) -def insert_image(dataset, freq_eff, freq_bw, - taustart_ts, tau_time, - beam_smaj_pix, beam_smin_pix, beam_pa_rad, - deltax, deltay, - url, - centre_ra, centre_decl, xtr_radius, - rms_qc, rms_min, rms_max, - detection_thresh, analysis_thresh - ): - """ - Insert an image for a given dataset. 
- - Args: - dataset (int): ID of parent dataset. - freq_eff: See :ref:`Image table definitions `. - freq_bw: See :ref:`Image table definitions `. - taustart_ts: See :ref:`Image table definitions `. - taus_time: See :ref:`Image table definitions `. - beam_smaj_pix (float): Restoring beam semimajor axis length in pixels. - (Converted to degrees before storing to database). - beam_smin_pix (float): Restoring beam semiminor axis length in pixels. - (Converted to degrees before storing to database). - beam_pa_rad (float): Restoring beam parallactic angle in radians. - (Converted to degrees before storing to database). - deltax(float): Degrees per pixel increment in x-direction. - deltay(float): Degrees per pixel increment in y-direction. - centre_ra(float): Image central RA co-ord, in degrees. - centre_decl(float): Image central Declination co-ord, in degrees. - xtr_radius(float): Radius in degrees from field centre that will be used - for source extraction. - - """ - query = """\ - SELECT insertImage(%(dataset)s - ,%(tau_time)s - ,%(freq_eff)s - ,%(freq_bw)s - ,%(taustart_ts)s - ,%(rb_smaj)s - ,%(rb_smin)s - ,%(rb_pa)s - ,%(deltax)s - ,%(deltay)s - ,%(url)s - ,%(centre_ra)s - ,%(centre_decl)s - ,%(xtr_radius)s - ,%(rms_qc)s - ,%(rms_min)s - ,%(rms_max)s - ,%(detection_thresh)s - ,%(analysis_thresh)s - ) - """ - arguments = {'dataset': dataset, - 'tau_time': tau_time, - 'freq_eff': freq_eff, - 'freq_bw': freq_bw, - 'taustart_ts': taustart_ts, - 'rb_smaj': beam_smaj_pix * math.fabs(deltax), - 'rb_smin': beam_smin_pix * math.fabs(deltay), - 'rb_pa': 180 * beam_pa_rad / math.pi, - 'deltax': deltax, - 'deltay': deltay, - 'url': url, - 'centre_ra': centre_ra, - 'centre_decl': centre_decl, - 'xtr_radius': xtr_radius, - 'rms_qc': rms_qc, - 'rms_min': rms_min, - 'rms_max': rms_max, - 'detection_thresh': detection_thresh, - 'analysis_thresh': analysis_thresh, - } - cursor = tkp.db.execute(query, arguments, commit=True) - image_id = cursor.fetchone()[0] - return image_id - - -def insert_extracted_sources(image_id, results, extract_type, +def insert_extracted_sources(image_id, results, extract_type='blind', ff_runcat_ids=None, ff_monitor_ids=None): """ Insert all detections from sourcefinder into the extractedsource table. 
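A short sketch of the ORM-backed dataset insertion added in the hunk above (assuming a configured database; the description string is arbitrary):

    from tkp.db.general import insert_dataset

    dataset_id = insert_dataset('example observation run')
    print(dataset_id)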
@@ -382,3 +305,32 @@ args = {'xtrsrc': xtrsrcid} cursor = tkp.db.execute(lightcurve_query, args) return cursor.fetchall() + + +def frequency_bands(dataset_id): + """Return a list of distinct bands present in the dataset.""" + query = """\ + SELECT DISTINCT(band) + FROM image + WHERE dataset = %s + """ + cursor = tkp.db.execute(query, (dataset_id,)) + bands = zip(*cursor.fetchall())[0] + return bands + + +def runcat_entries(dataset_id): + """ + Returns: + list: a list of dictionarys representing rows in runningcatalog, + for all sources belonging to this dataset + + Column 'id' is returned with the key 'runcat' + + Currently only returns 3 columns: + [{'runcat,'xtrsrc','datapoints'}] + """ + return columns_from_table('runningcatalog', + keywords=['id', 'xtrsrc', 'datapoints'], + alias={'id': 'runcat'}, + where={'dataset': dataset_id}) diff -Nru tkp-3.1.1/tkp/db/image_store.py tkp-4.0/tkp/db/image_store.py --- tkp-3.1.1/tkp/db/image_store.py 1970-01-01 00:00:00.000000000 +0000 +++ tkp-4.0/tkp/db/image_store.py 2017-02-03 10:06:41.000000000 +0000 @@ -0,0 +1,36 @@ +""" +Code for storing FITS images in the database +""" +from tkp.db import Database +from tkp.db.model import ImageData +import logging +from sqlalchemy.sql.expression import bindparam +from sqlalchemy import insert + + +logger = logging.getLogger(__name__) + + +def store_fits(db_images, fits_datas, fits_headers): + """ + bulk store fits data in database + + args: + db_images (list): list of ``tkp.db.model.Image``s + fits_datas (list): list of serialised numpy arrays + fits_headers (list): list of serialised fits headers (string) + """ + values = [{'image': i.id, 'fits_data': d, 'fits_header': h} for i, d, h in + zip(db_images, fits_datas, fits_headers)] + stmt = insert(ImageData).values( + { + 'image': bindparam('image'), + 'fits_data': bindparam('fits_data'), + 'fits_header': bindparam('fits_header'), + } + ) + + db = Database() + db.session.execute(stmt, values) + db.session.commit() + diff -Nru tkp-3.1.1/tkp/db/__init__.py tkp-4.0/tkp/db/__init__.py --- tkp-3.1.1/tkp/db/__init__.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/db/__init__.py 2017-02-03 10:06:41.000000000 +0000 @@ -1,5 +1,4 @@ import logging -import numpy from tkp.db.database import Database, sanitize_db_inputs from tkp.db.orm import DataSet, Image, ExtractedSource diff -Nru tkp-3.1.1/tkp/db/model.py tkp-4.0/tkp/db/model.py --- tkp-3.1.1/tkp/db/model.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/db/model.py 2017-02-03 10:06:41.000000000 +0000 @@ -3,20 +3,22 @@ revision history: + 40 - Move image data to seperate table for speed + 39 - Remove SQL insert functions, add dataset row to frequencyband table. Add image data. 
38 - add varmetric table 37 - add forcedfits_count column to runningcatalog 36 - switch to SQLAlchemy schema initialisation """ from sqlalchemy import Boolean, Column, DateTime, ForeignKey, Index,\ - Integer, SmallInteger, String, text, Sequence + Integer, SmallInteger, String, text, Sequence, LargeBinary from sqlalchemy.orm import relationship, backref from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION as Double -SCHEMA_VERSION = 38 +SCHEMA_VERSION = 40 Base = declarative_base() metadata = Base.metadata @@ -27,7 +29,9 @@ id = Column(Integer, primary_key=True) runcat_id = Column('runcat', ForeignKey('runningcatalog.id'), nullable=False, index=True) - runcat = relationship('Runningcatalog') + runcat = relationship('Runningcatalog', + backref=backref('assocskyrgns', + cascade="all,delete")) skyrgn_id = Column('skyrgn', ForeignKey('skyregion.id'), nullable=False, index=True) skyrgn = relationship('Skyregion') @@ -69,7 +73,8 @@ id = Column(Integer, primary_key=True) dataset_id = Column('dataset', ForeignKey('dataset.id'), nullable=False) - dataset = relationship('Dataset') + dataset = relationship('Dataset', + backref=backref('configs', cascade="all,delete")) section = Column(String(100)) key = Column(String(100)) @@ -79,6 +84,7 @@ seq_dataset = Sequence('seq_dataset') + class Dataset(Base): __tablename__ = 'dataset' @@ -110,7 +116,7 @@ id = Column(Integer, primary_key=True) image_id = Column('image', ForeignKey('image.id'), nullable=False, index=True) - image = relationship('Image') + image = relationship('Image', backref=backref('extractedsources', cascade="all,delete")) ff_runcat_id = Column('ff_runcat', ForeignKey('runningcatalog.id')) ff_runcat = relationship('Runningcatalog', primaryjoin='Extractedsource.ff_runcat_id == Runningcatalog.id') @@ -160,6 +166,9 @@ id = Column(Integer, seq_frequencyband, primary_key=True, server_default=seq_frequencyband.next_value()) + dataset_id = Column('dataset', Integer, ForeignKey('dataset.id'), + nullable=False, index=True) + dataset = relationship('Dataset', backref=backref('frequencybands', cascade="all,delete")) freq_central = Column(Double) freq_low = Column(Double) freq_high = Column(Double) @@ -175,13 +184,13 @@ server_default=seq_image.next_value()) dataset_id = Column('dataset', Integer, ForeignKey('dataset.id'), nullable=False, index=True) - dataset = relationship('Dataset', backref=backref('images')) + dataset = relationship('Dataset', backref=backref('images', cascade="delete")) band_id = Column('band', ForeignKey('frequencyband.id'), nullable=False, index=True) - band = relationship('Frequencyband') + band = relationship('Frequencyband', cascade="delete") skyrgn_id = Column('skyrgn', Integer, ForeignKey('skyregion.id'), nullable=False, index=True) - skyrgn = relationship('Skyregion', backref=backref('images')) + skyrgn = relationship('Skyregion', backref=backref('images', cascade="delete")) tau = Column(Integer) stokes = Column(SmallInteger, nullable=False, server_default=text("1")) @@ -205,6 +214,18 @@ node = Column(SmallInteger, nullable=False, server_default=text("1")) nodes = Column(SmallInteger, nullable=False, server_default=text("1")) + data = relationship("ImageData", uselist=False, back_populates="image") + + +class ImageData(Base): + __tablename__ = 'imagedata' + + id = Column(Integer, primary_key=True) + image_id = Column('image', Integer, ForeignKey('image.id'), nullable=False, index=True) + image = relationship("Image", back_populates="data") + fits_header = 
Column(String) + fits_data = Column(LargeBinary) + class Monitor(Base): __tablename__ = 'monitor' @@ -227,7 +248,8 @@ id = Column(Integer, primary_key=True) runcat_id = Column('runcat', ForeignKey('runningcatalog.id'), nullable=False, index=True) - runcat = relationship('Runningcatalog') + runcat = relationship('Runningcatalog', backref=backref("newsources", + cascade="all,delete")) trigger_xtrsrc_id = Column('trigger_xtrsrc', ForeignKey('extractedsource.id'), nullable=False, index=True) trigger_xtrsrc = relationship('Extractedsource') @@ -286,7 +308,9 @@ id = Column(Integer, primary_key=True) xtrsrc_id = Column('xtrsrc', ForeignKey('extractedsource.id'), nullable=False, unique=True) - xtrsrc = relationship('Extractedsource', primaryjoin='Runningcatalog.xtrsrc_id == Extractedsource.id') + xtrsrc = relationship('Extractedsource', + primaryjoin='Runningcatalog.xtrsrc_id == Extractedsource.id', + backref=backref('extractedsources', cascade="all,delete")) dataset_id = Column('dataset', ForeignKey('dataset.id'), nullable=False, index=True) dataset = relationship('Dataset') @@ -314,7 +338,8 @@ secondary='assocxtrsource', backref='runningcatalogs') - varmetric = relationship("Varmetric", uselist=False, backref="runcat") + varmetric = relationship("Varmetric", uselist=False, backref="runcat", + cascade="all,delete") class Varmetric(Base): @@ -323,14 +348,14 @@ id = Column(Integer, primary_key=True) runcat_id = Column('runcat', ForeignKey('runningcatalog.id'), - nullable=False, index=True) + nullable=False, index=True, unique=True) v_int = Column(Double, index=True) eta_int = Column(Double) band_id = Column('band', ForeignKey('frequencyband.id'), nullable=False, index=True) - band = relationship('Frequencyband') + band = relationship('Frequencyband', cascade="delete") newsource = Column(Integer) sigma_rms_max = Column(Double, index=True) @@ -350,10 +375,12 @@ id = Column(Integer, primary_key=True) runcat_id = Column('runcat', ForeignKey('runningcatalog.id'), nullable=False) - runcat = relationship('Runningcatalog') + runcat = relationship('Runningcatalog', + backref=backref('runningcatalogfluxs', + cascade="all,delete")) band_id = Column('band', ForeignKey('frequencyband.id'), nullable=False, index=True) - band = relationship('Frequencyband') + band = relationship('Frequencyband', cascade="delete") stokes = Column(SmallInteger, nullable=False, server_default=text("1")) f_datapoints = Column(Integer, nullable=False) @@ -378,7 +405,8 @@ server_default=seq_skyregion.next_value()) dataset_id = Column('dataset', ForeignKey('dataset.id'), nullable=False, index=True) - dataset = relationship('Dataset') + dataset = relationship('Dataset', + backref=backref('skyregions',cascade="all,delete")) centre_ra = Column(Double, nullable=False) centre_decl = Column(Double, nullable=False) @@ -403,7 +431,7 @@ dataset = relationship('Dataset') band_id = Column('band', ForeignKey('frequencyband.id'), nullable=False, index=True) - band = relationship('Frequencyband') + band = relationship('Frequencyband', cascade="delete") distance_arcsec = Column(Double, nullable=False) r = Column(Double, nullable=False) diff -Nru tkp-3.1.1/tkp/db/orm.py tkp-4.0/tkp/db/orm.py --- tkp-3.1.1/tkp/db/orm.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/db/orm.py 2017-02-03 10:06:41.000000000 +0000 @@ -114,9 +114,8 @@ import logging from tkp.db.generic import columns_from_table, set_columns_for_table -from tkp.db.general import (insert_dataset, insert_image, - insert_extracted_sources, lightcurve) -from tkp.db.associations import 
associate_extracted_sources +from tkp.db.general import insert_dataset +from tkp.db.alchemy.image import insert_image import tkp.db import tkp.db.quality from tkp.db.database import Database @@ -310,8 +309,6 @@ @property def id(self): """Add or obtain an id to/from the table - - This uses the SQL function insertDataset(). """ if self._id is None: try: @@ -331,34 +328,6 @@ image_ids = [row[0] for row in result] self.images = [Image(database=self.database, id=id) for id in image_ids] - def runcat_entries(self): - """ - Returns: - list: a list of dictionarys representing rows in runningcatalog, - for all sources belonging to this dataset - - Column 'id' is returned with the key 'runcat' - - Currently only returns 3 columns: - [{'runcat,'xtrsrc','datapoints'}] - """ - return columns_from_table('runningcatalog', - keywords=['id', 'xtrsrc', 'datapoints'], - alias={'id':'runcat'}, - where={'dataset':self.id}) - - - def frequency_bands(self): - """Return a list of distinct bands present in the dataset.""" - query = """\ - SELECT DISTINCT(band) - FROM image - WHERE dataset = %s - """ - self.database.cursor.execute(query, (self.id,)) - bands = zip(*self.database.cursor.fetchall())[0] - return bands - class Image(DBObject): """Class corresponding to the images table in the database""" @@ -370,8 +339,6 @@ 'deltax', 'deltay', 'url', 'centre_ra', 'centre_decl', 'xtr_radius', 'rms_qc') - - def __init__(self, data=None, dataset=None, database=None, id=None): """If id is supplied, the data and image arguments are ignored.""" super(Image, self).__init__(data=data, database=database, id=id) @@ -390,51 +357,28 @@ if not self.dataset: self.dataset = DataSet(id=self._data['dataset'], database=self.database) - - - # Inserting images is handled a little different than normal inserts - # -- We call an SQL function 'insertImage' which takes care of - # assigning a new image id. 
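    # In tkp-4.0 the id property below builds its keyword arguments from
    # self._data and hands them to tkp.db.alchemy.image.insert_image on the
    # shared SQLAlchemy session, committing the session to obtain the new id.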
@property def id(self): """Add or obtain an id to/from the table - This uses the SQL function insertImage() + If the ID does not exist the image is inserted into the database """ if self._id is None: + args = self._data.copy() + # somehow _data contains a garbage kwargs + args.pop('kwargs', None) + args['session'] = self.database.session try: - #if 'bsmaj' not in self._data: - # self._data['bsmaj'] = None - # self._data['bsmin'] = None - # self._data['bpa'] = None - # self._data['deltax'] = None - # self._data['deltay'] = None - # Insert a default image - self._id = insert_image(self.dataset.id, - self._data['freq_eff'], self._data['freq_bw'], - self._data['taustart_ts'], self._data['tau_time'], - self._data['beam_smaj_pix'], self._data['beam_smin_pix'], - self._data['beam_pa_rad'], - self._data['deltax'], - self._data['deltay'], - self._data['url'], - self._data['centre_ra'], #Degrees J2000 - self._data['centre_decl'], #Degrees J2000 - self._data['xtr_radius'], #Degrees - self._data['rms_qc'], - self._data.get('rms_min',None), - self._data.get('rms_max',None), - self._data.get('detection_thresh',None), - self._data.get('analysis_thresh',None), - ) + image = insert_image(**args) + self.database.session.commit() + self._id = image.id except Exception as e: logger.error("ORM: error inserting image, %s: %s" % (type(e).__name__, str(e))) raise return self._id - def update_sources(self): """Renew the set of sources by getting the sources for this image from the database @@ -458,47 +402,6 @@ self.sources = sources - def insert_extracted_sources(self, results, extract='blind'): - """Insert a list of sources - - Args: - - results (list): list of - utility.containers.ExtractionResult objects (as - returned from - sourcefinder.image.ImageData().extract()), or a list - of data tuples with the source information as follows: - (ra, dec, - ra_fit_err, dec_fit_err, - peak, peak_err, - flux, flux_err, - significance level, - beam major width (as), beam minor width(as), - beam parallactic angle - ew_sys_err, ns_sys_err, - error_radius). - extract (str):'blind', 'ff_nd' or 'ff_ms' - (see db.general.insert_extracted_sources) - """ - #To do: Figure out a saner method of passing the results around - # (Namedtuple, for starters?) - - insert_extracted_sources(self._id, results=results, extract_type=extract) - - def associate_extracted_sources(self, deRuiter_r, new_source_sigma_margin): - """Associate sources from the last images with previously - extracted sources within the same dataset - - Args: - - deRuiter_r (float): The De Ruiter radius for source - association. The default value is set through the - tkp.config module - """ - associate_extracted_sources(self._id, deRuiter_r, - new_source_sigma_margin=new_source_sigma_margin) - - class ExtractedSource(DBObject): """Class corresponding to the extractedsource table in the database""" @@ -527,18 +430,3 @@ raise ValueError( "can't create ExtractedSource object without a Database() object") self._init_data() - - def lightcurve(self): - """Obtain the complete light curve (within the current dataset) - for this source. 
- - Returns: - list: list of 5-tuples, each tuple being: - - observation start time as a datetime.datetime object - - integration time (float) - - integrated flux (float) - - integrated flux error (float) - - database ID of this particular source - """ - - return lightcurve(self._id) diff -Nru tkp-3.1.1/tkp/db/quality.py tkp-4.0/tkp/db/quality.py --- tkp-3.1.1/tkp/db/quality.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/db/quality.py 2017-02-03 10:06:41.000000000 +0000 @@ -21,7 +21,7 @@ Check if rejectreasons are in sync. If not, insert as needed and commit. Args: - session (sqlalchemy.orm.Session): Database session. + session (sqlalchemy.orm.session.Session): Database session. """ if session.query(Rejectreason).count() != len(reject_reasons): dbreason_id_rows = session.query(Rejectreason.id).all() @@ -43,7 +43,7 @@ imageid (int): The image ID of the image to reject reason (tkp.db.model.Rejectreason): Why is the image rejected comment (str): An optional comment with details about the reason - session (sqlalchemy.orm.Session): Database session. + session (sqlalchemy.orm.session.Session): Database session. """ r = Rejection(image_id=imageid, rejectreason_id=reason.id, @@ -58,7 +58,7 @@ Args: imageid: The image ID - session (sqlalchemy.orm.Session): Database session. + session (sqlalchemy.orm.session.Session): Database session. """ session.query(Rejection).filter( Rejection.image_id == imageid).delete() @@ -69,7 +69,7 @@ Find out if an image is rejected or not Args: imageid: The image ID - session (sqlalchemy.orm.Session): Database session. + session (sqlalchemy.orm.session.Session): Database session. Returns: list: Empty if not rejected, a list of strings formatted as '{description}: {comment}' if rejected. diff -Nru tkp-3.1.1/tkp/db/sql/populate.py tkp-4.0/tkp/db/sql/populate.py --- tkp-3.1.1/tkp/db/sql/populate.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/db/sql/populate.py 2017-02-03 10:06:41.000000000 +0000 @@ -5,7 +5,7 @@ import re import sys import tkp -from tkp.db.database import DB_VERSION +from tkp.db.model import SCHEMA_VERSION import tkp.db.database import tkp.db.model import tkp.db.quality @@ -18,13 +18,6 @@ from tkp.db.sql.preprocessor import dialectise -# use these to replace strings in the SQL files -tokens = ( - ('%NODE%', '1'), - ('%NODES%', '1'), - ('%VERSION%', str(DB_VERSION)) -) - def get_input(text): """ @@ -174,23 +167,21 @@ get_database_config(dbconfig, apply=True) database = tkp.db.database.Database() - database.connect() + database.connect(check=False) if dbconfig['destroy']: destroy(dbconfig) - session = database.Session() - if dbconfig['engine'] == 'postgresql': # make sure plpgsql is enabled try: - session.execute("CREATE LANGUAGE plpgsql;") + database.session.execute("CREATE LANGUAGE plpgsql;") except ProgrammingError: - session.rollback() + database.session.rollback() if dbconfig['engine'] == 'monetdb': - set_monetdb_schema(session, dbconfig) + set_monetdb_schema(database.session, dbconfig) # reconnect to switch to schema - session.commit() + database.session.commit() database.reconnect() batch_file = os.path.join(sql_repo, 'batch') @@ -200,12 +191,11 @@ tkp.db.model.Base.metadata.create_all(database.alchemy_engine) - version = tkp.db.model.Version(name='revision', value=tkp.db.model.SCHEMA_VERSION) - session.add(version) + database.session.add(version) - tkp.db.quality.sync_rejectreasons(session) + tkp.db.quality.sync_rejectreasons(database.session) for line in [l.strip() for l in open(batch_file) if not l.startswith("#")]: if not line: # 
skip empty lines @@ -214,14 +204,16 @@ sql_file = os.path.join(sql_repo, line) with open(sql_file) as sql_handler: sql = sql_handler.read() - dialected = dialectise(sql, dbconfig['engine'], tokens).strip() + dialected = dialectise(sql, dbconfig['engine']).strip() if not dialected: # empty query, can happen continue try: - session.execute(dialected) + database.session.execute(dialected) except Exception as e: sys.stderr.write(error % sql_file) raise - session.commit() + + database.session.commit() + database.close() diff -Nru tkp-3.1.1/tkp/db/sql/preprocessor.py tkp-4.0/tkp/db/sql/preprocessor.py --- tkp-3.1.1/tkp/db/sql/preprocessor.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/db/sql/preprocessor.py 2017-02-03 10:06:41.000000000 +0000 @@ -16,6 +16,8 @@ # {%% endifdb %%}: match the end tag # \s*: match zero or more spaces # \n?: match zero or one newline + + ifdb_regexp = r'{%% ifdb %(dialect)s %%}\n?(.*?){%% endifdb %%}' diff -Nru tkp-3.1.1/tkp/db/sql/statements/batch tkp-4.0/tkp/db/sql/statements/batch --- tkp-3.1.1/tkp/db/sql/statements/batch 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/db/sql/statements/batch 2017-02-03 10:06:41.000000000 +0000 @@ -1,24 +1,4 @@ -# -# ordered list of sql files that should be imported -# -#system functions -# functions/degrad.sql functions/alpha.sql functions/cartesian.sql -functions/getBand.sql -functions/updateSkyRgnMembers.sql -functions/getSkyRgn.sql -functions/insertImage.sql -functions/insertDataset.sql functions/median.sql -procedures/BuildFrequencyBands.sql -procedures/BuildNodes.sql -# -# initialisation -# -init/tables.sql -# -# views -# -views/augmented_runningcatalog.sql diff -Nru tkp-3.1.1/tkp/db/sql/statements/functions/getBand.sql tkp-4.0/tkp/db/sql/statements/functions/getBand.sql --- tkp-3.1.1/tkp/db/sql/statements/functions/getBand.sql 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/db/sql/statements/functions/getBand.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1,104 +0,0 @@ ---DROP FUNCTION getBand; - -CREATE FUNCTION getBand(ifreq_eff DOUBLE PRECISION, ibandwidth DOUBLE PRECISION) -RETURNS SMALLINT - -{% ifdb monetdb %} - -BEGIN - - DECLARE nfreqbandid INT; - DECLARE ifreqbandid SMALLINT; - DECLARE ofreqbandid SMALLINT; - - -- We allow a small tolerance (of 1.0) to allow for rounding errors. - SELECT COUNT(*) - INTO nfreqbandid - FROM frequencyband - WHERE ABS(freq_central - ifreq_eff) <= 1.0 - AND ABS(freq_high - freq_low - ibandwidth) <= 1.0 - ; - - -- Due to the small tolerance above, in a corner case we might have two very - -- similar bands which match our criteria. We arbitrarily choose one of - -- them. - IF nfreqbandid >= 1 THEN - SELECT MAX(id) - INTO ifreqbandid - FROM frequencyband - WHERE ABS(freq_central - ifreq_eff) <= 1.0 - AND ABS(freq_high - freq_low - ibandwidth) <= 1.0 - ; - ELSE - SELECT NEXT VALUE FOR seq_frequencyband INTO ifreqbandid; - INSERT INTO frequencyband - (id - ,freq_central - ,freq_low - ,freq_high - ) - VALUES - (ifreqbandid - ,ifreq_eff - ,ifreq_eff - (ibandwidth / 2) - ,ifreq_eff + (ibandwidth / 2) - ) - ; - END IF; - - SET ofreqbandid = ifreqbandid; - RETURN ofreqbandid; - -END; - -{% endifdb %} - - - -{% ifdb postgresql %} - -AS $$ - - DECLARE nfreqbandid INT; - DECLARE ifreqbandid SMALLINT; - -BEGIN - -- We allow a small tolerance (of 1.0) to allow for rounding errors. 
- SELECT COUNT(*) - INTO nfreqbandid - FROM frequencyband - WHERE ABS(freq_central - ifreq_eff) <= 1.0 - AND ABS(freq_high - freq_low - ibandwidth) <= 1.0 - ; - - -- Due to the small tolerance above, in a corner case we might have two very - -- similar bands which match our criteria. We arbitrarily choose one of - -- them. - IF nfreqbandid >= 1 THEN - SELECT MAX(id) - INTO ifreqbandid - FROM frequencyband - WHERE ABS(freq_central - ifreq_eff) <= 1.0 - AND ABS(freq_high - freq_low - ibandwidth) <= 1.0 - ; - ELSE - INSERT INTO frequencyband - (freq_central - ,freq_low - ,freq_high - ) - VALUES - (ifreq_eff - ,ifreq_eff - (ibandwidth / 2) - ,ifreq_eff + (ibandwidth / 2) - ) - RETURNING id into ifreqbandid - ; - END IF; - - RETURN ifreqbandid; -END; - -$$ LANGUAGE plpgsql; - -{% endifdb %} \ No newline at end of file diff -Nru tkp-3.1.1/tkp/db/sql/statements/functions/getSkyRgn.sql tkp-4.0/tkp/db/sql/statements/functions/getSkyRgn.sql --- tkp-3.1.1/tkp/db/sql/statements/functions/getSkyRgn.sql 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/db/sql/statements/functions/getSkyRgn.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1,141 +0,0 @@ ---DROP FUNCTION getSkyRgn; - -/* - * This function gets an id for a skyregion, - * given a pair of central co-ordinates and a radius. - * - * If no matching skyregion is found, a new one is inserted. - * In this case we also trigger execution of `updateSkyRgnMembers` for the new - * skyregion - this performs a simple assocation with current members of the - * runningcatalog to find sources that should be visible in the new skyregion, - * and updates the assocskyrgn table accordingly. - */ - -CREATE FUNCTION getSkyRgn(idataset INTEGER, icentre_ra DOUBLE PRECISION, - icentre_decl DOUBLE PRECISION, - ixtr_radius DOUBLE PRECISION) -RETURNS INT - -{% ifdb postgresql %} -AS $$ - DECLARE nskyrgn INT; - DECLARE oskyrgnid INT; - DECLARE dummy DOUBLE PRECISION; -BEGIN - - SELECT COUNT(*) - INTO nskyrgn - FROM skyregion - WHERE dataset = idataset - AND centre_ra = icentre_ra - AND centre_decl = icentre_decl - AND xtr_radius = ixtr_radius - ; - - IF nskyrgn = 1 THEN - SELECT id - INTO oskyrgnid - FROM skyregion - WHERE dataset = idataset - AND centre_ra = icentre_ra - AND centre_decl = icentre_decl - AND xtr_radius = ixtr_radius - ; - ELSE - - INSERT INTO skyregion - (dataset - ,centre_ra - ,centre_decl - ,xtr_radius - ,x - ,y - ,z - ) - SELECT idataset - ,icentre_ra - ,icentre_decl - ,ixtr_radius - ,cart.x - ,cart.y - ,cart.z - FROM (SELECT * - FROM cartesian(icentre_ra,icentre_decl) - ) cart - RETURNING id into oskyrgnid - ; - - SELECT updateSkyRgnMembers(oskyrgnid) - INTO dummy; - - END IF; - - RETURN oskyrgnid; - -END; - -$$ LANGUAGE plpgsql; -{% endifdb %} - - - -{% ifdb monetdb %} -BEGIN - - DECLARE nskyrgn INT; - DECLARE oskyrgnid INT; - - SELECT COUNT(*) - INTO nskyrgn - FROM skyregion - WHERE dataset = idataset - AND centre_ra = icentre_ra - AND centre_decl = icentre_decl - AND xtr_radius = ixtr_radius - ; - - IF nskyrgn = 1 THEN - SELECT id - INTO oskyrgnid - FROM skyregion - WHERE dataset = idataset - AND centre_ra = icentre_ra - AND centre_decl = icentre_decl - AND xtr_radius = ixtr_radius - ; - ELSE - SELECT NEXT VALUE FOR seq_skyregion INTO oskyrgnid; - - INSERT INTO skyregion - (id - ,dataset - ,centre_ra - ,centre_decl - ,xtr_radius - ,x - ,y - ,z - ) - SELECT oskyrgnid - ,idataset - ,icentre_ra - ,icentre_decl - ,ixtr_radius - ,cart.x - ,cart.y - ,cart.z - FROM (SELECT * - FROM cartesian(icentre_ra,icentre_decl) - ) cart - ; - - DECLARE dummy DOUBLE; - 
SELECT updateSkyRgnMembers(oskyrgnid) - INTO dummy; - - END IF; - - RETURN oskyrgnid; - -END; -{% endifdb %} \ No newline at end of file diff -Nru tkp-3.1.1/tkp/db/sql/statements/functions/insertDataset.sql tkp-4.0/tkp/db/sql/statements/functions/insertDataset.sql --- tkp-3.1.1/tkp/db/sql/statements/functions/insertDataset.sql 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/db/sql/statements/functions/insertDataset.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1,94 +0,0 @@ ---DROP FUNCTION insertDataset; - -/** - * This function inserts a row in the datasets table, - * and returns the value of the id under which it is known. - * If the dataset name (description field) already exists, a new row is added - * and the rerun value is incremented by 1. If not, it is set - * to its default value 0. - * - * Note: To be able to create a function that modifies data - * (by insertion) we have to set the global bin log var: - * mysql> SET GLOBAL log_bin_trust_function_creators = 1; - * - */ -CREATE FUNCTION insertDataset(idescription VARCHAR(100)) RETURNS INT - -{% ifdb monetdb %} -BEGIN - - DECLARE idsid INT; - DECLARE odsid INT; - DECLARE irerun INT; - - SELECT MAX(rerun) - INTO irerun - FROM dataset - WHERE description = idescription - ; - - IF irerun IS NULL THEN - SET irerun = 0; - ELSE - SET irerun = irerun + 1; - END IF; - - SELECT NEXT VALUE FOR seq_dataset INTO idsid; - - INSERT INTO dataset - (id - ,rerun - ,process_start_ts - ,description - ) - VALUES - (idsid - ,irerun - ,NOW() - ,idescription - ) - ; - - SET odsid = idsid; - RETURN odsid; - -END; -{% endifdb %} - - -{% ifdb postgresql %} -AS $$ - DECLARE idsid INT; - DECLARE odsid INT; - DECLARE irerun INT; -BEGIN - SELECT MAX(rerun) - INTO irerun - FROM dataset - WHERE description = idescription - ; - - IF irerun IS NULL THEN - irerun := 0; - ELSE - irerun := irerun + 1; - END IF; - - INSERT INTO dataset - (rerun - ,process_start_ts - ,description - ) - VALUES - (irerun - ,NOW() - ,idescription - ) - ; - - RETURN lastval(); - -END; - -$$ LANGUAGE plpgsql; -{% endifdb %} diff -Nru tkp-3.1.1/tkp/db/sql/statements/functions/insertImage.sql tkp-4.0/tkp/db/sql/statements/functions/insertImage.sql --- tkp-3.1.1/tkp/db/sql/statements/functions/insertImage.sql 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/db/sql/statements/functions/insertImage.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1,168 +0,0 @@ ---DROP FUNCTION insertImage; - -/** - * This function inserts a row in the image table, - * and returns the value of the id under which it is known. - * - * Note I: To be able to create a function that modifies data - * (by insertion) we have to set the global bin log var: - * mysql> SET GLOBAL log_bin_trust_function_creators = 1; - * - * Note II: The params in comment should be specified soon. - * This means this function inserts deafult values so long. - * - * Note III: Two subroutines are called, getBand and getSkyRgn. - * These return: - * - A matching band_id. Bands are always 1 MHz wide and centred on the - * effective frequency rounded to the nearest MHz (see #4801). - * - A matching skyregion_id, given the field centre and extraction radius. 
- * - */ -CREATE FUNCTION insertImage(idataset INT - ,itau_time DOUBLE PRECISION - ,ifreq_eff DOUBLE PRECISION - ,ifreq_bw DOUBLE PRECISION - ,itaustart_ts TIMESTAMP - ,irb_smaj DOUBLE PRECISION - ,irb_smin DOUBLE PRECISION - ,irb_pa DOUBLE PRECISION - ,ideltax DOUBLE PRECISION - ,ideltay DOUBLE PRECISION - ,iurl VARCHAR(1024) - ,icentre_ra DOUBLE PRECISION - ,icentre_decl DOUBLE PRECISION - ,ixtr_radius DOUBLE PRECISION - ,irms_qc DOUBLE PRECISION - ,irms_min DOUBLE PRECISION - ,irms_max DOUBLE PRECISION - ,idetection_thresh DOUBLE PRECISION - ,ianalysis_thresh DOUBLE PRECISION - ) RETURNS INT - - -{% ifdb postgresql %} -AS $$ - DECLARE iimageid INT; - DECLARE oimageid INT; - DECLARE iband SMALLINT; - DECLARE itau INT; - DECLARE iskyrgn INT; - -BEGIN - iband := getBand(1e6 * FLOOR(ifreq_eff/1e6 + 0.5), 1e6); - iskyrgn := getSkyRgn(idataset, icentre_ra, icentre_decl, ixtr_radius); - - INSERT INTO image - (dataset - ,band - ,tau_time - ,freq_eff - ,freq_bw - ,taustart_ts - ,skyrgn - ,rb_smaj - ,rb_smin - ,rb_pa - ,deltax - ,deltay - ,url - ,rms_qc - ,rms_min - ,rms_max - ,detection_thresh - ,analysis_thresh - ) - VALUES - (idataset - ,iband - ,itau_time - ,ifreq_eff - ,ifreq_bw - ,itaustart_ts - ,iskyrgn - ,irb_smaj - ,irb_smin - ,irb_pa - ,ideltax - ,ideltay - ,iurl - ,irms_qc - ,irms_min - ,irms_max - ,idetection_thresh - ,ianalysis_thresh - ) - RETURNING id INTO oimageid - ; - - RETURN oimageid; - -END; - -$$ LANGUAGE plpgsql; -{% endifdb %} - - -{% ifdb monetdb %} -BEGIN - - DECLARE iimageid INT; - DECLARE oimageid INT; - DECLARE iband SMALLINT; - DECLARE itau INT; - DECLARE iskyrgn INT; - - SET iband = getBand(1e6 * FLOOR(ifreq_eff/1e6 + 0.5), 1e6); - SET iskyrgn = getSkyRgn(idataset, icentre_ra, icentre_decl, ixtr_radius); - - SELECT NEXT VALUE FOR seq_image INTO iimageid; - - INSERT INTO image - (id - ,dataset - ,band - ,tau_time - ,freq_eff - ,freq_bw - ,taustart_ts - ,skyrgn - ,rb_smaj - ,rb_smin - ,rb_pa - ,deltax - ,deltay - ,url - ,rms_qc - ,rms_min - ,rms_max - ,detection_thresh - ,analysis_thresh - ) - VALUES - (iimageid - ,idataset - ,iband - ,itau_time - ,ifreq_eff - ,ifreq_bw - ,itaustart_ts - ,iskyrgn - ,irb_smaj - ,irb_smin - ,irb_pa - ,ideltax - ,ideltay - ,iurl - ,irms_qc - ,irms_min - ,irms_max - ,idetection_thresh - ,ianalysis_thresh - ) - ; - - SET oimageid = iimageid; - RETURN oimageid; - -END; -{% endifdb %} diff -Nru tkp-3.1.1/tkp/db/sql/statements/functions/updateSkyRgnMembers.sql tkp-4.0/tkp/db/sql/statements/functions/updateSkyRgnMembers.sql --- tkp-3.1.1/tkp/db/sql/statements/functions/updateSkyRgnMembers.sql 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/db/sql/statements/functions/updateSkyRgnMembers.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1,90 +0,0 @@ ---DROP FUNCTION updateSkyRgnMembers; - -/* - * This function performs a simple distance-check against current members of the - * runningcatalog to find sources that should be visible in the given skyregion, - * and updates the assocskyrgn table accordingly. - * - * Any previous entries in assocskyrgn relating to this skyregion are - * deleted first. - * - * Note 1. We use the variable 'inter' to cache the extraction_radius as transformed - * onto the unit sphere, so this does not have to be recalculated for every - * comparison. - * - * - * Note 2. (To Do:) This distance check could be made more efficient by - * restricting to a range of RA values, as we do with the Dec. - * However, this optimization is complicated by the meridian wrap-around issue. 
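[Editor's note] Note 1 above describes caching the extraction radius as a chord length on the unit sphere, so the per-source test reduces to a squared Cartesian distance comparison. A standalone sketch of that arithmetic; the radius and coordinates are illustrative, not taken from the code:

    import math

    xtr_radius = 4.0                                          # extraction radius in degrees
    inter = 2.0 * math.sin(math.radians(xtr_radius) / 2.0)    # chord length on the unit sphere
    inter_sq = inter * inter                                  # computed once per sky region

    def in_region(rx, ry, rz, sx, sy, sz):
        # (rx, ry, rz): runningcatalog unit vector; (sx, sy, sz): region centre.
        d_sq = (rx - sx) ** 2 + (ry - sy) ** 2 + (rz - sz) ** 2
        return d_sq < inter_sq

    # The angular separation in degrees is recovered from a chord length d as:
    # math.degrees(2.0 * math.asin(d / 2.0))
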
- * - */ -CREATE FUNCTION updateSkyRgnMembers(isky_rgn_id INTEGER) -RETURNS DOUBLE PRECISION - -{% ifdb postgresql %} -AS $$ - DECLARE inter DOUBLE PRECISION; - DECLARE inter_sq DOUBLE PRECISION; -BEGIN - DELETE - FROM assocskyrgn - WHERE assocskyrgn.skyrgn = isky_rgn_id - ; - - inter := (SELECT 2.0 * SIN(RADIANS(xtr_radius) / 2.0) - FROM skyregion - WHERE id=isky_rgn_id); - - inter_sq := inter * inter; -{% endifdb %} - - -{% ifdb monetdb %} -BEGIN - DECLARE inter DOUBLE PRECISION; - DECLARE inter_sq DOUBLE PRECISION; - - DELETE - FROM assocskyrgn - WHERE assocskyrgn.skyrgn = isky_rgn_id - ; - - SET inter = (SELECT 2.0*SIN(RADIANS(xtr_radius)/2.0) - FROM skyregion - WHERE id=isky_rgn_id); - - SET inter_sq = inter*inter; -{% endifdb %} - - - INSERT INTO assocskyrgn - ( - runcat - ,skyrgn - ,distance_deg - ) - SELECT rc.id as runcat - ,sky.id as skyrgn - ,DEGREES(2 * ASIN(SQRT( (rc.x - sky.x) * (rc.x - sky.x) - + (rc.y - sky.y) * (rc.y - sky.y) - + (rc.z - sky.z) * (rc.z - sky.z) - ) / 2 ) - ) - FROM skyregion sky - ,runningcatalog rc - WHERE sky.id = isky_rgn_id - AND rc.dataset = sky.dataset - AND rc.wm_decl BETWEEN sky.centre_decl - sky.xtr_radius - AND sky.centre_decl + sky.xtr_radius - AND ( (rc.x - sky.x) * (rc.x - sky.x) - + (rc.y - sky.y) * (rc.y - sky.y) - + (rc.z - sky.z) * (rc.z - sky.z) - ) < inter_sq - ; - - RETURN inter; -END; - -{% ifdb postgresql %} -$$ LANGUAGE plpgsql; -{% endifdb %} \ No newline at end of file diff -Nru tkp-3.1.1/tkp/db/sql/statements/init/tables.sql tkp-4.0/tkp/db/sql/statements/init/tables.sql --- tkp-3.1.1/tkp/db/sql/statements/init/tables.sql 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/db/sql/statements/init/tables.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1,13 +0,0 @@ -{% ifdb monetdb %} -CALL BuildFrequencyBands(); -{% endifdb %} - -{% ifdb postgresql %} -SELECT BuildFrequencyBands(); -{% endifdb %} - - - - ---CALL BuildNodes(-90,90, FALSE); - diff -Nru tkp-3.1.1/tkp/db/sql/statements/procedures/BuildFrequencyBands.sql tkp-4.0/tkp/db/sql/statements/procedures/BuildFrequencyBands.sql --- tkp-3.1.1/tkp/db/sql/statements/procedures/BuildFrequencyBands.sql 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/db/sql/statements/procedures/BuildFrequencyBands.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1,156 +0,0 @@ ---DROP PROCEDURE BuildFrequencyBands; - -/** - */ -{% ifdb monetdb %} -CREATE PROCEDURE BuildFrequencyBands() -{% endifdb %} - -{% ifdb postgresql %} -CREATE OR REPLACE FUNCTION BuildFrequencyBands() -RETURNS void -AS $$ -{% endifdb %} - - -BEGIN - - /* Some cataloged sources do not have spectral information - (e.g. 
exoplanets) and are referred to freq 0 - */ - INSERT INTO frequencyband - (id - ,freq_central - ,freq_low - ,freq_high - ) - VALUES - (0 - ,NULL - ,NULL - ,NULL - ) - ; - - INSERT INTO frequencyband - (freq_central - ,freq_low - ,freq_high - ) - VALUES - (31000000 - ,30000000 - ,32000000 - ) - , - (37000000 - ,36000000 - ,38000000 - ) - , - (43000000 - ,42000000 - ,44000000 - ) - , - (49000000 - ,48000000 - ,50000000 - ) - , - (54000000 - ,53000000 - ,55000000 - ) - , - (60000000 - ,59000000 - ,61000000 - ) - , - (66000000 - ,65000000 - ,67000000 - ) - , - (74000000 - ,73000000 - ,75000000 - ) - , - (120000000 - ,120000000 - 350000 / 2 - ,120000000 + 350000 / 2 - ) - , - (130000000 - ,130000000 - 450000 / 2 - ,130000000 + 450000 / 2 - ) - , - (140000000 - ,140000000 - 550000 / 2 - ,140000000 + 550000 / 2 - ) - , - (150000000 - ,150000000 - 700000 / 2 - ,150000000 + 700000 / 2 - ) - , - (160000000 - ,160000000 - 850000 / 2 - ,160000000 + 850000 / 2 - ) - , - (170000000 - ,170000000 - 1100000 / 2 - ,170000000 + 1100000 / 2 - ) - , - (325000000 - ,325000000 - 10000000 / 2 - ,325000000 + 10000000 / 2 - ) - , - (352000000 - ,352000000 - 20000000 / 2 - ,352000000 + 20000000 / 2 - ) - , - (640000000 - ,640000000 - 100000000 / 2 - ,640000000 + 100000000 / 2 - ) - , - (850000000 - ,850000000 - 100000000 / 2 - ,850000000 + 100000000 / 2 - ) - , - (1400000000 - ,1400000000 - 260000000 / 2 - ,1400000000 + 260000000 / 2 - ) - , - (2300000000 - ,2300000000 - 250000000 / 2 - ,2300000000 + 250000000 / 2 - ) - , - (4800000000 - ,4800000000 - 250000000 / 2 - ,4800000000 + 250000000 / 2 - ) - , - (8500000000 - ,8500000000 - 250000000 / 2 - ,8500000000 + 250000000 / 2 - ) - ; - -END; - -{% ifdb postgresql %} -$$ LANGUAGE plpgsql; -{% endifdb %} \ No newline at end of file diff -Nru tkp-3.1.1/tkp/db/sql/statements/procedures/BuildNodes.sql tkp-4.0/tkp/db/sql/statements/procedures/BuildNodes.sql --- tkp-3.1.1/tkp/db/sql/statements/procedures/BuildNodes.sql 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/db/sql/statements/procedures/BuildNodes.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1,89 +0,0 @@ ---DROP PROCEDURE BuildNodes; - -/** - * This procedure builds the zones table according to - * the input zoneheight and theta (both in degrees). - * ATTENTION: - * The zone column in the extractedsources table will NOT be modified! - * It is best to run this before an observation, - * i.e. at initialisation time, - * and when you have an idea about the zoneheight. - * TODO: Find out what a good zoneheight will be. 
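[Editor's note] The BuildNodes body that follows simply loops over integer declination zones and inserts one row per zone with a fixed zoneheight of 1.0 degree. A minimal Python equivalent of that loop, assuming a DB-API cursor with format-style parameters (e.g. psycopg2); the helper name is mine, not the project's:

    def build_nodes(cursor, node_min, node_max, max_incl=False):
        # One row per integer declination zone, all with zoneheight 1.0 degree.
        stop = node_max + 1 if max_incl else node_max
        for zone in range(node_min, stop):
            cursor.execute("INSERT INTO node (zone, zoneheight) VALUES (%s, %s)",
                           (zone, 1.0))
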
- */ - - {% ifdb monetdb %} -CREATE PROCEDURE BuildNodes(inode_min INT, inode_max INT, max_incl BOOLEAN) - -BEGIN - DECLARE izone INT; - - SET izone = inode_min; - IF max_incl THEN - WHILE izone <= inode_max DO - INSERT INTO node - (zone - ,zoneheight - ) - VALUES - (izone - ,1.0 - ) - ; - SET izone = izone + 1; - END WHILE; - ELSE - WHILE izone < inode_max DO - INSERT INTO node - (zone - ,zoneheight - ) - VALUES - (izone - ,1.0 - ) - ; - SET izone = izone + 1; - END WHILE; - END IF; - -END; - -{% endifdb %} - -{% ifdb postgresql %} -CREATE OR REPLACE FUNCTION BuildNodes(inode_min INT, inode_max INT, max_incl BOOLEAN) -RETURNS void -AS $$ - DECLARE izone INT; -BEGIN - izone := inode_min; - IF max_incl THEN - WHILE izone <= inode_max LOOP - INSERT INTO node - (zone - ,zoneheight - ) - VALUES - (izone - ,1.0 - ) - ; - izone := izone + 1; - END LOOP; - ELSE - WHILE izone < inode_max LOOP - INSERT INTO node - (zone - ,zoneheight - ) - VALUES - (izone - ,1.0 - ) - ; - izone := izone + 1; - END LOOP; - END IF; -END; -$$ LANGUAGE plpgsql; -{% endifdb %} \ No newline at end of file diff -Nru tkp-3.1.1/tkp/db/sql/statements/views/augmented_runningcatalog.sql tkp-4.0/tkp/db/sql/statements/views/augmented_runningcatalog.sql --- tkp-3.1.1/tkp/db/sql/statements/views/augmented_runningcatalog.sql 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/db/sql/statements/views/augmented_runningcatalog.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1,92 +0,0 @@ --- this view is used in Banana to compute columns that are impossible to --- create with the django orm. --- It Calculate sigma_min, sigma_max, v_int, eta_int and the max and avg --- values for lightcurves, all foor all runningcatalogs - --- It starts by getting the extracted source from latest image for a runcat. --- This is arbitrary, since you have multiple bands. We pick the band with the --- max integrated flux. Now we have v_int and eta_int. --- The flux is then devided by the RMS_max and RMS_min of the previous image --- (stored in newsource.previous_limits_image) to obtain sigma_max and sigma_min. - -CREATE VIEW augmented_runningcatalog AS - SELECT r.id - /* and finally construct the final table */ - ,r.wm_ra - ,r.wm_decl - ,r.wm_uncertainty_ew - ,r.wm_uncertainty_ns - ,r.xtrsrc - ,r.dataset - ,r.datapoints - ,match_assoc.v_int - ,match_assoc.eta_int - ,match_img.band - ,newsrc_trigger.id as newsource - ,newsrc_trigger.sigma_rms_max - ,newsrc_trigger.sigma_rms_min - ,MAX(agg_ex.f_int) AS lightcurve_max - ,AVG(agg_ex.f_int) AS lightcurve_avg - - {% ifdb postgresql %} - ,median(agg_ex.f_int) AS lightcurve_median - {% endifdb %} - {% ifdb monetdb %} - ,sys.median(agg_ex.f_int) AS lightcurve_median - {% endifdb %} - - FROM ( /* Select peak flux per runcat at last timestep (over all bands) */ - SELECT a_1.runcat AS runcat_id - ,MAX(e_1.f_int) AS max_flux - FROM (SELECT MAX(a_2.id) AS assoc_id - FROM assocxtrsource a_2 - JOIN runningcatalog r_1 ON a_2.runcat = r_1.id - JOIN extractedsource e_2 ON a_2.xtrsrc = e_2.id - JOIN image i_2 ON e_2.image = i_2.id - GROUP BY r_1.id, i_2.band - ) last_ts_per_band /* maximum timestamps per runcat and band */ - JOIN assocxtrsource a_1 ON a_1.id = last_ts_per_band.assoc_id - JOIN extractedsource e_1 ON a_1.xtrsrc = e_1.id - GROUP BY a_1.runcat - ) last_ts_fmax - /* Pull out the matching var. indices. 
at last timestep, - matched via runcat id, flux val: */ - JOIN assocxtrsource match_assoc - ON match_assoc.runcat = last_ts_fmax.runcat_id - JOIN extractedsource match_ex - ON match_assoc.xtrsrc = match_ex.id AND match_ex.f_int = last_ts_fmax.max_flux - JOIN runningcatalog r ON r.id = last_ts_fmax.runcat_id - JOIN image match_img on match_ex.image = match_img.id - LEFT JOIN ( - /* Grab newsource /trigger details where possible */ - SELECT n.id - ,n.runcat as rc_id - ,(e2.f_int/i.rms_min) as sigma_rms_min - ,(e2.f_int/i.rms_max) as sigma_rms_max - FROM newsource n - JOIN extractedsource e2 ON e2.id = n.trigger_xtrsrc - JOIN image i ON i.id = n.previous_limits_image - ) as newsrc_trigger - ON newsrc_trigger.rc_id = r.id - /* and we need to join these again to calculate max and avg for lightcurve */ - /* I.e. the aggregate values */ - JOIN assocxtrsource agg_assoc ON r.id = agg_assoc.runcat - JOIN extractedsource agg_ex ON agg_assoc.xtrsrc = agg_ex.id - JOIN image agg_img ON agg_ex.image = agg_img.id - AND agg_img.band = match_img.band - GROUP BY r.id - ,r.wm_ra - ,r.wm_decl - ,r.wm_uncertainty_ew - ,r.wm_uncertainty_ns - ,r.xtrsrc - ,r.dataset - ,r.datapoints - ,match_assoc.v_int - ,match_assoc.eta_int - ,match_img.band - ,newsrc_trigger.id - ,newsrc_trigger.sigma_rms_max - ,newsrc_trigger.sigma_rms_min - -; \ No newline at end of file diff -Nru tkp-3.1.1/tkp/distribute/multiproc/__init__.py tkp-4.0/tkp/distribute/multiproc/__init__.py --- tkp-3.1.1/tkp/distribute/multiproc/__init__.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/distribute/multiproc/__init__.py 2017-02-03 10:06:41.000000000 +0000 @@ -3,9 +3,27 @@ module. the Pool.map function only accepts one argument, so we need to zip the iterable together with the arguments. """ -from multiprocessing import Pool, cpu_count +import sys +from multiprocessing import Pool, cpu_count, log_to_stderr -pool = Pool(processes=cpu_count()) +import logging +import atexit + +# use this for debugging. 
Will not fork processes but run everything threaded +THREADED = False + +if THREADED: + from multiprocessing.pool import ThreadPool + pool = ThreadPool(processes=cpu_count()) +else: + pool = Pool(processes=cpu_count()) + +atexit.register(lambda: pool.terminate()) + +logger = log_to_stderr() +logger.setLevel(logging.WARNING) +logger.info("initialising multiprocessing module with " + "{} cores".format(cpu_count())) def set_cores(cores=0): @@ -20,5 +38,9 @@ def map(func, iterable, args): zipped = ((i, args) for i in iterable) - return pool.map(func, zipped) - + try: + return pool.map_async(func, zipped).get(9999999) + except KeyboardInterrupt: + pool.terminate() + print "You pressed CTRL-C, exiting" + sys.exit(1) diff -Nru tkp-3.1.1/tkp/distribute/multiproc/tasks.py tkp-4.0/tkp/distribute/multiproc/tasks.py --- tkp-3.1.1/tkp/distribute/multiproc/tasks.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/distribute/multiproc/tasks.py 2017-02-03 10:06:41.000000000 +0000 @@ -4,28 +4,60 @@ from __future__ import absolute_import import logging import tkp.steps - +from tkp.steps.misc import ImageMetadataForSort +from tkp.steps.forced_fitting import perform_forced_fits logger = logging.getLogger(__name__) -def persistence_node_step(zipped): - logger.info("running persistence task") +def extract_metadatas(zipped): + logger.debug("running extract metadatas task") + images, args = zipped + sigma, f = args + return tkp.steps.persistence.extract_metadatas(images, sigma, f) + + +def open_as_fits(zipped): + logger.debug("opening files as fits objects") images, args = zipped - image_cache_config, sigma, f = args - return tkp.steps.persistence.node_steps(images, image_cache_config, - sigma, f) + return list(tkp.steps.persistence.paths_to_fits(images)) def quality_reject_check(zipped): - logger.info("running quality task") + logger.debug("running quality task") url, args = zipped job_config = args[0] return tkp.steps.quality.reject_check(url, job_config) def extract_sources(zipped): - logger.info("running extracted sources task") - url, args = zipped + logger.debug("running extracted sources task") + accessor, args = zipped extraction_params = args[0] - return tkp.steps.source_extraction.extract_sources(url, extraction_params) + return tkp.steps.source_extraction.extract_sources(accessor, + extraction_params) + + +def forced_fits(zipped): + logger.debug("running forced fits task") + accessor, db_image_id, fit_posns, fit_ids, extraction_params = zipped[0] + successful_fits, successful_ids = perform_forced_fits(fit_posns, fit_ids, + accessor, + extraction_params) + return successful_fits, successful_ids, db_image_id + + +def get_accessors(zipped): + logger.debug("Creating accessors for images") + images, args = zipped + return tkp.steps.persistence.get_accessors(images) + + +def get_metadata_for_ordering(zipped): + logger.debug("Retrieving ordering metadata from accessors") + images, args = zipped + l = [] + for a in tkp.steps.persistence.get_accessors(images): + l.append(ImageMetadataForSort(url=a.url, timestamp=a.taustart_ts, + frequency=a.freq_eff)) + return l diff -Nru tkp-3.1.1/tkp/distribute/serial/tasks.py tkp-4.0/tkp/distribute/serial/tasks.py --- tkp-3.1.1/tkp/distribute/serial/tasks.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/distribute/serial/tasks.py 2017-02-03 10:06:41.000000000 +0000 @@ -1,22 +1,57 @@ from __future__ import absolute_import import logging import tkp.steps - +from tkp.steps.misc import ImageMetadataForSort +from tkp.steps.forced_fitting import perform_forced_fits logger 
= logging.getLogger(__name__) -def persistence_node_step(images, image_cache_config, sigma, f): - logger.debug("running persistence task") - return tkp.steps.persistence.node_steps(images, image_cache_config, - sigma, f) +def extract_metadatas(accessors, sigma, f): + logger.debug("running extract metadatas task") + return tkp.steps.persistence.extract_metadatas(accessors, sigma, f) + + +def open_as_fits(images): + logger.debug("opening files as fits objects") + return tkp.steps.persistence.paths_to_fits(images) -def quality_reject_check(url, job_config): +def quality_reject_check(accessor, job_config): logger.debug("running quality task") - return tkp.steps.quality.reject_check(url, job_config) + return tkp.steps.quality.reject_check(accessor, job_config) -def extract_sources(url, extraction_params): +def extract_sources(accessor, extraction_params): logger.debug("running extracted sources task") - return tkp.steps.source_extraction.extract_sources(url, extraction_params) + return tkp.steps.source_extraction.extract_sources(accessor, + extraction_params) + + +def get_accessors(images): + logger.debug("Creating accessors for images") + return tkp.steps.persistence.get_accessors(images) + + +def get_metadata_for_ordering(images): + """ + args: + images (list): list of image urls + returns: + list: of ImageMetadataForSort + """ + logger.debug("Retrieving ordering metadata from accessors") + l = [] + for a in tkp.steps.persistence.get_accessors(images): + l.append(ImageMetadataForSort(url=a.url, timestamp=a.taustart_ts, + frequency=a.freq_eff)) + return l + + +def forced_fits(zipped): + logger.debug("running forced fits task") + accessor, db_image_id, fit_posns, fit_ids, extraction_params = zipped + successful_fits, successful_ids = perform_forced_fits(fit_posns, fit_ids, + accessor, + extraction_params) + return successful_fits, successful_ids, db_image_id \ No newline at end of file diff -Nru tkp-3.1.1/tkp/__init__.py tkp-4.0/tkp/__init__.py --- tkp-3.1.1/tkp/__init__.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/__init__.py 2017-02-03 10:06:41.000000000 +0000 @@ -10,5 +10,9 @@ For details, see http://docs.transientskp.org/. 
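[Editor's note] The multiproc distributor above works around Pool.map's single-argument limit by zipping every work item with the shared argument tuple. A self-contained illustration of that convention; the worker function and values are made up for the example and are not part of TKP:

    from multiprocessing import Pool, cpu_count

    def work(zipped):
        # Every task receives (item, shared_args), mirroring the convention above.
        item, args = zipped
        scale, offset = args
        return item * scale + offset

    if __name__ == '__main__':
        pool = Pool(processes=cpu_count())
        zipped = ((i, (10, 1)) for i in [1, 2, 3])
        # map_async(...).get(<large timeout>) keeps the parent interruptible,
        # as in the rewritten map() above.
        print(pool.map_async(work, zipped).get(9999999))   # [11, 21, 31]
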
""" +import pkg_resources -__version__ = "3.1.1" +try: + __version__ = pkg_resources.require("tkp")[0].version +except pkg_resources.DistributionNotFound: + __version__ = "dev" diff -Nru tkp-3.1.1/tkp/main.py tkp-4.0/tkp/main.py --- tkp-3.1.1/tkp/main.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/main.py 2017-02-03 10:06:41.000000000 +0000 @@ -3,10 +3,14 @@ """ import imp import logging +import atexit import os from tkp import steps from tkp.config import initialize_pipeline_config, get_database_config import tkp.db +from tkp.db.image_store import store_fits +from astropy.io.fits.hdu import HDUList +from itertools import chain from tkp.db import consistency as dbconsistency from tkp.db import Image from tkp.db import general as dbgen @@ -19,33 +23,32 @@ group_per_timestep ) from tkp.db.configstore import store_config, fetch_config -from tkp.steps.persistence import create_dataset, store_images +from tkp.steps.persistence import create_dataset, store_images_in_db import tkp.steps.forced_fitting as steps_ff from tkp.steps.varmetric import execute_store_varmetric +from tkp.stream import stream_generator +from tkp.quality.rms import reject_historical_rms logger = logging.getLogger(__name__) -def run(job_name, supplied_mon_coords=[]): - pipe_config = initialize_pipeline_config( - os.path.join(os.getcwd(), "pipeline.cfg"), - job_name) - - # get parallelise props. Defaults to multiproc with autodetect num cores - parallelise = pipe_config.get('parallelise', {}) - distributor = os.environ.get('TKP_PARALLELISE', parallelise.get('method', - 'multiproc')) - runner = Runner(distributor=distributor, - cores=parallelise.get('cores', 0)) +def get_pipe_config(job_name): + return initialize_pipeline_config(os.path.join(os.getcwd(), "pipeline.cfg"), + job_name) - #Setup logfile before we do anything else +def setup(pipe_config, supplied_mon_coords=None): + """ + Initialises the pipeline run. + """ + if not supplied_mon_coords: + supplied_mon_coords = [] + + # Setup logfile before we do anything else log_dir = pipe_config.logging.log_dir - setup_logging(log_dir, - debug=pipe_config.logging.debug, - use_colorlog=pipe_config.logging.colorlog - ) + setup_logging(log_dir, debug=pipe_config.logging.debug, + use_colorlog=pipe_config.logging.colorlog) job_dir = pipe_config.DEFAULT.job_directory if not os.access(job_dir, os.X_OK): @@ -58,31 +61,75 @@ dump_database_backup(db_config, job_dir) job_config = load_job_config(pipe_config) - se_parset = job_config.source_extraction - deruiter_radius = job_config.association.deruiter_radius - beamwidths_limit = job_config.association.beamwidths_limit - new_src_sigma = job_config.transient_search.new_source_sigma_margin - - all_images = imp.load_source('images_to_process', - os.path.join(job_dir, - 'images_to_process.py')).images + dump_configs_to_logdir(log_dir, job_config, pipe_config) + + sync_rejectreasons(tkp.db.Database().Session()) + + job_config, dataset_id = initialise_dataset(job_config, supplied_mon_coords) - logger.info("dataset %s contains %s images" % (job_name, len(all_images))) + return job_dir, job_config, dataset_id + +def get_runner(pipe_config): + """ + get parallelise props. Defaults to multiproc with autodetect num cores. Wil + initialise the distributor. + + One should not mix threads and multiprocessing, but for example AstroPy uses + threads internally. Best practice then is to first do multiprocessing, + and then threading per process. 
This is the reason why this function + should be called as one of the first functions the in the TraP pipeline + lifespan. + """ + para = pipe_config.parallelise + logging.info("using '{}' method for parallellisation".format(para.method)) + distributor = os.environ.get('TKP_PARALLELISE', para.method) + return Runner(distributor=distributor, cores=para.cores) + + +def load_images(job_name, job_dir): + """ + Load all the images for a specific TraP job. + + returns: + list: a list of paths + """ + path = os.path.join(job_dir, 'images_to_process.py') + images = imp.load_source('images_to_process', path).images + logger.info("dataset %s contains %s images" % (job_name, + len(images))) + return images + + +def consistency_check(): logger.info("performing database consistency check") if not dbconsistency.check(): - logger.error("Inconsistent database found; aborting") - return 1 + msg = "Inconsistent database found; aborting" + logger.error(msg) + raise RuntimeError(msg) - sync_rejectreasons(tkp.db.Database().Session()) +def initialise_dataset(job_config, supplied_mon_coords): + """ + sets up a dataset in the database. + + if the dataset already exists it will return the job_config from the + previous dataset run. + + args: + job_config: a job configuration object + supplied_mon_coords (list): a list of monitoring positions + + returns: + tuple: job_config and dataset ID + """ dataset_id = create_dataset(job_config.persistence.dataset_id, job_config.persistence.description) if job_config.persistence.dataset_id == -1: store_config(job_config, dataset_id) # new data set if supplied_mon_coords: - dbgen.insert_monitor_positions(dataset_id,supplied_mon_coords) + dbgen.insert_monitor_positions(dataset_id, supplied_mon_coords) else: job_config_from_db = fetch_config(dataset_id) # existing data set if check_job_configs_match(job_config, job_config_from_db): @@ -96,84 +143,295 @@ if supplied_mon_coords: logger.warn("Monitor positions supplied will be ignored. 
" "(Previous dataset specified)") + return job_config, dataset_id - dump_configs_to_logdir(log_dir, job_config, pipe_config) - logger.info("performing persistence step") - image_cache_params = pipe_config.image_cache - imgs = [[img] for img in all_images] +def extract_metadata(job_config, accessors, runner): + """ - rms_est_sigma = job_config.persistence.rms_est_sigma - rms_est_fraction = job_config.persistence.rms_est_fraction - metadatas = runner.map("persistence_node_step", imgs, - [image_cache_params, rms_est_sigma, rms_est_fraction]) + args: + job_config: a TKP config object + accessors (list): list of tkp.Accessor objects + runner (tkp.distribute.Runner): the runner to use + + returns: + list: a list of metadata dicts + """ + logger.debug("Extracting metadata from images") + imgs = [[a] for a in accessors] + metadatas = runner.map("extract_metadatas", + imgs, + [job_config.persistence.rms_est_sigma, + job_config.persistence.rms_est_fraction]) metadatas = [m[0] for m in metadatas if m] + return metadatas - logger.info("Storing images") - image_ids = store_images(metadatas, - job_config.source_extraction.extraction_radius_pix, - dataset_id) +def store_image_metadata(metadatas, job_config, dataset_id): + logger.debug("Storing image metadata in SQL database") + r = job_config.source_extraction.extraction_radius_pix + image_ids = store_images_in_db(metadatas, r, dataset_id, + job_config.persistence.bandwidth_max) db_images = [Image(id=image_id) for image_id in image_ids] + return db_images + + +def extract_fits_from_files(runner, paths): + # we assume pahtss is uniform + if type(paths[0]) == str: + fitss = runner.map("open_as_fits", [[p] for p in paths]) + return zip(*list(chain.from_iterable(fitss))) + elif type(paths[0]) == HDUList: + return [f[0].data for f in paths], [str(f[0].header) for f in paths] + else: + logging.error('unknown type') + - logger.info("performing quality check") - urls = [img.url for img in db_images] +def quality_check(db_images, accessors, job_config, runner): + """ + returns: + list: a list of db_image and accessor tuples + """ + logger.debug("performing quality check") arguments = [job_config] - rejecteds = runner.map("quality_reject_check", urls, arguments) + rejecteds = runner.map("quality_reject_check", accessors, arguments) + db = tkp.db.Database() + history = job_config.persistence.rms_est_history + rms_max = job_config.persistence.rms_est_max + rms_min = job_config.persistence.rms_est_min + est_sigma = job_config.persistence.rms_est_sigma good_images = [] - for image, rejected in zip(db_images, rejecteds): + for db_image, rejected, accessor in zip(db_images, rejecteds, accessors): + if not rejected: + rejected = reject_historical_rms(db_image.id, db.session, + history, est_sigma, rms_max, rms_min) + if rejected: reason, comment = rejected - steps.quality.reject_image(image.id, reason, comment) + steps.quality.reject_image(db_image.id, reason, comment) else: - good_images.append(image) + good_images.append((db_image, accessor)) if not good_images: - logger.warn("No good images under these quality checking criteria") - return + msg = "No good images under these quality checking criteria" + logger.warn(msg) + return good_images + + +def source_extraction(accessors, job_config, runner): + logger.debug("performing source extraction") + arguments = [job_config.source_extraction] + extraction_results = runner.map("extract_sources", accessors, arguments) + total = sum(len(i[0]) for i in extraction_results) + logger.info('found {} blind sources in {} 
images'.format(total, len(extraction_results))) + return extraction_results + + +def do_forced_fits(runner, all_forced_fits): + logger.debug('performing forced fitting') + returned = runner.map("forced_fits", all_forced_fits) + total = sum(len(i[0]) for i in returned) + logger.info('performed {} forced fits in {} images'.format(total, len(returned))) + return returned + + +def store_extractions(images, extraction_results, job_config): + logger.debug("storing extracted sources to database") + # we also set the image max,min RMS values which calculated during + # source extraction + detection_thresh = job_config.source_extraction['detection_threshold'] + analysis_thresh = job_config.source_extraction['analysis_threshold'] + for (db_image, accessor), results in zip(images, extraction_results): + db_image.update(rms_min=results.rms_min, rms_max=results.rms_max, + detection_thresh=detection_thresh, + analysis_thresh=analysis_thresh) + dbgen.insert_extracted_sources(db_image.id, results.sources, 'blind') + + +def assocate_and_get_force_fits(db_image, job_config): + logger.debug("performing DB operations for image {} ({})".format(db_image.id, + db_image.url)) + + r = job_config.association.deruiter_radius + s = job_config.transient_search.new_source_sigma_margin + dbass.associate_extracted_sources(db_image.id, deRuiter_r=r, + new_source_sigma_margin=s) + + expiration = job_config.source_extraction.expiration + all_fit_posns, all_fit_ids = steps_ff.get_forced_fit_requests(db_image, + expiration) + return all_fit_posns, all_fit_ids - grouped_images = group_per_timestep(good_images) - timestep_num = len(grouped_images) - for n, (timestep, images) in enumerate(grouped_images): - msg = "processing %s images in timestep %s (%s/%s)" - logger.info(msg % (len(images), timestep, n+1, timestep_num)) - logger.debug("performing source extraction") - urls = [img.url for img in images] - arguments = [se_parset] - - extraction_results = runner.map("extract_sources", urls, arguments) - - logger.debug("storing extracted sources to database") - # we also set the image max,min RMS values which calculated during - # source extraction - for image, results in zip(images, extraction_results): - image.update(rms_min=results.rms_min, rms_max=results.rms_max, - detection_thresh=se_parset['detection_threshold'], - analysis_thresh=se_parset['analysis_threshold']) - dbgen.insert_extracted_sources(image.id, results.sources, 'blind') - - for image in images: - logger.info("performing DB operations for image {} ({})".format( - image.id, image.url)) - - dbass.associate_extracted_sources(image.id, - deRuiter_r=deruiter_radius, - new_source_sigma_margin=new_src_sigma) - - expiration = job_config.source_extraction.expiration - all_fit_posns, all_fit_ids = steps_ff.get_forced_fit_requests(image, - expiration) - if all_fit_posns: - successful_fits, successful_ids = steps_ff.perform_forced_fits( - all_fit_posns, all_fit_ids, image.url, se_parset) +def varmetric(dataset_id): + logger.info("calculating variability metrics") + execute_store_varmetric(dataset_id) + - steps_ff.insert_and_associate_forced_fits(image.id,successful_fits, - successful_ids) +def close_database(dataset_id): + dbgen.update_dataset_process_end_ts(dataset_id) + db = tkp.db.Database() + db.session.commit() + db.close() - dbgen.update_dataset_process_end_ts(dataset_id) +def get_accessors(runner, all_images): + imgs = [[img] for img in all_images] + accessors = runner.map("get_accessors", imgs) + return [a[0] for a in accessors if a] + + +def 
get_metadata_for_sorting(runner, image_paths): + """ + Group images per timestamp. Will open all images in parallel using runner. + + args: + runner (tkp.distribute.Runner): Runner to use for distribution + image_paths (list): list of image paths + returns: + list: list of tuples, (timestamp, [list_of_images]) + """ + nested_img = [[i] for i in image_paths] + results = runner.map("get_metadata_for_ordering", nested_img) + if results and results[0]: + metadatas = [t[0] for t in results] + return metadatas + else: + logger.warning("no images to process!") + return [] + + +def store_image_data(db_images, fits_datas, fits_headers): + logger.info("storing {} images to database".format(len(db_images))) + store_fits(db_images, fits_datas, fits_headers) + + +def timestamp_step(runner, images, job_config, dataset_id, copy_images): + """ + Called from the main loop with all images in a certain timestep + + args: + runner (tkp.distribute.Runner): Runner to use for distribution + images (list): list of things tkp.accessors can handle, like image + paths or fits objects + job_config: a tkp job config object + dataset_id (int): The ``tkp.db.model.Dataset`` id + + returns: + list: of tuples (rms_qc, band) + """ + # gather all image info + accessors = get_accessors(runner, images) + metadatas = extract_metadata(job_config, accessors, runner) + db_images = store_image_metadata(metadatas, job_config, dataset_id) + error = "%s != %s != %s" % (len(accessors), len(metadatas), len(db_images)) + assert len(accessors) == len(metadatas) == len(db_images), error + + # store copy of image data in database + if copy_images: + fits_datas, fits_headers = extract_fits_from_files(runner, images) + store_image_data(db_images, fits_datas, fits_headers) + + # filter out the bad ones + good_images = quality_check(db_images, accessors, job_config, runner) + good_accessors = [i[1] for i in good_images] + + # do the source extractions + extraction_results = source_extraction(good_accessors, job_config, runner) + + store_extractions(good_images, extraction_results, job_config) + + all_forced_fits = [] + # assocate the sources + for (db_image, accessor) in good_images: + fit_poss, fit_ids = assocate_and_get_force_fits(db_image, job_config) + all_forced_fits.append((accessor, db_image.id, fit_poss, fit_ids, + job_config.source_extraction)) + + # do the forced fitting + all_forced_fits_results = do_forced_fits(runner, all_forced_fits) + + # store and associate the forced fits + for (successful_fits, successful_ids, db_image_id) in all_forced_fits_results: + steps_ff.insert_and_associate_forced_fits(db_image_id, + successful_fits, + successful_ids) + + # update the variable metrics for running catalogs + varmetric(dataset_id) + + +def run_stream(runner, job_config, dataset_id, copy_images): + """ + Run the pipeline in stream mode. + + Daemon function, doesn't return. 
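[Editor's note] For batch mode (run_batch, defined below), load_images above expects the job directory to contain an images_to_process.py module exposing an `images` list. A minimal example of such a file; the paths are placeholders:

    # images_to_process.py -- lives in the job directory
    import glob

    # Any iterable of paths that tkp.accessors can open will do.
    images = sorted(glob.glob('/data/myjob/*.fits'))
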
+ + args: + runner (tkp.distribute.Runner): Runner to use for distribution + job_config: a job configuration object + dataset_id (int): The dataset ID to use + """ + hosts = job_config.pipeline.hosts.split(',') + ports = [int(p) for p in job_config.pipeline.ports.split(',')] + from datetime import datetime + for images in stream_generator(hosts=hosts, ports=ports): + logger.info("processing {} stream images...".format(len(images))) + trap_start = datetime.now() + try: + timestamp_step(runner, images, job_config, dataset_id, copy_images) + except Exception as e: + logger.error("timestep raised {} exception: {}".format(type(e), str(e))) + else: + trap_end = datetime.now() + delta = (trap_end - trap_start).microseconds/1000 + logging.info("trap iteration took {} ms".format(delta)) + + +def run_batch(image_paths, job_config, runner, dataset_id, copy_images): + """ + Run the pipeline in batch mode. + + args: + job_name (str): job name, used for locating images script + pipe_config: the pipeline configuration object + job_config: a job configuration object + runner (tkp.distribute.Runner): Runner to use for distribution + dataset_id (int): The dataset ID to use + """ + sorting_metadata = get_metadata_for_sorting(runner, image_paths) + grouped_images = group_per_timestep(sorting_metadata) + + for n, (timestep, images) in enumerate(grouped_images): + msg = "processing %s images in timestep %s (%s/%s)" + logger.info(msg % (len(images), timestep, n + 1, len(grouped_images))) + try: + timestamp_step(runner, images, job_config, dataset_id, copy_images) + except Exception as e: + logger.error("timestep raised {} exception: {}".format(type(e), str(e))) + + +def run(job_name, supplied_mon_coords=None): + """ + TKP pipeline main loop entry point. + + args: + job_name (str): name of the jbo to run + supplied_mon_coords (list): list of coordinates to monitor + """ + pipe_config = get_pipe_config(job_name) + runner = get_runner(pipe_config) + job_dir, job_config, dataset_id = setup(pipe_config, supplied_mon_coords) + + # make sure we close the database connection at program exit + atexit.register(close_database, dataset_id) + + copy_images = pipe_config.image_cache['copy_images'] + if job_config.pipeline.mode == 'stream': + run_stream(runner, job_config, dataset_id, copy_images) + elif job_config.pipeline.mode == 'batch': + image_paths = load_images(job_name, job_dir) + run_batch(image_paths, job_config, runner, dataset_id, copy_images) + - logger.info("calculating variability metrics") - execute_store_varmetric(dataset_id) diff -Nru tkp-3.1.1/tkp/management.py tkp-4.0/tkp/management.py --- tkp-3.1.1/tkp/management.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/management.py 2017-02-03 10:06:41.000000000 +0000 @@ -242,6 +242,28 @@ populate(dbconfig) +def deldataset(options): + if not options.yes: + answer = raw_input("\nAre you sure you want to delete dataset {}? " + "[y/N]: ".format(options.id)) + if answer.lower() != 'y': + sys.stderr.write("Aborting.\n") + sys.exit(1) + from tkp.db.database import Database + from tkp.db.model import Dataset + from sqlalchemy.orm.exc import NoResultFound + db = Database() + try: + dataset = db.session.query(Dataset).filter(Dataset.id==options.id).one() + except NoResultFound: + print("\ndataset {} does not exist!\n".format(options.id)) + sys.exit(1) + db.session.delete(dataset) + db.session.commit() + print("\ndataset {} has been deleted!\n".format(options.id)) + db.close() + + def get_parser(): trap_manage_note = """ A tool for managing TKP projects. 
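[Editor's note] run() above dispatches on job_config.pipeline.mode, and run_stream parses comma-separated host and port lists from the same section. A hedged sketch of the corresponding ini-style job settings; the section and option names are inferred from the attribute access above, the file name is not part of this diff, and the values are only illustrative:

    [pipeline]
    mode = stream
    ; use 'batch' to process the images_to_process.py list instead
    hosts = localhost,localhost
    ports = 6666,6667
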
@@ -321,6 +343,16 @@ "(only works with Postgres backend)", action="store_true") initdb_parser.set_defaults(func=init_db) + + + # deldataset + deldataset_parser = parser_subparsers.add_parser( + 'deldataset', help="Delete a dataset from a database") + deldataset_parser.add_argument('id', help='dataset id', type=int) + deldataset_parser.add_argument('-y', '--yes', + help="don't ask for confirmation", + action="store_true") + deldataset_parser.set_defaults(func=deldataset) return parser diff -Nru tkp-3.1.1/tkp/quality/brightsource.py tkp-4.0/tkp/quality/brightsource.py --- tkp-3.1.1/tkp/quality/brightsource.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/quality/brightsource.py 2017-02-03 10:06:41.000000000 +0000 @@ -1,7 +1,5 @@ import sys import logging -import warnings -import casacore.quanta as qa from io import BytesIO from casacore.measures import measures from tkp.utility.coordinates import unix2julian diff -Nru tkp-3.1.1/tkp/quality/rms.py tkp-4.0/tkp/quality/rms.py --- tkp-3.1.1/tkp/quality/rms.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/quality/rms.py 2017-02-03 10:06:41.000000000 +0000 @@ -1,5 +1,9 @@ +import numpy from tkp.utility import nice_format - +from scipy.stats import norm +from sqlalchemy.sql.expression import desc +from tkp.db.model import Image +from tkp.db.quality import reject_reasons def rms_invalid(rms, noise, low_bound=1, high_bound=50): """ @@ -19,3 +23,94 @@ (nice_format(rms), nice_format(ratio), nice_format(noise)) else: return False + + +def rms(data): + """Returns the RMS of the data about the median. + Args: + data: a numpy array + """ + data -= numpy.median(data) + return numpy.sqrt(numpy.power(data, 2).sum()/len(data)) + + +def clip(data, sigma=3): + """Remove all values above a threshold from the array. + Uses iterative clipping at sigma value until nothing more is getting clipped. + Args: + data: a numpy array + """ + raveled = data.ravel() + median = numpy.median(raveled) + std = numpy.std(raveled) + newdata = raveled[numpy.abs(raveled-median) <= sigma*std] + if len(newdata) and len(newdata) != len(raveled): + return clip(newdata, sigma) + else: + return newdata + + +def subregion(data, f=4): + """Returns the inner region of a image, according to f. + + Resulting area is 4/(f*f) of the original. + Args: + data: a numpy array + """ + x, y = data.shape + return data[(x/2 - x/f):(x/2 + x/f), (y/2 - y/f):(y/2 + y/f)] + + +def rms_with_clipped_subregion(data, rms_est_sigma=3, rms_est_fraction=4): + """ + RMS for quality-control. + + Root mean square value calculated from central region of an image. + We sigma-clip the input-data in an attempt to exclude source-pixels + and keep only background-pixels. + + Args: + data: A numpy array + rms_est_sigma: sigma value used for clipping + rms_est_fraction: determines size of subsection, result will be + 1/fth of the image size where f=rms_est_fraction + returns the rms value of a iterative sigma clipped subsection of an image + """ + return rms(clip(subregion(data, rms_est_fraction), rms_est_sigma)) + + +def reject_historical_rms(image_id, session, history=100, est_sigma=4, rms_max=100., rms_min=0.0): + """ + Check if the RMS value of an image lies within a range defined + by a gaussian fit on the histogram calculated from the last x RMS + values in this subband. Upper and lower bound are then controlled + by est_sigma multiplied with the sigma of the gaussian. 
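[Editor's note] The docstring above describes the acceptance window as a Gaussian fit to the last `history` RMS values in the band. The thresholds can be previewed outside the database with a few lines; the numbers are synthetic and purely illustrative:

    import numpy
    from scipy.stats import norm

    rms_history = numpy.random.normal(loc=0.01, scale=0.001, size=100)  # past rms_qc values
    mu, sigma = norm.fit(rms_history)
    est_sigma = 4
    t_low, t_high = mu - est_sigma * sigma, mu + est_sigma * sigma
    new_rms_qc = 0.011
    accept = t_low < new_rms_qc < t_high    # mirrors the check in the function below
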
+ + args: + image_id (int): database ID of the image we want to check + session (sqlalchemy.orm.session.Session): the database session + history (int): the number of timestamps we want to use for histogram + est_sigma (float): sigma multiplication factor + rms_max (float): global maximum rms for image quality check + rms_min (float): global minimum rms for image quality check + returns: + bool: None if not rejected, (rejectreason, comment) if rejected + """ + image = session.query(Image).filter(Image.id == image_id).one() + rmss = session.query(Image.rms_qc).filter( + (Image.band == image.band)).order_by(desc(Image.taustart_ts)).limit( + history).all() + if len(rmss) < history: + return False + mu, sigma = norm.fit(rmss) + t_low = mu - sigma * est_sigma + t_high = mu + sigma * est_sigma + + if not rms_min < image.rms_qc < rms_max: + return reject_reasons['rms'],\ + "RMS value not within {} and {}".format(0.0, rms_max) + + if not t_low < image.rms_qc < t_high or not 0.0 < image.rms_qc < rms_max: + return reject_reasons['rms'],\ + "RMS value not within {} and {}".format(t_low, t_high) + diff -Nru tkp-3.1.1/tkp/quality/statistics.py tkp-4.0/tkp/quality/statistics.py --- tkp-3.1.1/tkp/quality/statistics.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/quality/statistics.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,58 +0,0 @@ -""" -functions for calculating statistical properties of LOFAR images -""" -import numpy - - -def rms(data): - """Returns the RMS of the data about the median. - Args: - data: a numpy array - """ - data -= numpy.median(data) - return numpy.sqrt(numpy.power(data, 2).sum()/len(data)) - - -def clip(data, sigma=3): - """Remove all values above a threshold from the array. - Uses iterative clipping at sigma value until nothing more is getting clipped. - Args: - data: a numpy array - """ - raveled = data.ravel() - median = numpy.median(raveled) - std = numpy.std(raveled) - newdata = raveled[numpy.abs(raveled-median) <= sigma*std] - if len(newdata) and len(newdata) != len(raveled): - return clip(newdata, sigma) - else: - return newdata - - -def subregion(data, f=4): - """Returns the inner region of a image, according to f. - - Resulting area is 4/(f*f) of the original. - Args: - data: a numpy array - """ - x, y = data.shape - return data[(x/2 - x/f):(x/2 + x/f), (y/2 - y/f):(y/2 + y/f)] - - -def rms_with_clipped_subregion(data, rms_est_sigma=3, rms_est_fraction=4): - """ - RMS for quality-control. - - Root mean square value calculated from central region of an image. - We sigma-clip the input-data in an attempt to exclude source-pixels - and keep only background-pixels. 
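[Editor's note] The statistics helpers above (moved here from the tkp/quality/statistics.py module removed in this same patch) can be exercised directly on a synthetic array. A quick sanity check with made-up values:

    import numpy
    from tkp.quality.rms import rms_with_clipped_subregion

    data = numpy.random.normal(0.0, 1.0, (100, 100))
    data[50, 50] = 500.0                      # a single bright "source" pixel
    # Central quarter of the image, iteratively sigma-clipped, then RMS about
    # the median; the clipping removes the source pixel, so the result stays
    # close to the background width of 1.0.
    print(rms_with_clipped_subregion(data, rms_est_sigma=3, rms_est_fraction=4))
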
- - Args: - data: A numpy array - rms_est_sigma: sigma value used for clipping - rms_est_fraction: determines size of subsection, result will be - 1/fth of the image size where f=rms_est_fraction - returns the rms value of a iterative sigma clipped subsection of an image - """ - return rms(clip(subregion(data, rms_est_fraction), rms_est_sigma)) diff -Nru tkp-3.1.1/tkp/sourcefinder/deconv.py tkp-4.0/tkp/sourcefinder/deconv.py --- tkp-3.1.1/tkp/sourcefinder/deconv.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/sourcefinder/deconv.py 2017-02-03 10:06:41.000000000 +0000 @@ -26,10 +26,8 @@ cpa (float): Clean beam position angle of major axis Returns: - rmaj (float): Real major axis - rmin (float): Real minor axis - rpa (float): Real position angle of major axis - ierr (int): Number of components which failed to deconvolve + tuple: real major axis, real minor axis, real position angle of major + axis, number of components which failed to deconvolve """ HALF_RAD = 90.0 / pi cmaj2 = cmaj * cmaj diff -Nru tkp-3.1.1/tkp/sourcefinder/image.py tkp-4.0/tkp/sourcefinder/image.py --- tkp-3.1.1/tkp/sourcefinder/image.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/sourcefinder/image.py 2017-02-03 10:06:41.000000000 +0000 @@ -315,6 +315,11 @@ slicey = slice(-0.5, -0.5+yratio, 1j*my_ydim) my_map = numpy.ma.MaskedArray(numpy.zeros(self.data.shape), mask = self.data.mask) + + # Remove the MaskedArrayFutureWarning warning and keep old numpy < 1.11 + # behavior + my_map.unshare_mask() + my_map[useful_chunk[0]] = ndimage.map_coordinates( grid, numpy.mgrid[slicex, slicey], mode='nearest', order=INTERPOLATE_ORDER) @@ -505,11 +510,13 @@ x-numpix:x+numpix+1].max() @staticmethod - def box_slice_about_pixel(x,y,box_radius): + def box_slice_about_pixel(x, y, box_radius): """ Returns a slice centred about (x,y), of width = 2*int(box_radius) + 1 """ ibr = int(box_radius) + x = int(x) + y = int(y) return (slice(x - ibr, x + ibr + 1), slice(y - ibr, y + ibr + 1)) @@ -525,7 +532,7 @@ Returns an instance of :class:`tkp.sourcefinder.extract.Detection`. """ - logger.debug("Force-fitting pixel location ({},{})".format(x,y)) + logger.debug("Force-fitting pixel location ({},{})".format(x, y)) # First, check that x and y are actually valid semi-positive integers. # Otherwise, # If they are too high (positive), then indexing will fail @@ -554,7 +561,7 @@ isinstance(self.rmsmap, numpy.ma.core.MaskedConstant) ) or ( # Old NumPy - numpy.ma.is_masked(self.rmsmap[x, y]) + numpy.ma.is_masked(self.rmsmap[int(x), int(y)]) )): logger.error("Background is masked: cannot fit") return None @@ -574,8 +581,7 @@ ) ) - - mylabel = labels[x, y] + mylabel = labels[int(x), int(y)] if mylabel == 0: # 'Background' raise ValueError("Fit region is below specified threshold, fit aborted.") mask = numpy.where(labels[chunk] == mylabel, 0, 1) @@ -605,7 +611,7 @@ raise TypeError("Unkown fixed parameter") if threshold is not None: - threshold_at_pixel = threshold * self.rmsmap[x, y] + threshold_at_pixel = threshold * self.rmsmap[int(x), int(y)] else: threshold_at_pixel = None @@ -613,7 +619,7 @@ measurement, residuals = extract.source_profile_and_errors( fitme, threshold_at_pixel, - self.rmsmap[x, y], + self.rmsmap[int(x), int(y)], self.beam, fixed=fixed ) @@ -919,8 +925,8 @@ # The axis will not likely fall exactly on a pixel number, so # check all the surroundings. 
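[Editor's note] Several of the indexing changes in this image.py hunk exist because fitted pixel positions arrive as floats, while recent numpy versions reject float indices. The effect is easy to reproduce; the array and positions are illustrative:

    import numpy

    rmsmap = numpy.zeros((5, 5))
    x, y = 2.0, 3.0                   # fitted positions are floats
    # rmsmap[x, y]                    # recent numpy raises IndexError here
    value = rmsmap[int(x), int(y)]    # the explicit casts added above keep this valid
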
def check_point(x, y): - x = (numpy.floor(x), numpy.ceil(x)) - y = (numpy.floor(y), numpy.ceil(y)) + x = (int(x), int(numpy.ceil(x))) + y = (int(y), int(numpy.ceil(y))) for position in itertools.product(x, y): try: if self.data.mask[position[0], position[1]]: diff -Nru tkp-3.1.1/tkp/steps/forced_fitting.py tkp-4.0/tkp/steps/forced_fitting.py --- tkp-3.1.1/tkp/steps/forced_fitting.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/steps/forced_fitting.py 2017-02-03 10:06:41.000000000 +0000 @@ -1,7 +1,5 @@ import logging -import tkp.accessors from tkp.accessors import sourcefinder_image_from_accessor -import tkp.accessors from tkp.db import general as dbgen from tkp.db import monitoringlist as dbmon from tkp.db import nulldetections as dbnd @@ -11,7 +9,7 @@ def get_forced_fit_requests(image, expiration): nd_requested_fits = dbnd.get_nulldetections(image.id, expiration) - logger.info("Found %s null detections" % len(nd_requested_fits)) + logger.debug("Found %s null detections" % len(nd_requested_fits)) mon_entries = dbmon.get_monitor_entries(image.dataset.id) all_fit_positions = [] @@ -31,7 +29,6 @@ nd_extractions=[] nd_runcats=[] - ms_extractions=[] ms_ids = [] @@ -46,27 +43,25 @@ raise ValueError("Forced fit type id not recognised:" + id[0]) if nd_extractions: - logger.info("adding null detections") + logger.debug("adding null detections") dbgen.insert_extracted_sources(image_id, nd_extractions, extract_type='ff_nd', ff_runcat_ids=nd_runcats) dbnd.associate_nd(image_id) else: - logger.info("No successful nulldetection fits") + logger.debug("No successful nulldetection fits") if ms_extractions: dbgen.insert_extracted_sources(image_id, ms_extractions, extract_type='ff_ms', ff_monitor_ids=ms_ids) - logger.info("adding monitoring sources") + logger.debug("adding monitoring sources") dbmon.associate_ms(image_id) else: - logger.info("No successful monitor fits") - + logger.debug("No successful monitor fits") -def perform_forced_fits(fit_posns, fit_ids, - image_path, extraction_params): +def perform_forced_fits(fit_posns, fit_ids, accessor, extraction_params): """ Perform forced source measurements on an image based on a list of positions. @@ -83,25 +78,31 @@ NB returned lists may be shorter than input lists if some fits are unsuccessful. 
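[Editor's note] perform_forced_fits now takes an already-opened accessor instead of an image path. A hedged usage sketch: the path, positions, ids and parameter values are all placeholders, while the parameter keys are the ones read by the rewritten implementation that follows:

    import tkp.accessors
    from tkp.steps.forced_fitting import perform_forced_fits

    accessor = tkp.accessors.open('/data/myjob/image.fits')   # placeholder path
    fit_posns = [(123.456, 45.678)]       # sky positions to fit (illustrative)
    fit_ids = ['some-opaque-id']          # returned alongside successful fits
    extraction_params = {
        'margin': 0, 'extraction_radius_pix': 250,
        'back_size_x': 32, 'back_size_y': 32,
        'box_in_beampix': 10, 'ew_sys_err': 10, 'ns_sys_err': 10,
    }
    fits, ids = perform_forced_fits(fit_posns, fit_ids, accessor, extraction_params)
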
""" - logger.info("Forced fitting in image: %s" % (image_path)) - fitsimage = tkp.accessors.open(image_path) - - data_image = sourcefinder_image_from_accessor(fitsimage, - margin=extraction_params['margin'], - radius=extraction_params['extraction_radius_pix'], - back_size_x=extraction_params['back_size_x'], - back_size_y=extraction_params['back_size_y']) - + logger.debug("Forced fitting in image: %s" % (accessor.url)) - boxsize = extraction_params['box_in_beampix'] * max(data_image.beam[0], - data_image.beam[1]) - successful_fits, successful_ids = data_image.fit_fixed_positions( - fit_posns, boxsize, ids=fit_ids) + if not len(fit_ids): + logging.debug("nothing to force fit") + return [], [] + + margin = extraction_params['margin'] + radius = extraction_params['extraction_radius_pix'] + back_size_x = extraction_params['back_size_x'] + back_size_y = extraction_params['back_size_y'] + data_image = sourcefinder_image_from_accessor(accessor, margin=margin, + radius=radius, + back_size_x=back_size_x, + back_size_y=back_size_y) + + box_in_beampix = extraction_params['box_in_beampix'] + boxsize = box_in_beampix * max(data_image.beam[0], data_image.beam[1]) + fits = data_image.fit_fixed_positions( fit_posns, boxsize, ids=fit_ids) + successful_fits, successful_ids = fits if successful_fits: - serialized =[ + serialized = [ f.serialize( - extraction_params['ew_sys_err'], extraction_params['ns_sys_err']) + extraction_params['ew_sys_err'], + extraction_params['ns_sys_err']) for f in successful_fits] return serialized, successful_ids else: - return [],[] + return [], [] diff -Nru tkp-3.1.1/tkp/steps/misc.py tkp-4.0/tkp/steps/misc.py --- tkp-3.1.1/tkp/steps/misc.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/steps/misc.py 2017-02-03 10:06:41.000000000 +0000 @@ -10,7 +10,7 @@ import os from pprint import pprint -from collections import defaultdict +from collections import defaultdict, namedtuple from tkp.config import parse_to_dict from tkp.db.dump import dump_db @@ -65,61 +65,60 @@ info_log_file = os.path.join(log_dir, basename+'.log') debug_log_file = os.path.join(log_dir, basename+'.debug.log') - - - long_formatter = logging.Formatter( + formatter = logging.Formatter( '%(asctime)s %(levelname)s %(name)s: %(message)s', datefmt="%Y-%m-%d %H:%M:%S" ) - short_formatter = logging.Formatter( - '%(asctime)s %(levelname)s %(name)s: %(message)s', - datefmt="%H:%M:%S") + + debug_formatter = logging.Formatter( + '%(asctime)s %(levelname)s %(name)s %(funcName)s() process %(processName)s ' + '(%(process)d) thread %(threadName)s (%(thread)d) : %(message)s', + datefmt="%Y-%m-%d %H:%M:%S" + ) info_hdlr = logging.FileHandler(info_log_file) info_hdlr.setLevel(logging.INFO) - info_hdlr.setFormatter(long_formatter) + info_hdlr.setFormatter(formatter) debug_hdlr = logging.FileHandler(debug_log_file) debug_hdlr.setLevel(logging.DEBUG) - debug_hdlr.setFormatter(long_formatter) + debug_hdlr.setFormatter(debug_formatter) stdout_handler = logging.StreamHandler() - color_fmt = colorlog.ColoredFormatter( - "%(log_color)s%(asctime)s:%(name)s:%(levelname)s%(reset)s %(blue)s%(message)s", - datefmt= "%H:%M:%S", - reset=True, - log_colors={ - 'DEBUG': 'cyan', - 'INFO': 'green', - 'WARNING': 'yellow', - 'ERROR': 'red', - 'CRITICAL': 'red', - } - ) - if use_colorlog: - stdout_handler.setFormatter(color_fmt) - else: - stdout_handler.setFormatter(short_formatter) - - if debug: stdout_handler.setLevel(logging.DEBUG) + formatter = debug_formatter else: stdout_handler.setLevel(logging.INFO) + formatter = formatter + if use_colorlog: 
+ formatter = colorlog.ColoredFormatter( + "%(log_color)s" + formatter._fmt, + datefmt="%H:%M:%S", + reset=True, + log_colors={ + 'DEBUG': 'cyan', + 'INFO': 'green', + 'WARNING': 'yellow', + 'ERROR': 'red', + 'CRITICAL': 'red', + } + ) + stdout_handler.setFormatter(formatter) root_logger = logging.getLogger() - #We set level to debug, and handle output via handler-levels + # We set level to debug, and handle output via handler-levels root_logger.setLevel(logging.DEBUG) - #Trash any preset handlers and start fresh + # Trash any preset handlers and start fresh root_logger.handlers = [] root_logger.addHandler(stdout_handler) root_logger.addHandler(info_hdlr) root_logger.addHandler(debug_hdlr) logger.info("logging to %s" % log_dir) - #Suppress noisy streams: + # Suppress noisy streams logging.getLogger('tkp.sourcefinder.image.sigmaclip').setLevel(logging.INFO) @@ -138,40 +137,42 @@ output_name ) -def group_per_timestep(images): - """ - groups a list of TRAP images per time step. +ImageMetadataForSort = namedtuple('ImageMetadataForSort', [ + 'url', + 'timestamp', + 'frequency', +]) - Per time step the images are order per frequency and then per stokes. The - eventual order is: - (t1, f1, s1), (t1, f1, s2), (t1, f2, s1), (t1, f2, s2), (t2, f1, s1), ...) - where: +def group_per_timestep(metadatas): + """ + groups a list of TRAP images per time step. - * t is time sorted by old to new - * f is frequency sorted from low to high - * s is stokes, sorted by ID as defined in the database schema + Per time step the images are order per frequency. We could add other + ordering logic (per stoke) later on. Args: - images (list): Images to group. + metadatas (list): list of ImageMetadataForSort Returns: list: List of tuples. The list is sorted by timestamp. Each tuple has the timestamp as a first element, - and a list of images sorted by frequency and then stokes - as the second element. + and a list of ImageMetadataForSort sorted by frequency as the + second element. """ - timestamp_to_images_map = defaultdict(list) - for image in images: - timestamp_to_images_map[image.taustart_ts].append(image) + grouped_dict = defaultdict(list) + for metadata in metadatas: + grouped_dict[metadata.timestamp].append(metadata) - #List of (timestamp, [images_at_timestamp]) tuples: - grouped_images = timestamp_to_images_map.items() + grouped_tuple = grouped_dict.items() - # sort the tuples by first element (timestamps) - grouped_images.sort() + # sort for timestamp + grouped_tuple.sort() # and then sort the nested items per freq and stokes - [l[1].sort(key=lambda x: (x.freq_eff, x.stokes)) for l in grouped_images] - return grouped_images + [l[1].sort(key=lambda x: x.frequency) for l in grouped_tuple] + + # only return the urls + return [(stamp, [m.url for m in metas]) for stamp, metas in grouped_tuple] + diff -Nru tkp-3.1.1/tkp/steps/persistence.py tkp-4.0/tkp/steps/persistence.py --- tkp-3.1.1/tkp/steps/persistence.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/steps/persistence.py 2017-02-03 10:06:41.000000000 +0000 @@ -1,66 +1,21 @@ """ This `step` is used for the storing of images and metadata -to the database and image cache (mongodb). 
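[Editor's note] The rewritten group_per_timestep above works on plain ImageMetadataForSort tuples, so it can be demonstrated without any images. The file names, timestamps and frequencies below are made up:

    from datetime import datetime
    from tkp.steps.misc import ImageMetadataForSort, group_per_timestep

    metas = [
        ImageMetadataForSort(url='b.fits', timestamp=datetime(2017, 1, 1, 0, 1), frequency=150e6),
        ImageMetadataForSort(url='a.fits', timestamp=datetime(2017, 1, 1, 0, 0), frequency=150e6),
        ImageMetadataForSort(url='c.fits', timestamp=datetime(2017, 1, 1, 0, 0), frequency=120e6),
    ]
    print(group_per_timestep(metas))
    # -> [(datetime(2017, 1, 1, 0, 0), ['c.fits', 'a.fits']),
    #     (datetime(2017, 1, 1, 0, 1), ['b.fits'])]
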
+to the database """ import os import logging -import warnings +import cPickle from tempfile import NamedTemporaryFile - +from astropy.io.fits import open as fits_open from casacore.images import image as casacore_image import tkp.accessors from tkp.db.database import Database from tkp.db.orm import DataSet, Image -from tkp.quality.statistics import rms_with_clipped_subregion - +from tkp.quality.rms import rms_with_clipped_subregion logger = logging.getLogger(__name__) -def image_to_mongodb(filename, hostname, port, db): - """Copy a file into mongodb""" - - try: - import pymongo - import gridfs - except ImportError: - msg = "Could not import MongoDB modules" - logger.error(msg) - warnings.warn(msg) - return False - - try: - connection = pymongo.MongoClient(host=hostname, port=port) - gfs = gridfs.GridFS(connection[db]) - if gfs.exists(filename=filename): - logger.debug("File already in database") - - else: - # This conversion should work whether the input file - # is in FITS or CASA format. - # temp_fits_file is removed automatically when closed. - temp_fits_file = NamedTemporaryFile() - i = casacore_image(filename) - i.tofits(temp_fits_file.name) - new_file = gfs.new_file(filename=filename) - with open(temp_fits_file.name, "r") as f: - new_file.write(f) - new_file.close() - logger.info("Saved local copy of %s on %s"\ - % (os.path.basename(filename), hostname)) - except Exception, e: - logger.exception("Failed to save image to MongoDB: {}".format(filename)) - return False - - finally: - # Only clear up things which have been created - if "connection" in locals(): - connection.close() - if "temp_fits_file" in locals(): - temp_fits_file.close() - - return True - def create_dataset(dataset_id, description): """ Creates a dataset if it doesn't exists @@ -80,12 +35,12 @@ return dataset.id -def extract_metadatas(images, rms_est_sigma, rms_est_fraction): +def extract_metadatas(accessors, rms_est_sigma, rms_est_fraction): """ Extracts metadata and rms_qc values from the list of images. Args: - images: list of image urls + accessors: list of image accessors rms_est_sigma: used for RMS calculation, see `tkp.quality.statistics` rms_est_fraction: used for RMS calculation, see `tkp.quality.statistics` @@ -93,22 +48,17 @@ a list of metadata's. The metadata will be False if extraction failed. """ results = [] - for image in images: - logger.info("Extracting metadata from %s" % image) - try: - accessor = tkp.accessors.open(image) - except TypeError as e: - logging.error("Can't open image %s: %s" % (image, e)) - results.append(False) - else: - metadata = accessor.extract_metadata() - metadata['rms_qc'] = rms_with_clipped_subregion(accessor.data, - rms_est_sigma,rms_est_fraction) - results.append(metadata) + for accessor in accessors: + logger.debug("Extracting metadata from %s" % accessor.url) + metadata = accessor.extract_metadata() + metadata['rms_qc'] = rms_with_clipped_subregion(accessor.data, + rms_est_sigma, + rms_est_fraction) + results.append(metadata) return results -def store_images(images_metadata, extraction_radius_pix, dataset_id): +def store_images_in_db(images_metadata, extraction_radius_pix, dataset_id, bandwidth_max): """ Add images to database. Note that all images in one dataset should be inserted in one go, since the order is very important here. 
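With the MongoDB image cache removed, the persistence step now works on accessors rather than on paths. A minimal sketch of the accessor-based extract_metadatas call, assuming a locally readable image; the path and the two RMS-estimation parameters are placeholders only:

    import tkp.accessors
    from tkp.steps.persistence import extract_metadatas

    # placeholder path; anything tkp.accessors.open() understands will do
    paths = ['/data/example_image.fits']
    accessors = [tkp.accessors.open(path) for path in paths]

    # placeholder settings, passed through to rms_with_clipped_subregion()
    metadatas = extract_metadatas(accessors, rms_est_sigma=4, rms_est_fraction=8)
    print(metadatas[0]['rms_qc'])
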
If you don't add them all in once, you should @@ -133,29 +83,45 @@ images_metadata.sort(key=lambda m: m['taustart_ts']) for metadata in images_metadata: + metadata['freq_bw_max'] = bandwidth_max metadata['xtr_radius'] = extraction_radius_pix * abs(metadata['deltax']) filename = metadata['url'] db_image = Image(data=metadata, dataset=dataset) image_ids.append(db_image.id) - logger.info("stored %s with ID %s" % (os.path.basename(filename), db_image.id)) + logger.debug("stored %s with ID %s" % (os.path.basename(filename), + db_image.id)) return image_ids -def node_steps(images, image_cache_config, rms_est_sigma, rms_est_fraction): +def get_accessors(images): + results = [] + for image in images: + try: + accessor = tkp.accessors.open(image) + except TypeError as e: + logger.error("Can't open image %s: %s" % (image, e)) + raise + else: + results.append(accessor) + return results + + +def paths_to_fits(paths): """ - this function executes all persistence steps that should be executed on a node. - Note: Should only be used in a node recipe + paths (list): list of paths to a astronomical image which can be opened with + casacore + + returns: + list: of HDUlist objects """ - mongohost = image_cache_config['mongo_host'] - mongoport = image_cache_config['mongo_port'] - mongodb = image_cache_config['mongo_db'] - copy_images = image_cache_config['copy_images'] - - if copy_images: - for image in images: - image_to_mongodb(image, mongohost, mongoport, mongodb) - else: - logger.info("Not copying images to mongodb") - - metadatas = extract_metadatas(images, rms_est_sigma, rms_est_fraction) - return metadatas + for path in paths: + try: + i = casacore_image(path) + except RuntimeError: + logging.error("can't open image {}".format(path)) + yield + else: + with NamedTemporaryFile() as temp_file: + i.tofits(temp_file.name) + fits = fits_open(temp_file.name) + yield cPickle.dumps(fits[0].data), str(fits[0].header) diff -Nru tkp-3.1.1/tkp/steps/quality.py tkp-4.0/tkp/steps/quality.py --- tkp-3.1.1/tkp/steps/quality.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/steps/quality.py 2017-02-03 10:06:41.000000000 +0000 @@ -16,23 +16,19 @@ logger = logging.getLogger(__name__) -def reject_check(image_path, job_config): +def reject_check(accessor, job_config): """ Check if an image passes the quality checks. If not, a rejection reason is returned. args: - id: database ID of image. This is not used but kept as a reference for - distributed computation! 
- image_path: path to image - parset_file: parset file location with quality check parameters + accessor: a TKP accessor representing the image + job_config: parset file location with quality check parameters + Returns: (rejection ID, description) if rejected, else None """ - - accessor = tkp.accessors.open(image_path) - rejected = reject_check_generic_data(accessor) if rejected: return rejected @@ -49,7 +45,7 @@ return rejected else: msg = "no specific quality checks for " + accessor.telescope - logger.warn(msg) + logger.debug(msg) def reject_image(image_id, reason, comment): diff -Nru tkp-3.1.1/tkp/steps/source_extraction.py tkp-4.0/tkp/steps/source_extraction.py --- tkp-3.1.1/tkp/steps/source_extraction.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/steps/source_extraction.py 2017-02-03 10:06:41.000000000 +0000 @@ -1,11 +1,10 @@ import logging -import tkp.accessors from tkp.accessors import sourcefinder_image_from_accessor -import tkp.accessors from collections import namedtuple logger = logging.getLogger(__name__) + #Short-lived struct for returning results from the source extraction routine: ExtractionResults = namedtuple('ExtractionResults', ['sources', @@ -13,12 +12,12 @@ 'rms_max']) -def extract_sources(image_path, extraction_params): +def extract_sources(accessor, extraction_params): """ Extract sources from an image. args: - image_path: path to file from which to extract sources. + images: a tuple of image DB object and accessor extraction_params: dictionary containing at least the detection and analysis threshold and the association radius, the last one a multiplication factor of the de Ruiter radius. @@ -27,8 +26,8 @@ min RMS value and max RMS value """ logger.debug("Detecting sources in image %s at detection threshold %s", - image_path, extraction_params['detection_threshold']) - accessor = tkp.accessors.open(image_path) + accessor, extraction_params['detection_threshold']) + data_image = sourcefinder_image_from_accessor(accessor, margin=extraction_params['margin'], radius=extraction_params['extraction_radius_pix'], @@ -48,7 +47,7 @@ deblend_nthresh=extraction_params['deblend_nthresh'], force_beam=extraction_params['force_beam'] ) - logger.info("Detected %d sources in image %s" % (len(results), image_path)) + logger.debug("Detected %d sources in image %s" % (len(results), accessor.url)) ew_sys_err = extraction_params['ew_sys_err'] ns_sys_err = extraction_params['ns_sys_err'] diff -Nru tkp-3.1.1/tkp/steps/varmetric.py tkp-4.0/tkp/steps/varmetric.py --- tkp-3.1.1/tkp/steps/varmetric.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/steps/varmetric.py 2017-02-03 10:06:41.000000000 +0000 @@ -1,4 +1,4 @@ -from tkp.db.alchemy import store_varmetric +from tkp.db.alchemy.varmetric import store_varmetric, del_duplicate_varmetric from tkp.db.model import Dataset from tkp.db import Database @@ -6,10 +6,11 @@ def execute_store_varmetric(dataset_id, session=None): """ Executes the storing varmetric function. Will create a database session - if none is supplied + if none is supplied. 
args: - dataset_id: the ID of the dataset for which you want to store the varmetrics + dataset_id: the ID of the dataset for which you want to store the + varmetrics session: An optional SQLAlchemy session """ if not session: @@ -17,6 +18,8 @@ session = database.Session() dataset = Dataset(id=dataset_id) - query = store_varmetric(session, dataset=dataset) - session.execute(query) - session.commit() \ No newline at end of file + delete_ = del_duplicate_varmetric(session=session, dataset=dataset) + session.execute(delete_) + insert_ = store_varmetric(session, dataset=dataset) + session.execute(insert_) + session.commit() diff -Nru tkp-3.1.1/tkp/stream.py tkp-4.0/tkp/stream.py --- tkp-3.1.1/tkp/stream.py 1970-01-01 00:00:00.000000000 +0000 +++ tkp-4.0/tkp/stream.py 2017-02-03 10:06:41.000000000 +0000 @@ -0,0 +1,232 @@ +""" +Code for parsing streaming telescope data. For now we only support AARTFAAC +data version 1. We implemented an emulator for this stream in +`testutil.stream_emu`. +""" +from __future__ import print_function + +import logging +import socket +import StringIO +import struct +import astropy.io.fits.header +import astropy.io.fits +import numpy as np +import time +import dateutil.parser + +from Queue import Full +from multiprocessing import Manager + + +# the checksum is used to check if we are not drifting in the data flow +CHECKSUM = 0x47494A53484F4D4F + +# how many images groups do we keep before we start dropping +BACK_LOG = 10 + +# use this for debugging. Will not fork processes but run everything threaded +THREADED = False + +logger = logging.getLogger(__name__) + + +def extract_timestamp(hdulist): + """ + args: + hdulist (astropy.io.fits.HDUList): fits header to extract timestamp from + + returns: + datetime.datetime: extracted timestamp + + """ + return dateutil.parser.parse(hdulist[0].header['date-obs']) + + +def getbytes(socket_, bytes_): + """ + Read an amount of bytes from the socket + + args: + socket_ (socket.socket): socket to use for reading + bytes_ (int): amount of bytes to read + returns: + str: raw bytes from socket + """ + result = StringIO.StringIO() + count = bytes_ + while count > 0: + recv = socket_.recv(count) + if len(recv) == 0: + raise socket.error("Server closed connection") + count -= len(recv) + result.write(recv) + return result.getvalue() + + +def read_window(socket_): + """ + read raw aarfaac protocol window + + args: + socket_ (socket.socket): socket to read from + returns: + fits_bytes, image_bytes + """ + header_bytes = getbytes(socket_, 512) + magic = struct.unpack('Q', header_bytes[:8])[0] + fits_length = struct.unpack('=L', header_bytes[8:12])[0] + array_length = struct.unpack('=L', header_bytes[12:16])[0] + assert magic == CHECKSUM, str(magic) + '!=' + str(CHECKSUM) + fits_bytes = getbytes(socket_, fits_length) + image_bytes = getbytes(socket_, array_length) + return fits_bytes, image_bytes + + +def reconstruct_fits(fits_bytes, image_bytes): + """ + reconstruct a fits object from serialised fits header and data. 
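read_window above relies on a fixed framing: every window starts with a 512-byte header holding the 8-byte magic checksum and two 32-bit lengths, followed by the serialised FITS header and the raw pixel bytes. A self-contained sketch of packing and re-parsing that header, with arbitrary example lengths (the same '=QLL496x' layout is used by the emulator in tkp/testutil/stream_emu.py):

    import struct

    CHECKSUM = 0x47494A53484F4D4F   # same magic value as tkp.stream.CHECKSUM

    # 512-byte header: 8-byte magic, 4-byte fits length, 4-byte data length,
    # then zero padding up to 512 bytes
    fits_length, array_length = 2880, 4 * 1024 * 1024
    header = struct.pack('=QLL496x', CHECKSUM, fits_length, array_length)
    assert len(header) == 512

    # parse it back the way read_window() does
    magic = struct.unpack('Q', header[:8])[0]
    assert magic == CHECKSUM
    assert struct.unpack('=L', header[8:12])[0] == fits_length
    assert struct.unpack('=L', header[12:16])[0] == array_length
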
+ + args: + fits_bytes (str): a string with serialized fits bytes + image_bytes (str): a string with serialized image data + returns: + astropy.io.fits.HDUList: the fits object + + """ + hdu_header = astropy.io.fits.header.Header.fromstring(fits_bytes) + width = hdu_header["NAXIS1"] + length = hdu_header["NAXIS2"] + image_array = struct.unpack(str(len(image_bytes)/4) + 'f', image_bytes) + image_matrix = np.reshape(image_array, (width, length)) + hdu = astropy.io.fits.PrimaryHDU(image_matrix) + hdu.header = hdu_header + hdulist = astropy.io.fits.HDUList([hdu]) + return hdulist + + +def connection_handler(socket_, image_queue): + """ + Handles the connection, waits until a windows is returned and puts it in + the queue. + + Daemon thread, will loop forever. + + args: + socket_ (socket.socket): socket used for reading + image_queue (Queue.Queue): used for putting images in + """ + while True: + try: + fits_bytes, image_bytes = read_window(socket_) + except Exception as e: + logger.error("error reading data: {}".format(str(e))) + logger.info("sleeping for 5 seconds") + time.sleep(5) + break + else: + hdulist = reconstruct_fits(fits_bytes, image_bytes) + image_queue.put(hdulist) + + +def connector(host, port, image_queue): + """ + Tries to connect to a specific host and port, if succesfull will call + connection_handler() with the connection. + + args: + host (str): host to connect to + port (int): port to connect to + image_queue (Queue.Queue): Will be used for putting the images in + + """ + while True: + logger.info("connecting to {}:{}".format(host, port)) + try: + socket_ = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + socket_.settimeout(5) + socket_.connect((host, port)) + except socket.error as e: + logger.error("cant connect to {}:{}: {}".format(host, port, str(e))) + logger.info("will try reconnecting in 5 seconds") + time.sleep(5) + else: + logger.info("connected to {}:{}".format(host, port)) + connection_handler(socket_, image_queue) + + +def merger(image_queue, grouped_queue): + """ + Will monitor image_queue for images and group them by timestamp. When an + image with an successive timestamp is received the group is put on the + grouped queue. + + args: + image_queue (Queue): the incoming image queue + grouped_queue (Queue): the outgoing grouped image queue + """ + logger.info("merger thread started") + first_image = image_queue.get() + logger.info("merger received first image") + images = [first_image] + previous_timestamp = extract_timestamp(first_image) + + while True: + new_image = image_queue.get() + new_timestamp = extract_timestamp(new_image) + logger.info("merger received image with timestamp {}".format(new_timestamp)) + if new_timestamp < previous_timestamp: + logger.error("timing error, older image received after newer image") + if new_timestamp == previous_timestamp: + images.append(new_image) + else: + previous_timestamp = new_timestamp + logger.info("collected {} images, processing...".format(len(images))) + try: + grouped_queue.put(images, block=False) + except Full: + logger.error("grouped image queue full ({}), dropping group" + " ({} images)".format(grouped_queue.qsize(), + len(images))) + + images = [new_image] + + +def stream_generator(hosts, ports): + """ + Connects to all hosts on port in ports. Returns a generator yielding sets of + images with the same timestamp. 
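For well-ordered input, the merger above amounts to grouping consecutive images that share a timestamp; note that the real code only flushes a group once an image with a newer timestamp arrives. A toy illustration of that grouping using stand-in records instead of real HDULists:

    from datetime import datetime
    from itertools import groupby

    # stand-in records: (timestamp, subband) pairs instead of real HDULists
    t0 = datetime(2017, 1, 1, 12, 0, 0)
    t1 = datetime(2017, 1, 1, 12, 0, 1)
    incoming = [(t0, 'sb0'), (t0, 'sb1'), (t1, 'sb0'), (t1, 'sb1')]

    # group consecutive records that share a timestamp
    groups = [(ts, [img for _, img in batch])
              for ts, batch in groupby(incoming, key=lambda pair: pair[0])]
    assert groups == [(t0, ['sb0', 'sb1']), (t1, ['sb0', 'sb1'])]
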
+ + args: + hosts (list): list of hosts to connect to + ports (list): list of ports to connect to + """ + + if THREADED: + import threading + from queue import Queue + method = threading.Thread + image_queue = Queue() + grouped_queue = Queue(maxsize=BACK_LOG) + else: + import multiprocessing + manager = Manager() + image_queue = manager.Queue() + grouped_queue = manager.Queue(maxsize=BACK_LOG) + method = multiprocessing.Process + + for host, port in zip(hosts, ports): + name = 'port_{}_proc'.format(port) + args = dict(target=connector, name=name, args=(host, port, image_queue)) + con_proc = method(**args) + con_proc.daemon = True + con_proc.start() + + args = dict(target=merger, name='merger_proc', + args=(image_queue, grouped_queue)) + merger_proc = method(**args) + merger_proc.daemon = True + merger_proc.start() + + while True: + yield grouped_queue.get() diff -Nru tkp-3.1.1/tkp/telescope/lofar/quality.py tkp-4.0/tkp/telescope/lofar/quality.py --- tkp-3.1.1/tkp/telescope/lofar/quality.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/telescope/lofar/quality.py 2017-02-03 10:06:41.000000000 +0000 @@ -2,8 +2,7 @@ import tkp.db import tkp.db.quality as dbquality from tkp.quality.restoringbeam import beam_invalid -from tkp.quality.rms import rms_invalid -from tkp.quality.statistics import rms_with_clipped_subregion +from tkp.quality.rms import rms_invalid, rms_with_clipped_subregion from tkp.telescope.lofar.noise import noise_level from tkp.utility import nice_format diff -Nru tkp-3.1.1/tkp/testutil/alchemy.py tkp-4.0/tkp/testutil/alchemy.py --- tkp-3.1.1/tkp/testutil/alchemy.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/testutil/alchemy.py 2017-02-03 10:06:41.000000000 +0000 @@ -1,16 +1,18 @@ from datetime import datetime, timedelta import tkp.db +import tkp.db.model -def gen_band(central=150**6, low=None, high=None): + +def gen_band(dataset, central=150**6, low=None, high=None): if not low: low = central * .9 if not high: high = central * 1.1 - return tkp.db.model.Frequencyband(freq_low=low, freq_central=central, + return tkp.db.model.Frequencyband(dataset=dataset, freq_low=low, freq_central=central, freq_high=high) -def gen_dataset(description): +def gen_dataset(description='A test dataset without a description'): return tkp.db.model.Dataset(process_start_ts=datetime.now(), description=description) @@ -20,7 +22,13 @@ xtr_radius=1, x=1, y=1, z=1) -def gen_image(band, dataset, skyregion, taustart_ts=None): +def gen_image(band=None, dataset=None, skyregion=None, taustart_ts=None): + if not dataset: + dataset = gen_dataset() + if not band: + band = gen_band(dataset=dataset) + if not skyregion: + skyregion=gen_skyregion(dataset=dataset) if not taustart_ts: taustart_ts = datetime.now() return tkp.db.model.Image(band=band, dataset=dataset, skyrgn=skyregion, diff -Nru tkp-3.1.1/tkp/testutil/db_subs.py tkp-4.0/tkp/testutil/db_subs.py --- tkp-3.1.1/tkp/testutil/db_subs.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/testutil/db_subs.py 2017-02-03 10:06:41.000000000 +0000 @@ -5,7 +5,8 @@ import tkp.db from tkp.db.generic import get_db_rows_as_dicts from tkp.db.database import Database -from tkp.db.orm import DataSet, Image +from tkp.db.associations import associate_extracted_sources +from tkp.db.general import insert_extracted_sources from tkp.db import general as dbgen from tkp.db import nulldetections import tkp.testutil.data as testdata @@ -80,27 +81,28 @@ regions. 
""" starttime = datetime.datetime(2012, 1, 1) # Happy new year - time_spacing = datetime.timedelta(seconds=600) - init_im_params = {'tau_time':300, - 'freq_eff':140e6, - 'freq_bw':2e6, - 'taustart_ts':starttime, - 'beam_smaj_pix': float(2.7), - 'beam_smin_pix': float(2.3), - 'beam_pa_rad': float(1.7), - 'deltax': float(-0.01111), - 'deltay': float(0.01111), - 'url':testdata.fits_file, # just an arbitrary existing fits file - 'centre_ra': 123., # Arbitarily picked. - 'centre_decl': 10., # Arbitarily picked. - 'xtr_radius': 10., # (Degrees) - 'rms_qc': 1., - 'rms_min': 1e-4, #0.1mJy RMS - 'rms_max': 3e-4, #0.3mJy RMS - 'detection_thresh': 6, - 'analysis_thresh': 3 - } + init_im_params = { + 'tau_time': 300, + 'freq_eff': 140e6, + 'freq_bw': 2e6, + 'freq_bw_max': 0.0, + 'taustart_ts': starttime, + 'beam_smaj_pix': float(2.7), + 'beam_smin_pix': float(2.3), + 'beam_pa_rad': float(1.7), + 'deltax': float(-0.01111), + 'deltay': float(0.01111), + 'url': testdata.fits_file, # just an arbitrary existing fits file + 'centre_ra': 123., # Arbitrarily picked. + 'centre_decl': 10., # Arbitrarily picked. + 'xtr_radius': 10., # (Degrees) + 'rms_qc': 1., + 'rms_min': 1e-4, # 0.1mJy RMS + 'rms_max': 3e-4, # 0.3mJy RMS + 'detection_thresh': 6, + 'analysis_thresh': 3 + } init_im_params.update(kwargs) return init_im_params @@ -339,15 +341,15 @@ 3-tuple (image, list of blind extractions, list of forced fits). """ - image = tkp.db.Image(data=image_params,dataset=dataset) + image = tkp.db.Image(data=image_params, dataset=dataset) blind_extractions=[] for src in mock_sources: - xtr = src.simulate_extraction(image,extraction_type='blind') + xtr = src.simulate_extraction(image, extraction_type='blind') if xtr is not None: blind_extractions.append(xtr) - image.insert_extracted_sources(blind_extractions,'blind') - image.associate_extracted_sources(deRuiter_r=deruiter_radius, - new_source_sigma_margin=new_source_sigma_margin) + insert_extracted_sources(image._id, blind_extractions, 'blind') + associate_extracted_sources(image._id, deRuiter_r=deruiter_radius, + new_source_sigma_margin=new_source_sigma_margin) nd_ids_posns = nulldetections.get_nulldetections(image.id) nd_posns = [(ra,decl) for ids, ra, decl in nd_ids_posns] forced_fits = [] diff -Nru tkp-3.1.1/tkp/testutil/decorators.py tkp-4.0/tkp/testutil/decorators.py --- tkp-3.1.1/tkp/testutil/decorators.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/testutil/decorators.py 2017-02-03 10:06:41.000000000 +0000 @@ -8,12 +8,6 @@ return lambda func: func -def requires_mongodb(): - if os.environ.get("TKP_DISABLEMONGODB", False): - return unittest.skip("mongodb functionality disabled in configuration") - return lambda func: func - - def requires_data(*args): for filename in args: if not os.path.exists(filename): diff -Nru tkp-3.1.1/tkp/testutil/stream_emu.py tkp-4.0/tkp/testutil/stream_emu.py --- tkp-3.1.1/tkp/testutil/stream_emu.py 1970-01-01 00:00:00.000000000 +0000 +++ tkp-4.0/tkp/testutil/stream_emu.py 2017-02-03 10:06:41.000000000 +0000 @@ -0,0 +1,271 @@ +#!/usr/bin/env python +""" +This is an AARTFAAC imaging pipeline simulator. It will spawn 6 server sockets +where multiple clients can connect to. once connected the server will +send out empty fits images, with increasing timestamps. Each port has a +different band. 
+""" +import struct +import time +import monotonic +from datetime import datetime +import socket +import StringIO +import logging +from Queue import Queue +from os import path +import atexit +from threading import Lock, Thread, active_count +from tkp.testutil.data import DATAPATH +from astropy.io import fits +from tkp.stream import CHECKSUM + +# if true only start one thread on the first port, useful for debugging +DEBUGGING = False + +DEFAULT_PORTS = range(6666, 6672) +DEFAULT_FREQS = range(int(6e6), int(9e6), int(5e5)) + +logger = logging.getLogger(__name__) + + +class Repeater(object): + """ + repeats incoming queue messages to subscribed queues + """ + lock = Lock() + receivers = [] + + def __init__(self): + pass + + def run(self, in_queue): + """ + Monitor incoming queue for message, repeat them to all subscribers + """ + while True: + mesg = in_queue.get() + self.put(mesg) + + def put(self, mesg): + """ + broadcast message to all subscribers + + args: + mesg (object): + """ + with self.lock: + logger.debug("relaying to {} subscribers".format(len(self.receivers))) + [r.put(mesg) for r in self.receivers] + + def subscribe(self, queue): + """ + Add a queue to the subscription list + + args: + out_queue (Queue.Queue): + """ + logger.debug("subscriber") + with self.lock: + self.receivers.append(queue) + + def unsubscribe(self, out_queue): + """ + Remove a queue from the subscription list + + args: + out_queue (Queue.Queue): + """ + logger.debug("unsubscriber") + with self.lock: + try: + self.receivers.remove(out_queue) + except ValueError: + logger.error("item not in queue") + + +def create_fits_hdu(): + """ + Create a fake AARTFAAC file used as a base for the emulated servers. Could + be anything but for now we just take the fits file from the test data. + + returns: + astropy.io.fits.HDUList: a fits object + """ + hdulist = fits.open(path.join(DATAPATH, 'accessors/aartfaac.fits')) + hdu = hdulist[0] + return hdu + + +def serialize_hdu(hdu): + """ + Serialize a fits object. + + args: + hdu (astropy.io.fits.HDUList): a fits object + returns: + str: a serialized fits object. + """ + data = struct.pack('=%sf' % hdu.data.size, *hdu.data.flatten('F')) + header = hdu.header.tostring() + return data, header + + +def create_header(fits_length, array_length): + """ + make a AARTFAAC header. Header is padded with zeros up to 512 bytes. + + args: + fits_lenght (int): how long will the fits header be + array_length (int): How long will the data be + + returns: + str: aartfaac header ready for transmission. + """ + # 512 - 16: Q = 8, L = 4 + return struct.pack('=QLL496x', CHECKSUM, fits_length, array_length) + + +def make_window(hdu): + """ + Construct a complete serialised image including aartfaac header + + args: + hdu (astropy.io.fits.HDUList): the first header + returns: + str: serialised fits file + """ + result = StringIO.StringIO() + data, fits_header = serialize_hdu(hdu) + header = create_header(len(fits_header), len(data)) + result.write(header) + result.write(fits_header) + result.write(data) + return result.getvalue() + + +def client_handler(conn, addr, freq): + """ + Handling a client connection. Will push a serialised fits image plus + AARTFAAC header to the connected client, triggered by an external queue + supplying timestamps. 
+ + args: + conn (socket.socket): The connection with the client + addr (str): address of the client + freq (int): the subband frequency of this connection + """ + repeater = Repeater() + port = conn.getsockname()[1] + logger.info('connection from {} on {}'.format(addr, port)) + hdu = create_fits_hdu() + hdu.header['RESTFREQ'] = str(freq) + queue = Queue() + repeater.subscribe(queue) + while True: + # block until we get a timestamp + timestamp = queue.get() + logger.info("sending to {} on {} ts {}".format(addr, port, timestamp)) + hdu.header['date-obs'] = timestamp.isoformat() + window = make_window(hdu) + try: + conn.send(window) + except socket.error: + logger.info("client {} disconnected".format(addr)) + break + conn.close() + repeater.unsubscribe(queue) + + +def socket_listener(port, freq): + """ + Will listen on a specific socket and fire of threads if a client connects. + Will try to reconnect every 5 seconds in case of connect failure. + + args: + port (int): On that port to listen + freq (int): The corresponds frequency that belongs to the port + """ + # loop and sleep for 5 until we can bind + while True: + try: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + # don't wait for socket release + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + sock.bind(('', port)) + except socket.error as e: + logger.error("can't bind to port {}: {}".format(port, str(e))) + logger.error("retrying in 5 seconds...") + time.sleep(5) + else: + break + + sock.listen(2) + atexit.register(lambda s: s.close(), sock) # close socket on exit + logger.info("Server listening on port {}".format(port)) + + while True: + conn, addr_port = sock.accept() + if DEBUGGING: + client_handler(conn, addr_port[0], freq) + else: + t = Thread(target=client_handler, name='repeater_thread', + args=(conn, addr_port[0], freq)) + t.daemon = True + t.start() + + +def timer(queue): + """ + Pushes a timestamp on a Queue exactly every second. + + args: + queue (Queue.Queue): a queue + """ + while True: + # We use monotonic so the timing doesn't drift. + duty_cycle = 1 # seconds + time.sleep(duty_cycle - monotonic.monotonic() % duty_cycle) + now = datetime.now() + logger.debug("timer is pushing {}".format(now)) + queue.put(now) + + +def emulator(ports=DEFAULT_PORTS, freqs=DEFAULT_FREQS): + """ + Run the aartfaac stream emulator. Will listen on all ports defined in ports + and change the frequency in the fits headers according to the freqs list. + + Daemon function, does not return. 
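A rough sketch of how the emulator and tkp.stream.stream_generator fit together on one machine, assuming the default ports and frequencies defined above; emulator() never returns, so it is started on a daemon thread, and the first group can take a few seconds to arrive while the connector retries:

    from threading import Thread
    from tkp.stream import stream_generator
    from tkp.testutil.stream_emu import emulator, DEFAULT_PORTS

    # emulator() loops forever, so run it on a daemon thread
    emu_thread = Thread(target=emulator)
    emu_thread.daemon = True
    emu_thread.start()

    # connect to every emulated subband on localhost and take the first group
    hosts = ['localhost'] * len(DEFAULT_PORTS)
    for group in stream_generator(hosts, DEFAULT_PORTS):
        print("received %d images for one timestamp" % len(group))
        break
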
+ + args: + ports (list): list of ints representing ports + freqs (list): list of frequencies + """ + heartbeat_queue = Queue() + repeater = Repeater() + + timer_thread = Thread(target=timer, name='timer_thread', + args=(heartbeat_queue,)) + timer_thread.daemon = True + timer_thread.start() + + repeater_thread = Thread(target=repeater.run, name='repeater_thread', + args=(heartbeat_queue,)) + repeater_thread.daemon = True + repeater_thread.start() + + # start all listening threads + for port, freq in zip(ports, freqs): + name = 'socket_{}_thread'.format(port) + args = port, freq + t = Thread(target=socket_listener, name=name, args=args) + t.daemon = True + t.start() + + while active_count(): + time.sleep(1) + +if __name__ == '__main__': + logging.basicConfig(level=logging.DEBUG) + emulator() diff -Nru tkp-3.1.1/tkp/utility/coordinates.py tkp-4.0/tkp/utility/coordinates.py --- tkp-3.1.1/tkp/utility/coordinates.py 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/tkp/utility/coordinates.py 2017-02-03 10:06:41.000000000 +0000 @@ -6,7 +6,7 @@ import sys import math -import pywcs +from astropy import wcs as pywcs import logging import datetime import pytz @@ -552,14 +552,16 @@ return ra, dec + def eq_to_cart(ra, dec): """Find the cartesian co-ordinates on the unit sphere given the eq. co-ords. ra, dec should be in degrees. """ - return (math.cos(math.radians(dec)) * math.cos(math.radians(ra)), # Cartesian x - math.cos(math.radians(dec)) * math.sin(math.radians(ra)), # Cartesian y - math.sin(math.radians(dec))) # Cartesian z + return (math.cos(math.radians(dec)) * math.cos(math.radians(ra)), # Cartesian x + math.cos(math.radians(dec)) * math.sin(math.radians(ra)), # Cartesian y + math.sin(math.radians(dec))) # Cartesian z + class CoordSystem(object): """A container for constant strings representing different coordinate @@ -660,13 +662,13 @@ pixpos (list): [x, y] pixel position Returns: - ra (float): Right ascension corresponding to position [x, y] - dec (float): Declination corresponding to position [x, y] + tuple: ra (float) Right ascension corresponding to position [x, y] + dec (float) Declination corresponding to position [x, y] """ - [ra], [dec] = self.wcs.wcs_pix2sky(pixpos[0], pixpos[1], self.ORIGIN) + ra, dec = self.wcs.wcs_pix2world(pixpos[0], pixpos[1], self.ORIGIN) if math.isnan(ra) or math.isnan(dec): raise RuntimeError("Spatial position is not a number") - return ra, dec + return float(ra), float(dec) def s2p(self, spatialpos): """ @@ -676,10 +678,10 @@ pixpos (list): [ra, dec] spatial position Returns: - x (float): X pixel value corresponding to position [ra, dec] - y (float): Y pixel value corresponding to position [ra, dec] + tuple: X pixel value corresponding to position [ra, dec], + Y pixel value corresponding to position [ra, dec] """ - [x], [y] = self.wcs.wcs_sky2pix(spatialpos[0], spatialpos[1], self.ORIGIN) + x, y = self.wcs.wcs_world2pix(spatialpos[0], spatialpos[1], self.ORIGIN) if math.isnan(x) or math.isnan(y): raise RuntimeError("Pixel position is not a number") - return x, y + return float(x), float(y) diff -Nru tkp-3.1.1/.travis.yml tkp-4.0/.travis.yml --- tkp-3.1.1/.travis.yml 2016-05-20 11:38:44.000000000 +0000 +++ tkp-4.0/.travis.yml 2017-02-03 10:06:41.000000000 +0000 @@ -40,7 +40,7 @@ - cd /tmp - "${TRAVIS_BUILD_DIR}/tkp/bin/trap-manage.py initproject pipeline" - cd pipeline - - "${TRAVIS_BUILD_DIR}/tkp/bin/trap-manage.py initdb -y" + - "${TRAVIS_BUILD_DIR}/tkp/bin/trap-manage.py initdb -dy" - cd ${TRAVIS_BUILD_DIR}/tests - TKP_DBUSER=postgres TKP_DBPASSWORD= 
nosetests -sv deploy: