diff -Nru filmulator-0.6.3/appimage/AppRun filmulator-0.7.0/appimage/AppRun --- filmulator-0.6.3/appimage/AppRun 2017-09-21 03:43:03.000000000 +0000 +++ filmulator-0.7.0/appimage/AppRun 1970-01-01 00:00:00.000000000 +0000 @@ -1,4 +0,0 @@ -#! /bin/bash -APPDIR=$(pwd) -echo "APPDIR = $APPDIR" -usr/bin/filmulator-gui $* diff -Nru filmulator-0.6.3/core/diffuse.cpp filmulator-0.7.0/core/diffuse.cpp --- filmulator-0.6.3/core/diffuse.cpp 2017-09-21 03:43:03.000000000 +0000 +++ filmulator-0.7.0/core/diffuse.cpp 2018-05-30 22:12:13.000000000 +0000 @@ -169,6 +169,8 @@ // in Signal Processing 44 (1995) 139-151 //Referencing code from here: //https://github.com/halide/Halide/blob/e23f83b9bde63ed64f4d9a2fbe1ed29b9cfbf2e6/test/generator/gaussian_blur_generator.cpp + +//Don't use this for radii > 70!!! void diffuse_short_convolution(matrix &developer_concentration, const float sigma_const, const float pixels_per_millimeter, @@ -178,7 +180,7 @@ const int width = developer_concentration.nc(); //Compute the standard deviation of the blur we want, in pixels. - double sigma = sqrt(timestep*pow(sigma_const*pixels_per_millimeter,2)); + const double sigma = sqrt(timestep*pow(sigma_const*pixels_per_millimeter,2)); //We set the padding to be 4 standard deviations so as to catch as much as possible. const int paddedWidth = width + 4*sigma + 3; @@ -456,3 +458,34 @@ } } } + +//Since the aforementioned infinite impulse response doesn't work nicely with large radii, +//this will downsample it so that the radius ends up at about 30. +//Then, it'll apply the van Vliet IIR filter. +//If the radius was already less than 70, then it won't downsample at all. 
+void diffuse_resize_iir(matrix &developer_concentration,
+                        const float sigma_const,
+                        const float pixels_per_millimeter,
+                        const float timestep)
+{
+    //Blur standard deviation in pixels: sigma_const*pixels_per_millimeter scaled by sqrt(timestep).
+    const double sigma = sqrt(timestep*pow(sigma_const*pixels_per_millimeter,2));
+
+    //The IIR blur (diffuse_short_convolution) is only used below this sigma, in pixels.
+    //Larger blurs are not downsampled yet; they are handed to diffuse() unchanged.
+    const double max_iir_sigma = 70;
+    if (sigma < max_iir_sigma)
+    {
+        diffuse_short_convolution(developer_concentration,
+                                  sigma_const,
+                                  pixels_per_millimeter,
+                                  timestep);
+    }
+    else
+    {
+        diffuse(developer_concentration,
+                sigma_const,
+                pixels_per_millimeter,
+                timestep);
+    }
+}
diff -Nru filmulator-0.6.3/core/filmSim.hpp filmulator-0.7.0/core/filmSim.hpp --- filmulator-0.6.3/core/filmSim.hpp 2017-09-21 03:43:03.000000000 +0000 +++ filmulator-0.7.0/core/filmSim.hpp 2018-05-30 22:12:13.000000000 +0000 @@ -97,6 +97,11 @@ const float pixels_per_millimeter, const float timestep); +void diffuse_resize_iir(matrix &developer_concentration, + const float sigma_const, + const float pixels_per_millimeter, + const float timestep); + //Reading raws with libraw //TODO: remove //PROBABLY NOT NECESSARY ANYMORE diff -Nru filmulator-0.6.3/core/filmulate.cpp filmulator-0.7.0/core/filmulate.cpp --- filmulator-0.6.3/core/filmulate.cpp 2017-09-21 03:43:03.000000000 +0000 +++ filmulator-0.7.0/core/filmulate.cpp 2018-05-30 22:12:13.000000000 +0000 @@ -31,7 +31,7 @@ FilmParams filmParam; AbortStatus abort; Valid valid; - std::tie(valid, abort, filmParam) = paramManager->claimFilmParams(FilmFetch::initial); + std::tie(valid, abort, filmParam) = paramManager->claimFilmParams(); if(abort == AbortStatus::restart) { return true; @@ -120,14 +120,14 @@ for(int i = 0; i <= development_steps; i++) { //Check for cancellation - std::tie(valid, abort, filmParam) = paramManager->claimFilmParams(FilmFetch::subsequent); + abort = paramManager->claimFilmAbort(); if(abort == AbortStatus::restart) { return true; } //Updating for
starting the development simulation. Valid is one too high here. - pipeline->updateProgress(Valid::prefilmulation, float(i)/float(development_steps)); + pipeline->updateProgress(Valid::partfilmulation, float(i)/float(development_steps)); gettimeofday(&develop_start,NULL); @@ -148,26 +148,30 @@ gettimeofday(&diffuse_start,NULL); //Check for cancellation - std::tie(valid, abort, filmParam) = paramManager->claimFilmParams(FilmFetch::subsequent); + abort = paramManager->claimFilmAbort(); if(abort == AbortStatus::restart) { return true; } //Updating for starting the diffusion simulation. Valid is one too high here. - pipeline->updateProgress(Valid::prefilmulation, float(i)/float(development_steps)); + pipeline->updateProgress(Valid::partfilmulation, float(i)/float(development_steps)); //Now, we are going to perform the diffusion part. //Here we mix the layer among itself, which grants us the // local contrast increases. -// diffuse(developer_concentration, -// sigma_const, -// pixels_per_millimeter, -// timestep); - diffuse_short_convolution(developer_concentration, - sigma_const, - pixels_per_millimeter, - timestep); + diffuse(developer_concentration, + sigma_const, + pixels_per_millimeter, + timestep); +// diffuse_short_convolution(developer_concentration, +// sigma_const, +// pixels_per_millimeter, +// timestep); +// diffuse_resize_iir(developer_concentration, +// sigma_const, +// pixels_per_millimeter, +// timestep); diffuse_dif += timeDiff(diffuse_start); @@ -216,7 +220,7 @@ struct timeval mult_start; gettimeofday(&mult_start,NULL); - std::tie(valid, abort, filmParam) = paramManager->claimFilmParams(FilmFetch::subsequent); + abort = paramManager->claimFilmAbort(); if(abort == AbortStatus::restart) { return true; diff -Nru filmulator-0.6.3/core/imagePipeline.cpp filmulator-0.7.0/core/imagePipeline.cpp --- filmulator-0.6.3/core/imagePipeline.cpp 2017-09-21 03:43:03.000000000 +0000 +++ filmulator-0.7.0/core/imagePipeline.cpp 2018-05-30 22:12:13.000000000 +0000 @@ 
-24,15 +24,11 @@ int ImagePipeline::libraw_callback(void *data, LibRaw_progress, int, int) { AbortStatus abort; - Valid validity; //Recover the param_manager from the data ParameterManager * pManager = static_cast(data); //See whether to abort or not. - //Because LibRaw does the demosaicing, we need to use the check that's performed afterwards - //That's prefilmulation. - //If we ever use LibRaw only for decoding, then change this to do the check for demosaicing. - std::tie(validity, abort, std::ignore) = pManager->claimPrefilmParams(); + abort = pManager->claimDemosaicAbort(); if (abort == AbortStatus::restart) { return 1;//cancel processing @@ -63,7 +59,7 @@ LoadParams loadParam; DemosaicParams demosaicParam; PrefilmParams prefilmParam; - FilmParams filmParam; + //FilmParams filmParam; BlackWhiteParams blackWhiteParam; FilmlikeCurvesParams curvesParam; @@ -80,13 +76,17 @@ return emptyMatrix(); } //In the future we'll actually perform loading here. + valid = paramManager->markLoadComplete(); updateProgress(valid, 0.0f); + [[fallthrough]]; } + case partdemosaic: [[fallthrough]]; case load://Do demosaic { AbortStatus abort; //Because the load params are used here std::tie(valid, abort, loadParam) = paramManager->claimLoadParams(); + paramManager->markLoadComplete();//otherwise we reset validity back half a step std::tie(valid, abort, demosaicParam) = paramManager->claimDemosaicParams(); if (abort == AbortStatus::restart) { @@ -96,7 +96,6 @@ cout << "imagePipeline.cpp: Opening " << loadParam.fullFilename << endl; - matrix input_image; //Reads in the photo. 
cout << "load start:" << timeDiff (timeRequested) << endl; struct timeval imload_time; @@ -116,7 +115,12 @@ return emptyMatrix(); } */ - if (loadParam.tiffIn) + if ((HighQuality == quality) && stealData)//only full pipelines may steal data + { + scaled_image = stealVictim->input_image; + exifData = stealVictim->exifData; + } + else if (loadParam.tiffIn) { if (imread_tiff(loadParam.fullFilename, input_image, exifData)) { @@ -138,13 +142,13 @@ LibRaw image_processor; //Connect image processor with callback for cancellation - image_processor.set_progress_handler(ImagePipeline::libraw_callback, paramManager); + //image_processor.set_progress_handler(ImagePipeline::libraw_callback, paramManager); //Open the file. const char *cstr = loadParam.fullFilename.c_str(); if (0 != image_processor.open_file(cstr)) { - cerr << "processImage: Could not read input file!" << endl; + cout << "processImage: Could not read input file!" << endl; return emptyMatrix(); } //Make abbreviations for brevity in accessing data. 
@@ -176,7 +180,7 @@ } AbortStatus abort; - std::tie(valid, abort, prefilmParam) = paramManager->claimPrefilmParams(); + abort = paramManager->claimDemosaicAbort(); if (abort == AbortStatus::restart) { return emptyMatrix(); @@ -190,7 +194,7 @@ return emptyMatrix(); } - std::tie(valid, abort, prefilmParam) = paramManager->claimPrefilmParams(); + abort = paramManager->claimDemosaicAbort(); if (abort == AbortStatus::restart) { return emptyMatrix(); @@ -236,16 +240,31 @@ cout << "scale start:" << timeDiff (timeRequested) << endl; struct timeval downscale_time; gettimeofday( &downscale_time, NULL ); - downscale_and_crop(input_image,cropped_image, 0, 0, (input_image.nc()/3)-1,input_image.nr()-1, 600, 600); - //cropped_image = input_image; + downscale_and_crop(input_image,scaled_image, 0, 0, (input_image.nc()/3)-1,input_image.nr()-1, 600, 600); + cout << "scale end: " << timeDiff( downscale_time ) << endl; + } + else if (PreviewQuality == quality) + { + cout << "scale start:" << timeDiff (timeRequested) << endl; + struct timeval downscale_time; + gettimeofday( &downscale_time, NULL ); + downscale_and_crop(input_image,scaled_image, 0, 0, (input_image.nc()/3)-1,input_image.nr()-1, resolution, resolution); cout << "scale end: " << timeDiff( downscale_time ) << endl; } else { - cropped_image = input_image; + if (!stealData) //If we had to compute the input image ourselves + { + scaled_image = input_image; + input_image.set_size(0,0); + } } + + valid = paramManager->markDemosaicComplete(); updateProgress(valid, 0.0f); + [[fallthrough]]; } + case partprefilmulation: [[fallthrough]]; case demosaic://Do pre-filmulation work. { AbortStatus abort; @@ -256,7 +275,7 @@ } //Here we apply the exposure compensation and white balance. 
- matrix exposureImage = cropped_image * pow(2, prefilmParam.exposureComp); + matrix exposureImage = scaled_image * pow(2, prefilmParam.exposureComp); whiteBalance(exposureImage, pre_film_image, prefilmParam.temperature, @@ -265,7 +284,7 @@ if (NoCache == cache) { - cropped_image.set_size( 0, 0 ); + scaled_image.set_size( 0, 0 ); cacheEmpty = true; } else @@ -280,12 +299,15 @@ cout << "ImagePipeline::processImage: Prefilmulation complete." << endl; + valid = paramManager->markPrefilmComplete(); updateProgress(valid, 0.0f); + [[fallthrough]]; } + case partfilmulation: [[fallthrough]]; case prefilmulation://Do filmulation { //We don't need to check abort status out here, because - //the filmulate function will do so inside its loop multiple times. + //the filmulate function will do so inside its loop. //We just check for it returning an empty matrix. //Here we do the film simulation on the image... @@ -315,12 +337,11 @@ cout << "ImagePipeline::processImage: Filmulation complete." << endl; - //Now, since we didn't check abort status out here, we do have to at least - // increment the validity. 
- AbortStatus abort; - std::tie(valid, abort, filmParam) = paramManager->claimFilmParams(FilmFetch::subsequent); + valid = paramManager->markFilmComplete(); updateProgress(valid, 0.0f); + [[fallthrough]]; } + case partblackwhite: [[fallthrough]]; case filmulation://Do whitepoint_blackpoint { AbortStatus abort; @@ -373,10 +394,10 @@ height = imHeight; } - matrix actually_cropped_image; + matrix cropped_image; downscale_and_crop(rotated_image, - actually_cropped_image, + cropped_image, startX, startY, endX, @@ -386,13 +407,16 @@ rotated_image.set_size(0, 0);// clean up ram that's not needed anymore - whitepoint_blackpoint(actually_cropped_image,//filmulated_image, + whitepoint_blackpoint(cropped_image,//filmulated_image, contrast_image, blackWhiteParam.whitepoint, blackWhiteParam.blackpoint); + valid = paramManager->markBlackWhiteComplete(); updateProgress(valid, 0.0f); + [[fallthrough]]; } + case partcolorcurve: [[fallthrough]]; case blackwhite: // Do color_curve { //It's not gonna abort because we have no color curves yet.. 
@@ -414,8 +438,12 @@ { cacheEmpty = false; } + + valid = paramManager->markColorCurvesComplete(); updateProgress(valid, 0.0f); + [[fallthrough]]; } + case partfilmlikecurve: [[fallthrough]]; case colorcurve://Do film-like curve { AbortStatus abort; @@ -457,6 +485,7 @@ curvesParam.saturation); updateProgress(valid, 0.0f); + [[fallthrough]]; } default://output { @@ -473,6 +502,7 @@ { interface->updateHistFinal(vibrance_saturation_image); } + valid = paramManager->markFilmLikeCurvesComplete(); updateProgress(valid, 0.0f); exifOutput = exifData; diff -Nru filmulator-0.6.3/core/imagePipeline.h filmulator-0.7.0/core/imagePipeline.h --- filmulator-0.6.3/core/imagePipeline.h 2017-09-21 03:43:03.000000000 +0000 +++ filmulator-0.7.0/core/imagePipeline.h 2018-05-30 22:12:13.000000000 +0000 @@ -8,7 +8,7 @@ enum Cache {WithCache, NoCache}; enum Histo {WithHisto, NoHisto}; -enum QuickQuality { LowQuality, HighQuality }; +enum QuickQuality { LowQuality, PreviewQuality, HighQuality }; class ImagePipeline { @@ -30,6 +30,13 @@ //Lets the consumer turn cache on and off void setCache(Cache cacheIn); + //Variable relating to stealing the demosaiced data from another imagepipeline + bool stealData = false; + ImagePipeline * stealVictim; + + //The resolution of a quick preview + int resolution; + protected: matrix emptyMatrix(){matrix mat; return mat;} @@ -48,7 +55,8 @@ struct timeval timeRequested; - matrix cropped_image; + matrix input_image; + matrix scaled_image; matrix pre_film_image; Exiv2::ExifData exifData; matrix filmulated_image; @@ -61,7 +69,7 @@ void updateProgress(Valid valid, float CurrFractionCompleted); //The core filmulation. It needs to access ProcessingParameters, so it's here. 
- bool filmulate(matrix &cropped_image, + bool filmulate(matrix &scaled_image, matrix &output_density, ParameterManager * paramManager, ImagePipeline * pipeline); diff -Nru filmulator-0.6.3/core/imread.cpp filmulator-0.7.0/core/imread.cpp --- filmulator-0.6.3/core/imread.cpp 2017-09-21 03:43:03.000000000 +0000 +++ filmulator-0.7.0/core/imread.cpp 2018-05-30 22:12:13.000000000 +0000 @@ -43,7 +43,7 @@ #define COLOR image_processor.imgdata.color //Now we'll set demosaic and other processing settings. - PARAM.user_qual = 10;//9;//10 is AMaZE; -q[#] in dcraw + PARAM.user_qual = 9;//10 is AMaZE; -q[#] in dcraw PARAM.no_auto_bright = 1;//Don't autoadjust brightness (-W) PARAM.output_bps = 16;//16 bits per channel (-6) PARAM.gamm[0] = 1; diff -Nru filmulator-0.6.3/core/scale.cpp filmulator-0.7.0/core/scale.cpp --- filmulator-0.6.3/core/scale.cpp 2017-09-21 03:43:03.000000000 +0000 +++ filmulator-0.7.0/core/scale.cpp 2018-05-30 22:12:13.000000000 +0000 @@ -15,6 +15,11 @@ const double scaleFactor, const bool interleaved); +template +void upscaleBilinear1D(const matrix input, + matrix &output, + const int outNumCols, + const bool interleaved); //Scales the input to the output to fit within the output sizes. @@ -174,5 +179,34 @@ output(i,j) = startWeight*double(input(i,inputStart)) + endWeight*double(input(i,inputEnd)); } } +} + +//Scales the image up so that the number of columns is increased to the desired number. +//outputNumCols should be for the un-interleaved image. 
+//TODO: COMPLETE THIS +template +void upscaleBilinear1D(const matrix input, + matrix &output, + const int outNumCols, + const bool interleaved) +{ + const int inputNumRows = input.nr(); + const int inputNumCols = input.nc(); + + if (outNumCols <= inputNumCols) + { + output.set_size(0,0); + return; + } + + if (interleaved) + { + output.set_size(inputNumRows, outNumCols*3); + } + else + { + output.set_size(inputNumRows, outNumCols); + } + } diff -Nru filmulator-0.6.3/database/importModel.cpp filmulator-0.7.0/database/importModel.cpp --- filmulator-0.6.3/database/importModel.cpp 2017-09-21 03:43:03.000000000 +0000 +++ filmulator-0.7.0/database/importModel.cpp 2018-05-30 22:12:13.000000000 +0000 @@ -1,9 +1,11 @@ #include "importModel.h" #include +#include using std::cout; using std::endl; +using std::max; ImportModel::ImportModel(QObject *parent) : SqlModel(parent) { @@ -77,6 +79,32 @@ } } return false; +} + +//Check for whether a directory can be created. +//Apparently this might fail on Windows because you might have write permissions +// *in* a directory but not *to* the directory itself. 
+bool ImportModel::pathWritable(const QString dir) +{ + QString parentDir = dir; + while (parentDir.length() > 0) + { + QFileInfo fileInfo(parentDir); + if (fileInfo.isWritable()) + { + return true; + } + else //the dir hasn't been created yet + { + int lastIndex = max(parentDir.lastIndexOf("/"),parentDir.lastIndexOf("\\")); + if (lastIndex < 0) + { + return false; + } + parentDir.truncate(lastIndex); + } + } + return false; } void ImportModel::importDirectory_r(const QString dir, const bool importInPlace, const bool replaceLocation) diff -Nru filmulator-0.6.3/database/importModel.h filmulator-0.7.0/database/importModel.h --- filmulator-0.6.3/database/importModel.h 2017-09-21 03:43:03.000000000 +0000 +++ filmulator-0.7.0/database/importModel.h 2018-05-30 22:12:13.000000000 +0000 @@ -55,6 +55,7 @@ public: explicit ImportModel(QObject *parent = 0); Q_INVOKABLE bool pathContainsDCIM(const QString dir, const bool notDirectory); + Q_INVOKABLE bool pathWritable(const QString dir); Q_INVOKABLE void importDirectory_r(const QString dir, const bool importInPlace, const bool replaceLocation); Q_INVOKABLE Validity importFile(const QString name, const bool importInPlace, const bool replaceLocation, const bool onlyCheck); Q_INVOKABLE void importFileList(const QString name, const bool importInPlace, const bool replaceLocation); diff -Nru filmulator-0.6.3/debian/changelog filmulator-0.7.0/debian/changelog --- filmulator-0.6.3/debian/changelog 2017-10-18 16:18:43.000000000 +0000 +++ filmulator-0.7.0/debian/changelog 2018-06-06 17:47:51.000000000 +0000 @@ -1,11 +1,8 @@ -filmulator (0.6.3-1dhor~artful) artful; urgency=medium +filmulator (0.7.0-1dhor~artful) artful; urgency=medium - * cropping feature - * Error handling was added for the case where Filmulator cannot create a directory for the database, causing the program to exit instead of polluting your home directory. 
- * Metadata writing to TIFFs has been temporarily disabled; the thumbnail was appearing as a second layer and this confused GIMP when trying to import them. + * Optimized preview - - -- Dariusz Duma Wed, 18 Oct 2017 17:52:29 +0200 + -- Dariusz Duma Wed, 06 Jun 2018 19:47:51 +0200 filmulator (0.6-1dhor~artful) artful; urgency=medium diff -Nru filmulator-0.6.3/debian/control filmulator-0.7.0/debian/control --- filmulator-0.6.3/debian/control 2016-12-31 08:26:29.000000000 +0000 +++ filmulator-0.7.0/debian/control 2018-06-06 17:47:51.000000000 +0000 @@ -2,9 +2,16 @@ Section: graphics Priority: optional Maintainer: Dariusz Duma -Build-Depends: debhelper (>=9), libtiff-dev, libgomp1, libexiv2-dev, libjpeg-dev, libraw-dev, qt5-default, qtdeclarative5-dev +Build-Depends: debhelper (>=9), + libtiff-dev, + libgomp1, + libexiv2-dev, + libjpeg-dev, + libraw-dev, + qt5-default, + qtdeclarative5-dev Standards-Version: 3.9.6 -Homepage: +Homepage: https://github.com/CarVac/filmulator-gui #Vcs-Git: git://anonscm.debian.org/collab-maint/filmulator.git #Vcs-Browser: https://anonscm.debian.org/cgit/collab-maint/filmulator.git @@ -16,5 +23,6 @@ qml-module-qtquick-dialogs, qml-module-qtquick-layouts, qml-module-qtquick-dialogs -Description: - +Description: Filmulator - a film emulator with all of the positives and none of the negatives + Filmulator accepts raw files from cameras and simulates the development of film as if exposed to the same light as the camera's sensor. For various reasons, this inherently brings about several benefits: + The program's design ideology is to have the best tool for any job, and only that one tool. The tradeoff here is a slight decrease in flexibility, but gaining a greatly simplified and streamlined user interface. 
diff -Nru filmulator-0.6.3/debian/patches/desktop_file filmulator-0.7.0/debian/patches/desktop_file --- filmulator-0.6.3/debian/patches/desktop_file 2017-10-18 15:50:50.000000000 +0000 +++ filmulator-0.7.0/debian/patches/desktop_file 2016-12-31 08:27:50.000000000 +0000 @@ -1,5 +1,5 @@ --- /dev/null -+++ filmulator-0.6.3/filmulator-gui.desktop ++++ filmulator-0.6/filmulator-gui.desktop @@ -0,0 +1,10 @@ +[Desktop Entry] +Type=Application diff -Nru filmulator-0.6.3/filmulator-gui.desktop.in filmulator-0.7.0/filmulator-gui.desktop.in --- filmulator-0.6.3/filmulator-gui.desktop.in 2017-09-21 03:43:03.000000000 +0000 +++ filmulator-0.7.0/filmulator-gui.desktop.in 2018-06-06 19:37:42.000000000 +0000 @@ -3,7 +3,7 @@ Encoding=UTF-8 Name=Filmulator Comment=Streamlined photo editor -Exec=cd/usr/lib/filmulator-gui && /usr/lib/filmulator-gui/filmulator-gui +Exec=/usr/lib/filmulator-gui/filmulator-gui Icon=filmulator-gui Terminal=false Categories=Graphics;Photography;Qt; diff -Nru filmulator-0.6.3/filmulator-gui.desktop.in~ filmulator-0.7.0/filmulator-gui.desktop.in~ --- filmulator-0.6.3/filmulator-gui.desktop.in~ 1970-01-01 00:00:00.000000000 +0000 +++ filmulator-0.7.0/filmulator-gui.desktop.in~ 2018-05-30 22:12:13.000000000 +0000 @@ -0,0 +1,10 @@ +[Desktop Entry] +Type=Application +Encoding=UTF-8 +Name=Filmulator +Comment=Streamlined photo editor +Exec=cd/usr/lib/filmulator-gui && /usr/lib/filmulator-gui/filmulator-gui +Icon=filmulator-gui +Terminal=false +Categories=Graphics;Photography;Qt; +Name[en_US]=Filmulator Binary files /tmp/tmpDtk1Ll/PwBBBD7ut_/filmulator-0.6.3/Halide/000734_levels.png and /tmp/tmpDtk1Ll/pinM9xnGZF/filmulator-0.7.0/Halide/000734_levels.png differ Binary files /tmp/tmpDtk1Ll/PwBBBD7ut_/filmulator-0.6.3/Halide/000734.png and /tmp/tmpDtk1Ll/pinM9xnGZF/filmulator-0.7.0/Halide/000734.png differ diff -Nru filmulator-0.6.3/Halide/applyfilmlikecurve.cpp filmulator-0.7.0/Halide/applyfilmlikecurve.cpp --- filmulator-0.6.3/Halide/applyfilmlikecurve.cpp 
1970-01-01 00:00:00.000000000 +0000 +++ filmulator-0.7.0/Halide/applyfilmlikecurve.cpp 2018-05-30 22:12:13.000000000 +0000 @@ -0,0 +1,51 @@
+#include
+//Applies the tone curve stored in LUT to a three-channel image (channels 0,1,2 = R,G,B).
+//Per pixel, the smallest and largest channels are sent through the LUT and the middle
+//channel is placed linearly between the two results, keeping its relative position.
+Func applyFilmlikeCurve(Func input, Func LUT){
+
+    Var x,y,c,o;
+    Func order;
+    Expr rVal = input(x,y,0), gVal = input(x,y,1), bVal = input(x,y,2);
+    Expr r = 0, g = 1, b = 2;
+    //order(x,y,o) is the index of the channel holding the o-th smallest value at (x,y).
+    order(x,y,o) = select(rVal >= gVal,
+                          select(gVal >= bVal,
+                                 select(o == 0, b, o == 1, g, r), //Order is BGR
+                                 select(rVal >= bVal,
+                                        select(o == 0, g, o == 1, b, r), //GBR
+                                        select(o == 0, g, o == 1, r, b))), //GRB
+                          select(rVal >= bVal,
+                                 select(o == 0, b, o == 1, r, g), //BRG
+                                 select(gVal >= bVal,
+                                        select(o == 0, r, o == 1, b, g), //RBG
+                                        select(o == 0, r, o == 1, g, b)))); //RGB
+    Func curved;
+    //Scale to the 16-bit index range of the LUT. (In C++ `2^16` is a bitwise XOR, i.e. 18.)
+    //NOTE(review): assumes input is normalized to [0,1] - confirm against the caller.
+    Expr maxVal = 65535.0f;
+    curved(x,y,o) = LUT(cast(UInt(16),maxVal*input(x,y,order(x,y,o))));
+
+    Expr lowOld = input(x,y,order(x,y,0));
+    Expr midOld = input(x,y,order(x,y,1));
+    Expr hiOld = input(x,y,order(x,y,2));
+
+    Expr lowNew = curved(x,y,0);
+    Expr hiNew = curved(x,y,2);
+
+    //Linear map sending lowOld to lowNew and hiOld to hiNew, applied to the middle value.
+    //epsilon keeps the ratio defined when the largest and smallest channel are equal.
+    Expr epsilon = FLT_MIN;
+    Expr multFactor = (hiNew - lowNew + epsilon)/(hiOld - lowOld + epsilon);
+    Expr correction = (lowNew - multFactor*lowOld);
+    curved(x,y,1) = multFactor*midOld + correction;
+
+    Func output;
+    output(x,y,c) = undef();
+    output(x,y,order(x,y,0)) = curved(x,y,0);
+    output(x,y,order(x,y,1)) = curved(x,y,1);
+    output(x,y,order(x,y,2)) = curved(x,y,2);
+
+    return output;
+}
+
diff -Nru filmulator-0.6.3/Halide/calcLayerMix.cpp filmulator-0.7.0/Halide/calcLayerMix.cpp --- filmulator-0.6.3/Halide/calcLayerMix.cpp 1970-01-01 00:00:00.000000000 +0000 +++ filmulator-0.7.0/Halide/calcLayerMix.cpp 2018-05-30 22:12:13.000000000 +0000 @@ -0,0 +1,45 @@ +#include +#include +#include + +using namespace Halide; +//using namespace Halide::BoundaryConditions; +using namespace std; + +using Halide::Image; +#include "image_io.h" +#include "halideFilmulate.h" + +Var x, y, c; + +Func calcLayerMix(Func developer_concentration, Expr layer_mix_const, Expr timestep, + Expr
layer_time_divisor, Expr reservoir_developer_concentration){ + Expr layer_mix = pow(layer_mix_const,timestep/layer_time_divisor); + + Expr reservoir_portion = (1 - layer_mix) * reservoir_developer_concentration; + + Func output; + output(x,y) = developer_concentration(x,y) * (layer_mix - 1) + reservoir_portion; + return output; +} + +int main(int argc, char **argv) { + + Param reservoirConcentration; + Param stepTime; + Param layerMixConst; + Param layerTimeDivisor; + + Func sumDx; + Func layerMixed; + Func initialDeveloperMirrored; + ImageParam devConc(type_of(),2); + Func dDevelConc; + Func developerConcentration = lambda(x,y,devConc(x,y)); + dDevelConc = calcLayerMix(developerConcentration, layerMixConst, stepTime, + layerTimeDivisor, reservoirConcentration); + std::vector ddcArgs = dDevelConc.infer_arguments(); + dDevelConc.compile_to_file("calcLayerMix",ddcArgs); + + return 0; +} diff -Nru filmulator-0.6.3/Halide/calcReservoirConcentration.cpp filmulator-0.7.0/Halide/calcReservoirConcentration.cpp --- filmulator-0.6.3/Halide/calcReservoirConcentration.cpp 1970-01-01 00:00:00.000000000 +0000 +++ filmulator-0.7.0/Halide/calcReservoirConcentration.cpp 2018-05-30 22:12:13.000000000 +0000 @@ -0,0 +1,36 @@ +#include +#include +#include + +using namespace Halide; +//using namespace Halide::BoundaryConditions; +using namespace std; + +using Halide::Image; +#include "image_io.h" +#include "halideFilmulate.h" + +Var x, y, c; + +int main(int argc, char **argv) { + + Param reservoirConcentration; + Param reservoirThickness; + Param activeLayerThickness; + Param filmArea; + + ImageParam devMoved(type_of(),2); + Func developerMoved = lambda(x,y,devMoved(x,y)); + Expr pixelsPerMillimeter = sqrt(devMoved.width()*devMoved.height()/filmArea); + RDom r(0 ,devMoved.width(), 0, devMoved.height()); + Func sumD; + sumD(x) = 0.0f; + sumD(0) += developerMoved(r.x,r.y); + Func newResConc; + newResConc(x) = undef(); + newResConc(0) = reservoirConcentration - 
sumD(0)*activeLayerThickness/(pow(pixelsPerMillimeter,2)*reservoirThickness); + std::vector newResConcArgs = newResConc.infer_arguments(); + newResConc.compile_to_file("calcReservoirConcentration",newResConcArgs); + + return 0; +} diff -Nru filmulator-0.6.3/Halide/compareSim.m filmulator-0.7.0/Halide/compareSim.m --- filmulator-0.6.3/Halide/compareSim.m 1970-01-01 00:00:00.000000000 +0000 +++ filmulator-0.7.0/Halide/compareSim.m 2018-05-30 22:12:13.000000000 +0000 @@ -0,0 +1,58 @@ +clear; +clc; + +initialDeveloperConcentration = single(1); +reservoirThickness = single(1000); +activeLayerThickness = single(0.1); +crystalsPerPixel = single(500); +initialCrystalRadius = single(0.00001); +initialSilverSaltDensity = single(1); +developerConsumptionConst = single(2000000); +crystalGrowthConst = single(0.00001); +silverSaltConsumptionConst = single(2000000); +totalDevelopmentTime = single(100); +agitateCount = single(1); +developmentSteps = single(12); +filmArea = single(864); +sigmaConst = single(0.2); +layerMixConst = single(0.2); +layerTimeDivisor = single(20); + +thisAmplitude = 2^15; +inputImage = (2^14)*ones(1000,1000,3); +inputImage(:,500,1) = thisAmplitude; + +% inputImage = single(imread('~/Downloads/P1020340.ppm')); + +[numRows, numCols, ~] = size(inputImage); + +simCrystals = filmSim(inputImage,filmArea,layerMixConst); +simOutput = inputImage.*simCrystals.^2; + +initialData = single(zeros(numRows,numCols,10)); +initialData(:,:,1:3) = initialCrystalRadius; +initialData(:,:,4:6) = inputImage*crystalsPerPixel*0.00015387105; +initialData(:,:,7:9) = initialSilverSaltDensity; +initialData(:,:,10) = initialDeveloperConcentration; +reservoirConcentration = initialDeveloperConcentration; + +for i = 1:developmentSteps + outData = single(zeros(numRows,numCols,10)); + outReservoirConcentration = single(ones(2,1)); + filmulateIterationGenerator(reservoirConcentration,reservoirThickness, ... + crystalGrowthConst,activeLayerThickness, ... 
+ developerConsumptionConst,silverSaltConsumptionConst,... + totalDevelopmentTime/developmentSteps,filmArea,sigmaConst, ... + layerMixConst,layerTimeDivisor,true, ... + initialData,outData,outReservoirConcentration); + initialData = outData; + reservoirConcentration = outReservoirConcentration(1); +end +realCrystals = outData(:,:,1:3); +realOutput = inputImage.*realCrystals.^2; + +figure(1); +plot(1:11,realCrystals(500,495:505,1),1:11,simCrystals(500,495:505,1)) + +figure(2); +plot(1:1000,realCrystals(500,:,3),1:1000,simCrystals(500,:,3)) \ No newline at end of file diff -Nru filmulator-0.6.3/Halide/demosaic.cpp filmulator-0.7.0/Halide/demosaic.cpp --- filmulator-0.6.3/Halide/demosaic.cpp 1970-01-01 00:00:00.000000000 +0000 +++ filmulator-0.7.0/Halide/demosaic.cpp 2018-05-30 22:12:13.000000000 +0000 @@ -0,0 +1,469 @@ +//Compile with: +//g++ demosaic.cpp -g -I include/ -L bin/ -lHalide `libpng-config --cflags --ldflags` -lpthread -ldl -o demosaic -std=c++11 +// +//Run with: +//LD_LIBRARY_PATH=bin ./demosaic +//For debug +//LD_LIBRARY_PATH=bin HL_DEBUG_CODEGEN=[#] ./demosaic +// +//g++ demosaic.cpp -g -I include/ -L bin/ -lHalide `libpng-config --cflags --ldflags` -lpthread -ldl -o demosaic -std=c++11 && LD_LIBRARY_PATH=bin HL_DEBUG_CODEGEN=0 ./demosaic +#include +#include +#include +#include +using Halide::Image; +#include +//#include + +using namespace Halide; + +Halide::Func bayerize(Func in) +{ + Func out; + Var x,y,c; + Expr evenRow = (y%2 == 0); + Expr oddRow = (y%2 == 1); + Expr evenCol = (x%2 == 0); + Expr oddCol = (x%2 == 1); + out(x,y) = select( + evenRow, select( + evenCol, + in(x,y,1), + in(x,y,0)), + select( // odd row + evenCol, + in(x,y,2), + in(x,y,1))); + // G R G R + // B G B G + // G R G R + // B G B G + + return out; +} + +Halide::Func blurRatio_v(Func vert) +{ + Var x,y; + //Low pass filter (sigma=2 L=4) + Expr h0, h1, h2, h3, h4, hsum; + h0 = .203125f; + h1 = .1796875f; + h2 = .1171875f; + h3 = .0703125f; + h4 = .03125f; + + + Func out; + 
out(x,y) = h0 * vert(x,y) + + h1 * (vert(x,y-1) + vert(x,y+1)) + + h2 * (vert(x,y-2) + vert(x,y+2)) + + h3 * (vert(x,y-3) + vert(x,y+3)) + + h4 * (vert(x,y-4) + vert(x,y+4)); + return out; +} + +Halide::Func blurRatio_h(Func hor) +{ + Var x,y; + //Low pass filter (sigma=2 L=4) + Expr h0, h1, h2, h3, h4, hsum; + h0 = .203125f; + h1 = .1796875f; + h2 = .1171875f; + h3 = .0703125f; + h4 = .03125f; + + Func out; + out(x,y) = h0 * hor(x,y) + + h1 * (hor(x-1,y) + hor(x+1,y)) + + h2 * (hor(x-2,y) + hor(x+2,y)) + + h3 * (hor(x-3,y) + hor(x+3,y)) + + h4 * (hor(x-4,y) + hor(x+4,y)); + return out; +} + +Tuple swap2(Expr a, Expr b) { + return Tuple(min(a,b), max(a,b)); +} +
+Tuple sort3(Expr a, Expr b, Expr c) { //Returns (smallest, middle, largest) of a, b, c.
+    Tuple x = swap2(a,b); //x[0] <= x[1]
+    Tuple y = swap2(x[1],c); //y[1] is now the largest of all three
+    Tuple z = swap2(x[0],y[0]); //order the two values that are left (was y[1], which repeated the maximum)
+    return Tuple(z[0], z[1], y[1]);
+}
+ +Halide::Func demosaic(Func deinterleaved) +{ + Func output; + //A large part of the algorithm is spent processing vertical and horizontal separately. + //This means that we can duplicate the original data into vertical and horizontal + //And run the horizontal actually vertically in memory + //Then we can vectorize very easily. + + Var x, y, c; + Var xo, yo, xi, yi, tile_index; + //Optimization variables + + //Group the pixels into fours. + Func r_r, g_gr, g_gb, b_b; + + Func wb; wb(x) = 1; + g_gr(x, y) = deinterleaved(2*x ,2*y )*wb(0) + 0.01f; + r_r(x, y) = deinterleaved(2*x+1,2*y )*wb(1) + 0.01f; + b_b(x, y) = deinterleaved(2*x ,2*y+1)*wb(2) + 0.01f; + g_gb(x, y) = deinterleaved(2*x+1,2*y+1)*wb(3) + 0.01f; + + // G R G R + // B G B G + // G R G R + // B G B G + + //Initial demosaic: + //We need to make this bilinear, and sharpen the estimated colors at the end. + // + //The paper uses an implementation that sharpens the off colors first, but that + //violates their assumption of smooth color transitions and worsens performance. + + + //First calculate the green at the red and blue pixels.
+ + //Red pixels + Func gAtR_v, gAtR_h; + gAtR_h(x,y) = (g_gr(x,y) + g_gr(x+1,y))/2.0f; + gAtR_v(x,y) = (g_gb(x,y-1) + g_gb(x,y))/2.0f; + //Blue pixels + Func gAtB_v, gAtB_h; + gAtB_h(x,y) = (g_gb(x-1,y) + g_gb(x,y))/2.0f; + gAtB_v(x,y) = (g_gr(x,y) + g_gr(x,y+1))/2.0f; + + //Next, calculate the red and blue at the green pixels. + + //Red rows + Func rAtGR_h, bAtGR_v; + rAtGR_h(x,y) = (r_r(x-1,y) + r_r(x,y))/2.0f; + bAtGR_v(x,y) = (b_b(x,y-1) + b_b(x,y))/2.0f; + //Blue rows + Func bAtGB_h, rAtGB_v; + bAtGB_h(x,y) = (b_b(x,y) + b_b(x+1,y))/2.0f; + rAtGB_v(x,y) = (r_r(x,y) + r_r(x,y+1))/2.0f; + + + //Get the logs of the color ratios + + //On red pixels + Func grRatioAtR_h, grRatioAtR_v; + grRatioAtR_h(x,y) = gAtR_h(x,y)/r_r(x,y); + grRatioAtR_v(x,y) = gAtR_v(x,y)/r_r(x,y); + //On blue pixels + Func gbRatioAtB_h, gbRatioAtB_v; + gbRatioAtB_h(x,y) = gAtB_h(x,y)/b_b(x,y); + gbRatioAtB_v(x,y) = gAtB_v(x,y)/b_b(x,y); + //On green pixels in red rows + Func grRatioAtGR_h, gbRatioAtGR_v; + grRatioAtGR_h(x,y) = g_gr(x,y)/rAtGR_h(x,y); + gbRatioAtGR_v(x,y) = g_gr(x,y)/bAtGR_v(x,y); + //On green pixels in blue rows + Func gbRatioAtGB_h, grRatioAtGB_v; + gbRatioAtGB_h(x,y) = g_gb(x,y)/bAtGB_h(x,y); + grRatioAtGB_v(x,y) = g_gb(x,y)/rAtGB_v(x,y); + + + //Blur the color ratios, to estimate the actual color ratio. + //These are 1d blurs. + //Vertical is blurred vertically. + //Horizontal is blurred horizontally. + //It just happens to work out. 
+ + //Combined color ratios + Func colorRatios_h, colorRatios_v; + colorRatios_h(x,y) = select(y%2 == 0,//Rows + select(x%2 == 0,//Red row + grRatioAtGR_h(x/2, y/2),//Green in red row + grRatioAtR_h(x/2, y/2)),//Red + select(x%2 == 0,//Blue row + gbRatioAtB_h(x/2, y/2),//Blue + gbRatioAtGB_h(x/2,y/2)));//Green in blue row + colorRatios_v(x,y) = select(y%2 == 0,//Rows + select(x%2 == 0,//Red row + gbRatioAtGR_v(x/2, y/2),//Green in red row + grRatioAtR_v(x/2, y/2)),//Red + select(x%2 == 0,//Blue row + gbRatioAtB_v(x/2, y/2),//Blue + grRatioAtGB_v(x/2,y/2)));//Green in blue row + + //Now we take the logs of them. + + //First is the log of the color ratios. + //This is the Observed Color Difference Y. + Func Y_h, Y_v; + Y_h(x,y) = log(colorRatios_h(x,y)); + Y_v(x,y) = log(colorRatios_v(x,y)); + + //Next is the blurred log of the color ratios. + //This is the Estimated True Color Difference Ys ~~= X + Func Ys_h, Ys_v; + Ys_h = blurRatio_h(Y_h); + Ys_v = blurRatio_v(Y_v); + + //To get a Linear Minimum Mean Square Error (LMMSE) estimate, + //we want + //xhat = E[x] + cov(x,y)/var(y) * (y - E[y]) + // + //Empirically, since we don't all of these, it becomes (locally + //xhat = mean(x) + var(x)/(var(x) + var(%nu)) * (y - mean(x)) + // + //x is approximated by Ys; it's low-pass. + //nu, the error, is approximated by (Ys - Y); it's high-pass. 
+ Func X_h, X_v; + X_h(x,y) = Ys_h(x,y); + X_v(x,y) = Ys_v(x,y); + + //Neighborhood mean of X + Func MUx_h, MUx_v; + Func momh, momv; + RDom r1(-4, 9); +#define DOMDIV 9.0f + momh(x,y) = 0.0f; + momh(x,y) += X_h(r1+x,y); + momv(x,y) = 0.0f; + momv(x,y) += X_v(x,r1+y); + + MUx_h(x,y) = momh(x,y)/DOMDIV; + MUx_v(x,y) = momv(x,y)/DOMDIV; + + //Confirmed to be exactly the same ========================================= + + //Neighborhood variance of X + Func SIGMAx_h, SIGMAx_v; + Func ph,pv;//sums of squares + RDom r2(-4,9); + ph(x,y) = 0.0f; + ph(x,y) += X_h(r2+x,y)*X_h(r2+x,y); + pv(x,y) = 0.0f; + pv(x,y) += X_v(x,r2+y)*X_v(x,r2+y); + SIGMAx_h(x,y) = ph(x,y) / 8.0f - momh(x,y)*momh(x,y)/(8.0f*9.0f); + SIGMAx_v(x,y) = pv(x,y) / 8.0f - momv(x,y)*momv(x,y)/(8.0f*9.0f); + //Confirmed error < 2e-9; absolute values peak at around 1e-3 + + //Neighborhood variance of nu + Func SIGMAnu_h, SIGMAnu_v; + RDom r3(-4,9); + SIGMAnu_h(x,y) = 0.0f; + SIGMAnu_h(x,y) += (X_h(r3+x,y) - Y_h(r3+x,y))*(X_h(r3+x,y) - Y_h(r3+x,y)) / DOMDIV; + SIGMAnu_v(x,y) = 0.0f; + SIGMAnu_v(x,y) += (X_v(x,r3+y) - Y_v(x,r3+y))*(X_v(x,r3+y) - Y_v(x,r3+y)) / DOMDIV; + + //LMMSE estimation in each direction + Func Xlmmse_h, Xlmmse_v; + Xlmmse_h(x,y) = MUx_h(x,y) + + (Y_h(x,y) - MUx_h(x,y)) * SIGMAx_h(x,y) / + (SIGMAx_h(x,y) + SIGMAnu_h(x,y) + 1e-7f); + Xlmmse_v(x,y) = MUx_v(x,y) + + (Y_v(x,y) - MUx_v(x,y)) * SIGMAx_v(x,y) / + (SIGMAx_v(x,y) + SIGMAnu_v(x,y) + 1e-7f); + + //Confirmed to be correct? ====================================================== + + //The expected estimation error is Xerror = X - Xlmmse + //We don't use it. + + //The variance of the estimation error, we do use for the weighting. 
+ Func SIGMAer_h, SIGMAer_v; + SIGMAer_h(x,y) = SIGMAx_h(x,y) - SIGMAx_h(x,y)*SIGMAx_h(x,y)/(SIGMAx_h(x,y) + SIGMAnu_h(x,y) + 1e-7f); + SIGMAer_v(x,y) = SIGMAx_v(x,y) - SIGMAx_v(x,y)*SIGMAx_v(x,y)/(SIGMAx_v(x,y) + SIGMAnu_v(x,y) + 1e-7f); + + //Weight of estimate + Func W_h, W_v; + W_h(x,y) = SIGMAer_v(x,y) / (SIGMAer_h(x,y) + SIGMAer_v(x,y) + 1e-7f); + W_v(x,y) = 1.0f - W_h(x,y); + + //slightly different ============================================= + + //Combine to get the final log of the color ratio we'll use + Func X; + X(x,y) = W_h(x,y)*Xlmmse_h(x,y) + W_v(x,y)*Xlmmse_v(x,y); + + //Separate the green/color ratios back out. + //They're only valid on red and blue pixels. + //Reminder: these are logs, not actually the ratios yet. + Func grLogRatioAtR, gbLogRatioAtB; + grLogRatioAtR(x,y) = X(2*x+1, 2*y+0); + gbLogRatioAtB(x,y) = X(2*x+0, 2*y+1); + + //Compute the green/color ratios at the opposite colors. + //It's just the mean of the color ratios of the proper colors nearby. + //Once more, these are still logs. + Func gbLogRatioAtR, grLogRatioAtB; + gbLogRatioAtR(x,y) = (gbLogRatioAtB(x,y) + gbLogRatioAtB(x,y-1) + + gbLogRatioAtB(x+1,y-1) + gbLogRatioAtB(x+1,y)) / 4.0f; + grLogRatioAtB(x,y) = (grLogRatioAtR(x,y) + grLogRatioAtR(x,y+1) + + grLogRatioAtR(x-1,y+1) + grLogRatioAtR(x-1,y)) / 4.0f; + + Func logRatiosRB; + logRatiosRB(x,y) = Tuple(grLogRatioAtR(x,y),gbLogRatioAtB(x,y), + gbLogRatioAtR(x,y),grLogRatioAtB(x,y)); + Func grLogRatioAtRtup,grLogRatioAtBtup,gbLogRatioAtRtup,gbLogRatioAtBtup; + grLogRatioAtRtup(x,y) = logRatiosRB(x,y)[0]; + gbLogRatioAtBtup(x,y) = logRatiosRB(x,y)[1]; + gbLogRatioAtRtup(x,y) = logRatiosRB(x,y)[2]; + grLogRatioAtBtup(x,y) = logRatiosRB(x,y)[3]; + + //Compute the color ratios at green + //Again, still logs. 
+ Func grLogRatioAtGR, gbLogRatioAtGR, grLogRatioAtGB, gbLogRatioAtGB; + grLogRatioAtGR(x,y) = (grLogRatioAtRtup(x-1,y) + grLogRatioAtRtup(x,y) + + grLogRatioAtBtup(x,y-1) + grLogRatioAtBtup(x,y)) / 4.0f; + grLogRatioAtGB(x,y) = (grLogRatioAtRtup(x,y) + grLogRatioAtRtup(x,y+1) + + grLogRatioAtBtup(x,y) + grLogRatioAtBtup(x+1,y)) / 4.0f; + gbLogRatioAtGR(x,y) = (gbLogRatioAtRtup(x-1,y) + gbLogRatioAtRtup(x,y) + + gbLogRatioAtBtup(x,y-1) + gbLogRatioAtBtup(x,y)) / 4.0f; + gbLogRatioAtGB(x,y) = (gbLogRatioAtRtup(x,y) + gbLogRatioAtRtup(x,y+1) + + gbLogRatioAtBtup(x,y) + gbLogRatioAtBtup(x+1,y)) / 4.0f; + + //The libraw lmmse does a median filter...why? + //Doesn't seem to do anything + + //First we must combine these ratios into one. + Func grLogRatio, gbLogRatio; + grLogRatio(x,y) = select(y%2 == 0, + select(x%2 == 0, + //Green pixel in red row + grLogRatioAtGR(x/2,y/2), + //Red pixel + grLogRatioAtR(x/2,y/2)), + select(x%2 == 0, + //Blue pixel + grLogRatioAtB(x/2,y/2), + //Green pixel in blue row + grLogRatioAtGB(x/2,y/2))); + gbLogRatio(x,y) = select(y%2 == 0, + select(x%2 == 0, + //Green pixel in red row + gbLogRatioAtGR(x/2,y/2), + //Red pixel + gbLogRatioAtR(x/2,y/2)), + select(x%2 == 0, + //Blue pixel + gbLogRatioAtB(x/2,y/2), + //Green pixel in blue row + gbLogRatioAtGB(x/2,y/2))); + + Func logRatios; + logRatios(x,y) = Tuple(grLogRatio(x,y),gbLogRatio(x,y)); + + //Turn the logs back into ratios, after the median filter + Func grRatio, gbRatio; + + //No median, but do turn the logs back into ratios. + grRatio(x,y) = exp(logRatios(x,y)[0]); + gbRatio(x,y) = exp(logRatios(x,y)[1]); + + //Output the values we want. 
+ output(x,y,c) = select(c == 0, + //Red channel + select(y%2 == 0, + select(x%2 == 0, + //Green pixel in red row + g_gr(x/2,y/2) / grRatio(x,y), + //Red pixel + r_r(x/2,y/2)), + select(x%2 == 0, + //Blue pixel + b_b(x/2,y/2) * gbRatio(x,y) / grRatio(x,y), + //Green pixel in blue row + g_gb(x/2,y/2) / grRatio(x,y))), + select(c == 1, + //Green channel + select(y%2 == 0, + select(x%2 == 0, + //Green pixel in red row + g_gr(x/2,y/2), + //Red pixel + r_r(x/2,y/2) * grRatio(x,y)), + select(x%2 == 0, + //Blue pixel + b_b(x/2,y/2) * gbRatio(x,y), + //Green pixel in blue row + g_gb(x/2,y/2))), + //Blue channel + select(y%2 == 0, + select(x%2 == 0, + //Green pixel in red row + g_gr(x/2,y/2) / gbRatio(x,y), + //Red pixel + r_r(x/2,y/2) * grRatio(x,y) / gbRatio(x,y)), + select(x%2 == 0, + //Blue pixel + b_b(x/2,y/2), + //Green pixel in blue row + g_gb(x/2,y/2) / gbRatio(x,y))))) - 0.01f; + + + Y_h.compute_root().parallel(y); + Y_v.compute_root().split(x,xo,xi,16).parallel(xo).vectorize(xi,8); + X_h.compute_root().parallel(y); + X_v.compute_root().split(x,xo,xi,16).parallel(xo).vectorize(xi,8); + MUx_v.compute_root().split(x,xo,xi,16).parallel(xo).vectorize(xi,8); + MUx_h.compute_root().parallel(y); + SIGMAx_v.compute_root().split(x,xo,xi,16).parallel(xo).vectorize(xi,8); + SIGMAx_h.compute_root().parallel(y); + SIGMAnu_v.compute_root().split(x,xo,xi,16).parallel(xo).vectorize(xi,8); + SIGMAnu_h.compute_root().parallel(y); + //Xlmmse_v.split(x,xo,xi,16).parallel(xo).vectorize(xi,4); + //SIGMAer_v.split(x,xo,xi,16).parallel(xo).vectorize(xi,4); + //W_v.split(x,xo,xi,16).parallel(xo).vectorize(xi,4); + + //X.compute_root().parallel(y); + //grLogRatioAtR.store_at(output,tile_index).compute_at(grLogRatio,x); + //gbLogRatioAtB.store_at(output,tile_index).compute_at(gbLogRatio,x); + //gbLogRatioAtR.store_at(output,tile_index).compute_at(gbLogRatio,x); + //grLogRatioAtB.store_at(output,tile_index).compute_at(grLogRatio,x); + 
logRatiosRB.store_at(logRatios,xo).compute_at(logRatios,xi); + //grLogRatio.compute_at(output,tile_index); + //gbLogRatio.compute_at(output,tile_index); + logRatios.compute_at(output,tile_index).split(x,xo,xi,2).unroll(xi); + + output.tile(x,y,xo,yo,xi,yi,64,64) + .fuse(xo,yo,tile_index) + .parallel(tile_index) + .vectorize(xi,8) + .bound(c,0,3).unroll(c).reorder(c,xi,yi,tile_index).reorder_storage(c,x,y) + .compute_root();//.compile_to_lowered_stmt("output_unrolledC.html",HTML); + + return output; + + //Time beat: 2.25 seconds +} + +int main(int argc, char **argv) +{ + Var x, y, c; + //Halide::Image input = load("000734_levels.png"); + //Halide::Image input = load("porcupine2.png"); + Halide::Image input = load("P1040567.png"); + //Halide::Image input = load("teensy.png"); + Func toFloat, toBayer, toDemosaic, toInt, bayerFunc; + toFloat(x,y,c) = cast(input(x,y,c))/255.0f; + toBayer = bayerize(toFloat); + Halide::Image bayer = toBayer.realize(input.width(),input.height()); + //toBayer = bayerize(BoundaryConditions::constant_exterior(toFloat,0.0f,0,input.width(),0,input.height(),0,3)); + bayerFunc(x,y) = bayer(x,y); + toDemosaic = demosaic(BoundaryConditions::mirror_image(bayerFunc,0,input.width(),0,input.height())); + toInt(x,y,c) = cast(Halide::clamp(Halide::round(toDemosaic(x,y,c)*255.0f),0.0f,255.0f)); + toInt.compile_jit(); + timeval t1, t2; + std::cout << "Finished compilation, starting processing" << std::endl; + gettimeofday(&t1, NULL); + Halide::Image output = toInt.realize(input.width(),input.height(),3); + gettimeofday(&t2, NULL); + std::cout<< "Finished processing in " << + float(t2.tv_sec - t1.tv_sec) + float(t2.tv_usec - t1.tv_usec)/1000000.0f << + " seconds" << std::endl; + save(output,"000734_demosaiced7.png"); + //save(output,"porcupine_demosaiced2.png"); + //save(output,"P1040567-med-out.png"); + //save(output,"teensy_out.png"); + return 0; +} diff -Nru filmulator-0.6.3/Halide/develop.cpp filmulator-0.7.0/Halide/develop.cpp --- 
filmulator-0.6.3/Halide/develop.cpp 1970-01-01 00:00:00.000000000 +0000 +++ filmulator-0.7.0/Halide/develop.cpp 2018-05-30 22:12:13.000000000 +0000 @@ -0,0 +1,44 @@ +Func develop(Func inputs, Expr crystalGrowthConst, + Expr activeLayerThickness, Expr developerConsumptionConst, + Expr silverSaltConsumptionConst, Expr timestep) { + + Expr cgc = crystalGrowthConst*timestep; + Expr dcc = 2.0f*developerConsumptionConst / ( activeLayerThickness*3.0f); + Expr sscc = silverSaltConsumptionConst * 2.0f; + + Func dCrystalRad; + // Silver Salt Density + dCrystalRad(x,y,c) = inputs(x,y,DEVEL_CONC) * inputs(x,y,c+6) * cgc; + + Func dCrystalVol; + // Crystal Radius Active Crystals + dCrystalVol(x,y,c) = dCrystalRad(x,y,c) * inputs(x,y,c) * inputs(x,y,c) * inputs(x,y,c+3); + + Func outputs; + outputs(x,y,c) = select( + c == CRYSTAL_RAD_R , inputs(x,y,c) + dCrystalRad(x,y,0), + select(c == CRYSTAL_RAD_G , inputs(x,y,c) + dCrystalRad(x,y,1), + select(c == CRYSTAL_RAD_B , inputs(x,y,c) + dCrystalRad(x,y,2), + + select(c == DEVEL_CONC , max(0,inputs(x,y,c) - + dcc*(dCrystalVol(x,y,0) + dCrystalVol(x,y,1) + dCrystalVol(x,y,2))), + + select(c == SILVER_SALT_DEN_R, max(0,inputs(x,y,c) - sscc*dCrystalVol(x,y,0)), + select(c == SILVER_SALT_DEN_G, max(0,inputs(x,y,c) - sscc*dCrystalVol(x,y,1)), + select(c == SILVER_SALT_DEN_B, max(0,inputs(x,y,c) - sscc*dCrystalVol(x,y,2)), + + //Otherwise (active crystals) output=input + inputs(x,y,c)))))))); + + Var x_outer, x_inner; + outputs.split(x,x_outer,x_inner,4).reorder(x_inner,c,x_outer,y) + .vectorize(x_inner).parallel(y); + + dCrystalVol.split(x,x_outer,x_inner,4).store_at(outputs,x_outer) + .compute_at(outputs,x_outer).vectorize(x_inner); + + dCrystalRad.split(x,x_outer,x_inner,4).store_at(outputs,x_outer) + .compute_at(outputs,x_outer).vectorize(x_inner); + + return outputs; +} diff -Nru filmulator-0.6.3/Halide/diffuse.cpp filmulator-0.7.0/Halide/diffuse.cpp --- filmulator-0.6.3/Halide/diffuse.cpp 1970-01-01 00:00:00.000000000 +0000 +++ 
filmulator-0.7.0/Halide/diffuse.cpp 2018-05-30 22:12:13.000000000 +0000 @@ -0,0 +1,117 @@ +Func performBlur(Func f, Func coeff, Expr size, Expr sigma) { + Func blurred; + blurred(x, y) = undef(); + + // Warm up + blurred(x, 0) = coeff(0) * f(x, 0); + blurred(x, 1) = (coeff(0) * f(x, 1) + + coeff(1) * blurred(x, 0)); + blurred(x, 2) = (coeff(0) * f(x, 2) + + coeff(1) * blurred(x, 1) + + coeff(2) * blurred(x, 0)); + + // Top to bottom + RDom fwd(3, size - 3); + blurred(x, fwd) = (coeff(0) * f(x, fwd) + + coeff(1) * blurred(x, fwd - 1) + + coeff(2) * blurred(x, fwd - 2) + + coeff(3) * blurred(x, fwd - 3)); + + // Tail end + Expr padding = cast(ceil(4*sigma) + 3); + RDom tail(size, padding); + blurred(x, tail) = (coeff(1) * blurred(x, tail - 1) + + coeff(2) * blurred(x, tail - 2) + + coeff(3) * blurred(x, tail - 3)); + + // Bottom to top + Expr last = size + padding - 1; + RDom backwards(0, last - 2); + Expr b = last - 3 - backwards; // runs from last - 3 down to zero + blurred(x, b) = (coeff(0) * blurred(x, b) + + coeff(1) * blurred(x, b + 1) + + coeff(2) * blurred(x, b + 2) + + coeff(3) * blurred(x, b + 3)); + return blurred; +} + +Func blur_then_transpose(Func f, Func coeff, Expr size, Expr sigma) { + + Func blurred = performBlur(f, coeff, size, sigma); + + // Also compute attenuation due to zero boundary condition by + // blurring an image of ones in the same way. This gives a + // boundary condition equivalent to reweighting the Gaussian + // near the edge. (TODO: add a generator param to select + // different boundary conditions). + Func ones; + ones(x, y) = 1.0f; + Func attenuation = performBlur(ones, coeff, size, sigma); + + // Invert the attenuation so we can multiply by it. The + // attenuation is the same for every row/channel so we only + // need one column. 
+ Func inverse_attenuation; + inverse_attenuation(y) = 1.0f / attenuation(0, y); + + // Transpose it + Func transposed; + transposed(x, y) = blurred(y, x); + + // Correct for attenuation + Func out; + out(x, y) = transposed(x, y) * inverse_attenuation(x); + + // Schedule it. + Var yi, xi, yii, xii; + + attenuation.compute_root(); + inverse_attenuation.compute_root().vectorize(y, 8); + out.compute_root() + .tile(x, y, xi, yi, 8, 32) + .tile(xi, yi, xii, yii, 8, 8) + .vectorize(xii).unroll(yii).parallel(y); + blurred.compute_at(out, y); + transposed.compute_at(out, xi).vectorize(y).unroll(x); + + for (int i = 0; i < blurred.num_update_definitions(); i++) { + RDom r = blurred.reduction_domain(i); + if (r.defined()) { + blurred.update(i).reorder(x, r); + } + blurred.update(i).vectorize(x, 8).unroll(x); + } + + return out; +} + +Func blur(Func input, Expr sigma, Expr width, Expr height) { + + // Compute IIR coefficients using the method of Young and Van Vliet. + Func coeff; + Expr q = select(sigma < 2.5f, + 3.97156f - 4.14554f*sqrt(1 - 0.26891f*sigma), + 0.98711f*sigma - 0.96330f); + Expr denom = 1.57825f + 2.44413f*q + 1.4281f*q*q + 0.422205f*q*q*q; + coeff(x) = undef(); + coeff(1) = (2.44413f*q + 2.85619f*q*q + 1.26661f*q*q*q)/denom; + coeff(2) = -(1.4281f*q*q + 1.26661f*q*q*q)/denom; + coeff(3) = (0.422205f*q*q*q)/denom; + coeff(0) = 1 - (coeff(1) + coeff(2) + coeff(3)); + coeff.compute_root(); + + Func blurY, blurX; + blurY = blur_then_transpose(input, coeff, height, sigma); + blurX = blur_then_transpose(blurY, coeff, width, sigma); + return blurX; +} + +Func diffuse(Func input, Expr sigma_const, Expr pixels_per_millimeter, Expr timestep, Expr width, Expr height){ + + Expr sigma = sqrt(timestep)*sigma_const*pixels_per_millimeter; +// The following causes bounds inference failure +// Expr sigma = sqrt(timestep*pow(sigma_const*pixels_per_millimeter,2)); + Func diffused; + diffused = blur(input,sigma,width,height); + return diffused; +} Binary files 
/tmp/tmpDtk1Ll/PwBBBD7ut_/filmulator-0.6.3/Halide/downscaleDiffuse and /tmp/tmpDtk1Ll/pinM9xnGZF/filmulator-0.7.0/Halide/downscaleDiffuse differ diff -Nru filmulator-0.6.3/Halide/downscaleDiffuse.cpp filmulator-0.7.0/Halide/downscaleDiffuse.cpp --- filmulator-0.6.3/Halide/downscaleDiffuse.cpp 1970-01-01 00:00:00.000000000 +0000 +++ filmulator-0.7.0/Halide/downscaleDiffuse.cpp 2018-05-30 22:12:13.000000000 +0000 @@ -0,0 +1,173 @@ +#include +#include +#include + +using namespace Halide; +using namespace Halide::BoundaryConditions; + +Var x, y; + +// Sum an image vertically using a box filter with a zero boundary condition. +Func downscale(Func input, Expr radius, Expr width, Expr height) { + + Func b,c,d; + // Accumulate as a uint16 + b(x, y) = cast(0); + c(x, y) = cast(0); + + // The first value is the sum over pixels [0, radius] + RDom r(-radius, radius); + b(x,y) += input(x + r - radius,y); + c(x,y) += b(x,y + r - radius); + + Expr denominator = pow(2*radius+1,2); + + d(x,y) = c(x, y)/denominator; + Var xo,xi; + //d.bound(x,0,ceil(width/denominator)).bound(y,0,ceil(height/denominator)); + b.compute_root().gpu_tile(x,y,16,16); + //d.reorder(y,x).gpu_tile(x,y,16,16); + return d; +} + +Func upscale(Func input, Expr radius){ + Func output; + + Expr scaleFactor = (2*radius+1); + Expr xf = (x % scaleFactor)/cast(scaleFactor); + Expr yf = (y % scaleFactor)/cast(scaleFactor); + Expr x0 = cast(floor(x/scaleFactor)); + Expr y0 = cast(floor(y/scaleFactor)); + Expr x1 = cast( ceil(x/scaleFactor)); + Expr y1 = cast( ceil(x/scaleFactor)); + Expr Ix0y0 = input(x0,y0); + Expr Ix0y1 = input(x0,y1); + Expr Ix1y0 = input(x1,y0); + Expr Ix1y1 = input(x1,y1); + output(x,y) = lerp(lerp(Ix0y0,Ix1y0,xf), + lerp(Ix0y1,Ix1y1,xf),yf); + return output; +} + +Func gaussBlur(Func input){ + Func blurx,blury; + blurx(x, y) = (input(x-2, y) + + input(x-1, y)*4 + + input(x , y)*6 + + input(x+1, y)*4 + + input(x+2, y)); + blury(x, y) = (blurx(x, y-2) + + blurx(x, y-1)*4 + + blurx(x, y )*6 + + 
blurx(x, y+1)*4 + + blurx(x, y+2)); + return blury; +} + +int main(int argc, char **argv) { + + ImageParam input(UInt(8), 2); + Param radius; + Expr denominator = cast(radius * 2 + 1); + + // radius can be at most 127 before the sums overflow + radius.set_range(0, 127); + + Func in = mirror_interior(input); + + Func downscaled; + downscaled = downscale(in,150,input.width(), input.height()); + + Func blurred_small; + blurred_small = gaussBlur(downscaled); + + Func blurred_float; + blurred_float = upscale(blurred_small,150); + + Func blurred; + blurred(x,y) = cast(blurred_float(x,y)); + + Target target = get_target_from_environment(); + std::cout << target.to_string() << std::endl; + if(target.has_gpu_feature()) + { + downscaled.compute_root().gpu_tile(x,y,16,16); + blurred_small.compute_root().gpu_tile(x,y,16,16); + blurred.gpu_tile(x,y,16,16); + + Target target = get_host_target(); + target.set_feature(Target::CUDA); + //target.set_feature(Target::GPUDebug); + blurred.compile_jit(target); + } + else + { + /*blur_in_x.compute_root().vectorize(x, 8).split(x, xo, xi, 2).reorder(xi, y, xo).parallel(xo).unroll(xi); + // It makes sense to explicitly compute the transpose so that the + // blur can be done using dense vector loads. 
+ transpose_x.compute_at(blur_in_x, xo).vectorize(x, 8).unroll(x); + sum_in_x.compute_at(blur_in_x, xo); + for (int stage = 0; stage < 5; stage++) { + sum_in_x.update(stage).vectorize(x, 8).unroll(x); + if (stage > 0) { + RVar r = sum_in_x.reduction_domain(stage).x; + sum_in_x.update(stage).reorder(x, r); + } + } + + + blur_in_y.compute_root().vectorize(x, 8).split(x, xo, xi, 2).reorder(xi, y, xo).parallel(xo).unroll(xi); + transpose_y.compute_at(blur_in_y, xo).vectorize(x, 16).unroll(x); + sum_in_y.compute_at(blur_in_y, xo); + for (int stage = 0; stage < 5; stage++) { + sum_in_y.update(stage).vectorize(x, 8).unroll(x); + if (stage > 0) { + RVar r = sum_in_y.reduction_domain(stage).x; + sum_in_y.update(stage).reorder(x, r); + } + }*/ + } + + // Dump the assembly for inspection. + //blur_in_y.compile_to_assembly("/dev/stdout", Internal::vec(input, radius), "box_blur"); + + // Save the output. Comment this out for benchmarking - it takes + // way more time than the actual algorithm. Should look like a + // blurry circle. + //blur_in_y.debug_to_file("output.tiff"); + + // Set some test parameters + radius.set(19); + + // Make a test input image of a circle. + Image input_image(4000, 4000); + lambda(x, y, select(((x - 500)*(x - 500) + (y - 500)*(y - 500)) < 100*100, cast(255), cast(0))).realize(input_image); + input.set(input_image); + + Image out(4000, 4000); + // Realize it once to trigger compilation. + blurred.realize(out); + timeval t1, t2; + gettimeofday(&t1, NULL); + float numIter = 50; + for (size_t i = 0; i < numIter; i++) { + // Realize 100 more times for timing. 
+ blurred.realize(out); + } + gettimeofday(&t2, NULL); + + int64_t dt = t2.tv_sec - t1.tv_sec; + dt *= 1000; + dt += (t2.tv_usec - t1.tv_usec) / 1000; + printf("%0.4f ms\n", dt / numIter); + + /*std::vector arguments; + arguments.push_back(input_image); + arguments.push_back(radius); + arguments.push_back(radius); + blur_in_y.compile_to_c("gradient.cpp", arguments, "attachment");*/ + printf("Success!\n"); + return 0; +} + + diff -Nru filmulator-0.6.3/Halide/filmSim.m filmulator-0.7.0/Halide/filmSim.m --- filmulator-0.6.3/Halide/filmSim.m 1970-01-01 00:00:00.000000000 +0000 +++ filmulator-0.7.0/Halide/filmSim.m 2018-05-30 22:12:13.000000000 +0000 @@ -0,0 +1,20 @@ +function outputCrystalRadii = filmSim(inputImage,filmArea,layerMixConst) + + defaultCrystalSize = 0.00101*ones(size(inputImage)); + + developInfluence = 0.001./(1+(inputImage/100000).^(0.75))-0.001; % Actual peak will be at this + 3*0.0134 + + developSum = sum(developInfluence,3); + developSumScaled = developSum*3; + pixelsPerMM = sqrt(size(inputImage,1)*size(inputImage,2)/filmArea); + sigma = 0.9*pixelsPerMM; + f_gauss = fspecial('gaussian',161,double(sigma)); + paddedSum = padarray(developSumScaled,[80 80], 'symmetric'); + totalDevelopmentTime = 100; + layerTimeDivisor = 20; + mysteriousDivisor = 20; + layerMixMultiplier = 1 - layerMixConst^(totalDevelopmentTime/(layerTimeDivisor*mysteriousDivisor)); + diffuseInfluence = repmat(filter2(f_gauss,paddedSum,'valid'),[1 1 3])*layerMixMultiplier; + + outputCrystalRadii = defaultCrystalSize + diffuseInfluence + developInfluence; +end \ No newline at end of file Binary files /tmp/tmpDtk1Ll/PwBBBD7ut_/filmulator-0.6.3/Halide/filmulate and /tmp/tmpDtk1Ll/pinM9xnGZF/filmulator-0.7.0/Halide/filmulate differ diff -Nru filmulator-0.6.3/Halide/filmulate.cpp filmulator-0.7.0/Halide/filmulate.cpp --- filmulator-0.6.3/Halide/filmulate.cpp 1970-01-01 00:00:00.000000000 +0000 +++ filmulator-0.7.0/Halide/filmulate.cpp 2018-05-30 22:12:13.000000000 +0000 @@ -0,0 +1,87 @@ 
+#include + +#include +using namespace std; + +using namespace Halide; +using namespace Halide::BoundaryConditions; + +#include "halideFilmulate.h" +Var x, y, c; + +#include "develop.cpp" +#include "diffuse.cpp" + +class filmulateIterationGenerator : public Halide::Generator { + public: + + Param reservoirConcentration{"reservoirConcentration"}; + Param reservoirThickness{"reservoirThickness"}; + Param crystalGrowthConst{"crystalGrowthConst"}; + Param activeLayerThickness{"activeLayerThickness"}; + Param developerConsumptionConst{"developerConsumptionConst"}; + Param silverSaltConsumptionConst{"silverSaltConsumptionConst"}; + Param stepTime{"stepTime"}; + Param filmArea{"filmArea"}; + Param sigmaConst{"sigmaConst"}; + Param layerMixConst{"layerMixConst"}; + Param layerTimeDivisor{"layerTimeDivisor"}; + Param doDiffuse{"doDiffuse"}; + + ImageParam input{Float(32), 3,"input"}; + + Pipeline build() { + Func filmulationData = lambda(x,y,c,input(x,y,c)); + + Func developed; + developed = develop(filmulationData, crystalGrowthConst, activeLayerThickness, + developerConsumptionConst, silverSaltConsumptionConst, + stepTime); + developed.compute_root(); + + Func diffused; + Func initialDeveloper, initialDeveloperMirrored; + initialDeveloper(x,y) = developed(x,y,DEVEL_CONC); + initialDeveloperMirrored = BoundaryConditions::mirror_interior(initialDeveloper,0,input.width(),0,input.height()); + Expr pixelsPerMillimeter = sqrt(input.width()*input.height()/filmArea); + diffused = diffuse(initialDeveloper,sigmaConst,pixelsPerMillimeter, stepTime, + input.width(), input.height()); + diffused.compute_root(); + + Func developerFlux; //Developer moving from reservoir to active layer + Expr layerMixCoef = pow(layerMixConst,stepTime/layerTimeDivisor); + developerFlux(x,y) = (reservoirConcentration - diffused(x,y))*layerMixCoef; + developerFlux.compute_root(); + + Func layerMixed; + layerMixed(x,y) = diffused(x,y) + developerFlux(x,y); + layerMixed.compute_root(); + + Func fluxSum; // 
Total developer moved in units of density*pixelVolume^3 + RDom r(0, input.width(), 0, input.height()); + fluxSum(x) = 0.0f; + fluxSum(0) += developerFlux(r.x,r.y); + fluxSum.compute_root(); + + Func newReservoirConcentration; + // Total developer moved in units of density*mm^3 + Expr totalFluxMM = fluxSum(0)*activeLayerThickness * 1/pow(pixelsPerMillimeter,2); + Expr reservoirVolume = reservoirThickness*filmArea; + Expr reservoirTotalDeveloper = reservoirVolume*reservoirConcentration; + newReservoirConcentration(x) = (reservoirTotalDeveloper - totalFluxMM)/reservoirVolume; + + Func filmulationDataOut; + filmulationDataOut(x,y,c) = select(c == DEVEL_CONC && doDiffuse == 1, + layerMixed(x,y), + developed(x,y,c)); + Func reservoirConcentrationOut; + reservoirConcentrationOut(x) = select(doDiffuse == 1, + newReservoirConcentration(x), + reservoirConcentration); + return Pipeline({filmulationDataOut,reservoirConcentrationOut}); + }; +}; + +RegisterGenerator filmulateIterationGenerator{"filmulateIterationGenerator"}; + + diff -Nru filmulator-0.6.3/Halide/generateFilmulatedImage.cpp filmulator-0.7.0/Halide/generateFilmulatedImage.cpp --- filmulator-0.6.3/Halide/generateFilmulatedImage.cpp 1970-01-01 00:00:00.000000000 +0000 +++ filmulator-0.7.0/Halide/generateFilmulatedImage.cpp 2018-05-30 22:12:13.000000000 +0000 @@ -0,0 +1,25 @@ +#include +#include "halideFilmulate.h" + +using namespace Halide; + +Var x,y,c; + +int main(int argc, char **argv){ + + ImageParam input(type_of(),3); + Func in = lambda(x,y,c,input(x,y,c)); + Func outputImage; + outputImage(x,y,c) = undef(); + outputImage(x,y,0) = cast(min((1000.0f*256.0f*pow(in(x,y,CRYSTAL_RAD_R),2)* + in(x,y,ACTIVE_CRYSTALS_R)),255.0f)); + outputImage(x,y,1) = cast(min((1000.0f*256.0f*pow(in(x,y,CRYSTAL_RAD_G),2)* + in(x,y,ACTIVE_CRYSTALS_G)),255.0f)); + outputImage(x,y,2) = cast(min((1000.0f*256.0f*pow(in(x,y,CRYSTAL_RAD_B),2)* + in(x,y,ACTIVE_CRYSTALS_B)),255.0f)); + + std::vector args(1); + args[0] = input; + 
outputImage.compile_to_file("generateFilmulatedImage",args); + return 0; +} diff -Nru filmulator-0.6.3/Halide/halideFilmulate.h filmulator-0.7.0/Halide/halideFilmulate.h --- filmulator-0.6.3/Halide/halideFilmulate.h 1970-01-01 00:00:00.000000000 +0000 +++ filmulator-0.7.0/Halide/halideFilmulate.h 2018-05-30 22:12:13.000000000 +0000 @@ -0,0 +1,12 @@ + +#define CRYSTAL_RAD_R 0 +#define CRYSTAL_RAD_G 1 +#define CRYSTAL_RAD_B 2 +#define ACTIVE_CRYSTALS_R 3 +#define ACTIVE_CRYSTALS_G 4 +#define ACTIVE_CRYSTALS_B 5 +#define SILVER_SALT_DEN_R 6 +#define SILVER_SALT_DEN_G 7 +#define SILVER_SALT_DEN_B 8 +#define DEVEL_CONC 9 + diff -Nru filmulator-0.6.3/Halide/HSVtoRGB.cpp filmulator-0.7.0/Halide/HSVtoRGB.cpp --- filmulator-0.6.3/Halide/HSVtoRGB.cpp 1970-01-01 00:00:00.000000000 +0000 +++ filmulator-0.7.0/Halide/HSVtoRGB.cpp 2018-05-30 22:12:13.000000000 +0000 @@ -0,0 +1,48 @@ +Halide::Func HSVtoRGB(Func in) +{ + Func out; + Var x,y,c; + Func h,s,v; + h(x,y) = in(x,y,0); + s(x,y) = in(x,y,1); + v(x,y) = in(x,y,2); + Func r,g,b; + Func hd,i,f,p,q,t; + hd(x,y) = h(x,y)/60; + i(x,y) = Halide::floor(hd(x,y)); + f(x,y) = hd(x,y) - i(x,y); + p(x,y) = v(x,y)*(1-s(x,y)); + q(x,y) = v(x,y)*(1-(s(x,y)*f(x,y))); + t(x,y) = v(x,y)*(1-(s(x,y)*(1-f(x,y)))); + + r(x,y) = select(i(x,y) == 0 || i(x,y) == 5, + v(x,y), + select(i(x,y) == 1, + q(x,y), + select(i(x,y) == 2 || i(x,y) == 3, + p(x,y), + select(i(x,y) == 4, + t(x,y), + v(x,y)))));//default + g(x,y) = select(i(x,y) == 0, + t(x,y), + select(i(x,y) == 1 || i(x,y) == 2, + v(x,y), + select(i(x,y) == 3, + q(x,y), + p(x,y))));//4,5,default + b(x,y) = select(i(x,y) == 0 || i(x,y) == 1, + p(x,y), + select(i(x,y) == 2, + t(x,y), + select(i(x,y) == 3 || i(x,y) == 4, + v(x,y), + q(x,y))));//5,default + out(x,y,c) = select(s(x,y) == 0, + v(x,y), + select(c == 0, r(x,y), + c == 1, g(x,y), + b(x,y))); + return out; +} + diff -Nru filmulator-0.6.3/Halide/include/clock.h filmulator-0.7.0/Halide/include/clock.h --- 
filmulator-0.6.3/Halide/include/clock.h 1970-01-01 00:00:00.000000000 +0000 +++ filmulator-0.7.0/Halide/include/clock.h 2018-05-30 22:12:13.000000000 +0000 @@ -0,0 +1,29 @@ +// A current_time function for use in the tests. Returns time in +// milliseconds. + +#ifdef _WIN32 +extern "C" bool QueryPerformanceCounter(uint64_t *); +extern "C" bool QueryPerformanceFrequency(uint64_t *); +double current_time() { + uint64_t t, freq; + QueryPerformanceCounter(&t); + QueryPerformanceFrequency(&freq); + return (t * 1000.0) / freq; +} +#else +#include +double current_time() { + static bool first_call = true; + static timeval reference_time; + if (first_call) { + first_call = false; + gettimeofday(&reference_time, NULL); + return 0.0; + } else { + timeval t; + gettimeofday(&t, NULL); + return ((t.tv_sec - reference_time.tv_sec)*1000.0 + + (t.tv_usec - reference_time.tv_usec)/1000.0); + } +} +#endif diff -Nru filmulator-0.6.3/Halide/include/Halide.h filmulator-0.7.0/Halide/include/Halide.h --- filmulator-0.6.3/Halide/include/Halide.h 1970-01-01 00:00:00.000000000 +0000 +++ filmulator-0.7.0/Halide/include/Halide.h 2018-05-30 22:12:13.000000000 +0000 @@ -0,0 +1,12094 @@ +#ifndef HALIDE_INTROSPECTION_H +#define HALIDE_INTROSPECTION_H + +#include +#include +#include + +// Always use assert, even if llvm-config defines NDEBUG +#ifdef NDEBUG +#undef NDEBUG +#include +#define NDEBUG +#else +#include +#endif + +#ifndef HALIDE_UTIL_H +#define HALIDE_UTIL_H + +/** \file + * Various utility functions used internally Halide. */ + +#include +#include +#include + +// by default, the symbol EXPORT does nothing. In windows dll builds we can define it to __declspec(dllexport) +#if defined(_WIN32) && defined(Halide_SHARED) +#ifdef Halide_EXPORTS +#define EXPORT __declspec(dllexport) +#else +#define EXPORT __declspec(dllimport) +#endif +#else +#define EXPORT +#endif + +// If we're in user code, we don't want certain functions to be inlined. 
+#if defined(COMPILING_HALIDE) || defined(BUILDING_PYTHON) +#define NO_INLINE +#else +#ifdef _WIN32 +#define NO_INLINE __declspec(noinline) +#else +#define NO_INLINE __attribute__((noinline)) +#endif +#endif + +namespace Halide { +namespace Internal { + +/** Build small vectors of up to 10 elements. If we used C++11 and + * had vector initializers, this would not be necessary, but we + * don't want to rely on C++11 support. */ +//@{ +template +std::vector vec(T a) { + std::vector v(1); + v[0] = a; + return v; +} + +template +std::vector vec(T a, T b) { + std::vector v(2); + v[0] = a; + v[1] = b; + return v; +} + +template +std::vector vec(T a, T b, T c) { + std::vector v(3); + v[0] = a; + v[1] = b; + v[2] = c; + return v; +} + +template +std::vector vec(T a, T b, T c, T d) { + std::vector v(4); + v[0] = a; + v[1] = b; + v[2] = c; + v[3] = d; + return v; +} + +template +std::vector vec(T a, T b, T c, T d, T e) { + std::vector v(5); + v[0] = a; + v[1] = b; + v[2] = c; + v[3] = d; + v[4] = e; + return v; +} + +template +std::vector vec(T a, T b, T c, T d, T e, T f) { + std::vector v(6); + v[0] = a; + v[1] = b; + v[2] = c; + v[3] = d; + v[4] = e; + v[5] = f; + return v; +} + +template +std::vector vec(T a, T b, T c, T d, T e, T f, T g) { + std::vector v(7); + v[0] = a; + v[1] = b; + v[2] = c; + v[3] = d; + v[4] = e; + v[5] = f; + v[6] = g; + return v; +} + +template +std::vector vec(T a, T b, T c, T d, T e, T f, T g, T h) { + std::vector v(8); + v[0] = a; + v[1] = b; + v[2] = c; + v[3] = d; + v[4] = e; + v[5] = f; + v[6] = g; + v[7] = h; + return v; +} + +template +std::vector vec(T a, T b, T c, T d, T e, T f, T g, T h, T i) { + std::vector v(9); + v[0] = a; + v[1] = b; + v[2] = c; + v[3] = d; + v[4] = e; + v[5] = f; + v[6] = g; + v[7] = h; + v[8] = i; + return v; +} + +template +std::vector vec(T a, T b, T c, T d, T e, T f, T g, T h, T i, T j) { + std::vector v(10); + v[0] = a; + v[1] = b; + v[2] = c; + v[3] = d; + v[4] = e; + v[5] = f; + v[6] = g; + v[7] = h; + v[8] 
= i; + v[9] = j; + return v; +} +// @} + +/** Convert an integer to a string. */ +EXPORT std::string int_to_string(int x); + +/** An aggressive form of reinterpret cast used for correct type-punning. */ +template +DstType reinterpret_bits(const SrcType &src) { + assert(sizeof(SrcType) == sizeof(DstType)); + DstType dst; + memcpy(&dst, &src, sizeof(SrcType)); + return dst; +} + +/** Make a unique name for an object based on the name of the stack + * variable passed in. If introspection isn't working or there are no + * debug symbols, just uses unique_name with the given prefix. */ +EXPORT std::string make_entity_name(void *stack_ptr, const std::string &type, char prefix); + +/** Generate a unique name starting with the given character. It's + * unique relative to all other calls to unique_name done by this + * process. Not thread-safe. */ +EXPORT std::string unique_name(char prefix); + +/** Generate a unique name starting with the given string. Not + * thread-safe. */ +EXPORT std::string unique_name(const std::string &name, bool user = true); + +/** Test if the first string starts with the second string */ +EXPORT bool starts_with(const std::string &str, const std::string &prefix); + +/** Test if the first string ends with the second string */ +EXPORT bool ends_with(const std::string &str, const std::string &suffix); + +/** Return the final token of the name string using the given delimiter. */ +EXPORT std::string base_name(const std::string &name, char delim = '.'); + +} +} + +#endif + +/** \file + * + * Defines methods for introspecting in C++. Relies on DWARF debugging + * metadata, so the compilation unit that uses this must be compiled + * with -g. + */ + +namespace Halide { +namespace Internal { + +/** Get the name of a stack variable from its address. The stack + * variable must be in a compilation unit compiled with -g to + * work. The expected type helps distinguish between variables at the + * same address, e.g a class instance vs its first member. 
*/ +EXPORT std::string get_variable_name(const void *, const std::string &expected_type); + +/** Get the source location in the call stack, skipping over calls in + * the Halide namespace. */ +EXPORT std::string get_source_location(); + +// This gets called automatically by anyone who includes Halide.h by +// the code below. It tests if this functionality works for the given +// compilation unit, and disables it if not. +EXPORT void test_compilation_unit(bool (*test)(), void (*calib)()); + +} +} + + +// This code verifies that introspection is working before relying on +// it. The definitions must appear in Halide.h, but they should not +// appear in libHalide itself. They're defined as weak so that clients +// can include Halide.h multiple times without link errors. +#ifndef COMPILING_HALIDE + +namespace Halide { +namespace Internal { +static bool check_introspection(const void *var, const std::string &type, + const std::string &correct_name, + const std::string &correct_file, int line) { + std::string correct_loc = correct_file + ":" + int_to_string(line); + std::string loc = get_source_location(); + std::string name = get_variable_name(var, type); + return name == correct_name && loc == correct_loc; +} +} +} + +namespace HalideIntrospectionCanary { + +// A function that acts as a signpost. By taking it's address and +// comparing it to the program counter listed in the debugging info, +// we can calibrate for any offset between the debugging info and the +// actual memory layout where the code was loaded. 
+static void offset_marker() { + std::cerr << "You should not have called this function\n"; +} + +struct A { + int an_int; + + class B { + int private_member; + public: + float a_float; + A *parent; + B() : private_member(17) { + a_float = private_member * 2.0f; + } + }; + + B a_b; + + A() { + a_b.parent = this; + } + + bool test(const std::string &my_name); +}; + +static bool test_a(const A &a, const std::string &my_name) { + bool success = true; + success &= Halide::Internal::check_introspection(&a.an_int, "int", my_name + ".an_int", __FILE__ , __LINE__); + success &= Halide::Internal::check_introspection(&a.a_b, "HalideIntrospectionCanary::A::B", my_name + ".a_b", __FILE__ , __LINE__); + success &= Halide::Internal::check_introspection(&a.a_b.parent, "HalideIntrospectionCanary::A *", my_name + ".a_b.parent", __FILE__ , __LINE__); + success &= Halide::Internal::check_introspection(&a.a_b.a_float, "float", my_name + ".a_b.a_float", __FILE__ , __LINE__); + success &= Halide::Internal::check_introspection(a.a_b.parent, "HalideIntrospectionCanary::A", my_name, __FILE__ , __LINE__); + return success; +} + +static bool test() { + A a1, a2; + + return test_a(a1, "a1") && test_a(a2, "a2"); +} + +// Run the tests, and calibrate for the PC offset at static initialization time. +namespace { +struct TestCompilationUnit { + TestCompilationUnit() { + Halide::Internal::test_compilation_unit(&test, &offset_marker); + } +}; +} + +static TestCompilationUnit test_object; + +} + +#endif + +#endif +#ifndef HALIDE_TYPE_H +#define HALIDE_TYPE_H + +#include + +/** \file + * Defines halide types + */ + +namespace Halide { + +struct Expr; + +/** Types in the halide type system. They can be ints, unsigned ints, + * or floats of various bit-widths (the 'bits' field). They can also + * be vectors of the same (by setting the 'width' field to something + * larger than one). Front-end code shouldn't use vector + * types. Instead vectorize a function. 
*/ +struct Type { + /** The basic type code: signed integer, unsigned integer, or floating point */ + enum TypeCode { + Int, //!< signed integers + UInt, //!< unsigned integers + Float, //!< floating point numbers + Handle //!< opaque pointer type (void *) + } code; + + /** The number of bits of precision of a single scalar value of this type. */ + int bits; + + /** The number of bytes required to store a single scalar value of this type. Ignores vector width. */ + int bytes() const {return (bits + 7) / 8;} + + /** How many elements (if a vector type). Should be 1 for scalar types. */ + int width; + + /** Is this type boolean (represented as UInt(1))? */ + bool is_bool() const {return code == UInt && bits == 1;} + + /** Is this type a vector type? (width > 1) */ + bool is_vector() const {return width > 1;} + + /** Is this type a scalar type? (width == 1) */ + bool is_scalar() const {return width == 1;} + + /** Is this type a floating point type (float or double). */ + bool is_float() const {return code == Float;} + + /** Is this type a signed integer type? */ + bool is_int() const {return code == Int;} + + /** Is this type an unsigned integer type? 
*/ + bool is_uint() const {return code == UInt;} + + /** Is this type an opaque handle type (void *) */ + bool is_handle() const {return code == Handle;} + + /** Compare two types for equality */ + bool operator==(const Type &other) const { + return code == other.code && bits == other.bits && width == other.width; + } + + /** Compare two types for inequality */ + bool operator!=(const Type &other) const { + return code != other.code || bits != other.bits || width != other.width; + } + + /** Produce a vector of this type, with 'width' elements */ + Type vector_of(int w) const { + Type type = {code, bits, w}; + return type; + } + + /** Produce the type of a single element of this vector type */ + Type element_of() const { + Type type = {code, bits, 1}; + return type; + } + + /** Can this type represent all values of another type? */ + EXPORT bool can_represent(Type other) const; + + /** Return an integer which is the maximum value of this type. */ + EXPORT int imax() const; + + /** Return an expression which is the maximum value of this type */ + EXPORT Expr max() const; + + /** Return an integer which is the minimum value of this type */ + EXPORT int imin() const; + + /** Return an expression which is the minimum value of this type */ + EXPORT Expr min() const; +}; + +/** Constructing a signed integer type */ +inline Type Int(int bits, int width = 1) { + Type t; + t.code = Type::Int; + t.bits = bits; + t.width = width; + return t; +} + +/** Constructing an unsigned integer type */ +inline Type UInt(int bits, int width = 1) { + Type t; + t.code = Type::UInt; + t.bits = bits; + t.width = width; + return t; +} + +/** Construct a floating-point type */ +inline Type Float(int bits, int width = 1) { + Type t; + t.code = Type::Float; + t.bits = bits; + t.width = width; + return t; +} + +/** Construct a boolean type */ +inline Type Bool(int width = 1) { + return UInt(1, width); +} + +/** Construct a handle type */ +inline Type Handle(int width = 1) { + Type t; + t.code = 
Type::Handle; + t.bits = 64; // All handles are 64-bit for now + t.width = width; + return t; +} + +namespace { +template +struct type_of_helper; + +template +struct type_of_helper { + operator Type() { + return Handle(); + } +}; + +template<> +struct type_of_helper { + operator Type() {return Float(32);} +}; + +template<> +struct type_of_helper { + operator Type() {return Float(64);} +}; + +template<> +struct type_of_helper { + operator Type() {return UInt(8);} +}; + +template<> +struct type_of_helper { + operator Type() {return UInt(16);} +}; + +template<> +struct type_of_helper { + operator Type() {return UInt(32);} +}; + +template<> +struct type_of_helper { + operator Type() {return UInt(64);} +}; + +template<> +struct type_of_helper { + operator Type() {return Int(8);} +}; + +template<> +struct type_of_helper { + operator Type() {return Int(16);} +}; + +template<> +struct type_of_helper { + operator Type() {return Int(32);} +}; + +template<> +struct type_of_helper { + operator Type() {return Int(64);} +}; + +template<> +struct type_of_helper { + operator Type() {return Bool();} +}; +} + +/** Construct the halide equivalent of a C type */ +template Type type_of() { + return Type(type_of_helper()); +} + +} + +#endif +#ifndef HALIDE_ARGUMENT_H +#define HALIDE_ARGUMENT_H + +#include + +/** \file + * Defines a type used for expressing the type signature of a + * generated halide pipeline + */ + +namespace Halide { + +/** + * A struct representing an argument to a halide-generated + * function. Used for specifying the function signature of + * generated code. + */ +struct Argument { + /** The name of the argument */ + std::string name; + + /** An argument is either a primitive type (for parameters), or a + * buffer pointer. If 'is_buffer' is true, then 'type' should be + * ignored. + */ + bool is_buffer; + + /** For buffers, these two variables can be used to specify whether the + * buffer is read or written. 
By default, we assume that the argument + * buffer is read-write and set both flags. */ + bool read; + bool write; + + /** If this is a scalar parameter, then this is its type */ + Type type; + + Argument() : is_buffer(false) {} + Argument(const std::string &_name, bool _is_buffer, Type _type) : + name(_name), is_buffer(_is_buffer), type(_type) { + read = write = is_buffer; + } +}; +} + +#endif +#ifndef HALIDE_BOUNDS_H +#define HALIDE_BOUNDS_H + +#ifndef HALIDE_IR_H +#define HALIDE_IR_H + +/** \file + * Halide expressions (\ref Halide::Expr) and statements (\ref Halide::Internal::Stmt) + */ + +#include +#include + +#ifndef HALIDE_DEBUG_H +#define HALIDE_DEBUG_H + +/** \file + * Defines functions for debug logging during code generation. + */ + +#include +#include +#include + + +namespace Halide { + +struct Expr; +struct Type; +// Forward declare some things from IRPrinter, which we can't include yet. +EXPORT std::ostream &operator<<(std::ostream &stream, const Expr &); +EXPORT std::ostream &operator<<(std::ostream &stream, const Type &); + +namespace Internal { + +struct Stmt; +std::ostream &operator<<(std::ostream &stream, const Stmt &); + +/** For optional debugging during codegen, use the debug class as + * follows: + * + \code + debug(verbosity) << "The expression is " << expr << std::endl; + \endcode + * + * verbosity of 0 always prints, 1 should print after every major + * stage, 2 should be used for more detail, and 3 should be used for + * tracing everything that occurs. 
The verbosity with which to print + * is determined by the value of the environment variable + * HL_DEBUG_CODEGEN + */ + +struct debug { + EXPORT static int debug_level; + EXPORT static bool initialized; + int verbosity; + + debug(int v) : verbosity(v) { + if (!initialized) { + // Read the debug level from the environment + #ifdef _WIN32 + char lvl[32]; + size_t read = 0; + getenv_s(&read, lvl, "HL_DEBUG_CODEGEN"); + if (read) { + #else + if (char *lvl = getenv("HL_DEBUG_CODEGEN")) { + #endif + debug_level = atoi(lvl); + } else { + debug_level = 0; + } + + initialized = true; + } + } + + template + debug &operator<<(T x) { + if (verbosity > debug_level) return *this; + std::cerr << x; + return *this; + } +}; + +} +} + +#endif +#ifndef HALIDE_ERROR_H +#define HALIDE_ERROR_H + +#include +#include +#include + + +namespace Halide { + +/** Query whether Halide was compiled with exceptions. */ +EXPORT bool exceptions_enabled(); + +/** A base class for Halide errors. */ +struct Error : public std::runtime_error { + // Give each class a non-inlined constructor so that the type + // doesn't get separately instantiated in each compilation unit. + EXPORT Error(const std::string &msg); +}; + +/** An error that occurs while running a JIT-compiled Halide pipeline. */ +struct RuntimeError : public Error { + EXPORT RuntimeError(const std::string &msg); +}; + +/** An error that occurs while compiling a Halide pipeline that Halide + * attributes to a user error. */ +struct CompileError : public Error { + EXPORT CompileError(const std::string &msg); +}; + +/** An error that occurs while compiling a Halide pipeline that Halide + * attributes to an internal compiler bug, or to an invalid use of + * Halide's internals. 
*/ +struct InternalError : public Error { + EXPORT InternalError(const std::string &msg); +}; + + +namespace Internal { + + +struct ErrorReport { + std::ostringstream *msg; + const char *file; + const char *condition_string; + int line; + bool condition; + bool user; + bool warning; + bool runtime; + + ErrorReport(const char *f, int l, const char *cs, bool c, bool u, bool w, bool r) : + msg(NULL), file(f), condition_string(cs), line(l), condition(c), user(u), warning(w), runtime(r) { + if (condition) return; + msg = new std::ostringstream; + const std::string &source_loc = get_source_location(); + + if (user) { + // Only mention where inside of libHalide the error tripped if we have debug level > 0 + debug(1) << "User error triggered at " << f << ":" << l << "\n"; + if (condition_string) { + debug(1) << "Condition failed: " << condition_string << "\n"; + } + if (warning) { + (*msg) << "Warning"; + } else { + (*msg) << "Error"; + } + if (source_loc.empty()) { + (*msg) << ":\n"; + } else { + (*msg) << " at " << source_loc << ":\n"; + } + + } else { + (*msg) << "Internal "; + if (warning) { + (*msg) << "warning"; + } else { + (*msg) << "error"; + } + (*msg) << " at " << f << ":" << l; + if (!source_loc.empty()) { + (*msg) << " triggered by user code at " << source_loc << ":\n"; + } else { + (*msg) << "\n"; + } + if (condition_string) { + (*msg) << "Condition failed: " << condition_string << "\n"; + } + } + } + + template + ErrorReport &operator<<(T x) { + if (condition) return *this; + (*msg) << x; + return *this; + } + + /** When you're done using << on the object, and let it fall out of + * scope, this errors out, or throws an exception if they are + * enabled. This is a little dangerous because the destructor will + * also be called if there's an exception in flight due to an + * error in one of the arguments passed to operator<<. We handle + * this by only actually throwing if there isn't an exception in + * flight already. 
+ */ +#if __cplusplus >= 201100 + ~ErrorReport() noexcept(false) { +#else + ~ErrorReport() { +#endif + + if (condition) return; + explode(); + } + + EXPORT void explode(); +}; + +#define internal_error Halide::Internal::ErrorReport(__FILE__, __LINE__, NULL, false, false, false, false) +#define internal_assert(c) Halide::Internal::ErrorReport(__FILE__, __LINE__, #c, c, false, false, false) +#define user_error Halide::Internal::ErrorReport(__FILE__, __LINE__, NULL, false, true, false, false) +#define user_assert(c) Halide::Internal::ErrorReport(__FILE__, __LINE__, #c, c, true, false, false) +#define user_warning Halide::Internal::ErrorReport(__FILE__, __LINE__, NULL, false, true, true, false) +#define halide_runtime_error Halide::Internal::ErrorReport(__FILE__, __LINE__, NULL, false, true, false, true) + +// The nicely named versions get cleaned up at the end of Halide.h, +// but user code might want to do halide-style user_asserts (e.g. the +// Extern macros introduce calls to user_assert), so for that purpose +// we define an equivalent macro that can be used outside of Halide.h +#define _halide_user_assert(c) Halide::Internal::ErrorReport(__FILE__, __LINE__, #c, c, true, false, false) + +// N.B. Any function that might throw a user_assert or user_error may +// not be inlined into the user's code, or the line number will be +// misattributed to Halide.h. Either make such functions internal to +// libHalide, or mark them as NO_INLINE. 
+ +} + +} + +#endif +#ifndef HALIDE_IR_VISITOR_H +#define HALIDE_IR_VISITOR_H + +#include +#include +#include + +/** \file + * Defines the base class for things that recursively walk over the IR + */ + +namespace Halide { + +struct Expr; + +namespace Internal { + +struct IRNode; +struct Stmt; +struct IntImm; +struct FloatImm; +struct StringImm; +struct Cast; +struct Variable; +struct Add; +struct Sub; +struct Mul; +struct Div; +struct Mod; +struct Min; +struct Max; +struct EQ; +struct NE; +struct LT; +struct LE; +struct GT; +struct GE; +struct And; +struct Or; +struct Not; +struct Select; +struct Load; +struct Ramp; +struct Broadcast; +struct Call; +struct Let; +struct LetStmt; +struct AssertStmt; +struct Pipeline; +struct For; +struct Store; +struct Provide; +struct Allocate; +struct Free; +struct Realize; +struct Block; +struct IfThenElse; +struct Evaluate; + +class Function; + +/** A base class for algorithms that need to recursively walk over the + * IR. The default implementations just recursively walk over the + * children. Override the ones you care about. 
+ */ +class IRVisitor { +public: + virtual ~IRVisitor(); + virtual void visit(const IntImm *); + virtual void visit(const FloatImm *); + virtual void visit(const StringImm *); + virtual void visit(const Cast *); + virtual void visit(const Variable *); + virtual void visit(const Add *); + virtual void visit(const Sub *); + virtual void visit(const Mul *); + virtual void visit(const Div *); + virtual void visit(const Mod *); + virtual void visit(const Min *); + virtual void visit(const Max *); + virtual void visit(const EQ *); + virtual void visit(const NE *); + virtual void visit(const LT *); + virtual void visit(const LE *); + virtual void visit(const GT *); + virtual void visit(const GE *); + virtual void visit(const And *); + virtual void visit(const Or *); + virtual void visit(const Not *); + virtual void visit(const Select *); + virtual void visit(const Load *); + virtual void visit(const Ramp *); + virtual void visit(const Broadcast *); + virtual void visit(const Call *); + virtual void visit(const Let *); + virtual void visit(const LetStmt *); + virtual void visit(const AssertStmt *); + virtual void visit(const Pipeline *); + virtual void visit(const For *); + virtual void visit(const Store *); + virtual void visit(const Provide *); + virtual void visit(const Allocate *); + virtual void visit(const Free *); + virtual void visit(const Realize *); + virtual void visit(const Block *); + virtual void visit(const IfThenElse *); + virtual void visit(const Evaluate *); +}; + +/** A base class for algorithms that walk recursively over the IR + * without visiting the same node twice. This is for passes that are + * capable of interpreting the IR as a DAG instead of a tree. */ +class IRGraphVisitor : public IRVisitor { +protected: + /** By default these methods add the node to the visited set, and + * return whether or not it was already there. If it wasn't there, + * it delegates to the appropriate visit method. You can override + * them if you like. 
*/ + // @{ + virtual void include(const Expr &); + virtual void include(const Stmt &); + // @} + + /** The nodes visited so far */ + std::set visited; + +public: + + /** These methods should call 'include' on the children to only + * visit them if they haven't been visited already. */ + // @{ + virtual void visit(const IntImm *); + virtual void visit(const FloatImm *); + virtual void visit(const StringImm *); + virtual void visit(const Cast *); + virtual void visit(const Variable *); + virtual void visit(const Add *); + virtual void visit(const Sub *); + virtual void visit(const Mul *); + virtual void visit(const Div *); + virtual void visit(const Mod *); + virtual void visit(const Min *); + virtual void visit(const Max *); + virtual void visit(const EQ *); + virtual void visit(const NE *); + virtual void visit(const LT *); + virtual void visit(const LE *); + virtual void visit(const GT *); + virtual void visit(const GE *); + virtual void visit(const And *); + virtual void visit(const Or *); + virtual void visit(const Not *); + virtual void visit(const Select *); + virtual void visit(const Load *); + virtual void visit(const Ramp *); + virtual void visit(const Broadcast *); + virtual void visit(const Call *); + virtual void visit(const Let *); + virtual void visit(const LetStmt *); + virtual void visit(const AssertStmt *); + virtual void visit(const Pipeline *); + virtual void visit(const For *); + virtual void visit(const Store *); + virtual void visit(const Provide *); + virtual void visit(const Allocate *); + virtual void visit(const Free *); + virtual void visit(const Realize *); + virtual void visit(const Block *); + virtual void visit(const IfThenElse *); + virtual void visit(const Evaluate *); + // @} +}; + +} +} + +#endif +#ifndef HALIDE_BUFFER_H +#define HALIDE_BUFFER_H + +#include +#ifndef HALIDE_BUFFER_T_H +#define HALIDE_BUFFER_T_H + +/** \file + * Defines the internal runtime representation of an image: buffer_t + */ + +/* Generated code must declare 
buffer_t as well. This conditional bracket + * prevents multiple definition errors if both Halide.h and the header + * generated by compile_to_header are included in the same file. + * + * TODO: Use a more unique name than "buffer_t" + * TODO: If possible, ensure all definitions are the same. + */ +#ifndef BUFFER_T_DEFINED +#define BUFFER_T_DEFINED + +#ifndef COMPILING_HALIDE_RUNTIME +#include +#endif + +/** + * The raw representation of an image passed around by generated + * Halide code. It includes some stuff to track whether the image is + * not actually in main memory, but instead on a device (like a + * GPU). */ +typedef struct buffer_t { + /** A device-handle for e.g. GPU memory used to back this buffer. */ + uint64_t dev; + + /** A pointer to the start of the data in main memory. */ + uint8_t* host; + + /** The size of the buffer in each dimension. */ + int32_t extent[4]; + + /** Gives the spacing in memory between adjacent elements in the + * given dimension. The correct memory address for a load from + * this buffer at position x, y, z, w is: + * host + (x * stride[0] + y * stride[1] + z * stride[2] + w * stride[3]) * elem_size + * By manipulating the strides and extents you can lazily crop, + * transpose, and even flip buffers without modifying the data. + */ + int32_t stride[4]; + + /** Buffers often represent evaluation of a Func over some + * domain. The min field encodes the top left corner of the + * domain. */ + int32_t min[4]; + + /** How many bytes does each buffer element take. This may be + * replaced with a more general type code in the future. */ + int32_t elem_size; + + /** This should be true if there is an existing device allocation + * mirroring this buffer, and the data has been modified on the + * host side. */ + bool host_dirty; + + /** This should be true if there is an existing device allocation + mirroring this buffer, and the data has been modified on the + device side. 
*/ + bool dev_dirty; +} buffer_t; + +#endif + +#endif +#ifndef HALIDE_INTRUSIVE_PTR_H +#define HALIDE_INTRUSIVE_PTR_H + +/** \file + * + * Support classes for reference-counting via intrusive shared + * pointers. + */ + + +#include +#include +namespace Halide { +namespace Internal { + +/** A class representing a reference count to be used with IntrusivePtr */ +class RefCount { + int count; +public: + RefCount() : count(0) {} + void increment() {count++;} + void decrement() {count--;} + bool is_zero() const {return count == 0;} +}; + +/** + * Because in this header we don't yet know how client classes store + * their RefCount (and we don't want to depend on the declarations of + * the client classes), any class that you want to hold onto via one + * of these must provide implementations of ref_count and destroy, + * which we forward-declare here. + * + * E.g. if you want to use IntrusivePtr, then you should + * define something like this in MyClass.cpp (assuming MyClass has + * a field: mutable RefCount ref_count): + * + * template<> RefCount &ref_count(const MyClass *c) {return c->ref_count;} + * template<> void destroy(const MyClass *c) {delete c;} + */ +// @{ +template EXPORT RefCount &ref_count(const T *); +template EXPORT void destroy(const T *); +// @} + +/** Intrusive shared pointers have a reference count (a + * RefCount object) stored in the class itself. This is perhaps more + * efficient than storing it externally, but more importantly, it + * means it's possible to recover a reference-counted handle from the + * raw pointer, and it's impossible to have two different reference + * counts attached to the same raw object. Seeing as we pass around + * raw pointers to concrete IRNodes and Expr's interchangeably, this + * is a useful property. 
+ */ +template +struct IntrusivePtr { +private: + + void incref(T *p) { + if (p) { + ref_count(p).increment(); + } + }; + + void decref(T *p) { + if (p) { + // Note that if the refcount is already zero, then we're + // in a recursive destructor due to a self-reference (a + // cycle), where the ref_count has been adjusted to remove + // the counts due to the cycle. The next line then makes + // the ref_count negative, which prevents actually + // entering the destructor recursively. + ref_count(p).decrement(); + if (ref_count(p).is_zero()) { + destroy(p); + } + } + } + +public: + T *ptr; + + ~IntrusivePtr() { + decref(ptr); + } + + IntrusivePtr() : ptr(NULL) { + } + + IntrusivePtr(T *p) : ptr(p) { + incref(ptr); + } + + IntrusivePtr(const IntrusivePtr &other) : ptr(other.ptr) { + incref(ptr); + } + + IntrusivePtr &operator=(const IntrusivePtr &other) { + // Other can be inside of something owned by this, so we + // should be careful to incref other before we decref + // ourselves. + T *temp = other.ptr; + incref(temp); + decref(ptr); + ptr = temp; + return *this; + } + + /* Handles can be null. This checks that. */ + bool defined() const { + return ptr != NULL; + } + + /* Check if two handles point to the same ptr. This is + * equality of reference, not equality of value. */ + bool same_as(const IntrusivePtr &other) const { + return ptr == other.ptr; + } + +}; + +} +} + +#endif + +/** \file + * Defines Buffer - A c++ wrapper around a buffer_t. + */ + +namespace Halide { +namespace Internal { +struct BufferContents; +struct JITCompiledModule; +} + +/** The internal representation of an image, or other dense array + * data. The Image type provides a typed view onto a buffer for the + * purposes of direct manipulation. A buffer may be stored in main + * memory, or some other memory space (e.g. a gpu). If you want to use + * this as an Image, see the Image class. Casting a Buffer to an Image + * will do any appropriate copy-back. 
This class is a fairly thin + * wrapper on a buffer_t, which is the C-style type Halide uses for + * passing buffers around. + */ +class Buffer { +private: + Internal::IntrusivePtr contents; + +public: + Buffer() : contents(NULL) {} + + EXPORT Buffer(Type t, int x_size = 0, int y_size = 0, int z_size = 0, int w_size = 0, + uint8_t* data = NULL, const std::string &name = ""); + + EXPORT Buffer(Type t, const std::vector &sizes, + uint8_t* data = NULL, const std::string &name = ""); + + EXPORT Buffer(Type t, const buffer_t *buf, const std::string &name = ""); + + /** Get a pointer to the host-side memory. */ + EXPORT void *host_ptr() const; + + /** Get a pointer to the raw buffer_t struct that this class wraps. */ + EXPORT buffer_t *raw_buffer() const; + + /** Get the device-side pointer/handle for this buffer. Will be + * zero if no device was involved in the creation of this + * buffer. */ + EXPORT uint64_t device_handle() const; + + /** Has this buffer been modified on the cpu since last copied to a + * device. Not meaningful unless there's a device involved. */ + EXPORT bool host_dirty() const; + + /** Let Halide know that the host-side memory backing this buffer + * has been externally modified. You shouldn't normally need to + * call this, because it is done for you when you cast a Buffer to + * an Image in order to modify it. */ + EXPORT void set_host_dirty(bool dirty = true); + + /** Has this buffer been modified on device since last copied to + * the cpu. Not meaninful unless there's a device involved. */ + EXPORT bool device_dirty() const; + + /** Let Halide know that the device-side memory backing this + * buffer has been externally modified, and so the cpu-side memory + * is invalid. A copy-back will occur the next time you cast this + * Buffer to an Image, or the next time this buffer is accessed on + * the host in a halide pipeline. */ + EXPORT void set_device_dirty(bool dirty = true); + + /** Get the dimensionality of this buffer. 
Uses the convention + * that the extent field of a buffer_t should contain zero when + * the dimensions end. */ + EXPORT int dimensions() const; + + /** Get the extent of this buffer in the given dimension. */ + EXPORT int extent(int dim) const; + + /** Get the number of bytes between adjacent elements of this buffer along the given dimension. */ + EXPORT int stride(int dim) const; + + /** Get the coordinate in the function that this buffer represents + * that corresponds to the base address of the buffer. */ + EXPORT int min(int dim) const; + + /** Set the coordinate in the function that this buffer represents + * that corresponds to the base address of the buffer. */ + EXPORT void set_min(int m0, int m1 = 0, int m2 = 0, int m3 = 0); + + /** Get the Halide type of the contents of this buffer. */ + EXPORT Type type() const; + + /** Compare two buffers for identity (not equality of data). */ + EXPORT bool same_as(const Buffer &other) const; + + /** Check if this buffer handle actually points to data. */ + EXPORT bool defined() const; + + /** Get the runtime name of this buffer used for debugging. */ + EXPORT const std::string &name() const; + + /** Convert this buffer to an argument to a halide pipeline. */ + EXPORT operator Argument() const; + + /** Declare that this buffer was created by the given jit-compiled + * module. Used internally for reference counting the module. */ + EXPORT void set_source_module(const Internal::JITCompiledModule &module); + + /** If this buffer was the output of a jit-compiled realization, + * retrieve the module it came from. Otherwise returns a module + * struct full of null pointers. */ + EXPORT const Internal::JITCompiledModule &source_module(); + + /** If this buffer was created *on-device* by a jit-compiled + * realization, then copy it back to the cpu-side memory. This is + * usually achieved by casting the Buffer to an Image. 
*/ + EXPORT int copy_to_host(); + + /** If this buffer was created by a jit-compiled realization on a + * device-aware target (e.g. PTX), then copy the cpu-side data to + * the device-side allocation. TODO: I believe this currently + * aborts messily if no device-side allocation exists. You might + * think you want to do this because you've modified the data + * manually on the host before calling another Halide pipeline, + * but what you actually want to do in that situation is set the + * host_dirty bit so that Halide can manage the copy lazily for + * you. Casting the Buffer to an Image sets the dirty bit for + * you. */ + EXPORT int copy_to_dev(); + + /** If this buffer was created by a jit-compiled realization on a + * device-aware target (e.g. PTX), then free the device-side + * allocation, if there is one. Done automatically when the last + * reference to this buffer dies. */ + EXPORT int free_dev_buffer(); + +}; + +} + +#endif + +namespace Halide { + +namespace Internal { + +/** A class representing a type of IR node (e.g. Add, or Mul, or + * For). We use it for rtti (without having to compile with rtti). */ +struct IRNodeType {}; + +/** The abstract base classes for a node in the Halide IR. */ +struct IRNode { + + /** We use the visitor pattern to traverse IR nodes throughout the + * compiler, so we have a virtual accept method which accepts + * visitors. + */ + virtual void accept(IRVisitor *v) const = 0; + IRNode() {} + virtual ~IRNode() {} + + /** These classes are all managed with intrusive reference + counting, so we also track a reference count. It's mutable + so that we can do reference counting even through const + references to IR nodes. */ + mutable RefCount ref_count; + + /** Each IR node subclass should return some unique pointer. We + * can compare these pointers to do runtime type + * identification. 
We don't compile with rtti because that + * injects run-time type identification stuff everywhere (and + * often breaks when linking external libraries compiled + * without it), and we only want it for IR nodes. */ + virtual const IRNodeType *type_info() const = 0; +}; + +template<> +EXPORT inline RefCount &ref_count(const IRNode *n) {return n->ref_count;} + +template<> +EXPORT inline void destroy(const IRNode *n) {delete n;} + +/** IR nodes are split into expressions and statements. These are + similar to expressions and statements in C - expressions + represent some value and have some type (e.g. x + 3), and + statements are side-effecting pieces of code that do not + represent a value (e.g. assert(x > 3)) */ + +/** A base class for statement nodes. They have no properties or + methods beyond base IR nodes for now */ +struct BaseStmtNode : public IRNode { +}; + +/** A base class for expression nodes. They all contain their types + * (e.g. Int(32), Float(32)) */ +struct BaseExprNode : public IRNode { + Type type; +}; + +/** We use the "curiously recurring template pattern" to avoid + duplicated code in the IR Nodes. These classes live between the + abstract base classes and the actual IR Nodes in the + inheritance hierarchy. It provides an implementation of the + accept function necessary for the visitor pattern to work, and + a concrete instantiation of a unique IRNodeType per class. */ +template +struct ExprNode : public BaseExprNode { + void accept(IRVisitor *v) const { + v->visit((const T *)this); + } + virtual IRNodeType *type_info() const {return &_type_info;} + static EXPORT IRNodeType _type_info; +}; + +template +struct StmtNode : public BaseStmtNode { + void accept(IRVisitor *v) const { + v->visit((const T *)this); + } + virtual IRNodeType *type_info() const {return &_type_info;} + static EXPORT IRNodeType _type_info; +}; + +/** IR nodes are passed around opaque handles to them. This is a + base class for those handles. 
It manages the reference count, + and dispatches visitors. */ +struct IRHandle : public IntrusivePtr { + IRHandle() : IntrusivePtr() {} + IRHandle(const IRNode *p) : IntrusivePtr(p) {} + + /** Dispatch to the correct visitor method for this node. E.g. if + * this node is actually an Add node, then this will call + * IRVisitor::visit(const Add *) */ + void accept(IRVisitor *v) const { + ptr->accept(v); + } + + /** Downcast this ir node to its actual type (e.g. Add, or + * Select). This returns NULL if the node is not of the requested + * type. Example usage: + * + * if (const Add *add = node->as()) { + * // This is an add node + * } + */ + template const T *as() const { + if (ptr->type_info() == &T::_type_info) { + return (const T *)ptr; + } + return NULL; + } +}; + +/** Integer constants */ +struct IntImm : public ExprNode { + int value; + + static IntImm *make(int value) { + if (value >= -8 && value <= 8 && + !small_int_cache[value + 8].ref_count.is_zero()) { + return &small_int_cache[value + 8]; + } + IntImm *node = new IntImm; + node->type = Int(32); + node->value = value; + return node; + } + +private: + /** ints from -8 to 8 */ + static IntImm small_int_cache[17]; +}; + +/** Floating point constants */ +struct FloatImm : public ExprNode { + float value; + + static FloatImm *make(float value) { + FloatImm *node = new FloatImm; + node->type = Float(32); + node->value = value; + return node; + } +}; + +/** String constants */ +struct StringImm : public ExprNode { + std::string value; + + static StringImm *make(const std::string &val) { + StringImm *node = new StringImm; + node->type = Handle(); + node->value = val; + return node; + } +}; + +} + +/** A fragment of Halide syntax. It's implemented as reference-counted + * handle to a concrete expression node, but it's immutable, so you + * can treat it as a value type. 
*/ +struct Expr : public Internal::IRHandle { + /** Make an undefined expression */ + Expr() : Internal::IRHandle() {} + + /** Make an expression from a concrete expression node pointer (e.g. Add) */ + Expr(const Internal::BaseExprNode *n) : IRHandle(n) {} + + + /** Make an expression representing a const 32-bit int (i.e. an IntImm) */ + EXPORT Expr(int x) : IRHandle(Internal::IntImm::make(x)) { + } + + /** Make an expression representing a const 32-bit float (i.e. a FloatImm) */ + EXPORT Expr(float x) : IRHandle(Internal::FloatImm::make(x)) { + } + + /** Make an expression representing a const 32-bit float, given a + * double. Also emits a warning due to truncation. */ + EXPORT Expr(double x) : IRHandle(Internal::FloatImm::make((float)x)) { + user_warning << "Halide cannot represent double constants. " + << "Converting " << x << " to float. " + << "If you wanted a double, use cast(" << x + << (x == (int64_t)(x) ? ".0f" : "f") + << ")\n"; + } + + /** Make an expression representing a const string (i.e. a StringImm) */ + EXPORT Expr(const std::string &s) : IRHandle(Internal::StringImm::make(s)) { + } + + /** Get the type of this expression node */ + Type type() const { + return ((const Internal::BaseExprNode *)ptr)->type; + } +}; + +/** This lets you use an Expr as a key in a map of the form + * map */ +struct ExprCompare { + bool operator()(Expr a, Expr b) const { + return a.ptr < b.ptr; + } +}; + +} + +// Now that we've defined an Expr, we can include Parameter.h +#ifndef HALIDE_PARAMETER_H +#define HALIDE_PARAMETER_H + +/** \file + * Defines the internal representation of parameters to halide piplines + */ + +#include + +namespace Halide { +namespace Internal { + +struct ParameterContents; + +/** A reference-counted handle to a parameter to a halide + * pipeline. 
May be a scalar parameter or a buffer */ +class Parameter { + IntrusivePtr contents; + + void check_defined() const; + void check_is_buffer() const; + void check_is_scalar() const; + void check_dim_ok(int dim) const; + +public: + /** Construct a new undefined handle */ + Parameter() : contents(NULL) {} + + /** Construct a new parameter of the given type. If the second + * argument is true, this is a buffer parameter, otherwise, it is + * a scalar parameter. The parameter will be given a unique + * auto-generated name. */ + EXPORT Parameter(Type t, bool is_buffer); + + /** Construct a new parameter of the given type with name given by + * the third argument. If the second argument is true, this is a + * buffer parameter, otherwise, it is a scalar parameter. The + * parameter will be given a unique auto-generated name. */ + EXPORT Parameter(Type t, bool is_buffer, const std::string &name); + + /** Get the type of this parameter */ + EXPORT Type type() const; + + /** Get the name of this parameter */ + EXPORT const std::string &name() const; + + /** Does this parameter refer to a buffer/image? */ + EXPORT bool is_buffer() const; + + /** If the parameter is a scalar parameter, get its currently + * bound value. Only relevant when jitting */ + template + NO_INLINE T get_scalar() { + user_assert(type() == type_of()) + << "Can't get Param<" << type() + << "> as scalar of type " << type_of() << "\n"; + return *((T *)(get_scalar_address())); + } + + /** If the parameter is a scalar parameter, set its current + * value. Only relevant when jitting */ + template + NO_INLINE void set_scalar(T val) { + user_assert(type() == type_of()) + << "Can't set Param<" << type() + << "> to scalar of type " << type_of() << "\n"; + *((T *)(get_scalar_address())) = val; + } + + /** If the parameter is a buffer parameter, get its currently + * bound buffer. 
Only relevant when jitting */ + EXPORT Buffer get_buffer() const; + + /** If the parameter is a buffer parameter, set its current + * value. Only relevant when jitting */ + EXPORT void set_buffer(Buffer b); + + /** Get the pointer to the current value of the scalar + * parameter. For a given parameter, this address will never + * change. Only relevant when jitting. */ + EXPORT void *get_scalar_address() const; + + /** Tests if this handle is the same as another handle */ + EXPORT bool same_as(const Parameter &other) const; + + /** Tests if this handle is non-NULL */ + EXPORT bool defined() const; + + /** Get and set constraints for the min, extent, and stride (see + * ImageParam::set_extent) */ + //@{ + EXPORT void set_min_constraint(int dim, Expr e); + EXPORT void set_extent_constraint(int dim, Expr e); + EXPORT void set_stride_constraint(int dim, Expr e); + EXPORT Expr min_constraint(int dim) const; + EXPORT Expr extent_constraint(int dim) const; + EXPORT Expr stride_constraint(int dim) const; + //@} + + /** Get and set constraints for scalar parameters. These are used + * directly by Param, so they must be exported. */ + // @{ + EXPORT void set_min_value(Expr e); + EXPORT Expr get_min_value(); + EXPORT void set_max_value(Expr e); + EXPORT Expr get_max_value(); + // @} +}; + +/** Validate arguments to a call to a func, image or imageparam. */ +void check_call_arg_types(const std::string &name, std::vector *args, int dims); + +} +} + +#endif + +namespace Halide { +namespace Internal { + +/** A reference-counted handle to a statement node. */ +struct Stmt : public IRHandle { + Stmt() : IRHandle() {} + Stmt(const BaseStmtNode *n) : IRHandle(n) {} + + /** This lets you use a Stmt as a key in a map of the form + * map */ + struct Compare { + bool operator()(const Stmt &a, const Stmt &b) const { + return a.ptr < b.ptr; + } + }; +}; + +/** The actual IR nodes begin here. 
Remember that all the Expr + * nodes also have a public "type" property */ + +} + +namespace Internal { + +/** Cast a node from one type to another */ +struct Cast : public ExprNode { + Expr value; + + EXPORT static Expr make(Type t, Expr v); +}; + +/** The sum of two expressions */ +struct Add : public ExprNode { + Expr a, b; + + EXPORT static Expr make(Expr a, Expr b); +}; + +/** The difference of two expressions */ +struct Sub : public ExprNode_{{
+ Expr a, b;
+
+ EXPORT static Expr make(Expr a, Expr b);
+};
+
+/** The product of two expressions: the IR node for a * b. */
+struct Mul : public ExprNode {
+ Expr a, b; // the two expressions being multiplied
+
+ EXPORT static Expr make(Expr a, Expr b); // only declared here; presumably builds a Mul from a and b
+};
+
+/** The ratio of two expressions. */
+struct Div : public ExprNode {
+ Expr a, b; // presumably a is the numerator and b the denominator - confirm
+
+ EXPORT static Expr make(Expr a, Expr b); // only declared here; presumably builds a Div from a and b
+};
+
+/** The remainder of a / b. Mostly equivalent to '%' in C, except that
+ * the result here is always positive (presumably non-negative, since a
+ * remainder can be zero). For floats, this is equivalent to calling fmod. */
+struct Mod : public ExprNode {
+ Expr a, b; // a is divided by b; the node represents the remainder
+
+ EXPORT static Expr make(Expr a, Expr b); // only declared here; presumably builds a Mod from a and b
+};
+
+/** The lesser of two values, a and b. */
+struct Min : public ExprNode {
+ Expr a, b; // the two values being compared
+
+ EXPORT static Expr make(Expr a, Expr b); // only declared here; presumably builds a Min from a and b
+};
+
+/** The greater of two values, a and b. */
+struct Max : public ExprNode {
+ Expr a, b; // the two values being compared
+
+ EXPORT static Expr make(Expr a, Expr b); // only declared here; presumably builds a Max from a and b
+};
+
+/** Equality test: is the first expression equal to the second? */
+struct EQ : public ExprNode {
+ Expr a, b; // presumably a is the first expression and b the second
+
+ EXPORT static Expr make(Expr a, Expr b); // only declared here; presumably builds an EQ from a and b
+};
+
+/** Inequality test: is the first expression not equal to the second? */
+struct NE : public ExprNode {
+ Expr a, b; // presumably a is the first expression and b the second
+
+ EXPORT static Expr make(Expr a, Expr b); // only declared here; presumably builds an NE from a and b
+};
+
+/** Less-than test: is the first expression less than the second? */
+struct LT : public ExprNode {
+ Expr a, b; // presumably a is the first expression and b the second
+
+ EXPORT static Expr make(Expr a, Expr b); // only declared here; presumably builds an LT from a and b
+};
+
+/** Less-or-equal test: is the first expression less than or equal to the second? */
+struct LE : public ExprNode {
+ Expr a, b; // presumably a is the first expression and b the second
+
+ EXPORT static Expr make(Expr a, Expr b); // only declared here; presumably builds an LE from a and b
+};
+
+/** Greater-than test: is the first expression greater than the second? */
+struct GT : public ExprNode {
+ Expr a, b; // presumably a is the first expression and b the second
+
+ EXPORT static Expr make(Expr a, Expr b); // only declared here; presumably builds a GT from a and b
+};
+
+/** Greater-or-equal test: is the first expression greater than or equal to the second? */
+struct GE : public ExprNode {
+ Expr a, b; // presumably a is the first expression and b the second
+
+ EXPORT static Expr make(Expr a, Expr b); // only declared here; presumably builds a GE from a and b
+};
+
+/** Logical and - are both expressions (a and b) true? */
+struct And : public ExprNode {
+ Expr a, b; // the two expressions being combined
+
+ EXPORT static Expr make(Expr a, Expr b); // only declared here; presumably builds an And from a and b
+};
+
+/** Logical or - is at least one of the expressions (a or b) true? */
+struct Or : public ExprNode {
+ Expr a, b; // the two expressions being combined
+
+ EXPORT static Expr make(Expr a, Expr b); // only declared here; presumably builds an Or from a and b
+};
+
+/** Logical not - true if the expression a is false. */
+struct Not : public ExprNode {
+ Expr a; // the single expression being negated
+
+ EXPORT static Expr make(Expr a); // only declared here; presumably builds a Not from a
+};
+
+/** A ternary operator. Evaluates 'true_value' and 'false_value',
+ * then selects between them based on 'condition'. Equivalent to
+ * the ternary operator in C. */
+struct Select : public ExprNode}