diff -Nru libdogleg-0.14/Changes libdogleg-0.15.4/Changes
--- libdogleg-0.14/Changes	2018-02-12 18:55:59.000000000 +0000
+++ libdogleg-0.15.4/Changes	2020-12-11 23:37:34.000000000 +0000
@@ -1,3 +1,28 @@
+libdogleg (0.15.4) unstable; urgency=medium
+
+  * verbose output has a mode to output vnlog
+  * defragmented malloc() calls
+  * dogleg_operatingPoint_t stores the step vector we took to get here
+  * Added dogleg_optimize2() and dogleg_optimize_dense2() entry points
+    that use local parameters
+
+ -- Dima Kogan  Fri, 11 Dec 2020 15:37:26 -0800
+
+libdogleg (0.15.1)
+
+  * Various updates to the experimental outlier-rejection logic. These
+    break the previous API/ABI, but those were experimental and unreleased
+    anyway
+
+ -- Dima Kogan  Tue, 02 Oct 2018 09:19:03 -0700
+
+libdogleg (0.15)
+
+  * added experimental outlier-rejection logic.
+    This is not yet documented, and the API is not yet fixed. Use at your own risk
+
+ -- Dima Kogan  Wed, 08 Aug 2018 16:04:38 -0700
+
 libdogleg (0.14)
 
   * standardized CHOLMOD error messages
diff -Nru libdogleg-0.14/debian/changelog libdogleg-0.15.4/debian/changelog
--- libdogleg-0.14/debian/changelog	2018-02-13 07:16:29.000000000 +0000
+++ libdogleg-0.15.4/debian/changelog	2020-12-11 23:41:39.000000000 +0000
@@ -1,3 +1,21 @@
+libdogleg (0.15.4-1) unstable; urgency=medium
+
+  * New upstream release
+    - added experimental outlier-rejection logic. Not stable and not fully
+      documented
+    - verbose output has a mode to output vnlog
+    - Added dogleg_optimize2() and dogleg_optimize_dense2() entry points
+      that use local parameters
+
+ -- Dima Kogan  Fri, 11 Dec 2020 15:41:39 -0800
+
+libdogleg (0.15-1) unstable; urgency=medium
+
+  * updated Vcs tags to salsa
+  * New upstream version 0.15
+
+ -- Dima Kogan  Wed, 08 Aug 2018 16:11:27 -0700
+
 libdogleg (0.14-1) unstable; urgency=medium
 
   * Minor upstream update
diff -Nru libdogleg-0.14/debian/control libdogleg-0.15.4/debian/control
--- libdogleg-0.14/debian/control	2018-02-13 07:14:57.000000000 +0000
+++ libdogleg-0.15.4/debian/control	2019-08-12 23:19:33.000000000 +0000
@@ -6,8 +6,8 @@
 Uploaders: Dima Kogan
 Standards-Version: 3.9.6
 Homepage: https://github.com/dkogan/libdogleg
-Vcs-Git: git://anonscm.debian.org/debian-science/packages/libdogleg.git
-Vcs-Browser: http://anonscm.debian.org/gitweb/?p=debian-science/packages/libdogleg.git
+Vcs-Git: https://salsa.debian.org/science-team/libdogleg.git
+Vcs-Browser: https://salsa.debian.org/science-team/libdogleg
 
 Package: libdogleg2
 Section: libs
diff -Nru libdogleg-0.14/debian/libdogleg2.symbols libdogleg-0.15.4/debian/libdogleg2.symbols
--- libdogleg-0.14/debian/libdogleg2.symbols	2018-02-13 07:14:57.000000000 +0000
+++ libdogleg-0.15.4/debian/libdogleg2.symbols	2019-08-12 23:23:41.000000000 +0000
@@ -1,8 +1,15 @@
 libdogleg.so.2 libdogleg2 #MINVER#
  dogleg_computeJtJfactorization@Base 0.12
  dogleg_freeContext@Base 0.08
+ dogleg_getDefaultParameters@Base 0.15.3
+ dogleg_getOutliernessFactors@Base 0.15
+ dogleg_getOutliernessTrace_newFeature_sparse@Base 0.15.1
+ dogleg_markOutliers@Base 0.15
+ dogleg_optimize2@Base 0.15.3
  dogleg_optimize@Base 0.08
+ dogleg_optimize_dense2@Base 0.15.3
  dogleg_optimize_dense@Base 0.10
+ dogleg_reportOutliers@Base 0.15
  dogleg_setDebug@Base 0.08
  dogleg_setInitialTrustregion@Base 0.08
  dogleg_setMaxIterations@Base 0.08
diff -Nru libdogleg-0.14/dogleg.c libdogleg-0.15.4/dogleg.c
--- libdogleg-0.14/dogleg.c	2018-02-12 18:55:59.000000000 +0000
+++ libdogleg-0.15.4/dogleg.c	2020-12-11 23:37:34.000000000 +0000
@@ -1,12 +1,13 @@
 // -*- mode: C; c-basic-offset: 2 -*-
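The headline API change in dogleg.c is the dogleg_optimize2()/dogleg_optimize_dense2() pair, which take an explicit dogleg_parameters2_t instead of relying on the process-wide globals that dogleg_setDebug() and friends mutate. A minimal sketch of the intended call pattern, pieced together from this patch; Nstate, Nmeas, NJnnz, f, cookie, and p are placeholders for the caller's problem:

  dogleg_parameters2_t parameters;
  dogleg_getDefaultParameters(&parameters);
  parameters.max_iterations = 50;
  parameters.dogleg_debug   = DOGLEG_DEBUG_VNLOG; // emit vnlog records on stdout

  double norm2_x = dogleg_optimize2(p, Nstate, Nmeas, NJnnz,
                                    f, cookie,
                                    &parameters,
                                    NULL); // don't keep the solver context

Passing parameters==NULL makes dogleg_optimize2() fall back to the globals, which is how the original dogleg_optimize() keeps working unchanged.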
 // Copyright 2011 Oblong Industries
-//           2017 Dima Kogan
+//           2017-2018 Dima Kogan
 // License: GNU LGPL <http://www.gnu.org/licenses/lgpl.html>.
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <math.h>
+#include <float.h>
 #ifdef __APPLE__
 #include
 #else
 #include
 #endif
@@ -19,11 +20,16 @@
 #endif
 
+// Any non-vnlog bit mask
+#define DOGLEG_DEBUG_OTHER_THAN_VNLOG (~DOGLEG_DEBUG_VNLOG)
 #define SAY_NONEWLINE(fmt, ...) fprintf(stderr, "libdogleg at %s:%d: " fmt, __FILE__, __LINE__, ## __VA_ARGS__)
 #define SAY(fmt, ...)           do { SAY_NONEWLINE(fmt, ## __VA_ARGS__); fprintf(stderr, "\n"); } while(0)
+// This REQUIRES that a "dogleg_solverContext_t* ctx" be available
+#define SAY_IF_VERBOSE(fmt,...) do { if( ctx->parameters->dogleg_debug & DOGLEG_DEBUG_OTHER_THAN_VNLOG ) SAY(fmt, ##__VA_ARGS__); } while(0)
 
 // I do this myself because I want this to be active in all build modes, not just !NDEBUG
-#define ASSERT(x) do { if(!(x)) { SAY("ASSERTION FAILED: " #x "is not true"); exit(1); } } while(0)
+#define ASSERT(x) do { if(!(x)) { SAY("ASSERTION FAILED: " #x " is not true"); exit(1); } } while(0)
 
 // used to consolidate the casts
 #define P(A, index) ((unsigned int*)((A)->p))[index]
@@ -32,30 +38,105 @@
 
 //////////////////////////////////////////////////////////////////////////////////////////
-// parameter stuff
+// vnlog debugging stuff
+//
+// This is used if the user calls dogleg_setDebug(DOGLEG_DEBUG_VNLOG | stuff)
 //////////////////////////////////////////////////////////////////////////////////////////
 
-// These are the optimizer parameters. They have semi-arbitrary defaults. The
-// user should adjust them through the API
-static int DOGLEG_DEBUG   = 0;
-static int MAX_ITERATIONS = 100;
-
-// it is cheap to reject a too-large trust region, so I start with something
-// "large". The solver will quickly move down to something reasonable. Only the
-// user really knows if this is "large" or not, so they should change this via
-// the API
-static double TRUSTREGION0 = 1.0e3;
-
-// These are probably OK to leave alone. Tweaking them can maybe result in
-// slightly faster convergence
-static double TRUSTREGION_DECREASE_FACTOR    = 0.1;
-static double TRUSTREGION_INCREASE_FACTOR    = 2;
-static double TRUSTREGION_INCREASE_THRESHOLD = 0.75;
-static double TRUSTREGION_DECREASE_THRESHOLD = 0.25;
-
-// The termination thresholds.
Documented in the header -static double JT_X_THRESHOLD = 1e-8; -static double UPDATE_THRESHOLD = 1e-8; -static double TRUSTREGION_THRESHOLD = 1e-8; +#define VNLOG_DEBUG_STEP_TYPE_LIST(_) \ + _(STEPTYPE_CAUCHY, "cauchy") \ + _(STEPTYPE_GAUSSNEWTON, "gaussnewton") \ + _(STEPTYPE_INTERPOLATED, "interpolated") \ + _(STEPTYPE_FAILED, "failed") +#define VNLOG_DEBUG_STEP_TYPE_NAME_COMMA(name,shortname) name, +typedef enum { VNLOG_DEBUG_STEP_TYPE_LIST(VNLOG_DEBUG_STEP_TYPE_NAME_COMMA) + STEPTYPE_UNINITIALIZED } vnlog_debug_step_type_t; +#define VNLOG_DEBUG_FIELDS(_) \ + _(double, norm2x_before, INFINITY) \ + _(double, norm2x_after, INFINITY) \ + _(double, step_len_cauchy, INFINITY) \ + _(double, step_len_gauss_newton, INFINITY) \ + _(double, step_len_interpolated, INFINITY) \ + _(double, k_cauchy_to_gn, INFINITY) \ + _(double, step_len, INFINITY) \ + _(vnlog_debug_step_type_t, step_type, STEPTYPE_UNINITIALIZED) \ + _(double, step_direction_change_deg, INFINITY) \ + _(double, expected_improvement, INFINITY) \ + _(double, observed_improvement, INFINITY) \ + _(double, rho, INFINITY) \ + _(double, trustregion_before, INFINITY) \ + _(double, trustregion_after, INFINITY) +static struct vnlog_debug_data_t +{ +#define VNLOG_DEBUG_DECLARE_FIELD(type, name, initialvalue) type name; + VNLOG_DEBUG_FIELDS(VNLOG_DEBUG_DECLARE_FIELD) +} vnlog_debug_data; +static void vnlog_debug_reset(void) +{ +#define VNLOG_DEBUG_RESET_FIELD(type, name, initialvalue) vnlog_debug_data.name = initialvalue; + VNLOG_DEBUG_FIELDS(VNLOG_DEBUG_RESET_FIELD); +} +static void vnlog_debug_emit_legend(void) +{ +#define VNLOG_DEBUG_SPACE_FIELD_NAME(type, name, initialvalue) " " #name + + vnlog_debug_reset(); + printf("# iteration step_accepted" VNLOG_DEBUG_FIELDS(VNLOG_DEBUG_SPACE_FIELD_NAME) "\n"); +} +__attribute__((unused)) +static void vnlog_emit_double(double x) +{ + if(x == INFINITY) printf("- "); + else printf("%g ", x); +} +__attribute__((unused)) +static void vnlog_emit_int(int x) +{ + if(x == -1) printf("- "); + else printf("%d ", x); +} +__attribute__((unused)) +static void vnlog_emit_vnlog_debug_step_type_t(vnlog_debug_step_type_t x) +{ +#define VNLOG_DEBUG_STEP_TYPE_SWITCH_EMIT(name,shortname) case name: printf(shortname " "); break; + switch(x) + { + VNLOG_DEBUG_STEP_TYPE_LIST(VNLOG_DEBUG_STEP_TYPE_SWITCH_EMIT) + default: printf("- "); + } +} +static void vnlog_debug_emit_record(int iteration, int step_accepted) +{ +#define VNLOG_DEBUG_EMIT_FIELD(type, name, initialvalue) vnlog_emit_ ## type(vnlog_debug_data.name); + + printf("%d %d ", iteration, step_accepted); + VNLOG_DEBUG_FIELDS(VNLOG_DEBUG_EMIT_FIELD); + printf("\n"); + fflush(stdout); + vnlog_debug_reset(); +} + +// Default parameters. 
Used only by the original API, which uses a global set of +// parameters +#define PARAMETERS_DEFAULT \ + { \ + .max_iterations = 100, \ + .dogleg_debug = 0, \ + .trustregion0 = 1.0e3, \ + .trustregion_decrease_factor = 0.1, \ + .trustregion_decrease_threshold = 0.25, \ + .trustregion_increase_factor = 2, \ + .trustregion_increase_threshold = 0.75, \ + .Jt_x_threshold = 1e-8, \ + .update_threshold = 1e-8, \ + .trustregion_threshold = 1e-8 \ + } + +static const dogleg_parameters2_t parameters_default = PARAMETERS_DEFAULT; +static dogleg_parameters2_t parameters_global = PARAMETERS_DEFAULT; +void dogleg_getDefaultParameters(dogleg_parameters2_t* parameters) +{ + *parameters = parameters_default; +} // if I ever see a singular JtJ, I factor JtJ + LAMBDA*I from that point on #define LAMBDA_INITIAL 1e-10 @@ -63,31 +144,31 @@ // these parameters likely should be messed with void dogleg_setDebug(int debug) { - DOGLEG_DEBUG = debug; + parameters_global.dogleg_debug = debug; } void dogleg_setInitialTrustregion(double t) { - TRUSTREGION0 = t; + parameters_global.trustregion0 = t; } void dogleg_setThresholds(double Jt_x, double update, double trustregion) { - if(Jt_x > 0.0) JT_X_THRESHOLD = Jt_x; - if(update > 0.0) UPDATE_THRESHOLD = update; - if(trustregion > 0.0) TRUSTREGION_THRESHOLD = trustregion; + if(Jt_x > 0.0) parameters_global.Jt_x_threshold = Jt_x; + if(update > 0.0) parameters_global.update_threshold = update; + if(trustregion > 0.0) parameters_global.trustregion_threshold = trustregion; } // these parameters likely should not be messed with. void dogleg_setMaxIterations(int n) { - MAX_ITERATIONS = n; + parameters_global.max_iterations = n; } void dogleg_setTrustregionUpdateParameters(double downFactor, double downThreshold, double upFactor, double upThreshold) { - TRUSTREGION_DECREASE_FACTOR = downFactor; - TRUSTREGION_DECREASE_THRESHOLD = downThreshold; - TRUSTREGION_INCREASE_FACTOR = upFactor; - TRUSTREGION_INCREASE_THRESHOLD = upThreshold; + parameters_global.trustregion_decrease_factor = downFactor; + parameters_global.trustregion_decrease_threshold = downThreshold; + parameters_global.trustregion_increase_factor = upFactor; + parameters_global.trustregion_increase_threshold = upThreshold; } @@ -134,12 +215,20 @@ for(int i=0; iupdateCauchy_valid) - return; - point->updateCauchy_valid = 1; - - // I look at a step in the steepest direction that minimizes my - // quadratic error function (Cauchy point). If this is past my trust region, - // I move as far as the trust region allows along the steepest descent - // direction. My error function is F=norm2(f(p)). dF/dP = 2*ft*J. - // This is proportional to Jt_x, which is thus the steepest ascent direction. - // - // Thus along this direction we have F(k) = norm2(f(p + k Jt_x)). The Cauchy - // point is where F(k) is at a minimum: - // dF_dk = 2 f(p + k Jt_x)t J Jt_x ~ (x + k J Jt_x)t J Jt_x = - // = xt J Jt x + k xt J Jt J Jt x = norm2(Jt x) + k norm2(J Jt x) - // dF_dk = 0 -> k= -norm2(Jt x) / norm2(J Jt x) - // Summary: - // the steepest direction is parallel to Jt*x. The Cauchy point is at - // k*Jt*x where k = -norm2(Jt*x)/norm2(J*Jt*x) - double norm2_Jt_x = norm2(point->Jt_x, ctx->Nstate); - double norm2_J_Jt_x = ctx->is_sparse ? 
- norm2_mul_spmatrix_t_densevector(point->Jt, point->Jt_x) : - norm2_mul_matrix_vector (point->J_dense, point->Jt_x, ctx->Nmeasurements, ctx->Nstate); - double k = -norm2_Jt_x / norm2_J_Jt_x; - - point->updateCauchy_lensq = k*k * norm2_Jt_x; + if(!point->updateCauchy_valid) + { + point->updateCauchy_valid = 1; - vec_copy_scaled(point->updateCauchy, - point->Jt_x, k, ctx->Nstate); + // I look at a step in the steepest direction that minimizes my + // quadratic error function (Cauchy point). If this is past my trust region, + // I move as far as the trust region allows along the steepest descent + // direction. My error function is F=norm2(f(p)). dF/dP = 2*ft*J. + // This is proportional to Jt_x, which is thus the steepest ascent direction. + // + // Thus along this direction we have F(k) = norm2(f(p + k Jt_x)). The Cauchy + // point is where F(k) is at a minimum: + // dF_dk = 2 f(p + k Jt_x)t J Jt_x ~ (x + k J Jt_x)t J Jt_x = + // = xt J Jt x + k xt J Jt J Jt x = norm2(Jt x) + k norm2(J Jt x) + // dF_dk = 0 -> k= -norm2(Jt x) / norm2(J Jt x) + // Summary: + // the steepest direction is parallel to Jt*x. The Cauchy point is at + // k*Jt*x where k = -norm2(Jt*x)/norm2(J*Jt*x) + double norm2_Jt_x = norm2(point->Jt_x, ctx->Nstate); + double norm2_J_Jt_x = ctx->is_sparse ? + norm2_mul_spmatrix_t_densevector(point->Jt, point->Jt_x) : + norm2_mul_matrix_vector (point->J_dense, point->Jt_x, ctx->Nmeasurements, ctx->Nstate); + double k = -norm2_Jt_x / norm2_J_Jt_x; + + point->updateCauchy_lensq = k*k * norm2_Jt_x; + + vec_copy_scaled(point->updateCauchy, + point->Jt_x, k, ctx->Nstate); + SAY_IF_VERBOSE( "cauchy step size %.6g", sqrt(point->updateCauchy_lensq)); + } - if( DOGLEG_DEBUG ) - SAY( "cauchy step size %.6g", sqrt(point->updateCauchy_lensq)); + if( ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG ) + vnlog_debug_data.step_len_cauchy = sqrt(point->updateCauchy_lensq); } // LAPACK prototypes for a packed cholesky factorization and a linear solve @@ -438,9 +529,8 @@ else ctx->lambda *= 10.0; ASSERT( isfinite(ctx->lambda) ); - if( DOGLEG_DEBUG ) - SAY( "singular JtJ. Have rank/full rank: %zd/%d. Adding %g I from now on", - ctx->factorization->minor, ctx->Nstate, ctx->lambda); + SAY_IF_VERBOSE( "singular JtJ. Have rank/full rank: %zd/%d. Adding %g I from now on", + ctx->factorization->minor, ctx->Nstate, ctx->lambda); } } else @@ -503,8 +593,7 @@ if( ctx->lambda == 0.0) ctx->lambda = LAMBDA_INITIAL; else ctx->lambda *= 10.0; - if( DOGLEG_DEBUG ) - SAY( "singular JtJ. Adding %g I from now on", ctx->lambda); + SAY_IF_VERBOSE( "singular JtJ. Adding %g I from now on", ctx->lambda); } } } @@ -512,58 +601,60 @@ static void computeGaussNewtonUpdate(dogleg_operatingPoint_t* point, dogleg_solverContext_t* ctx) { // I already have this data, so don't need to recompute - if(point->updateGN_valid) - return; + if(!point->updateGN_valid) + { + dogleg_computeJtJfactorization(point, ctx); - dogleg_computeJtJfactorization(point, ctx); + // try to factorize the matrix directly. If it's singular, add a small + // constant to the diagonal. This constant gets larger if we keep being + // singular + if( ctx->is_sparse ) + { + // solve JtJ*updateGN = Jt*x. 
Gauss-Newton step is then -updateGN + cholmod_dense Jt_x_dense = {.nrow = ctx->Nstate, + .ncol = 1, + .nzmax = ctx->Nstate, + .d = ctx->Nstate, + .x = point->Jt_x, + .xtype = CHOLMOD_REAL, + .dtype = CHOLMOD_DOUBLE}; + + if(point->updateGN_cholmoddense != NULL) + cholmod_free_dense(&point->updateGN_cholmoddense, &ctx->common); + + point->updateGN_cholmoddense = cholmod_solve(CHOLMOD_A, + ctx->factorization, + &Jt_x_dense, + &ctx->common); + vec_negate(point->updateGN_cholmoddense->x, + ctx->Nstate); // should be more efficient than this later - // try to factorize the matrix directly. If it's singular, add a small - // constant to the diagonal. This constant gets larger if we keep being - // singular - if( ctx->is_sparse ) - { - // solve JtJ*updateGN = Jt*x. Gauss-Newton step is then -updateGN - cholmod_dense Jt_x_dense = {.nrow = ctx->Nstate, - .ncol = 1, - .nzmax = ctx->Nstate, - .d = ctx->Nstate, - .x = point->Jt_x, - .xtype = CHOLMOD_REAL, - .dtype = CHOLMOD_DOUBLE}; - - if(point->updateGN_cholmoddense != NULL) - cholmod_free_dense(&point->updateGN_cholmoddense, &ctx->common); - - point->updateGN_cholmoddense = cholmod_solve(CHOLMOD_A, - ctx->factorization, - &Jt_x_dense, - &ctx->common); - vec_negate(point->updateGN_cholmoddense->x, - ctx->Nstate); // should be more efficient than this later + point->updateGN_lensq = norm2(point->updateGN_cholmoddense->x, ctx->Nstate); + } + else + { + memcpy( point->updateGN_dense, point->Jt_x, ctx->Nstate * sizeof(point->updateGN_dense[0])); + int info; + dpptrs_(&(char){'L'}, &(int){ctx->Nstate}, &(int){1}, + ctx->factorization_dense, + point->updateGN_dense, &(int){ctx->Nstate}, &info, 1); + vec_negate(point->updateGN_dense, + ctx->Nstate); // should be more efficient than this later - point->updateGN_lensq = norm2(point->updateGN_cholmoddense->x, ctx->Nstate); - } - else - { - memcpy( point->updateGN_dense, point->Jt_x, ctx->Nstate * sizeof(point->updateGN_dense[0])); - int info; - dpptrs_(&(char){'L'}, &(int){ctx->Nstate}, &(int){1}, - ctx->factorization_dense, - point->updateGN_dense, &(int){ctx->Nstate}, &info, 1); - vec_negate(point->updateGN_dense, - ctx->Nstate); // should be more efficient than this later + point->updateGN_lensq = norm2(point->updateGN_dense, ctx->Nstate); + } - point->updateGN_lensq = norm2(point->updateGN_dense, ctx->Nstate); + SAY_IF_VERBOSE( "gn step size %.6g", sqrt(point->updateGN_lensq)); + point->updateGN_valid = 1; } + if( ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG ) + vnlog_debug_data.step_len_gauss_newton = sqrt(point->updateGN_lensq); - if( DOGLEG_DEBUG ) - SAY( "gn step size %.6g", sqrt(point->updateGN_lensq)); - - point->updateGN_valid = 1; } static void computeInterpolatedUpdate(double* update_dogleg, + double* update_dogleg_lensq, dogleg_operatingPoint_t* point, double trustregion, const dogleg_solverContext_t* ctx) @@ -606,13 +697,20 @@ } double k = (neg_c + sqrt(discriminant))/l2; + *update_dogleg_lensq = 0.0; for(int i=0; iNstate; i++) + { update_dogleg[i] = a[i] + k*(b[i] - a[i]); + *update_dogleg_lensq += update_dogleg[i]*update_dogleg[i]; + } - if( DOGLEG_DEBUG ) + SAY_IF_VERBOSE( "k_cauchy_to_gn %.6g, norm %.6g", + k, + sqrt(*update_dogleg_lensq)); + if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) { - double updateNorm = norm2(update_dogleg, ctx->Nstate); - SAY( "k_cauchy_to_gn %.6g, norm %.6g", k, sqrt(updateNorm)); + vnlog_debug_data.step_len_interpolated = sqrt(*update_dogleg_lensq); + vnlog_debug_data.k_cauchy_to_gn = k; } } @@ -649,10 +747,9 @@ // If the largest absolute gradient 
element is smaller than the threshold, // we can stop iterating. This is equivalent to the inf-norm for(int i=0; iNstate; i++) - if(fabs(point->Jt_x[i]) > JT_X_THRESHOLD) + if(fabs(point->Jt_x[i]) > ctx->parameters->Jt_x_threshold) return 0; - if( DOGLEG_DEBUG ) - SAY( "Jt_x all below the threshold. Done iterating!"); + SAY_IF_VERBOSE( "Jt_x all below the threshold. Done iterating!"); return 1; } @@ -676,30 +773,38 @@ // takes a step from the given operating point, using the given trust region // radius. Returns the expected improvement, based on the step taken and the // linearized x(p). If we can stop iterating, returns a negative number -static double takeStepFrom(dogleg_operatingPoint_t* pointFrom, double* newp, - double trustregion, dogleg_solverContext_t* ctx) +static double takeStepFrom(dogleg_operatingPoint_t* pointFrom, + double* p_new, + double* step, + double* step_len_sq, + double trustregion, + dogleg_solverContext_t* ctx) { - if( DOGLEG_DEBUG ) - SAY( "taking step with trustregion %.6g", trustregion); - - double update_array[ctx->Nstate]; - double* update; - + SAY_IF_VERBOSE( "taking step with trustregion %.6g", trustregion); + if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) + { + vnlog_debug_data.trustregion_before = trustregion; + vnlog_debug_data.norm2x_before = pointFrom->norm2_x; + } computeCauchyUpdate(pointFrom, ctx); if(pointFrom->updateCauchy_lensq >= trustregion*trustregion) { - if( DOGLEG_DEBUG ) - SAY( "taking cauchy step"); + SAY_IF_VERBOSE( "taking cauchy step"); + if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) + { + vnlog_debug_data.step_type = STEPTYPE_CAUCHY; + vnlog_debug_data.step_len = vnlog_debug_data.step_len_cauchy; + } + *step_len_sq = pointFrom->updateCauchy_lensq; // cauchy step goes beyond my trust region, so I do a gradient descent // to the edge of my trust region and call it good - vec_copy_scaled(update_array, + vec_copy_scaled(step, pointFrom->updateCauchy, trustregion / sqrt(pointFrom->updateCauchy_lensq), ctx->Nstate); - update = update_array; pointFrom->didStepToEdgeOfTrustRegion = 1; } else @@ -713,41 +818,69 @@ computeGaussNewtonUpdate(pointFrom, ctx); if(pointFrom->updateGN_lensq <= trustregion*trustregion) { - if( DOGLEG_DEBUG ) - SAY( "taking GN step"); + SAY_IF_VERBOSE( "taking GN step"); + if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) + { + vnlog_debug_data.step_type = STEPTYPE_GAUSSNEWTON; + vnlog_debug_data.step_len = vnlog_debug_data.step_len_gauss_newton; + } + *step_len_sq = pointFrom->updateGN_lensq; // full Gauss-Newton step lies within my trust region. Take the full step - update = ctx->is_sparse ? pointFrom->updateGN_cholmoddense->x : pointFrom->updateGN_dense; + memcpy( step, + ctx->is_sparse ? pointFrom->updateGN_cholmoddense->x : pointFrom->updateGN_dense, + ctx->Nstate * sizeof(step[0]) ); pointFrom->didStepToEdgeOfTrustRegion = 0; } else { - if( DOGLEG_DEBUG ) - SAY( "taking interpolated step"); + SAY_IF_VERBOSE( "taking interpolated step"); // full Gauss-Newton step lies outside my trust region, so I interpolate // between the Cauchy-point step and the Gauss-Newton step to find a step // that takes me to the edge of my trust region. 
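The interpolated step solves for the scalar k at which the segment from the Cauchy step a to the Gauss-Newton step b crosses the trust-region boundary: norm2(a + k*(b-a)) = trustregion^2. A self-contained restatement of the quadratic that computeInterpolatedUpdate() solves earlier in this patch (illustrative helper, not part of the patch itself):

  // Returns k with norm2(a + k*(b-a)) = trustregion^2, assuming
  // norm2(a) <= trustregion^2 <= norm2(b). Expanding the norm gives
  //   l2*k^2 + 2*c*k + (norm2(a) - trustregion^2) = 0
  // with l2 = norm2(b-a), c = dot(a, b-a); we want the positive root
  static double interpolation_k(const double* a, const double* b,
                                int n, double trustregion)
  {
    double l2 = 0.0, c = 0.0, norm2_a = 0.0;
    for(int i=0; i<n; i++)
    {
      double d = b[i] - a[i];
      l2      += d*d;
      c       += a[i]*d;
      norm2_a += a[i]*a[i];
    }
    double discriminant = c*c - l2*(norm2_a - trustregion*trustregion);
    if(discriminant < 0.0) discriminant = 0.0; // guard against roundoff
    return (-c + sqrt(discriminant)) / l2;
  }

This matches the k = (neg_c + sqrt(discriminant))/l2 expression in the patched computeInterpolatedUpdate().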
- computeInterpolatedUpdate(update_array, pointFrom, trustregion, ctx); - update = update_array; + computeInterpolatedUpdate(step, + step_len_sq, + pointFrom, trustregion, ctx); pointFrom->didStepToEdgeOfTrustRegion = 1; + if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) + { + vnlog_debug_data.step_type = STEPTYPE_INTERPOLATED; + vnlog_debug_data.step_len = vnlog_debug_data.step_len_interpolated; + } } } - - // take the step - vec_add(newp, pointFrom->p, update, ctx->Nstate); - double expectedImprovement = computeExpectedImprovement(update, pointFrom, ctx); + vec_add(p_new, pointFrom->p, step, ctx->Nstate); + double expectedImprovement = computeExpectedImprovement(step, pointFrom, ctx); + if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) + { + vnlog_debug_data.expected_improvement = expectedImprovement; + + if(pointFrom->step_to_here_len_sq != INFINITY) + { + double cos_direction_change = + inner(step, pointFrom->step_to_here, ctx->Nstate) / + sqrt(*step_len_sq * pointFrom->step_to_here_len_sq); + + // check the numerical overflow cases + if(cos_direction_change >= 1.0) + vnlog_debug_data.step_direction_change_deg = 0.0; + else if(cos_direction_change <= -1.0) + vnlog_debug_data.step_direction_change_deg = 180.0; + else + vnlog_debug_data.step_direction_change_deg = 180.0/M_PI*acos(cos_direction_change); + } + } // are we done? For each state variable I look at the update step. If all the elements fall below // a threshold, I call myself done for(int i=0; iNstate; i++) - if( fabs(update[i]) > UPDATE_THRESHOLD ) + if( fabs(step[i]) > ctx->parameters->update_threshold ) return expectedImprovement; - if( DOGLEG_DEBUG ) - SAY( "update small enough. Done iterating!"); + SAY_IF_VERBOSE( "update small enough. Done iterating!"); return -1.0; } @@ -759,23 +892,24 @@ static int evaluateStep_adjustTrustRegion(const dogleg_operatingPoint_t* before, const dogleg_operatingPoint_t* after, double* trustregion, - double expectedImprovement) + double expectedImprovement, + dogleg_solverContext_t* ctx) { double observedImprovement = before->norm2_x - after->norm2_x; - double rho = observedImprovement / expectedImprovement; - if( DOGLEG_DEBUG ) + SAY_IF_VERBOSE( "observed/expected improvement: %.6g/%.6g. rho = %.6g", + observedImprovement, expectedImprovement, rho); + if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) { - SAY( "observed/expected improvement: %.6g/%.6g. rho = %.6g", - observedImprovement, expectedImprovement, rho); + vnlog_debug_data.observed_improvement = observedImprovement; + vnlog_debug_data.rho = rho; } // adjust the trust region - if( rho < TRUSTREGION_DECREASE_THRESHOLD ) + if( rho < ctx->parameters->trustregion_decrease_threshold ) { - if( DOGLEG_DEBUG ) - SAY( "rho too small. decreasing trust region"); + SAY_IF_VERBOSE( "rho too small. decreasing trust region"); // Our model doesn't fit well. We should reduce the trust region size. If // the trust region size was affecting the attempted step, do this by a @@ -784,60 +918,68 @@ if( !before->didStepToEdgeOfTrustRegion ) *trustregion = sqrt(before->updateGN_lensq); - *trustregion *= TRUSTREGION_DECREASE_FACTOR; + *trustregion *= ctx->parameters->trustregion_decrease_factor; } - else if (rho > TRUSTREGION_INCREASE_THRESHOLD && before->didStepToEdgeOfTrustRegion) + else if (rho > ctx->parameters->trustregion_increase_threshold && before->didStepToEdgeOfTrustRegion) { - if( DOGLEG_DEBUG ) - SAY( "rho large enough. increasing trust region"); + SAY_IF_VERBOSE( "rho large enough. 
increasing trust region"); - *trustregion *= TRUSTREGION_INCREASE_FACTOR; + *trustregion *= ctx->parameters->trustregion_increase_factor; } + if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) + vnlog_debug_data.trustregion_after = *trustregion; return rho > 0.0; } static int runOptimizer(dogleg_solverContext_t* ctx) { - double trustregion = TRUSTREGION0; + double trustregion = ctx->parameters->trustregion0; int stepCount = 0; if( computeCallbackOperatingPoint(ctx->beforeStep, ctx) ) return stepCount; - if( DOGLEG_DEBUG ) - SAY( "Initial operating point has norm2_x %.6g", ctx->beforeStep->norm2_x); + SAY_IF_VERBOSE( "Initial operating point has norm2_x %.6g", ctx->beforeStep->norm2_x); - while( stepCountbeforeStep->step_to_here_len_sq = INFINITY; + + while( stepCountparameters->max_iterations ) { - if( DOGLEG_DEBUG ) - { - SAY( "================= step %d", stepCount ); - } + SAY_IF_VERBOSE( "================= step %d", stepCount ); while(1) { - if( DOGLEG_DEBUG ) - SAY("--------"); + SAY_IF_VERBOSE("--------"); double expectedImprovement = - takeStepFrom(ctx->beforeStep, ctx->afterStep->p, trustregion, ctx); + takeStepFrom(ctx->beforeStep, + ctx->afterStep->p, + ctx->afterStep->step_to_here, + &ctx->afterStep->step_to_here_len_sq, + trustregion, ctx); // negative expectedImprovement is used to indicate that we're done if(expectedImprovement < 0.0) + { + if( ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG ) + vnlog_debug_emit_record(stepCount, 1); return stepCount; + } int afterStepZeroGradient = computeCallbackOperatingPoint(ctx->afterStep, ctx); - if( DOGLEG_DEBUG ) - SAY( "Evaluated operating point with norm2_x %.6g", ctx->afterStep->norm2_x); + SAY_IF_VERBOSE( "Evaluated operating point with norm2_x %.6g", ctx->afterStep->norm2_x); + if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) + vnlog_debug_data.norm2x_after = ctx->afterStep->norm2_x; if( evaluateStep_adjustTrustRegion(ctx->beforeStep, ctx->afterStep, &trustregion, - expectedImprovement) ) + expectedImprovement, ctx) ) { - if( DOGLEG_DEBUG ) - SAY( "accepted step"); + SAY_IF_VERBOSE( "accepted step"); + if( ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG ) + vnlog_debug_emit_record(stepCount, 1); stepCount++; // I accept this step, so the after-step operating point is the before-step operating point @@ -850,8 +992,7 @@ if( afterStepZeroGradient ) { - if( DOGLEG_DEBUG ) - SAY( "Gradient low enough and we just improved. Done iterating!" ); + SAY_IF_VERBOSE( "Gradient low enough and we just improved. Done iterating!" ); return stepCount; } @@ -859,16 +1000,15 @@ break; } - if( DOGLEG_DEBUG ) - SAY( "rejected step"); + SAY_IF_VERBOSE( "rejected step"); + if( ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG ) + vnlog_debug_emit_record(stepCount, 0); // This step was rejected. check if the new trust region size is small // enough to give up - if(trustregion < TRUSTREGION_THRESHOLD) + if(trustregion < ctx->parameters->trustregion_threshold) { - if( DOGLEG_DEBUG ) - SAY( "trust region small enough. Giving up. Done iterating!"); - + SAY_IF_VERBOSE( "trust region small enough. Giving up. 
Done iterating!"); return stepCount; } @@ -876,14 +1016,14 @@ } } - if( DOGLEG_DEBUG && stepCount == MAX_ITERATIONS) - SAY( "Exceeded max number of iterations"); + if(stepCount == ctx->parameters->max_iterations) + SAY_IF_VERBOSE( "Exceeded max number of iterations"); return stepCount; } static -dogleg_operatingPoint_t* allocOperatingPoint(int Nstate, int numMeasurements, +dogleg_operatingPoint_t* allocOperatingPoint(int Nstate, int numMeasurements, unsigned int NJnnz, cholmod_common* common) { @@ -892,11 +1032,30 @@ dogleg_operatingPoint_t* point = malloc(sizeof(dogleg_operatingPoint_t)); ASSERT(point != NULL); - point->p = malloc(Nstate * sizeof(point->p[0])); - ASSERT(point->p != NULL); - point->x = malloc(numMeasurements * sizeof(point->x[0])); - ASSERT(point->x != NULL); + int Npool = + Nstate + + numMeasurements + + Nstate + + Nstate + + Nstate; + if(!is_sparse) + Npool += numMeasurements*Nstate + Nstate; + + double* pool = malloc( Npool * sizeof(double) ); + ASSERT( pool != NULL ); + + point->p = &pool[0]; + point->x = &pool[Nstate]; + point->Jt_x = &pool[Nstate + + numMeasurements]; + point->updateCauchy = &pool[Nstate + + numMeasurements + + Nstate]; + point->step_to_here = &pool[Nstate + + numMeasurements + + Nstate + + Nstate]; if( is_sparse ) { @@ -911,20 +1070,19 @@ } else { - point->J_dense = malloc( numMeasurements * Nstate * sizeof(point->J_dense[0]) ); - ASSERT(point->J_dense != NULL); - - point->updateGN_dense = malloc( Nstate * sizeof(point->updateGN_dense[0]) ); - ASSERT(point->updateGN_dense != NULL); + point->J_dense = &pool[Nstate + + numMeasurements + + Nstate + + Nstate + + Nstate]; + point->updateGN_dense = &pool[Nstate + + numMeasurements + + Nstate + + Nstate + + Nstate + + numMeasurements * Nstate]; } - // the 1-column vector Jt * x - point->Jt_x = malloc(Nstate * sizeof(point->Jt_x[0])); - ASSERT(point->Jt_x != NULL); - - // the cached update vectors - point->updateCauchy = malloc(Nstate * sizeof(point->updateCauchy[0])); - // vectors don't have any valid data yet point->updateCauchy_valid = 0; point->updateGN_valid = 0; @@ -934,8 +1092,9 @@ static void freeOperatingPoint(dogleg_operatingPoint_t** point, cholmod_common* common) { + // MUST match allocOperatingPoint(). It does a single malloc() and stores the + // pointer into p free((*point)->p); - free((*point)->x); int is_sparse = common != NULL; @@ -946,14 +1105,6 @@ if((*point)->updateGN_cholmoddense != NULL) cholmod_free_dense(&(*point)->updateGN_cholmoddense, common); } - else - { - free( (*point)->J_dense ); - free( (*point)->updateGN_dense ); - } - - free((*point)->Jt_x); - free((*point)->updateCauchy); free(*point); *point = NULL; @@ -967,7 +1118,7 @@ va_start(ap, s); int ret = vfprintf(stderr, s, ap); va_end(ap); - + fprintf(stderr, "\n"); return ret; } @@ -1009,6 +1160,9 @@ unsigned int Nmeas, unsigned int NJnnz, dogleg_callback_t* f, void* cookie, + + // NULL to use the globals + const dogleg_parameters2_t* parameters, dogleg_solverContext_t** returnContext) { int is_sparse = NJnnz > 0; @@ -1021,7 +1175,10 @@ ctx->lambda = 0.0; ctx->Nstate = Nstate; ctx->Nmeasurements = Nmeas; + ctx->parameters = parameters ? parameters : ¶meters_global; + if( ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG ) + vnlog_debug_emit_legend(); if( returnContext != NULL ) *returnContext = ctx; @@ -1052,20 +1209,20 @@ // iteration memcpy(p, ctx->beforeStep->p, Nstate * sizeof(double)); + SAY_IF_VERBOSE( "success! 
took %d iterations", numsteps); + if( returnContext == NULL ) dogleg_freeContext(&ctx); - if( DOGLEG_DEBUG ) - SAY( "success! took %d iterations", numsteps); - return norm2_x; } -double dogleg_optimize(double* p, unsigned int Nstate, - unsigned int Nmeas, unsigned int NJnnz, - dogleg_callback_t* f, - void* cookie, - dogleg_solverContext_t** returnContext) +double dogleg_optimize2(double* p, unsigned int Nstate, + unsigned int Nmeas, unsigned int NJnnz, + dogleg_callback_t* f, + void* cookie, + const dogleg_parameters2_t* parameters, + dogleg_solverContext_t** returnContext) { if( NJnnz == 0 ) { @@ -1074,15 +1231,1289 @@ } return _dogleg_optimize(p, Nstate, Nmeas, NJnnz, f, - cookie, returnContext); + cookie, + parameters, + returnContext); +} + +double dogleg_optimize(double* p, unsigned int Nstate, + unsigned int Nmeas, unsigned int NJnnz, + dogleg_callback_t* f, + void* cookie, + dogleg_solverContext_t** returnContext) +{ + return dogleg_optimize2(p,Nstate,Nmeas,NJnnz,f,cookie, + NULL, // no parameters; use the globals + returnContext); } +double dogleg_optimize_dense2(double* p, unsigned int Nstate, + unsigned int Nmeas, + dogleg_callback_dense_t* f, void* cookie, + const dogleg_parameters2_t* parameters, + dogleg_solverContext_t** returnContext) +{ + return _dogleg_optimize(p, Nstate, Nmeas, 0, (dogleg_callback_t*)f, + cookie, + parameters, + returnContext); +} double dogleg_optimize_dense(double* p, unsigned int Nstate, unsigned int Nmeas, dogleg_callback_dense_t* f, void* cookie, dogleg_solverContext_t** returnContext) { - return _dogleg_optimize(p, Nstate, Nmeas, 0, (dogleg_callback_t*)f, - cookie, returnContext); + return dogleg_optimize_dense2(p,Nstate,Nmeas,f,cookie, + NULL, // no parameters; use the globals + returnContext); +} + + + + + + + +// Computes pinv(J) for a subset of measurements: inv(JtJ) * +// Jt[imeasurement0..imeasurement0+N-1]. Returns false if something failed. +// ASSUMES THAT THE CHOLESKY FACTORIZATION HAS ALREADY BEEN COMPUTED. +// +// This function is experimental, and subject to change +static bool pseudoinverse_J_dense(// output + double* out, + + // inputs + const dogleg_operatingPoint_t* point, + const dogleg_solverContext_t* ctx, + int i_meas0, int NmeasInChunk) +{ + int info; + memcpy(out, + &point->J_dense[i_meas0*ctx->Nstate], + NmeasInChunk*ctx->Nstate*sizeof(double)); + dpptrs_(&(char){'L'}, &(int){ctx->Nstate}, &NmeasInChunk, + ctx->factorization_dense, + out, + &(int){ctx->Nstate}, &info, 1); + return info==0; +} + +// Computes pinv(J) for a subset of measurements: inv(JtJ) * +// Jt[imeasurement0..imeasurement0+N-1]. Returns false if something failed. +// ASSUMES THAT THE CHOLESKY FACTORIZATION HAS ALREADY BEEN COMPUTED. +// +// allocates memory, returns NULL on failure. ON SUCCESS, THE CALLER IS +// RESPONSIBLE FOR FREEING THE RETURNED MEMORY +// +// This function is experimental, and subject to change +static cholmod_dense* pseudoinverse_J_sparse(// inputs + const dogleg_operatingPoint_t* point, + dogleg_solverContext_t* ctx, + int i_meas0, int NmeasInChunk, + + // Pre-allocated array for the + // right-hand-side. This will be used as a + // workspace. Create this like so: + // + // cholmod_allocate_dense( Nstate, + // NmeasInChunk, + // Nstate, + // CHOLMOD_REAL, + // &ctx->common ); + + cholmod_dense* Jt_chunk) +{ + // I'm solving JtJ x = b where J is sparse, b is sparse, but x ends up dense. + // cholmod doesn't have functions for this exact case. so I use the + // dense-sparse-dense function (cholmod_solve), and densify the input. 
Instead
+  // of sparse-sparse-sparse and then densifying the output (cholmod_spsolve()),
+  // which feels like it would be slower.
+
+  memset( Jt_chunk->x, 0, Jt_chunk->nrow*Jt_chunk->ncol*sizeof(double) );
+  int Jt_chunk_ncol_backup = Jt_chunk->ncol;
+  for(int i_meas=0; i_meas<NmeasInChunk; i_meas++)
+  {
+    if( i_meas0 + i_meas >= (int)ctx->Nmeasurements )
+    {
+      // at the end, we could have one chunk with fewer than chunk_size
+      // columns
+      Jt_chunk->ncol = i_meas;
+      break;
+    }
+
+    for(unsigned int i0=P(point->Jt, i_meas0+i_meas); i0<P(point->Jt, i_meas0+i_meas+1); i0++)
+    {
+      int    irow = I(point->Jt,i0);
+      double x    = X(point->Jt,i0);
+      ((double*)Jt_chunk->x)[irow + i_meas*Jt_chunk->nrow] = x;
+    }
+  }
+
+  // solve inv(JtJ)Jt[slice]
+  cholmod_dense* pinv =
+    cholmod_solve(CHOLMOD_A,
+                  ctx->factorization,
+                  Jt_chunk,
+                  &ctx->common);
+  Jt_chunk->ncol = Jt_chunk_ncol_backup;
+  return pinv;
+}
+
+
+/*
+The below is a bunch of code for outlier detection/rejection and for confidence
+evaluation. IT'S ALL EXPERIMENTAL AND SUBJECT TO CHANGE
+
+What is an outlier? Suppose I just found an optimum. I define an outlier as an
+observation that does two things to the problem if I remove that observation:
+
+1. The cost function would improve significantly. Things would clearly improve
+   because the cost function contribution of the removed point itself would get
+   removed, but ALSO because the parameters could fit the remaining data better
+   without that extra observation; the "outlier" wouldn't pull the solution away
+   from the "true" optimum.
+
+2. The confidence of the solution does not significantly decrease. One could
+   imagine a set of data that define the problem poorly, and produce a low cost
+   function value for some (overfit) set of parameters. And one can imagine an
+   extra point being added that defines the problem and increases the confidence
+   of the solution. This extra point would suppress the overfitting, so this
+   extra point would increase the cost function value. Condition 1 above would
+   make this extra point look like an outlier, and this condition is meant to
+   detect this case and to classify this point as NOT an outlier
+
+Let's say we just computed an optimum least-squares fit, and we try to determine
+if some of the data points look like outliers. The least-squares problem I just
+solved has cost function
+
+  E = norm2(x)
+
+where x is a length-N vector of measurements. We solved it by optimizing the
+vector of parameters p. We're at an optimum, so dE/dp = 0 -> Jt x = 0
+
+We define an outlier as a measurement that would greatly improve the cost
+function E if this measurement was removed, and the problem was re-optimized.
+
+Let's say the problem is locally linear (J = dx/dp is constant), and let's say
+re-optimizing moves the parameter by dp. The original cost function was
+
+  E0 = norm2(x)
+
+If we move by dp, and take away the outlier's own cost, we get
+
+  E1 = norm2(x + J dp) - norm2( x* + J* dp )
+
+where x* and J* refer to the outlier measurements. Thus "Dima's self+others
+factor" is norm2(x) - (norm2(x + J dp) - norm2( x* + J* dp ))
+
+We can choose to look at ONLY the effect on the other variables. That would
+produce "Dima's others factor" = norm2(x)-norm2(x*) - (norm2(x + J dp) -
+norm2(x* + J* dp ))
+
+This is very similar to Cook's D factor (which Dima apparently reinvented 40
+years after the fact!). This factor looks not at the difference of norm2(x), but
+at norm2(difference) instead. So "Cook's self+others factor" is proportional to
+norm2(x - (x+J dp)). This is the "normal" Cook's D factor.
+We can also compute
+"Cook's others factor": norm2(x - vec(x*) - (x + J dp - (vec(x*) + full(J*) dp)))
+= norm2( - J dp + full(J*) dp )
+= norm2(J dp) + norm2(full(J*) dp) - 2 dpt Jt full(J*) dp
+= norm2(J dp) + norm2(J* dp) - 2 norm2(J* dp)   (since dpt Jt full(J*) dp = norm2(J* dp))
+= norm2(J dp) - norm2(J* dp)
+
+This is 4 flavors of a very similar computation. In summary (and ignoring scale
+factors):
+
+  Dima's self+others: -norm2(J dp) + 2 x*t J* dp + norm2(J* dp) + norm2(x*)
+  Dima's others     : -norm2(J dp) + 2 x*t J* dp + norm2(J* dp)
+  Cook's self+others: norm2(J dp)
+  Cook's others     : norm2(J dp) - norm2(J* dp)
+
+Let's compute these. dE1/dp = 0 at p+dp ->
+
+  0 = Jt x + JtJ dp - J*t x* - J*tJ* dp
+    = JtJ dp - J*t x* - J*tJ* dp
+
+-> dp = inv(JtJ - J*tJ*) J*t x*
+
+Woodbury identity:
+
+  inv(JtJ - J*t J*) =
+  = inv(JtJ) - inv(JtJ) J*t inv(-I + J* inv(JtJ) J*t) J* inv(JtJ)
+
+Let
+  A = J* inv(JtJ) J*t
+  B = inv(A - I)
+
+So
+  AB = BA = I+B
+
+Thus
+  inv(JtJ - J*t J*) =
+  = inv(JtJ) - inv(JtJ) J*t B J* inv(JtJ)
+
+and
+
+  dp = inv(JtJ - J*tJ*) J*t x* =
+     = inv(JtJ)J*t x* - inv(JtJ) J*t B J* inv(JtJ) J*t x*
+     = inv(JtJ)J*t x* - inv(JtJ) J*t B A x*
+     = inv(JtJ)J*t(I - B A) x*
+     = -inv(JtJ)J*t B x*
+
+Then
+
+  norm2(J dp)  = x*t ( B J* inv() Jt J inv() J*t B ) x*
+               = x*t ( B J* inv() J*t B ) x*
+               = x*t ( B A B ) x*
+               = x*t ( B + B*B ) x*
+
+  2 x*t J* dp  = -2 x*t J* inv(JtJ)J*t B x* =
+               = -2 x*t A B x* =
+               = x*t (-2AB) x* =
+               = x*t (-2I - 2B) x*
+
+  norm2(J* dp) = x*t ( B J* inv() J*tJ* inv() J*t B ) x* =
+               = x*t ( B A A B ) x* =
+               = x*t ( I + 2B + B*B ) x*
+
+  norm2(x*)    = x*t ( I ) x*
+
+There're two ways to compute the "Dima's self-only" factor. I can simply say
+that the measurements' cost of norm2(x*) has been removed, so the factor is
+x*t I x*. Or I can
+
+1. Remove the measurements
+2. Re-optimize
+3. Look to see how the residual of x* changes
+
+This is different because I look at what happens to x*(p) when x* is no longer
+in the optimized set.
+
+State moves by dp. x* moves by J* dp. I look at
+
+  dE = norm2(x* + J* dp) - norm2(x*)
+     = 2 x*t J* dp + norm2(J* dp) =
+     = x*t (-2I - 2B + I + 2B + B*B ) x* =
+     = x*t (B*B - I) x*
+
+I expect that removing measurements from the optimization should make their
+residuals worse. I.e. I expect dE > 0. Let's check. Is B*B - I positive
+definite?
Let's say that there's v,l such that + + (B*B-I)v = l v, norm2(v) = 1 + --> + BBv = (l+1)v + vBBv = l+1 + + Let u = Bv -> + norm2(u) = l+1 + + A = J* inv(JtJ) J*t + B = inv(A - I) -> + + v = (A-I)u + norm2(v) = 1 = norm2(Au) - 2ut A u + norm2(u) -> + -> l = 2ut A u - norm2(Au) + + Let w = J*t u + -> Au = J* inv(JtJ) w + -> ut A u = wt inv(JtJ) w -> + l = wt ( 2inv(JtJ) - inv(JtJ)J*tJ* inv(JtJ) ) w + + J*t J* = sum(outer(j*,j*)) = sum(outer(j,j)) - sum(outer(jnot*,jnot*)) = + = JtJ - Jnot*t Jnot* + + -> l = wt ( 2inv(JtJ) - inv(JtJ)JtJinv(JtJ) + inv(JtJ)Jnot*tJnot* inv(JtJ) ) w + = wt ( inv(JtJ) + inv(JtJ)Jnot*tJnot* inv(JtJ) ) w + + -> substitute -> l = wt ( C + CDC ) w + + where C >= 0 and D >= 0 + + wt C wt >= 0 + + wt CDC wt = (Ctw)t D Ctw >= 0 -> l >= 0 + +So B*B-I >= 0 and dE >= 0: removing a point will never make its own fit better + +- if dE >> 0: the other data does NOT support this measurement being correct +- if dE ~~ 0: the other data supports this measurement being correct + + +Putting all this together I get the expressions for the factors above: + + Dima's self+others: x*t (-B ) x* + Dima's others : x*t (-B - I ) x* + Dima's self (simple): x*t ( I ) x* + Dima's self (interesting): x*t (B*B - I ) x* + Cook's self+others: x*t ( B + B*B) x* + Cook's others : x*t (-B - I ) x* + + + +One can also do a similar analysis to gauge our confidence in a solution. We can +do this by + +1. Solving the optimization problem + +2. Querying the solution in a way we care about to produce a new feature + group of measurements x = f - ref. We can compute J = dx/dp = df/dp. And + we presumably know something about ref: like its probability + distribution for instance. Example: we just calibrated a camera; we want + to know how confident we are about a projection in a particular spot on + the imager. I compute a projection in that spot: q = project(v). If + added to the optimization I'd get x = q - ref where 'ref' would be the + observed pixel coordinate. + +3. If this new feature was added to the optimization, I can compute its + outlierness factor in the same way as before. If we are confident in the + solution in a particular spot, then we have consensus, and it wouldn't take + much for these queries to look like outliers: the expected value of the + outlierness would be high. Conversely, if we aren't well-defined then a wider + set of points would fit the solution, and things wouldn't look very outliery + +Very similar analysis to the above: + +Let p,x,J represent the solution. The new feature we're adding is x* with +jacobian J*. The solution would move by dp to get to the new optimum. + +Original solution is an optimum: Jt x = 0 + +If we add x* and move by dp, we get + + E1 = norm2(x + J dp) + norm2( x* + J* dp ) + +Thus "Dima's self+others factor" is (norm2(x + J dp) + norm2( x* + J* dp )) - +norm2(x) = norm2(Jdp) + norm2( x* + J* dp ) + +We can choose to look at ONLY the effect on the other variables. That would +produce "Dima's others factor" norm2(x + J dp) - norm2(x) = norm2(Jdp) + +This is very similar to Cook's D factor (which Dima apparently reinvented 40 +years after the fact!). This factor looks not at the difference of norm2(x), but +at norm2(difference) instead. So "Cook's others factor" is proportional to +norm2(x - (x+J dp)). This is the "normal" Cook's D factor. 
We can also compute +"Cook's self+others factor": norm2(concat(x + J dp, x* + J* dp) - concat(x,x*)) += norm2(x + J dp-x) + norm2(x* + J* dp - x*) = norm2(J dp) + norm2(J* dp) + +This is 4 flavors of a very similar computation. In summary (and ignoring scale +factors): + + Dima's self+others: norm2(J dp) + 2 x*t J* dp + norm2(J* dp) + norm2(x*) + Dima's others : norm2(J dp) + Cook's self+others: norm2(J dp) + norm2(J* dp) + Cook's others : norm2(J dp) + +The problem including the new point is also at an optimum: + + E1 = norm2(x + J dp) + norm2( x* + J* dp ) + dE1/dp = 0 -> 0 = Jt x + JtJ dp + J*t x* + J*tJ*dp = + = JtJ dp + J*t x* + J*tJ*dp +-> dp = -inv(JtJ + J*tJ*) J*t x* + +Woodbury identity: + + -inv(JtJ + J*t J*) = + = -inv(JtJ) + inv(JtJ) J*t inv(I + J* inv(JtJ) J*t) J* inv(JtJ) + +Let + A = J* inv(JtJ) J*t (same as before) + B = inv(A + I) (NOT the same as before) + +So + AB = BA = I-B + +Thus + -inv(JtJ + J*t J*) = + = -inv(JtJ) + inv(JtJ) J*t B J* inv(JtJ) + +and + + dp = -inv(JtJ + J*tJ*) J*t x* = + = -inv(JtJ)J*t x* + inv(JtJ) J*t B J* inv(JtJ) J*t x* = + = -inv(JtJ)J*t x* + inv(JtJ) J*t B A x* + = -inv(JtJ)J*t(I - B A) x* + = -inv(JtJ)J*t B x* (same as before, but with a different B!) + +Then + + norm2(J dp) = x*t ( B J* inv() Jt J inv() J*t B ) x* + = x*t ( B J* inv() J*t B ) x* + = x*t ( B A B ) x* + = x*t ( B - B*B ) x* + + 2 x*t J* dp = -2 x*t J* inv(JtJ)J*t B x* = + = -2 x*t A B x* = + = x*t (-2AB) x* = + = x*t (-2I + 2B) x* + + norm2(J* dp) = x*t ( B J* inv() J*tJ* inv() J*t B ) x* = + = x*t ( B A A B ) x* = + = x*t ( I - 2B + B*B ) x* + + norm2(x*) = x*t ( I ) x* + +How do I compute "Dima's self" factor? The "simple" flavor from above looks at +the new measurement only: norm2(x*). The "interesting" flavor, look at what +happens to the measurements' error when they're added to the optimization set. +State moves by dp. x* moves by J* dp. I look at + + dE = norm2(x* + J*dp) - norm2(x*) = + 2 x*t J* dp + norm2(J* dp) = + x*t (-2I + 2B + I - 2B + B*B) x* = + x*t (B*B - I) x* + +I expect that adding a point to the optimization would make it fit better: dE < +0. Let's check. Let's say that there's v,l such that + + (B*B-I)v = l v, norm2(v) = 1 + --> + BBv = (l+1)v + vBBv = l+1 + + Let u = Bv -> + norm2(u) = l+1 + + A = J* inv(JtJ) J*t + B = inv(A + I) -> + + v = (A+I)u + norm2(v) = 1 = norm2(Au) + 2ut A u + norm2(u) -> + -> l = -2ut A u - norm2(Au) + + Let w = J*t u + -> Au = J* inv(JtJ) w + -> ut A u = wt inv(JtJ) w -> + l = -2 wt inv(JtJ) w - norm2(Au) + + Since inv(JtJ) > 0 -> l < 0. As expected + +So B*B-I is negative definite: adding measurements to the optimization set makes +them fit better + +Putting all this together I get the expressions for the factors above: + + Dima's self+others: x*t (B ) x* + Dima's others : x*t (B - B*B) x* + Dima's self (simple): x*t (I ) x* + Dima's self (interesting): x*t (B*B - I) x* + Cook's self+others: x*t (I - B ) x* + Cook's others : x*t (B - B*B) x* + +These expressions are all tested and verified in +mrcal/analyses/outlierness-test.py + + +There're several slightly-different definitions of Cook's D and of a +rule-of-thumb threshold floating around on the internet. 
Wikipedia says: + + D = norm2(x_io - x_i)^2 / (Nstate * norm2(x_io)/(Nmeasurements - Nstate)) + D_threshold = 1 + +An article https://www.nature.com/articles/nmeth.3812 says + + D = norm2(x_io - x_i)^2 / ((Nstate+1) * norm2(x_io)/(Nmeasurements - Nstate -1)) + D_threshold = 4/Nmeasurements + +I haven't tracked down the reference for this second definition, but it looks +more reasonable, and I use it here. + +Here I use the second definition. That definition expands to + + k = xo^2 / ((Nstate+1) * norm2(x)/(Nmeasurements - Nstate -1)) + B = 1.0/(jt inv(JtJ) j - 1) + f = k * (B + B*B) + +I report normalized outlierness factors so that the threshold is 1. Thus I use + + k = Nmeasurements / (4*((Nstate+1) * norm2(x)/(Nmeasurements - Nstate - 1))) + +*/ + + +static void accum_outlierness_factor(// output + double* factor, + + // inputs + const double* x, + + // A is symmetric. I store the upper triangle row-first + const double* A, + + // if outliers are grouped into features, the + // feature size is set here + int featureSize, + double k) +{ + // This implements Dima's self+outliers factor. + // + // from the derivation in a big comment above I have + // + // f = k (x*t (-B) x* ) + // + // where B = inv(A - I) and + // A = J* inv(JtJ) J*t + + // I only implemented featureSize == 1 and 2 so far. + if(featureSize <= 1) + { + featureSize = 1; + + double denom = 1.0 - *A; + if( fabs(denom) < 1e-8 ) + { + *factor = DBL_MAX; // definitely an outlier + return; + } + else + *factor = x[0]*x[0] / denom; + } + else if(featureSize == 2) + { + double det = (1.0-A[0])*(1.0-A[2]) - A[1]*A[1]; + if( fabs(det) < 1e-8 ) + { + *factor = DBL_MAX; // definitely an outlier + return; + } + else + { + double B00_det = A[2] - 1.0; + double B11_det = A[0] - 1.0; + double B01_det = -A[1]; + + // inner(x, Bx) + double xBx = + (x[0]*x[0]*B00_det + + x[0]*x[1]*B01_det * 2.0 + + x[1]*x[1]*B11_det) / det; + + // norm2(Bx) + __attribute__((unused)) double v1 = x[0]*B00_det + x[1]*B01_det; + __attribute__((unused)) double v2 = x[0]*B01_det + x[1]*B11_det; + __attribute__((unused)) double xBBx = (v1*v1 + v2*v2) / (det*det); + + // // mine self+others + // *factor = -xBx; + + // // mine others / cook others + // *factor = -(x[0]*x[0] + x[1]*x[1]) - xBx; + + // cook's self+others + *factor = xBx + xBBx; + + } + } + else + { + SAY("featureSize > 2 not implemented yet. Got featureSize=%d", featureSize); + ASSERT(0); + } + + +#warning This is a hack. The threshold should be 1.0, and the scaling should make sure that is the case. 
I am leaving it for now + k /= 8.; + + + *factor *= k; +} + +static void getOutliernessScale(// out + double* scale, // if *scale > 0, I just keep what I have + + // in + int Nmeasurements, int Nstate, + int NoutlierFeatures, int featureSize, + double norm2_x) +{ + // if *scale > 0, I just keep what I have + if(*scale > 0.0) + return; + + int Nmeasurements_outliers = NoutlierFeatures*featureSize; + int Nmeasurements_nonoutliers = Nmeasurements - Nmeasurements_outliers; + + *scale = + (double)(Nmeasurements_nonoutliers) / + (4.*((double)(Nstate+1) * norm2_x/(double)(Nmeasurements_nonoutliers - Nstate - 1))); +} + +static bool getOutliernessFactors_dense( // output + double* factors, // Nfeatures factors + + // output, input + double* scale, // if <0 then I recompute + + // inputs + // if outliers are grouped into features, + // the feature size is set here + int featureSize, + int Nfeatures, + int NoutlierFeatures, + const dogleg_operatingPoint_t* point, + dogleg_solverContext_t* ctx ) +{ + // cholmod_spsolve() and cholmod_solve()) work in chunks of 4, so I do this in + // chunks of 4 too. I pass it rows of J, 4 at a time. Note that if I have + // measurement features, I don't want these to cross chunk boundaries, so I set + // up chunk_size=lcm(N,4) + int chunk_size = 4; + if(featureSize <= 1) + featureSize = 1; + if(featureSize > 1) + { + // haven't implemented anything else yet. Don't have lcm() readily available + ASSERT(featureSize == 2); + // chunk_size = lcm(chunk_size,featureSize) + } + + int Nstate = ctx->Nstate; + int Nmeasurements = ctx->Nmeasurements; + bool result = false; + + double* invJtJ_Jt = malloc(Nstate*chunk_size*sizeof(double)); + if(invJtJ_Jt == NULL) + { + SAY("Couldn't allocate invJtJ_Jt!"); + goto done; + } + + getOutliernessScale(scale, + Nmeasurements, Nstate, NoutlierFeatures, featureSize, point->norm2_x); + + int i_measurement_valid_chunk_start = -1; + int i_measurement_valid_chunk_last = -1; + int i_measurement = 0; + for( int i_feature=0; i_feature i_measurement_valid_chunk_last ) + { + bool pinvresult = pseudoinverse_J_dense(invJtJ_Jt, point, ctx, + i_measurement, chunk_size); + if(!pinvresult) + { + SAY("Couldn't compute pinv!"); + goto done; + } + i_measurement_valid_chunk_start = i_measurement; + i_measurement_valid_chunk_last = i_measurement+chunk_size-1; + } + + // from the derivation in a big comment in above I have + // + // f = scale (xot (...) xo ) + // + // where A = Jo inv(JtJ) Jot + // + // A is symmetric. I store the upper triangle + double A[featureSize*(featureSize+1)/2]; + int iA=0; + for(int i=0; iJ_dense[Nstate* i_measurement+j + k]; + } + accum_outlierness_factor(&factors[i_feature], + &point->x[i_measurement], + A, featureSize, *scale); + } + + result = true; + done: + free(invJtJ_Jt); + return result; +} + + +static bool getOutliernessFactors_sparse( // output + double* factors, // Nfeatures factors + + // output, input + double* scale, // if <0 then I recompute + + // inputs + // if outliers are grouped into features, + // the feature size is set here + int featureSize, + int Nfeatures, + int NoutlierFeatures, + const dogleg_operatingPoint_t* point, + dogleg_solverContext_t* ctx ) +{ + // cholmod_spsolve() and cholmod_solve()) work in chunks of 4, so I do this in + // chunks of 4 too. I pass it rows of J, 4 at a time. 
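Since the comments here punt on lcm(), a sketch of the helper that would let chunk_size track featureSize > 2 (hypothetical; the patch itself asserts featureSize == 2):

  static int gcd_int(int a, int b)
  {
    while(b != 0) { int t = a % b; a = b; b = t; }
    return a;
  }
  static int lcm_int(int a, int b)
  {
    return a / gcd_int(a, b) * b; // divide first to limit overflow
  }
  // then: chunk_size = lcm_int(chunk_size, featureSize);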
Note that if I have + // measurement features, I don't want these to cross chunk boundaries, so I set + // up chunk_size=lcm(N,4) + int chunk_size = 4; + if(featureSize <= 1) + featureSize = 1; + if(featureSize > 1) + { + // haven't implemented anything else yet. Don't have lcm() readily available + ASSERT(featureSize == 2); + // chunk_size = lcm(chunk_size,featureSize) + } + + int Nstate = ctx->Nstate; + int Nmeasurements = ctx->Nmeasurements; + bool result = false; + + cholmod_dense* invJtJ_Jt = NULL; + cholmod_dense* Jt_chunk = + cholmod_allocate_dense( Nstate, + chunk_size, + Nstate, + CHOLMOD_REAL, + &ctx->common ); + if(!Jt_chunk) + { + SAY("Couldn't allocate Jt_chunk!"); + goto done; + } + + getOutliernessScale(scale, + Nmeasurements, Nstate, NoutlierFeatures, featureSize, point->norm2_x); + + int i_measurement_valid_chunk_start = -1; + int i_measurement_valid_chunk_last = -1; + int i_measurement = 0; + for( int i_feature=0; i_feature i_measurement_valid_chunk_last ) + { + if(invJtJ_Jt) cholmod_free_dense(&invJtJ_Jt, &ctx->common); + invJtJ_Jt = pseudoinverse_J_sparse(point, ctx, + i_measurement, chunk_size, + Jt_chunk); + if(invJtJ_Jt == NULL) + { + SAY("Couldn't compute pinv!"); + goto done; + } + + i_measurement_valid_chunk_start = i_measurement; + i_measurement_valid_chunk_last = i_measurement+chunk_size-1; + } + + // from the derivation in a big comment in above I have + // + // f = scale (xot (...) xo ) + // + // where A = Jo inv(JtJ) Jot + // + // A is symmetric. I store the upper triangle + double A[featureSize*(featureSize+1)/2]; + int iA=0; + for(int i=0; iJt, i_measurement+j); + l < P(point->Jt, i_measurement+j+1); + l++) + { + int k = I(point->Jt, l); + A[iA] += + ((double*)invJtJ_Jt->x)[Nstate*(i_measurement+i-i_measurement_valid_chunk_start) + k] * + X(point->Jt, l); + } + } + accum_outlierness_factor(&factors[i_feature], + &point->x[i_measurement], + A, featureSize, *scale); + } + + result = true; + done: + if(Jt_chunk) cholmod_free_dense(&Jt_chunk, &ctx->common); + if(invJtJ_Jt) cholmod_free_dense(&invJtJ_Jt, &ctx->common); + return result; +} + +bool dogleg_getOutliernessFactors( // output + double* factors, // Nfeatures factors + + // output, input + double* scale, // if <0 then I recompute + + // inputs + // if outliers are grouped into features, the + // feature size is set here + int featureSize, + int Nfeatures, + int NoutlierFeatures, // how many outliers we already have + dogleg_operatingPoint_t* point, + dogleg_solverContext_t* ctx ) +{ + if(featureSize <= 1) + featureSize = 1; + + dogleg_computeJtJfactorization( point, ctx ); + bool result; + if(ctx->is_sparse) + result = getOutliernessFactors_sparse(factors, scale, featureSize, Nfeatures, NoutlierFeatures, point, ctx); + else + result = getOutliernessFactors_dense(factors, scale, featureSize, Nfeatures, NoutlierFeatures, point, ctx); + +#if 0 + if( result ) + { + int Nstate = ctx->Nstate; + int Nmeasurements = ctx->Nmeasurements; + + + + static FILE* fp = NULL; + if(fp == NULL) + fp = fopen("/tmp/check-outlierness.py", "w"); + static int count = -1; + count++; + if(count > 5) + goto done; + + fprintf(fp, "# WARNING: all J here are unscaled with SCALE_....\n"); + + if(count == 0) + { + fprintf(fp, + "import numpy as np\n" + "import numpysane as nps\n" + "np.set_printoptions(linewidth=100000)\n" + "\n"); + } + + fprintf(fp, "x%d = np.array((", count); + for(int j=0;jx[j]); + fprintf(fp,"))\n"); + + if( ctx->is_sparse ) + { + fprintf(fp, "J%d = np.zeros((%d,%d))\n", count, Nmeasurements, Nstate); + for(int 
+      for(int imeas=0;imeas<Nmeasurements;imeas++)
+      {
+        for(int j = (int)P(point->Jt, imeas);
+            j < (int)P(point->Jt, imeas+1);
+            j++)
+        {
+          int irow = I(point->Jt, j);
+          fprintf(fp, "J%d[%d,%d] = %.20g\n", count,
+                  imeas, irow, X(point->Jt, j));
+        }
+      }
+    }
+    else
+    {
+      fprintf(fp, "J%d = np.array((\n", count);
+      for(int j=0;j<Nmeasurements;j++)
+      {
+        fprintf(fp, "(");
+        for(int i=0;i<Nstate;i++)
+          fprintf(fp, "%.20g,", point->J_dense[j*Nstate+i]);
+        fprintf(fp, "),\n");
+      }
+      fprintf(fp,"))\n");
+    }
+
+    fprintf(fp, "Nmeasurements = %d\n",    Nmeasurements);
+    fprintf(fp, "Nstate = %d\n",           Nstate);
+    fprintf(fp, "Nfeatures = %d\n",        Nfeatures);
+    fprintf(fp, "featureSize = %d\n",      featureSize);
+    fprintf(fp, "NoutlierFeatures = %d\n", NoutlierFeatures);
+
+    fprintf(fp, "scale_got = %.20f\n", *scale);
+
+    fprintf(fp, "factors_got = np.array((");
+    for(int j=0;j<Nfeatures;j++)
+      fprintf(fp, "%.20g,", factors[j]);
+    fprintf(fp,"))\n");
+    fflush(fp);
+  done:;
+  }
+#endif
+
+  return result;
+}
+
+double dogleg_getOutliernessTrace_newFeature_sparse(const double* JqueryFeature,
+                                                    int istateActive,
+                                                    int NstateActive,
+                                                    int featureSize,
+                                                    int NoutlierFeatures,
+                                                    dogleg_operatingPoint_t* point,
+                                                    dogleg_solverContext_t* ctx)
+{
+  /* For a new feature with query jacobian Jq I compute A = Jq inv(JtJ) Jqt.
+     With featureSize == 2 this is a 2x2 matrix, so
+
+       B = inv(I + A) ->
+       tr(B) = (2+a00+a11) / ((1+a00)*(1+a11) - a01^2)
+  */
+
+
+  // This is Jt because cholmod thinks in terms of col-first instead of
+  // row-first
+  int Jt_p[featureSize+1];
+  int Jt_i[NstateActive*featureSize];
+  for(int i=0; i<=featureSize; i++)
+  {
+    Jt_p[i] = i*NstateActive;
+    if(i==featureSize) break;
+    for(int j=0; j<NstateActive; j++)
+      Jt_i[i*NstateActive + j] = istateActive + j;
+  }
+
+  cholmod_sparse Jt_query_sparse = {.nrow   = ctx->Nstate,
+                                    .ncol   = featureSize,
+                                    .nzmax  = NstateActive*featureSize,
+                                    .p      = (void*)Jt_p,
+                                    .i      = (void*)Jt_i,
+                                    .x      = (double*)JqueryFeature,
+                                    .sorted = 1,
+                                    .packed = 1,
+                                    .stype  = 0, // NOT symmetric
+                                    .itype  = CHOLMOD_INT,
+                                    .xtype  = CHOLMOD_REAL,
+                                    .dtype  = CHOLMOD_DOUBLE};
+
+  // Really shouldn't need to do this every time. In fact I probably don't need
+  // to do it at all, since this will have been done by the solver during the
+  // last step
+  dogleg_computeJtJfactorization( point, ctx );
+  cholmod_sparse* invJtJ_Jp =
+    cholmod_spsolve(CHOLMOD_A,
+                    ctx->factorization,
+                    &Jt_query_sparse,
+                    &ctx->common);
+
+  // Now I need trace(matmult(Jquery, invJtJ_Jp))
+
+  // haven't implemented anything else yet
+  ASSERT(featureSize == 2);
+  double A[4] = {}; // gah. only elements 0,1,3 will be stored.
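+  // A = Jq inv(JtJ) Jqt is symmetric, so the loop below fills only the upper
+  // triangle of this row-major 2x2 (elements 0,1,3). That is all the
+  // closed-form 2x2 inversion that follows needs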
+
+  for(int i=0; i<featureSize; i++)
+  {
+    for(unsigned int l = P(invJtJ_Jp, i); l < P(invJtJ_Jp, i+1); l++)
+    {
+      int row = I(invJtJ_Jp, l);
+      if(row >= istateActive)
+      {
+        if(row >= istateActive+NstateActive)
+          break;
+
+        int ic0 = i*featureSize;
+        for(int k=i; k<featureSize; k++)
+          A[ic0 + k] +=
+            X(invJtJ_Jp, l) *
+            JqueryFeature[k*NstateActive + row - istateActive];
+      }
+    }
+  }
+  cholmod_free_sparse(&invJtJ_Jp, &ctx->common);
+
+  double invB00 = A[0]+1.0;
+  double invB01 = A[1];
+  double invB11 = A[3]+1.0;
+
+  double det_invB_recip = 1.0/(invB00*invB11 - invB01*invB01);
+  double B00 =  invB11 * det_invB_recip;
+  double B11 =  invB00 * det_invB_recip;
+
+  __attribute__((unused))
+  double B01 = -invB01 * det_invB_recip;
+  double traceB = B00 + B11;
+  __attribute__((unused))
+  double traceBB = B00*B00 + 2.0*B01*B01 + B11*B11;
+
+
+#if 0
+  static int count = -1;
+  count++;
+  if(count <= 5)
+  {
+    int Nstate        = ctx->Nstate;
+    int Nmeasurements = ctx->Nmeasurements;
+
+
+    static FILE* fp = NULL;
+    if(fp == NULL)
+      fp = fopen("/tmp/check-query-outlierness.py", "w");
+
+    fprintf(fp, "# WARNING: all J here are unscaled with SCALE_....\n");
+
+    if(count == 0)
+    {
+      fprintf(fp,
+              "import numpy as np\n"
+              "import numpysane as nps\n"
+              "np.set_printoptions(linewidth=100000)\n"
+              "\n");
+    }
+
+    fprintf(fp, "x%d = np.array((", count);
+    for(int j=0;j<Nmeasurements;j++)
+      fprintf(fp, "%.20g,", point->x[j]);
+    fprintf(fp,"))\n");
+
+    if( ctx->is_sparse )
+    {
+      fprintf(fp, "J%d = np.zeros((%d,%d))\n", count, Nmeasurements, Nstate);
+      for(int imeas=0;imeas<Nmeasurements;imeas++)
+      {
+        for(int j = (int)P(point->Jt, imeas);
+            j < (int)P(point->Jt, imeas+1);
+            j++)
+        {
+          int irow = I(point->Jt, j);
+          fprintf(fp, "J%d[%d,%d] = %g\n", count,
+                  imeas, irow, X(point->Jt, j));
+        }
+      }
+    }
+    else
+    {
+      fprintf(fp, "J%d = np.array((\n", count);
+      for(int j=0;j<Nmeasurements;j++)
+      {
+        fprintf(fp, "(");
+        for(int i=0;i<Nstate;i++)
+          fprintf(fp, "%g,", point->J_dense[j*Nstate+i]);
+        fprintf(fp, "),\n");
+      }
+      fprintf(fp,"))\n");
+    }
+
+    fprintf(fp, "Nmeasurements = %d\n", Nmeasurements);
+    fprintf(fp, "Nstate = %d\n",        Nstate);
+    fprintf(fp, "featureSize = %d\n",   featureSize);
+    fprintf(fp, "traceB_got = %.20g\n", traceB);
+
+    fprintf(fp, "Jq = np.zeros((featureSize, Nstate), dtype=float)\n");
+    for(int i=0; i<featureSize; i++)
+      for(int j=0; j<NstateActive; j++)
+        fprintf(fp, "Jq[%d,%d] = %.20g\n",
+                i, istateActive+j, JqueryFeature[i*NstateActive + j]);
+    fflush(fp);
+  }
+#endif
+
+  int Nmeasurements = ctx->Nmeasurements;
+  int Nstate        = ctx->Nstate;
+
+  double scale = -1.0;
+  getOutliernessScale(&scale,
+                      Nmeasurements, Nstate, NoutlierFeatures, featureSize, point->norm2_x);
+
+  // // Dima's self+others
+  // return scale * traceB;
+
+  // Cook's self+others
+  return scale * (2.0 - traceB);
+
+  // // Dima's others/Cook's others
+  // // This one is non-monotonic in outlierness-test
+  // return scale * (traceB - traceBB);
+}
+
+
+#define OUTLIER_CONFIDENCE_DROP_THRESHOLD 0.05
+bool dogleg_markOutliers(// output, input
+                         struct dogleg_outliers_t* markedOutliers,
+                         double* scale, // if <0 then I recompute
+
+                         // output, input
+                         int* Noutliers,
+
+                         // input
+                         double (getConfidence)(int i_feature_exclude),
+
+                         // if outliers are grouped into features, the feature
+                         // size is set here
+                         int featureSize,
+                         int Nfeatures,
+
+                         dogleg_operatingPoint_t* point,
+                         dogleg_solverContext_t* ctx)
+{
+  if(featureSize <= 1)
+    featureSize = 1;
+
+  bool markedAny = false;
+
+  double* factors = malloc(Nfeatures * sizeof(double));
+  if(factors == NULL)
+  {
+    SAY("Error allocating factors");
+    goto done;
+  }
+
+  if(!dogleg_getOutliernessFactors(factors, scale,
+                                   featureSize, Nfeatures,
+                                   *Noutliers,
+                                   point, ctx))
+    goto done;
+
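+  // getConfidence(i_feature_exclude) is a user callback: it evaluates the
+  // confidence of the solve with the given feature excluded (pass a negative
+  // i_feature_exclude to exclude nothing), and returns a value < 0 on error
+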
+  // I have my list of POTENTIAL outliers (any that have factor > 1.0). I
+  // check to see how much confidence I would lose if I were to throw out any
+  // of these measurements, and accept the outlier ONLY if the confidence loss
+  // is acceptable
+  double confidence0 = getConfidence(-1);
+  if( confidence0 < 0.0 )
+    goto done;
+
+  SAY_IF_VERBOSE("Initial confidence: %g", confidence0);
+
+  *Noutliers = 0;
+  for(int i=0; i<Nfeatures; i++)
+  {
+    if(markedOutliers[i].marked)
+    {
+      (*Noutliers)++;
+      continue;
+    }
+    if(factors[i] <= 1.0)
+      continue;
+
+    // this feature is a potential outlier. Evaluate the confidence of the
+    // solve without it
+    double confidence = getConfidence(i);
+    if( confidence < 0.0 )
+      goto done;
+
+    double confidence_drop = 1.0 - confidence/confidence0;
+    if( confidence_drop < OUTLIER_CONFIDENCE_DROP_THRESHOLD )
+    {
+      markedOutliers[i].marked = true;
+      markedAny                = true;
+      (*Noutliers)++;
+      SAY_IF_VERBOSE("Feature %d has outlierness factor %g; confidence drop %g "
+                     "is acceptable. Marking as an outlier",
+                     i, factors[i], confidence_drop);
+    }
+  }
+
+ done:
+  free(factors);
+  return markedAny;
+}
diff -Nru libdogleg-0.14/dogleg.h libdogleg-0.15.4/dogleg.h
--- libdogleg-0.14/dogleg.h	2018-02-12 18:55:59.000000000 +0000
+++ libdogleg-0.15.4/dogleg.h	2020-12-11 23:37:34.000000000 +0000
@@ -10,6 +10,7 @@
 #include <cholmod.h>
+#include <stdbool.h>
 
 typedef void (dogleg_callback_t)(const double* p,
                                  double* x,
@@ -19,6 +20,10 @@
 // an operating point of the solver
 typedef struct
 {
+  // The pointers in this structure all point into a single "pool" array
+  // (see allocOperatingPoint()). I thus don't need to store the pointers at
+  // all, and could index that one array directly, but that would break my
+  // ABI
   double* p;
   double* x;
   double norm2_x;
@@ -43,8 +48,40 @@
   int  updateCauchy_valid, updateGN_valid;
   int  didStepToEdgeOfTrustRegion;
+
+  double* step_to_here;
+  double  step_to_here_len_sq;
+
 } dogleg_operatingPoint_t;
 
+// The newer APIs ( dogleg_...2() ) take a dogleg_parameters2_t argument for
+// their settings, while the older ones use a global set of parameters specified
+// with dogleg_set_...(). This global-parameters approach doesn't work if we
+// have multiple users of libdogleg in the same application
+typedef struct
+{
+  int max_iterations;
+  int dogleg_debug;
+
+  // it is cheap to reject a too-large trust region, so I start with something
+  // "large". The solver will quickly move down to something reasonable. Only
+  // the user really knows if this is "large" or not, so they should change
+  // this via the API
+  double trustregion0;
+
+  // These are probably OK to leave alone. Tweaking them can maybe result in
+  // slightly faster convergence
+  double trustregion_decrease_factor;
+  double trustregion_decrease_threshold;
+  double trustregion_increase_factor;
+  double trustregion_increase_threshold;
+
+  // The termination thresholds. Documented below
+  double Jt_x_threshold;
+  double update_threshold;
+  double trustregion_threshold;
+} dogleg_parameters2_t;
+
 // solver context. This has all the internal state of the solver
 typedef struct
 {
@@ -84,9 +121,59 @@
   // Are we using sparse math (cholmod)?
   int is_sparse;
   int Nstate, Nmeasurements;
+
+  const dogleg_parameters2_t* parameters;
+
 } dogleg_solverContext_t;
 
+// Fills in the given structure with the default parameter set
+void dogleg_getDefaultParameters(dogleg_parameters2_t* parameters);
+
+void dogleg_setMaxIterations(int n);
+void dogleg_setTrustregionUpdateParameters(double downFactor, double downThreshold,
+                                           double upFactor,   double upThreshold);
+
+// The argument is a bit-mapped integer. It should be a bit-field structure or
+// an enum, but I keep it an integer for API backwards-compatibility.
+//
+// if(debug == 0                  ): no diagnostic output
+// if(debug &  DOGLEG_DEBUG_VNLOG ): output vnlog diagnostics to stdout
+// if(debug & ~DOGLEG_DEBUG_VNLOG ): output human-oriented diagnostics to stderr
+#define DOGLEG_DEBUG_VNLOG (1 << 30)
+void dogleg_setDebug(int debug);
+
+
+// The following parameters reflect the scaling of the problem being solved, so
+// the user is strongly encouraged to tweak these. The defaults are set
+// semi-arbitrarily
+
+// The size of the trust region at start. It is cheap to reject a too-large
+// trust region, so this should be something "large". Say 10x the length of an
+// "expected" step size
+void dogleg_setInitialTrustregion(double t);
+
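+// For example, a caller relying on the global parameters might configure the
+// solver like this (illustrative values only; the right settings depend on the
+// problem being solved):
+//
+//   dogleg_setDebug(1);
+//   dogleg_setMaxIterations(100);
+//   dogleg_setInitialTrustregion(1e3);
+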
+// termination thresholds. These really depend on the scaling of the input
+// problem, so the user should set these appropriately
+//
+// Jt_x threshold:
+// The function being minimized is E = norm2(x) where x is a function of the state p.
+// dE/dp = 2*Jt*x where Jt is transpose(dx/dp).
+// if( for every i  fabs(Jt_x[i]) < JT_X_THRESHOLD )
+// { we are done; }
+//
+// update threshold:
+// if(for every i  fabs(state_update[i]) < UPDATE_THRESHOLD)
+// { we are done; }
+//
+// trust region threshold:
+// if(trustregion < TRUSTREGION_THRESHOLD)
+// { we are done; }
+//
+// to leave a particular threshold alone, use a value <= 0 here
+void dogleg_setThresholds(double Jt_x, double update, double trustregion);
+
+
 // The main optimization function. Initial estimate of the solution passed in p,
 // final optimized solution returned in p. p has Nstate variables. There are
 // Nmeas measurements, the jacobian matrix has NJnnz non-zero entries.
@@ -101,6 +188,11 @@
                        unsigned int Nmeas, unsigned int NJnnz,
                        dogleg_callback_t* f, void* cookie,
                        dogleg_solverContext_t** returnContext);
+double dogleg_optimize2(double* p, unsigned int Nstate,
+                        unsigned int Nmeas, unsigned int NJnnz,
+                        dogleg_callback_t* f, void* cookie,
+                        const dogleg_parameters2_t* parameters,
+                        dogleg_solverContext_t** returnContext);
 
 // Main optimization function if we're using dense linear algebra. The arguments
 // are very similar to the sparse version: dogleg_optimize()
@@ -108,6 +200,11 @@
                              unsigned int Nmeas,
                              dogleg_callback_dense_t* f, void* cookie,
                              dogleg_solverContext_t** returnContext);
+double dogleg_optimize_dense2(double* p, unsigned int Nstate,
+                              unsigned int Nmeas,
+                              dogleg_callback_dense_t* f, void* cookie,
+                              const dogleg_parameters2_t* parameters,
+                              dogleg_solverContext_t** returnContext);
 
 // Compute the cholesky decomposition of JtJ. This function is only exposed if
 // you need to touch libdogleg internals via returnContext. Sometimes after
@@ -134,42 +231,65 @@
 
 void dogleg_freeContext(dogleg_solverContext_t** ctx);
 
-////////////////////////////////////////////////////////////////
-// solver parameters
-////////////////////////////////////////////////////////////////
-void dogleg_setMaxIterations(int n);
-void dogleg_setTrustregionUpdateParameters(double downFactor, double downThreshold,
-                                           double upFactor, double upThreshold);
+// Computes outlierness factors. This function is experimental, and subject to
+// change.
+bool dogleg_getOutliernessFactors( // output
+                                   double* factors, // Nfeatures factors
 
-// lots of solver-related debug output when on
-void dogleg_setDebug(int debug);
+                                   // output, input
+                                   double* scale, // if <0 then I recompute
 
+                                   // inputs
+                                   // if outliers are grouped into features, the feature size is
+                                   // stated here
+                                   int featureSize,
+                                   int Nfeatures,
+                                   int NoutlierFeatures, // how many outliers we already have
+                                   dogleg_operatingPoint_t* point,
+                                   dogleg_solverContext_t* ctx );
 
-// The following parameters reflect the scaling of the problem being solved, so
-// the user is strongly encouraged to tweak these. The defaults are set
-// semi-arbitrarily
 
-// The size of the trust region at start. It is cheap to reject a too-large
-// trust region, so this should be something "large". Say 10x the length of an
-// "expected" step size
-void dogleg_setInitialTrustregion(double t);
 
-// termination thresholds. These really depend on the scaling of the input
-// problem, so the user should set these appropriately
-//
-// Jt_x threshold:
-// The function being minimized is E = norm2(x) where x is a function of the state p.
-// dE/dp = 2*Jt*x where Jt is transpose(dx/dp).
-// if( for every i  fabs(Jt_x[i]) < JT_X_THRESHOLD )
-// { we are done; }
-//
-// update threshold:
-// if(for every i  fabs(state_update[i]) < UPDATE_THRESHOLD)
-// { we are done; }
-//
-// trust region threshold:
-// if(trustregion < TRUSTREGION_THRESHOLD)
-// { we are done; }
-//
-// to leave a particular threshold alone, use a value <= 0 here
-void dogleg_setThresholds(double Jt_x, double update, double trustregion);
+
+// This stuff is experimental, and subject to change.
+struct dogleg_outliers_t
+{
+  unsigned char marked : 1;
+};
+bool dogleg_markOutliers(// output, input
+                         struct dogleg_outliers_t* markedOutliers,
+                         double* scale, // if <0 then I recompute
+
+                         // output, input
+                         int* Noutliers, // number of outliers before and after this call
+
+                         // input
+                         double (getConfidence)(int i_feature_exclude),
+
+                         // if outliers are grouped into features, the feature size is
+                         // stated here
+                         int featureSize,
+                         int Nfeatures,
+
+                         dogleg_operatingPoint_t* point,
+                         dogleg_solverContext_t* ctx);
+
+void dogleg_reportOutliers( double (getConfidence)(int i_feature_exclude),
+                            double* scale, // if <0 then I recompute
+
+                            // if outliers are grouped into features, the feature size
+                            // is stated here
+                            int featureSize,
+                            int Nfeatures,
+                            int Noutliers, // how many outliers we already have
+
+                            dogleg_operatingPoint_t* point,
+                            dogleg_solverContext_t* ctx);
+
+double dogleg_getOutliernessTrace_newFeature_sparse(const double* JqueryFeature,
+                                                    int istateActive,
+                                                    int NstateActive,
+                                                    int featureSize,
+                                                    int NoutlierFeatures,
+                                                    dogleg_operatingPoint_t* point,
+                                                    dogleg_solverContext_t* ctx);
diff -Nru libdogleg-0.14/Makefile libdogleg-0.15.4/Makefile
--- libdogleg-0.14/Makefile	2018-02-12 18:55:59.000000000 +0000
+++ libdogleg-0.15.4/Makefile	2020-12-11 23:37:34.000000000 +0000
@@ -29,7 +29,7 @@
 
 # Add my optimization flags if none already exist. These could exist if debian
 # packages are built.
-OPTFLAGS := $(if $(filter -O%, $(CFLAGS)),,-O3 -ffast-math -mtune=core2)
+OPTFLAGS := $(if $(filter -O%, $(CFLAGS)),,-O3 -ffast-math -mtune=core2) -fno-omit-frame-pointer
 
 FLAGS += -ggdb -Wall -Wextra -MMD $(OPTFLAGS) -I/usr/include/suitesparse
 CFLAGS += $(FLAGS) --std=gnu99
@@ -42,7 +42,7 @@
 MAN_SECTION = 3
 MAN_TARGET = libdogleg.$(MAN_SECTION)
 
-ALL_TARGETS = $(LIB_TARGETS) $(MAN_TARGET)
+ALL_TARGETS = $(LIB_TARGETS) $(MAN_TARGET) sample
 
 all: $(ALL_TARGETS)
diff -Nru libdogleg-0.14/README.pod libdogleg-0.15.4/README.pod
--- libdogleg-0.14/README.pod	2018-02-12 18:55:59.000000000 +0000
+++ libdogleg-0.15.4/README.pod	2020-12-11 23:37:34.000000000 +0000
@@ -70,14 +70,15 @@
 
 =head2 Main API
 
-=head3 dogleg_optimize
+=head3 dogleg_optimize2
 
-This is the main call to the library for I<sparse> Jacobians. Its declared as
+This is the main call to the library for I<sparse> Jacobians. It's declared as
 
-  double dogleg_optimize(double* p, unsigned int Nstate,
-                         unsigned int Nmeas, unsigned int NJnnz,
-                         dogleg_callback_t* f, void* cookie,
-                         dogleg_solverContext_t** returnContext);
+  double dogleg_optimize2(double* p, unsigned int Nstate,
+                          unsigned int Nmeas, unsigned int NJnnz,
+                          dogleg_callback_t* f, void* cookie,
+                          const dogleg_parameters2_t* parameters,
+                          dogleg_solverContext_t** returnContext);
 
 =over
 
@@ -97,6 +98,10 @@
 
 =item * C<cookie> is an arbitrary data pointer passed untouched to C<f>
 
+=item * C<parameters> is a pointer to the set of parameters to use. Set to C<NULL>
+to use the global parameters, or call C<dogleg_optimize()> instead.
+See the L</Parameters> section below for more details
+
 =item * If not C<NULL>, C<returnContext> can be used to retrieve the full
 context structure from the solver. This can be useful since this structure
 contains the latest operating point values. It also has an active
@@ -109,14 +114,25 @@
 
 C<dogleg_optimize> returns norm2( B<x> ) at the minimum, or a negative value
 if an error occurred.
 
-=head3 dogleg_optimize_dense
+=head3 dogleg_optimize
+
+This is a flavor of C<dogleg_optimize2()> that implicitly uses the global
+parameters. It's declared as
+
+  double dogleg_optimize(double* p, unsigned int Nstate,
+                         unsigned int Nmeas, unsigned int NJnnz,
+                         dogleg_callback_t* f, void* cookie,
+                         dogleg_solverContext_t** returnContext);
+
+=head3 dogleg_optimize_dense2
 
 This is the main call to the library for I<dense> Jacobians. Its declared as
 
-  double dogleg_optimize_dense(double* p, unsigned int Nstate,
-                               unsigned int Nmeas,
-                               dogleg_callback_dense_t* f, void* cookie,
-                               dogleg_solverContext_t** returnContext);
+  double dogleg_optimize_dense2(double* p, unsigned int Nstate,
+                                unsigned int Nmeas,
+                                dogleg_callback_dense_t* f, void* cookie,
+                                const dogleg_parameters2_t* parameters,
+                                dogleg_solverContext_t** returnContext);
 
 The arguments are almost identical to those in the C<dogleg_optimize> call.
 
@@ -134,6 +150,10 @@
 
 =item * C<cookie> is an arbitrary data pointer passed untouched to C<f>
 
+=item * C<parameters> is a pointer to the set of parameters to use. Set to C<NULL>
+to use the global parameters, or call C<dogleg_optimize_dense()> instead. See the
+L</Parameters> section below for more details
+
 =item * If not C<NULL>, C<returnContext> can be used to retrieve the full
 context structure from the solver. This can be useful since this structure
 contains the latest operating point values. You usually want
@@ -144,6 +164,16 @@
 
 C<dogleg_optimize_dense> returns norm2( B<x> ) at the minimum, or a negative
 value if an error occurred.
 
+=head3 dogleg_optimize_dense
+
+This is a flavor of C<dogleg_optimize_dense2()> that implicitly uses the global
+parameters. It's declared as
+
+  double dogleg_optimize_dense(double* p, unsigned int Nstate,
+                               unsigned int Nmeas,
+                               dogleg_callback_dense_t* f, void* cookie,
+                               dogleg_solverContext_t** returnContext);
+
 =head3 dogleg_freeContext
 
 Used to deallocate memory used for an optimization cycle. Defined as:
@@ -384,10 +414,19 @@
 
 =head2 Parameters
 
-It is not required to call any of these, but it's highly recommended to set the initial trust-region
-size and the termination thresholds to match the problem being solved. Furthermore, it's highly
-recommended for the problem being solved to be scaled so that every state variable affects the
-objective norm2( B<x> ) equally. This makes this method's concept of "trust region" much more
+The optimization is controlled by several parameters. These can be set globally
+for I<all> users of C<libdogleg> in a process with the C<dogleg_set_...()>
+functions. Those global values are used by C<dogleg_optimize()> and
+C<dogleg_optimize_dense()>. Or the parameters can be specified independently
+for each invocation by passing a C<dogleg_parameters2_t> argument to
+C<dogleg_optimize2()> or C<dogleg_optimize_dense2()>. The latter is
+recommended, since it lets multiple users of libdogleg in a single application
+coexist without conflict.
+
+It is not required to set any of these, but it's highly recommended to set the
+initial trust-region size and the termination thresholds to match the problem
+being solved. Furthermore, it's highly recommended for the problem being solved
+to be scaled so that every state variable affects the objective norm2( B<x> )
+equally. This makes this method's concept of "trust region" much more
 well-defined and makes the termination criteria work correctly.
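+
+For example, to run one optimization with local parameters, leaving the
+process-global state untouched (a minimal sketch; C<f>, C<cookie> and the
+problem dimensions stand in for whatever the caller already has):
+
+  dogleg_parameters2_t parameters;
+  dogleg_getDefaultParameters(&parameters);
+  parameters.max_iterations = 100;    // illustrative value
+  parameters.trustregion0   = 1.0e3;  // illustrative value; see below
+
+  double norm2x = dogleg_optimize2(p, Nstate, Nmeas, NJnnz,
+                                   f, cookie, &parameters, NULL);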
 
 =head3 dogleg_setMaxIterations
@@ -398,11 +437,25 @@
 
 =head3 dogleg_setDebug
 
-To turn on debug output, call
+To turn on diagnostic output, call
 
   void dogleg_setDebug(int debug);
 
-with a non-zero value for C<debug>. By default, debug output is disabled.
+with a non-zero value for C<debug>. Two separate diagnostic streams are
+available: a verbose human-oriented stream, and a
+L<vnlog|https://github.com/dkogan/vnlog>.
+
+By default, diagnostic output is disabled. The C<debug> argument is a bit-mapped
+integer:
+
+  if(debug == 0                  ): no diagnostic output
+  if(debug &  DOGLEG_DEBUG_VNLOG ): output vnlog diagnostics to stdout
+  if(debug & ~DOGLEG_DEBUG_VNLOG ): output human-oriented diagnostics to stderr
+
+C<DOGLEG_DEBUG_VNLOG> has a very high value, so if human diagnostics are
+desired, the recommended call is:
+
+  dogleg_setDebug(1);
 
 =head3 dogleg_setInitialTrustregion
diff -Nru libdogleg-0.14/sample.c libdogleg-0.15.4/sample.c
--- libdogleg-0.14/sample.c	2018-02-12 18:55:59.000000000 +0000
+++ libdogleg-0.15.4/sample.c	2020-12-11 23:37:34.000000000 +0000
@@ -127,8 +127,6 @@
   }
   Jrowptr[Nmeasurements] = iJacobian;
 
-
-  fprintf(stderr, "Callback finished. 2-norm is %f\n", norm2_x);
 #undef STORE_JACOBIAN
 }
 
@@ -168,34 +166,96 @@
     STORE_JACOBIAN( 5, 1.0 );
   }
 
-
-  fprintf(stderr, "Callback finished. 2-norm is %f\n", norm2_x);
 #undef STORE_JACOBIAN
 }
 
 int main(int argc, char* argv[] )
 {
+  const char* usage = "Usage: %s [--diag-vnlog] [--diag-human] sparse|dense [test-gradients]\n";
+
   int is_sparse;
+  int test_gradients = 0;
+  int debug          = 0;
 
-  if( argc >= 2 && 0 == strcmp(argv[1], "dense") )
-  {
-    fprintf(stderr, "Using DENSE math\n");
-    is_sparse = 0;
-  }
-  else
-  {
-    fprintf(stderr, "Using SPARSE math. Run with '%s dense' to use dense math.\n", argv[0]);
-    is_sparse = 1;
-  }
+  // argument parsing
+  int iarg = 1;
+  for(int i=0; i<2; i++)
+  {
+    if(iarg >= argc)
+    {
+      fprintf(stderr, usage, argv[0]);
+      return 1;
+    }
+    if(0 == strcmp("--diag-vnlog", argv[iarg]))
+    {
+      debug |= DOGLEG_DEBUG_VNLOG;
+      iarg++;
+      continue;
+    }
+    if(0 == strcmp("--diag-human", argv[iarg]))
+    {
+      debug |= 1;
+      iarg++;
+      continue;
+    }
+    break;
+  }
+
+  if(iarg >= argc)
+  {
+    fprintf(stderr, usage, argv[0]);
+    return 1;
+  }
+  if( 0 == strcmp(argv[iarg], "dense") )
+  {
+    fprintf(stderr, "Using DENSE math\n");
+    is_sparse = 0;
+  }
+  else if( 0 == strcmp(argv[iarg], "sparse") )
+  {
+    fprintf(stderr, "Using SPARSE math\n");
+    is_sparse = 1;
+  }
+  else
+  {
+    fprintf(stderr, usage, argv[0]);
+    return 1;
+  }
+
+  iarg++;
+  if(iarg == argc-1)
+  {
+    if( 0 != strcmp("test-gradients", argv[iarg]))
+    {
+      fprintf(stderr, usage, argv[0]);
+      return 1;
+    }
+    fprintf(stderr, "Testing the gradients only\n");
+    test_gradients = 1;
+  }
+  else if(iarg == argc)
+  {
+    // not testing gradients. We're good
+  }
+  else
+  {
+    fprintf(stderr, usage, argv[0]);
+    return 1;
+  }
 
+  srandom( 0 ); // I want determinism here
+
   generateSimulationGrid();
   simulate();
 
-  dogleg_setDebug(1); // request debugging output from the solver
-
+  dogleg_parameters2_t dogleg_parameters;
+  dogleg_getDefaultParameters(&dogleg_parameters);
+  dogleg_parameters.dogleg_debug = debug;
 
   double p[Nstate];
 
@@ -218,23 +278,30 @@
   // sure that the reported and observed gradients match (the relative error is
   // low)
   fprintf(stderr, "have %d variables\n", Nstate);
-  for(int i=0; i