diff -Nru libdogleg-0.14/Changes libdogleg-0.15.4/Changes
--- libdogleg-0.14/Changes	2018-02-12 18:55:59.000000000 +0000
+++ libdogleg-0.15.4/Changes	2020-12-11 23:37:34.000000000 +0000
@@ -1,3 +1,28 @@
+libdogleg (0.15.4) unstable; urgency=medium
+
+  * verbose output has a mode to output vnlog
+  * defragmented malloc() calls
+  * dogleg_operatingPoint_t stores the step vector we took to get here
+  * Added dogleg_optimize2() and dogleg_optimize_dense2() entry points
+    that use local parameters
+
+ -- Dima Kogan  Fri, 11 Dec 2020 15:37:26 -0800
+
+libdogleg (0.15.1)
+
+  * Various updates to the experimental outlier-rejection logic. These
+    break the previous API/ABI, but those were experimental and unreleased
+    anyway
+
+ -- Dima Kogan  Tue, 02 Oct 2018 09:19:03 -0700
+
+libdogleg (0.15)
+
+  * added experimental outlier-rejection logic.
+    This is not yet documented, and the API is not yet fixed. Use at your own risk
+
+ -- Dima Kogan  Wed, 08 Aug 2018 16:04:38 -0700
+
 libdogleg (0.14)
 
   * standardized CHOLMOD error messages
diff -Nru libdogleg-0.14/debian/changelog libdogleg-0.15.4/debian/changelog
--- libdogleg-0.14/debian/changelog	2018-02-13 07:16:29.000000000 +0000
+++ libdogleg-0.15.4/debian/changelog	2020-12-11 23:41:39.000000000 +0000
@@ -1,3 +1,21 @@
+libdogleg (0.15.4-1) unstable; urgency=medium
+
+  * New upstream release
+    - added experimental outlier-rejection logic. Not stable and not fully
+      documented
+    - verbose output has a mode to output vnlog
+    - Added dogleg_optimize2() and dogleg_optimize_dense2() entry points
+      that use local parameters
+
+ -- Dima Kogan  Fri, 11 Dec 2020 15:41:39 -0800
+
+libdogleg (0.15-1) unstable; urgency=medium
+
+  * updated Vcs tags to salsa
+  * New upstream version 0.15
+
+ -- Dima Kogan  Wed, 08 Aug 2018 16:11:27 -0700
+
 libdogleg (0.14-1) unstable; urgency=medium
 
   * Minor upstream update
diff -Nru libdogleg-0.14/debian/control libdogleg-0.15.4/debian/control
--- libdogleg-0.14/debian/control	2018-02-13 07:14:57.000000000 +0000
+++ libdogleg-0.15.4/debian/control	2019-08-12 23:19:33.000000000 +0000
@@ -6,8 +6,8 @@
 Uploaders: Dima Kogan
 Standards-Version: 3.9.6
 Homepage: https://github.com/dkogan/libdogleg
-Vcs-Git: git://anonscm.debian.org/debian-science/packages/libdogleg.git
-Vcs-Browser: http://anonscm.debian.org/gitweb/?p=debian-science/packages/libdogleg.git
+Vcs-Git: https://salsa.debian.org/science-team/libdogleg.git
+Vcs-Browser: https://salsa.debian.org/science-team/libdogleg
 
 Package: libdogleg2
 Section: libs
diff -Nru libdogleg-0.14/debian/libdogleg2.symbols libdogleg-0.15.4/debian/libdogleg2.symbols
--- libdogleg-0.14/debian/libdogleg2.symbols	2018-02-13 07:14:57.000000000 +0000
+++ libdogleg-0.15.4/debian/libdogleg2.symbols	2019-08-12 23:23:41.000000000 +0000
@@ -1,8 +1,15 @@
 libdogleg.so.2 libdogleg2 #MINVER#
  dogleg_computeJtJfactorization@Base 0.12
  dogleg_freeContext@Base 0.08
+ dogleg_getDefaultParameters@Base 0.15.3
+ dogleg_getOutliernessFactors@Base 0.15
+ dogleg_getOutliernessTrace_newFeature_sparse@Base 0.15.1
+ dogleg_markOutliers@Base 0.15
+ dogleg_optimize2@Base 0.15.3
  dogleg_optimize@Base 0.08
+ dogleg_optimize_dense2@Base 0.15.3
  dogleg_optimize_dense@Base 0.10
+ dogleg_reportOutliers@Base 0.15
  dogleg_setDebug@Base 0.08
  dogleg_setInitialTrustregion@Base 0.08
  dogleg_setMaxIterations@Base 0.08
diff -Nru libdogleg-0.14/dogleg.c libdogleg-0.15.4/dogleg.c
--- libdogleg-0.14/dogleg.c	2018-02-12 18:55:59.000000000 +0000
+++ libdogleg-0.15.4/dogleg.c	2020-12-11 23:37:34.000000000 +0000
@@ -1,12 +1,13 @@
 // -*- mode: C; c-basic-offset: 2 -*-
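The headline API change in dogleg.c is the dogleg_optimize2()/dogleg_optimize_dense2() pair, which take an explicit dogleg_parameters2_t instead of relying on the process-wide globals that dogleg_setDebug() and friends mutate. A minimal sketch of the intended call pattern, pieced together from this patch; Nstate, Nmeas, NJnnz, f, cookie, and p are placeholders for the caller's problem:

  dogleg_parameters2_t parameters;
  dogleg_getDefaultParameters(&parameters);
  parameters.max_iterations = 50;
  parameters.dogleg_debug   = DOGLEG_DEBUG_VNLOG; // emit vnlog records on stdout

  double norm2_x = dogleg_optimize2(p, Nstate, Nmeas, NJnnz,
                                    f, cookie,
                                    &parameters,
                                    NULL); // don't keep the solver context

Passing parameters==NULL makes dogleg_optimize2() fall back to the globals, which is how the original dogleg_optimize() keeps working unchanged.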
 // Copyright 2011 Oblong Industries
-//           2017 Dima Kogan
+//           2017-2018 Dima Kogan
 // License: GNU LGPL <http://www.gnu.org/licenses/lgpl.html>.
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <math.h>
+#include <float.h>
 #ifdef __APPLE__
 #include
 #else
 #include
 #endif
@@ -19,11 +20,16 @@
 #endif
 
+// Any non-vnlog bit mask
+#define DOGLEG_DEBUG_OTHER_THAN_VNLOG (~DOGLEG_DEBUG_VNLOG)
 #define SAY_NONEWLINE(fmt, ...) fprintf(stderr, "libdogleg at %s:%d: " fmt, __FILE__, __LINE__, ## __VA_ARGS__)
 #define SAY(fmt, ...)           do { SAY_NONEWLINE(fmt, ## __VA_ARGS__); fprintf(stderr, "\n"); } while(0)
+// This REQUIRES that a "dogleg_solverContext_t* ctx" be available
+#define SAY_IF_VERBOSE(fmt,...) do { if( ctx->parameters->dogleg_debug & DOGLEG_DEBUG_OTHER_THAN_VNLOG ) SAY(fmt, ##__VA_ARGS__); } while(0)
 
 // I do this myself because I want this to be active in all build modes, not just !NDEBUG
-#define ASSERT(x) do { if(!(x)) { SAY("ASSERTION FAILED: " #x "is not true"); exit(1); } } while(0)
+#define ASSERT(x) do { if(!(x)) { SAY("ASSERTION FAILED: " #x " is not true"); exit(1); } } while(0)
 
 // used to consolidate the casts
 #define P(A, index) ((unsigned int*)((A)->p))[index]
@@ -32,30 +38,105 @@
 
 //////////////////////////////////////////////////////////////////////////////////////////
-// parameter stuff
+// vnlog debugging stuff
+//
+// This is used if the user calls dogleg_setDebug(DOGLEG_DEBUG_VNLOG | stuff)
 //////////////////////////////////////////////////////////////////////////////////////////
 
-// These are the optimizer parameters. They have semi-arbitrary defaults. The
-// user should adjust them through the API
-static int DOGLEG_DEBUG   = 0;
-static int MAX_ITERATIONS = 100;
-
-// it is cheap to reject a too-large trust region, so I start with something
-// "large". The solver will quickly move down to something reasonable. Only the
-// user really knows if this is "large" or not, so they should change this via
-// the API
-static double TRUSTREGION0 = 1.0e3;
-
-// These are probably OK to leave alone. Tweaking them can maybe result in
-// slightly faster convergence
-static double TRUSTREGION_DECREASE_FACTOR    = 0.1;
-static double TRUSTREGION_INCREASE_FACTOR    = 2;
-static double TRUSTREGION_INCREASE_THRESHOLD = 0.75;
-static double TRUSTREGION_DECREASE_THRESHOLD = 0.25;
-
-// The termination thresholds.
Documented in the header -static double JT_X_THRESHOLD = 1e-8; -static double UPDATE_THRESHOLD = 1e-8; -static double TRUSTREGION_THRESHOLD = 1e-8; +#define VNLOG_DEBUG_STEP_TYPE_LIST(_) \ + _(STEPTYPE_CAUCHY, "cauchy") \ + _(STEPTYPE_GAUSSNEWTON, "gaussnewton") \ + _(STEPTYPE_INTERPOLATED, "interpolated") \ + _(STEPTYPE_FAILED, "failed") +#define VNLOG_DEBUG_STEP_TYPE_NAME_COMMA(name,shortname) name, +typedef enum { VNLOG_DEBUG_STEP_TYPE_LIST(VNLOG_DEBUG_STEP_TYPE_NAME_COMMA) + STEPTYPE_UNINITIALIZED } vnlog_debug_step_type_t; +#define VNLOG_DEBUG_FIELDS(_) \ + _(double, norm2x_before, INFINITY) \ + _(double, norm2x_after, INFINITY) \ + _(double, step_len_cauchy, INFINITY) \ + _(double, step_len_gauss_newton, INFINITY) \ + _(double, step_len_interpolated, INFINITY) \ + _(double, k_cauchy_to_gn, INFINITY) \ + _(double, step_len, INFINITY) \ + _(vnlog_debug_step_type_t, step_type, STEPTYPE_UNINITIALIZED) \ + _(double, step_direction_change_deg, INFINITY) \ + _(double, expected_improvement, INFINITY) \ + _(double, observed_improvement, INFINITY) \ + _(double, rho, INFINITY) \ + _(double, trustregion_before, INFINITY) \ + _(double, trustregion_after, INFINITY) +static struct vnlog_debug_data_t +{ +#define VNLOG_DEBUG_DECLARE_FIELD(type, name, initialvalue) type name; + VNLOG_DEBUG_FIELDS(VNLOG_DEBUG_DECLARE_FIELD) +} vnlog_debug_data; +static void vnlog_debug_reset(void) +{ +#define VNLOG_DEBUG_RESET_FIELD(type, name, initialvalue) vnlog_debug_data.name = initialvalue; + VNLOG_DEBUG_FIELDS(VNLOG_DEBUG_RESET_FIELD); +} +static void vnlog_debug_emit_legend(void) +{ +#define VNLOG_DEBUG_SPACE_FIELD_NAME(type, name, initialvalue) " " #name + + vnlog_debug_reset(); + printf("# iteration step_accepted" VNLOG_DEBUG_FIELDS(VNLOG_DEBUG_SPACE_FIELD_NAME) "\n"); +} +__attribute__((unused)) +static void vnlog_emit_double(double x) +{ + if(x == INFINITY) printf("- "); + else printf("%g ", x); +} +__attribute__((unused)) +static void vnlog_emit_int(int x) +{ + if(x == -1) printf("- "); + else printf("%d ", x); +} +__attribute__((unused)) +static void vnlog_emit_vnlog_debug_step_type_t(vnlog_debug_step_type_t x) +{ +#define VNLOG_DEBUG_STEP_TYPE_SWITCH_EMIT(name,shortname) case name: printf(shortname " "); break; + switch(x) + { + VNLOG_DEBUG_STEP_TYPE_LIST(VNLOG_DEBUG_STEP_TYPE_SWITCH_EMIT) + default: printf("- "); + } +} +static void vnlog_debug_emit_record(int iteration, int step_accepted) +{ +#define VNLOG_DEBUG_EMIT_FIELD(type, name, initialvalue) vnlog_emit_ ## type(vnlog_debug_data.name); + + printf("%d %d ", iteration, step_accepted); + VNLOG_DEBUG_FIELDS(VNLOG_DEBUG_EMIT_FIELD); + printf("\n"); + fflush(stdout); + vnlog_debug_reset(); +} + +// Default parameters. 
Used only by the original API, which uses a global set of +// parameters +#define PARAMETERS_DEFAULT \ + { \ + .max_iterations = 100, \ + .dogleg_debug = 0, \ + .trustregion0 = 1.0e3, \ + .trustregion_decrease_factor = 0.1, \ + .trustregion_decrease_threshold = 0.25, \ + .trustregion_increase_factor = 2, \ + .trustregion_increase_threshold = 0.75, \ + .Jt_x_threshold = 1e-8, \ + .update_threshold = 1e-8, \ + .trustregion_threshold = 1e-8 \ + } + +static const dogleg_parameters2_t parameters_default = PARAMETERS_DEFAULT; +static dogleg_parameters2_t parameters_global = PARAMETERS_DEFAULT; +void dogleg_getDefaultParameters(dogleg_parameters2_t* parameters) +{ + *parameters = parameters_default; +} // if I ever see a singular JtJ, I factor JtJ + LAMBDA*I from that point on #define LAMBDA_INITIAL 1e-10 @@ -63,31 +144,31 @@ // these parameters likely should be messed with void dogleg_setDebug(int debug) { - DOGLEG_DEBUG = debug; + parameters_global.dogleg_debug = debug; } void dogleg_setInitialTrustregion(double t) { - TRUSTREGION0 = t; + parameters_global.trustregion0 = t; } void dogleg_setThresholds(double Jt_x, double update, double trustregion) { - if(Jt_x > 0.0) JT_X_THRESHOLD = Jt_x; - if(update > 0.0) UPDATE_THRESHOLD = update; - if(trustregion > 0.0) TRUSTREGION_THRESHOLD = trustregion; + if(Jt_x > 0.0) parameters_global.Jt_x_threshold = Jt_x; + if(update > 0.0) parameters_global.update_threshold = update; + if(trustregion > 0.0) parameters_global.trustregion_threshold = trustregion; } // these parameters likely should not be messed with. void dogleg_setMaxIterations(int n) { - MAX_ITERATIONS = n; + parameters_global.max_iterations = n; } void dogleg_setTrustregionUpdateParameters(double downFactor, double downThreshold, double upFactor, double upThreshold) { - TRUSTREGION_DECREASE_FACTOR = downFactor; - TRUSTREGION_DECREASE_THRESHOLD = downThreshold; - TRUSTREGION_INCREASE_FACTOR = upFactor; - TRUSTREGION_INCREASE_THRESHOLD = upThreshold; + parameters_global.trustregion_decrease_factor = downFactor; + parameters_global.trustregion_decrease_threshold = downThreshold; + parameters_global.trustregion_increase_factor = upFactor; + parameters_global.trustregion_increase_threshold = upThreshold; } @@ -134,12 +215,20 @@ for(int i=0; iupdateCauchy_valid) - return; - point->updateCauchy_valid = 1; - - // I look at a step in the steepest direction that minimizes my - // quadratic error function (Cauchy point). If this is past my trust region, - // I move as far as the trust region allows along the steepest descent - // direction. My error function is F=norm2(f(p)). dF/dP = 2*ft*J. - // This is proportional to Jt_x, which is thus the steepest ascent direction. - // - // Thus along this direction we have F(k) = norm2(f(p + k Jt_x)). The Cauchy - // point is where F(k) is at a minimum: - // dF_dk = 2 f(p + k Jt_x)t J Jt_x ~ (x + k J Jt_x)t J Jt_x = - // = xt J Jt x + k xt J Jt J Jt x = norm2(Jt x) + k norm2(J Jt x) - // dF_dk = 0 -> k= -norm2(Jt x) / norm2(J Jt x) - // Summary: - // the steepest direction is parallel to Jt*x. The Cauchy point is at - // k*Jt*x where k = -norm2(Jt*x)/norm2(J*Jt*x) - double norm2_Jt_x = norm2(point->Jt_x, ctx->Nstate); - double norm2_J_Jt_x = ctx->is_sparse ? 
- norm2_mul_spmatrix_t_densevector(point->Jt, point->Jt_x) : - norm2_mul_matrix_vector (point->J_dense, point->Jt_x, ctx->Nmeasurements, ctx->Nstate); - double k = -norm2_Jt_x / norm2_J_Jt_x; - - point->updateCauchy_lensq = k*k * norm2_Jt_x; + if(!point->updateCauchy_valid) + { + point->updateCauchy_valid = 1; - vec_copy_scaled(point->updateCauchy, - point->Jt_x, k, ctx->Nstate); + // I look at a step in the steepest direction that minimizes my + // quadratic error function (Cauchy point). If this is past my trust region, + // I move as far as the trust region allows along the steepest descent + // direction. My error function is F=norm2(f(p)). dF/dP = 2*ft*J. + // This is proportional to Jt_x, which is thus the steepest ascent direction. + // + // Thus along this direction we have F(k) = norm2(f(p + k Jt_x)). The Cauchy + // point is where F(k) is at a minimum: + // dF_dk = 2 f(p + k Jt_x)t J Jt_x ~ (x + k J Jt_x)t J Jt_x = + // = xt J Jt x + k xt J Jt J Jt x = norm2(Jt x) + k norm2(J Jt x) + // dF_dk = 0 -> k= -norm2(Jt x) / norm2(J Jt x) + // Summary: + // the steepest direction is parallel to Jt*x. The Cauchy point is at + // k*Jt*x where k = -norm2(Jt*x)/norm2(J*Jt*x) + double norm2_Jt_x = norm2(point->Jt_x, ctx->Nstate); + double norm2_J_Jt_x = ctx->is_sparse ? + norm2_mul_spmatrix_t_densevector(point->Jt, point->Jt_x) : + norm2_mul_matrix_vector (point->J_dense, point->Jt_x, ctx->Nmeasurements, ctx->Nstate); + double k = -norm2_Jt_x / norm2_J_Jt_x; + + point->updateCauchy_lensq = k*k * norm2_Jt_x; + + vec_copy_scaled(point->updateCauchy, + point->Jt_x, k, ctx->Nstate); + SAY_IF_VERBOSE( "cauchy step size %.6g", sqrt(point->updateCauchy_lensq)); + } - if( DOGLEG_DEBUG ) - SAY( "cauchy step size %.6g", sqrt(point->updateCauchy_lensq)); + if( ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG ) + vnlog_debug_data.step_len_cauchy = sqrt(point->updateCauchy_lensq); } // LAPACK prototypes for a packed cholesky factorization and a linear solve @@ -438,9 +529,8 @@ else ctx->lambda *= 10.0; ASSERT( isfinite(ctx->lambda) ); - if( DOGLEG_DEBUG ) - SAY( "singular JtJ. Have rank/full rank: %zd/%d. Adding %g I from now on", - ctx->factorization->minor, ctx->Nstate, ctx->lambda); + SAY_IF_VERBOSE( "singular JtJ. Have rank/full rank: %zd/%d. Adding %g I from now on", + ctx->factorization->minor, ctx->Nstate, ctx->lambda); } } else @@ -503,8 +593,7 @@ if( ctx->lambda == 0.0) ctx->lambda = LAMBDA_INITIAL; else ctx->lambda *= 10.0; - if( DOGLEG_DEBUG ) - SAY( "singular JtJ. Adding %g I from now on", ctx->lambda); + SAY_IF_VERBOSE( "singular JtJ. Adding %g I from now on", ctx->lambda); } } } @@ -512,58 +601,60 @@ static void computeGaussNewtonUpdate(dogleg_operatingPoint_t* point, dogleg_solverContext_t* ctx) { // I already have this data, so don't need to recompute - if(point->updateGN_valid) - return; + if(!point->updateGN_valid) + { + dogleg_computeJtJfactorization(point, ctx); - dogleg_computeJtJfactorization(point, ctx); + // try to factorize the matrix directly. If it's singular, add a small + // constant to the diagonal. This constant gets larger if we keep being + // singular + if( ctx->is_sparse ) + { + // solve JtJ*updateGN = Jt*x. 
Gauss-Newton step is then -updateGN + cholmod_dense Jt_x_dense = {.nrow = ctx->Nstate, + .ncol = 1, + .nzmax = ctx->Nstate, + .d = ctx->Nstate, + .x = point->Jt_x, + .xtype = CHOLMOD_REAL, + .dtype = CHOLMOD_DOUBLE}; + + if(point->updateGN_cholmoddense != NULL) + cholmod_free_dense(&point->updateGN_cholmoddense, &ctx->common); + + point->updateGN_cholmoddense = cholmod_solve(CHOLMOD_A, + ctx->factorization, + &Jt_x_dense, + &ctx->common); + vec_negate(point->updateGN_cholmoddense->x, + ctx->Nstate); // should be more efficient than this later - // try to factorize the matrix directly. If it's singular, add a small - // constant to the diagonal. This constant gets larger if we keep being - // singular - if( ctx->is_sparse ) - { - // solve JtJ*updateGN = Jt*x. Gauss-Newton step is then -updateGN - cholmod_dense Jt_x_dense = {.nrow = ctx->Nstate, - .ncol = 1, - .nzmax = ctx->Nstate, - .d = ctx->Nstate, - .x = point->Jt_x, - .xtype = CHOLMOD_REAL, - .dtype = CHOLMOD_DOUBLE}; - - if(point->updateGN_cholmoddense != NULL) - cholmod_free_dense(&point->updateGN_cholmoddense, &ctx->common); - - point->updateGN_cholmoddense = cholmod_solve(CHOLMOD_A, - ctx->factorization, - &Jt_x_dense, - &ctx->common); - vec_negate(point->updateGN_cholmoddense->x, - ctx->Nstate); // should be more efficient than this later + point->updateGN_lensq = norm2(point->updateGN_cholmoddense->x, ctx->Nstate); + } + else + { + memcpy( point->updateGN_dense, point->Jt_x, ctx->Nstate * sizeof(point->updateGN_dense[0])); + int info; + dpptrs_(&(char){'L'}, &(int){ctx->Nstate}, &(int){1}, + ctx->factorization_dense, + point->updateGN_dense, &(int){ctx->Nstate}, &info, 1); + vec_negate(point->updateGN_dense, + ctx->Nstate); // should be more efficient than this later - point->updateGN_lensq = norm2(point->updateGN_cholmoddense->x, ctx->Nstate); - } - else - { - memcpy( point->updateGN_dense, point->Jt_x, ctx->Nstate * sizeof(point->updateGN_dense[0])); - int info; - dpptrs_(&(char){'L'}, &(int){ctx->Nstate}, &(int){1}, - ctx->factorization_dense, - point->updateGN_dense, &(int){ctx->Nstate}, &info, 1); - vec_negate(point->updateGN_dense, - ctx->Nstate); // should be more efficient than this later + point->updateGN_lensq = norm2(point->updateGN_dense, ctx->Nstate); + } - point->updateGN_lensq = norm2(point->updateGN_dense, ctx->Nstate); + SAY_IF_VERBOSE( "gn step size %.6g", sqrt(point->updateGN_lensq)); + point->updateGN_valid = 1; } + if( ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG ) + vnlog_debug_data.step_len_gauss_newton = sqrt(point->updateGN_lensq); - if( DOGLEG_DEBUG ) - SAY( "gn step size %.6g", sqrt(point->updateGN_lensq)); - - point->updateGN_valid = 1; } static void computeInterpolatedUpdate(double* update_dogleg, + double* update_dogleg_lensq, dogleg_operatingPoint_t* point, double trustregion, const dogleg_solverContext_t* ctx) @@ -606,13 +697,20 @@ } double k = (neg_c + sqrt(discriminant))/l2; + *update_dogleg_lensq = 0.0; for(int i=0; iNstate; i++) + { update_dogleg[i] = a[i] + k*(b[i] - a[i]); + *update_dogleg_lensq += update_dogleg[i]*update_dogleg[i]; + } - if( DOGLEG_DEBUG ) + SAY_IF_VERBOSE( "k_cauchy_to_gn %.6g, norm %.6g", + k, + sqrt(*update_dogleg_lensq)); + if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) { - double updateNorm = norm2(update_dogleg, ctx->Nstate); - SAY( "k_cauchy_to_gn %.6g, norm %.6g", k, sqrt(updateNorm)); + vnlog_debug_data.step_len_interpolated = sqrt(*update_dogleg_lensq); + vnlog_debug_data.k_cauchy_to_gn = k; } } @@ -649,10 +747,9 @@ // If the largest absolute gradient 
element is smaller than the threshold, // we can stop iterating. This is equivalent to the inf-norm for(int i=0; iNstate; i++) - if(fabs(point->Jt_x[i]) > JT_X_THRESHOLD) + if(fabs(point->Jt_x[i]) > ctx->parameters->Jt_x_threshold) return 0; - if( DOGLEG_DEBUG ) - SAY( "Jt_x all below the threshold. Done iterating!"); + SAY_IF_VERBOSE( "Jt_x all below the threshold. Done iterating!"); return 1; } @@ -676,30 +773,38 @@ // takes a step from the given operating point, using the given trust region // radius. Returns the expected improvement, based on the step taken and the // linearized x(p). If we can stop iterating, returns a negative number -static double takeStepFrom(dogleg_operatingPoint_t* pointFrom, double* newp, - double trustregion, dogleg_solverContext_t* ctx) +static double takeStepFrom(dogleg_operatingPoint_t* pointFrom, + double* p_new, + double* step, + double* step_len_sq, + double trustregion, + dogleg_solverContext_t* ctx) { - if( DOGLEG_DEBUG ) - SAY( "taking step with trustregion %.6g", trustregion); - - double update_array[ctx->Nstate]; - double* update; - + SAY_IF_VERBOSE( "taking step with trustregion %.6g", trustregion); + if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) + { + vnlog_debug_data.trustregion_before = trustregion; + vnlog_debug_data.norm2x_before = pointFrom->norm2_x; + } computeCauchyUpdate(pointFrom, ctx); if(pointFrom->updateCauchy_lensq >= trustregion*trustregion) { - if( DOGLEG_DEBUG ) - SAY( "taking cauchy step"); + SAY_IF_VERBOSE( "taking cauchy step"); + if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) + { + vnlog_debug_data.step_type = STEPTYPE_CAUCHY; + vnlog_debug_data.step_len = vnlog_debug_data.step_len_cauchy; + } + *step_len_sq = pointFrom->updateCauchy_lensq; // cauchy step goes beyond my trust region, so I do a gradient descent // to the edge of my trust region and call it good - vec_copy_scaled(update_array, + vec_copy_scaled(step, pointFrom->updateCauchy, trustregion / sqrt(pointFrom->updateCauchy_lensq), ctx->Nstate); - update = update_array; pointFrom->didStepToEdgeOfTrustRegion = 1; } else @@ -713,41 +818,69 @@ computeGaussNewtonUpdate(pointFrom, ctx); if(pointFrom->updateGN_lensq <= trustregion*trustregion) { - if( DOGLEG_DEBUG ) - SAY( "taking GN step"); + SAY_IF_VERBOSE( "taking GN step"); + if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) + { + vnlog_debug_data.step_type = STEPTYPE_GAUSSNEWTON; + vnlog_debug_data.step_len = vnlog_debug_data.step_len_gauss_newton; + } + *step_len_sq = pointFrom->updateGN_lensq; // full Gauss-Newton step lies within my trust region. Take the full step - update = ctx->is_sparse ? pointFrom->updateGN_cholmoddense->x : pointFrom->updateGN_dense; + memcpy( step, + ctx->is_sparse ? pointFrom->updateGN_cholmoddense->x : pointFrom->updateGN_dense, + ctx->Nstate * sizeof(step[0]) ); pointFrom->didStepToEdgeOfTrustRegion = 0; } else { - if( DOGLEG_DEBUG ) - SAY( "taking interpolated step"); + SAY_IF_VERBOSE( "taking interpolated step"); // full Gauss-Newton step lies outside my trust region, so I interpolate // between the Cauchy-point step and the Gauss-Newton step to find a step // that takes me to the edge of my trust region. 
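The interpolated step solves for the scalar k at which the segment from the Cauchy step a to the Gauss-Newton step b crosses the trust-region boundary: norm2(a + k*(b-a)) = trustregion^2. A self-contained restatement of the quadratic that computeInterpolatedUpdate() solves earlier in this patch (illustrative helper, not part of the patch itself):

  // Returns k with norm2(a + k*(b-a)) = trustregion^2, assuming
  // norm2(a) <= trustregion^2 <= norm2(b). Expanding the norm gives
  //   l2*k^2 + 2*c*k + (norm2(a) - trustregion^2) = 0
  // with l2 = norm2(b-a), c = dot(a, b-a); we want the positive root
  static double interpolation_k(const double* a, const double* b,
                                int n, double trustregion)
  {
    double l2 = 0.0, c = 0.0, norm2_a = 0.0;
    for(int i=0; i<n; i++)
    {
      double d = b[i] - a[i];
      l2      += d*d;
      c       += a[i]*d;
      norm2_a += a[i]*a[i];
    }
    double discriminant = c*c - l2*(norm2_a - trustregion*trustregion);
    if(discriminant < 0.0) discriminant = 0.0; // guard against roundoff
    return (-c + sqrt(discriminant)) / l2;
  }

This matches the k = (neg_c + sqrt(discriminant))/l2 expression in the patched computeInterpolatedUpdate().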
- computeInterpolatedUpdate(update_array, pointFrom, trustregion, ctx); - update = update_array; + computeInterpolatedUpdate(step, + step_len_sq, + pointFrom, trustregion, ctx); pointFrom->didStepToEdgeOfTrustRegion = 1; + if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) + { + vnlog_debug_data.step_type = STEPTYPE_INTERPOLATED; + vnlog_debug_data.step_len = vnlog_debug_data.step_len_interpolated; + } } } - - // take the step - vec_add(newp, pointFrom->p, update, ctx->Nstate); - double expectedImprovement = computeExpectedImprovement(update, pointFrom, ctx); + vec_add(p_new, pointFrom->p, step, ctx->Nstate); + double expectedImprovement = computeExpectedImprovement(step, pointFrom, ctx); + if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) + { + vnlog_debug_data.expected_improvement = expectedImprovement; + + if(pointFrom->step_to_here_len_sq != INFINITY) + { + double cos_direction_change = + inner(step, pointFrom->step_to_here, ctx->Nstate) / + sqrt(*step_len_sq * pointFrom->step_to_here_len_sq); + + // check the numerical overflow cases + if(cos_direction_change >= 1.0) + vnlog_debug_data.step_direction_change_deg = 0.0; + else if(cos_direction_change <= -1.0) + vnlog_debug_data.step_direction_change_deg = 180.0; + else + vnlog_debug_data.step_direction_change_deg = 180.0/M_PI*acos(cos_direction_change); + } + } // are we done? For each state variable I look at the update step. If all the elements fall below // a threshold, I call myself done for(int i=0; iNstate; i++) - if( fabs(update[i]) > UPDATE_THRESHOLD ) + if( fabs(step[i]) > ctx->parameters->update_threshold ) return expectedImprovement; - if( DOGLEG_DEBUG ) - SAY( "update small enough. Done iterating!"); + SAY_IF_VERBOSE( "update small enough. Done iterating!"); return -1.0; } @@ -759,23 +892,24 @@ static int evaluateStep_adjustTrustRegion(const dogleg_operatingPoint_t* before, const dogleg_operatingPoint_t* after, double* trustregion, - double expectedImprovement) + double expectedImprovement, + dogleg_solverContext_t* ctx) { double observedImprovement = before->norm2_x - after->norm2_x; - double rho = observedImprovement / expectedImprovement; - if( DOGLEG_DEBUG ) + SAY_IF_VERBOSE( "observed/expected improvement: %.6g/%.6g. rho = %.6g", + observedImprovement, expectedImprovement, rho); + if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) { - SAY( "observed/expected improvement: %.6g/%.6g. rho = %.6g", - observedImprovement, expectedImprovement, rho); + vnlog_debug_data.observed_improvement = observedImprovement; + vnlog_debug_data.rho = rho; } // adjust the trust region - if( rho < TRUSTREGION_DECREASE_THRESHOLD ) + if( rho < ctx->parameters->trustregion_decrease_threshold ) { - if( DOGLEG_DEBUG ) - SAY( "rho too small. decreasing trust region"); + SAY_IF_VERBOSE( "rho too small. decreasing trust region"); // Our model doesn't fit well. We should reduce the trust region size. If // the trust region size was affecting the attempted step, do this by a @@ -784,60 +918,68 @@ if( !before->didStepToEdgeOfTrustRegion ) *trustregion = sqrt(before->updateGN_lensq); - *trustregion *= TRUSTREGION_DECREASE_FACTOR; + *trustregion *= ctx->parameters->trustregion_decrease_factor; } - else if (rho > TRUSTREGION_INCREASE_THRESHOLD && before->didStepToEdgeOfTrustRegion) + else if (rho > ctx->parameters->trustregion_increase_threshold && before->didStepToEdgeOfTrustRegion) { - if( DOGLEG_DEBUG ) - SAY( "rho large enough. increasing trust region"); + SAY_IF_VERBOSE( "rho large enough. 
increasing trust region"); - *trustregion *= TRUSTREGION_INCREASE_FACTOR; + *trustregion *= ctx->parameters->trustregion_increase_factor; } + if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) + vnlog_debug_data.trustregion_after = *trustregion; return rho > 0.0; } static int runOptimizer(dogleg_solverContext_t* ctx) { - double trustregion = TRUSTREGION0; + double trustregion = ctx->parameters->trustregion0; int stepCount = 0; if( computeCallbackOperatingPoint(ctx->beforeStep, ctx) ) return stepCount; - if( DOGLEG_DEBUG ) - SAY( "Initial operating point has norm2_x %.6g", ctx->beforeStep->norm2_x); + SAY_IF_VERBOSE( "Initial operating point has norm2_x %.6g", ctx->beforeStep->norm2_x); - while( stepCountbeforeStep->step_to_here_len_sq = INFINITY; + + while( stepCountparameters->max_iterations ) { - if( DOGLEG_DEBUG ) - { - SAY( "================= step %d", stepCount ); - } + SAY_IF_VERBOSE( "================= step %d", stepCount ); while(1) { - if( DOGLEG_DEBUG ) - SAY("--------"); + SAY_IF_VERBOSE("--------"); double expectedImprovement = - takeStepFrom(ctx->beforeStep, ctx->afterStep->p, trustregion, ctx); + takeStepFrom(ctx->beforeStep, + ctx->afterStep->p, + ctx->afterStep->step_to_here, + &ctx->afterStep->step_to_here_len_sq, + trustregion, ctx); // negative expectedImprovement is used to indicate that we're done if(expectedImprovement < 0.0) + { + if( ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG ) + vnlog_debug_emit_record(stepCount, 1); return stepCount; + } int afterStepZeroGradient = computeCallbackOperatingPoint(ctx->afterStep, ctx); - if( DOGLEG_DEBUG ) - SAY( "Evaluated operating point with norm2_x %.6g", ctx->afterStep->norm2_x); + SAY_IF_VERBOSE( "Evaluated operating point with norm2_x %.6g", ctx->afterStep->norm2_x); + if(ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG) + vnlog_debug_data.norm2x_after = ctx->afterStep->norm2_x; if( evaluateStep_adjustTrustRegion(ctx->beforeStep, ctx->afterStep, &trustregion, - expectedImprovement) ) + expectedImprovement, ctx) ) { - if( DOGLEG_DEBUG ) - SAY( "accepted step"); + SAY_IF_VERBOSE( "accepted step"); + if( ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG ) + vnlog_debug_emit_record(stepCount, 1); stepCount++; // I accept this step, so the after-step operating point is the before-step operating point @@ -850,8 +992,7 @@ if( afterStepZeroGradient ) { - if( DOGLEG_DEBUG ) - SAY( "Gradient low enough and we just improved. Done iterating!" ); + SAY_IF_VERBOSE( "Gradient low enough and we just improved. Done iterating!" ); return stepCount; } @@ -859,16 +1000,15 @@ break; } - if( DOGLEG_DEBUG ) - SAY( "rejected step"); + SAY_IF_VERBOSE( "rejected step"); + if( ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG ) + vnlog_debug_emit_record(stepCount, 0); // This step was rejected. check if the new trust region size is small // enough to give up - if(trustregion < TRUSTREGION_THRESHOLD) + if(trustregion < ctx->parameters->trustregion_threshold) { - if( DOGLEG_DEBUG ) - SAY( "trust region small enough. Giving up. Done iterating!"); - + SAY_IF_VERBOSE( "trust region small enough. Giving up. 
Done iterating!"); return stepCount; } @@ -876,14 +1016,14 @@ } } - if( DOGLEG_DEBUG && stepCount == MAX_ITERATIONS) - SAY( "Exceeded max number of iterations"); + if(stepCount == ctx->parameters->max_iterations) + SAY_IF_VERBOSE( "Exceeded max number of iterations"); return stepCount; } static -dogleg_operatingPoint_t* allocOperatingPoint(int Nstate, int numMeasurements, +dogleg_operatingPoint_t* allocOperatingPoint(int Nstate, int numMeasurements, unsigned int NJnnz, cholmod_common* common) { @@ -892,11 +1032,30 @@ dogleg_operatingPoint_t* point = malloc(sizeof(dogleg_operatingPoint_t)); ASSERT(point != NULL); - point->p = malloc(Nstate * sizeof(point->p[0])); - ASSERT(point->p != NULL); - point->x = malloc(numMeasurements * sizeof(point->x[0])); - ASSERT(point->x != NULL); + int Npool = + Nstate + + numMeasurements + + Nstate + + Nstate + + Nstate; + if(!is_sparse) + Npool += numMeasurements*Nstate + Nstate; + + double* pool = malloc( Npool * sizeof(double) ); + ASSERT( pool != NULL ); + + point->p = &pool[0]; + point->x = &pool[Nstate]; + point->Jt_x = &pool[Nstate + + numMeasurements]; + point->updateCauchy = &pool[Nstate + + numMeasurements + + Nstate]; + point->step_to_here = &pool[Nstate + + numMeasurements + + Nstate + + Nstate]; if( is_sparse ) { @@ -911,20 +1070,19 @@ } else { - point->J_dense = malloc( numMeasurements * Nstate * sizeof(point->J_dense[0]) ); - ASSERT(point->J_dense != NULL); - - point->updateGN_dense = malloc( Nstate * sizeof(point->updateGN_dense[0]) ); - ASSERT(point->updateGN_dense != NULL); + point->J_dense = &pool[Nstate + + numMeasurements + + Nstate + + Nstate + + Nstate]; + point->updateGN_dense = &pool[Nstate + + numMeasurements + + Nstate + + Nstate + + Nstate + + numMeasurements * Nstate]; } - // the 1-column vector Jt * x - point->Jt_x = malloc(Nstate * sizeof(point->Jt_x[0])); - ASSERT(point->Jt_x != NULL); - - // the cached update vectors - point->updateCauchy = malloc(Nstate * sizeof(point->updateCauchy[0])); - // vectors don't have any valid data yet point->updateCauchy_valid = 0; point->updateGN_valid = 0; @@ -934,8 +1092,9 @@ static void freeOperatingPoint(dogleg_operatingPoint_t** point, cholmod_common* common) { + // MUST match allocOperatingPoint(). It does a single malloc() and stores the + // pointer into p free((*point)->p); - free((*point)->x); int is_sparse = common != NULL; @@ -946,14 +1105,6 @@ if((*point)->updateGN_cholmoddense != NULL) cholmod_free_dense(&(*point)->updateGN_cholmoddense, common); } - else - { - free( (*point)->J_dense ); - free( (*point)->updateGN_dense ); - } - - free((*point)->Jt_x); - free((*point)->updateCauchy); free(*point); *point = NULL; @@ -967,7 +1118,7 @@ va_start(ap, s); int ret = vfprintf(stderr, s, ap); va_end(ap); - + fprintf(stderr, "\n"); return ret; } @@ -1009,6 +1160,9 @@ unsigned int Nmeas, unsigned int NJnnz, dogleg_callback_t* f, void* cookie, + + // NULL to use the globals + const dogleg_parameters2_t* parameters, dogleg_solverContext_t** returnContext) { int is_sparse = NJnnz > 0; @@ -1021,7 +1175,10 @@ ctx->lambda = 0.0; ctx->Nstate = Nstate; ctx->Nmeasurements = Nmeas; + ctx->parameters = parameters ? parameters : ¶meters_global; + if( ctx->parameters->dogleg_debug & DOGLEG_DEBUG_VNLOG ) + vnlog_debug_emit_legend(); if( returnContext != NULL ) *returnContext = ctx; @@ -1052,20 +1209,20 @@ // iteration memcpy(p, ctx->beforeStep->p, Nstate * sizeof(double)); + SAY_IF_VERBOSE( "success! 
took %d iterations", numsteps); + if( returnContext == NULL ) dogleg_freeContext(&ctx); - if( DOGLEG_DEBUG ) - SAY( "success! took %d iterations", numsteps); - return norm2_x; } -double dogleg_optimize(double* p, unsigned int Nstate, - unsigned int Nmeas, unsigned int NJnnz, - dogleg_callback_t* f, - void* cookie, - dogleg_solverContext_t** returnContext) +double dogleg_optimize2(double* p, unsigned int Nstate, + unsigned int Nmeas, unsigned int NJnnz, + dogleg_callback_t* f, + void* cookie, + const dogleg_parameters2_t* parameters, + dogleg_solverContext_t** returnContext) { if( NJnnz == 0 ) { @@ -1074,15 +1231,1289 @@ } return _dogleg_optimize(p, Nstate, Nmeas, NJnnz, f, - cookie, returnContext); + cookie, + parameters, + returnContext); +} + +double dogleg_optimize(double* p, unsigned int Nstate, + unsigned int Nmeas, unsigned int NJnnz, + dogleg_callback_t* f, + void* cookie, + dogleg_solverContext_t** returnContext) +{ + return dogleg_optimize2(p,Nstate,Nmeas,NJnnz,f,cookie, + NULL, // no parameters; use the globals + returnContext); } +double dogleg_optimize_dense2(double* p, unsigned int Nstate, + unsigned int Nmeas, + dogleg_callback_dense_t* f, void* cookie, + const dogleg_parameters2_t* parameters, + dogleg_solverContext_t** returnContext) +{ + return _dogleg_optimize(p, Nstate, Nmeas, 0, (dogleg_callback_t*)f, + cookie, + parameters, + returnContext); +} double dogleg_optimize_dense(double* p, unsigned int Nstate, unsigned int Nmeas, dogleg_callback_dense_t* f, void* cookie, dogleg_solverContext_t** returnContext) { - return _dogleg_optimize(p, Nstate, Nmeas, 0, (dogleg_callback_t*)f, - cookie, returnContext); + return dogleg_optimize_dense2(p,Nstate,Nmeas,f,cookie, + NULL, // no parameters; use the globals + returnContext); +} + + + + + + + +// Computes pinv(J) for a subset of measurements: inv(JtJ) * +// Jt[imeasurement0..imeasurement0+N-1]. Returns false if something failed. +// ASSUMES THAT THE CHOLESKY FACTORIZATION HAS ALREADY BEEN COMPUTED. +// +// This function is experimental, and subject to change +static bool pseudoinverse_J_dense(// output + double* out, + + // inputs + const dogleg_operatingPoint_t* point, + const dogleg_solverContext_t* ctx, + int i_meas0, int NmeasInChunk) +{ + int info; + memcpy(out, + &point->J_dense[i_meas0*ctx->Nstate], + NmeasInChunk*ctx->Nstate*sizeof(double)); + dpptrs_(&(char){'L'}, &(int){ctx->Nstate}, &NmeasInChunk, + ctx->factorization_dense, + out, + &(int){ctx->Nstate}, &info, 1); + return info==0; +} + +// Computes pinv(J) for a subset of measurements: inv(JtJ) * +// Jt[imeasurement0..imeasurement0+N-1]. Returns false if something failed. +// ASSUMES THAT THE CHOLESKY FACTORIZATION HAS ALREADY BEEN COMPUTED. +// +// allocates memory, returns NULL on failure. ON SUCCESS, THE CALLER IS +// RESPONSIBLE FOR FREEING THE RETURNED MEMORY +// +// This function is experimental, and subject to change +static cholmod_dense* pseudoinverse_J_sparse(// inputs + const dogleg_operatingPoint_t* point, + dogleg_solverContext_t* ctx, + int i_meas0, int NmeasInChunk, + + // Pre-allocated array for the + // right-hand-side. This will be used as a + // workspace. Create this like so: + // + // cholmod_allocate_dense( Nstate, + // NmeasInChunk, + // Nstate, + // CHOLMOD_REAL, + // &ctx->common ); + + cholmod_dense* Jt_chunk) +{ + // I'm solving JtJ x = b where J is sparse, b is sparse, but x ends up dense. + // cholmod doesn't have functions for this exact case. so I use the + // dense-sparse-dense function (cholmod_solve), and densify the input. 
Instead
+  // of sparse-sparse-sparse and then densifying the output (cholmod_spsolve()),
+  // which feels like it would be slower.
+
+  memset( Jt_chunk->x, 0, Jt_chunk->nrow*Jt_chunk->ncol*sizeof(double) );
+  int Jt_chunk_ncol_backup = Jt_chunk->ncol;
+  for(int i_meas=0; i_meas<NmeasInChunk; i_meas++)
+  {
+    if( i_meas0 + i_meas >= (int)ctx->Nmeasurements )
+    {
+      // at the end, we could have one chunk with fewer than chunk_size
+      // columns
+      Jt_chunk->ncol = i_meas;
+      break;
+    }
+
+    for(unsigned int i0=P(point->Jt, i_meas0+i_meas); i0<P(point->Jt, i_meas0+i_meas+1); i0++)
+    {
+      int    irow = I(point->Jt,i0);
+      double x    = X(point->Jt,i0);
+      ((double*)Jt_chunk->x)[irow + i_meas*Jt_chunk->nrow] = x;
+    }
+  }
+
+  // solve inv(JtJ)Jt[slice]
+  cholmod_dense* pinv =
+    cholmod_solve(CHOLMOD_A,
+                  ctx->factorization,
+                  Jt_chunk,
+                  &ctx->common);
+  Jt_chunk->ncol = Jt_chunk_ncol_backup;
+  return pinv;
+}
+
+
+/*
+The below is a bunch of code for outlier detection/rejection and for confidence
+evaluation. IT'S ALL EXPERIMENTAL AND SUBJECT TO CHANGE
+
+What is an outlier? Suppose I just found an optimum. I define an outlier as an
+observation that does two things to the problem if I remove that observation:
+
+1. The cost function would improve significantly. Things would clearly improve
+   because the cost function contribution of the removed point itself would get
+   removed, but ALSO because the parameters could fit the remaining data better
+   without that extra observation; the "outlier" wouldn't pull the solution away
+   from the "true" optimum.
+
+2. The confidence of the solution does not significantly decrease. One could
+   imagine a set of data that define the problem poorly, and produce a low cost
+   function value for some (overfit) set of parameters. And one can imagine an
+   extra point being added that defines the problem and increases the confidence
+   of the solution. This extra point would suppress the overfitting, so this
+   extra point would increase the cost function value. Condition 1 above would
+   make this extra point look like an outlier, and this condition is meant to
+   detect this case and to classify this point as NOT an outlier
+
+Let's say we just computed an optimum least-squares fit, and we try to determine
+if some of the data points look like outliers. The least-squares problem I just
+solved has cost function
+
+  E = norm2(x)
+
+where x is a length-N vector of measurements. We solved it by optimizing the
+vector of parameters p. We're at an optimum, so dE/dp = 0 -> Jt x = 0
+
+We define an outlier as a measurement that would greatly improve the cost
+function E if this measurement was removed, and the problem was re-optimized.
+
+Let's say the problem is locally linear (J = dx/dp is constant), and let's say
+re-optimizing moves the parameter by dp. The original cost function was
+
+  E0 = norm2(x)
+
+If we move by dp, and take away the outlier's own cost, we get
+
+  E1 = norm2(x + J dp) - norm2( x* + J* dp )
+
+where x* and J* refer to the outlier measurements. Thus "Dima's self+others
+factor" is norm2(x) - (norm2(x + J dp) - norm2( x* + J* dp ))
+
+We can choose to look at ONLY the effect on the other variables. That would
+produce "Dima's others factor" = norm2(x)-norm2(x*) - (norm2(x + J dp) -
+norm2(x* + J* dp ))
+
+This is very similar to Cook's D factor (which Dima apparently reinvented 40
+years after the fact!). This factor looks not at the difference of norm2(x), but
+at norm2(difference) instead. So "Cook's self+others factor" is proportional to
+norm2(x - (x+J dp)). This is the "normal" Cook's D factor.
+We can also compute
+"Cook's others factor": norm2(x - vec(x*) - (x + J dp - (vec(x*) + full(J*) dp)))
+= norm2( - J dp + full(J*) dp )
+= norm2(J dp) + norm2(full(J*) dp) - 2 dpt Jt full(J*) dp
+= norm2(J dp) + norm2(J* dp) - 2 norm2(J* dp)   (since dpt Jt full(J*) dp = norm2(J* dp))
+= norm2(J dp) - norm2(J* dp)
+
+This is 4 flavors of a very similar computation. In summary (and ignoring scale
+factors):
+
+  Dima's self+others: -norm2(J dp) + 2 x*t J* dp + norm2(J* dp) + norm2(x*)
+  Dima's others     : -norm2(J dp) + 2 x*t J* dp + norm2(J* dp)
+  Cook's self+others: norm2(J dp)
+  Cook's others     : norm2(J dp) - norm2(J* dp)
+
+Let's compute these. dE1/dp = 0 at p+dp ->
+
+  0 = Jt x + JtJ dp - J*t x* - J*tJ* dp
+    = JtJ dp - J*t x* - J*tJ* dp
+
+-> dp = inv(JtJ - J*tJ*) J*t x*
+
+Woodbury identity:
+
+  inv(JtJ - J*t J*) =
+  = inv(JtJ) - inv(JtJ) J*t inv(-I + J* inv(JtJ) J*t) J* inv(JtJ)
+
+Let
+  A = J* inv(JtJ) J*t
+  B = inv(A - I)
+
+So
+  AB = BA = I+B
+
+Thus
+  inv(JtJ - J*t J*) =
+  = inv(JtJ) - inv(JtJ) J*t B J* inv(JtJ)
+
+and
+
+  dp = inv(JtJ - J*tJ*) J*t x* =
+     = inv(JtJ)J*t x* - inv(JtJ) J*t B J* inv(JtJ) J*t x*
+     = inv(JtJ)J*t x* - inv(JtJ) J*t B A x*
+     = inv(JtJ)J*t(I - B A) x*
+     = -inv(JtJ)J*t B x*
+
+Then
+
+  norm2(J dp)  = x*t ( B J* inv() Jt J inv() J*t B ) x*
+               = x*t ( B J* inv() J*t B ) x*
+               = x*t ( B A B ) x*
+               = x*t ( B + B*B ) x*
+
+  2 x*t J* dp  = -2 x*t J* inv(JtJ)J*t B x* =
+               = -2 x*t A B x* =
+               = x*t (-2AB) x* =
+               = x*t (-2I - 2B) x*
+
+  norm2(J* dp) = x*t ( B J* inv() J*tJ* inv() J*t B ) x* =
+               = x*t ( B A A B ) x* =
+               = x*t ( I + 2B + B*B ) x*
+
+  norm2(x*)    = x*t ( I ) x*
+
+There're two ways to compute the "Dima's self-only" factor. I can simply say
+that the measurements' cost of norm2(x*) has been removed, so the factor is
+x*t I x*. Or I can
+
+1. Remove the measurements
+2. Re-optimize
+3. Look to see how the residual of x* changes
+
+This is different because I look at what happens to x*(p) when x* is no longer
+in the optimized set.
+
+State moves by dp. x* moves by J* dp. I look at
+
+  dE = norm2(x* + J* dp) - norm2(x*)
+     = 2 x*t J* dp + norm2(J* dp) =
+     = x*t (-2I - 2B + I + 2B + B*B ) x* =
+     = x*t (B*B - I) x*
+
+I expect that removing measurements from the optimization should make their
+residuals worse. I.e. I expect dE > 0. Let's check. Is B*B - I positive
+definite?
Let's say that there's v,l such that + + (B*B-I)v = l v, norm2(v) = 1 + --> + BBv = (l+1)v + vBBv = l+1 + + Let u = Bv -> + norm2(u) = l+1 + + A = J* inv(JtJ) J*t + B = inv(A - I) -> + + v = (A-I)u + norm2(v) = 1 = norm2(Au) - 2ut A u + norm2(u) -> + -> l = 2ut A u - norm2(Au) + + Let w = J*t u + -> Au = J* inv(JtJ) w + -> ut A u = wt inv(JtJ) w -> + l = wt ( 2inv(JtJ) - inv(JtJ)J*tJ* inv(JtJ) ) w + + J*t J* = sum(outer(j*,j*)) = sum(outer(j,j)) - sum(outer(jnot*,jnot*)) = + = JtJ - Jnot*t Jnot* + + -> l = wt ( 2inv(JtJ) - inv(JtJ)JtJinv(JtJ) + inv(JtJ)Jnot*tJnot* inv(JtJ) ) w + = wt ( inv(JtJ) + inv(JtJ)Jnot*tJnot* inv(JtJ) ) w + + -> substitute -> l = wt ( C + CDC ) w + + where C >= 0 and D >= 0 + + wt C wt >= 0 + + wt CDC wt = (Ctw)t D Ctw >= 0 -> l >= 0 + +So B*B-I >= 0 and dE >= 0: removing a point will never make its own fit better + +- if dE >> 0: the other data does NOT support this measurement being correct +- if dE ~~ 0: the other data supports this measurement being correct + + +Putting all this together I get the expressions for the factors above: + + Dima's self+others: x*t (-B ) x* + Dima's others : x*t (-B - I ) x* + Dima's self (simple): x*t ( I ) x* + Dima's self (interesting): x*t (B*B - I ) x* + Cook's self+others: x*t ( B + B*B) x* + Cook's others : x*t (-B - I ) x* + + + +One can also do a similar analysis to gauge our confidence in a solution. We can +do this by + +1. Solving the optimization problem + +2. Querying the solution in a way we care about to produce a new feature + group of measurements x = f - ref. We can compute J = dx/dp = df/dp. And + we presumably know something about ref: like its probability + distribution for instance. Example: we just calibrated a camera; we want + to know how confident we are about a projection in a particular spot on + the imager. I compute a projection in that spot: q = project(v). If + added to the optimization I'd get x = q - ref where 'ref' would be the + observed pixel coordinate. + +3. If this new feature was added to the optimization, I can compute its + outlierness factor in the same way as before. If we are confident in the + solution in a particular spot, then we have consensus, and it wouldn't take + much for these queries to look like outliers: the expected value of the + outlierness would be high. Conversely, if we aren't well-defined then a wider + set of points would fit the solution, and things wouldn't look very outliery + +Very similar analysis to the above: + +Let p,x,J represent the solution. The new feature we're adding is x* with +jacobian J*. The solution would move by dp to get to the new optimum. + +Original solution is an optimum: Jt x = 0 + +If we add x* and move by dp, we get + + E1 = norm2(x + J dp) + norm2( x* + J* dp ) + +Thus "Dima's self+others factor" is (norm2(x + J dp) + norm2( x* + J* dp )) - +norm2(x) = norm2(Jdp) + norm2( x* + J* dp ) + +We can choose to look at ONLY the effect on the other variables. That would +produce "Dima's others factor" norm2(x + J dp) - norm2(x) = norm2(Jdp) + +This is very similar to Cook's D factor (which Dima apparently reinvented 40 +years after the fact!). This factor looks not at the difference of norm2(x), but +at norm2(difference) instead. So "Cook's others factor" is proportional to +norm2(x - (x+J dp)). This is the "normal" Cook's D factor. 
We can also compute +"Cook's self+others factor": norm2(concat(x + J dp, x* + J* dp) - concat(x,x*)) += norm2(x + J dp-x) + norm2(x* + J* dp - x*) = norm2(J dp) + norm2(J* dp) + +This is 4 flavors of a very similar computation. In summary (and ignoring scale +factors): + + Dima's self+others: norm2(J dp) + 2 x*t J* dp + norm2(J* dp) + norm2(x*) + Dima's others : norm2(J dp) + Cook's self+others: norm2(J dp) + norm2(J* dp) + Cook's others : norm2(J dp) + +The problem including the new point is also at an optimum: + + E1 = norm2(x + J dp) + norm2( x* + J* dp ) + dE1/dp = 0 -> 0 = Jt x + JtJ dp + J*t x* + J*tJ*dp = + = JtJ dp + J*t x* + J*tJ*dp +-> dp = -inv(JtJ + J*tJ*) J*t x* + +Woodbury identity: + + -inv(JtJ + J*t J*) = + = -inv(JtJ) + inv(JtJ) J*t inv(I + J* inv(JtJ) J*t) J* inv(JtJ) + +Let + A = J* inv(JtJ) J*t (same as before) + B = inv(A + I) (NOT the same as before) + +So + AB = BA = I-B + +Thus + -inv(JtJ + J*t J*) = + = -inv(JtJ) + inv(JtJ) J*t B J* inv(JtJ) + +and + + dp = -inv(JtJ + J*tJ*) J*t x* = + = -inv(JtJ)J*t x* + inv(JtJ) J*t B J* inv(JtJ) J*t x* = + = -inv(JtJ)J*t x* + inv(JtJ) J*t B A x* + = -inv(JtJ)J*t(I - B A) x* + = -inv(JtJ)J*t B x* (same as before, but with a different B!) + +Then + + norm2(J dp) = x*t ( B J* inv() Jt J inv() J*t B ) x* + = x*t ( B J* inv() J*t B ) x* + = x*t ( B A B ) x* + = x*t ( B - B*B ) x* + + 2 x*t J* dp = -2 x*t J* inv(JtJ)J*t B x* = + = -2 x*t A B x* = + = x*t (-2AB) x* = + = x*t (-2I + 2B) x* + + norm2(J* dp) = x*t ( B J* inv() J*tJ* inv() J*t B ) x* = + = x*t ( B A A B ) x* = + = x*t ( I - 2B + B*B ) x* + + norm2(x*) = x*t ( I ) x* + +How do I compute "Dima's self" factor? The "simple" flavor from above looks at +the new measurement only: norm2(x*). The "interesting" flavor, look at what +happens to the measurements' error when they're added to the optimization set. +State moves by dp. x* moves by J* dp. I look at + + dE = norm2(x* + J*dp) - norm2(x*) = + 2 x*t J* dp + norm2(J* dp) = + x*t (-2I + 2B + I - 2B + B*B) x* = + x*t (B*B - I) x* + +I expect that adding a point to the optimization would make it fit better: dE < +0. Let's check. Let's say that there's v,l such that + + (B*B-I)v = l v, norm2(v) = 1 + --> + BBv = (l+1)v + vBBv = l+1 + + Let u = Bv -> + norm2(u) = l+1 + + A = J* inv(JtJ) J*t + B = inv(A + I) -> + + v = (A+I)u + norm2(v) = 1 = norm2(Au) + 2ut A u + norm2(u) -> + -> l = -2ut A u - norm2(Au) + + Let w = J*t u + -> Au = J* inv(JtJ) w + -> ut A u = wt inv(JtJ) w -> + l = -2 wt inv(JtJ) w - norm2(Au) + + Since inv(JtJ) > 0 -> l < 0. As expected + +So B*B-I is negative definite: adding measurements to the optimization set makes +them fit better + +Putting all this together I get the expressions for the factors above: + + Dima's self+others: x*t (B ) x* + Dima's others : x*t (B - B*B) x* + Dima's self (simple): x*t (I ) x* + Dima's self (interesting): x*t (B*B - I) x* + Cook's self+others: x*t (I - B ) x* + Cook's others : x*t (B - B*B) x* + +These expressions are all tested and verified in +mrcal/analyses/outlierness-test.py + + +There're several slightly-different definitions of Cook's D and of a +rule-of-thumb threshold floating around on the internet. 
Wikipedia says: + + D = norm2(x_io - x_i)^2 / (Nstate * norm2(x_io)/(Nmeasurements - Nstate)) + D_threshold = 1 + +An article https://www.nature.com/articles/nmeth.3812 says + + D = norm2(x_io - x_i)^2 / ((Nstate+1) * norm2(x_io)/(Nmeasurements - Nstate -1)) + D_threshold = 4/Nmeasurements + +I haven't tracked down the reference for this second definition, but it looks +more reasonable, and I use it here. + +Here I use the second definition. That definition expands to + + k = xo^2 / ((Nstate+1) * norm2(x)/(Nmeasurements - Nstate -1)) + B = 1.0/(jt inv(JtJ) j - 1) + f = k * (B + B*B) + +I report normalized outlierness factors so that the threshold is 1. Thus I use + + k = Nmeasurements / (4*((Nstate+1) * norm2(x)/(Nmeasurements - Nstate - 1))) + +*/ + + +static void accum_outlierness_factor(// output + double* factor, + + // inputs + const double* x, + + // A is symmetric. I store the upper triangle row-first + const double* A, + + // if outliers are grouped into features, the + // feature size is set here + int featureSize, + double k) +{ + // This implements Dima's self+outliers factor. + // + // from the derivation in a big comment above I have + // + // f = k (x*t (-B) x* ) + // + // where B = inv(A - I) and + // A = J* inv(JtJ) J*t + + // I only implemented featureSize == 1 and 2 so far. + if(featureSize <= 1) + { + featureSize = 1; + + double denom = 1.0 - *A; + if( fabs(denom) < 1e-8 ) + { + *factor = DBL_MAX; // definitely an outlier + return; + } + else + *factor = x[0]*x[0] / denom; + } + else if(featureSize == 2) + { + double det = (1.0-A[0])*(1.0-A[2]) - A[1]*A[1]; + if( fabs(det) < 1e-8 ) + { + *factor = DBL_MAX; // definitely an outlier + return; + } + else + { + double B00_det = A[2] - 1.0; + double B11_det = A[0] - 1.0; + double B01_det = -A[1]; + + // inner(x, Bx) + double xBx = + (x[0]*x[0]*B00_det + + x[0]*x[1]*B01_det * 2.0 + + x[1]*x[1]*B11_det) / det; + + // norm2(Bx) + __attribute__((unused)) double v1 = x[0]*B00_det + x[1]*B01_det; + __attribute__((unused)) double v2 = x[0]*B01_det + x[1]*B11_det; + __attribute__((unused)) double xBBx = (v1*v1 + v2*v2) / (det*det); + + // // mine self+others + // *factor = -xBx; + + // // mine others / cook others + // *factor = -(x[0]*x[0] + x[1]*x[1]) - xBx; + + // cook's self+others + *factor = xBx + xBBx; + + } + } + else + { + SAY("featureSize > 2 not implemented yet. Got featureSize=%d", featureSize); + ASSERT(0); + } + + +#warning This is a hack. The threshold should be 1.0, and the scaling should make sure that is the case. 
I am leaving it for now + k /= 8.; + + + *factor *= k; +} + +static void getOutliernessScale(// out + double* scale, // if *scale > 0, I just keep what I have + + // in + int Nmeasurements, int Nstate, + int NoutlierFeatures, int featureSize, + double norm2_x) +{ + // if *scale > 0, I just keep what I have + if(*scale > 0.0) + return; + + int Nmeasurements_outliers = NoutlierFeatures*featureSize; + int Nmeasurements_nonoutliers = Nmeasurements - Nmeasurements_outliers; + + *scale = + (double)(Nmeasurements_nonoutliers) / + (4.*((double)(Nstate+1) * norm2_x/(double)(Nmeasurements_nonoutliers - Nstate - 1))); +} + +static bool getOutliernessFactors_dense( // output + double* factors, // Nfeatures factors + + // output, input + double* scale, // if <0 then I recompute + + // inputs + // if outliers are grouped into features, + // the feature size is set here + int featureSize, + int Nfeatures, + int NoutlierFeatures, + const dogleg_operatingPoint_t* point, + dogleg_solverContext_t* ctx ) +{ + // cholmod_spsolve() and cholmod_solve()) work in chunks of 4, so I do this in + // chunks of 4 too. I pass it rows of J, 4 at a time. Note that if I have + // measurement features, I don't want these to cross chunk boundaries, so I set + // up chunk_size=lcm(N,4) + int chunk_size = 4; + if(featureSize <= 1) + featureSize = 1; + if(featureSize > 1) + { + // haven't implemented anything else yet. Don't have lcm() readily available + ASSERT(featureSize == 2); + // chunk_size = lcm(chunk_size,featureSize) + } + + int Nstate = ctx->Nstate; + int Nmeasurements = ctx->Nmeasurements; + bool result = false; + + double* invJtJ_Jt = malloc(Nstate*chunk_size*sizeof(double)); + if(invJtJ_Jt == NULL) + { + SAY("Couldn't allocate invJtJ_Jt!"); + goto done; + } + + getOutliernessScale(scale, + Nmeasurements, Nstate, NoutlierFeatures, featureSize, point->norm2_x); + + int i_measurement_valid_chunk_start = -1; + int i_measurement_valid_chunk_last = -1; + int i_measurement = 0; + for( int i_feature=0; i_feature i_measurement_valid_chunk_last ) + { + bool pinvresult = pseudoinverse_J_dense(invJtJ_Jt, point, ctx, + i_measurement, chunk_size); + if(!pinvresult) + { + SAY("Couldn't compute pinv!"); + goto done; + } + i_measurement_valid_chunk_start = i_measurement; + i_measurement_valid_chunk_last = i_measurement+chunk_size-1; + } + + // from the derivation in a big comment in above I have + // + // f = scale (xot (...) xo ) + // + // where A = Jo inv(JtJ) Jot + // + // A is symmetric. I store the upper triangle + double A[featureSize*(featureSize+1)/2]; + int iA=0; + for(int i=0; iJ_dense[Nstate* i_measurement+j + k]; + } + accum_outlierness_factor(&factors[i_feature], + &point->x[i_measurement], + A, featureSize, *scale); + } + + result = true; + done: + free(invJtJ_Jt); + return result; +} + + +static bool getOutliernessFactors_sparse( // output + double* factors, // Nfeatures factors + + // output, input + double* scale, // if <0 then I recompute + + // inputs + // if outliers are grouped into features, + // the feature size is set here + int featureSize, + int Nfeatures, + int NoutlierFeatures, + const dogleg_operatingPoint_t* point, + dogleg_solverContext_t* ctx ) +{ + // cholmod_spsolve() and cholmod_solve()) work in chunks of 4, so I do this in + // chunks of 4 too. I pass it rows of J, 4 at a time. 
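Since the comments here punt on lcm(), a sketch of the helper that would let chunk_size track featureSize > 2 (hypothetical; the patch itself asserts featureSize == 2):

  static int gcd_int(int a, int b)
  {
    while(b != 0) { int t = a % b; a = b; b = t; }
    return a;
  }
  static int lcm_int(int a, int b)
  {
    return a / gcd_int(a, b) * b; // divide first to limit overflow
  }
  // then: chunk_size = lcm_int(chunk_size, featureSize);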
Note that if I have + // measurement features, I don't want these to cross chunk boundaries, so I set + // up chunk_size=lcm(N,4) + int chunk_size = 4; + if(featureSize <= 1) + featureSize = 1; + if(featureSize > 1) + { + // haven't implemented anything else yet. Don't have lcm() readily available + ASSERT(featureSize == 2); + // chunk_size = lcm(chunk_size,featureSize) + } + + int Nstate = ctx->Nstate; + int Nmeasurements = ctx->Nmeasurements; + bool result = false; + + cholmod_dense* invJtJ_Jt = NULL; + cholmod_dense* Jt_chunk = + cholmod_allocate_dense( Nstate, + chunk_size, + Nstate, + CHOLMOD_REAL, + &ctx->common ); + if(!Jt_chunk) + { + SAY("Couldn't allocate Jt_chunk!"); + goto done; + } + + getOutliernessScale(scale, + Nmeasurements, Nstate, NoutlierFeatures, featureSize, point->norm2_x); + + int i_measurement_valid_chunk_start = -1; + int i_measurement_valid_chunk_last = -1; + int i_measurement = 0; + for( int i_feature=0; i_feature i_measurement_valid_chunk_last ) + { + if(invJtJ_Jt) cholmod_free_dense(&invJtJ_Jt, &ctx->common); + invJtJ_Jt = pseudoinverse_J_sparse(point, ctx, + i_measurement, chunk_size, + Jt_chunk); + if(invJtJ_Jt == NULL) + { + SAY("Couldn't compute pinv!"); + goto done; + } + + i_measurement_valid_chunk_start = i_measurement; + i_measurement_valid_chunk_last = i_measurement+chunk_size-1; + } + + // from the derivation in a big comment in above I have + // + // f = scale (xot (...) xo ) + // + // where A = Jo inv(JtJ) Jot + // + // A is symmetric. I store the upper triangle + double A[featureSize*(featureSize+1)/2]; + int iA=0; + for(int i=0; iJt, i_measurement+j); + l < P(point->Jt, i_measurement+j+1); + l++) + { + int k = I(point->Jt, l); + A[iA] += + ((double*)invJtJ_Jt->x)[Nstate*(i_measurement+i-i_measurement_valid_chunk_start) + k] * + X(point->Jt, l); + } + } + accum_outlierness_factor(&factors[i_feature], + &point->x[i_measurement], + A, featureSize, *scale); + } + + result = true; + done: + if(Jt_chunk) cholmod_free_dense(&Jt_chunk, &ctx->common); + if(invJtJ_Jt) cholmod_free_dense(&invJtJ_Jt, &ctx->common); + return result; +} + +bool dogleg_getOutliernessFactors( // output + double* factors, // Nfeatures factors + + // output, input + double* scale, // if <0 then I recompute + + // inputs + // if outliers are grouped into features, the + // feature size is set here + int featureSize, + int Nfeatures, + int NoutlierFeatures, // how many outliers we already have + dogleg_operatingPoint_t* point, + dogleg_solverContext_t* ctx ) +{ + if(featureSize <= 1) + featureSize = 1; + + dogleg_computeJtJfactorization( point, ctx ); + bool result; + if(ctx->is_sparse) + result = getOutliernessFactors_sparse(factors, scale, featureSize, Nfeatures, NoutlierFeatures, point, ctx); + else + result = getOutliernessFactors_dense(factors, scale, featureSize, Nfeatures, NoutlierFeatures, point, ctx); + +#if 0 + if( result ) + { + int Nstate = ctx->Nstate; + int Nmeasurements = ctx->Nmeasurements; + + + + static FILE* fp = NULL; + if(fp == NULL) + fp = fopen("/tmp/check-outlierness.py", "w"); + static int count = -1; + count++; + if(count > 5) + goto done; + + fprintf(fp, "# WARNING: all J here are unscaled with SCALE_....\n"); + + if(count == 0) + { + fprintf(fp, + "import numpy as np\n" + "import numpysane as nps\n" + "np.set_printoptions(linewidth=100000)\n" + "\n"); + } + + fprintf(fp, "x%d = np.array((", count); + for(int j=0;jx[j]); + fprintf(fp,"))\n"); + + if( ctx->is_sparse ) + { + fprintf(fp, "J%d = np.zeros((%d,%d))\n", count, Nmeasurements, Nstate); + for(int 
+      for(int imeas=0;imeas<Nmeasurements;imeas++)
+      {
+        for(int j = (int)P(point->Jt, imeas);
+            j < (int)P(point->Jt, imeas+1);
+            j++)
+        {
+          int irow = I(point->Jt, j);
+          fprintf(fp, "J%d[%d,%d] = %.20g\n", count,
+                  imeas, irow, X(point->Jt, j));
+        }
+      }
+    }
+    else
+    {
+      fprintf(fp, "J%d = np.array((\n", count);
+      for(int j=0;j<Nmeasurements;j++)
+      {
+        fprintf(fp, "(");
+        for(int i=0;i<Nstate;i++)
+          fprintf(fp, "%.20g,", point->J_dense[j*Nstate+i]);
+        fprintf(fp, "),\n");
+      }
+      fprintf(fp,"))\n");
+    }
+
+    fprintf(fp, "Nmeasurements = %d\n",    Nmeasurements);
+    fprintf(fp, "Nstate = %d\n",           Nstate);
+    fprintf(fp, "Nfeatures = %d\n",        Nfeatures);
+    fprintf(fp, "featureSize = %d\n",      featureSize);
+    fprintf(fp, "NoutlierFeatures = %d\n", NoutlierFeatures);
+
+    fprintf(fp, "scale_got = %.20f\n", *scale);
+
+    fprintf(fp, "factors_got = np.array((");
+    for(int j=0;j<Nfeatures;j++)
+      fprintf(fp, "%.20g,", factors[j]);
+    fprintf(fp,"))\n");
+    fflush(fp);
+  done:;
+  }
+#endif
+
+  return result;
+}
+
+double dogleg_getOutliernessTrace_newFeature_sparse(const double* JqueryFeature,
+                                                    int istateActive,
+                                                    int NstateActive,
+                                                    int featureSize,
+                                                    int NoutlierFeatures,
+                                                    dogleg_operatingPoint_t* point,
+                                                    dogleg_solverContext_t* ctx)
+{
+  /* For a new feature with query jacobian Jq I compute A = Jq inv(JtJ) Jqt.
+     With featureSize == 2 this is a 2x2 matrix, so
+
+       B = inv(I + A) ->
+       tr(B) = (2+a00+a11) / ((1+a00)*(1+a11) - a01^2)
+  */
+
+
+  // This is Jt because cholmod thinks in terms of col-first instead of
+  // row-first
+  int Jt_p[featureSize+1];
+  int Jt_i[NstateActive*featureSize];
+  for(int i=0; i<=featureSize; i++)
+  {
+    Jt_p[i] = i*NstateActive;
+    if(i==featureSize) break;
+    for(int j=0; j<NstateActive; j++)
+      Jt_i[i*NstateActive + j] = istateActive + j;
+  }
+
+  cholmod_sparse Jt_query_sparse = {.nrow   = ctx->Nstate,
+                                    .ncol   = featureSize,
+                                    .nzmax  = NstateActive*featureSize,
+                                    .p      = (void*)Jt_p,
+                                    .i      = (void*)Jt_i,
+                                    .x      = (double*)JqueryFeature,
+                                    .sorted = 1,
+                                    .packed = 1,
+                                    .stype  = 0, // NOT symmetric
+                                    .itype  = CHOLMOD_INT,
+                                    .xtype  = CHOLMOD_REAL,
+                                    .dtype  = CHOLMOD_DOUBLE};
+
+  // Really shouldn't need to do this every time. In fact I probably don't need
+  // to do it at all, since this will have been done by the solver during the
+  // last step
+  dogleg_computeJtJfactorization( point, ctx );
+  cholmod_sparse* invJtJ_Jp =
+    cholmod_spsolve(CHOLMOD_A,
+                    ctx->factorization,
+                    &Jt_query_sparse,
+                    &ctx->common);
+
+  // Now I need trace(matmult(Jquery, invJtJ_Jp))
+
+  // haven't implemented anything else yet
+  ASSERT(featureSize == 2);
+  double A[4] = {}; // gah. only elements 0,1,3 will be stored.
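+  // A = Jq inv(JtJ) Jqt is symmetric, so the loop below fills only the upper
+  // triangle of this row-major 2x2 (elements 0,1,3). That is all the
+  // closed-form 2x2 inversion that follows needs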
+
+  for(int i=0; i<featureSize; i++)
+  {
+    for(unsigned int l = P(invJtJ_Jp, i); l < P(invJtJ_Jp, i+1); l++)
+    {
+      int row = I(invJtJ_Jp, l);
+      if(row >= istateActive)
+      {
+        if(row >= istateActive+NstateActive)
+          break;
+
+        int ic0 = i*featureSize;
+        for(int k=i; k<featureSize; k++)
+          A[ic0 + k] +=
+            X(invJtJ_Jp, l) *
+            JqueryFeature[k*NstateActive + row - istateActive];
+      }
+    }
+  }
+  cholmod_free_sparse(&invJtJ_Jp, &ctx->common);
+
+  double invB00 = A[0]+1.0;
+  double invB01 = A[1];
+  double invB11 = A[3]+1.0;
+
+  double det_invB_recip = 1.0/(invB00*invB11 - invB01*invB01);
+  double B00 =  invB11 * det_invB_recip;
+  double B11 =  invB00 * det_invB_recip;
+
+  __attribute__((unused))
+  double B01 = -invB01 * det_invB_recip;
+  double traceB = B00 + B11;
+  __attribute__((unused))
+  double traceBB = B00*B00 + 2.0*B01*B01 + B11*B11;
+
+
+#if 0
+  static int count = -1;
+  count++;
+  if(count <= 5)
+  {
+    int Nstate        = ctx->Nstate;
+    int Nmeasurements = ctx->Nmeasurements;
+
+
+    static FILE* fp = NULL;
+    if(fp == NULL)
+      fp = fopen("/tmp/check-query-outlierness.py", "w");
+
+    fprintf(fp, "# WARNING: all J here are unscaled with SCALE_....\n");
+
+    if(count == 0)
+    {
+      fprintf(fp,
+              "import numpy as np\n"
+              "import numpysane as nps\n"
+              "np.set_printoptions(linewidth=100000)\n"
+              "\n");
+    }
+
+    fprintf(fp, "x%d = np.array((", count);
+    for(int j=0;j<Nmeasurements;j++)
+      fprintf(fp, "%.20g,", point->x[j]);
+    fprintf(fp,"))\n");
+
+    if( ctx->is_sparse )
+    {
+      fprintf(fp, "J%d = np.zeros((%d,%d))\n", count, Nmeasurements, Nstate);
+      for(int imeas=0;imeas<Nmeasurements;imeas++)
+      {
+        for(int j = (int)P(point->Jt, imeas);
+            j < (int)P(point->Jt, imeas+1);
+            j++)
+        {
+          int irow = I(point->Jt, j);
+          fprintf(fp, "J%d[%d,%d] = %g\n", count,
+                  imeas, irow, X(point->Jt, j));
+        }
+      }
+    }
+    else
+    {
+      fprintf(fp, "J%d = np.array((\n", count);
+      for(int j=0;j<Nmeasurements;j++)
+      {
+        fprintf(fp, "(");
+        for(int i=0;i<Nstate;i++)
+          fprintf(fp, "%g,", point->J_dense[j*Nstate+i]);
+        fprintf(fp, "),\n");
+      }
+      fprintf(fp,"))\n");
+    }
+
+    fprintf(fp, "Nmeasurements = %d\n", Nmeasurements);
+    fprintf(fp, "Nstate = %d\n",        Nstate);
+    fprintf(fp, "featureSize = %d\n",   featureSize);
+    fprintf(fp, "traceB_got = %.20g\n", traceB);
+
+    fprintf(fp, "Jq = np.zeros((featureSize, Nstate), dtype=float)\n");
+    for(int i=0; i<featureSize; i++)
+      for(int j=0; j<NstateActive; j++)
+        fprintf(fp, "Jq[%d,%d] = %.20g\n",
+                i, istateActive+j, JqueryFeature[i*NstateActive + j]);
+    fflush(fp);
+  }
+#endif
+
+  int Nmeasurements = ctx->Nmeasurements;
+  int Nstate        = ctx->Nstate;
+
+  double scale = -1.0;
+  getOutliernessScale(&scale,
+                      Nmeasurements, Nstate, NoutlierFeatures, featureSize, point->norm2_x);
+
+  // // Dima's self+others
+  // return scale * traceB;
+
+  // Cook's self+others
+  return scale * (2.0 - traceB);
+
+  // // Dima's others/Cook's others
+  // // This one is non-monotonic in outlierness-test
+  // return scale * (traceB - traceBB);
+}
+
+
+#define OUTLIER_CONFIDENCE_DROP_THRESHOLD 0.05
+bool dogleg_markOutliers(// output, input
+                         struct dogleg_outliers_t* markedOutliers,
+                         double* scale, // if <0 then I recompute
+
+                         // output, input
+                         int* Noutliers,
+
+                         // input
+                         double (getConfidence)(int i_feature_exclude),
+
+                         // if outliers are grouped into features, the feature
+                         // size is set here
+                         int featureSize,
+                         int Nfeatures,
+
+                         dogleg_operatingPoint_t* point,
+                         dogleg_solverContext_t* ctx)
+{
+  if(featureSize <= 1)
+    featureSize = 1;
+
+  bool markedAny = false;
+
+  double* factors = malloc(Nfeatures * sizeof(double));
+  if(factors == NULL)
+  {
+    SAY("Error allocating factors");
+    goto done;
+  }
+
+  if(!dogleg_getOutliernessFactors(factors, scale,
+                                   featureSize, Nfeatures,
+                                   *Noutliers,
+                                   point, ctx))
+    goto done;
+
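+  // getConfidence(i_feature_exclude) is a user callback: it evaluates the
+  // confidence of the solve with the given feature excluded (pass a negative
+  // i_feature_exclude to exclude nothing), and returns a value < 0 on error
+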
+  // I have my list of POTENTIAL outliers (any that have factor > 1.0). I
+  // check to see how much confidence I would lose if I were to throw out any
+  // of these measurements, and accept the outlier ONLY if the confidence loss
+  // is acceptable
+  double confidence0 = getConfidence(-1);
+  if( confidence0 < 0.0 )
+    goto done;
+
+  SAY_IF_VERBOSE("Initial confidence: %g", confidence0);
+
+  *Noutliers = 0;
+  for(int i=0; i<Nfeatures; i++)
+  {
+    if(markedOutliers[i].marked)
+    {
+      (*Noutliers)++;
+      continue;
+    }
+    if(factors[i] <= 1.0)
+      continue;
+
+    // this feature is a potential outlier. Evaluate the confidence of the
+    // solve without it
+    double confidence = getConfidence(i);
+    if( confidence < 0.0 )
+      goto done;
+
+    double confidence_drop = 1.0 - confidence/confidence0;
+    if( confidence_drop < OUTLIER_CONFIDENCE_DROP_THRESHOLD )
+    {
+      markedOutliers[i].marked = true;
+      markedAny                = true;
+      (*Noutliers)++;
+      SAY_IF_VERBOSE("Feature %d has outlierness factor %g; confidence drop %g "
+                     "is acceptable. Marking as an outlier",
+                     i, factors[i], confidence_drop);
+    }
+  }
+
+ done:
+  free(factors);
+  return markedAny;
+}
diff -Nru libdogleg-0.14/dogleg.h libdogleg-0.15.4/dogleg.h
--- libdogleg-0.14/dogleg.h	2018-02-12 18:55:59.000000000 +0000
+++ libdogleg-0.15.4/dogleg.h	2020-12-11 23:37:34.000000000 +0000
@@ -10,6 +10,7 @@
 #include <cholmod.h>
+#include <stdbool.h>
 
 typedef void (dogleg_callback_t)(const double* p,
                                  double* x,
@@ -19,6 +20,10 @@
 // an operating point of the solver
 typedef struct
 {
+  // The pointers in this structure all point into a single "pool" array
+  // (see allocOperatingPoint()). I thus don't need to store the pointers at
+  // all, and could index that one array directly, but that would break my
+  // ABI
   double* p;
   double* x;
   double norm2_x;
@@ -43,8 +48,40 @@
   int  updateCauchy_valid, updateGN_valid;
   int  didStepToEdgeOfTrustRegion;
+
+  double* step_to_here;
+  double  step_to_here_len_sq;
+
 } dogleg_operatingPoint_t;
 
+// The newer APIs ( dogleg_...2() ) take a dogleg_parameters2_t argument for
+// their settings, while the older ones use a global set of parameters specified
+// with dogleg_set_...(). This global-parameters approach doesn't work if we
+// have multiple users of libdogleg in the same application
+typedef struct
+{
+  int max_iterations;
+  int dogleg_debug;
+
+  // it is cheap to reject a too-large trust region, so I start with something
+  // "large". The solver will quickly move down to something reasonable. Only
+  // the user really knows if this is "large" or not, so they should change
+  // this via the API
+  double trustregion0;
+
+  // These are probably OK to leave alone. Tweaking them can maybe result in
+  // slightly faster convergence
+  double trustregion_decrease_factor;
+  double trustregion_decrease_threshold;
+  double trustregion_increase_factor;
+  double trustregion_increase_threshold;
+
+  // The termination thresholds. Documented below
+  double Jt_x_threshold;
+  double update_threshold;
+  double trustregion_threshold;
+} dogleg_parameters2_t;
+
 // solver context. This has all the internal state of the solver
 typedef struct
 {
@@ -84,9 +121,59 @@
   // Are we using sparse math (cholmod)?
   int is_sparse;
   int Nstate, Nmeasurements;
+
+  const dogleg_parameters2_t* parameters;
+
 } dogleg_solverContext_t;
 
+// Fills in the given structure with the default parameter set
+void dogleg_getDefaultParameters(dogleg_parameters2_t* parameters);
+
+void dogleg_setMaxIterations(int n);
+void dogleg_setTrustregionUpdateParameters(double downFactor, double downThreshold,
+                                           double upFactor,   double upThreshold);
+
+// The argument is a bit-mapped integer. It should be a bit-field structure or
+// an enum, but I keep it an integer for API backwards-compatibility.
+//
+// if(debug == 0                  ): no diagnostic output
+// if(debug &  DOGLEG_DEBUG_VNLOG ): output vnlog diagnostics to stdout
+// if(debug & ~DOGLEG_DEBUG_VNLOG ): output human-oriented diagnostics to stderr
+#define DOGLEG_DEBUG_VNLOG (1 << 30)
+void dogleg_setDebug(int debug);
+
+
+// The following parameters reflect the scaling of the problem being solved, so
+// the user is strongly encouraged to tweak these. The defaults are set
+// semi-arbitrarily
+
+// The size of the trust region at start. It is cheap to reject a too-large
+// trust region, so this should be something "large". Say 10x the length of an
+// "expected" step size
+void dogleg_setInitialTrustregion(double t);
+
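+// For example, a caller relying on the global parameters might configure the
+// solver like this (illustrative values only; the right settings depend on the
+// problem being solved):
+//
+//   dogleg_setDebug(1);
+//   dogleg_setMaxIterations(100);
+//   dogleg_setInitialTrustregion(1e3);
+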
+// termination thresholds. These really depend on the scaling of the input
+// problem, so the user should set these appropriately
+//
+// Jt_x threshold:
+// The function being minimized is E = norm2(x) where x is a function of the state p.
+// dE/dp = 2*Jt*x where Jt is transpose(dx/dp).
+// if( for every i  fabs(Jt_x[i]) < JT_X_THRESHOLD )
+// { we are done; }
+//
+// update threshold:
+// if(for every i  fabs(state_update[i]) < UPDATE_THRESHOLD)
+// { we are done; }
+//
+// trust region threshold:
+// if(trustregion < TRUSTREGION_THRESHOLD)
+// { we are done; }
+//
+// to leave a particular threshold alone, use a value <= 0 here
+void dogleg_setThresholds(double Jt_x, double update, double trustregion);
+
+
 // The main optimization function. Initial estimate of the solution passed in p,
 // final optimized solution returned in p. p has Nstate variables. There are
 // Nmeas measurements, the jacobian matrix has NJnnz non-zero entries.
@@ -101,6 +188,11 @@
                        unsigned int Nmeas, unsigned int NJnnz,
                        dogleg_callback_t* f, void* cookie,
                        dogleg_solverContext_t** returnContext);
+double dogleg_optimize2(double* p, unsigned int Nstate,
+                        unsigned int Nmeas, unsigned int NJnnz,
+                        dogleg_callback_t* f, void* cookie,
+                        const dogleg_parameters2_t* parameters,
+                        dogleg_solverContext_t** returnContext);
 
 // Main optimization function if we're using dense linear algebra. The arguments
 // are very similar to the sparse version: dogleg_optimize()
@@ -108,6 +200,11 @@
                              unsigned int Nmeas,
                              dogleg_callback_dense_t* f, void* cookie,
                              dogleg_solverContext_t** returnContext);
+double dogleg_optimize_dense2(double* p, unsigned int Nstate,
+                              unsigned int Nmeas,
+                              dogleg_callback_dense_t* f, void* cookie,
+                              const dogleg_parameters2_t* parameters,
+                              dogleg_solverContext_t** returnContext);
 
 // Compute the cholesky decomposition of JtJ. This function is only exposed if
 // you need to touch libdogleg internals via returnContext. Sometimes after
@@ -134,42 +231,65 @@
 
 void dogleg_freeContext(dogleg_solverContext_t** ctx);
 
-////////////////////////////////////////////////////////////////
-// solver parameters
-////////////////////////////////////////////////////////////////
-void dogleg_setMaxIterations(int n);
-void dogleg_setTrustregionUpdateParameters(double downFactor, double downThreshold,
-                                           double upFactor, double upThreshold);
+// Computes outlierness factors. This function is experimental, and subject to
+// change.
+bool dogleg_getOutliernessFactors( // output
+                                   double* factors, // Nfeatures factors
 
-// lots of solver-related debug output when on
-void dogleg_setDebug(int debug);
+                                   // output, input
+                                   double* scale, // if <0 then I recompute
 
+                                   // inputs
+                                   // if outliers are grouped into features, the feature size is
+                                   // stated here
+                                   int featureSize,
+                                   int Nfeatures,
+                                   int NoutlierFeatures, // how many outliers we already have
+                                   dogleg_operatingPoint_t* point,
+                                   dogleg_solverContext_t* ctx );
 
-// The following parameters reflect the scaling of the problem being solved, so
-// the user is strongly encouraged to tweak these. The defaults are set
-// semi-arbitrarily
 
-// The size of the trust region at start. It is cheap to reject a too-large
-// trust region, so this should be something "large". Say 10x the length of an
-// "expected" step size
-void dogleg_setInitialTrustregion(double t);
 
-// termination thresholds. These really depend on the scaling of the input
-// problem, so the user should set these appropriately
-//
-// Jt_x threshold:
-// The function being minimized is E = norm2(x) where x is a function of the state p.
-// dE/dp = 2*Jt*x where Jt is transpose(dx/dp).
-// if( for every i  fabs(Jt_x[i]) < JT_X_THRESHOLD )
-// { we are done; }
-//
-// update threshold:
-// if(for every i  fabs(state_update[i]) < UPDATE_THRESHOLD)
-// { we are done; }
-//
-// trust region threshold:
-// if(trustregion < TRUSTREGION_THRESHOLD)
-// { we are done; }
-//
-// to leave a particular threshold alone, use a value <= 0 here
-void dogleg_setThresholds(double Jt_x, double update, double trustregion);
+
+// This stuff is experimental, and subject to change.
+struct dogleg_outliers_t
+{
+  unsigned char marked : 1;
+};
+bool dogleg_markOutliers(// output, input
+                         struct dogleg_outliers_t* markedOutliers,
+                         double* scale, // if <0 then I recompute
+
+                         // output, input
+                         int* Noutliers, // number of outliers before and after this call
+
+                         // input
+                         double (getConfidence)(int i_feature_exclude),
+
+                         // if outliers are grouped into features, the feature size is
+                         // stated here
+                         int featureSize,
+                         int Nfeatures,
+
+                         dogleg_operatingPoint_t* point,
+                         dogleg_solverContext_t* ctx);
+
+void dogleg_reportOutliers( double (getConfidence)(int i_feature_exclude),
+                            double* scale, // if <0 then I recompute
+
+                            // if outliers are grouped into features, the feature size
+                            // is stated here
+                            int featureSize,
+                            int Nfeatures,
+                            int Noutliers, // how many outliers we already have
+
+                            dogleg_operatingPoint_t* point,
+                            dogleg_solverContext_t* ctx);
+
+double dogleg_getOutliernessTrace_newFeature_sparse(const double* JqueryFeature,
+                                                    int istateActive,
+                                                    int NstateActive,
+                                                    int featureSize,
+                                                    int NoutlierFeatures,
+                                                    dogleg_operatingPoint_t* point,
+                                                    dogleg_solverContext_t* ctx);
diff -Nru libdogleg-0.14/Makefile libdogleg-0.15.4/Makefile
--- libdogleg-0.14/Makefile	2018-02-12 18:55:59.000000000 +0000
+++ libdogleg-0.15.4/Makefile	2020-12-11 23:37:34.000000000 +0000
@@ -29,7 +29,7 @@
 
 # Add my optimization flags if none already exist. These could exist if debian
 # packages are built.
-OPTFLAGS := $(if $(filter -O%, $(CFLAGS)),,-O3 -ffast-math -mtune=core2)
+OPTFLAGS := $(if $(filter -O%, $(CFLAGS)),,-O3 -ffast-math -mtune=core2) -fno-omit-frame-pointer
 
 FLAGS += -ggdb -Wall -Wextra -MMD $(OPTFLAGS) -I/usr/include/suitesparse
 CFLAGS += $(FLAGS) --std=gnu99
@@ -42,7 +42,7 @@
 MAN_SECTION = 3
 MAN_TARGET = libdogleg.$(MAN_SECTION)
 
-ALL_TARGETS = $(LIB_TARGETS) $(MAN_TARGET)
+ALL_TARGETS = $(LIB_TARGETS) $(MAN_TARGET) sample
 
 all: $(ALL_TARGETS)
diff -Nru libdogleg-0.14/README.pod libdogleg-0.15.4/README.pod
--- libdogleg-0.14/README.pod	2018-02-12 18:55:59.000000000 +0000
+++ libdogleg-0.15.4/README.pod	2020-12-11 23:37:34.000000000 +0000
@@ -70,14 +70,15 @@
 
 =head2 Main API
 
-=head3 dogleg_optimize
+=head3 dogleg_optimize2
 
-This is the main call to the library for I<sparse> Jacobians. Its declared as
+This is the main call to the library for I<sparse> Jacobians. It's declared as
 
-  double dogleg_optimize(double* p, unsigned int Nstate,
-                         unsigned int Nmeas, unsigned int NJnnz,
-                         dogleg_callback_t* f, void* cookie,
-                         dogleg_solverContext_t** returnContext);
+  double dogleg_optimize2(double* p, unsigned int Nstate,
+                          unsigned int Nmeas, unsigned int NJnnz,
+                          dogleg_callback_t* f, void* cookie,
+                          const dogleg_parameters2_t* parameters,
+                          dogleg_solverContext_t** returnContext);
 
 =over
 
@@ -97,6 +98,10 @@
 
 =item * C<cookie> is an arbitrary data pointer passed untouched to C<f>
 
+=item * C<parameters> is a pointer to the set of parameters to use. Set to C<NULL>
+to use the global parameters, or call C<dogleg_optimize()> instead.
+See the L</Parameters> section below for more details
+
 =item * If not C<NULL>, C<returnContext> can be used to retrieve the full
 context structure from the solver. This can be useful since this structure
 contains the latest operating point values. It also has an active
@@ -109,14 +114,25 @@
 
 C<dogleg_optimize> returns norm2( B<x> ) at the minimum, or a negative value
 if an error occurred.
 
-=head3 dogleg_optimize_dense
+=head3 dogleg_optimize
+
+This is a flavor of C<dogleg_optimize2()> that implicitly uses the global
+parameters. It's declared as
+
+  double dogleg_optimize(double* p, unsigned int Nstate,
+                         unsigned int Nmeas, unsigned int NJnnz,
+                         dogleg_callback_t* f, void* cookie,
+                         dogleg_solverContext_t** returnContext);
+
+=head3 dogleg_optimize_dense2
 
 This is the main call to the library for I<dense> Jacobians. Its declared as
 
-  double dogleg_optimize_dense(double* p, unsigned int Nstate,
-                               unsigned int Nmeas,
-                               dogleg_callback_dense_t* f, void* cookie,
-                               dogleg_solverContext_t** returnContext);
+  double dogleg_optimize_dense2(double* p, unsigned int Nstate,
+                                unsigned int Nmeas,
+                                dogleg_callback_dense_t* f, void* cookie,
+                                const dogleg_parameters2_t* parameters,
+                                dogleg_solverContext_t** returnContext);
 
 The arguments are almost identical to those in the C<dogleg_optimize> call.
 
@@ -134,6 +150,10 @@
 
 =item * C<cookie> is an arbitrary data pointer passed untouched to C<f>
 
+=item * C<parameters> is a pointer to the set of parameters to use. Set to C<NULL>
+to use the global parameters, or call C<dogleg_optimize_dense()> instead. See the
+L</Parameters> section below for more details
+
 =item * If not C<NULL>, C<returnContext> can be used to retrieve the full
 context structure from the solver. This can be useful since this structure
 contains the latest operating point values. You usually want
@@ -144,6 +164,16 @@
 
 C<dogleg_optimize_dense> returns norm2( B<x> ) at the minimum, or a negative
 value if an error occurred.
 
+=head3 dogleg_optimize_dense
+
+This is a flavor of C<dogleg_optimize_dense2()> that implicitly uses the global
+parameters. It's declared as
+
+  double dogleg_optimize_dense(double* p, unsigned int Nstate,
+                               unsigned int Nmeas,
+                               dogleg_callback_dense_t* f, void* cookie,
+                               dogleg_solverContext_t** returnContext);
+
 =head3 dogleg_freeContext
 
 Used to deallocate memory used for an optimization cycle. Defined as:
@@ -384,10 +414,19 @@
 
 =head2 Parameters
 
-It is not required to call any of these, but it's highly recommended to set the initial trust-region
-size and the termination thresholds to match the problem being solved. Furthermore, it's highly
-recommended for the problem being solved to be scaled so that every state variable affects the
-objective norm2( B<x> ) equally. This makes this method's concept of "trust region" much more
+The optimization is controlled by several parameters. These can be set globally
+for I<all> users of C<libdogleg> in a process with the C<dogleg_set_...()>
+functions. Those global values are used by C<dogleg_optimize()> and
+C<dogleg_optimize_dense()>. Or the parameters can be specified independently
+for each invocation by passing a C<dogleg_parameters2_t> argument to
+C<dogleg_optimize2()> or C<dogleg_optimize_dense2()>. The latter is
+recommended, since it lets multiple users of libdogleg in a single application
+coexist without conflict.
+
+It is not required to set any of these, but it's highly recommended to set the
+initial trust-region size and the termination thresholds to match the problem
+being solved. Furthermore, it's highly recommended for the problem being solved
+to be scaled so that every state variable affects the objective norm2( B<x> )
+equally. This makes this method's concept of "trust region" much more
 well-defined and makes the termination criteria work correctly.
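+
+For example, to run one optimization with local parameters, leaving the
+process-global state untouched (a minimal sketch; C<f>, C<cookie> and the
+problem dimensions stand in for whatever the caller already has):
+
+  dogleg_parameters2_t parameters;
+  dogleg_getDefaultParameters(&parameters);
+  parameters.max_iterations = 100;    // illustrative value
+  parameters.trustregion0   = 1.0e3;  // illustrative value; see below
+
+  double norm2x = dogleg_optimize2(p, Nstate, Nmeas, NJnnz,
+                                   f, cookie, &parameters, NULL);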
 
 =head3 dogleg_setMaxIterations
@@ -398,11 +437,25 @@
 
 =head3 dogleg_setDebug
 
-To turn on debug output, call
+To turn on diagnostic output, call
 
   void dogleg_setDebug(int debug);
 
-with a non-zero value for C<debug>. By default, debug output is disabled.
+with a non-zero value for C<debug>. Two separate diagnostic streams are
+available: a verbose human-oriented stream, and a
+L<vnlog|https://github.com/dkogan/vnlog>.
+
+By default, diagnostic output is disabled. The C<debug> argument is a bit-mapped
+integer:
+
+  if(debug == 0                  ): no diagnostic output
+  if(debug &  DOGLEG_DEBUG_VNLOG ): output vnlog diagnostics to stdout
+  if(debug & ~DOGLEG_DEBUG_VNLOG ): output human-oriented diagnostics to stderr
+
+C<DOGLEG_DEBUG_VNLOG> has a very high value, so if human diagnostics are
+desired, the recommended call is:
+
+  dogleg_setDebug(1);
 
 =head3 dogleg_setInitialTrustregion
diff -Nru libdogleg-0.14/sample.c libdogleg-0.15.4/sample.c
--- libdogleg-0.14/sample.c	2018-02-12 18:55:59.000000000 +0000
+++ libdogleg-0.15.4/sample.c	2020-12-11 23:37:34.000000000 +0000
@@ -127,8 +127,6 @@
   }
   Jrowptr[Nmeasurements] = iJacobian;
 
-
-  fprintf(stderr, "Callback finished. 2-norm is %f\n", norm2_x);
 #undef STORE_JACOBIAN
 }
 
@@ -168,34 +166,96 @@
     STORE_JACOBIAN( 5, 1.0 );
   }
 
-
-  fprintf(stderr, "Callback finished. 2-norm is %f\n", norm2_x);
 #undef STORE_JACOBIAN
 }
 
 int main(int argc, char* argv[] )
 {
+  const char* usage = "Usage: %s [--diag-vnlog] [--diag-human] sparse|dense [test-gradients]\n";
+
   int is_sparse;
+  int test_gradients = 0;
+  int debug          = 0;
 
-  if( argc >= 2 && 0 == strcmp(argv[1], "dense") )
-  {
-    fprintf(stderr, "Using DENSE math\n");
-    is_sparse = 0;
-  }
-  else
-  {
-    fprintf(stderr, "Using SPARSE math. Run with '%s dense' to use dense math.\n", argv[0]);
-    is_sparse = 1;
-  }
+  // argument parsing
+  int iarg = 1;
+  for(int i=0; i<2; i++)
+  {
+    if(iarg >= argc)
+    {
+      fprintf(stderr, usage, argv[0]);
+      return 1;
+    }
+    if(0 == strcmp("--diag-vnlog", argv[iarg]))
+    {
+      debug |= DOGLEG_DEBUG_VNLOG;
+      iarg++;
+      continue;
+    }
+    if(0 == strcmp("--diag-human", argv[iarg]))
+    {
+      debug |= 1;
+      iarg++;
+      continue;
+    }
+    break;
+  }
+
+  if(iarg >= argc)
+  {
+    fprintf(stderr, usage, argv[0]);
+    return 1;
+  }
+  if( 0 == strcmp(argv[iarg], "dense") )
+  {
+    fprintf(stderr, "Using DENSE math\n");
+    is_sparse = 0;
+  }
+  else if( 0 == strcmp(argv[iarg], "sparse") )
+  {
+    fprintf(stderr, "Using SPARSE math\n");
+    is_sparse = 1;
+  }
+  else
+  {
+    fprintf(stderr, usage, argv[0]);
+    return 1;
+  }
+
+  iarg++;
+  if(iarg == argc-1)
+  {
+    if( 0 != strcmp("test-gradients", argv[iarg]))
+    {
+      fprintf(stderr, usage, argv[0]);
+      return 1;
+    }
+    fprintf(stderr, "Testing the gradients only\n");
+    test_gradients = 1;
+  }
+  else if(iarg == argc)
+  {
+    // not testing gradients. We're good
+  }
+  else
+  {
+    fprintf(stderr, usage, argv[0]);
+    return 1;
+  }
 
+  srandom( 0 ); // I want determinism here
+
   generateSimulationGrid();
   simulate();
 
-  dogleg_setDebug(1); // request debugging output from the solver
-
+  dogleg_parameters2_t dogleg_parameters;
+  dogleg_getDefaultParameters(&dogleg_parameters);
+  dogleg_parameters.dogleg_debug = debug;
 
   double p[Nstate];
 
@@ -218,23 +278,30 @@
   // sure that the reported and observed gradients match (the relative error is
   // low)
   fprintf(stderr, "have %d variables\n", Nstate);
-  for(int i=0; i