oomph-lib: problem.cc Source File

Go to the documentation of this file.
// LIC// ====================================================================
// LIC// This file forms part of oomph-lib, the object-oriented,
// LIC// multi-physics finite-element library, available
// LIC// at http://www.oomph-lib.org.
// LIC//
// LIC// Copyright (C) 2006-2025 Matthias Heil and Andrew Hazel
// LIC//
// LIC// This library is free software; you can redistribute it and/or
// LIC// modify it under the terms of the GNU Lesser General Public
// LIC// License as published by the Free Software Foundation; either
// LIC// version 2.1 of the License, or (at your option) any later version.
// LIC//
// LIC// This library is distributed in the hope that it will be useful,
// LIC// but WITHOUT ANY WARRANTY; without even the implied warranty of
// LIC// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// LIC// Lesser General Public License for more details.
// LIC//
// LIC// You should have received a copy of the GNU Lesser General Public
// LIC// License along with this library; if not, write to the Free Software
// LIC// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
// LIC// 02110-1301  USA.
// LIC//
// LIC// The authors may be contacted at oomph-lib@maths.man.ac.uk.
// LIC//
// LIC//====================================================================
 
#include <cmath>
#ifdef OOMPH_HAS_MPI
#include "mpi.h"
#endif
 
#include <list>
#include <algorithm>
#include <string>
 
#include "oomph_utilities.h"
#include "problem.h"
#include "timesteppers.h"
#include "explicit_timesteppers.h"
#include "generalised_timesteppers.h"
#include "refineable_mesh.h"
#include "triangle_mesh.h"
#include "linear_solver.h"
#include "eigen_solver.h"
#include "assembly_handler.h"
#include "dg_elements.h"
#include "partitioning.h"
#include "spines.h"
 
 
#ifdef OOMPH_HAS_MUMPS
#include "mumps_solver.h"
#endif
 
namespace oomph
{
  /// ///////////////////////////////////////////////////////////////
  // Non-inline functions for the problem class
  /// ///////////////////////////////////////////////////////////////
 
  //=================================================================
  /// Out-of-class definition of the static data member
  /// Problem::Continuation_time_stepper: the ContinuationStorageScheme
  /// object referred to as "the continuation timestepper". It is
  /// default-constructed here.
  //=================================================================
  ContinuationStorageScheme Problem::Continuation_time_stepper;
 
  //================================================================
  /// Constructor: Set the mesh, time, explicit-timestepper and saved-dof
  /// pointers to NULL, assign default values to all control parameters,
  /// and allocate the default linear solver, eigensolver, assembly
  /// handler, communicator and an (empty) dof distribution. The
  /// sub-mesh and timestepper containers start out empty.
  //===============================================================
  Problem::Problem()
    : Mesh_pt(0),
      Time_pt(0),
      Explicit_time_stepper_pt(0),
      Saved_dof_pt(0),
      Default_set_initial_condition_called(false),
      Use_globally_convergent_newton_method(false),
      Empty_actions_before_read_unstructured_meshes_has_been_called(false),
      Empty_actions_after_read_unstructured_meshes_has_been_called(false),
      Store_local_dof_pt_in_elements(false),
      Calculate_hessian_products_analytic(false),
#ifdef OOMPH_HAS_MPI
      // These members are only initialised in MPI builds
      Doc_imbalance_in_parallel_assembly(false),
      Use_default_partition_in_load_balance(false),
      Must_recompute_load_balance_for_assembly(true),
      Halo_scheme_pt(0),
#endif
      Relaxation_factor(1.0),
      Newton_solver_tolerance(1.0e-8),
      Max_newton_iterations(10),
      Nnewton_iter_taken(0),
      Max_residuals(10.0),
      Time_adaptive_newton_crash_on_solve_fail(false),
      Jacobian_reuse_is_enabled(false),
      Jacobian_has_been_computed(false),
      Problem_is_nonlinear(true),
      Pause_at_end_of_sparse_assembly(false),
      Doc_time_in_distribute(false),
      Sparse_assembly_method(Perform_assembly_using_vectors_of_pairs),
      Sparse_assemble_with_arrays_initial_allocation(400),
      Sparse_assemble_with_arrays_allocation_increment(150),
      Numerical_zero_for_sparse_assembly(0.0),
      FD_step_used_in_get_hessian_vector_products(1.0e-8),
      Mass_matrix_reuse_is_enabled(false),
      Mass_matrix_has_been_computed(false),
      Discontinuous_element_formulation(false),
      Minimum_dt(1.0e-12),
      Maximum_dt(1.0e12),
      DTSF_max_increase(4.0),
      DTSF_min_decrease(0.8),
      Target_error_safety_factor(1.0),
      Minimum_dt_but_still_proceed(-1.0),
      Scale_arc_length(true),
      Desired_proportion_of_arc_length(0.5),
      Theta_squared(1.0),
      Sign_of_jacobian(0),
      Continuation_direction(1.0),
      Parameter_derivative(1.0),
      Parameter_current(0.0),
      Use_continuation_timestepper(false),
      Dof_derivative_offset(1),
      Dof_current_offset(2),
      Ds_current(0.0),
      Desired_newton_iterations_ds(5),
      Minimum_ds(1.0e-10),
      Bifurcation_detection(false),
      Bisect_to_find_bifurcation(false),
      First_jacobian_sign_change(false),
      Arc_length_step_taken(false),
      Use_finite_differences_for_continuation_derivatives(false),
#ifdef OOMPH_HAS_MPI
      // Further members that are only initialised in MPI builds
      Dist_problem_matrix_distribution(Uniform_matrix_distribution),
      Parallel_sparse_assemble_previous_allocation(0),
      Problem_has_been_distributed(false),
      Bypass_increase_in_dof_check_during_pruning(false),
      Max_permitted_error_for_halo_check(1.0e-14),
#endif
      Shut_up_in_newton_solve(false),
      Always_take_one_newton_step(false),
      Timestep_reduction_factor_after_nonconvergence(0.5),
      Keep_temporal_error_below_tolerance(true)
  {
    // This flag is assigned here rather than in the initialiser list
    Use_predictor_values_as_initial_guess = false;

    // Setup terminate helper (the matching clean-up call is made in the
    // destructor)
    TerminateHelper::setup();

    // By default no submeshes:
    Sub_mesh_pt.resize(0);

    // No timesteppers
    Time_stepper_pt.resize(0);

    // Set the linear solvers, eigensolver and assembly handler.
    // The linear solver is a MumpsSolver only if the library was built
    // with MUMPS *and* MUMPS was enabled as the default; otherwise it is
    // a SuperLUSolver. Each object is stored both in the "current"
    // pointer and in the corresponding Default_..._pt pointer; the
    // latter is the one that the destructor deletes.
#if defined(OOMPH_HAS_MUMPS) && \
  defined(OOMPH_ENABLE_MUMPS_AS_DEFAULT_LINEAR_SOLVER)
    Linear_solver_pt = Default_linear_solver_pt = new MumpsSolver;
#else
    Linear_solver_pt = Default_linear_solver_pt = new SuperLUSolver;
#endif

    // The mass matrix solver for explicit timestepping initially points
    // to the same solver object as Linear_solver_pt (no separate
    // allocation)
    Mass_matrix_solver_for_explicit_timestepper_pt = Linear_solver_pt;

    Eigen_solver_pt = Default_eigen_solver_pt = new LAPACK_QZ;

    Assembly_handler_pt = Default_assembly_handler_pt = new AssemblyHandler;

    // setup the communicator: built from the MPI_Helpers communicator if
    // MPI has been initialised, otherwise (or in non-MPI builds)
    // default-constructed
#ifdef OOMPH_HAS_MPI
    if (MPI_Helpers::mpi_has_been_initialised())
    {
      Communicator_pt = new OomphCommunicator(MPI_Helpers::communicator_pt());
    }
    else
    {
      Communicator_pt = new OomphCommunicator();
    }
#else
    Communicator_pt = new OomphCommunicator();
#endif

    // just create an empty linear algebra distribution for the
    // DOFs
    // this is setup when assign_eqn_numbers(...) is called.
    Dof_distribution_pt = new LinearAlgebraDistribution;
  }
 
  //================================================================
  /// Destructor: release the memory that the Problem itself allocated,
  /// i.e. the global Time object, the default linear solver, eigensolver
  /// and assembly handler, the communicator, the dof distribution, any
  /// copies of the problem and (only if there are sub-meshes) the
  /// global mesh. User-supplied solvers etc. are not deleted here.
  //================================================================
  Problem::~Problem()
  {
    // Delete the memory assigned for the global time
    // (it's created on the fly in Problem::add_time_stepper_pt()
    // so we are entitled to delete it.
    if (Time_pt != 0)
    {
      delete Time_pt;
      Time_pt = 0;
    }

    // If we're not using the default linear solver, somebody else must
    // have built the one in use, so that person must be in charge of
    // killing it. We can safely delete the defaults, however.
    delete Default_linear_solver_pt;

    delete Default_eigen_solver_pt;
    delete Default_assembly_handler_pt;
    delete Communicator_pt;
    delete Dof_distribution_pt;

    // Delete any copies of the problem that have been created for
    // use in adaptive bifurcation tracking.
    // ALH: This will eventually go
    unsigned n_copies = Copy_of_problem_pt.size();
    for (unsigned c = 0; c < n_copies; c++)
    {
      delete Copy_of_problem_pt[c];
    }

    // If this problem has sub meshes then we must delete the Mesh_pt.
    // Guard against a null global mesh pointer: sub-meshes may have been
    // added without the global mesh ever having been built (e.g. if a
    // derived class constructor threw in between), in which case there
    // is nothing to flush or delete and dereferencing Mesh_pt would be
    // undefined behaviour.
    if ((Sub_mesh_pt.size() != 0) && (Mesh_pt != 0))
    {
      // Flush before deleting -- presumably so that deleting the global
      // mesh does not also delete elements and nodes that are stored in
      // the sub-meshes (TODO confirm against Mesh's destructor)
      Mesh_pt->flush_element_and_node_storage();
      delete Mesh_pt;
    }

    // Since we called the TerminateHelper setup function in the constructor,
    // we need to delete anything that was dynamically allocated (as it's
    // just a namespace and so doesn't have its own destructor) in the function
    TerminateHelper::clean_up_memory();
  }
 
  //=================================================================
  /// Setup the count vector that records how many elements contribute
  /// to each degree of freedom. Returns the total number of elements
  /// in the problem
  //=================================================================
  unsigned Problem::setup_element_count_per_dof()
  {
    // Now set the element counter to have the current Dof distribution
    Element_count_per_dof.build(this->Dof_distribution_pt);
    // We need to use the halo scheme (assuming it has been setup)
#ifdef OOMPH_HAS_MPI
    Element_count_per_dof.build_halo_scheme(this->Halo_scheme_pt);
#endif
 
    // Loop over the elements and count the entries
    // and number of (non-halo) elements
    const unsigned n_element = this->mesh_pt()->nelement();
    unsigned n_non_halo_element_local = 0;
    for (unsigned e = 0; e < n_element; e++)
    {
      GeneralisedElement* elem_pt = this->mesh_pt()->element_pt(e);
#ifdef OOMPH_HAS_MPI
      // Ignore halo elements
      if (!elem_pt->is_halo())
      {
#endif
        // Increment the number of non halo elements
        ++n_non_halo_element_local;
        // Now count the number of times the element contributes to a value
        // using the current assembly handler
        unsigned n_var = this->Assembly_handler_pt->ndof(elem_pt);
        for (unsigned n = 0; n < n_var; n++)
        {
          ++Element_count_per_dof.global_value(
            this->Assembly_handler_pt->eqn_number(elem_pt, n));
        }
#ifdef OOMPH_HAS_MPI
      }
#endif
    }
 
    // Storage for the total number of elements
    unsigned Nelement = 0;
 
    // Add together all the counts if we are in parallel
#ifdef OOMPH_HAS_MPI
    Element_count_per_dof.sum_all_halo_and_haloed_values();
 
    // If distributed, find the total number of elements in the problem
    if (this->Problem_has_been_distributed)
    {
      // Need to gather the total number of non halo elements
      MPI_Allreduce(&n_non_halo_element_local,
                    &Nelement,
                    1,
                    MPI_UNSIGNED,
                    MPI_SUM,
                    this->communicator_pt()->mpi_comm());
    }
    // Otherwise the total number is the same on each processor
    else
#endif
    {
      Nelement = n_non_halo_element_local;
    }
 
    return Nelement;
  }
 
 
  //==================================================================
  /// Allocate a new LinearAlgebraDistribution with ndof() rows and
  /// return it via dist_pt. The incoming value of dist_pt is simply
  /// overwritten. Note: the caller owns the new object and is in
  /// charge of deleting it!
  ///
  /// Without MPI, or on a single processor, the distribution is
  /// non-distributed. On several processors it is a uniformly
  /// distributed one unless the problem itself has been distributed,
  /// in which case Dist_problem_matrix_distribution decides.
  //==================================================================
  void Problem::create_new_linear_algebra_distribution(
    LinearAlgebraDistribution*& dist_pt)
  {
    // One row per degree of freedom
    const unsigned n_row = this->ndof();

#ifdef OOMPH_HAS_MPI

    unsigned n_proc = Communicator_pt->nproc();

    // Only one processor: nothing to distribute
    if (n_proc == 1)
    {
      dist_pt = new LinearAlgebraDistribution(Communicator_pt, n_row, false);
      return;
    }

    // Several processors but the problem itself is not distributed:
    // use a uniformly distributed distribution
    if (!Problem_has_been_distributed)
    {
      dist_pt = new LinearAlgebraDistribution(Communicator_pt, n_row, true);
      return;
    }

    // The problem is distributed: follow the requested strategy
    switch (Dist_problem_matrix_distribution)
    {
      case Uniform_matrix_distribution:
      {
        dist_pt = new LinearAlgebraDistribution(Communicator_pt, n_row, true);
      }
      break;

      case Problem_matrix_distribution:
      {
        // Copy of the distribution of the dofs
        dist_pt = new LinearAlgebraDistribution(Dof_distribution_pt);
      }
      break;

      case Default_matrix_distribution:
      {
        // Temporary uniform distribution to compare against
        LinearAlgebraDistribution* uniform_dist_pt =
          new LinearAlgebraDistribution(Communicator_pt, n_row, true);

        // The dof distribution is rejected as soon as any processor
        // holds more than 1.1 times the rows it would hold under the
        // uniform distribution
        bool keep_problem_dist = true;
        for (unsigned i_proc = 0; i_proc < n_proc; i_proc++)
        {
          // hierher Andrew: what's the logic behind this?
          const double n_row_problem =
            (double)Dof_distribution_pt->nrow_local(i_proc);
          const double n_row_uniform =
            (double)uniform_dist_pt->nrow_local(i_proc);
          if (n_row_problem > n_row_uniform * 1.1)
          {
            keep_problem_dist = false;
          }
        }

        // Hand back a copy of whichever distribution won
        dist_pt = new LinearAlgebraDistribution(
          keep_problem_dist ? Dof_distribution_pt : uniform_dist_pt);

        // The temporary is no longer needed
        delete uniform_dist_pt;
      }
      break;

      default:
      {
        std::ostringstream error_stream;
        error_stream << "Never get here. Dist_problem_matrix_distribution = "
                     << Dist_problem_matrix_distribution << std::endl;
        throw OomphLibError(error_stream.str(),
                            OOMPH_CURRENT_FUNCTION,
                            OOMPH_EXCEPTION_LOCATION);
      }
    }

#else

    // Serial build: always non-distributed
    dist_pt = new LinearAlgebraDistribution(Communicator_pt, n_row, false);

#endif
  }
 
 
#ifdef OOMPH_HAS_MPI
 
  //==================================================================
  /// Setup the halo scheme for the degrees of freedom: collect the
  /// global equations to which the elements on this processor
  /// contribute, build a new DoubleVectorHaloScheme from them (stored
  /// in Halo_scheme_pt) and then set up Halo_dof_pt from the pointers
  /// to all halo data.
  ///
  /// NOTE(review): Halo_scheme_pt is overwritten here without deleting
  /// any scheme it may already point to -- confirm that callers release
  /// a previously built scheme.
  //==================================================================
  void Problem::setup_dof_halo_scheme()
  {
    // Find the number of elements stored on this processor
    const unsigned n_element = this->mesh_pt()->nelement();

    // Work out the all global equations to which this processor
    // contributes. The element range is specified by its first and last
    // index, so the call must be skipped if there are no elements:
    // n_element - 1 is unsigned and would otherwise wrap around to the
    // largest representable value. An empty processor simply
    // contributes to no equations.
    Vector<unsigned> my_eqns;
    if (n_element > 0)
    {
      this->get_my_eqns(this->Assembly_handler_pt, 0, n_element - 1, my_eqns);
    }

    // Build the halo scheme, based on the equations to which this
    // processor contributes
    Halo_scheme_pt =
      new DoubleVectorHaloScheme(this->Dof_distribution_pt, my_eqns);

    // Find pointers to all the halo dofs
    // There may be more of these than required by my_eqns
    //(but should not be less)
    std::map<unsigned, double*> halo_data_pt;
    this->get_all_halo_data(halo_data_pt);

    // Now setup the Halo_dofs
    Halo_scheme_pt->setup_halo_dofs(halo_data_pt, this->Halo_dof_pt);
  }
 
  //==================================================================
  /// Distribute the problem with documentation disabled and without a
  /// user-specified partitioning; statistics are reported if
  /// report_stats is true. Forwards to the general distribute(...)
  /// overload and returns the partitioning that it returns, e.g. for
  /// use in a restart.
  //==================================================================
  Vector<unsigned> Problem::distribute(const bool& report_stats)
  {
    // DocInfo object with documentation switched off
    DocInfo no_doc_info;
    no_doc_info.disable_doc();

    // All-zero partitioning with one entry per element of the global mesh
    const unsigned n_el = mesh_pt()->nelement();
    Vector<unsigned> zero_partition(n_el, 0);

    // Hand over to the general version and pass its result straight back
    return distribute(zero_partition, no_doc_info, report_stats);
  }
 
  //==================================================================
  /// Distribute the problem according to the specified partitioning,
  /// with documentation disabled. If every entry of the partitioning
  /// vector is zero, METIS is used to do the partitioning after all
  /// (a warning to that effect is issued in PARANOID builds).
  /// Returns the partitioning actually used, e.g. for restart.
  //==================================================================
  Vector<unsigned> Problem::distribute(
    const Vector<unsigned>& element_partition, const bool& report_stats)
  {
#ifdef PARANOID
    // Step over leading zeros; we reach the end only if all entries
    // are zero (which includes the case of an empty vector)
    const unsigned n_entry = element_partition.size();
    unsigned i_entry = 0;
    while ((i_entry < n_entry) && (element_partition[i_entry] == 0))
    {
      i_entry++;
    }
    const bool all_entries_are_zero = (i_entry == n_entry);

    // Tell the user that the specified partitioning will not be used
    if (all_entries_are_zero)
    {
      std::ostringstream warn_message;
      warn_message << "WARNING: All entries in specified partitioning vector \n"
                   << "         are zero -- will ignore this and use METIS\n"
                   << "         to perform the partitioning\n";
      OomphLibWarning(
        warn_message.str(), "Problem::distribute()", OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // DocInfo object with documentation switched off
    DocInfo no_doc_info;
    no_doc_info.disable_doc();

    // Hand over to the general version and pass its result straight back
    return distribute(element_partition, no_doc_info, report_stats);
  }
 
  //==================================================================
  /// Distribute the problem without a user-specified partitioning,
  /// documenting to the specified DocInfo object. Forwards to the
  /// general distribute(...) overload and returns the partitioning
  /// that it returns, e.g. for use in a restart.
  //==================================================================
  Vector<unsigned> Problem::distribute(DocInfo& doc_info,
                                       const bool& report_stats)
  {
    // All-zero partitioning with one entry per element of the global mesh
    const unsigned n_el = mesh_pt()->nelement();
    Vector<unsigned> zero_partition(n_el, 0);

    // Hand over to the general version and pass its result straight back
    return distribute(zero_partition, doc_info, report_stats);
  }
 
  //==================================================================
  /// Distribute the problem according to specified partition.
  /// (If all entries in partitioning vector are zero we use METIS
  /// to do the partitioning after all) and doc.
  /// Returns actual partitioning used, e.g. for restart.
  //==================================================================
  Vector<unsigned> Problem::distribute(
    const Vector<unsigned>& element_partition,
    DocInfo& doc_info,
    const bool& report_stats)
  {
    // Storage for number of processors and number of elements in global mesh
    int n_proc = this->communicator_pt()->nproc();
    int my_rank = this->communicator_pt()->my_rank();
    int n_element = mesh_pt()->nelement();
 
    // Vector to be returned
    Vector<unsigned> return_element_domain;
 
    // Buffer extreme cases
    if (n_proc == 1) // single-process job - don't do anything
    {
      if (report_stats)
      {
        std::ostringstream warn_message;
        warn_message << "WARNING: You've tried to distribute a problem over\n"
                     << "only one processor: this would make METIS crash.\n"
                     << "Ignoring your request for distribution.\n";
        OomphLibWarning(warn_message.str(),
                        "Problem::distribute()",
                        OOMPH_EXCEPTION_LOCATION);
      }
    }
    else if (n_proc > n_element) // more processors than elements
    {
      // Throw an error
      std::ostringstream error_stream;
      error_stream << "You have tried to distribute a problem\n"
                   << "but there are less elements than processors.\n"
                   << "Please re-run with more elements!\n"
                   << "Please also ensure that actions_before_distribute().\n"
                   << "and actions_after_distribute() are correctly set up.\n"
                   << std::endl;
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
    else
    {
      // We only distribute uniformly-refined meshes; buffer the case where
      // either mesh is not uniformly refined
      bool a_mesh_is_not_uniformly_refined = false;
      unsigned n_mesh = nsub_mesh();
      if (n_mesh == 0)
      {
        // Check refinement levels
        if (TreeBasedRefineableMeshBase* mmesh_pt =
              dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(0)))
        {
          unsigned min_ref_level = 0;
          unsigned max_ref_level = 0;
          mmesh_pt->get_refinement_levels(min_ref_level, max_ref_level);
          // If they are not the same
          if (max_ref_level != min_ref_level)
          {
            a_mesh_is_not_uniformly_refined = true;
          }
        }
      }
      else
      {
        for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
        {
          // Check refinement levels for each mesh individually
          // (one mesh is allowed to be "more uniformly refined" than another)
          if (TreeBasedRefineableMeshBase* mmesh_pt =
                dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(i_mesh)))
          {
            unsigned min_ref_level = 0;
            unsigned max_ref_level = 0;
            mmesh_pt->get_refinement_levels(min_ref_level, max_ref_level);
            // If they are not the same
            if (max_ref_level != min_ref_level)
            {
              a_mesh_is_not_uniformly_refined = true;
            }
          }
        }
      }
 
      // If any mesh is not uniformly refined
      if (a_mesh_is_not_uniformly_refined)
      {
        // Again it may make more sense to throw an error here as the user
        // will probably not be running a problem that is small enough to
        // fit the whole of on each processor
        std::ostringstream error_stream;
        error_stream << "You have tried to distribute a problem\n"
                     << "but at least one of your meshes is no longer\n"
                     << "uniformly refined.  In order to preserve the Tree\n"
                     << "and TreeForest structure, Problem::distribute() can\n"
                     << "only be called while meshes are uniformly refined.\n"
                     << std::endl;
        throw OomphLibError(
          error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
      }
      else
      {
        // Is there any global data?  If so, distributing the problem won't work
        if (nglobal_data() > 0)
        {
          std::ostringstream error_stream;
          error_stream << "You have tried to distribute a problem\n"
                       << "and there is some global data.\n"
                       << "This is not likely to work...\n"
                       << std::endl;
          throw OomphLibError(error_stream.str(),
                              OOMPH_CURRENT_FUNCTION,
                              OOMPH_EXCEPTION_LOCATION);
        }
 
        double t_start = 0;
        if (Doc_time_in_distribute)
        {
          t_start = TimingHelpers::timer();
        }
 
 
#ifdef PARANOID
        unsigned old_ndof = ndof();
#endif
 
        // Need to partition the global mesh before distributing
        Mesh* global_mesh_pt = mesh_pt();
 
        // Vector listing the affiliation of each element
        unsigned nelem = global_mesh_pt->nelement();
        Vector<unsigned> element_domain(nelem);
 
        // Number of elements that I'm in charge of, based on any
        // incoming partitioning
        unsigned n_my_elements = 0;
 
        // Have we used the pre-set partitioning
        bool used_preset_partitioning = false;
 
        // Partition the mesh, unless the partition has already been passed in
        // If it hasn't then the sum of all the entries of the vector should be
        // 0
        unsigned sum_element_partition = 0;
        unsigned n_part = element_partition.size();
        for (unsigned e = 0; e < n_part; e++)
        {
          // ... another one for me.
          if (int(element_partition[e]) == my_rank) n_my_elements++;
 
          sum_element_partition += element_partition[e];
        }
        if (sum_element_partition == 0)
        {
          oomph_info << "INFO: using METIS to partition elements" << std::endl;
          partition_global_mesh(global_mesh_pt, doc_info, element_domain);
          used_preset_partitioning = false;
        }
        else
        {
          oomph_info << "INFO: using pre-set partition of elements"
                     << std::endl;
          used_preset_partitioning = true;
          element_domain = element_partition;
        }
 
        // Set the GLOBAL Mesh as being distributed
        global_mesh_pt->set_communicator_pt(this->communicator_pt());
 
        double t_end = 0.0;
        if (Doc_time_in_distribute)
        {
          t_end = TimingHelpers::timer();
          oomph_info << "Time for partitioning of global mesh: "
                     << t_end - t_start << std::endl;
          t_start = TimingHelpers::timer();
        }
 
        // Store how many elements we had in the various sub-meshes
        // before actions_before_distribute() (which may empty some of
        // them).
        Vector<unsigned> n_element_in_old_submesh(n_mesh);
        if (n_mesh != 0)
        {
          for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
          {
            unsigned nsub_elem = mesh_pt(i_mesh)->nelement();
            n_element_in_old_submesh[i_mesh] = nsub_elem;
          }
        }
 
        // Partitioning complete; call actions before distribute
        actions_before_distribute();
 
        if (Doc_time_in_distribute)
        {
          t_end = TimingHelpers::timer();
          oomph_info << "Time for actions before distribute: "
                     << t_end - t_start << std::endl;
        }
 
        // This next bit is cheap -- omit timing
        // t_start = TimingHelpers::timer();
 
        // Number of submeshes (NB: some may have been deleted in
        //                          actions_before_distribute())
        n_mesh = nsub_mesh();
 
 
        // Prepare vector of vectors for submesh element domains
        Vector<Vector<unsigned>> submesh_element_domain(n_mesh);
 
        // The submeshes need to know their own element domains.
        // Also if any meshes have been emptied we ignore their
        // partitioning in the vector that we return from here
        return_element_domain.reserve(element_domain.size());
        if (n_mesh != 0)
        {
          unsigned count = 0;
          for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
          {
            unsigned nsub_elem = mesh_pt(i_mesh)->nelement();
            submesh_element_domain[i_mesh].resize(nsub_elem);
            unsigned nsub_elem_old = n_element_in_old_submesh[i_mesh];
            for (unsigned e = 0; e < nsub_elem_old; e++)
            {
              if (nsub_elem_old == nsub_elem)
              {
                submesh_element_domain[i_mesh][e] = element_domain[count];
                return_element_domain.push_back(element_domain[count]);
              }
              // return_element_domain.push_back(element_domain[count]);
              count++;
            }
          }
        }
        else
        {
          return_element_domain = element_domain;
        }
 
        if (Doc_time_in_distribute)
        {
          t_start = TimingHelpers::timer();
        }
 
        // Setup the map between "root" element and number in global mesh
        // (currently used in the load_balance() routines)
 
        // This map is only established for structured meshes, so we
        // need to check the type of mesh here
        if (n_mesh == 0)
        {
          // Check if the only one mesh is an structured mesh
          bool structured_mesh = true;
          TriangleMeshBase* tri_mesh_pt =
            dynamic_cast<TriangleMeshBase*>(mesh_pt(0));
          if (tri_mesh_pt != 0)
          {
            structured_mesh = false;
          } // if (tri_mesh_pt != 0)
          if (structured_mesh)
          {
            const unsigned n_ele = global_mesh_pt->nelement();
            Base_mesh_element_pt.resize(n_ele);
            Base_mesh_element_number_plus_one.clear();
            for (unsigned e = 0; e < n_ele; e++)
            {
              GeneralisedElement* el_pt = global_mesh_pt->element_pt(e);
              Base_mesh_element_number_plus_one[el_pt] = e + 1;
              Base_mesh_element_pt[e] = el_pt;
            } // for (e<n_ele)
          } // A TreeBaseMesh mesh
        } // if (n_mesh==0)
        else
        {
          // If we have submeshes then we only add those elements that
          // belong to structured meshes, but first compute the number
          // of total elements in the structured meshes
          unsigned nglobal_element = 0;
          // Store which submeshes are structured
          std::vector<bool> is_structured_mesh(n_mesh);
          for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
          {
            TriangleMeshBase* tri_mesh_pt =
              dynamic_cast<TriangleMeshBase*>(mesh_pt(i_mesh));
            if (tri_mesh_pt != 0)
            {
              // Set the flags to indicate this is not an structured
              // mesh
              is_structured_mesh[i_mesh] = false;
            } // if (tri_mesh_pt != 0)
            else
            {
              // Set the flags to indicate this is an structured
              // mesh
              is_structured_mesh[i_mesh] = true;
            } // else if (tri_mesh_pt != 0)
            // Check if mesh is an structured mesh
            if (is_structured_mesh[i_mesh])
            {
              nglobal_element += mesh_pt(i_mesh)->nelement();
            } // A TreeBaseMesh mesh
          } // for (i_mesh<n_mesh)
 
          // Once computed the number of elements, then resize the
          // structure
          Base_mesh_element_pt.resize(nglobal_element);
          Base_mesh_element_number_plus_one.clear();
          unsigned counter = 0;
          for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
          {
            // Check if mesh is an structured mesh
            if (is_structured_mesh[i_mesh])
            {
              const unsigned n_ele = mesh_pt(i_mesh)->nelement();
              for (unsigned e = 0; e < n_ele; e++)
              {
                GeneralisedElement* el_pt = mesh_pt(i_mesh)->element_pt(e);
                Base_mesh_element_number_plus_one[el_pt] = counter + 1;
                Base_mesh_element_pt[counter] = el_pt;
                // Inrease the global element number
                counter++;
              } // for (e<n_ele)
            } // An structured mesh
          } // for (i_mesh<n_mesh)
 
#ifdef PARANOID
          if (counter != nglobal_element)
          {
            std::ostringstream error_stream;
            error_stream
              << "The number of global elements (" << nglobal_element
              << ") is not the sameas the number of\nadded elements ("
              << counter << ") to the Base_mesh_element_pt data "
              << "structure!!!\n\n";
            throw OomphLibError(error_stream.str(),
                                "Problem::distribute()",
                                OOMPH_EXCEPTION_LOCATION);
          } // if (counter != nglobal_element)
#endif // #ifdef PARANOID
 
        } // else if (n_mesh==0)
 
        // Wipe everything if a pre-determined partitioning
        // didn't specify ANY elements for this processor
        // (typically happens during restarts with larger number
        // of processors -- in this case we really want an empty
        // processor rather than one with any "kept" halo elements)
        bool overrule_keep_as_halo_element_status = false;
        if ((n_my_elements == 0) && (used_preset_partitioning))
        {
          oomph_info << "INFO: We're over-ruling the \"keep as halo element\"\n"
                     << "      status because the preset partitioning\n"
                     << "      didn't place ANY elements on this processor,\n"
                     << "      probably because of a restart on a larger \n"
                     << "      number of processors\n";
          overrule_keep_as_halo_element_status = true;
        }
 
 
        // Distribute the (sub)meshes (i.e. sort out their halo lookup schemes)
        Vector<GeneralisedElement*> deleted_element_pt;
        if (n_mesh == 0)
        {
          global_mesh_pt->distribute(this->communicator_pt(),
                                     element_domain,
                                     deleted_element_pt,
                                     doc_info,
                                     report_stats,
                                     overrule_keep_as_halo_element_status);
        }
        else // There are submeshes, "distribute" each one separately
        {
          for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
          {
            if (report_stats)
            {
              oomph_info << "Distributing submesh " << i_mesh << std::endl
                         << "--------------------" << std::endl;
            }
            // Set the doc_info number to reflect the submesh
            doc_info.number() = i_mesh;
            mesh_pt(i_mesh)->distribute(this->communicator_pt(),
                                        submesh_element_domain[i_mesh],
                                        deleted_element_pt,
                                        doc_info,
                                        report_stats,
                                        overrule_keep_as_halo_element_status);
          }
          // Rebuild the global mesh
          rebuild_global_mesh();
        }
 
        // Null out information associated with deleted elements
        unsigned n_del = deleted_element_pt.size();
        for (unsigned e = 0; e < n_del; e++)
        {
          GeneralisedElement* el_pt = deleted_element_pt[e];
          unsigned old_el_number = Base_mesh_element_number_plus_one[el_pt] - 1;
          Base_mesh_element_number_plus_one[el_pt] = 0;
          Base_mesh_element_pt[old_el_number] = 0;
        }
 
        if (Doc_time_in_distribute)
        {
          t_end = TimingHelpers::timer();
          oomph_info << "Time for mesh-level distribution: " << t_end - t_start
                     << std::endl;
          t_start = TimingHelpers::timer();
        }
 
        // Now the problem has been distributed
        Problem_has_been_distributed = true;
 
        // Call actions after distribute
        actions_after_distribute();
 
        if (Doc_time_in_distribute)
        {
          t_end = TimingHelpers::timer();
          oomph_info << "Time for actions after distribute: " << t_end - t_start
                     << std::endl;
          t_start = TimingHelpers::timer();
        }
 
        // Re-assign the equation numbers (incl synchronisation if reqd)
        unsigned n_dof = assign_eqn_numbers();
        oomph_info << "Number of equations: " << n_dof << std::endl;
 
        if (Doc_time_in_distribute)
        {
          t_end = TimingHelpers::timer();
          oomph_info << "Time for re-assigning eqn numbers (in distribute): "
                     << t_end - t_start << std::endl;
        }
 
 
#ifdef PARANOID
        if (n_dof != old_ndof)
        {
          std::ostringstream error_stream;
          error_stream
            << "Number of dofs in distribute() has changed "
            << "from " << old_ndof << " to " << n_dof << "\n"
            << "Check that you've implemented any necessary "
               "actions_before/after\n"
            << "distribute functions, e.g. to pin redundant pressure dofs"
            << " etc.\n";
          throw OomphLibError(error_stream.str(),
                              OOMPH_CURRENT_FUNCTION,
                              OOMPH_EXCEPTION_LOCATION);
        }
#endif
 
      } // end if to check for uniformly refined mesh(es)
 
    } // end if to check number of processors vs. number of elements etc.
 
 
    // Force re-analysis of time spent on assembly each
    // elemental Jacobian
    Must_recompute_load_balance_for_assembly = true;
    Elemental_assembly_time.clear();
 
    // Return the partition vector used in the distribution
    return return_element_domain;
  }
 
  //==================================================================
  /// Partition the global mesh and fill \c element_domain with the
  /// processor number assigned to each element. Virtual so that it can be
  /// overloaded by any user; the default is to use METIS to perform the
  /// partitioning on processor 0, broadcast the result to all processors
  /// and then clean up the "special case" in which a processor ended up
  /// without any elements.
  ///
  /// \param global_mesh_pt mesh that is output (on processor 0 only) if
  ///        documentation is enabled in \c doc_info
  /// \param doc_info supplies the directory and number used for the
  ///        optional "complete_mesh" output file
  /// \param element_domain on return: processor number for each element
  /// \param report_stats if true, report any element switches and the
  ///        number of elements assigned to this processor
  ///
  /// Throws an OomphLibError if a processor has no elements and no
  /// processor has more than one element that could be handed over.
  //==================================================================
  void Problem::partition_global_mesh(Mesh*& global_mesh_pt,
                                      DocInfo& doc_info,
                                      Vector<unsigned>& element_domain,
                                      const bool& report_stats)
  {
    // Storage for number of processors and current processor
    int n_proc = this->communicator_pt()->nproc();
    int rank = this->communicator_pt()->my_rank();

    // Doc the original mesh on proc 0
    //--------------------------------
    if (doc_info.is_doc_enabled() && (rank == 0))
    {
      std::ostringstream filename;
      filename << doc_info.directory() << "/complete_mesh"
               << doc_info.number() << ".dat";
      global_mesh_pt->output(filename.str().c_str(), 5);
    }

    // Partition the mesh
    //-------------------
    // METIS Objective (0: minimise edge cut; 1: minimise total comm volume)
    unsigned objective = 0;

    // Do the partitioning on the root processor only...
    unsigned nelem = 0;
    if (rank == 0)
    {
      METIS::partition_mesh(this, n_proc, objective, element_domain);
      nelem = element_domain.size();
    }

    // ...and send the result to everybody else: first the size so that
    // the receiving vectors can be resized, then the actual data
    MPI_Bcast(&nelem, 1, MPI_UNSIGNED, 0, this->communicator_pt()->mpi_comm());
    element_domain.resize(nelem);

    // Taking the address of the first entry of an empty vector is not
    // allowed, so only broadcast if there is something to send. nelem is
    // the same on all processors at this point, so either everybody
    // takes part in the broadcast or nobody does.
    if (nelem > 0)
    {
      MPI_Bcast(&element_domain[0],
                nelem,
                MPI_UNSIGNED,
                0,
                this->communicator_pt()->mpi_comm());
    }

    // On very coarse meshes with larger numbers of processors, METIS
    // occasionally returns an element_domain Vector for which a particular
    // processor has no elements affiliated to it; the following fixes this

    // Convert element_domain to integer storage
    Vector<int> int_element_domain(nelem);
    for (unsigned e = 0; e < nelem; e++)
    {
      int_element_domain[e] = element_domain[e];
    }

    // Global storage for number of elements on each process
    int my_number_of_elements = 0;
    Vector<int> number_of_elements(n_proc, 0);

    for (unsigned e = 0; e < nelem; e++)
    {
      if (int_element_domain[e] == rank)
      {
        my_number_of_elements++;
      }
    }

    // Communicate the correct value for each single process into
    // the global storage vector
    MPI_Allgather(&my_number_of_elements,
                  1,
                  MPI_INT,
                  &number_of_elements[0],
                  1,
                  MPI_INT,
                  this->communicator_pt()->mpi_comm());

    // If a process has no elements then switch an element with the
    // process with the largest number of elements, assuming
    // that it still has enough elements left to share
    int max_number_of_elements = 0;
    int process_with_max_elements = 0;
    for (int d = 0; d < n_proc; d++)
    {
      if (number_of_elements[d] == 0)
      {
        // Find the process with maximum number of elements. The search is
        // only repeated once the current donor has run down to a single
        // element; until then we keep taking elements from the same donor.
        if (max_number_of_elements <= 1)
        {
          for (int dd = 0; dd < n_proc; dd++)
          {
            if (number_of_elements[dd] > max_number_of_elements)
            {
              max_number_of_elements = number_of_elements[dd];
              process_with_max_elements = dd;
            }
          }
        }

        // Check that this number of elements is okay for sharing
        if (max_number_of_elements <= 1)
        {
          // Throw an error if elements can't be shared
          std::ostringstream error_stream;
          error_stream << "No process has more than 1 element, and\n"
                       << "at least one process has no elements!\n"
                       << "Suggest rerunning with more refinement.\n"
                       << std::endl;
          throw OomphLibError(error_stream.str(),
                              OOMPH_CURRENT_FUNCTION,
                              OOMPH_EXCEPTION_LOCATION);
        }

        // Loop over the element domain vector and switch
        // one value for process "process_with_max_elements" with d
        for (unsigned e = 0; e < nelem; e++)
        {
          if (int_element_domain[e] == process_with_max_elements)
          {
            int_element_domain[e] = d;
            // Change the numbers associated with these processes
            number_of_elements[d]++;
            number_of_elements[process_with_max_elements]--;
            // Reduce the number of elements available on "max" process
            max_number_of_elements--;
            // Inform the user that a switch has taken place
            if (report_stats)
            {
              oomph_info << "INFO: Switched element domain at position " << e
                         << std::endl
                         << "from process " << process_with_max_elements
                         << " to process " << d << std::endl
                         << "which was given no elements by METIS partition"
                         << std::endl;
            }
            // Only need to do this once for this element loop, otherwise
            // this will take all the elements from "max" process and put them
            // in process d, thus leaving essentially the same problem!
            break;
          }
        }
      }
    }

    // Reassign new values to the element_domain vector
    for (unsigned e = 0; e < nelem; e++)
    {
      element_domain[e] = int_element_domain[e];
    }

    // The number of elements on this processor is only needed for the
    // report, so only count them if a report was requested
    if (report_stats)
    {
      unsigned count_elements = 0;
      for (unsigned e = 0; e < nelem; e++)
      {
        if (int(element_domain[e]) == rank)
        {
          count_elements++;
        }
      }

      oomph_info << "I have " << count_elements
                 << " elements from this partition" << std::endl
                 << std::endl;
    }
  }
 
  //==================================================================
  /// (Irreversibly) prune halo(ed) elements and nodes, usually
  /// after another round of refinement, to get rid of
  /// excessively wide halo layers. Note that the current
  /// mesh will be now regarded as the base mesh and no unrefinement
  /// relative to it will be possible once this function
  /// has been called.
  ///
  /// The request is ignored (with a warning written to oomph_info) if
  /// the problem has not been distributed or if only one processor is
  /// in use. Otherwise the function
  /// - calls actions_before_distribute(),
  /// - records which old base element every current element descends from,
  /// - prunes the mesh (or each submesh and then rebuilds the global mesh),
  /// - rebuilds Base_mesh_element_pt / Base_mesh_element_number_plus_one
  ///   using a numbering that is kept consistent between processors via
  ///   an MPI_Allreduce,
  /// - calls setup_base_mesh_info_after_pruning() and
  ///   actions_after_distribute(), and re-assigns the equation numbers.
  ///
  /// \param doc_info passed through to the mesh-level prune functions
  /// \param report_stats passed through to the mesh-level prune functions
  ///
  /// In PARANOID mode an OomphLibError is thrown if the number of new
  /// roots is inconsistent across processors or if the number of dofs
  /// changes (unless Bypass_increase_in_dof_check_during_pruning is set).
  //==================================================================
  void Problem::prune_halo_elements_and_nodes(DocInfo& doc_info,
                                              const bool& report_stats)
  {
    // Storage for number of processors and current processor
    int n_proc = this->communicator_pt()->nproc();

    // Has the problem been distributed yet?
    if (!Problem_has_been_distributed)
    {
      oomph_info
        << "WARNING: Problem::prune_halo_elements_and_nodes() was called on a "
        << "non-distributed Problem!" << std::endl;
      oomph_info << "Ignoring your request..." << std::endl;
    }
    else
    {
      // There are no halo layers to prune if it's a single-process job
      if (n_proc == 1)
      {
        oomph_info
          << "WARNING: You've tried to prune halo layers on a problem\n"
          << "with only one processor: this is unnecessary.\n"
          << "Ignoring your request." << std::endl
          << std::endl;
      }
      else
      {
#ifdef PARANOID
        // Remember the number of dofs so that we can check at the end
        // that pruning hasn't changed it
        unsigned old_ndof = ndof();
#endif

        double t_start = 0.0;
        if (Global_timings::Doc_comprehensive_timings)
        {
          t_start = TimingHelpers::timer();
        }

        // Call actions before distribute
        actions_before_distribute();

        double t_end = 0.0;
        if (Global_timings::Doc_comprehensive_timings)
        {
          t_end = TimingHelpers::timer();
          oomph_info << "Time for actions_before_distribute() in "
                     << "Problem::prune_halo_elements_and_nodes(): "
                     << t_end - t_start << std::endl;
          t_start = TimingHelpers::timer();
        }

        // Associate all elements with root in current Base mesh:
        // the map stores, for each element, the number (plus one) of the
        // old base element it belongs to.
        unsigned nel = Base_mesh_element_pt.size();
        std::map<GeneralisedElement*, unsigned>
          old_base_element_number_plus_one;
        // Flag per old base element; initialised to true and only set to
        // false below for base elements that exist locally and aren't halo
        std::vector<bool> old_root_is_halo_or_non_existent(nel, true);
        for (unsigned e = 0; e < nel; e++)
        {
          // Get the base element
          GeneralisedElement* base_el_pt = Base_mesh_element_pt[e];

          // Does it exist locally?
          if (base_el_pt != 0)
          {
            // Check if it's a halo element
            if (!base_el_pt->is_halo())
            {
              old_root_is_halo_or_non_existent[e] = false;
            }

            // Not refineable: It's only the element itself
            RefineableElement* ref_el_pt = 0;
            ref_el_pt = dynamic_cast<RefineableElement*>(base_el_pt);
            if (ref_el_pt == 0)
            {
              old_base_element_number_plus_one[base_el_pt] = e + 1;
            }
            // Refineable: Get entire tree of elements
            else
            {
              Vector<Tree*> tree_pt;
              ref_el_pt->tree_pt()->stick_all_tree_nodes_into_vector(tree_pt);
              unsigned ntree = tree_pt.size();
              for (unsigned t = 0; t < ntree; t++)
              {
                // Every object in the tree is associated with the same
                // old base element number
                old_base_element_number_plus_one[tree_pt[t]->object_pt()] =
                  e + 1;
              }
            }
          }
        }


        if (Global_timings::Doc_comprehensive_timings)
        {
          t_end = TimingHelpers::timer();
          oomph_info << "Time for setup old root elements in "
                     << "Problem::prune_halo_elements_and_nodes(): "
                     << t_end - t_start << std::endl;
          t_start = TimingHelpers::timer();
        }


        // Now remember the old number of base elements
        unsigned nel_base_old = nel;


        // Prune the halo elements and nodes of the mesh(es).
        // NOTE(review): deleted_element_pt is handed to the mesh-level
        // prune functions but is not read again in this function.
        Vector<GeneralisedElement*> deleted_element_pt;
        unsigned n_mesh = nsub_mesh();
        if (n_mesh == 0)
        {
          // Prune halo elements and nodes for the (single) global mesh
          mesh_pt()->prune_halo_elements_and_nodes(
            deleted_element_pt, doc_info, report_stats);
        }
        else
        {
          // Loop over individual submeshes and prune separately
          for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
          {
            mesh_pt(i_mesh)->prune_halo_elements_and_nodes(
              deleted_element_pt, doc_info, report_stats);
          }

          // Rebuild the global mesh
          rebuild_global_mesh();
        }

        if (Global_timings::Doc_comprehensive_timings)
        {
          t_end = TimingHelpers::timer();
          oomph_info << "Total time for all mesh-level prunes in "
                     << "Problem::prune_halo_elements_and_nodes(): "
                     << t_end - t_start << std::endl;
          t_start = TimingHelpers::timer();
        }

        // Loop over all elements in newly rebuilt mesh (which contains
        // all elements in "tree order"), find the roots
        // (which are either non-refineable elements or refineable elements
        // whose tree representations are TreeRoots)
        std::map<FiniteElement*, bool> root_el_done;

        // Vector storing vectors of pointers to new base elements associated
        // with the same old base element
        Vector<Vector<GeneralisedElement*>>
          new_base_element_associated_with_old_base_element(nel_base_old);

        unsigned n_meshes = n_mesh;
        // Change the value for the number of submeshes if there is only
        // one mesh so that the loop below works if we have only one
        // mesh
        if (n_meshes == 0)
        {
          n_meshes = 1;
        }

        // Store which of the (sub)meshes are structured meshes, i.e.
        // those that cannot be cast to a TriangleMeshBase
        std::vector<bool> is_structured_mesh(n_meshes);

        // Loop over all elements in the rebuilt mesh, but make sure
        // that we are only looping over the structured meshes.
        // NOTE(review): nel is re-accumulated here but is not read again
        // in the remainder of this function (nel_base_old holds the old
        // count that is used below).
        nel = 0;
        for (unsigned i_mesh = 0; i_mesh < n_meshes; i_mesh++)
        {
          TriangleMeshBase* tri_mesh_pt =
            dynamic_cast<TriangleMeshBase*>(mesh_pt(i_mesh));
          // Cast failed, i.e. this is not a TriangleMeshBase
          if (!(tri_mesh_pt != 0))
          {
            // Mark the mesh as structured mesh
            is_structured_mesh[i_mesh] = true;
            // Add the number of elements
            nel += mesh_pt(i_mesh)->nelement();
          } // if (!(tri_mesh_pt!=0))
          else
          {
            // Mark the mesh as nonstructured mesh
            is_structured_mesh[i_mesh] = false;
          } // else if (!(tri_mesh_pt!=0))
        } // for (i_mesh < n_mesh)

        // Go for all the meshes (if there are submeshes)
        for (unsigned i_mesh = 0; i_mesh < n_meshes; i_mesh++)
        {
          // Only work with the elements in the mesh if it is a
          // structured mesh
          if (is_structured_mesh[i_mesh])
          {
            // Get the number of elements in the submesh
            const unsigned nele_submesh = mesh_pt(i_mesh)->nelement();
            for (unsigned e = 0; e < nele_submesh; e++)
            {
              // Get the element
              GeneralisedElement* el_pt = mesh_pt(i_mesh)->element_pt(e);

              // NOTE(review): the lookups in old_base_element_number_plus_one
              // below use operator[], which returns 0 for an element that
              // was not registered above; subtracting one would then wrap
              // around. Presumably every element that survives pruning was
              // registered -- confirm.

              // Not refineable: It's definitely a new base element
              RefineableElement* ref_el_pt = 0;
              ref_el_pt = dynamic_cast<RefineableElement*>(el_pt);
              if (ref_el_pt == 0)
              {
                unsigned old_base_el_no =
                  old_base_element_number_plus_one[el_pt] - 1;
                new_base_element_associated_with_old_base_element
                  [old_base_el_no]
                    .push_back(el_pt);
              }
              // Refineable
              else
              {
                // Is it a tree root (after pruning)? In that case it's
                // a new base element
                if (dynamic_cast<TreeRoot*>(ref_el_pt->tree_pt()))
                {
                  unsigned old_base_el_no =
                    old_base_element_number_plus_one[el_pt] - 1;
                  new_base_element_associated_with_old_base_element
                    [old_base_el_no]
                      .push_back(el_pt);
                }
                else
                {
                  // Get associated root element
                  FiniteElement* root_el_pt =
                    ref_el_pt->tree_pt()->root_pt()->object_pt();

                  // Each root is only added once, namely when the first
                  // of its descendants is encountered. The old base
                  // element number is looked up via the descendant
                  // (el_pt) but it is the root that gets stored.
                  if (!root_el_done[root_el_pt])
                  {
                    root_el_done[root_el_pt] = true;
                    unsigned old_base_el_no =
                      old_base_element_number_plus_one[el_pt] - 1;
                    new_base_element_associated_with_old_base_element
                      [old_base_el_no]
                        .push_back(root_el_pt);
                  }
                }
              }
            } // for (e < nele_submesh)
          } // if (is_structured_mesh[i_mesh])
        } // for (i_mesh < n_mesh)

        // Create a vector that stores how many new root/base elements
        // got spawned from each old root/base element in the global mesh
        Vector<unsigned> local_n_new_root(nel_base_old);
#ifdef PARANOID
        Vector<unsigned> n_new_root_back(nel_base_old);
#endif
        for (unsigned e = 0; e < nel_base_old; e++)
        {
          local_n_new_root[e] =
            new_base_element_associated_with_old_base_element[e].size();

#ifdef PARANOID
          // Backup so we can check that halo data was consistent
          n_new_root_back[e] = local_n_new_root[e];
#endif
        }

        if (Global_timings::Doc_comprehensive_timings)
        {
          t_end = TimingHelpers::timer();
          oomph_info << "Time for setup of new base elements in "
                     << "Problem::prune_halo_elements_and_nodes(): "
                     << t_end - t_start << std::endl;
          t_start = TimingHelpers::timer();
        }

        // Now do reduce operation to get information for all
        // old root/base elements -- the pruned (halo!) base elements contain
        // fewer associated new roots. Taking the maximum over all
        // processors gives every processor the same count for each old
        // root/base element.
        Vector<unsigned> n_new_root(nel_base_old);
        MPI_Allreduce(&local_n_new_root[0],
                      &n_new_root[0],
                      nel_base_old,
                      MPI_UNSIGNED,
                      MPI_MAX,
                      this->communicator_pt()->mpi_comm());


        if (Global_timings::Doc_comprehensive_timings)
        {
          t_end = TimingHelpers::timer();
          oomph_info << "Time for allreduce in "
                     << "Problem::prune_halo_elements_and_nodes(): "
                     << t_end - t_start << std::endl;
          t_start = TimingHelpers::timer();
        }

        // Find out total number of base elements
        unsigned nel_base_new = 0;
        for (unsigned e = 0; e < nel_base_old; e++)
        {
          // Increment
          nel_base_new += n_new_root[e];

#ifdef PARANOID
          // If we already had data for this root previously then
          // the data ought to be consistent afterwards (since taking
          // the max of consistent numbers shouldn't change things -- this
          // deals with halo/haloed elements)
          if (!old_root_is_halo_or_non_existent[e])
          {
            if (n_new_root_back[e] != 0)
            {
              if (n_new_root_back[e] != n_new_root[e])
              {
                std::ostringstream error_stream;
                error_stream
                  << "Number of new root elements spawned from old root " << e
                  << ": " << n_new_root[e] << "\nis not consistent"
                  << " with previous value: " << n_new_root_back[e]
                  << std::endl;
                throw OomphLibError(error_stream.str(),
                                    OOMPH_CURRENT_FUNCTION,
                                    OOMPH_EXCEPTION_LOCATION);
              }
            }
          }

#endif
        }

        // Reset base_mesh information
        Base_mesh_element_pt.clear();
        Base_mesh_element_pt.resize(nel_base_new, 0);
        Base_mesh_element_number_plus_one.clear();

        // Now enumerate the new base/root elements consistently
        unsigned count = 0;
        for (unsigned e = 0; e < nel_base_old; e++)
        {
          // Old root is non-halo: Just add the new roots into the
          // new lookup scheme consecutively
          if (!old_root_is_halo_or_non_existent[e])
          {
            // Loop over new root/base element.
            // Note: this local scalar shadows the Vector n_new_root
            // declared above; inside this block the name refers to the
            // local count for old root e.
            unsigned n_new_root =
              new_base_element_associated_with_old_base_element[e].size();
            for (unsigned j = 0; j < n_new_root; j++)
            {
              // Store new root/base element
              GeneralisedElement* el_pt =
                new_base_element_associated_with_old_base_element[e][j];
              Base_mesh_element_pt[count] = el_pt;
              Base_mesh_element_number_plus_one[el_pt] = count + 1;

              // Bump counter
              count++;
            }
          }
          // Old root element is halo so skip insertion (i.e. leave
          // entries in lookup schemes nulled) but increase counter to
          // ensure consistency between processors
          else
          {
            // Here n_new_root is the all-reduced Vector again
            unsigned nskip = n_new_root[e];
            count += nskip;
          }
        }

        // Re-setup the map between "root" element and number in global mesh
        // (used in the load_balance() routines)
        setup_base_mesh_info_after_pruning();


        if (Global_timings::Doc_comprehensive_timings)
        {
          t_end = TimingHelpers::timer();
          oomph_info << "Time for finishing off base mesh info "
                     << "Problem::prune_halo_elements_and_nodes(): "
                     << t_end - t_start << std::endl;
          t_start = TimingHelpers::timer();
        }


        // Call actions after distribute
        actions_after_distribute();


        if (Global_timings::Doc_comprehensive_timings)
        {
          t_end = TimingHelpers::timer();
          oomph_info << "Time for actions_after_distribute() "
                     << "Problem::prune_halo_elements_and_nodes(): "
                     << t_end - t_start << std::endl;
          t_start = TimingHelpers::timer();
        }


        // Re-assign the equation numbers (incl synchronisation if reqd).
        // The returned number of dofs is only needed for the PARANOID
        // check below.
#ifdef PARANOID
        unsigned n_dof = assign_eqn_numbers();
#else
        assign_eqn_numbers();
#endif


        if (Global_timings::Doc_comprehensive_timings)
        {
          t_end = TimingHelpers::timer();
          oomph_info << "Time for assign_eqn_numbers() "
                     << "Problem::prune_halo_elements_and_nodes(): "
                     << t_end - t_start << std::endl;
          t_start = TimingHelpers::timer();
        }


#ifdef PARANOID
        // Pruning should not change the number of dofs, unless the
        // check has been explicitly bypassed
        if (!Bypass_increase_in_dof_check_during_pruning)
        {
          if (n_dof != old_ndof)
          {
            std::ostringstream error_stream;
            error_stream
              << "Number of dofs in prune_halo_elements_and_nodes() has "
                 "changed "
              << "from " << old_ndof << " to " << n_dof << "\n"
              << "Check that you've implemented any necessary "
                 "actions_before/after"
              << "\nadapt/distribute functions, e.g. to pin redundant pressure"
              << " dofs etc.\n";
            throw OomphLibError(error_stream.str(),
                                OOMPH_CURRENT_FUNCTION,
                                OOMPH_EXCEPTION_LOCATION);
          }
        }
#endif
      }
    }
  }
 
 
#endif
 
 
  //===================================================================
  /// Build the single (global) mesh from the submeshes stored in
  /// Sub_mesh_pt: an empty Mesh is allocated and rebuild_global_mesh()
  /// then merges the submeshes into it. The ordering is not necessarily
  /// optimal. In PARANOID mode an OomphLibError is thrown if a global
  /// mesh already exists or if there are no submeshes.
  //===================================================================
  void Problem::build_global_mesh()
  {
#ifdef PARANOID
    // Building twice would overwrite the pointer to the existing mesh
    if (Mesh_pt != 0)
    {
      const std::string already_built_message =
        "Problem::build_global_mesh() called,\n"
        " but a global mesh has already been built:\n"
        "Problem::Mesh_pt is not zero!\n";

      throw OomphLibError(already_built_message,
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);
    }

    // Nothing to assemble without submeshes
    if (Sub_mesh_pt.empty())
    {
      const std::string no_submesh_message =
        "Problem::build_global_mesh() called,\n"
        " but there are no submeshes:\n"
        "Problem::Sub_mesh_pt has no entries\n";

      throw OomphLibError(no_submesh_message,
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // Start from an empty mesh...
    Mesh_pt = new Mesh();

    // ...and let the rebuild function fill it from the submeshes
    rebuild_global_mesh();
  }
 
  //====================================================================
  /// If one of the submeshes has changed (e.g. by
  /// mesh adaptation) we need to update the global mesh.
  /// \b Note: The nodes boundary information refers to the
  /// boundary numbers within the submesh!
  /// N.B. This is essentially the same function as the Mesh constructor
  /// that assembles a single global mesh from submeshes.
  /// The global mesh must already exist (see build_global_mesh());
  /// in PARANOID mode an OomphLibError is thrown if it doesn't.
  //=====================================================================
  void Problem::rebuild_global_mesh()
  {
#ifdef PARANOID
    // The merge is performed through Mesh_pt so it must not be null
    if (Mesh_pt == 0)
    {
      std::string error_message = "Problem::rebuild_global_mesh() called,\n";
      error_message += " but no global mesh has been built yet:\n";
      error_message += "Problem::Mesh_pt is zero!\n";

      throw OomphLibError(
        error_message, OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // Use the function in mesh to merge the submeshes into this one
    Mesh_pt->merge_meshes(Sub_mesh_pt);
  }
 
 
  //================================================================
  /// Add a timestepper to the problem. The Time object is created if it
  /// does not exist yet, or resized if it stores fewer timesteps than the
  /// new timestepper requires, so that it contains the appropriate
  /// number of levels of storage. The timestepper is then given the
  /// pointer to the Time object.
  //================================================================
  void Problem::add_time_stepper_pt(TimeStepper* const& time_stepper_pt)
  {
    // Register the timestepper with the problem
    Time_stepper_pt.push_back(time_stepper_pt);

    // Number of timesteps this timestepper needs to have stored
    unsigned n_dt_needed = time_stepper_pt->ndt();

    if (Time_pt == 0)
    {
      // No time object yet: create one with exactly the required storage
      Time_pt = new Time(n_dt_needed);
      oomph_info << "Created Time with " << n_dt_needed << " timesteps"
                 << std::endl;
    }
    else if (Time_pt->ndt() < n_dt_needed)
    {
      // Existing storage is too small: enlarge it
      Time_pt->resize(n_dt_needed);
      oomph_info << "Resized Time to include " << n_dt_needed << " timesteps"
                 << std::endl;
    }
    else
    {
      // Existing storage is sufficient: just report that we are OK
      oomph_info << "Time object already has storage for " << n_dt_needed
                 << " timesteps" << std::endl;
    }

    // Finally hand the (possibly new) time object to the timestepper
    time_stepper_pt->time_pt() = Time_pt;
  }
 
  //================================================================
  /// Store the explicit time stepper for the problem and make sure
  /// that a time object exists: if there is none yet, one without
  /// storage for previous timesteps is created.
  //================================================================
  void Problem::set_explicit_time_stepper_pt(
    ExplicitTimeStepper* const& explicit_time_stepper_pt)
  {
    // Remember the explicit time stepper
    Explicit_time_stepper_pt = explicit_time_stepper_pt;

    // An existing time object is left untouched
    if (Time_pt != 0)
    {
      oomph_info << "Time object already exists " << std::endl;
      return;
    }

    // Otherwise create a time object that stores no previous timesteps
    Time_pt = new Time(0);
    oomph_info << "Created Time with storage for no previous timestep"
               << std::endl;
  }
 
 
#ifdef OOMPH_HAS_MPI
 
  //================================================================
  /// Set default first and last elements for parallel assembly
  /// of non-distributed problem.
  ///
  /// Does nothing if the problem has been distributed. Otherwise
  /// First_el_for_assembly[p] and Last_el_plus_one_for_assembly[p]
  /// are (re-)initialised for every processor p:
  /// - if there are at least as many elements as processors, the
  ///   elements are split into equal-sized contiguous ranges, with
  ///   the last processor also taking any left-over elements;
  /// - otherwise the elements are handed out in chunks of (at most)
  ///   10 to the lowest-numbered processors and all remaining
  ///   processors are assigned the empty range [0,0).
  //================================================================
  void Problem::set_default_first_and_last_element_for_assembly()
  {
    if (!Problem_has_been_distributed)
    {
      // Chunk size used if there are fewer elements than processors
      unsigned min_el = 10;

      // Reset ALL entries to the default (empty) range. Note: assign(...)
      // rather than resize(...) because resize leaves existing entries
      // untouched if the vectors already have n_proc entries; in the
      // "fewer elements than processors" case below only some of the
      // entries are overwritten, so stale ranges from a previous call
      // would otherwise survive.
      int n_proc = this->communicator_pt()->nproc();
      unsigned n_elements = Mesh_pt->nelement();
      First_el_for_assembly.assign(n_proc, 0);
      Last_el_plus_one_for_assembly.assign(n_proc, 0);

      // In the absence of any better knowledge distribute work evenly
      // over elements
      unsigned range = 0;
      unsigned lo_proc = 0;
      unsigned hi_proc = n_proc - 1;
      if (int(n_elements) >= n_proc)
      {
        // Equal share per processor (rounded down)
        range = unsigned(double(n_elements) / double(n_proc));
      }
      else
      {
        // Only the first few processors get any work
        range = min_el;
        lo_proc = 0;
        hi_proc = unsigned(double(n_elements) / double(min_el));
      }

      for (int p = lo_proc; p <= int(hi_proc); p++)
      {
        First_el_for_assembly[p] = p * range;

        // Clamp the end of the range to the number of elements
        unsigned last_el_plus_one = (p + 1) * range;
        if (last_el_plus_one > n_elements) last_el_plus_one = n_elements;
        Last_el_plus_one_for_assembly[p] = last_el_plus_one;
      }

      // Last one needs to incorporate any dangling elements
      if (int(n_elements) >= n_proc)
      {
        Last_el_plus_one_for_assembly[n_proc - 1] = n_elements;
      }

      // Doc
      if (n_proc > 1)
      {
        if (!Shut_up_in_newton_solve)
        {
          oomph_info << "Problem is not distributed. Parallel assembly of "
                     << "Jacobian uses default partitioning: " << std::endl;
          for (int p = 0; p < n_proc; p++)
          {
            // A processor only has work if its range is non-empty; testing
            // for a non-zero end alone would misreport an empty range such
            // as [20,20) as "from element 20 to 19".
            if (Last_el_plus_one_for_assembly[p] > First_el_for_assembly[p])
            {
              oomph_info << "Proc " << p << " assembles from element "
                         << First_el_for_assembly[p] << " to "
                         << Last_el_plus_one_for_assembly[p] - 1 << " \n";
            }
            else
            {
              oomph_info << "Proc " << p << " assembles no elements\n";
            }
          }
        }
      }
    }
  }
 
 
  //=======================================================================
  /// Helper function to re-assign the first and last elements to be
  /// assembled by each processor during parallel assembly for
  /// non-distributed problem.
  ///
  /// Outline:
  /// - all processors synchronise, then the per-element assembly times
  ///   stored in Elemental_assembly_time are gathered on processor 0,
  ///   using the current First_el_for_assembly /
  ///   Last_el_plus_one_for_assembly ranges to decide who contributes
  ///   which entries;
  /// - processor 0 walks through the elements and cuts them into
  ///   contiguous ranges whose accumulated time just exceeds
  ///   (total time)/(number of processors), while making sure that
  ///   every processor gets at least one element;
  /// - processor 0 sends each other processor its (first,last) pair;
  /// - every processor stores its own range and overwrites the entries
  ///   for all other processors with the dummy range [0,1).
  ///
  /// Returns without changing anything (after the initial barrier) if
  /// there are fewer timings than processors.
  //=======================================================================
  void Problem::recompute_load_balanced_assembly()
  {
    // Wait until all processes have completed/timed their assembly
    MPI_Barrier(this->communicator_pt()->mpi_comm());

    // Storage for number of processors and current processor
    int n_proc = this->communicator_pt()->nproc();
    int rank = this->communicator_pt()->my_rank();

    // Don't bother to update if we've got fewer elements than
    // processors
    unsigned nel = Elemental_assembly_time.size();
    if (int(nel) < n_proc)
    {
      oomph_info << "Not re-computing distribution of elemental assembly\n"
                 << "because there are fewer elements than processors\n";
      return;
    }

    // Setup vectors storing the number of element timings to be sent
    // and the offset in the final vector
    Vector<int> receive_count(n_proc);
    Vector<int> displacement(n_proc);
    int offset = 0;
    for (int p = 0; p < n_proc; p++)
    {
      // Current distribution of labour (inclusive range el_lo..el_hi)
      unsigned el_lo = First_el_for_assembly[p];
      unsigned el_hi = Last_el_plus_one_for_assembly[p] - 1;

      // Number of timings to be sent and offset from start in
      // final vector
      receive_count[p] = el_hi - el_lo + 1;
      displacement[p] = offset;
      offset += el_hi - el_lo + 1;
    }

    // Make temporary c-style array to avoid over-writing in Gatherv below
    // (Elemental_assembly_time itself is used as the receive buffer)
    double* el_ass_time = new double[nel];
    for (unsigned e = 0; e < nel; e++)
    {
      el_ass_time[e] = Elemental_assembly_time[e];
    }

    // Gather timings on root processor: each processor contributes the
    // timings of the elements in its own current range
    unsigned nel_local =
      Last_el_plus_one_for_assembly[rank] - 1 - First_el_for_assembly[rank] + 1;
    MPI_Gatherv(&el_ass_time[First_el_for_assembly[rank]],
                nel_local,
                MPI_DOUBLE,
                &Elemental_assembly_time[0],
                &receive_count[0],
                &displacement[0],
                MPI_DOUBLE,
                0,
                this->communicator_pt()->mpi_comm());
    delete[] el_ass_time;

    // Vector of first and last elements for each processor:
    // first_and_last_element[p][0] = first, [p][1] = last (inclusive)
    Vector<Vector<int>> first_and_last_element(n_proc);
    for (int p = 0; p < n_proc; p++)
    {
      first_and_last_element[p].resize(2);
    }

    // Re-distribute work (decided on the root processor only)
    if (rank == 0)
    {
      if (!Shut_up_in_newton_solve)
      {
        oomph_info
          << std::endl
          << "Re-assigning distribution of element assembly over processors:"
          << std::endl;
      }

      // Get total assembly time
      double total = 0.0;
      unsigned n_elements = Mesh_pt->nelement();
      for (unsigned e = 0; e < n_elements; e++)
      {
        total += Elemental_assembly_time[e];
      }

      // Target load per processor
      double target_load = total / double(n_proc);

      // We're on the root processor: Always start with the first element
      int proc = 0;
      first_and_last_element[0][0] = 0;

      // Highest element we can help ourselves to if we want to leave
      // at least one element for all subsequent processors
      unsigned max_el_avail = n_elements - n_proc;

      // Initialise total work allocated (to the current processor)
      total = 0.0;
      for (unsigned e = 0; e < n_elements; e++)
      {
        total += Elemental_assembly_time[e];

        // Once we have reached the target load or we've used up practically
        // all the elements...
        if ((total > target_load) || (e == max_el_avail))
        {
          // Last element for current processor
          first_and_last_element[proc][1] = e;

          // Provided that we are not on the last processor
          if (proc < (n_proc - 1))
          {
            // Set first element for next one
            first_and_last_element[proc + 1][0] = e + 1;

            // Move on to the next processor
            proc++;
          }

          // Can have one more... (one fewer processor still needs an element)
          max_el_avail++;

          // Re-initialise the time
          total = 0.0;
        } // end of test for "total exceeds target"
      }


      // Last element for last processor: it always runs to the end of
      // the mesh, overwriting whatever the loop above stored for it
      first_and_last_element[n_proc - 1][1] = n_elements - 1;


      // The following block should probably be paranoidified away
      // but we've screwed the logic up so many times that I feel
      // it's safer to keep it...
      // It checks that every range is non-empty and that consecutive
      // ranges are contiguous.
      // NOTE(review): the exception below is thrown on the root processor
      // only, before anything has been sent; the other processors are
      // waiting in MPI_Recv at that point -- confirm how this is handled.
      bool wrong = false;
      std::ostringstream error_stream;
      for (int p = 0; p < n_proc - 1; p++)
      {
        unsigned first_of_current = first_and_last_element[p][0];
        unsigned last_of_current = first_and_last_element[p][1];
        if (first_of_current > last_of_current)
        {
          wrong = true;
          error_stream << "Error: First/last element of proc " << p << ": "
                       << first_of_current << " " << last_of_current
                       << std::endl;
        }
        unsigned first_of_next = first_and_last_element[p + 1][0];
        if (first_of_next != (last_of_current + 1))
        {
          wrong = true;
          error_stream << "Error: First element of proc " << p + 1 << ": "
                       << first_of_next << " and last element of proc " << p
                       << ": " << last_of_current << std::endl;
        }
      }
      if (wrong)
      {
        throw OomphLibError(
          error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
      }


      // THIS TIDY UP SHOULD NO LONGER BE REQUIRED AND CAN GO AT SOME POINT

      // //If we haven't got to the end of the processor list then
      // //need to shift things about slightly because the processors
      // //at the end will be empty.
      // //This can occur when you have very fast assembly times and the
      // //rounding errors mean that the targets are achieved before all
      // processors
      // //have been visited.
      // //Happens a lot when you massively oversubscribe the CPUs (which was
      // //only ever for testing!)
      // if (proc!=n_proc-1)
      //  {
      //   oomph_info
      //    << "First pass did not allocate elements on every processor\n";
      //   oomph_info <<
      //    "Moving elements so that each processor has at least one\n";

      //   //Work out number of empty processos
      //   unsigned n_empty_processors = n_proc - proc + 1;

      //   //Loop over the processors that do have elements
      //   //and work out how many we need to steal elements from
      //   unsigned n_element_on_processors=0;
      //   do
      //    {
      //     //Step down the processors
      //     --proc;
      //     //Add the current processor to the number of empty processors
      //     //because the elements have to be shared between processors
      //     //including the one(s) on which they are currently stored.
      //     ++n_empty_processors;
      //     n_element_on_processors +=
      //      (first_and_last_element[proc][1] -
      //       first_and_last_element[proc][0] + 1);
      //    }
      //   while(n_element_on_processors < n_empty_processors);

      //   //Should now be able to put one element on each processor
      //   //Start from the end and do so
      //   unsigned current_element  = n_elements-1;
      //   for(int p=n_proc-1;p>proc;p--)
      //    {
      //     first_and_last_element[p][1] = current_element;
      //     first_and_last_element[p][0] = --current_element;
      //    }

      //   //Now for the last processor we touched, just adjust the final value
      //   first_and_last_element[proc][1] = current_element;
      //  }
      // //Otherwise just put the rest of the elements on the final
      // //processor
      // else
      //  {
      //   // Last one
      //   first_and_last_element[n_proc-1][1]=n_elements-1;
      //  }


      // END PRESUMED-TO-BE-UNNECESSARY BLOCK...


      // Now communicate the information

      // Set local information for this (root) processor
      First_el_for_assembly[0] = first_and_last_element[0][0];
      Last_el_plus_one_for_assembly[0] = first_and_last_element[0][1] + 1;

      if (!Shut_up_in_newton_solve)
      {
        oomph_info << "Processor " << 0 << " assembles Jacobians"
                   << " from elements " << first_and_last_element[0][0]
                   << " to " << first_and_last_element[0][1] << " "
                   << std::endl;
      }

      // Only now can we send the information to the other processors:
      // each gets its own (first,last) pair as two ints with tag 0
      for (int p = 1; p < n_proc; ++p)
      {
        MPI_Send(&first_and_last_element[p][0],
                 2,
                 MPI_INT,
                 p,
                 0,
                 this->communicator_pt()->mpi_comm());


        if (!Shut_up_in_newton_solve)
        {
          oomph_info << "Processor " << p << " assembles Jacobians"
                     << " from elements " << first_and_last_element[p][0]
                     << " to " << first_and_last_element[p][1] << " "
                     << std::endl;
        }
      }
    }
    // Receive first and last element from root on non-master processors
    else
    {
      // aux[0] = first element, aux[1] = last element (inclusive)
      Vector<int> aux(2);
      MPI_Status status;
      MPI_Recv(&aux[0],
               2,
               MPI_INT,
               0,
               0,
               this->communicator_pt()->mpi_comm(),
               &status);
      First_el_for_assembly[rank] = aux[0];
      Last_el_plus_one_for_assembly[rank] = aux[1] + 1;
    }

    // Wipe all others: each processor only keeps its own range; the
    // entries for all other processors are set to the dummy range [0,1)
    for (int p = 0; p < n_proc; p++)
    {
      if (p != rank)
      {
        First_el_for_assembly[p] = 0;
        Last_el_plus_one_for_assembly[p] = 1;
      }
    }

    // The equations assembled by this processor may have changed so
    // we must resize the sparse assemble with arrays previous allocation
    Sparse_assemble_with_arrays_previous_allocation.resize(0);
  }
 
#endif
 
  //================================================================
  /// Assign all equation numbers for problem: Deals with global
  /// data (= data that isn't attached to any elements) and then
  /// does the equation numbering for the elements.  Bool argument
  /// can be set to false to ignore assigning local equation numbers
  /// (necessary in the parallel implementation of locate_zeta
  /// between multiple meshes).
  ///
  /// Sequence of operations:
  /// - (MPI only) flag that the load balancing for assembly must be
  ///   recomputed and, for non-distributed problems, reset the default
  ///   element ranges for parallel assembly;
  /// - complete the setup of dependencies for all elements (and, with
  ///   MPI, for all external halo elements);
  /// - number the global Data, the global mesh and any spine meshes,
  ///   filling Dof_pt from scratch;
  /// - (distributed problems only) synchronise the equation numbers,
  ///   remove duplicate external halo data and, if any processor
  ///   removed something, repeat the numbering once;
  /// - optionally assign the local equation numbers in the mesh(es).
  ///
  /// \param assign_local_eqn_numbers if false, the final assignment of
  ///        local equation numbers is skipped.
  /// \return the number of degrees of freedom produced by the numbering.
  /// \throws OomphLibError (PARANOID builds only) if no global mesh has
  ///         been built.
  //================================================================
  unsigned long Problem::assign_eqn_numbers(
    const bool& assign_local_eqn_numbers)
  {
    // Check that the global mesh has been built
#ifdef PARANOID
    if (Mesh_pt == 0)
    {
      std::ostringstream error_stream;
      error_stream << "Global mesh does not exist, so equation numbers cannot "
                      "be assigned.\n";
      // Check for sub meshes
      if (nsub_mesh() == 0)
      {
        error_stream << "There aren't even any sub-meshes in the Problem.\n"
                     << "You can set the global mesh directly by using\n"
                     << "Problem::mesh_pt() = my_mesh_pt;\n"
                     << "OR you can use Problem::add_sub_mesh(mesh_pt); "
                     << "to add a sub mesh.\n";
      }
      else
      {
        error_stream << "There are " << nsub_mesh() << " sub-meshes.\n";
      }
      error_stream << "You need to call Problem::build_global_mesh() to create "
                      "a global mesh\n"
                   << "from the sub-meshes.\n\n";

      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // Number of submeshes
    unsigned n_sub_mesh = Sub_mesh_pt.size();

#ifdef OOMPH_HAS_MPI

    // Storage for number of processors
    int n_proc = this->communicator_pt()->nproc();


    if (n_proc > 1)
    {
      // Force re-analysis of time spent on assembly each
      // elemental Jacobian
      Must_recompute_load_balance_for_assembly = true;
      Elemental_assembly_time.clear();
    }
    else
    {
      // Single processor: nothing to balance
      Must_recompute_load_balance_for_assembly = false;
    }

    // Re-distribution of elements over processors during assembly
    // must be recomputed
    if (!Problem_has_been_distributed)
    {
      // Set default first and last elements for parallel assembly
      // of non-distributed problem.
      set_default_first_and_last_element_for_assembly();
    }

#endif


    // Timer is only read if comprehensive timings are requested
    double t_start = 0.0;
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_start = TimingHelpers::timer();
    }

    // Loop over all elements in the mesh and set up any additional
    // dependencies that they may have (e.g. storing the geometric
    // Data, i.e. Data that affects an element's shape in elements
    // with algebraic node-update functions
    unsigned nel = Mesh_pt->nelement();
    for (unsigned e = 0; e < nel; e++)
    {
      Mesh_pt->element_pt(e)->complete_setup_of_dependencies();
    }

#ifdef OOMPH_HAS_MPI
    // Complete setup of dependencies for external halo elements too
    // (for every sub-mesh and every processor)
    unsigned n_mesh = this->nsub_mesh();
    for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
    {
      for (int iproc = 0; iproc < n_proc; iproc++)
      {
        unsigned n_ext_halo_el = mesh_pt(i_mesh)->nexternal_halo_element(iproc);
        for (unsigned e = 0; e < n_ext_halo_el; e++)
        {
          mesh_pt(i_mesh)
            ->external_halo_element_pt(iproc, e)
            ->complete_setup_of_dependencies();
        }
      }
    }
#endif


    double t_end = 0.0;
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info
        << "Time for complete setup of dependencies in assign_eqn_numbers: "
        << t_end - t_start << std::endl;
    }


    // Initialise number of dofs for reserve below
    unsigned n_dof = 0;

    // Potentially loop over remainder of routine, possibly re-visiting all
    // those parts that must be redone, following the removal of duplicate
    // external halo data. The loop body runs at most twice; in serial
    // builds and for non-distributed problems it is left after the first
    // pass.
    for (unsigned loop_count = 0; loop_count < 2; loop_count++)
    {
      //(Re)-set the dof pointer to zero length because entries are
      // pushed back onto it -- if it's not reset here then we get into
      // trouble during mesh refinement when we reassign all dofs
      Dof_pt.resize(0);

      // Reserve from previous allocation if we're going around again
      // (n_dof is still zero on the first pass)
      Dof_pt.reserve(n_dof);

      // Reset the equation number
      unsigned long equation_number = 0;

      // Now set equation numbers for the global Data
      unsigned Nglobal_data = nglobal_data();
      for (unsigned i = 0; i < Nglobal_data; i++)
      {
        Global_data_pt[i]->assign_eqn_numbers(equation_number, Dof_pt);
      }

      if (Global_timings::Doc_comprehensive_timings)
      {
        t_start = TimingHelpers::timer();
      }

      // Call assign equation numbers on the global mesh
      n_dof = Mesh_pt->assign_global_eqn_numbers(Dof_pt);

      // Deal with the spine meshes additional numbering
      // If there is only one mesh
      if (n_sub_mesh == 0)
      {
        if (SpineMesh* const spine_mesh_pt = dynamic_cast<SpineMesh*>(Mesh_pt))
        {
          n_dof = spine_mesh_pt->assign_global_spine_eqn_numbers(Dof_pt);
        }
      }
      // Otherwise loop over the sub meshes
      else
      {
        // Assign global equation numbers first
        for (unsigned i = 0; i < n_sub_mesh; i++)
        {
          if (SpineMesh* const spine_mesh_pt =
                dynamic_cast<SpineMesh*>(Sub_mesh_pt[i]))
          {
            n_dof = spine_mesh_pt->assign_global_spine_eqn_numbers(Dof_pt);
          }
        }
      }

      if (Global_timings::Doc_comprehensive_timings)
      {
        t_end = TimingHelpers::timer();
        oomph_info
          << "Time for assign_global_eqn_numbers in assign_eqn_numbers: "
          << t_end - t_start << std::endl;
        t_start = TimingHelpers::timer();
      }


#ifdef OOMPH_HAS_MPI

      // reset previous allocation
      Parallel_sparse_assemble_previous_allocation = 0;

      // Only synchronise if the problem has actually been
      // distributed.
      if (Problem_has_been_distributed)
      {
        // Synchronise the equation numbers and return the total
        // number of degrees of freedom in the overall problem
        // Do not assign local equation numbers -- we're doing this
        // below.
        n_dof = synchronise_eqn_numbers(false);
      }
      // ..else just setup the Dof_distribution_pt
      // NOTE: this is setup by synchronise_eqn_numbers(...)
      // if Problem_has_been_distributed
      // (In serial builds the block below is executed unconditionally.)
      else
#endif
      {
        Dof_distribution_pt->build(Communicator_pt, n_dof, false);
      }

      if (Global_timings::Doc_comprehensive_timings)
      {
        t_end = TimingHelpers::timer();
        oomph_info << "Time for Problem::synchronise_eqn_numbers in "
                   << "Problem::assign_eqn_numbers: " << t_end - t_start
                   << std::endl;
      }


#ifdef OOMPH_HAS_MPI


      // Now remove duplicate data in external halo elements
      if (Problem_has_been_distributed)
      {
        if (Global_timings::Doc_comprehensive_timings)
        {
          t_start = TimingHelpers::timer();
        }

        // Monitor if we've actually changed anything
        bool actually_removed_some_data = false;

        // Only do it once! (i.e. on the first pass through the loop)
        if (loop_count == 0)
        {
          if (n_sub_mesh == 0)
          {
            remove_duplicate_data(Mesh_pt, actually_removed_some_data);
          }
          else
          {
            // Accumulate the "removed something" flag over all sub-meshes
            for (unsigned i = 0; i < n_sub_mesh; i++)
            {
              bool tmp_actually_removed_some_data = false;
              remove_duplicate_data(Sub_mesh_pt[i],
                                    tmp_actually_removed_some_data);
              if (tmp_actually_removed_some_data)
                actually_removed_some_data = true;
            }
          }
        }


        if (Global_timings::Doc_comprehensive_timings)
        {
          t_end = TimingHelpers::timer();
          std::stringstream tmp;
          tmp << "Time for calls to Problem::remove_duplicate_data in "
              << "Problem::assign_eqn_numbers: " << t_end - t_start
              << " ; have ";
          if (!actually_removed_some_data)
          {
            tmp << " not ";
          }
          tmp << " removed some/any data.\n";
          oomph_info << tmp.str();
          t_start = TimingHelpers::timer();
        }

        // Break out of the loop if we haven't done anything here.
        // Local status: 1 if this processor removed data, 0 otherwise
        unsigned status = 0;
        if (actually_removed_some_data) status = 1;

        // Allreduce to check if anyone has removed any data
        // (maximum over all processors)
        unsigned overall_status = 0;
        MPI_Allreduce(&status,
                      &overall_status,
                      1,
                      MPI_UNSIGNED,
                      MPI_MAX,
                      this->communicator_pt()->mpi_comm());


        if (Global_timings::Doc_comprehensive_timings)
        {
          t_end = TimingHelpers::timer();
          std::stringstream tmp;
          tmp
            << "Time for MPI_Allreduce after Problem::remove_duplicate_data in "
            << "Problem::assign_eqn_numbers: " << t_end - t_start << std::endl;
          oomph_info << tmp.str();
          t_start = TimingHelpers::timer();
        }

        // Bail out if we haven't done anything here (no processor
        // removed any data, so the numbering does not have to be redone)
        if (overall_status != 1)
        {
          break;
        }

        // Big tidy up: Remove null pointers from halo/haloed node storage
        // for all meshes (this involves comms and therefore must be
        // performed outside loop over meshes so the all-to-all is only
        // done once)
        remove_null_pointers_from_external_halo_node_storage();

        // Time it...
        if (Global_timings::Doc_comprehensive_timings)
        {
          // Note: this is a new local variable that shadows the
          // function-level t_end
          double t_end = TimingHelpers::timer();
          oomph_info << "Total time for "
                     << "Problem::remove_null_pointers_from_external_halo_node_"
                        "storage(): "
                     << t_end - t_start << std::endl;
        }
      }
      else
      {
        // Problem not distributed; no need for another loop
        break;
      }

#else

      // Serial run: Again no need for a second loop
      break;

#endif

    } // end of loop over fcts that need to be re-executed if
    // we've removed duplicate external data


    // Resize the sparse assemble with arrays previous allocation
    Sparse_assemble_with_arrays_previous_allocation.resize(0);


    if (Global_timings::Doc_comprehensive_timings)
    {
      t_start = TimingHelpers::timer();
    }

    // Finally assign local equations (to the single mesh or, if there
    // are sub-meshes, to each sub-mesh in turn)
    if (assign_local_eqn_numbers)
    {
      if (n_sub_mesh == 0)
      {
        Mesh_pt->assign_local_eqn_numbers(Store_local_dof_pt_in_elements);
      }
      else
      {
        for (unsigned i = 0; i < n_sub_mesh; i++)
        {
          Sub_mesh_pt[i]->assign_local_eqn_numbers(
            Store_local_dof_pt_in_elements);
        }
      }
    }

    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info << "Total time for all Mesh::assign_local_eqn_numbers in "
                 << "Problem::assign_eqn_numbers: " << t_end - t_start
                 << std::endl;
    }


    // and return the total number of DOFs
    return n_dof;
  }
  //================================================================
  /// Function to describe the dofs in terms of the global
  /// equation number, i.e. what type of value (nodal value of
  /// a Node; value in a Data object; value of internal Data in an
  /// element; etc) is the unknown with a certain global equation number.
  /// Output stream defaults to oomph_info.
  //================================================================
  void Problem::describe_dofs(std::ostream& out) const
  {
    // Check that the global mesh has been build
#ifdef PARANOID
    if (Mesh_pt == 0)
    {
      std::ostringstream error_stream;
      error_stream
        << "Global mesh does not exist, so equation numbers cannot be found.\n";
      // Check for sub meshes
      if (nsub_mesh() == 0)
      {
        error_stream << "There aren't even any sub-meshes in the Problem.\n"
                     << "You can set the global mesh directly by using\n"
                     << "Problem::mesh_pt() = my_mesh_pt;\n"
                     << "OR you can use Problem::add_sub_mesh(mesh_pt); "
                     << "to add a sub mesh.\n";
      }
      else
      {
        error_stream << "There are " << nsub_mesh() << " sub-meshes.\n";
      }
      error_stream << "You need to call Problem::build_global_mesh() to create "
                      "a global mesh\n"
                   << "from the sub-meshes.\n\n";
 
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif
 
    out
      << "Although this program will describe the degrees of freedom in the \n"
      << "problem, it will do so using the typedef for the elements. This is \n"
      << "not neccesarily human readable, but there is a solution.\n"
      << "Pipe your program's output through c++filt, with the argument -t.\n"
      << "e.g. \"./two_d_multi_poisson | c++filt -t > ReadableOutput.txt\".\n "
      << "(Disregarding the quotes)\n\n\n";
 
    out << "Classifying Global Equation Numbers" << std::endl;
    out << std::string(80, '-') << std::endl;
 
    // Number of submeshes
    unsigned n_sub_mesh = Sub_mesh_pt.size();
 
    // Classify Global dofs
    unsigned Nglobal_data = nglobal_data();
    for (unsigned i = 0; i < Nglobal_data; i++)
    {
      std::stringstream conversion;
      conversion << " in Global Data " << i << ".";
      std::string in(conversion.str());
      Global_data_pt[i]->describe_dofs(out, in);
    }
 
    // Put string in limiting scope.
    {
      // Descend into assignment for mesh.
      std::string in(" in Problem's Only Mesh.");
      Mesh_pt->describe_dofs(out, in);
    }
 
    // Deal with the spine meshes additional numbering:
    // If there is only one mesh:
    if (n_sub_mesh == 0)
    {
      if (SpineMesh* const spine_mesh_pt = dynamic_cast<SpineMesh*>(Mesh_pt))
      {
        std::string in(" in Problem's Only SpineMesh.");
        spine_mesh_pt->describe_spine_dofs(out, in);
      }
    }
    // Otherwise loop over the sub meshes
    else
    {
      // Assign global equation numbers first
      for (unsigned i = 0; i < n_sub_mesh; i++)
      {
        if (SpineMesh* const spine_mesh_pt =
              dynamic_cast<SpineMesh*>(Sub_mesh_pt[i]))
        {
          std::stringstream conversion;
          conversion << " in Sub-SpineMesh " << i << ".";
          std::string in(conversion.str());
          spine_mesh_pt->describe_spine_dofs(out, in);
        } // end if.
      } // end for.
    } // end else.
 
 
    out << std::string(80, '\\') << std::endl;
    out << std::string(80, '\\') << std::endl;
    out << std::string(80, '\\') << std::endl;
    out << "Classifying global eqn numbers in terms of elements." << std::endl;
    out << std::string(80, '-') << std::endl;
    out << "Eqns   | Source" << std::endl;
    out << std::string(80, '-') << std::endl;
 
    if (n_sub_mesh == 0)
    {
      std::string in(" in Problem's Only Mesh.");
      Mesh_pt->describe_local_dofs(out, in);
    }
    else
    {
      for (unsigned i = 0; i < n_sub_mesh; i++)
      {
        std::stringstream conversion;
        conversion << " in Sub-Mesh " << i << ".";
        std::string in(conversion.str());
        Sub_mesh_pt[i]->describe_local_dofs(out, in);
      } // End for
    } // End else
  } // End problem::describe_dofs(...)
 
 
  //================================================================
  /// Copy the current values of all unknowns into \c dofs. The
  /// vector is rebuilt with the problem's dof distribution (and
  /// zero-initialised) before the values are copied across.
  //================================================================
  void Problem::get_dofs(DoubleVector& dofs) const
  {
    // Number of unknowns to be copied
    const unsigned long n_unknown = ndof();

    // Give the vector the right layout; all entries start at zero
    dofs.build(Dof_distribution_pt, 0.0);

    // Entry i of the vector is the value that Dof_pt[i] points to
    unsigned long i = 0;
    while (i < n_unknown)
    {
      dofs[i] = *Dof_pt[i];
      ++i;
    }
  }
 
  //================================================================
  /// Get history values of dofs in a double vector.
  ///
  /// \param t    index of the history value to extract; it is passed
  ///             straight to the value(t, ...) function of the Data
  ///             and Nodes.
  /// \param dofs vector that is rebuilt with the problem's dof
  ///             distribution (initialised to zero) and then filled
  ///             so that entry [global equation number] holds the
  ///             corresponding history value.
  ///
  /// Values are collected from the global Data, from the internal
  /// Data of all elements in the global mesh and from the mesh's
  /// nodes. Values with a negative equation number are skipped.
  /// In PARANOID builds an OomphLibError is thrown if the problem is
  /// distributed.
  //================================================================
  void Problem::get_dofs(const unsigned& t, DoubleVector& dofs) const
  {
#ifdef PARANOID
    if (distributed())
    {
      // (Original author's remark: "might work, not sure".)
      // Argument order is (message, function, location), as in every
      // other OomphLibError thrown in this file.
      throw OomphLibError("Not designed for distributed problems",
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // Resize the vector
    dofs.build(Dof_distribution_pt, 0.0);

    // First deal with global data
    unsigned Nglobal_data = nglobal_data();
    for (unsigned i = 0; i < Nglobal_data; i++)
    {
      for (unsigned j = 0, nj = Global_data_pt[i]->nvalue(); j < nj; j++)
      {
        // For each data get the equation number and copy out the value.
        // Only values with a non-negative equation number are unknowns.
        int eqn_number = Global_data_pt[i]->eqn_number(j);
        if (eqn_number >= 0)
        {
          dofs[eqn_number] = Global_data_pt[i]->value(t, j);
        }
      }
    }

    // Next element internal data
    for (unsigned i = 0, ni = mesh_pt()->nelement(); i < ni; i++)
    {
      GeneralisedElement* ele_pt = mesh_pt()->element_pt(i);
      for (unsigned j = 0, nj = ele_pt->ninternal_data(); j < nj; j++)
      {
        Data* d_pt = ele_pt->internal_data_pt(j);
        for (unsigned k = 0, nk = d_pt->nvalue(); k < nk; k++)
        {
          int eqn_number = d_pt->eqn_number(k);
          if (eqn_number >= 0)
          {
            dofs[eqn_number] = d_pt->value(t, k);
          }
        }
      }
    }

    // Now the nodes
    for (unsigned i = 0, ni = mesh_pt()->nnode(); i < ni; i++)
    {
      Node* node_pt = mesh_pt()->node_pt(i);
      for (unsigned j = 0, nj = node_pt->nvalue(); j < nj; j++)
      {
        // For each node get the equation number and copy out the value.
        int eqn_number = node_pt->eqn_number(j);
        if (eqn_number >= 0)
        {
          dofs[eqn_number] = node_pt->value(t, j);
        }
      }
    }
  }
 
 
#ifdef OOMPH_HAS_MPI
 
  //=======================================================================
  /// Private helper function to remove repeated data
  /// in external haloed elements associated with specified mesh.
  /// Bool is true if some data was removed -- this usually requires
  /// re-running through certain parts of the equation numbering procedure.
  //======================================================================
  void Problem::remove_duplicate_data(Mesh* const& mesh_pt,
                                      bool& actually_removed_some_data)
  {
    //    //   // Taken out again by MH -- clutters up output
    //    // Doc timings if required
    //    double t_start=0.0;
    //    if (Global_timings::Doc_comprehensive_timings)
    //     {
    //      t_start=TimingHelpers::timer();
    //     }
 
    int n_proc = this->communicator_pt()->nproc();
    int my_rank = this->communicator_pt()->my_rank();
 
    // Initialise
    actually_removed_some_data = false;
 
    // Each individual container of external halo nodes has unique
    // nodes/equation numbers, but there may be some duplication between
    // two or more different containers; the following code checks for this
    // and removes the duplication by overwriting any data point with an already
    // existing eqn number with the original data point which had the eqn no.
 
    // // Storage for existing nodes, enumerated by first non-negative
    // // global equation number
    // unsigned n_dof=ndof();
 
    // Note: This used to be
    // Vector<Node*> global_node_pt(n_dof,0);
    // but this is a total killer! Memory allocation is extremely
    // costly and only relatively few entries are used so use
    // map:
    std::map<unsigned, Node*> global_node_pt;
 
    // Only do each retained node once
    std::map<Node*, bool> node_done;
 
    // Loop over existing "normal" elements in mesh
    unsigned n_element = mesh_pt->nelement();
    for (unsigned e = 0; e < n_element; e++)
    {
      FiniteElement* el_pt =
        dynamic_cast<FiniteElement*>(mesh_pt->element_pt(e));
      if (el_pt != 0)
      {
        // Loop over nodes
        unsigned n_node = el_pt->nnode();
        for (unsigned j = 0; j < n_node; j++)
        {
          Node* nod_pt = el_pt->node_pt(j);
 
          // Have we already done the node?
          if (!node_done[nod_pt])
          {
            node_done[nod_pt] = true;
 
            // Loop over values stored at node (if any) to find
            // the first non-negative eqn number
            unsigned first_non_negative_eqn_number_plus_one = 0;
            unsigned n_val = nod_pt->nvalue();
            for (unsigned i_val = 0; i_val < n_val; i_val++)
            {
              int eqn_no = nod_pt->eqn_number(i_val);
              if (eqn_no >= 0)
              {
                first_non_negative_eqn_number_plus_one = eqn_no + 1;
                break;
              }
            }
 
            // If we haven't found a non-negative eqn number check
            // eqn numbers associated with solid data (if any)
            if (first_non_negative_eqn_number_plus_one == 0)
            {
              // Is it a solid node?
              SolidNode* solid_nod_pt = dynamic_cast<SolidNode*>(nod_pt);
              if (solid_nod_pt != 0)
              {
                // Loop over values stored at node (if any) to find
                // the first non-negative eqn number
                unsigned n_val = solid_nod_pt->variable_position_pt()->nvalue();
                for (unsigned i_val = 0; i_val < n_val; i_val++)
                {
                  int eqn_no =
                    solid_nod_pt->variable_position_pt()->eqn_number(i_val);
                  if (eqn_no >= 0)
                  {
                    first_non_negative_eqn_number_plus_one = eqn_no + 1;
                    break;
                  }
                }
              }
            }
 
            // Associate node with first non negative global eqn number
            if (first_non_negative_eqn_number_plus_one > 0)
            {
              global_node_pt[first_non_negative_eqn_number_plus_one - 1] =
                nod_pt;
            }
 
 
            // Take into account master nodes too
            if (dynamic_cast<RefineableElement*>(el_pt) != 0)
            {
              int n_cont_int_values = dynamic_cast<RefineableElement*>(el_pt)
                                        ->ncont_interpolated_values();
              for (int i_cont = -1; i_cont < n_cont_int_values; i_cont++)
              {
                if (nod_pt->is_hanging(i_cont))
                {
                  HangInfo* hang_pt = nod_pt->hanging_pt(i_cont);
                  unsigned n_master = hang_pt->nmaster();
                  for (unsigned m = 0; m < n_master; m++)
                  {
                    Node* master_nod_pt = hang_pt->master_node_pt(m);
                    if (!node_done[master_nod_pt])
                    {
                      node_done[master_nod_pt] = true;
 
                      // Loop over values stored at node (if any) to find
                      // the first non-negative eqn number
                      unsigned first_non_negative_eqn_number_plus_one = 0;
                      unsigned n_val = master_nod_pt->nvalue();
                      for (unsigned i_val = 0; i_val < n_val; i_val++)
                      {
                        int eqn_no = master_nod_pt->eqn_number(i_val);
                        if (eqn_no >= 0)
                        {
                          first_non_negative_eqn_number_plus_one = eqn_no + 1;
                          break;
                        }
                      }
 
                      // If we haven't found a non-negative eqn number check
                      // eqn numbers associated with solid data (if any)
                      if (first_non_negative_eqn_number_plus_one == 0)
                      {
                        // If this master is a SolidNode then add its extra
                        // eqn numbers
                        SolidNode* master_solid_nod_pt =
                          dynamic_cast<SolidNode*>(master_nod_pt);
                        if (master_solid_nod_pt != 0)
                        {
                          // Loop over values stored at node (if any) to find
                          // the first non-negative eqn number
                          unsigned n_val =
                            master_solid_nod_pt->variable_position_pt()
                              ->nvalue();
                          for (unsigned i_val = 0; i_val < n_val; i_val++)
                          {
                            int eqn_no =
                              master_solid_nod_pt->variable_position_pt()
                                ->eqn_number(i_val);
                            if (eqn_no >= 0)
                            {
                              first_non_negative_eqn_number_plus_one =
                                eqn_no + 1;
                              break;
                            }
                          }
                        }
                      }
                      // Associate node with first non negative global
                      // eqn number
                      if (first_non_negative_eqn_number_plus_one > 0)
                      {
                        global_node_pt[first_non_negative_eqn_number_plus_one -
                                       1] = master_nod_pt;
                      }
 
                    } // End of not-yet-done hang node
                  }
                }
              }
            }
          } // endif for node already done
        } // End of loop over nodes
      } // End of FiniteElement
 
      // Internal data equation numbers do not need to be added since
      // internal data cannot be shared between distinct elements, so
      // internal data on locally-stored elements can never be halo.
    }
 
    // Set to record duplicate nodes scheduled to be killed
    std::set<Node*> killed_nodes;
 
    // Now loop over the other processors from highest to lowest
    // (i.e. if there is a duplicate between these containers
    //  then this will use the node on the highest numbered processor)
    for (int iproc = n_proc - 1; iproc >= 0; iproc--)
    {
      // Don't have external halo elements with yourself!
      if (iproc != my_rank)
      {
        // Loop over external halo elements with iproc
        // to remove the duplicates
        unsigned n_element = mesh_pt->nexternal_halo_element(iproc);
        for (unsigned e_ext = 0; e_ext < n_element; e_ext++)
        {
          FiniteElement* finite_ext_el_pt = dynamic_cast<FiniteElement*>(
            mesh_pt->external_halo_element_pt(iproc, e_ext));
          if (finite_ext_el_pt != 0)
          {
            // Loop over nodes
            unsigned n_node = finite_ext_el_pt->nnode();
            for (unsigned j = 0; j < n_node; j++)
            {
              Node* nod_pt = finite_ext_el_pt->node_pt(j);
 
              // Loop over values stored at node (if any) to find
              // the first non-negative eqn number
              unsigned first_non_negative_eqn_number_plus_one = 0;
              unsigned n_val = nod_pt->nvalue();
              for (unsigned i_val = 0; i_val < n_val; i_val++)
              {
                int eqn_no = nod_pt->eqn_number(i_val);
                if (eqn_no >= 0)
                {
                  first_non_negative_eqn_number_plus_one = eqn_no + 1;
                  break;
                }
              }
 
              // If we haven't found a non-negative eqn number check
              // eqn numbers associated with solid data (if any)
              if (first_non_negative_eqn_number_plus_one == 0)
              {
                // Is it a solid node?
                SolidNode* solid_nod_pt = dynamic_cast<SolidNode*>(nod_pt);
                if (solid_nod_pt != 0)
                {
                  // Loop over values stored at node (if any) to find
                  // the first non-negative eqn number
                  unsigned n_val =
                    solid_nod_pt->variable_position_pt()->nvalue();
                  for (unsigned i_val = 0; i_val < n_val; i_val++)
                  {
                    int eqn_no =
                      solid_nod_pt->variable_position_pt()->eqn_number(i_val);
                    if (eqn_no >= 0)
                    {
                      first_non_negative_eqn_number_plus_one = eqn_no + 1;
                      break;
                    }
                  }
                }
              }
 
              // Identified which node we're dealing with via first non-negative
              // global eqn number (if there is none, everything is pinned
              // and we don't give a damn...)
              if (first_non_negative_eqn_number_plus_one > 0)
              {
                Node* existing_node_pt =
                  global_node_pt[first_non_negative_eqn_number_plus_one - 1];
 
                // Does this node already exist?
                if (existing_node_pt != 0)
                {
                  // Record that we're about to cull one
                  actually_removed_some_data = true;
 
                  // It's a duplicate, so store the duplicated one for
                  // later killing...
                  Node* duplicated_node_pt = nod_pt;
                  if (!node_done[duplicated_node_pt])
                  {
                    // Remove node from all boundaries
                    std::set<unsigned>* boundaries_pt;
                    duplicated_node_pt->get_boundaries_pt(boundaries_pt);
                    if (boundaries_pt != 0)
                    {
                      Vector<unsigned> bound;
                      unsigned nb = (*boundaries_pt).size();
                      bound.reserve(nb);
                      for (std::set<unsigned>::iterator it =
                             (*boundaries_pt).begin();
                           it != (*boundaries_pt).end();
                           it++)
                      {
                        bound.push_back((*it));
                      }
                      for (unsigned i = 0; i < nb; i++)
                      {
                        mesh_pt->remove_boundary_node(bound[i],
                                                      duplicated_node_pt);
                      }
                    }
 
                    // Get ready to kill it
                    killed_nodes.insert(duplicated_node_pt);
                    unsigned i_proc = unsigned(iproc);
                    mesh_pt->null_external_halo_node(i_proc,
                                                     duplicated_node_pt);
                  }
 
 
                  // Note: For now we're leaving the "dangling" (no longer
                  // accessed masters where they are; they get cleaned
                  // up next time we delete all the external storage
                  // for the meshes so it's a temporary "leak" only...
                  // At some point we should probably delete them properly too
 
#ifdef PARANOID
 
                  // Check that hang status of exiting and replacement node
                  // matches
                  if (dynamic_cast<RefineableElement*>(finite_ext_el_pt) != 0)
                  {
                    int n_cont_inter_values =
                      dynamic_cast<RefineableElement*>(finite_ext_el_pt)
                        ->ncont_interpolated_values();
                    for (int i_cont = -1; i_cont < n_cont_inter_values;
                         i_cont++)
                    {
                      unsigned n_master_orig = 0;
                      if (finite_ext_el_pt->node_pt(j)->is_hanging(i_cont))
                      {
                        n_master_orig = finite_ext_el_pt->node_pt(j)
                                          ->hanging_pt(i_cont)
                                          ->nmaster();
 
                        // Temporary leak: Resolve like this:
                        // loop over all external halo nodes and identify the
                        // the ones that are still reached by any of the
                        // external elements. Kill the dangling ones.
                      }
                      unsigned n_master_replace = 0;
                      if (existing_node_pt->is_hanging(i_cont))
                      {
                        n_master_replace =
                          existing_node_pt->hanging_pt(i_cont)->nmaster();
                      }
 
                      if (n_master_orig != n_master_replace)
                      {
                        std::ostringstream error_stream;
                        error_stream
                          << "Number of master nodes for node to be replaced, "
                          << n_master_orig << ", doesn't match"
                          << "those of replacement node, " << n_master_replace
                          << " for i_cont=" << i_cont << std::endl;
                        {
                          error_stream
                            << "Nodal coordinates of replacement node:";
                          unsigned ndim = existing_node_pt->ndim();
                          for (unsigned i = 0; i < ndim; i++)
                          {
                            error_stream << existing_node_pt->x(i) << " ";
                          }
                          error_stream << "\n";
                          error_stream << "The coordinates of its "
                                       << n_master_replace
                                       << " master nodes are: \n";
                          for (unsigned k = 0; k < n_master_replace; k++)
                          {
                            Node* master_nod_pt =
                              existing_node_pt->hanging_pt(i_cont)
                                ->master_node_pt(k);
                            unsigned ndim = master_nod_pt->ndim();
                            for (unsigned i = 0; i < ndim; i++)
                            {
                              error_stream << master_nod_pt->x(i) << " ";
                            }
                            error_stream << "\n";
                          }
                        }
 
                        {
                          error_stream
                            << "Nodal coordinates of node to be replaced:";
                          unsigned ndim = finite_ext_el_pt->node_pt(j)->ndim();
                          for (unsigned i = 0; i < ndim; i++)
                          {
                            error_stream << finite_ext_el_pt->node_pt(j)->x(i)
                                         << " ";
                          }
                          error_stream << "\n";
                          error_stream << "The coordinates of its "
                                       << n_master_orig
                                       << " master nodes are: \n";
                          for (unsigned k = 0; k < n_master_orig; k++)
                          {
                            Node* master_nod_pt = finite_ext_el_pt->node_pt(j)
                                                    ->hanging_pt(i_cont)
                                                    ->master_node_pt(k);
                            unsigned ndim = master_nod_pt->ndim();
                            for (unsigned i = 0; i < ndim; i++)
                            {
                              error_stream << master_nod_pt->x(i) << " ";
                            }
                            error_stream << "\n";
                          }
                        }
 
 
                        throw OomphLibError(error_stream.str(),
                                            OOMPH_CURRENT_FUNCTION,
                                            OOMPH_EXCEPTION_LOCATION);
                      }
                    }
                  }
#endif
                  // ...and point to the existing one
                  finite_ext_el_pt->node_pt(j) = existing_node_pt;
                }
                // If it doesn't add it to the list of existing ones
                else
                {
                  global_node_pt[first_non_negative_eqn_number_plus_one - 1] =
                    nod_pt;
                  node_done[nod_pt] = true;
                }
              }
 
 
              // Do the same for any master nodes of that (possibly replaced)
              // node
              if (dynamic_cast<RefineableElement*>(finite_ext_el_pt) != 0)
              {
                int n_cont_inter_values =
                  dynamic_cast<RefineableElement*>(finite_ext_el_pt)
                    ->ncont_interpolated_values();
                for (int i_cont = -1; i_cont < n_cont_inter_values; i_cont++)
                {
                  if (finite_ext_el_pt->node_pt(j)->is_hanging(i_cont))
                  {
                    HangInfo* hang_pt =
                      finite_ext_el_pt->node_pt(j)->hanging_pt(i_cont);
                    unsigned n_master = hang_pt->nmaster();
                    for (unsigned m = 0; m < n_master; m++)
                    {
                      Node* master_nod_pt = hang_pt->master_node_pt(m);
                      unsigned n_val = master_nod_pt->nvalue();
                      unsigned first_non_negative_eqn_number_plus_one = 0;
                      for (unsigned i_val = 0; i_val < n_val; i_val++)
                      {
                        int eqn_no = master_nod_pt->eqn_number(i_val);
                        if (eqn_no >= 0)
                        {
                          first_non_negative_eqn_number_plus_one = eqn_no + 1;
                          break;
                        }
                      }
 
                      // If we haven't found a non-negative eqn number check
                      // eqn numbers associated with solid data (if any)
                      if (first_non_negative_eqn_number_plus_one == 0)
                      {
                        SolidNode* solid_master_nod_pt =
                          dynamic_cast<SolidNode*>(master_nod_pt);
                        if (solid_master_nod_pt != 0)
                        {
                          // Loop over values stored at node (if any) to find
                          // the first non-negative eqn number
                          unsigned n_val =
                            solid_master_nod_pt->variable_position_pt()
                              ->nvalue();
                          for (unsigned i_val = 0; i_val < n_val; i_val++)
                          {
                            int eqn_no =
                              solid_master_nod_pt->variable_position_pt()
                                ->eqn_number(i_val);
                            if (eqn_no >= 0)
                            {
                              first_non_negative_eqn_number_plus_one =
                                eqn_no + 1;
                              break;
                            }
                          }
                        }
                      }
 
                      // Identified which node we're dealing with via
                      // first non-negative global eqn number (if there
                      // is none, everything is pinned and we don't give a
                      // damn...)
                      if (first_non_negative_eqn_number_plus_one > 0)
                      {
                        Node* existing_node_pt = global_node_pt
                          [first_non_negative_eqn_number_plus_one - 1];
 
                        // Does this node already exist?
                        if (existing_node_pt != 0)
                        {
                          // Record that we're about to cull one
                          actually_removed_some_data = true;
 
                          // It's a duplicate, so store the duplicated one for
                          // later killing...
                          Node* duplicated_node_pt = master_nod_pt;
 
                          if (!node_done[duplicated_node_pt])
                          {
                            // Remove node from all boundaries
                            std::set<unsigned>* boundaries_pt;
                            duplicated_node_pt->get_boundaries_pt(
                              boundaries_pt);
                            if (boundaries_pt != 0)
                            {
                              for (std::set<unsigned>::iterator it =
                                     (*boundaries_pt).begin();
                                   it != (*boundaries_pt).end();
                                   it++)
                              {
                                mesh_pt->remove_boundary_node(
                                  (*it), duplicated_node_pt);
                              }
                            }
 
                            killed_nodes.insert(duplicated_node_pt);
                            unsigned i_proc = unsigned(iproc);
                            mesh_pt->null_external_halo_node(
                              i_proc, duplicated_node_pt);
                          }
 
                          // Weight of the original node
                          double m_weight = hang_pt->master_weight(m);
 
 
#ifdef PARANOID
                          // Sanity check: setting replacement master
                          // node for non-hanging node? Sign of really
                          // f***ed up code.
                          Node* tmp_nod_pt = finite_ext_el_pt->node_pt(j);
                          if (!tmp_nod_pt->is_hanging(i_cont))
                          {
                            std::ostringstream error_stream;
                            error_stream
                              << "About to re-set master for i_cont= " << i_cont
                              << " for external node (with proc " << iproc
                              << " )" << tmp_nod_pt << " at ";
                            unsigned n = tmp_nod_pt->ndim();
                            for (unsigned jj = 0; jj < n; jj++)
                            {
                              error_stream << tmp_nod_pt->x(jj) << " ";
                            }
                            error_stream
                              << " which is not hanging --> About to die!"
                              << "Outputting offending element into oomph-info "
                              << "stream. \n\n";
                            oomph_info << "\n\n";
                            finite_ext_el_pt->output(*(oomph_info.stream_pt()));
                            oomph_info << "\n\n";
                            oomph_info.stream_pt()->flush();
                            throw OomphLibError(error_stream.str(),
                                                OOMPH_CURRENT_FUNCTION,
                                                OOMPH_EXCEPTION_LOCATION);
                          }
#endif
 
 
                          // And re-set master
                          finite_ext_el_pt->node_pt(j)
                            ->hanging_pt(i_cont)
                            ->set_master_node_pt(m, existing_node_pt, m_weight);
                        }
                        // If it doesn't, add it to the list of existing ones
                        else
                        {
                          global_node_pt
                            [first_non_negative_eqn_number_plus_one - 1] =
                              master_nod_pt;
                          node_done[master_nod_pt] = true;
                        }
                      }
                    } // End of loop over master nodes
                  } // end of hanging
                } // end of loop over continously interpolated variables
              } // end refineable element (with potentially hanging node
 
            } // end loop over nodes on external halo elements
 
          } // End of check for finite element
 
        } // end loop over external halo elements
      }
    } // end loop over processors
 
 
    // Now kill all the deleted nodes
    for (std::set<Node*>::iterator it = killed_nodes.begin();
         it != killed_nodes.end();
         it++)
    {
      delete (*it);
    }
 
 
    //   oomph_info << "Number of nonzero entries in global_node_pt: "
    //              << global_node_pt.size() << std::endl;
 
    //    // Time it...
    //    // Taken out again by MH -- clutters up output
    //    if (Global_timings::Doc_comprehensive_timings)
    //     {
    //      double t_end = TimingHelpers::timer();
    //      oomph_info
    //       << "Total time for Problem::remove_duplicate_data: "
    //       << t_end-t_start << std::endl;
    //     }
  }
 
 
  //========================================================================
  /// Consolidate external halo node storage by removing nulled out
  /// pointers in external halo and haloed schemes for all meshes.
  //========================================================================
  void Problem::remove_null_pointers_from_external_halo_node_storage()
  {
    // Do we have submeshes?
    unsigned n_mesh_loop = 1;
    unsigned nmesh = nsub_mesh();
    if (nmesh > 0)
    {
      n_mesh_loop = nmesh;
    }
 
    // Storage for number of processors and current processor
    int n_proc = this->communicator_pt()->nproc();
    int my_rank = this->communicator_pt()->my_rank();
 
    // If only one processor then return
    if (n_proc == 1)
    {
      return;
    }
 
    // Loop over all (other) processors and store index of any nulled-out
    // external halo nodes in storage scheme.
 
    // Data to be sent to each processor
    Vector<int> send_n(n_proc, 0);
 
    // Storage for all values to be sent to all processors
    Vector<int> send_data;
 
    // Start location within send_data for data to be sent to each processor
    Vector<int> send_displacement(n_proc, 0);
 
    // Check missing ones
    for (int domain = 0; domain < n_proc; domain++)
    {
      // Set the offset for the current processor
      send_displacement[domain] = send_data.size();
 
      // Don't bother to do anything if the processor in the loop is the
      // current processor
      if (domain != my_rank)
      {
        // Deal with sub-meshes one-by-one if required
        Mesh* my_mesh_pt = 0;
 
        // Loop over submeshes
        for (unsigned imesh = 0; imesh < n_mesh_loop; imesh++)
        {
          if (nmesh == 0)
          {
            my_mesh_pt = mesh_pt();
          }
          else
          {
            my_mesh_pt = mesh_pt(imesh);
          }
 
          // Make backup of external halo node pointers with this domain
          Vector<Node*> backup_pt(my_mesh_pt->external_halo_node_pt(domain));
 
          // How many do we have currently?
          unsigned nnod = backup_pt.size();
 
          // Prepare storage for updated halo nodes
          Vector<Node*> new_external_halo_node_pt;
          new_external_halo_node_pt.reserve(nnod);
 
          // Loop over external halo nodes with this domain
          for (unsigned j = 0; j < nnod; j++)
          {
            // Get pointer to node
            Node* nod_pt = backup_pt[j];
 
            // Has it been nulled out?
            if (nod_pt == 0)
            {
              // Save index of nulled out one
              send_data.push_back(j);
            }
            else
            {
              // Still alive: Copy across
              new_external_halo_node_pt.push_back(nod_pt);
            }
          }
 
          // Set new external halo node vector
          my_mesh_pt->set_external_halo_node_pt(domain,
                                                new_external_halo_node_pt);
 
          // End of data for this mesh
          send_data.push_back(-1);
 
        } // end of loop over meshes
 
      } // end skip own domain
 
      // Find the number of data added to the vector
      send_n[domain] = send_data.size() - send_displacement[domain];
    }
 
    // Storage for the number of data to be received from each processor
    Vector<int> receive_n(n_proc, 0);
 
    // Now send numbers of data to be sent between all processors
    MPI_Alltoall(&send_n[0],
                 1,
                 MPI_INT,
                 &receive_n[0],
                 1,
                 MPI_INT,
                 this->communicator_pt()->mpi_comm());
 
 
    // We now prepare the data to be received
    // by working out the displacements from the received data
    Vector<int> receive_displacement(n_proc, 0);
    int receive_data_count = 0;
    for (int rank = 0; rank < n_proc; ++rank)
    {
      // Displacement is number of data received so far
      receive_displacement[rank] = receive_data_count;
      receive_data_count += receive_n[rank];
    }
 
    // Now resize the receive buffer for all data from all processors
    // Make sure that it has a size of at least one
    if (receive_data_count == 0)
    {
      ++receive_data_count;
    }
    Vector<int> receive_data(receive_data_count);
 
    // Make sure that the send buffer has size at least one
    // so that we don't get a segmentation fault
    if (send_data.size() == 0)
    {
      send_data.resize(1);
    }
 
    // Now send the data between all the processors
    MPI_Alltoallv(&send_data[0],
                  &send_n[0],
                  &send_displacement[0],
                  MPI_INT,
                  &receive_data[0],
                  &receive_n[0],
                  &receive_displacement[0],
                  MPI_INT,
                  this->communicator_pt()->mpi_comm());
 
    // Now use the received data
    for (int send_rank = 0; send_rank < n_proc; send_rank++)
    {
      // Don't bother to do anything for the processor corresponding to the
      // current processor or if no data were received from this processor
      if ((send_rank != my_rank) && (receive_n[send_rank] != 0))
      {
        // Counter for the data within the large array
        unsigned count = receive_displacement[send_rank];
 
        // Deal with sub-meshes one-by-one if required
        Mesh* my_mesh_pt = 0;
 
        // Loop over submeshes
        for (unsigned imesh = 0; imesh < n_mesh_loop; imesh++)
        {
          if (nmesh == 0)
          {
            my_mesh_pt = mesh_pt();
          }
          else
          {
            my_mesh_pt = mesh_pt(imesh);
          }
 
          // Make backup of external haloed node pointers with this domain
          Vector<Node*> backup_pt =
            my_mesh_pt->external_haloed_node_pt(send_rank);
 
          // Unpack until we reach "end of data" indicator (-1) for this mesh
          while (true)
          {
            // Read next entry
            int next_one = receive_data[count++];
 
            if (next_one == -1)
            {
              break;
            }
            else
            {
              // Null out the entry
              backup_pt[next_one] = 0;
            }
          }
 
          // How many do we have currently?
          unsigned nnod = backup_pt.size();
 
          // Prepare storage for updated haloed nodes
          Vector<Node*> new_external_haloed_node_pt;
          new_external_haloed_node_pt.reserve(nnod);
 
          // Loop over external haloed nodes with this domain
          for (unsigned j = 0; j < nnod; j++)
          {
            // Get pointer to node
            Node* nod_pt = backup_pt[j];
 
            // Has it been nulled out?
            if (nod_pt != 0)
            {
              // Still alive: Copy across
              new_external_haloed_node_pt.push_back(nod_pt);
            }
          }
 
          // Set new external haloed node vector
          my_mesh_pt->set_external_haloed_node_pt(send_rank,
                                                  new_external_haloed_node_pt);
        }
      }
 
    } // End of data is received
  }
 
#endif
 
 
  //=======================================================================
  /// Copy the entries of the vector dofs into the problem's degrees of
  /// freedom: entry i is written to the value that Dof_pt[i] points to.
  /// When compiled with PARANOID, an OomphLibError is thrown if the number
  /// of rows of dofs differs from ndof().
  //=======================================================================
  void Problem::set_dofs(const DoubleVector& dofs)
  {
    const unsigned long total_dofs = this->ndof();

#ifdef PARANOID
    // The vector must supply exactly one entry per degree of freedom
    if (total_dofs != dofs.nrow())
    {
      std::ostringstream message;
      message << "Number of degrees of freedom in vector argument "
              << dofs.nrow() << "\n"
              << "does not equal number of degrees of freedom in problem "
              << total_dofs;
      throw OomphLibError(
        message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // Write every entry through the corresponding dof pointer
    unsigned long i = 0;
    while (i < total_dofs)
    {
      *Dof_pt[i] = dofs[i];
      ++i;
    }
  }
 
  //=======================================================================
  /// Set history values of dofs: for every value in the problem's global
  /// data, in the internal data of the elements of mesh_pt() and in the
  /// nodes of mesh_pt() that has a non-negative equation number, write
  /// dofs[equation number] into that value at time level t (via
  /// set_value(t, ...)). Values with a negative equation number are left
  /// untouched.
  ///
  /// When compiled with PARANOID, an OomphLibError is thrown for
  /// distributed problems.
  //=======================================================================
  void Problem::set_dofs(const unsigned& t, DoubleVector& dofs)
  {
#ifdef PARANOID
    if (distributed())
    {
      // might work if the dofs vector is distributed in the right way...
      // Argument order is (message, function, location), as in every
      // other OomphLibError thrown in this file.
      throw OomphLibError("Not designed for distributed problems",
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // First deal with global data
    unsigned Nglobal_data = nglobal_data();
    for (unsigned i = 0; i < Nglobal_data; i++)
    {
      for (unsigned j = 0, nj = Global_data_pt[i]->nvalue(); j < nj; j++)
      {
        // For each value get the equation number and copy in the entry
        // of dofs; a negative equation number means there is nothing to set.
        int eqn_number = Global_data_pt[i]->eqn_number(j);
        if (eqn_number >= 0)
        {
          Global_data_pt[i]->set_value(t, j, dofs[eqn_number]);
        }
      }
    }

    // Next element internal data
    for (unsigned i = 0, ni = mesh_pt()->nelement(); i < ni; i++)
    {
      GeneralisedElement* ele_pt = mesh_pt()->element_pt(i);
      for (unsigned j = 0, nj = ele_pt->ninternal_data(); j < nj; j++)
      {
        Data* d_pt = ele_pt->internal_data_pt(j);

        // Each internal Data object may store several values
        for (unsigned k = 0, nk = d_pt->nvalue(); k < nk; k++)
        {
          int eqn_number = d_pt->eqn_number(k);
          if (eqn_number >= 0)
          {
            d_pt->set_value(t, k, dofs[eqn_number]);
          }
        }
      }
    }

    // Now the nodes
    for (unsigned i = 0, ni = mesh_pt()->nnode(); i < ni; i++)
    {
      Node* node_pt = mesh_pt()->node_pt(i);
      for (unsigned j = 0, nj = node_pt->nvalue(); j < nj; j++)
      {
        // For each nodal value get the equation number and copy in the
        // entry of dofs.
        int eqn_number = node_pt->eqn_number(j);
        if (eqn_number >= 0)
        {
          node_pt->set_value(t, j, dofs[eqn_number]);
        }
      }
    }
  }
 
 
  //=======================================================================
  /// Set history values of dofs from the type of vector stored in
  /// problem::Dof_pt: for every value in the problem's global data, in
  /// the nodes of mesh_pt() and in the internal data of the elements of
  /// mesh_pt() that has a non-negative equation number, write
  /// *(dof_pt[equation number]) into that value at time level t (via
  /// set_value(t, ...)). Values with a negative equation number are left
  /// untouched.
  ///
  /// When compiled with PARANOID, an OomphLibError is thrown for
  /// distributed problems.
  //=======================================================================
  void Problem::set_dofs(const unsigned& t, Vector<double*>& dof_pt)
  {
#ifdef PARANOID
    if (distributed())
    {
      // Argument order is (message, function, location), as in every
      // other OomphLibError thrown in this file.
      throw OomphLibError("Not implemented for distributed problems!",
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // If we have any spine meshes I think there might be more degrees
    // of freedom there. I don't use them though so I'll let someone who
    // knows what they are doing handle it. --David Shepherd

    // First deal with global data
    unsigned Nglobal_data = nglobal_data();
    for (unsigned i = 0; i < Nglobal_data; i++)
    {
      for (unsigned j = 0, nj = Global_data_pt[i]->nvalue(); j < nj; j++)
      {
        // For each value get the equation number and copy in the value.
        int eqn_number = Global_data_pt[i]->eqn_number(j);
        if (eqn_number >= 0)
        {
          Global_data_pt[i]->set_value(t, j, *(dof_pt[eqn_number]));
        }
      }
    }

    // Now the mesh data
    // nodes
    for (unsigned i = 0, ni = mesh_pt()->nnode(); i < ni; i++)
    {
      Node* node_pt = mesh_pt()->node_pt(i);
      for (unsigned j = 0, nj = node_pt->nvalue(); j < nj; j++)
      {
        // For each nodal value get the equation number and copy in the
        // value.
        int eqn_number = node_pt->eqn_number(j);
        if (eqn_number >= 0)
        {
          node_pt->set_value(t, j, *(dof_pt[eqn_number]));
        }
      }
    }

    // and non-nodal data inside elements
    for (unsigned i = 0, ni = mesh_pt()->nelement(); i < ni; i++)
    {
      GeneralisedElement* ele_pt = mesh_pt()->element_pt(i);
      for (unsigned j = 0, nj = ele_pt->ninternal_data(); j < nj; j++)
      {
        Data* data_pt = ele_pt->internal_data_pt(j);

        // Loop over the values stored in this Data object. The index of
        // the Data object within the element (j) is not a value index, so
        // each value needs its own index k.
        for (unsigned k = 0, nk = data_pt->nvalue(); k < nk; k++)
        {
          int eqn_number = data_pt->eqn_number(k);
          if (eqn_number >= 0)
          {
            data_pt->set_value(t, k, *(dof_pt[eqn_number]));
          }
        }
      }
    }
  }
 
 
  //===================================================================
  /// Add lambda times the entries of increment_dofs to the problem's
  /// degrees of freedom: the value that Dof_pt[i] points to is
  /// incremented by lambda * increment_dofs[i] for i = 0,...,ndof()-1.
  //===================================================================
  void Problem::add_to_dofs(const double& lambda,
                            const DoubleVector& increment_dofs)
  {
    const unsigned long total_dofs = this->ndof();

    // Update each dof in place through its pointer
    unsigned long i = 0;
    while (i < total_dofs)
    {
      *Dof_pt[i] += lambda * increment_dofs[i];
      ++i;
    }
  }
 
 
  //=========================================================================
  /// Return the residual vector multiplied by the inverse mass matrix
  /// Virtual so that it can be overloaded for mpi problems.
  ///
  /// Mres is rebuilt with a non-distributed distribution of ndof() rows.
  /// If Discontinuous_element_formulation is set, every element of the
  /// problem's mesh is cast to a DGElement and its elemental result is
  /// copied into Mres; an OomphLibError is thrown if an element is not a
  /// DGElement. Otherwise the result is obtained from the mass matrix
  /// solver for the explicit timestepper: via resolve() if mass matrix
  /// reuse is enabled and the matrix has already been computed, and via
  /// solve() with a temporary ExplicitTimeStepHandler otherwise.
  ///
  /// When compiled with PARANOID, an OomphLibError is thrown if the
  /// current assembly handler is not the default one.
  //=========================================================================
  void Problem::get_inverse_mass_matrix_times_residuals(DoubleVector& Mres)
  {
    // This function does not make sense for assembly handlers other than the
    // default, so complain if we try to call it with another handler

#ifdef PARANOID
    // If we are not the default, then complain
    if (this->assembly_handler_pt() != Default_assembly_handler_pt)
    {
      std::ostringstream error_stream;
      error_stream << "The function get_inverse_mass_matrix_times_residuals() "
                      "can only be\n"
                   << "used with the default assembly handler\n\n";
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // Find the number of degrees of freedom in the problem
    const unsigned n_dof = this->ndof();

    // Resize the vector
    LinearAlgebraDistribution dist(this->communicator_pt(), n_dof, false);
    Mres.build(&dist, 0.0);

    // If we have discontinuous formulation
    // We can invert the mass matrix element by element
    if (Discontinuous_element_formulation)
    {
      // Loop over the elements and get their residuals
      const unsigned n_element = Problem::mesh_pt()->nelement();
      Vector<double> element_Mres;
      for (unsigned e = 0; e < n_element; e++)
      {
        // Cache the element
        DGElement* const elem_pt =
          dynamic_cast<DGElement*>(Problem::mesh_pt()->element_pt(e));

        // The cast yields a null pointer if the element is not a DGElement;
        // report that instead of dereferencing the null pointer below.
        if (elem_pt == 0)
        {
          std::ostringstream error_stream;
          error_stream << "Element " << e << " of the problem's mesh is not a "
                       << "DGElement,\nbut the discontinuous element "
                       << "formulation has been enabled.\n";
          throw OomphLibError(error_stream.str(),
                              OOMPH_CURRENT_FUNCTION,
                              OOMPH_EXCEPTION_LOCATION);
        }

        // Find the elemental inverse mass matrix times residuals
        const unsigned n_el_dofs = elem_pt->ndof();
        elem_pt->get_inverse_mass_matrix_times_residuals(element_Mres);

        // Copy the elemental result into the global vector
        for (unsigned i = 0; i < n_el_dofs; i++)
        {
          Mres[elem_pt->eqn_number(i)] = element_Mres[i];
        }
      }
    }
    // Otherwise it's continuous and we must invert the full
    // mass matrix via a global linear solve.
    else
    {
      // Now do the linear solve -- recycling Mass matrix if requested
      // If we already have the factorised mass matrix, then resolve
      if (Mass_matrix_reuse_is_enabled && Mass_matrix_has_been_computed)
      {
        if (!Shut_up_in_newton_solve)
        {
          oomph_info << "Not recomputing Mass Matrix " << std::endl;
        }

        // Get the residuals
        DoubleVector residuals(&dist, 0.0);
        this->get_residuals(residuals);

        // Resolve the linear system
        this->mass_matrix_solver_for_explicit_timestepper_pt()->resolve(
          residuals, Mres);
      }
      // Otherwise solve for the first time
      else
      {
        // If we wish to reuse the mass matrix, then enable resolve
        if (Mass_matrix_reuse_is_enabled)
        {
          if (!Shut_up_in_newton_solve)
          {
            oomph_info << "Enabling resolve in explicit timestep" << std::endl;
          }
          this->mass_matrix_solver_for_explicit_timestepper_pt()
            ->enable_resolve();
        }

        // Use a custom assembly handler to assemble and invert the mass matrix

        // Store the old assembly handler
        AssemblyHandler* old_assembly_handler_pt = this->assembly_handler_pt();
        // Set the assembly handler to the explicit timestep handler
        this->assembly_handler_pt() = new ExplicitTimeStepHandler;

        // Solve the linear system
        this->mass_matrix_solver_for_explicit_timestepper_pt()->solve(this,
                                                                      Mres);
        // The mass matrix has now been computed
        Mass_matrix_has_been_computed = true;

        // Delete the Explicit Timestep handler
        delete this->assembly_handler_pt();
        // Reset the assembly handler to the original handler
        this->assembly_handler_pt() = old_assembly_handler_pt;
      }
    }
  }
 
  //=========================================================================
  /// Fill f by calling get_inverse_mass_matrix_times_residuals(f) while all
  /// of the problem's timesteppers are (temporarily) steady. Timesteppers
  /// that were not steady on entry have undo_make_steady() called on them
  /// afterwards; those that were already steady are left as they are.
  //=========================================================================
  void Problem::get_dvaluesdt(DoubleVector& f)
  {
    const unsigned n_stepper = this->ntime_stepper();

    // Note which timesteppers are unsteady on entry (and hence have to be
    // restored later), then make every one of them steady.
    std::vector<bool> needs_restore(n_stepper);
    for (unsigned s = 0; s < n_stepper; ++s)
    {
      needs_restore[s] = !(time_stepper_pt(s)->is_steady());
      time_stepper_pt(s)->make_steady();
    }

    // Compute f using the residual/jacobian machinery
    get_inverse_mass_matrix_times_residuals(f);

    // Put back the timesteppers that were unsteady when we came in
    for (unsigned s = 0; s < n_stepper; ++s)
    {
      if (needs_restore[s])
      {
        time_stepper_pt(s)->undo_make_steady();
      }
    }
  }
 
 
  //================================================================
  /// Get the total residuals Vector for the problem.
  ///
  /// If residuals has already been built, a copy of its distribution is
  /// used; otherwise one is created by
  /// create_new_linear_algebra_distribution(). The vector is then
  /// (re)built on that distribution and zeroed before assembly.
  ///
  /// Without MPI, or with MPI on a single processor, the elemental
  /// residuals are summed into the global vector element by element.
  /// With MPI on more than one processor the assembly is delegated to
  /// parallel_sparse_assemble() using a temporary
  /// ParallelResidualsHandler.
  ///
  /// When compiled with PARANOID, an OomphLibError is thrown if residuals
  /// is built but does not have ndof() global rows.
  //================================================================
  void Problem::get_residuals(DoubleVector& residuals)
  {
#ifdef PARANOID
    // A vector that arrives already built must have one global row per dof
    if (residuals.built() &&
        (residuals.distribution_pt()->nrow() != this->ndof()))
    {
      std::ostringstream message;
      message << "The distribution of the residuals vector does not "
                 "have the correct\n"
              << "number of global rows\n";

      throw OomphLibError(
        message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // Work with our own distribution object (deleted at the end of this
    // function): either a freshly created one or a copy of the one that
    // the vector already has.
    LinearAlgebraDistribution* dist_pt = 0;
    if (!residuals.built())
    {
      create_new_linear_algebra_distribution(dist_pt);
    }
    else
    {
      dist_pt = new LinearAlgebraDistribution(residuals.distribution_pt());
    }

    // Build and zero the residuals
    residuals.build(dist_pt, 0.0);

#ifdef OOMPH_HAS_MPI
    if (this->communicator_pt()->nproc() != 1)
    {
      // Parallel case: temporarily replace the assembly handler by one
      // that only assembles the residuals, wrapping the current handler.
      AssemblyHandler* const original_handler_pt = Assembly_handler_pt;
      Assembly_handler_pt = new ParallelResidualsHandler(original_handler_pt);

      // Storage for the parallel sparse assemble: there is no matrix, so
      // all the matrix-related containers have size zero...
      Vector<int*> column_index;
      Vector<int*> row_start;
      Vector<double*> value;
      Vector<unsigned> nnz;
      // ...and there is a single set of residuals
      Vector<double*> res(1);

      // Call the parallel sparse assemble, that should only assemble
      // residuals
      parallel_sparse_assemble(
        dist_pt, column_index, row_start, value, nnz, res);

      // Pass the assembled array to the vector
      // NOTE(review): the flag `true` presumably makes the vector
      // responsible for deleting the array -- confirm against DoubleVector.
      residuals.set_external_values(res[0], true);

      // Get rid of the temporary handler and reinstate the original one
      delete Assembly_handler_pt;
      Assembly_handler_pt = original_handler_pt;
    }
    else
#endif // OOMPH_HAS_MPI
    {
      // Serial (or one processor) case

      // Locally cache pointer to assembly handler
      AssemblyHandler* const handler_pt = Assembly_handler_pt;

      // Visit the elements one by one
      const unsigned long n_element = Mesh_pt->nelement();
      for (unsigned long e = 0; e < n_element; e++)
      {
        GeneralisedElement* const el_pt = Mesh_pt->element_pt(e);

        // Number of dofs in this element, as seen by the handler
        const unsigned n_local_dof = handler_pt->ndof(el_pt);

        // Compute the elemental residuals
        Vector<double> el_residuals(n_local_dof);
        handler_pt->get_residuals(el_pt, el_residuals);

        // Add them to the global vector at the global equation numbers
        for (unsigned l = 0; l < n_local_dof; l++)
        {
          residuals[handler_pt->eqn_number(el_pt, l)] += el_residuals[l];
        }
      }
    }

    // Delete the distribution
    delete dist_pt;
  }
 
  //=============================================================================
  /// Get the fully assembled residual vector and Jacobian matrix
  /// in dense storage. The DoubleVector residuals returned will be
  /// non-distributed. If on calling this method the DoubleVector residuals is
  /// setup then it must be non-distributed and of the correct length.
  /// The matrix type DenseDoubleMatrix is not distributable and therefore
  /// the residual vector is also assumed to be non distributable.
  ///
  /// The Jacobian is resized to ndof() x ndof() and zeroed, the residuals
  /// are built (if necessary) and zeroed, and then the elemental
  /// contributions provided by the assembly handler are summed in.
  //=============================================================================
  void Problem::get_jacobian(DoubleVector& residuals,
                             DenseDoubleMatrix& jacobian)
  {
    // Size of the global system
    unsigned n_dof = ndof();

#ifdef PARANOID
    // A residuals vector that has been set up already must be
    // non-distributed, have one row per dof and live on the same
    // communicator as the problem.
    if (residuals.built())
    {
      if (residuals.distribution_pt()->distributed())
      {
        throw OomphLibError(
          "If the DoubleVector residuals is setup then it must not "
          "be distributed.",
          OOMPH_CURRENT_FUNCTION,
          OOMPH_EXCEPTION_LOCATION);
      }
      if (residuals.distribution_pt()->nrow() != n_dof)
      {
        throw OomphLibError(
          "If the DoubleVector residuals is setup then it must have"
          " the correct number of rows",
          OOMPH_CURRENT_FUNCTION,
          OOMPH_EXCEPTION_LOCATION);
      }
      if (!(*Communicator_pt ==
            *residuals.distribution_pt()->communicator_pt()))
      {
        throw OomphLibError(
          "If the DoubleVector residuals is setup then it must have"
          " the same communicator as the problem.",
          OOMPH_CURRENT_FUNCTION,
          OOMPH_EXCEPTION_LOCATION);
      }
    }
#endif

    // Zero an existing residuals vector; build (and zero) a missing one
    if (residuals.built())
    {
      residuals.initialise(0.0);
    }
    else
    {
      LinearAlgebraDistribution dist(Communicator_pt, n_dof, false);
      residuals.build(&dist, 0.0);
    }

    // The Jacobian is resized here rather than by the caller because
    // get_jacobian exists in several versions for different storage
    // formats, and resizing a CC or CR matrix doesn't make sense.
    jacobian.resize(n_dof, n_dof);
    jacobian.initialise(0.0);

    // Locally cache pointer to assembly handler
    AssemblyHandler* const handler_pt = Assembly_handler_pt;

    // Sum up the contributions of all the elements
    const unsigned long n_element = Mesh_pt->nelement();
    for (unsigned long e = 0; e < n_element; e++)
    {
      GeneralisedElement* const el_pt = Mesh_pt->element_pt(e);

      // Number of dofs in this element, as seen by the handler
      const unsigned n_local_dof = handler_pt->ndof(el_pt);

      // Storage for, and computation of, the elemental residuals and
      // Jacobian
      Vector<double> el_residuals(n_local_dof);
      DenseMatrix<double> el_jacobian(n_local_dof);
      handler_pt->get_jacobian(el_pt, el_residuals, el_jacobian);

      // Scatter into the global objects: local row -> global row, ...
      for (unsigned row = 0; row < n_local_dof; row++)
      {
        const unsigned long global_row = handler_pt->eqn_number(el_pt, row);
        residuals[global_row] += el_residuals[row];

        // ...local column -> global column
        for (unsigned col = 0; col < n_local_dof; col++)
        {
          jacobian(global_row, handler_pt->eqn_number(el_pt, col)) +=
            el_jacobian(row, col);
        }
      }
    }
  }
 
  //=============================================================================
  /// Return the fully-assembled Jacobian and residuals for the problem,
  /// in the case where the Jacobian matrix is in a distributable
  /// row compressed storage format.
  /// 1. If the distribution of the jacobian and residuals is setup then, they
  /// will be returned with that distribution.
  /// Note. the jacobian and residuals must have the same distribution.
  /// 2. If the distribution of the jacobian and residuals are not setup then
  /// their distribution will computed based on:
  /// Distributed_problem_matrix_distribution.
  ///
  /// When compiled with PARANOID, an OomphLibError is thrown if only one
  /// of the two has a distribution, if their distributions differ, or if
  /// the distribution does not have ndof() global rows.
  //=============================================================================
  void Problem::get_jacobian(DoubleVector& residuals, CRDoubleMatrix& jacobian)
  {
    // Three different cases; if MPI_Helpers::MPI_has_been_initialised=true
    // this means MPI_Helpers::setup() has been called.  This could happen on a
    // code compiled with MPI but run serially; in this instance the
    // get_jacobian function still works on one processor.
    //
    // Secondly, if a code has been compiled with MPI, but MPI_Helpers::setup()
    // has not been called, then MPI_Helpers::MPI_has_been_initialised=false
    // and the code calls...
    //
    // Thirdly, the serial version (compiled by all, but only run when compiled
    // with MPI if MPI_Helpers::MPI_has_been_initialised=false
    //
    // The only case where an MPI code cannot run serially at present
    // is one where the distribute function is used (i.e. METIS is called)

    // Allocate storage for the matrix entries
    // The generalised Vector<Vector<>> structure is required
    // for the most general interface to sparse_assemble() which allows
    // the assembly of multiple matrices at once.
    Vector<int*> column_index(1);
    Vector<int*> row_start(1);
    Vector<double*> value(1);
    Vector<unsigned> nnz(1);

#ifdef PARANOID
    // PARANOID checks that the distribution of the jacobian matches that of the
    // residuals (if they are setup) and that they have the right number of rows
    if (residuals.built() && jacobian.distribution_built())
    {
      if (!(*residuals.distribution_pt() == *jacobian.distribution_pt()))
      {
        std::ostringstream error_stream;
        error_stream << "The distribution of the residuals must "
                     << "be the same as the distribution of the jacobian.";
        throw OomphLibError(
          error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
      }
      if (jacobian.distribution_pt()->nrow() != this->ndof())
      {
        std::ostringstream error_stream;
        // Note the leading space in the second piece: without it the two
        // pieces run together as "does nothave".
        error_stream
          << "The distribution of the jacobian and residuals does not"
          << " have the correct number of global rows.";
        throw OomphLibError(
          error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
      }
    }
    else if (residuals.built() != jacobian.distribution_built())
    {
      std::ostringstream error_stream;
      error_stream << "The distribution of the jacobian and residuals must "
                   << "both be setup or both not setup";
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif


    // Allocate generalised storage format for passing to sparse_assemble()
    Vector<double*> res(1);

    // determine the distribution for the jacobian.
    // IF the jacobian has distribution setup then use that
    // ELSE determine the distribution based on the
    // distributed_matrix_distribution enum
    LinearAlgebraDistribution* dist_pt = 0;
    if (jacobian.distribution_built())
    {
      dist_pt = new LinearAlgebraDistribution(jacobian.distribution_pt());
    }
    else
    {
      create_new_linear_algebra_distribution(dist_pt);
    }


    // The matrix is in compressed row format
    bool compressed_row_flag = true;

#ifdef OOMPH_HAS_MPI
    // Serial assembly if there is only one processor
    if (Communicator_pt->nproc() == 1)
    {
#endif
      sparse_assemble_row_or_column_compressed(
        column_index, row_start, value, nnz, res, compressed_row_flag);
      jacobian.build(dist_pt);
      jacobian.build_without_copy(
        dist_pt->nrow(), nnz[0], value[0], column_index[0], row_start[0]);
      residuals.build(dist_pt, 0.0);
      residuals.set_external_values(res[0], true);
#ifdef OOMPH_HAS_MPI
    }
    else
    {
      // Requested distribution is distributed: assemble directly into it
      if (dist_pt->distributed())
      {
        parallel_sparse_assemble(
          dist_pt, column_index, row_start, value, nnz, res);
        jacobian.build(dist_pt);
        jacobian.build_without_copy(
          dist_pt->nrow(), nnz[0], value[0], column_index[0], row_start[0]);
        residuals.build(dist_pt, 0.0);
        residuals.set_external_values(res[0], true);
      }
      // Requested distribution is not distributed: assemble with a
      // temporary distributed distribution, then redistribute both the
      // matrix and the vector to the requested one
      else
      {
        LinearAlgebraDistribution* temp_dist_pt =
          new LinearAlgebraDistribution(Communicator_pt, dist_pt->nrow(), true);
        parallel_sparse_assemble(
          temp_dist_pt, column_index, row_start, value, nnz, res);
        jacobian.build(temp_dist_pt);
        jacobian.build_without_copy(
          dist_pt->nrow(), nnz[0], value[0], column_index[0], row_start[0]);
        jacobian.redistribute(dist_pt);
        residuals.build(temp_dist_pt, 0.0);
        residuals.set_external_values(res[0], true);
        residuals.redistribute(dist_pt);
        delete temp_dist_pt;
      }
    }
#endif

    // clean up dist_pt
    delete dist_pt;
  }
 
  //=============================================================================
  /// Return the fully-assembled Jacobian and residuals for the problem,
  /// in the case when the jacobian matrix is in column-compressed storage
  /// format.
  ///
  /// If residuals has not been built it is built with a non-distributed
  /// distribution of ndof() rows; otherwise a copy of its existing
  /// distribution is used.
  ///
  /// When compiled with MPI, an OomphLibError is thrown if the problem's
  /// communicator has more than one processor. When compiled with
  /// PARANOID, an OomphLibError is thrown if residuals is built but is
  /// distributed, has the wrong number of rows, or uses a communicator
  /// that differs from the problem's.
  //=============================================================================
  void Problem::get_jacobian(DoubleVector& residuals, CCDoubleMatrix& jacobian)
  {
    // Three different cases; if MPI_Helpers::MPI_has_been_initialised=true
    // this means MPI_Helpers::setup() has been called.  This could happen on a
    // code compiled with MPI but run serially; in this instance the
    // get_jacobian function still works on one processor.
    //
    // Secondly, if a code has been compiled with MPI, but MPI_Helpers::setup()
    // has not been called, then MPI_Helpers::MPI_has_been_initialised=false
    // and the code calls...
    //
    // Thirdly, the serial version (compiled by all, but only run when compiled
    // with MPI if MPI_Helpers::MPI_has_been_initialised=false
    //
    // The only case where an MPI code cannot run serially at present
    // is one where the distribute function is used (i.e. METIS is called)

    // get the number of degrees of freedom
    unsigned n_dof = ndof();

#ifdef PARANOID
    // PARANOID checks : if the distribution of residuals is setup then it
    // must not be distributed, have the right number of rows, and the same
    // communicator as the problem
    if (residuals.built())
    {
      if (residuals.distribution_pt()->distributed())
      {
        std::ostringstream error_stream;
        error_stream
          << "If the DoubleVector residuals is setup then it must not "
          << "be distributed.";
        throw OomphLibError(
          error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
      }
      if (residuals.distribution_pt()->nrow() != n_dof)
      {
        std::ostringstream error_stream;
        error_stream
          << "If the DoubleVector residuals is setup then it must have"
          << " the correct number of rows";
        throw OomphLibError(
          error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
      }
      if (!(*Communicator_pt ==
            *residuals.distribution_pt()->communicator_pt()))
      {
        std::ostringstream error_stream;
        error_stream
          << "If the DoubleVector residuals is setup then it must have"
          << " the same communicator as the problem.";
        throw OomphLibError(
          error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
      }
    }
#endif

#ifdef OOMPH_HAS_MPI
    // Reject the multi-processor case before anything is allocated, so
    // that throwing here does not leak the distribution created below.
    if (communicator_pt()->nproc() != 1)
    {
      std::ostringstream error_stream;
      error_stream << "Cannot assemble a CCDoubleMatrix Jacobian on more "
                   << "than one processor.";
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // Allocate storage for the matrix entries
    // The generalised Vector<Vector<>> structure is required
    // for the most general interface to sparse_assemble() which allows
    // the assembly of multiple matrices at once.
    Vector<int*> row_index(1);
    Vector<int*> column_start(1);
    Vector<double*> value(1);

    // Allocate generalised storage format for passing to sparse_assemble()
    Vector<double*> res(1);

    // allocate storage for the number of non-zeros in each matrix
    Vector<unsigned> nnz(1);

    // The matrix is in compressed column format
    bool compressed_row_flag = false;

    // get the distribution for the residuals: a new non-distributed one if
    // the vector has not been built, a copy of the existing one otherwise
    LinearAlgebraDistribution* dist_pt = 0;
    if (!residuals.built())
    {
      dist_pt = new LinearAlgebraDistribution(Communicator_pt, n_dof, false);
    }
    else
    {
      dist_pt = new LinearAlgebraDistribution(residuals.distribution_pt());
    }

    // Serial assembly; the matrix and the vector are handed the assembled
    // arrays rather than copies of them
    sparse_assemble_row_or_column_compressed(
      row_index, column_start, value, nnz, res, compressed_row_flag);
    jacobian.build_without_copy(
      value[0], row_index[0], column_start[0], nnz[0], n_dof, n_dof);
    residuals.build(dist_pt, 0.0);
    residuals.set_external_values(res[0], true);

    // clean up
    delete dist_pt;
  }
 
 
  //===================================================================
  /// Set all pinned values to zero.
  /// Used to set boundary conditions to be homogeneous in the copy
  /// of the problem  used in adaptive bifurcation tracking
  /// (ALH: TEMPORARY HACK, WILL BE FIXED)
  //==================================================================
  void Problem::set_pinned_values_to_zero()
  {
    // NOTE THIS DOES NOT ZERO ANY SPINE DATA, but otherwise everything else
    // should be zeroed
 
    // Zero any pinned global Data
    const unsigned n_global_data = nglobal_data();
    for (unsigned i = 0; i < n_global_data; i++)
    {
      Data* const local_data_pt = Global_data_pt[i];
      const unsigned n_value = local_data_pt->nvalue();
      for (unsigned j = 0; j < n_value; j++)
      {
        // If the data value is pinned set the value to zero
        if (local_data_pt->is_pinned(j))
        {
          local_data_pt->set_value(j, 0.0);
        }
      }
    }
 
    // Loop over the submeshes:
    const unsigned n_sub_mesh = Sub_mesh_pt.size();
    if (n_sub_mesh == 0)
    {
      // Loop over the nodes in the element
      const unsigned n_node = Mesh_pt->nnode();
      for (unsigned n = 0; n < n_node; n++)
      {
        Node* const local_node_pt = Mesh_pt->node_pt(n);
        const unsigned n_value = local_node_pt->nvalue();
        for (unsigned j = 0; j < n_value; j++)
        {
          // If the data value is pinned set the value to zero
          if (local_node_pt->is_pinned(j))
          {
            local_node_pt->set_value(j, 0.0);
          }
        }
 
        // Try to cast to a solid node
        SolidNode* const local_solid_node_pt =
          dynamic_cast<SolidNode*>(local_node_pt);
        // If we are successful
        if (local_solid_node_pt)
        {
          // Find the dimension of the node
          const unsigned n_dim = local_solid_node_pt->ndim();
          // Find number of positions
          const unsigned n_position_type =
            local_solid_node_pt->nposition_type();
 
          for (unsigned k = 0; k < n_position_type; k++)
          {
            for (unsigned i = 0; i < n_dim; i++)
            {
              // If the generalised position is pinned,
              // set the value to zero
              if (local_solid_node_pt->position_is_pinned(k, i))
              {
                local_solid_node_pt->x_gen(k, i) = 0.0;
              }
            }
          }
        }
      }
 
      // Now loop over the element's and zero the internal data
      const unsigned n_element = Mesh_pt->nelement();
      for (unsigned e = 0; e < n_element; e++)
      {
        GeneralisedElement* const local_element_pt = Mesh_pt->element_pt(e);
        const unsigned n_internal = local_element_pt->ninternal_data();
        for (unsigned i = 0; i < n_internal; i++)
        {
          Data* const local_data_pt = local_element_pt->internal_data_pt(i);
          const unsigned n_value = local_data_pt->nvalue();
          for (unsigned j = 0; j < n_value; j++)
          {
            // If the data value is pinned set the value to zero
            if (local_data_pt->is_pinned(j))
            {
              local_data_pt->set_value(j, 0.0);
            }
          }
        }
      } // End of loop over elements
    }
    else
    {
      // Alternatively loop over all sub meshes
      for (unsigned m = 0; m < n_sub_mesh; m++)
      {
        // Loop over the nodes in the element
        const unsigned n_node = Sub_mesh_pt[m]->nnode();
        for (unsigned n = 0; n < n_node; n++)
        {
          Node* const local_node_pt = Sub_mesh_pt[m]->node_pt(n);
          const unsigned n_value = local_node_pt->nvalue();
          for (unsigned j = 0; j < n_value; j++)
          {
            // If the data value is pinned set the value to zero
            if (local_node_pt->is_pinned(j))
            {
              local_node_pt->set_value(j, 0.0);
            }
          }
 
          // Try to cast to a solid node
          SolidNode* const local_solid_node_pt =
            dynamic_cast<SolidNode*>(local_node_pt);
          // If we are successful
          if (local_solid_node_pt)
          {
            // Find the dimension of the node
            const unsigned n_dim = local_solid_node_pt->ndim();
            // Find number of positions
            const unsigned n_position_type =
              local_solid_node_pt->nposition_type();
 
            for (unsigned k = 0; k < n_position_type; k++)
            {
              for (unsigned i = 0; i < n_dim; i++)
              {
                // If the generalised position is pinned,
                // set the value to zero
                if (local_solid_node_pt->position_is_pinned(k, i))
                {
                  local_solid_node_pt->x_gen(k, i) = 0.0;
                }
              }
            }
          }
        }
 
        // Now loop over the element's and zero the internal data
        const unsigned n_element = Sub_mesh_pt[m]->nelement();
        for (unsigned e = 0; e < n_element; e++)
        {
          GeneralisedElement* const local_element_pt =
            Sub_mesh_pt[m]->element_pt(e);
          const unsigned n_internal = local_element_pt->ninternal_data();
          for (unsigned i = 0; i < n_internal; i++)
          {
            Data* const local_data_pt = local_element_pt->internal_data_pt(i);
            const unsigned n_value = local_data_pt->nvalue();
            for (unsigned j = 0; j < n_value; j++)
            {
              // If the data value is pinned set the value to zero
              if (local_data_pt->is_pinned(j))
              {
                local_data_pt->set_value(j, 0.0);
              }
            }
          }
        } // End of loop over elements
      }
    }
  }
 
 
  //=====================================================================
  /// This is a (private) helper function that is used to assemble system
  /// matrices in compressed row or column format
  /// and compute residual vectors.
  /// The default action is to assemble the jacobian matrix and
  /// residuals for the Newton method. The action can be
  /// overloaded at an elemental level by changing the default
  /// behaviour of the function Element::get_all_vectors_and_matrices().
  /// column_or_row_index: Column [or row] index of given entry
  /// row_or_column_start: Index of first entry for given row [or column]
  /// value              : Vector of nonzero entries
  /// nnz                : Number of nonzero entries in each matrix
  /// residuals          : Residual vector
  /// compressed_row_flag: Bool flag to indicate if storage format is
  ///                      compressed row [if false interpretation of
  ///                      arguments is as stated in square brackets].
  /// We provide five different assembly methods (vectors of pairs,
  /// two vectors, maps, lists, two arrays), each with different
  /// memory requirements/execution speeds. The method is set by
  /// the public flag Problem::Sparse_assembly_method. All arguments are
  /// forwarded unchanged to the selected implementation; an OomphLibError
  /// is thrown if the flag does not match any of the known methods.
  //=====================================================================
  void Problem::sparse_assemble_row_or_column_compressed(
    Vector<int*>& column_or_row_index,
    Vector<int*>& row_or_column_start,
    Vector<double*>& value,
    Vector<unsigned>& nnz,
    Vector<double*>& residuals,
    bool compressed_row_flag)
  {
    // Choose the actual method
    switch (Sparse_assembly_method)
    {
      case Perform_assembly_using_vectors_of_pairs:

        sparse_assemble_row_or_column_compressed_with_vectors_of_pairs(
          column_or_row_index,
          row_or_column_start,
          value,
          nnz,
          residuals,
          compressed_row_flag);

        break;

      case Perform_assembly_using_two_vectors:

        sparse_assemble_row_or_column_compressed_with_two_vectors(
          column_or_row_index,
          row_or_column_start,
          value,
          nnz,
          residuals,
          compressed_row_flag);

        break;

      case Perform_assembly_using_maps:

        sparse_assemble_row_or_column_compressed_with_maps(column_or_row_index,
                                                           row_or_column_start,
                                                           value,
                                                           nnz,
                                                           residuals,
                                                           compressed_row_flag);

        break;

      case Perform_assembly_using_lists:

        sparse_assemble_row_or_column_compressed_with_lists(
          column_or_row_index,
          row_or_column_start,
          value,
          nnz,
          residuals,
          compressed_row_flag);

        break;

      case Perform_assembly_using_two_arrays:

        sparse_assemble_row_or_column_compressed_with_two_arrays(
          column_or_row_index,
          row_or_column_start,
          value,
          nnz,
          residuals,
          compressed_row_flag);

        break;

      default:
      {
        // Scope the stream so the label does not declare a non-trivial
        // local directly inside the switch body
        std::ostringstream error_stream;
        // Separate the offending value from the text so the message
        // is readable
        error_stream
          << "Error: Incorrect value for Problem::Sparse_assembly_method: "
          << Sparse_assembly_method << std::endl
          << "It should be one of the enumeration Problem::Assembly_method"
          << std::endl;
        throw OomphLibError(
          error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
      }
    }
  }
 
 
  //=====================================================================
  /// This is a (private) helper function that is used to assemble system
  /// matrices in compressed row or column format
  /// and compute residual vectors, using maps.
  /// The elemental contributions are obtained from the problem's assembly
  /// handler via get_all_vectors_and_matrices(), so the default action
  /// (jacobian matrix and residuals for the Newton method) can be
  /// changed by changing that function's behaviour.
  /// column_or_row_index: Column [or row] index of given entry
  /// row_or_column_start: Index of first entry for given row [or column]
  /// value              : Vector of nonzero entries
  /// nnz                : Number of nonzero entries in each matrix
  /// residuals          : Residual vector
  /// compressed_row_flag: Bool flag to indicate if storage format is
  ///                      compressed row [if false interpretation of
  ///                      arguments is as stated in square brackets].
  /// The number of matrices assembled is column_or_row_index.size(); the
  /// number of vectors is residuals.size(). All output arrays are
  /// allocated with new[] in this function and are not released here.
  //=====================================================================
  void Problem::sparse_assemble_row_or_column_compressed_with_maps(
    Vector<int*>& column_or_row_index,
    Vector<int*>& row_or_column_start,
    Vector<double*>& value,
    Vector<unsigned>& nnz,
    Vector<double*>& residuals,
    bool compressed_row_flag)
  {
    // Total number of elements
    const unsigned long n_elements = mesh_pt()->nelement();

    // Default range of elements for distributed problems.
    // The range is half-open, [el_lo, el_hi_plus_one), so that an empty
    // range is represented correctly: with unsigned arithmetic an
    // inclusive upper bound "n - 1" would wrap around when n is zero.
    unsigned long el_lo = 0;
    unsigned long el_hi_plus_one = n_elements;

#ifdef OOMPH_HAS_MPI
    // Otherwise just loop over a fraction of the elements
    // (This will either have been initialised in
    // Problem::set_default_first_and_last_element_for_assembly() or
    // will have been re-assigned during a previous assembly loop
    // Note that following the re-assignment only the entries
    // for the current processor are relevant.
    if (!Problem_has_been_distributed)
    {
      el_lo = First_el_for_assembly[Communicator_pt->my_rank()];
      el_hi_plus_one =
        Last_el_plus_one_for_assembly[Communicator_pt->my_rank()];
    }
#endif

    // number of dofs
    const unsigned n_dof = this->ndof();

    // Find the number of vectors to be assembled
    const unsigned n_vector = residuals.size();

    // Find the number of matrices to be assembled
    const unsigned n_matrix = column_or_row_index.size();

    // Locally cache pointer to assembly handler
    AssemblyHandler* const assembly_handler_pt = Assembly_handler_pt;

#ifdef OOMPH_HAS_MPI
    // Timing/load-balancing bookkeeping is skipped when the handler is a
    // ParallelResidualsHandler
    bool doing_residuals = false;
    if (dynamic_cast<ParallelResidualsHandler*>(Assembly_handler_pt) != 0)
    {
      doing_residuals = true;
    }
#endif

// Error check dimensions
#ifdef PARANOID
    if (row_or_column_start.size() != n_matrix)
    {
      std::ostringstream error_stream;
      error_stream << "Error: " << std::endl
                   << "row_or_column_start.size() "
                   << row_or_column_start.size() << " does not equal "
                   << "column_or_row_index.size() "
                   << column_or_row_index.size() << std::endl;
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }

    if (value.size() != n_matrix)
    {
      std::ostringstream error_stream;
      error_stream
        << "Error in Problem::sparse_assemble_row_or_column_compressed "
        << std::endl
        << "value.size() " << value.size() << " does not equal "
        << "column_or_row_index.size() " << column_or_row_index.size()
        << std::endl
        << std::endl
        << std::endl;
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif


    // The idea behind this sparse assembly routine is to use a vector of
    // maps for the entries in each row or column of the complete matrix.
    // The key for each map is the global row or column number and
    // the default comparison operator for integers means that each map
    // is ordered by the global row or column number. Thus, we need not
    // sort the maps, that happens at each insertion of a new entry. The
    // price we pay is that for large maps, insertion is not a
    // cheap operation. Hash maps can be used to increase the speed, but then
    // the ordering is lost and we would have to sort anyway. The solution if
    // speed is required is to use lists, see below.


    // Set up a vector of vectors of maps of entries of each matrix,
    // indexed by either the column or row. The entries of the vector for
    // each matrix correspond to all the rows or columns of that matrix.
    // The use of the map storage
    // scheme, with its implicit ordering on the first index, gives
    // a sparse ordered list of the entries in the given row or column.
    Vector<Vector<std::map<unsigned, double>>> matrix_data_map(n_matrix);
    // Loop over the number of matrices being assembled and resize
    // each vector of maps to the number of rows or columns of the matrix
    for (unsigned m = 0; m < n_matrix; m++)
    {
      matrix_data_map[m].resize(n_dof);
    }

    // Allocate and zero the residuals vectors
    for (unsigned v = 0; v < n_vector; v++)
    {
      residuals[v] = new double[n_dof];
      for (unsigned i = 0; i < n_dof; i++)
      {
        residuals[v][i] = 0.0;
      }
    }


#ifdef OOMPH_HAS_MPI


    // Storage for assembly time for elements
    double t_assemble_start = 0.0;

    // Storage for assembly times
    if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
    {
      Elemental_assembly_time.resize(n_elements);
    }

#endif

    //----------------Assemble and populate the maps-------------------------
    {
      // Local storage for the element's contribution to the residuals
      // vectors and system matrices. The containers are created once
      // and resized to each element's number of dofs inside the loop.
      Vector<Vector<double>> el_residuals(n_vector);
      Vector<DenseMatrix<double>> el_jacobian(n_matrix);

      // Loop over the elements for this processor
      for (unsigned long e = el_lo; e < el_hi_plus_one; e++)
      {
#ifdef OOMPH_HAS_MPI
        // Time it?
        if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
        {
          t_assemble_start = TimingHelpers::timer();
        }
#endif

        // Get the pointer to the element
        GeneralisedElement* elem_pt = mesh_pt()->element_pt(e);

#ifdef OOMPH_HAS_MPI
        // Ignore halo elements
        if (!elem_pt->is_halo())
        {
#endif

          // Find number of degrees of freedom in the element
          const unsigned nvar = assembly_handler_pt->ndof(elem_pt);

          // Resize the storage for elemental jacobian and residuals
          for (unsigned v = 0; v < n_vector; v++)
          {
            el_residuals[v].resize(nvar);
          }
          for (unsigned m = 0; m < n_matrix; m++)
          {
            el_jacobian[m].resize(nvar);
          }

          // Now get the residuals and jacobian for the element
          assembly_handler_pt->get_all_vectors_and_matrices(
            elem_pt, el_residuals, el_jacobian);

          //---------------Insert the values into the maps--------------

          // Loop over the first index of local variables
          for (unsigned i = 0; i < nvar; i++)
          {
            // Get the global equation number of local equation i
            unsigned eqn_number = assembly_handler_pt->eqn_number(elem_pt, i);

            // Add the contribution to the residuals
            for (unsigned v = 0; v < n_vector; v++)
            {
              // Fill in each residuals vector
              residuals[v][eqn_number] += el_residuals[v][i];
            }

            // Now loop over the other index
            for (unsigned j = 0; j < nvar; j++)
            {
              // Get the number of the unknown
              unsigned unknown = assembly_handler_pt->eqn_number(elem_pt, j);

              // Loop over the matrices
              for (unsigned m = 0; m < n_matrix; m++)
              {
                // Get the value of the matrix at this point (named so
                // that it does not shadow the output argument "value")
                const double entry = el_jacobian[m](i, j);
                // Only bother to add to the map if it's non-zero
                if (std::fabs(entry) > Numerical_zero_for_sparse_assembly)
                {
                  // If it's compressed row storage, then our vector of maps
                  // is indexed by row (equation number)
                  if (compressed_row_flag)
                  {
                    // Add the data into the map using the unknown as the map
                    // key
                    matrix_data_map[m][eqn_number][unknown] += entry;
                  }
                  // Otherwise it's compressed column storage and our vector is
                  // indexed by column (the unknown)
                  else
                  {
                    // Add the data into the map using the eqn_number as the
                    // map key
                    matrix_data_map[m][unknown][eqn_number] += entry;
                  }
                }
              } // End of loop over matrices
            }
          }

#ifdef OOMPH_HAS_MPI
        } // endif halo element
#endif


#ifdef OOMPH_HAS_MPI
        // Time it?
        if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
        {
          Elemental_assembly_time[e] =
            TimingHelpers::timer() - t_assemble_start;
        }
#endif

      } // End of loop over the elements

    } // End of map assembly


#ifdef OOMPH_HAS_MPI

    // Postprocess timing information and re-allocate distribution of
    // elements during subsequent assemblies.
    if ((!doing_residuals) && (!Problem_has_been_distributed) &&
        Must_recompute_load_balance_for_assembly)
    {
      recompute_load_balanced_assembly();
    }

    // We have determined load balancing for current setup.
    // This can remain the same until assign_eqn_numbers() is called
    // again -- the flag is re-set to true there.
    if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
    {
      Must_recompute_load_balance_for_assembly = false;
    }

#endif


    //-----------Finally we need to convert the beautiful map storage scheme
    //------------------------to the containers required by SuperLU

    // Loop over the number of matrices
    for (unsigned m = 0; m < n_matrix; m++)
    {
      // One start index per row or column, plus the terminating entry
      row_or_column_start[m] = new int[n_dof + 1];
      // Counter for the total number of entries in the storage scheme
      unsigned long entry_count = 0;
      row_or_column_start[m][0] = entry_count;

      // first we compute the number of non-zeros
      nnz[m] = 0;
      for (unsigned long i_global = 0; i_global < n_dof; i_global++)
      {
        nnz[m] += matrix_data_map[m][i_global].size();
      }

      // and then allocate the storage
      column_or_row_index[m] = new int[nnz[m]];
      value[m] = new double[nnz[m]];

      // Now we merely loop over the number of rows or columns
      for (unsigned long i_global = 0; i_global < n_dof; i_global++)
      {
        // Start index for the present row
        row_or_column_start[m][i_global] = entry_count;

        // Copy all the entries in the map corresponding to the given
        // row or column; the map iterates in increasing key order.
        // An empty map simply contributes nothing.
        const std::map<unsigned, double>& line_map =
          matrix_data_map[m][i_global];
        for (std::map<unsigned, double>::const_iterator it = line_map.begin();
             it != line_map.end();
             ++it)
        {
          // The first value is the column or row index
          column_or_row_index[m][entry_count] = it->first;
          // The second value is the actual data value
          value[m][entry_count] = it->second;
          // Increase the value of the counter
          entry_count++;
        }
      }

      // Final entry in the row/column start vector
      row_or_column_start[m][n_dof] = entry_count;
    } // End of the loop over the matrices

    if (Pause_at_end_of_sparse_assembly)
    {
      oomph_info << "Pausing at end of sparse assembly." << std::endl;
      pause("Check memory usage now.");
    }
  }
 
 
  //=====================================================================
  /// This is a (private) helper function that is used to assemble system
  /// matrices in compressed row or column format
  /// and compute residual vectors using lists
  /// The default action is to assemble the jacobian matrix and
  /// residuals for the Newton method. The action can be
  /// overloaded at an elemental level by changing the default
  /// behaviour of the function Element::get_all_vectors_and_matrices().
  /// column_or_row_index: Column [or row] index of given entry
  /// row_or_column_start: Index of first entry for given row [or column]
  /// value              : Vector of nonzero entries
  /// residuals          : Residual vector
  /// compressed_row_flag: Bool flag to indicate if storage format is
  ///                      compressed row [if false interpretation of
  ///                      arguments is as stated in square brackets].
  //=====================================================================
  void Problem::sparse_assemble_row_or_column_compressed_with_lists(
    Vector<int*>& column_or_row_index,
    Vector<int*>& row_or_column_start,
    Vector<double*>& value,
    Vector<unsigned>& nnz,
    Vector<double*>& residuals,
    bool compressed_row_flag)
  {
    // Total number of elements
    const unsigned long n_elements = mesh_pt()->nelement();
 
    // Default range of elements for distributed problems
    unsigned long el_lo = 0;
    unsigned long el_hi = n_elements - 1;
 
#ifdef OOMPH_HAS_MPI
    // Otherwise just loop over a fraction of the elements
    // (This will either have been initialised in
    // Problem::set_default_first_and_last_element_for_assembly() or
    // will have been re-assigned during a previous assembly loop
    // Note that following the re-assignment only the entries
    // for the current processor are relevant.
    if (!Problem_has_been_distributed)
    {
      el_lo = First_el_for_assembly[Communicator_pt->my_rank()];
      el_hi = Last_el_plus_one_for_assembly[Communicator_pt->my_rank()] - 1;
    }
#endif
 
    // number of dofs
    unsigned ndof = this->ndof();
 
    // Find the number of vectors to be assembled
    const unsigned n_vector = residuals.size();
 
    // Find the number of matrices to be assembled
    const unsigned n_matrix = column_or_row_index.size();
 
    // Locally cache pointer to assembly handler
    AssemblyHandler* const assembly_handler_pt = Assembly_handler_pt;
 
#ifdef OOMPH_HAS_MPI
    bool doing_residuals = false;
    if (dynamic_cast<ParallelResidualsHandler*>(Assembly_handler_pt) != 0)
    {
      doing_residuals = true;
    }
#endif
 
// Error check dimensions
#ifdef PARANOID
    if (row_or_column_start.size() != n_matrix)
    {
      std::ostringstream error_stream;
      error_stream << "Error: " << std::endl
                   << "row_or_column_start.size() "
                   << row_or_column_start.size() << " does not equal "
                   << "column_or_row_index.size() "
                   << column_or_row_index.size() << std::endl;
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
 
    if (value.size() != n_matrix)
    {
      std::ostringstream error_stream;
      error_stream
        << "Error in Problem::sparse_assemble_row_or_column_compressed "
        << std::endl
        << "value.size() " << value.size() << " does not equal "
        << "column_or_row_index.size() " << column_or_row_index.size()
        << std::endl
        << std::endl
        << std::endl;
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif
 
    // The idea behind this sparse assembly routine is to use a vector of
    // lists for the entries in each row or column of the complete matrix.
    // The lists contain pairs of entries (global row/column number, value).
    // All non-zero contributions from each element are added to the lists.
    // We then sort each list by global row/column number and then combine
    // the entries corresponding to each row/column before adding to the
    // vectors column_or_row_index and value.
 
    // Note the trade off for "fast assembly" is that we will require
    // more memory during the assembly phase. Then again, if we can
    // only just assemble the sparse matrix, we're in real trouble.
 
    // Set up a vector of lists of paired entries of
    //(row/column index, jacobian matrix entry).
    // The entries of the vector correspond to all the rows or columns.
    // The use of the list storage scheme, should give fast insertion
    // and fast sorts later.
    Vector<Vector<std::list<std::pair<unsigned, double>>>> matrix_data_list(
      n_matrix);
    // Loop over the number of matrices and resize
    for (unsigned m = 0; m < n_matrix; m++)
    {
      matrix_data_list[m].resize(ndof);
    }
 
    // Resize the residuals vectors
    for (unsigned v = 0; v < n_vector; v++)
    {
      residuals[v] = new double[ndof];
      for (unsigned i = 0; i < ndof; i++)
      {
        residuals[v][i] = 0;
      }
    }
 
#ifdef OOMPH_HAS_MPI
 
 
    // Storage for assembly time for elements
    double t_assemble_start = 0.0;
 
    // Storage for assembly times
    if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
    {
      Elemental_assembly_time.resize(n_elements);
    }
 
#endif
 
    //------------Assemble and populate the lists-----------------------
    {
      // Allocate local storage for the element's contribution to the
      // residuals vectors and system matrices of the size of the maximum
      // number of dofs in any element.
      // This means that the storage is only allocated (and deleted) once
      Vector<Vector<double>> el_residuals(n_vector);
      Vector<DenseMatrix<double>> el_jacobian(n_matrix);
 
 
      // Pointer to a single list to be used during the assembly
      std::list<std::pair<unsigned, double>>* list_pt;
 
      // Loop over the all elements
      for (unsigned long e = el_lo; e <= el_hi; e++)
      {
#ifdef OOMPH_HAS_MPI
        // Time it?
        if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
        {
          t_assemble_start = TimingHelpers::timer();
        }
#endif
 
        // Get the pointer to the element
        GeneralisedElement* elem_pt = mesh_pt()->element_pt(e);
 
#ifdef OOMPH_HAS_MPI
        // Ignore halo elements
        if (!elem_pt->is_halo())
        {
#endif
 
          // Find number of degrees of freedom in the element
          const unsigned nvar = assembly_handler_pt->ndof(elem_pt);
 
          // Resize the storage for the elemental jacobian and residuals
          for (unsigned v = 0; v < n_vector; v++)
          {
            el_residuals[v].resize(nvar);
          }
          for (unsigned m = 0; m < n_matrix; m++)
          {
            el_jacobian[m].resize(nvar);
          }
 
          // Now get the residuals and jacobian for the element
          assembly_handler_pt->get_all_vectors_and_matrices(
            elem_pt, el_residuals, el_jacobian);
 
          //---------------- Insert the values into the lists -----------
 
          // Loop over the first index of local variables
          for (unsigned i = 0; i < nvar; i++)
          {
            // Get the local equation number
            unsigned eqn_number = assembly_handler_pt->eqn_number(elem_pt, i);
 
            // Add the contribution to the residuals
            for (unsigned v = 0; v < n_vector; v++)
            {
              // Fill in the residuals vector
              residuals[v][eqn_number] += el_residuals[v][i];
            }
 
            // Now loop over the other index
            for (unsigned j = 0; j < nvar; j++)
            {
              // Get the number of the unknown
              unsigned unknown = assembly_handler_pt->eqn_number(elem_pt, j);
 
              // Loop over the matrices
              for (unsigned m = 0; m < n_matrix; m++)
              {
                // Get the value of the matrix at this point
                double value = el_jacobian[m](i, j);
                // Only add to the list if it's non-zero
                if (std::fabs(value) > Numerical_zero_for_sparse_assembly)
                {
                  // If it's compressed row storage, then our vector is indexed
                  // by row (the equation number)
                  if (compressed_row_flag)
                  {
                    // Find the list that corresponds to the desired row
                    list_pt = &matrix_data_list[m][eqn_number];
                    // Insert the data into the list, the first entry
                    // in the pair is the unknown (column index),
                    // the second is the value itself.
                    list_pt->insert(list_pt->end(),
                                    std::make_pair(unknown, value));
                  }
                  // Otherwise it's compressed column storage, and our
                  // vector is indexed by column (the unknown)
                  else
                  {
                    // Find the list that corresponds to the desired column
                    list_pt = &matrix_data_list[m][unknown];
                    // Insert the data into the list, the first entry
                    // in the pair is the equation number (row index),
                    // the second is the value itself.
                    list_pt->insert(list_pt->end(),
                                    std::make_pair(eqn_number, value));
                  }
                }
              }
            }
          }
 
#ifdef OOMPH_HAS_MPI
        } // endif halo element
#endif
 
 
#ifdef OOMPH_HAS_MPI
        // Time it?
        if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
        {
          Elemental_assembly_time[e] =
            TimingHelpers::timer() - t_assemble_start;
        }
#endif
 
      } // End of loop over the elements
 
    } // list_pt goes out of scope
 
 
#ifdef OOMPH_HAS_MPI
 
    // Postprocess timing information and re-allocate distribution of
    // elements during subsequent assemblies.
    if ((!doing_residuals) && (!Problem_has_been_distributed) &&
        Must_recompute_load_balance_for_assembly)
    {
      recompute_load_balanced_assembly();
    }
 
    // We have determined load balancing for current setup.
    // This can remain the same until assign_eqn_numbers() is called
    // again -- the flag is re-set to true there.
    if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
    {
      Must_recompute_load_balance_for_assembly = false;
    }
 
#endif
 
 
    //----Finally we need to convert the beautiful list storage scheme---
    //----------to the containers required by SuperLU--------------------
 
    // Loop over the number of matrices
    for (unsigned m = 0; m < n_matrix; m++)
    {
      // Set the number of rows or columns
      row_or_column_start[m] = new int[ndof + 1];
      // Counter for the total number of entries in the storage scheme
      unsigned long entry_count = 0;
      // The first entry is 0
      row_or_column_start[m][0] = entry_count;
 
      // first we compute the number of non-zeros
      nnz[m] = 0;
      for (unsigned long i_global = 0; i_global < ndof; i_global++)
      {
        nnz[m] += matrix_data_list[m][i_global].size();
      }
 
      // and then resize the storage
      column_or_row_index[m] = new int[nnz[m]];
      value[m] = new double[nnz[m]];
 
      // Now we merely loop over the number of rows or columns
      for (unsigned long i_global = 0; i_global < ndof; i_global++)
      {
        // Start index for the present row is the number of entries so far
        row_or_column_start[m][i_global] = entry_count;
        // If there are no entries in the list then skip the loop
        if (matrix_data_list[m][i_global].empty())
        {
          continue;
        }
 
        // Sort the list corresponding to this row or column by the
        // column or row index (first entry in the pair).
        // This might be inefficient, but we only have to do the sort ONCE
        // for each list. This is faster than using a map storage scheme, where
        // we are sorting for every insertion (although the map structure
        // is cleaner and more memory efficient)
        matrix_data_list[m][i_global].sort();
 
        // Set up an iterator for start of the list
        std::list<std::pair<unsigned, double>>::iterator it =
          matrix_data_list[m][i_global].begin();
 
        // Get the first row or column index in the list...
        unsigned current_index = it->first;
        //...and the corresponding value
        double current_value = it->second;
 
        // Loop over all the entries in the sorted list
        // Increase the iterator so that we start at the second entry
        for (++it; it != matrix_data_list[m][i_global].end(); ++it)
        {
          // If the index has not changed, then we must add the contribution
          // of the present entry to the value.
          // Additionally check that the entry is non-zero
          if ((it->first == current_index) &&
              (std::fabs(it->second) > Numerical_zero_for_sparse_assembly))
          {
            current_value += it->second;
          }
          // Otherwise, we have added all the contributions to the index
          // to current_value, so add it to the SuperLU data structure
          else
          {
            // Add the row or column index to the vector
            column_or_row_index[m][entry_count] = current_index;
            // Add the actual value to the vector
            value[m][entry_count] = current_value;
            // Increase the counter for the number of entries in each vector
            entry_count++;
 
            // Set the index and value to be those of the current entry in the
            // list
            current_index = it->first;
            current_value = it->second;
          }
        } // End of loop over all list entries for this global row or column
 
        // There are TWO special cases to consider.
        // If there is only one equation number in the list, then it
        // will NOT have been added. We test this case by comparing the
        // number of entries with those stored in row_or_column_start[i_global]
        // Otherwise
        // If the final entry in the list has the same index as the penultimate
        // entry, then it will NOT have been added to the SuperLU storage scheme
        // Check this by comparing the current_index with the final index
        // stored in the SuperLU scheme. If they are not the same, then
        // add the current_index and value.
 
        // If single equation number in list
        if ((static_cast<int>(entry_count) == row_or_column_start[m][i_global])
            // If we have a single equation number, this will not be evaluated.
            // If we don't then we do the test to check that the final
            // entry is added
            || (static_cast<int>(current_index) !=
                column_or_row_index[m][entry_count - 1]))
        {
          // Add the row or column index to the vector
          column_or_row_index[m][entry_count] = current_index;
          // Add the actual value to the vector
          value[m][entry_count] = current_value;
          // Increase the counter for the number of entries in each vector
          entry_count++;
        }
 
      } // End of loop over the rows or columns of the entire matrix
 
      // Final entry in the row/column start vector
      row_or_column_start[m][ndof] = entry_count;
    } // End of loop over matrices
 
    if (Pause_at_end_of_sparse_assembly)
    {
      oomph_info << "Pausing at end of sparse assembly." << std::endl;
      pause("Check memory usage now.");
    }
  }
 
 
  //=====================================================================
  /// This is a (private) helper function that is used to assemble system
  /// matrices in compressed row or column format
  /// and compute residual vectors using vectors of pairs
  /// The default action is to assemble the jacobian matrix and
  /// residuals for the Newton method. The action can be
  /// overloaded at an elemental level by changing the default
  /// behaviour of the function Element::get_all_vectors_and_matrices().
  /// column_or_row_index: Column [or row] index of given entry
  /// row_or_column_start: Index of first entry for given row [or column]
  /// value              : Vector of nonzero entries
  /// nnz                : Number of entries stored for each matrix
  /// residuals          : Residual vector
  /// compressed_row_flag: Bool flag to indicate if storage format is
  ///                      compressed row [if false interpretation of
  ///                      arguments is as stated in square brackets].
  ///
  /// The arrays referenced by column_or_row_index, row_or_column_start,
  /// value and residuals are allocated in this function with new[] and
  /// are not freed here. Within each row [or column] the entries are
  /// stored in the order in which they were first encountered during
  /// assembly, i.e. they are NOT sorted by index.
  //=====================================================================
  void Problem::sparse_assemble_row_or_column_compressed_with_vectors_of_pairs(
    Vector<int*>& column_or_row_index,
    Vector<int*>& row_or_column_start,
    Vector<double*>& value,
    Vector<unsigned>& nnz,
    Vector<double*>& residuals,
    bool compressed_row_flag)
  {
    // Total number of elements
    const unsigned long n_elements = mesh_pt()->nelement();

    // Default range of elements to be assembled, stored as the half-open
    // interval [el_lo, el_hi_plus_one). Storing "one past the end" (rather
    // than the index of the last element) avoids an unsigned wrap-around
    // when the range is empty.
    unsigned long el_lo = 0;
    unsigned long el_hi_plus_one = n_elements;

#ifdef OOMPH_HAS_MPI
    // Otherwise just loop over a fraction of the elements
    // (This will either have been initialised in
    // Problem::set_default_first_and_last_element_for_assembly() or
    // will have been re-assigned during a previous assembly loop
    // Note that following the re-assignment only the entries
    // for the current processor are relevant.
    if (!Problem_has_been_distributed)
    {
      el_lo = First_el_for_assembly[Communicator_pt->my_rank()];
      el_hi_plus_one =
        Last_el_plus_one_for_assembly[Communicator_pt->my_rank()];
    }
#endif

    // number of local eqns
    const unsigned ndof = this->ndof();

    // Find the number of vectors to be assembled
    const unsigned n_vector = residuals.size();

    // Find the number of matrices to be assembled
    const unsigned n_matrix = column_or_row_index.size();

    // Locally cache pointer to assembly handler
    AssemblyHandler* const assembly_handler_pt = Assembly_handler_pt;

#ifdef OOMPH_HAS_MPI
    // Are we only assembling residuals (in parallel)? If so the timing /
    // load balancing machinery below is bypassed.
    const bool doing_residuals =
      (dynamic_cast<ParallelResidualsHandler*>(Assembly_handler_pt) != 0);
#endif

// Error check dimensions
#ifdef PARANOID
    if (row_or_column_start.size() != n_matrix)
    {
      std::ostringstream error_stream;
      error_stream << "Error: " << std::endl
                   << "row_or_column_start.size() "
                   << row_or_column_start.size() << " does not equal "
                   << "column_or_row_index.size() "
                   << column_or_row_index.size() << std::endl;
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }

    if (value.size() != n_matrix)
    {
      std::ostringstream error_stream;
      error_stream << "Error: " << std::endl
                   << "value.size() " << value.size() << " does not equal "
                   << "column_or_row_index.size() "
                   << column_or_row_index.size() << std::endl
                   << std::endl
                   << std::endl;
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif


    // The idea behind this sparse assembly routine is to use a Vector of
    // Vectors of pairs for each complete matrix.
    // Each inner Vector stores pairs and holds the row (or column) index
    // and the value of the matrix entry.

    // Set up Vector of Vectors to store the entries of each matrix,
    // indexed by either the column or row.
    Vector<Vector<Vector<std::pair<unsigned, double>>>> matrix_data(n_matrix);

    // Loop over the number of matrices being assembled and resize
    // each Vector of Vectors to the number of rows or columns of the matrix
    for (unsigned m = 0; m < n_matrix; m++)
    {
      matrix_data[m].resize(ndof);
    }

    // Allocate and zero the residuals vectors
    for (unsigned v = 0; v < n_vector; v++)
    {
      residuals[v] = new double[ndof];
      for (unsigned i = 0; i < ndof; i++)
      {
        residuals[v][i] = 0;
      }
    }

#ifdef OOMPH_HAS_MPI

    // Storage for assembly time for elements
    double t_assemble_start = 0.0;

    // Storage for assembly times
    if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
    {
      Elemental_assembly_time.resize(n_elements);
    }

#endif

    //----------------Assemble and populate the vector storage scheme--------
    {
      // Local storage for the element's contribution to the residuals
      // vectors and system matrices. The containers are created once,
      // outside the element loop, and resized for each element.
      Vector<Vector<double>> el_residuals(n_vector);
      Vector<DenseMatrix<double>> el_jacobian(n_matrix);

      // Loop over the elements (half-open range, so an empty range is
      // simply skipped)
      for (unsigned long e = el_lo; e < el_hi_plus_one; e++)
      {
#ifdef OOMPH_HAS_MPI
        // Time it?
        if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
        {
          t_assemble_start = TimingHelpers::timer();
        }
#endif

        // Get the pointer to the element
        GeneralisedElement* elem_pt = mesh_pt()->element_pt(e);

#ifdef OOMPH_HAS_MPI
        // Ignore halo elements
        if (!elem_pt->is_halo())
        {
#endif

          // Find number of degrees of freedom in the element
          const unsigned nvar = assembly_handler_pt->ndof(elem_pt);

          // Resize the storage for elemental jacobian and residuals
          for (unsigned v = 0; v < n_vector; v++)
          {
            el_residuals[v].resize(nvar);
          }
          for (unsigned m = 0; m < n_matrix; m++)
          {
            el_jacobian[m].resize(nvar);
          }

          // Now get the residuals and jacobian for the element
          assembly_handler_pt->get_all_vectors_and_matrices(
            elem_pt, el_residuals, el_jacobian);

          //---------------Insert the values into the vectors--------------

          // Loop over the first index of local variables
          for (unsigned i = 0; i < nvar; i++)
          {
            // Get the local equation number
            const unsigned eqn_number =
              assembly_handler_pt->eqn_number(elem_pt, i);

            // Add the contribution to the residuals
            for (unsigned v = 0; v < n_vector; v++)
            {
              // Fill in each residuals vector
              residuals[v][eqn_number] += el_residuals[v][i];
            }

            // Now loop over the other index
            for (unsigned j = 0; j < nvar; j++)
            {
              // Get the number of the unknown
              const unsigned unknown =
                assembly_handler_pt->eqn_number(elem_pt, j);

              // For compressed row storage the outer Vector is indexed by
              // row (the equation number) and each pair stores the column
              // (the unknown); for compressed column storage the roles
              // are swapped.
              const unsigned outer_index =
                compressed_row_flag ? eqn_number : unknown;
              const unsigned inner_index =
                compressed_row_flag ? unknown : eqn_number;

              // Loop over the matrices
              for (unsigned m = 0; m < n_matrix; m++)
              {
                // Get the value of the matrix at this point
                const double jac_entry = el_jacobian[m](i, j);

                // Only bother to add to the vector if it's non-zero
                if (std::fabs(jac_entry) > Numerical_zero_for_sparse_assembly)
                {
                  // The (unsorted) entries stored so far for this row
                  // or column
                  Vector<std::pair<unsigned, double>>& line_data =
                    matrix_data[m][outer_index];

                  // Linear search for an existing entry with this index
                  const unsigned size = line_data.size();
                  unsigned k = 0;
                  while ((k < size) && (line_data[k].first != inner_index))
                  {
                    ++k;
                  }

                  // Not found: append a new entry
                  if (k == size)
                  {
                    line_data.push_back(
                      std::make_pair(inner_index, jac_entry));
                  }
                  // Found: accumulate into the existing entry
                  else
                  {
                    line_data[k].second += jac_entry;
                  }
                }
              } // End of loop over matrices
            }
          }

#ifdef OOMPH_HAS_MPI
        } // endif halo element
#endif


#ifdef OOMPH_HAS_MPI
        // Time it? (recorded for halo elements too)
        if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
        {
          Elemental_assembly_time[e] =
            TimingHelpers::timer() - t_assemble_start;
        }
#endif

      } // End of loop over the elements


    } // End of vector assembly


#ifdef OOMPH_HAS_MPI

    // Postprocess timing information and re-allocate distribution of
    // elements during subsequent assemblies.
    if ((!doing_residuals) && (!Problem_has_been_distributed) &&
        Must_recompute_load_balance_for_assembly)
    {
      recompute_load_balanced_assembly();
    }

    // We have determined load balancing for current setup.
    // This can remain the same until assign_eqn_numbers() is called
    // again -- the flag is re-set to true there.
    if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
    {
      Must_recompute_load_balance_for_assembly = false;
    }

#endif


    //-----------Finally we need to convert this vector storage scheme
    //------------------------to the containers required by SuperLU

    // Loop over the number of matrices
    for (unsigned m = 0; m < n_matrix; m++)
    {
      // Set the number of rows or columns
      row_or_column_start[m] = new int[ndof + 1];

      // fill row_or_column_start and find the number of entries
      row_or_column_start[m][0] = 0;
      for (unsigned long i = 0; i < ndof; i++)
      {
        row_or_column_start[m][i + 1] =
          row_or_column_start[m][i] + matrix_data[m][i].size();
      }
      const unsigned entries = row_or_column_start[m][ndof];

      // Allocate the index and value arrays and record the entry count
      column_or_row_index[m] = new int[entries];
      value[m] = new double[entries];
      nnz[m] = entries;

      // Now we merely loop over the number of rows or columns
      for (unsigned long i_global = 0; i_global < ndof; i_global++)
      {
        // If there are no entries in the vector then skip the rest of the loop
        if (matrix_data[m][i_global].empty())
        {
          continue;
        }

        // Loop over all the entries in the vectors corresponding to the given
        // row or column. It will NOT be ordered
        unsigned p = 0;
        for (int j = row_or_column_start[m][i_global];
             j < row_or_column_start[m][i_global + 1];
             j++)
        {
          column_or_row_index[m][j] = matrix_data[m][i_global][p].first;
          value[m][j] = matrix_data[m][i_global][p].second;
          ++p;
        }
      }
    } // End of the loop over the matrices

    if (Pause_at_end_of_sparse_assembly)
    {
      oomph_info << "Pausing at end of sparse assembly." << std::endl;
      pause("Check memory usage now.");
    }
  }
 
 
  //=====================================================================
  /// This is a (private) helper function that is used to assemble system
  /// matrices in compressed row or column format
  /// and compute residual vectors using two vectors.
  /// The default action is to assemble the jacobian matrix and
  /// residuals for the Newton method. The action can be
  /// overloaded at an elemental level by changing the default
  /// behaviour of the function Element::get_all_vectors_and_matrices().
  /// column_or_row_index: Column [or row] index of given entry
  /// row_or_column_start: Index of first entry for given row [or column]
  /// value              : Vector of nonzero entries
  /// nnz                : Number of entries stored for each matrix
  /// residuals          : Residual vector
  /// compressed_row_flag: Bool flag to indicate if storage format is
  ///                      compressed row [if false interpretation of
  ///                      arguments is as stated in square brackets].
  ///
  /// The arrays referenced by column_or_row_index, row_or_column_start,
  /// value and residuals are allocated in this function with new[] and
  /// are not freed here. Within each row [or column] the entries are
  /// stored in the order in which they were first encountered during
  /// assembly, i.e. they are NOT sorted by index.
  //=====================================================================
  void Problem::sparse_assemble_row_or_column_compressed_with_two_vectors(
    Vector<int*>& column_or_row_index,
    Vector<int*>& row_or_column_start,
    Vector<double*>& value,
    Vector<unsigned>& nnz,
    Vector<double*>& residuals,
    bool compressed_row_flag)
  {
    // Total number of elements
    const unsigned long n_elements = mesh_pt()->nelement();

    // Default range of elements to be assembled, stored as the half-open
    // interval [el_lo, el_hi_plus_one). Storing "one past the end" (rather
    // than the index of the last element) avoids an unsigned wrap-around
    // when the range is empty.
    unsigned long el_lo = 0;
    unsigned long el_hi_plus_one = n_elements;


#ifdef OOMPH_HAS_MPI
    // Otherwise just loop over a fraction of the elements
    // (This will either have been initialised in
    // Problem::set_default_first_and_last_element_for_assembly() or
    // will have been re-assigned during a previous assembly loop
    // Note that following the re-assignment only the entries
    // for the current processor are relevant.
    if (!Problem_has_been_distributed)
    {
      el_lo = First_el_for_assembly[Communicator_pt->my_rank()];
      el_hi_plus_one =
        Last_el_plus_one_for_assembly[Communicator_pt->my_rank()];
    }
#endif

    // number of local eqns
    const unsigned ndof = this->ndof();

    // Find the number of vectors to be assembled
    const unsigned n_vector = residuals.size();

    // Find the number of matrices to be assembled
    const unsigned n_matrix = column_or_row_index.size();

    // Locally cache pointer to assembly handler
    AssemblyHandler* const assembly_handler_pt = Assembly_handler_pt;

#ifdef OOMPH_HAS_MPI
    // Are we only assembling residuals (in parallel)? If so the timing /
    // load balancing machinery below is bypassed.
    const bool doing_residuals =
      (dynamic_cast<ParallelResidualsHandler*>(Assembly_handler_pt) != 0);
#endif

// Error check dimensions
#ifdef PARANOID
    if (row_or_column_start.size() != n_matrix)
    {
      std::ostringstream error_stream;
      error_stream << "Error: " << std::endl
                   << "row_or_column_start.size() "
                   << row_or_column_start.size() << " does not equal "
                   << "column_or_row_index.size() "
                   << column_or_row_index.size() << std::endl;
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }

    if (value.size() != n_matrix)
    {
      std::ostringstream error_stream;
      error_stream << "Error: " << std::endl
                   << "value.size() " << value.size() << " does not equal "
                   << "column_or_row_index.size() "
                   << column_or_row_index.size() << std::endl
                   << std::endl
                   << std::endl;
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // The idea behind this sparse assembly routine is to use Vectors of
    // Vectors for the entries in each complete matrix. And a second
    // Vector of Vectors stores the global row (or column) indices. This
    // will not have the memory overheads associated with the methods using
    // lists or maps, but insertion will be more costly.

    // Set up two vector of vectors to store the entries of each  matrix,
    // indexed by either the column or row. The entries of the vector for
    // each matrix correspond to all the rows or columns of that matrix.
    Vector<Vector<Vector<unsigned>>> matrix_row_or_col_indices(n_matrix);
    Vector<Vector<Vector<double>>> matrix_values(n_matrix);

    // Loop over the number of matrices being assembled and resize
    // each vector of vectors to the number of rows or columns of the matrix
    for (unsigned m = 0; m < n_matrix; m++)
    {
      matrix_row_or_col_indices[m].resize(ndof);
      matrix_values[m].resize(ndof);
    }

    // Allocate and zero the residuals vectors
    for (unsigned v = 0; v < n_vector; v++)
    {
      residuals[v] = new double[ndof];
      for (unsigned i = 0; i < ndof; i++)
      {
        residuals[v][i] = 0;
      }
    }

#ifdef OOMPH_HAS_MPI

    // Storage for assembly time for elements
    double t_assemble_start = 0.0;

    // Storage for assembly times
    if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
    {
      Elemental_assembly_time.resize(n_elements);
    }

#endif


    //----------------Assemble and populate the vector storage scheme-------
    {
      // Local storage for the element's contribution to the residuals
      // vectors and system matrices. The containers are created once,
      // outside the element loop, and resized for each element.
      Vector<Vector<double>> el_residuals(n_vector);
      Vector<DenseMatrix<double>> el_jacobian(n_matrix);

      // Loop over the elements (half-open range, so an empty range is
      // simply skipped)
      for (unsigned long e = el_lo; e < el_hi_plus_one; e++)
      {
#ifdef OOMPH_HAS_MPI
        // Time it?
        if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
        {
          t_assemble_start = TimingHelpers::timer();
        }
#endif

        // Get the pointer to the element
        GeneralisedElement* elem_pt = mesh_pt()->element_pt(e);

#ifdef OOMPH_HAS_MPI
        // Ignore halo elements
        if (!elem_pt->is_halo())
        {
#endif

          // Find number of degrees of freedom in the element
          const unsigned nvar = assembly_handler_pt->ndof(elem_pt);

          // Resize the storage for elemental jacobian and residuals
          for (unsigned v = 0; v < n_vector; v++)
          {
            el_residuals[v].resize(nvar);
          }
          for (unsigned m = 0; m < n_matrix; m++)
          {
            el_jacobian[m].resize(nvar);
          }

          // Now get the residuals and jacobian for the element
          assembly_handler_pt->get_all_vectors_and_matrices(
            elem_pt, el_residuals, el_jacobian);

          //---------------Insert the values into the vectors--------------

          // Loop over the first index of local variables
          for (unsigned i = 0; i < nvar; i++)
          {
            // Get the local equation number
            const unsigned eqn_number =
              assembly_handler_pt->eqn_number(elem_pt, i);

            // Add the contribution to the residuals
            for (unsigned v = 0; v < n_vector; v++)
            {
              // Fill in each residuals vector
              residuals[v][eqn_number] += el_residuals[v][i];
            }

            // Now loop over the other index
            for (unsigned j = 0; j < nvar; j++)
            {
              // Get the number of the unknown
              const unsigned unknown =
                assembly_handler_pt->eqn_number(elem_pt, j);

              // For compressed row storage the outer Vector is indexed by
              // row (the equation number) and the stored index is the
              // column (the unknown); for compressed column storage the
              // roles are swapped.
              const unsigned outer_index =
                compressed_row_flag ? eqn_number : unknown;
              const unsigned inner_index =
                compressed_row_flag ? unknown : eqn_number;

              // Loop over the matrices
              for (unsigned m = 0; m < n_matrix; m++)
              {
                // Get the value of the matrix at this point
                const double jac_entry = el_jacobian[m](i, j);

                // Only bother to add to the vector if it's non-zero
                if (std::fabs(jac_entry) > Numerical_zero_for_sparse_assembly)
                {
                  // The (unsorted) indices and values stored so far for
                  // this row or column; the two Vectors are kept in step
                  Vector<unsigned>& line_indices =
                    matrix_row_or_col_indices[m][outer_index];
                  Vector<double>& line_values = matrix_values[m][outer_index];

                  // Linear search for an existing entry with this index
                  const unsigned size = line_indices.size();
                  unsigned k = 0;
                  while ((k < size) && (line_indices[k] != inner_index))
                  {
                    ++k;
                  }

                  // Not found: append a new index/value pair
                  if (k == size)
                  {
                    line_indices.push_back(inner_index);
                    line_values.push_back(jac_entry);
                  }
                  // Found: accumulate into the existing value
                  else
                  {
                    line_values[k] += jac_entry;
                  }
                }
              } // End of loop over matrices
            }
          }

#ifdef OOMPH_HAS_MPI
        } // endif halo element
#endif


#ifdef OOMPH_HAS_MPI
        // Time it? (recorded for halo elements too)
        if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
        {
          Elemental_assembly_time[e] =
            TimingHelpers::timer() - t_assemble_start;
        }
#endif

      } // End of loop over the elements

    } // End of vector assembly


#ifdef OOMPH_HAS_MPI

    // Postprocess timing information and re-allocate distribution of
    // elements during subsequent assemblies.
    if ((!doing_residuals) && (!Problem_has_been_distributed) &&
        Must_recompute_load_balance_for_assembly)
    {
      recompute_load_balanced_assembly();
    }

    // We have determined load balancing for current setup.
    // This can remain the same until assign_eqn_numbers() is called
    // again -- the flag is re-set to true there.
    if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
    {
      Must_recompute_load_balance_for_assembly = false;
    }

#endif

    //-----------Finally we need to convert this lousy vector storage scheme
    //------------------------to the containers required by SuperLU

    // Loop over the number of matrices
    for (unsigned m = 0; m < n_matrix; m++)
    {
      // Set the number of rows or columns
      row_or_column_start[m] = new int[ndof + 1];

      // fill row_or_column_start and find the number of entries
      row_or_column_start[m][0] = 0;
      for (unsigned long i = 0; i < ndof; i++)
      {
        row_or_column_start[m][i + 1] =
          row_or_column_start[m][i] + matrix_values[m][i].size();
      }
      const unsigned entries = row_or_column_start[m][ndof];

      // Allocate the index and value arrays and record the entry count
      column_or_row_index[m] = new int[entries];
      value[m] = new double[entries];
      nnz[m] = entries;

      // Now we merely loop over the number of rows or columns
      for (unsigned long i_global = 0; i_global < ndof; i_global++)
      {
        // If there are no entries in the vector then skip the rest of the loop
        if (matrix_values[m][i_global].empty())
        {
          continue;
        }

        // Loop over all the entries in the vectors corresponding to the given
        // row or column. It will NOT be ordered
        unsigned p = 0;
        for (int j = row_or_column_start[m][i_global];
             j < row_or_column_start[m][i_global + 1];
             j++)
        {
          column_or_row_index[m][j] = matrix_row_or_col_indices[m][i_global][p];
          value[m][j] = matrix_values[m][i_global][p];
          ++p;
        }
      }
    } // End of the loop over the matrices

    if (Pause_at_end_of_sparse_assembly)
    {
      oomph_info << "Pausing at end of sparse assembly." << std::endl;
      pause("Check memory usage now.");
    }
  }
 
 
  //=====================================================================
  /// This is a (private) helper function that is used to assemble system
  /// matrices in compressed row or column format
  /// and compute residual vectors using two arrays.
  /// The default action is to assemble the jacobian matrix and
  /// residuals for the Newton method. The action can be
  /// overloaded at an elemental level by changing the default
  /// behaviour of the function Element::get_all_vectors_and_matrices().
  /// column_or_row_index: Column [or row] index of given entry
  /// row_or_column_start: Index of first entry for given row [or column]
  /// value              : Vector of nonzero entries
  /// residuals          : Residual vector
  /// compressed_row_flag: Bool flag to indicate if storage format is
  ///                      compressed row [if false interpretation of
  ///                      arguments is as stated in square brackets].
  //=====================================================================
  void Problem::sparse_assemble_row_or_column_compressed_with_two_arrays(
    Vector<int*>& column_or_row_index,
    Vector<int*>& row_or_column_start,
    Vector<double*>& value,
    Vector<unsigned>& nnz,
    Vector<double*>& residuals,
    bool compressed_row_flag)
  {
    // Total number of elements
    const unsigned long n_elements = mesh_pt()->nelement();
 
    // Default range of elements for distributed problems
    unsigned long el_lo = 0;
    unsigned long el_hi = n_elements - 1;
 
 
#ifdef OOMPH_HAS_MPI
    // Otherwise just loop over a fraction of the elements
    // (This will either have been initialised in
    // Problem::set_default_first_and_last_element_for_assembly() or
    // will have been re-assigned during a previous assembly loop
    // Note that following the re-assignment only the entries
    // for the current processor are relevant.
    if (!Problem_has_been_distributed)
    {
      el_lo = First_el_for_assembly[Communicator_pt->my_rank()];
      el_hi = Last_el_plus_one_for_assembly[Communicator_pt->my_rank()] - 1;
    }
#endif
 
    // number of local eqns
    unsigned ndof = this->ndof();
 
    // Find the number of vectors to be assembled
    const unsigned n_vector = residuals.size();
 
    // Find the number of matrices to be assembled
    const unsigned n_matrix = column_or_row_index.size();
 
    // Locally cache pointer to assembly handler
    AssemblyHandler* const assembly_handler_pt = Assembly_handler_pt;
 
#ifdef OOMPH_HAS_MPI
    bool doing_residuals = false;
    if (dynamic_cast<ParallelResidualsHandler*>(Assembly_handler_pt) != 0)
    {
      doing_residuals = true;
    }
#endif
 
// Error check dimensions
#ifdef PARANOID
    if (row_or_column_start.size() != n_matrix)
    {
      std::ostringstream error_stream;
      error_stream << "Error: " << std::endl
                   << "row_or_column_start.size() "
                   << row_or_column_start.size() << " does not equal "
                   << "column_or_row_index.size() "
                   << column_or_row_index.size() << std::endl;
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
 
    if (value.size() != n_matrix)
    {
      std::ostringstream error_stream;
      error_stream << "Error: " << std::endl
                   << "value.size() " << value.size() << " does not equal "
                   << "column_or_row_index.size() "
                   << column_or_row_index.size() << std::endl
                   << std::endl
                   << std::endl;
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif
 
    // The idea behind this sparse assembly routine is to use Vectors of
    // Vectors for the entries in each complete matrix. And a second
    // Vector of Vectors stores the global row (or column) indeces. This
    // will not have the memory overheads associated with the methods using
    // lists or maps, but insertion will be more costly.
 
    // Set up two vector of vectors to store the entries of each  matrix,
    // indexed by either the column or row. The entries of the vector for
    // each matrix correspond to all the rows or columns of that matrix.
    Vector<unsigned**> matrix_row_or_col_indices(n_matrix);
    Vector<double**> matrix_values(n_matrix);
 
    // Loop over the number of matrices being assembled and resize
    // each vector of vectors to the number of rows or columns of the matrix
    for (unsigned m = 0; m < n_matrix; m++)
    {
      matrix_row_or_col_indices[m] = new unsigned*[ndof];
      matrix_values[m] = new double*[ndof];
    }
 
    // Resize the residuals vectors
    for (unsigned v = 0; v < n_vector; v++)
    {
      residuals[v] = new double[ndof];
      for (unsigned i = 0; i < ndof; i++)
      {
        residuals[v][i] = 0;
      }
    }
 
#ifdef OOMPH_HAS_MPI
 
    // Storage for assembly time for elements
    double t_assemble_start = 0.0;
 
    // Storage for assembly times
    if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
    {
      Elemental_assembly_time.resize(n_elements);
    }
 
#endif
 
    // number of coefficients in each row
    Vector<Vector<unsigned>> ncoef(n_matrix);
    for (unsigned m = 0; m < n_matrix; m++)
    {
      ncoef[m].resize(ndof, 0);
    }
 
    if (Sparse_assemble_with_arrays_previous_allocation.size() == 0)
    {
      Sparse_assemble_with_arrays_previous_allocation.resize(n_matrix);
      for (unsigned m = 0; m < n_matrix; m++)
      {
        Sparse_assemble_with_arrays_previous_allocation[m].resize(ndof, 0);
      }
    }
 
    //----------------Assemble and populate the vector storage scheme-------
    {
      // Allocate local storage for the element's contribution to the
      // residuals vectors and system matrices of the size of the maximum
      // number of dofs in any element
      // This means that the storage will only be allocated (and deleted) once
      Vector<Vector<double>> el_residuals(n_vector);
      Vector<DenseMatrix<double>> el_jacobian(n_matrix);
 
      // Loop over the elements
      for (unsigned long e = el_lo; e <= el_hi; e++)
      {
#ifdef OOMPH_HAS_MPI
        // Time it?
        if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
        {
          t_assemble_start = TimingHelpers::timer();
        }
#endif
 
        // Get the pointer to the element
        GeneralisedElement* elem_pt = mesh_pt()->element_pt(e);
 
#ifdef OOMPH_HAS_MPI
        // Ignore halo elements
        if (!elem_pt->is_halo())
        {
#endif
 
          // Find number of degrees of freedom in the element
          const unsigned nvar = assembly_handler_pt->ndof(elem_pt);
 
          // Resize the storage for elemental jacobian and residuals
          for (unsigned v = 0; v < n_vector; v++)
          {
            el_residuals[v].resize(nvar);
          }
          for (unsigned m = 0; m < n_matrix; m++)
          {
            el_jacobian[m].resize(nvar);
          }
 
          // Now get the residuals and jacobian for the element
          assembly_handler_pt->get_all_vectors_and_matrices(
            elem_pt, el_residuals, el_jacobian);
 
          //---------------Insert the values into the vectors--------------
 
          // Loop over the first index of local variables
          for (unsigned i = 0; i < nvar; i++)
          {
            // Get the local equation number
            unsigned eqn_number = assembly_handler_pt->eqn_number(elem_pt, i);
 
            // Add the contribution to the residuals
            for (unsigned v = 0; v < n_vector; v++)
            {
              // Fill in each residuals vector
              residuals[v][eqn_number] += el_residuals[v][i];
            }
 
            // Now loop over the other index
            for (unsigned j = 0; j < nvar; j++)
            {
              // Get the number of the unknown
              unsigned unknown = assembly_handler_pt->eqn_number(elem_pt, j);
 
              // Loop over the matrices
              // If it's compressed row storage, then our vector of maps
              // is indexed by row (equation number)
              for (unsigned m = 0; m < n_matrix; m++)
              {
                // Get the value of the matrix at this point
                double value = el_jacobian[m](i, j);
                // Only bother to add to the vector if it's non-zero
                if (std::fabs(value) > Numerical_zero_for_sparse_assembly)
                {
                  // number of entrys in this row
                  const unsigned size = ncoef[m][eqn_number];
 
                  // if no data has been allocated for this row then allocate
                  if (size == 0)
                  {
                    // do we have previous allocation data
                    if (Sparse_assemble_with_arrays_previous_allocation
                          [m][eqn_number] != 0)
                    {
                      matrix_row_or_col_indices[m][eqn_number] = new unsigned
                        [Sparse_assemble_with_arrays_previous_allocation
                           [m][eqn_number]];
                      matrix_values[m][eqn_number] = new double
                        [Sparse_assemble_with_arrays_previous_allocation
                           [m][eqn_number]];
                    }
                    else
                    {
                      matrix_row_or_col_indices[m][eqn_number] = new unsigned
                        [Sparse_assemble_with_arrays_initial_allocation];
                      matrix_values[m][eqn_number] = new double
                        [Sparse_assemble_with_arrays_initial_allocation];
                      Sparse_assemble_with_arrays_previous_allocation
                        [m][eqn_number] =
                          Sparse_assemble_with_arrays_initial_allocation;
                    }
                  }
 
                  // If it's compressed row storage, then our vector of maps
                  // is indexed by row (equation number)
                  if (compressed_row_flag)
                  {
                    // next add the data
                    for (unsigned k = 0; k <= size; k++)
                    {
                      if (k == size)
                      {
                        // do we need to allocate more storage
                        if (Sparse_assemble_with_arrays_previous_allocation
                              [m][eqn_number] == ncoef[m][eqn_number])
                        {
                          unsigned new_allocation =
                            ncoef[m][eqn_number] +
                            Sparse_assemble_with_arrays_allocation_increment;
                          double* new_values = new double[new_allocation];
                          unsigned* new_indices = new unsigned[new_allocation];
                          for (unsigned c = 0; c < ncoef[m][eqn_number]; c++)
                          {
                            new_values[c] = matrix_values[m][eqn_number][c];
                            new_indices[c] =
                              matrix_row_or_col_indices[m][eqn_number][c];
                          }
                          delete[] matrix_values[m][eqn_number];
                          delete[] matrix_row_or_col_indices[m][eqn_number];
                          matrix_values[m][eqn_number] = new_values;
                          matrix_row_or_col_indices[m][eqn_number] =
                            new_indices;
                          Sparse_assemble_with_arrays_previous_allocation
                            [m][eqn_number] = new_allocation;
                        }
                        // and now add the data
                        unsigned entry = ncoef[m][eqn_number];
                        ncoef[m][eqn_number]++;
                        matrix_row_or_col_indices[m][eqn_number][entry] =
                          unknown;
                        matrix_values[m][eqn_number][entry] = value;
                        break;
                      }
                      else if (matrix_row_or_col_indices[m][eqn_number][k] ==
                               unknown)
                      {
                        matrix_values[m][eqn_number][k] += value;
                        break;
                      }
                    }
                  }
                  // Otherwise it's compressed column storage and our vector is
                  // indexed by column (the unknown)
                  else
                  {
                    // Add the data into the vectors in the correct position
                    for (unsigned k = 0; k <= size; k++)
                    {
                      if (k == size)
                      {
                        // do we need to allocate more storage
                        if (Sparse_assemble_with_arrays_previous_allocation
                              [m][unknown] == ncoef[m][unknown])
                        {
                          unsigned new_allocation =
                            ncoef[m][unknown] +
                            Sparse_assemble_with_arrays_allocation_increment;
                          double* new_values = new double[new_allocation];
                          unsigned* new_indices = new unsigned[new_allocation];
                          for (unsigned c = 0; c < ncoef[m][unknown]; c++)
                          {
                            new_values[c] = matrix_values[m][unknown][c];
                            new_indices[c] =
                              matrix_row_or_col_indices[m][unknown][c];
                          }
                          delete[] matrix_values[m][unknown];
                          delete[] matrix_row_or_col_indices[m][unknown];
                          Sparse_assemble_with_arrays_previous_allocation
                            [m][unknown] = new_allocation;
                        }
                        // and now add the data
                        unsigned entry = ncoef[m][unknown];
                        ncoef[m][unknown]++;
                        matrix_row_or_col_indices[m][unknown][entry] =
                          eqn_number;
                        matrix_values[m][unknown][entry] = value;
                        break;
                      }
                      else if (matrix_row_or_col_indices[m][unknown][k] ==
                               eqn_number)
                      {
                        matrix_values[m][unknown][k] += value;
                        break;
                      }
                    }
                  }
                }
              } // End of loop over matrices
            }
          }
 
#ifdef OOMPH_HAS_MPI
        } // endif halo element
#endif
 
 
#ifdef OOMPH_HAS_MPI
        // Time it?
        if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
        {
          Elemental_assembly_time[e] =
            TimingHelpers::timer() - t_assemble_start;
        }
#endif
 
      } // End of loop over the elements
 
    } // End of vector assembly
 
 
#ifdef OOMPH_HAS_MPI
 
    // Postprocess timing information and re-allocate distribution of
    // elements during subsequent assemblies.
    if ((!doing_residuals) && (!Problem_has_been_distributed) &&
        Must_recompute_load_balance_for_assembly)
    {
      recompute_load_balanced_assembly();
    }
 
    // We have determined load balancing for current setup.
    // This can remain the same until assign_eqn_numbers() is called
    // again -- the flag is re-set to true there.
    if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
    {
      Must_recompute_load_balance_for_assembly = false;
    }
 
#endif
 
    //-----------Finally we need to convert this lousy vector storage scheme
    //------------------------to the containers required by SuperLU
 
    // Loop over the number of matrices
    for (unsigned m = 0; m < n_matrix; m++)
    {
      // Set the number of rows or columns
      row_or_column_start[m] = new int[ndof + 1];
 
      // fill row_or_column_start and find the number of entries
      row_or_column_start[m][0] = 0;
      for (unsigned long i = 0; i < ndof; i++)
      {
        row_or_column_start[m][i + 1] = row_or_column_start[m][i] + ncoef[m][i];
        Sparse_assemble_with_arrays_previous_allocation[m][i] = ncoef[m][i];
      }
      const unsigned entries = row_or_column_start[m][ndof];
 
      // resize vectors
      column_or_row_index[m] = new int[entries];
      value[m] = new double[entries];
      nnz[m] = entries;
 
      // Now we merely loop over the number of rows or columns
      for (unsigned long i_global = 0; i_global < ndof; i_global++)
      {
        // If there are no entries in the vector then skip the rest of the loop
        if (ncoef[m][i_global] == 0)
        {
          continue;
        }
 
        // Loop over all the entries in the vectors corresponding to the given
        // row or column. It will NOT be ordered
        unsigned p = 0;
        for (int j = row_or_column_start[m][i_global];
             j < row_or_column_start[m][i_global + 1];
             j++)
        {
          column_or_row_index[m][j] = matrix_row_or_col_indices[m][i_global][p];
          value[m][j] = matrix_values[m][i_global][p];
          ++p;
        }
 
        // and delete
        delete[] matrix_row_or_col_indices[m][i_global];
        delete[] matrix_values[m][i_global];
      }
 
      //
      delete[] matrix_row_or_col_indices[m];
      delete[] matrix_values[m];
    } // End of the loop over the matrices
 
    if (Pause_at_end_of_sparse_assembly)
    {
      oomph_info << "Pausing at end of sparse assembly." << std::endl;
      pause("Check memory usage now.");
    }
  }
 
 
#ifdef OOMPH_HAS_MPI
  //=======================================================================
  /// Helper method that collects the global equation numbers to which
  /// the non-halo elements in the (inclusive) range el_lo to el_hi
  /// contribute on this processor. Equation numbers are obtained from
  /// the assembly handler and written into my_eqns starting at index 0;
  /// on return my_eqns is sorted in ascending order and contains no
  /// duplicates.
  //=======================================================================
  void Problem::get_my_eqns(AssemblyHandler* const& assembly_handler_pt,
                            const unsigned& el_lo,
                            const unsigned& el_hi,
                            Vector<unsigned>& my_eqns)
  {
    // Number of equation numbers written into my_eqns so far
    unsigned n_stored = 0;

    // Visit every element in the requested (inclusive) range
    for (unsigned long e = el_lo; e <= el_hi; e++)
    {
      GeneralisedElement* const el_pt = this->mesh_pt()->element_pt(e);

      // Halo elements do not contribute
      if (el_pt->is_halo())
      {
        continue;
      }

      // Make room for this element's degrees of freedom
      const unsigned n_el_dof = assembly_handler_pt->ndof(el_pt);
      my_eqns.resize(n_stored + n_el_dof);

      // Append the global equation number of each local variable
      for (unsigned i = 0; i < n_el_dof; i++)
      {
        my_eqns[n_stored + i] = assembly_handler_pt->eqn_number(el_pt, i);
      }

      n_stored += n_el_dof;
    }

    // Sort, then drop the duplicates that arise from equations shared
    // between elements
    std::sort(my_eqns.begin(), my_eqns.end());
    Vector<unsigned>::iterator unique_end =
      std::unique(my_eqns.begin(), my_eqns.end());
    my_eqns.resize(unique_end - my_eqns.begin());
  }
 
 
  //=============================================================================
  /// Helper method to assemble CRDoubleMatrices from elements that are
  /// distributed over multiple processors.
  //=============================================================================
  void Problem::parallel_sparse_assemble(
    const LinearAlgebraDistribution* const& target_dist_pt,
    Vector<int*>& column_indices,
    Vector<int*>& row_start,
    Vector<double*>& values,
    Vector<unsigned>& nnz,
    Vector<double*>& residuals)
  {
    // Time assembly
    double t_start = TimingHelpers::timer();
 
    // my rank and nproc
    unsigned my_rank = Communicator_pt->my_rank();
    unsigned nproc = Communicator_pt->nproc();
 
    // Total number of elements
    const unsigned long n_elements = mesh_pt()->nelement();
 
#ifdef PARANOID
    // No elements? This is usually a sign that the problem distribution has
    // led to one processor not having any elements. Either
    // a sign of something having gone wrong or a relatively small
    // problem on a huge number of processors
    if (n_elements == 0)
    {
      std::ostringstream error_stream;
      error_stream << "Processsor " << my_rank << " has no elements. \n"
                   << "This is usually a sign that the problem distribution \n"
                   << "or the load balancing have gone wrong.";
      OomphLibWarning(error_stream.str(),
                      "Problem::parallel_sparse_assemble()",
                      OOMPH_EXCEPTION_LOCATION);
    }
#endif
 
 
    // Default range of elements for distributed problems.
    unsigned long el_lo = 0;
    unsigned long el_hi_plus_one = n_elements;
 
    // Otherwise just loop over a fraction of the elements
    // (This will either have been initialised in
    // Problem::set_default_first_and_last_element_for_assembly() or
    // will have been re-assigned during a previous assembly loop
    // Note that following the re-assignment only the entries
    // for the current processor are relevant.
    if (!Problem_has_been_distributed)
    {
      el_lo = First_el_for_assembly[my_rank];
      el_hi_plus_one = Last_el_plus_one_for_assembly[my_rank];
    }
 
    // Find the number of vectors to be assembled
    const unsigned n_vector = residuals.size();
 
    // Find the number of matrices to be assembled
    const unsigned n_matrix = column_indices.size();
 
    // Locally cache pointer to assembly handler
    AssemblyHandler* const assembly_handler_pt = Assembly_handler_pt;
 
    bool doing_residuals = false;
    if (dynamic_cast<ParallelResidualsHandler*>(Assembly_handler_pt) != 0)
    {
      doing_residuals = true;
    }
 
// Error check dimensions
#ifdef PARANOID
    if (row_start.size() != n_matrix)
    {
      std::ostringstream error_stream;
      error_stream << "Error: " << std::endl
                   << "row_or_column_start.size() " << row_start.size()
                   << " does not equal "
                   << "column_or_row_index.size() " << column_indices.size()
                   << std::endl;
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
 
    if (values.size() != n_matrix)
    {
      std::ostringstream error_stream;
      error_stream << "Error: " << std::endl
                   << "value.size() " << values.size() << " does not equal "
                   << "column_or_row_index.size() " << column_indices.size()
                   << std::endl
                   << std::endl
                   << std::endl;
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif
 
 
    // start by assembling the sorted set of equations to which this processor
    // contributes. Essentially this is every global equation that features in
    // all the non-halo elements. This may not be the same as the locally-stored
    // dofs because some of the Nodes in non-halo elements may actually
    // be halos.
    //======================================================================
    Vector<unsigned> my_eqns;
    if (n_elements != 0)
    {
      this->get_my_eqns(
        assembly_handler_pt, el_lo, el_hi_plus_one - 1, my_eqns);
    }
 
    // number of equations
    unsigned my_n_eqn = my_eqns.size();
 
    // next we assemble the data into an array of arrays
    // =================================================
    // The idea behind this sparse assembly routine is to use an array of
    // arrays for the entries in each complete matrix. And a second
    // array of arrays stores the global row (or column) indeces.
 
    // Set up two vector of vectors to store the entries of each  matrix,
    // indexed by either the column or row. The entries of the vector for
    // each matrix correspond to all the rows or columns of that matrix.
    Vector<unsigned**> matrix_col_indices(n_matrix);
    Vector<double**> matrix_values(n_matrix);
 
    // Loop over the number of matrices being assembled and resize
    // each vector of vectors to the number of rows or columns of the matrix
    for (unsigned m = 0; m < n_matrix; m++)
    {
      matrix_col_indices[m] = new unsigned*[my_n_eqn];
      matrix_values[m] = new double*[my_n_eqn];
      for (unsigned i = 0; i < my_n_eqn; i++)
      {
        matrix_col_indices[m][i] = 0;
        matrix_values[m][i] = 0;
      }
    }
 
    // Resize the residuals vectors
    Vector<double*> residuals_data(n_vector);
    for (unsigned v = 0; v < n_vector; v++)
    {
      residuals_data[v] = new double[my_n_eqn];
      for (unsigned i = 0; i < my_n_eqn; i++)
      {
        residuals_data[v][i] = 0;
      }
    }
 
    // Storage for assembly time for elements
    double t_assemble_start = 0.0;
 
    // Storage for assembly times
    if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
    {
      Elemental_assembly_time.resize(n_elements);
    }
 
    // number of coefficients in each row
    Vector<Vector<unsigned>> ncoef(n_matrix);
    for (unsigned m = 0; m < n_matrix; m++)
    {
      ncoef[m].resize(my_n_eqn, 0);
    }
 
    // Sparse_assemble_with_arrays_previous_allocation stores the number of
    // coefs in each row.
    // if a matrix of this size has not been assembled before then resize this
    // storage
    if (Sparse_assemble_with_arrays_previous_allocation.size() == 0)
    {
      Sparse_assemble_with_arrays_previous_allocation.resize(n_matrix);
      for (unsigned m = 0; m < n_matrix; m++)
      {
        Sparse_assemble_with_arrays_previous_allocation[m].resize(my_n_eqn, 0);
      }
    }
 
 
    // assemble and populate an array based storage scheme
    {
      // Allocate local storage for the element's contribution to the
      // residuals vectors and system matrices of the size of the maximum
      // number of dofs in any element
      // This means that the storage will only be allocated (and deleted) once
      Vector<Vector<double>> el_residuals(n_vector);
      Vector<DenseMatrix<double>> el_jacobian(n_matrix);
 
      // Loop over the elements
      for (unsigned long e = el_lo; e < el_hi_plus_one; e++)
      {
        // Time it?
        if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
        {
          t_assemble_start = TimingHelpers::timer();
        }
 
        // Get the pointer to the element
        GeneralisedElement* elem_pt = mesh_pt()->element_pt(e);
 
        // Ignore halo elements
        if (!elem_pt->is_halo())
        {
          // Find number of degrees of freedom in the element
          const unsigned nvar = assembly_handler_pt->ndof(elem_pt);
 
          // Resize the storage for elemental jacobian and residuals
          for (unsigned v = 0; v < n_vector; v++)
          {
            el_residuals[v].resize(nvar);
          }
          for (unsigned m = 0; m < n_matrix; m++)
          {
            el_jacobian[m].resize(nvar);
          }
 
          // Now get the residuals and jacobian for the element
          assembly_handler_pt->get_all_vectors_and_matrices(
            elem_pt, el_residuals, el_jacobian);
 
          //---------------Insert the values into the vectors--------------
 
          // Loop over the first index of local variables
          for (unsigned i = 0; i < nvar; i++)
          {
            // Get the local equation number
            unsigned global_eqn_number =
              assembly_handler_pt->eqn_number(elem_pt, i);
 
            // determine the element number in my set of eqns using the
            // bisection method
            int left = 0;
            int right = my_n_eqn - 1;
            int eqn_number = right / 2;
            while (my_eqns[eqn_number] != global_eqn_number)
            {
              if (left == right)
              {
                // Check that the residuals associated with the
                // eqn number that can't be found are all zero
                bool broken = false;
                for (unsigned v = 0; v < n_vector; v++)
                {
                  if (el_residuals[v][i] != 0.0)
                  {
                    broken = true;
                    break;
                  }
                }
 
                // Now loop over the other index to check the entries
                // in the appropriate row of the Jacobians are zero too
                for (unsigned j = 0; j < nvar; j++)
                {
                  // Get the number of the unknown
                  // unsigned unknown =
                  // assembly_handler_pt->eqn_number(elem_pt,j);
 
                  // Loop over the matrices
                  // If it's compressed row storage, then our vector of maps
                  // is indexed by row (equation number)
                  for (unsigned m = 0; m < n_matrix; m++)
                  {
                    // Get the value of the matrix at this point
                    double value = el_jacobian[m](i, j);
                    if (value != 0.0)
                    {
                      broken = true;
                      break;
                    }
                    if (broken) break;
                  }
                }
 
                if (broken)
                {
                  std::ostringstream error_stream;
                  error_stream
                    << "Internal Error: " << std::endl
                    << "Could not find global equation number "
                    << global_eqn_number
                    << " in my_eqns vector of equation numbers but\n"
                    << "at least one entry in the residual vector is nonzero.";
                  throw OomphLibError(error_stream.str(),
                                      OOMPH_CURRENT_FUNCTION,
                                      OOMPH_EXCEPTION_LOCATION);
                }
                else
                {
                  break;
                }
              }
              if (my_eqns[eqn_number] > global_eqn_number)
              {
                right = std::max(eqn_number - 1, left);
              }
              else
              {
                left = std::min(eqn_number + 1, right);
              }
              eqn_number = (right + left) / 2;
            }
 
            // Add the contribution to the residuals
            for (unsigned v = 0; v < n_vector; v++)
            {
              // Fill in each residuals vector
              residuals_data[v][eqn_number] += el_residuals[v][i];
            }
 
            // Now loop over the other index
            for (unsigned j = 0; j < nvar; j++)
            {
              // Get the number of the unknown
              unsigned unknown = assembly_handler_pt->eqn_number(elem_pt, j);
 
              // Loop over the matrices
              // If it's compressed row storage, then our vector of maps
              // is indexed by row (equation number)
              for (unsigned m = 0; m < n_matrix; m++)
              {
                // Get the value of the matrix at this point
                double value = el_jacobian[m](i, j);
                // Only bother to add to the vector if it's non-zero
                if (std::fabs(value) > Numerical_zero_for_sparse_assembly)
                {
                  // number of entrys in this row
                  const unsigned size = ncoef[m][eqn_number];
 
                  // if no data has been allocated for this row then allocate
                  if (size == 0)
                  {
                    // do we have previous allocation data
                    if (Sparse_assemble_with_arrays_previous_allocation
                          [m][eqn_number] != 0)
                    {
                      matrix_col_indices[m][eqn_number] = new unsigned
                        [Sparse_assemble_with_arrays_previous_allocation
                           [m][eqn_number]];
 
                      matrix_values[m][eqn_number] = new double
                        [Sparse_assemble_with_arrays_previous_allocation
                           [m][eqn_number]];
                    }
                    else
                    {
                      matrix_col_indices[m][eqn_number] = new unsigned
                        [Sparse_assemble_with_arrays_initial_allocation];
 
                      matrix_values[m][eqn_number] = new double
                        [Sparse_assemble_with_arrays_initial_allocation];
 
                      Sparse_assemble_with_arrays_previous_allocation
                        [m][eqn_number] =
                          Sparse_assemble_with_arrays_initial_allocation;
                    }
                  }
 
                  // next add the data
                  for (unsigned k = 0; k <= size; k++)
                  {
                    if (k == size)
                    {
                      // do we need to allocate more storage
                      if (Sparse_assemble_with_arrays_previous_allocation
                            [m][eqn_number] == ncoef[m][eqn_number])
                      {
                        unsigned new_allocation =
                          ncoef[m][eqn_number] +
                          Sparse_assemble_with_arrays_allocation_increment;
                        double* new_values = new double[new_allocation];
                        unsigned* new_indices = new unsigned[new_allocation];
                        for (unsigned c = 0; c < ncoef[m][eqn_number]; c++)
                        {
                          new_values[c] = matrix_values[m][eqn_number][c];
                          new_indices[c] = matrix_col_indices[m][eqn_number][c];
                        }
                        delete[] matrix_values[m][eqn_number];
                        delete[] matrix_col_indices[m][eqn_number];
 
                        matrix_values[m][eqn_number] = new_values;
                        matrix_col_indices[m][eqn_number] = new_indices;
 
                        Sparse_assemble_with_arrays_previous_allocation
                          [m][eqn_number] = new_allocation;
                      }
                      // and now add the data
                      unsigned entry = ncoef[m][eqn_number];
                      ncoef[m][eqn_number]++;
                      matrix_col_indices[m][eqn_number][entry] = unknown;
                      matrix_values[m][eqn_number][entry] = value;
                      break;
                    }
                    else if (matrix_col_indices[m][eqn_number][k] == unknown)
                    {
                      matrix_values[m][eqn_number][k] += value;
                      break;
                    }
                  }
                } // numerical zero check
              } // End of loop over matrices
            }
          }
        } // endif halo element
 
        // Time it?
        if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
        {
          Elemental_assembly_time[e] =
            TimingHelpers::timer() - t_assemble_start;
        }
      } // End of loop over the elements
    } // End of vector assembly
 
 
    // Doc?
    double t_end = 0.0;
    double t_local = 0.0;
    double t_max = 0.0;
    double t_min = 0.0;
    double t_sum = 0.0;
    if (Doc_imbalance_in_parallel_assembly)
    {
      t_end = TimingHelpers::timer();
      t_local = t_end - t_start;
      t_max = 0.0;
      t_min = 0.0;
      t_sum = 0.0;
      MPI_Allreduce(&t_local,
                    &t_max,
                    1,
                    MPI_DOUBLE,
                    MPI_MAX,
                    this->communicator_pt()->mpi_comm());
      MPI_Allreduce(&t_local,
                    &t_min,
                    1,
                    MPI_DOUBLE,
                    MPI_MIN,
                    this->communicator_pt()->mpi_comm());
      MPI_Allreduce(&t_local,
                    &t_sum,
                    1,
                    MPI_DOUBLE,
                    MPI_SUM,
                    this->communicator_pt()->mpi_comm());
      double imbalance = (t_max - t_min) / (t_sum / double(nproc)) * 100.0;
 
      if (doing_residuals)
      {
        oomph_info << "\nCPU for residual computation (loc/max/min/imbal): ";
      }
      else
      {
        oomph_info << "\nCPU for Jacobian computation (loc/max/min/imbal): ";
      }
      oomph_info << t_local << " " << t_max << " " << t_min << " " << imbalance
                 << "%\n";
 
      t_start = TimingHelpers::timer();
    }
 
 
    // Adjust number of coefficients in each row
    for (unsigned m = 0; m < n_matrix; m++)
    {
      unsigned max = 0;
      unsigned min = INT_MAX;
      unsigned sum = 0;
      unsigned sum_total = 0;
      for (unsigned e = 0; e < my_n_eqn; e++)
      {
        sum += ncoef[m][e];
        sum_total += Sparse_assemble_with_arrays_previous_allocation[m][e];
        if (ncoef[m][e] > max) max = ncoef[m][e];
        if (ncoef[m][e] < min) min = ncoef[m][e];
 
        // Now shrink the storage to what we actually need
        unsigned new_allocation = ncoef[m][e];
        double* new_values = new double[new_allocation];
        unsigned* new_indices = new unsigned[new_allocation];
        for (unsigned c = 0; c < ncoef[m][e]; c++)
        {
          new_values[c] = matrix_values[m][e][c];
          new_indices[c] = matrix_col_indices[m][e][c];
        }
        delete[] matrix_values[m][e];
        delete[] matrix_col_indices[m][e];
 
        matrix_values[m][e] = new_values;
        matrix_col_indices[m][e] = new_indices;
      }
    }
 
 
    // Postprocess timing information and re-allocate distribution of
    // elements during subsequent assemblies.
    if ((!doing_residuals) && (!Problem_has_been_distributed) &&
        Must_recompute_load_balance_for_assembly)
    {
      recompute_load_balanced_assembly();
    }
 
    // We have determined load balancing for current setup.
    // This can remain the same until assign_eqn_numbers() is called
    // again -- the flag is re-set to true there.
    if ((!doing_residuals) && Must_recompute_load_balance_for_assembly)
    {
      Must_recompute_load_balance_for_assembly = false;
    }
 
 
    // next we compute the number of equations and number of non-zeros to be
    // sent to each processor, and send/recv that information
    // =====================================================================
 
    // determine the number of eqns to be sent to each processor
    Vector<unsigned> n_eqn_for_proc(nproc, 0);
    Vector<unsigned> first_eqn_element_for_proc(nproc, 0);
    // If no equations are assembled then we don't need to do any of this
    if (my_n_eqn > 0)
    {
      unsigned current_p = target_dist_pt->rank_of_global_row(my_eqns[0]);
      first_eqn_element_for_proc[current_p] = 0;
      n_eqn_for_proc[current_p] = 1;
      for (unsigned i = 1; i < my_n_eqn; i++)
      {
        unsigned next_p = target_dist_pt->rank_of_global_row(my_eqns[i]);
        if (next_p != current_p)
        {
          current_p = next_p;
          first_eqn_element_for_proc[current_p] = i;
        }
        n_eqn_for_proc[current_p]++;
      }
    }
 
    // determine the number of non-zeros to be sent to each processor for each
    // matrix (if n_eqn_for_proc[p]=0, then nothing will be assembled)
    DenseMatrix<unsigned> nnz_for_proc(nproc, n_matrix, 0);
    for (unsigned p = 0; p < nproc; p++)
    {
      int first_eqn_element = first_eqn_element_for_proc[p];
      int last_eqn_element = (int)(first_eqn_element + n_eqn_for_proc[p]) - 1;
      for (unsigned m = 0; m < n_matrix; m++)
      {
        for (int i = first_eqn_element; i <= last_eqn_element; i++)
        {
          nnz_for_proc(p, m) += ncoef[m][i];
        }
      }
    }
 
    // next post the sends and recvs to the corresponding processors
    Vector<unsigned*> temp_send_storage(nproc);
    Vector<unsigned*> temp_recv_storage(nproc);
    Vector<MPI_Request> send_nnz_reqs;
    Vector<MPI_Request> recv_nnz_reqs;
    for (unsigned p = 0; p < nproc; p++)
    {
      if (p != my_rank)
      {
        temp_send_storage[p] = new unsigned[n_matrix + 1];
        temp_send_storage[p][0] = n_eqn_for_proc[p];
        for (unsigned m = 0; m < n_matrix; m++)
        {
          temp_send_storage[p][m + 1] = nnz_for_proc(p, m);
        }
        MPI_Request sreq;
        MPI_Isend(temp_send_storage[p],
                  n_matrix + 1,
                  MPI_UNSIGNED,
                  p,
                  0,
                  Communicator_pt->mpi_comm(),
                  &sreq);
        send_nnz_reqs.push_back(sreq);
        temp_recv_storage[p] = new unsigned[n_matrix + 1];
        MPI_Request rreq;
        MPI_Irecv(temp_recv_storage[p],
                  n_matrix + 1,
                  MPI_UNSIGNED,
                  p,
                  0,
                  Communicator_pt->mpi_comm(),
                  &rreq);
        recv_nnz_reqs.push_back(rreq);
      }
    }
 
    // assemble the data to be sent to each processor
    // ==============================================
 
    // storage
    Vector<unsigned*> eqns_for_proc(nproc);
    DenseMatrix<double*> residuals_for_proc(nproc, n_vector);
    DenseMatrix<unsigned*> row_start_for_proc(nproc, n_matrix);
    DenseMatrix<unsigned*> column_indices_for_proc(nproc, n_matrix);
    DenseMatrix<double*> values_for_proc(nproc, n_matrix);
 
    // equation numbers
    for (unsigned p = 0; p < nproc; p++)
    {
      unsigned n_eqns_p = n_eqn_for_proc[p];
      if (n_eqns_p > 0)
      {
        unsigned first_eqn_element = first_eqn_element_for_proc[p];
        unsigned first_row = target_dist_pt->first_row(p);
        eqns_for_proc[p] = new unsigned[n_eqns_p];
        for (unsigned i = 0; i < n_eqns_p; i++)
        {
          eqns_for_proc[p][i] = my_eqns[i + first_eqn_element] - first_row;
        }
      }
    }
 
    // residuals for p
    for (unsigned v = 0; v < n_vector; v++)
    {
      for (unsigned p = 0; p < nproc; p++)
      {
        unsigned n_eqns_p = n_eqn_for_proc[p];
        if (n_eqns_p > 0)
        {
          unsigned first_eqn_element = first_eqn_element_for_proc[p];
          residuals_for_proc(p, v) = new double[n_eqns_p];
          for (unsigned i = 0; i < n_eqns_p; i++)
          {
            residuals_for_proc(p, v)[i] =
              residuals_data[v][first_eqn_element + i];
          }
        }
      }
      delete[] residuals_data[v];
    }
 
    // matrices for p
    for (unsigned m = 0; m < n_matrix; m++)
    {
      for (unsigned p = 0; p < nproc; p++)
      {
        unsigned n_eqns_p = n_eqn_for_proc[p];
        if (n_eqns_p > 0)
        {
          unsigned first_eqn_element = first_eqn_element_for_proc[p];
          row_start_for_proc(p, m) = new unsigned[n_eqns_p + 1];
          column_indices_for_proc(p, m) = new unsigned[nnz_for_proc(p, m)];
          values_for_proc(p, m) = new double[nnz_for_proc(p, m)];
          unsigned entry = 0;
          for (unsigned i = 0; i < n_eqns_p; i++)
          {
            row_start_for_proc(p, m)[i] = entry;
            unsigned n_coef_in_row = ncoef[m][first_eqn_element + i];
            for (unsigned j = 0; j < n_coef_in_row; j++)
            {
              column_indices_for_proc(p, m)[entry] =
                matrix_col_indices[m][i + first_eqn_element][j];
              values_for_proc(p, m)[entry] =
                matrix_values[m][i + first_eqn_element][j];
              entry++;
            }
          }
          row_start_for_proc(p, m)[n_eqns_p] = entry;
        }
      }
      for (unsigned i = 0; i < my_n_eqn; i++)
      {
        delete[] matrix_col_indices[m][i];
        delete[] matrix_values[m][i];
      }
      delete[] matrix_col_indices[m];
      delete[] matrix_values[m];
    }
 
    // need to wait for the recv nnzs to complete
    // before we can allocate storage for the matrix recvs
    // ===================================================
 
    // recv and copy the data from the recv storage to
    // + nnz_from_proc
    // + n_eqn_from_proc
    Vector<MPI_Status> recv_nnz_stat(nproc - 1);
    MPI_Waitall(nproc - 1, &recv_nnz_reqs[0], &recv_nnz_stat[0]);
    Vector<unsigned> n_eqn_from_proc(nproc);
    DenseMatrix<unsigned> nnz_from_proc(nproc, n_matrix);
    for (unsigned p = 0; p < nproc; p++)
    {
      if (p != my_rank)
      {
        n_eqn_from_proc[p] = temp_recv_storage[p][0];
        for (unsigned m = 0; m < n_matrix; m++)
        {
          nnz_from_proc(p, m) = temp_recv_storage[p][m + 1];
        }
        delete[] temp_recv_storage[p];
      }
      else
      {
        n_eqn_from_proc[p] = n_eqn_for_proc[p];
        for (unsigned m = 0; m < n_matrix; m++)
        {
          nnz_from_proc(p, m) = nnz_for_proc(p, m);
        }
      }
    }
    recv_nnz_stat.clear();
    recv_nnz_reqs.clear();
 
    // allocate the storage for the data to be recv and post the sends recvs
    // =====================================================================
 
    // storage
    Vector<unsigned*> eqns_from_proc(nproc);
    DenseMatrix<double*> residuals_from_proc(nproc, n_vector);
    DenseMatrix<unsigned*> row_start_from_proc(nproc, n_matrix);
    DenseMatrix<unsigned*> column_indices_from_proc(nproc, n_matrix);
    DenseMatrix<double*> values_from_proc(nproc, n_matrix);
 
    // allocate and post sends and recvs
    double base;
    MPI_Aint communication_base;
    MPI_Get_address(&base, &communication_base);
    unsigned n_comm_types = 1 + 1 * n_vector + 3 * n_matrix;
    Vector<MPI_Request> recv_reqs;
    Vector<MPI_Request> send_reqs;
    for (unsigned p = 0; p < nproc; p++)
    {
      if (p != my_rank)
      {
        // allocate
        if (n_eqn_from_proc[p] > 0)
        {
          eqns_from_proc[p] = new unsigned[n_eqn_from_proc[p]];
          for (unsigned v = 0; v < n_vector; v++)
          {
            residuals_from_proc(p, v) = new double[n_eqn_from_proc[p]];
          }
          for (unsigned m = 0; m < n_matrix; m++)
          {
            row_start_from_proc(p, m) = new unsigned[n_eqn_from_proc[p] + 1];
            column_indices_from_proc(p, m) = new unsigned[nnz_from_proc(p, m)];
            values_from_proc(p, m) = new double[nnz_from_proc(p, m)];
          }
        }
 
        // recv
        if (n_eqn_from_proc[p] > 0)
        {
          MPI_Datatype types[n_comm_types];
          MPI_Aint offsets[n_comm_types];
          int count[n_comm_types];
          int pt = 0;
 
          // equations
          count[pt] = 1;
          MPI_Get_address(eqns_from_proc[p], &offsets[pt]);
          offsets[pt] -= communication_base;
          MPI_Type_contiguous(n_eqn_from_proc[p], MPI_UNSIGNED, &types[pt]);
          MPI_Type_commit(&types[pt]);
          pt++;
 
          // vectors
          for (unsigned v = 0; v < n_vector; v++)
          {
            count[pt] = 1;
            MPI_Get_address(residuals_from_proc(p, v), &offsets[pt]);
            offsets[pt] -= communication_base;
            MPI_Type_contiguous(n_eqn_from_proc[p], MPI_DOUBLE, &types[pt]);
            MPI_Type_commit(&types[pt]);
            pt++;
          }
 
          // matrices
          for (unsigned m = 0; m < n_matrix; m++)
          {
            // row start
            count[pt] = 1;
            MPI_Get_address(row_start_from_proc(p, m), &offsets[pt]);
            offsets[pt] -= communication_base;
            MPI_Type_contiguous(
              n_eqn_from_proc[p] + 1, MPI_UNSIGNED, &types[pt]);
            MPI_Type_commit(&types[pt]);
            pt++;
 
 
            // column indices
            count[pt] = 1;
            MPI_Get_address(column_indices_from_proc(p, m), &offsets[pt]);
            offsets[pt] -= communication_base;
            MPI_Type_contiguous(nnz_from_proc(p, m), MPI_UNSIGNED, &types[pt]);
            MPI_Type_commit(&types[pt]);
            pt++;
 
            // values
            count[pt] = 1;
            MPI_Get_address(values_from_proc(p, m), &offsets[pt]);
            offsets[pt] -= communication_base;
            MPI_Type_contiguous(nnz_from_proc(p, m), MPI_DOUBLE, &types[pt]);
            MPI_Type_commit(&types[pt]);
            pt++;
          }
 
          // build the combined type
          MPI_Datatype recv_type;
          MPI_Type_create_struct(
            n_comm_types, count, offsets, types, &recv_type);
          MPI_Type_commit(&recv_type);
          for (unsigned t = 0; t < n_comm_types; t++)
          {
            MPI_Type_free(&types[t]);
          }
          MPI_Request req;
          MPI_Irecv(
            &base, 1, recv_type, p, 1, Communicator_pt->mpi_comm(), &req);
          MPI_Type_free(&recv_type);
          recv_reqs.push_back(req);
        }
 
        // send
        if (n_eqn_for_proc[p] > 0)
        {
          MPI_Datatype types[n_comm_types];
          MPI_Aint offsets[n_comm_types];
          int count[n_comm_types];
          int pt = 0;
 
          // equations
          count[pt] = 1;
          MPI_Get_address(eqns_for_proc[p], &offsets[pt]);
          offsets[pt] -= communication_base;
          MPI_Type_contiguous(n_eqn_for_proc[p], MPI_UNSIGNED, &types[pt]);
          MPI_Type_commit(&types[pt]);
          pt++;
 
          // vectors
          for (unsigned v = 0; v < n_vector; v++)
          {
            count[pt] = 1;
            MPI_Get_address(residuals_for_proc(p, v), &offsets[pt]);
            offsets[pt] -= communication_base;
            MPI_Type_contiguous(n_eqn_for_proc[p], MPI_DOUBLE, &types[pt]);
            MPI_Type_commit(&types[pt]);
            pt++;
          }
 
          // matrices
          for (unsigned m = 0; m < n_matrix; m++)
          {
            // row start
            count[pt] = 1;
            MPI_Get_address(row_start_for_proc(p, m), &offsets[pt]);
            offsets[pt] -= communication_base;
            MPI_Type_contiguous(
              n_eqn_for_proc[p] + 1, MPI_UNSIGNED, &types[pt]);
            MPI_Type_commit(&types[pt]);
            pt++;
 
 
            // column indices
            count[pt] = 1;
            MPI_Get_address(column_indices_for_proc(p, m), &offsets[pt]);
            offsets[pt] -= communication_base;
            MPI_Type_contiguous(nnz_for_proc(p, m), MPI_UNSIGNED, &types[pt]);
            MPI_Type_commit(&types[pt]);
            pt++;
 
            // values
            count[pt] = 1;
            MPI_Get_address(values_for_proc(p, m), &offsets[pt]);
            offsets[pt] -= communication_base;
            MPI_Type_contiguous(nnz_for_proc(p, m), MPI_DOUBLE, &types[pt]);
            MPI_Type_commit(&types[pt]);
            pt++;
          }
 
          // build the combined type
          MPI_Datatype send_type;
          MPI_Type_create_struct(
            n_comm_types, count, offsets, types, &send_type);
          MPI_Type_commit(&send_type);
          for (unsigned t = 0; t < n_comm_types; t++)
          {
            MPI_Type_free(&types[t]);
          }
          MPI_Request req;
          MPI_Isend(
            &base, 1, send_type, p, 1, Communicator_pt->mpi_comm(), &req);
          MPI_Type_free(&send_type);
          send_reqs.push_back(req);
        }
      }
      // otherwise send to self
      else
      {
        eqns_from_proc[p] = eqns_for_proc[p];
        for (unsigned v = 0; v < n_vector; v++)
        {
          residuals_from_proc(p, v) = residuals_for_proc(p, v);
        }
        for (unsigned m = 0; m < n_matrix; m++)
        {
          row_start_from_proc(p, m) = row_start_for_proc(p, m);
          column_indices_from_proc(p, m) = column_indices_for_proc(p, m);
          values_from_proc(p, m) = values_for_proc(p, m);
        }
      }
    }
 
    // wait for the recvs to complete
    unsigned n_recv_req = recv_reqs.size();
    if (n_recv_req > 0)
    {
      Vector<MPI_Status> recv_stat(n_recv_req);
      MPI_Waitall(n_recv_req, &recv_reqs[0], &recv_stat[0]);
    }
 
    // ==============================================
    unsigned target_nrow_local = target_dist_pt->nrow_local();
 
    // loop over the matrices
    for (unsigned m = 0; m < n_matrix; m++)
    {
      // allocate row_start
      row_start[m] = new int[target_nrow_local + 1];
      row_start[m][0] = 0;
 
      // initially allocate storage based on the maximum number of non-zeros
      // from any one processor
      unsigned nnz_allocation = Parallel_sparse_assemble_previous_allocation;
      for (unsigned p = 0; p < nproc; p++)
      {
        nnz_allocation = std::max(nnz_allocation, nnz_from_proc(p, m));
      }
      Vector<double*> values_chunk(1);
      values_chunk[0] = new double[nnz_allocation];
      Vector<int*> column_indices_chunk(1);
      column_indices_chunk[0] = new int[nnz_allocation];
      Vector<unsigned> ncoef_in_chunk(1, 0);
      Vector<unsigned> size_of_chunk(1, 0);
      size_of_chunk[0] = nnz_allocation;
      unsigned current_chunk = 0;
 
      // for each row on this processor
      for (unsigned i = 0; i < target_nrow_local; i++)
      {
        row_start[m][i] = 0;
 
        // determine the processors that this row is on
        Vector<int> row_on_proc(nproc, -1);
        for (unsigned p = 0; p < nproc; p++)
        {
          if (n_eqn_from_proc[p] == 0)
          {
            row_on_proc[p] = -1;
          }
          else
          {
            int left = 0;
            int right = n_eqn_from_proc[p] - 1;
            int midpoint = right / 2;
            bool complete = false;
            while (!complete)
            {
              midpoint = (right + left) / 2;
              if (midpoint > right)
              {
                midpoint = right;
              }
              if (midpoint < left)
              {
                midpoint = left;
              }
              if (left == right)
              {
                if (eqns_from_proc[p][midpoint] == i)
                {
                  midpoint = left;
                }
                else
                {
                  midpoint = -1;
                }
                complete = true;
              }
              else if (eqns_from_proc[p][midpoint] == i)
              {
                complete = true;
              }
              else if (eqns_from_proc[p][midpoint] > i)
              {
                right = std::max(midpoint - 1, left);
              }
              else
              {
                left = std::min(midpoint + 1, right);
              }
            }
            row_on_proc[p] = midpoint;
          }
        }
 
        // for each processor build this row of the matrix
        unsigned check_first = ncoef_in_chunk[current_chunk];
        unsigned check_last = check_first;
        for (unsigned p = 0; p < nproc; p++)
        {
          if (row_on_proc[p] != -1)
          {
            int row = row_on_proc[p];
            unsigned first = row_start_from_proc(p, m)[row];
            unsigned last = row_start_from_proc(p, m)[row + 1];
            for (unsigned l = first; l < last; l++)
            {
              bool done = false;
              for (unsigned j = check_first; j <= check_last && !done; j++)
              {
                if (j == check_last)
                {
                  // is this temp array full, do we need to allocate
                  // a new temp array
                  if (ncoef_in_chunk[current_chunk] ==
                      size_of_chunk[current_chunk])
                  {
                    // number of chunks allocated
                    unsigned n_chunk = values_chunk.size();
 
                    // determine the number of non-zeros added so far
                    // (excluding the current row)
                    unsigned nnz_so_far = 0;
                    for (unsigned c = 0; c < n_chunk; c++)
                    {
                      nnz_so_far += ncoef_in_chunk[c];
                    }
                    nnz_so_far -= row_start[m][i];
 
                    // average number of non-zeros per row
                    unsigned avg_nnz = nnz_so_far / (i + 1);
 
                    // number of rows left +1
                    unsigned nrows_left = target_nrow_local - i;
 
                    // allocation for next chunk
                    unsigned next_chunk_size =
                      avg_nnz * nrows_left + row_start[m][i];
 
                    // allocate storage in next chunk
                    current_chunk++;
                    n_chunk++;
                    values_chunk.resize(n_chunk);
                    values_chunk[current_chunk] = new double[next_chunk_size];
                    column_indices_chunk.resize(n_chunk);
                    column_indices_chunk[current_chunk] =
                      new int[next_chunk_size];
                    size_of_chunk.resize(n_chunk);
                    size_of_chunk[current_chunk] = next_chunk_size;
                    ncoef_in_chunk.resize(n_chunk);
 
                    // copy current row from previous chunk to new chunk
                    for (unsigned k = check_first; k < check_last; k++)
                    {
                      values_chunk[current_chunk][k - check_first] =
                        values_chunk[current_chunk - 1][k];
                      column_indices_chunk[current_chunk][k - check_first] =
                        column_indices_chunk[current_chunk - 1][k];
                    }
                    ncoef_in_chunk[current_chunk - 1] -= row_start[m][i];
                    ncoef_in_chunk[current_chunk] = row_start[m][i];
 
                    // update check_first and check_last
                    check_first = 0;
                    check_last = row_start[m][i];
                    j = check_last;
                  }
 
                  // add the coefficient
                  values_chunk[current_chunk][j] = values_from_proc(p, m)[l];
                  column_indices_chunk[current_chunk][j] =
                    column_indices_from_proc(p, m)[l];
                  ncoef_in_chunk[current_chunk]++;
                  row_start[m][i]++;
                  check_last++;
                  done = true;
                }
                else if (column_indices_chunk[current_chunk][j] ==
                         (int)column_indices_from_proc(p, m)[l])
                {
                  values_chunk[current_chunk][j] += values_from_proc(p, m)[l];
                  done = true;
                }
              }
            }
          }
        }
      }
 
      // delete recv data for this matrix
      for (unsigned p = 0; p < nproc; p++)
      {
        if (n_eqn_from_proc[p] > 0)
        {
          delete[] row_start_from_proc(p, m);
          delete[] column_indices_from_proc(p, m);
          delete[] values_from_proc(p, m);
        }
      }
 
      // next we take the chunk base storage of the column indices and values
      // and copy into a single contiguous block of memory
      // ====================================================================
      unsigned n_chunk = values_chunk.size();
      nnz[m] = 0;
      for (unsigned c = 0; c < n_chunk; c++)
      {
        nnz[m] += ncoef_in_chunk[c];
      }
      Parallel_sparse_assemble_previous_allocation = nnz[m];
 
      // allocate
      values[m] = new double[nnz[m]];
      column_indices[m] = new int[nnz[m]];
 
      // copy
      unsigned pt = 0;
      for (unsigned c = 0; c < n_chunk; c++)
      {
        unsigned nc = ncoef_in_chunk[c];
        for (unsigned i = 0; i < nc; i++)
        {
          values[m][pt + i] = values_chunk[c][i];
          column_indices[m][pt + i] = column_indices_chunk[c][i];
        }
        pt += nc;
        delete[] values_chunk[c];
        delete[] column_indices_chunk[c];
      }
 
      // the row_start vector currently contains the number of coefs in each
      // row. Update
      // ===================================================================
      unsigned g = row_start[m][0];
      row_start[m][0] = 0;
      for (unsigned i = 1; i < target_nrow_local; i++)
      {
        unsigned h = g + row_start[m][i];
        row_start[m][i] = g;
        g = h;
      }
      row_start[m][target_nrow_local] = g;
    }
 
    // next accumulate the residuals
    for (unsigned v = 0; v < n_vector; v++)
    {
      residuals[v] = new double[target_nrow_local];
      for (unsigned i = 0; i < target_nrow_local; i++)
      {
        residuals[v][i] = 0;
      }
      for (unsigned p = 0; p < nproc; p++)
      {
        if (n_eqn_from_proc[p] > 0)
        {
          unsigned n_eqn_p = n_eqn_from_proc[p];
          for (unsigned i = 0; i < n_eqn_p; i++)
          {
            residuals[v][eqns_from_proc[p][i]] += residuals_from_proc(p, v)[i];
          }
          delete[] residuals_from_proc(p, v);
        }
      }
    }
 
    // delete list of eqns from proc
    for (unsigned p = 0; p < nproc; p++)
    {
      if (n_eqn_from_proc[p] > 0)
      {
        delete[] eqns_from_proc[p];
      }
    }
 
    // and wait for sends to complete
    Vector<MPI_Status> send_nnz_stat(nproc - 1);
    MPI_Waitall(nproc - 1, &send_nnz_reqs[0], &send_nnz_stat[0]);
    for (unsigned p = 0; p < nproc; p++)
    {
      if (p != my_rank)
      {
        delete[] temp_send_storage[p];
      }
    }
    send_nnz_stat.clear();
    send_nnz_reqs.clear();
 
    // wait for the matrix data sends to complete and delete the data
    unsigned n_send_reqs = send_reqs.size();
    if (n_send_reqs > 0)
    {
      Vector<MPI_Status> send_stat(n_send_reqs);
      MPI_Waitall(n_send_reqs, &send_reqs[0], &send_stat[0]);
      for (unsigned p = 0; p < nproc; p++)
      {
        if (p != my_rank)
        {
          if (n_eqn_for_proc[p])
          {
            delete[] eqns_for_proc[p];
            for (unsigned m = 0; m < n_matrix; m++)
            {
              delete[] row_start_for_proc(p, m);
              delete[] column_indices_for_proc(p, m);
              delete[] values_for_proc(p, m);
            }
            for (unsigned v = 0; v < n_vector; v++)
            {
              delete[] residuals_for_proc(p, v);
            }
          }
        }
      }
    }
 
    // Doc?
    if (Doc_imbalance_in_parallel_assembly)
    {
      t_end = TimingHelpers::timer();
      t_local = t_end - t_start;
      t_max = 0.0;
      t_min = 0.0;
      t_sum = 0.0;
      MPI_Allreduce(&t_local,
                    &t_max,
                    1,
                    MPI_DOUBLE,
                    MPI_MAX,
                    this->communicator_pt()->mpi_comm());
      MPI_Allreduce(&t_local,
                    &t_min,
                    1,
                    MPI_DOUBLE,
                    MPI_MIN,
                    this->communicator_pt()->mpi_comm());
      MPI_Allreduce(&t_local,
                    &t_sum,
                    1,
                    MPI_DOUBLE,
                    MPI_SUM,
                    this->communicator_pt()->mpi_comm());
      double imbalance = (t_max - t_min) / (t_sum / double(nproc)) * 100.0;
      if (doing_residuals)
      {
        oomph_info << "CPU for residual distribut.  (loc/max/min/imbal): ";
      }
      else
      {
        oomph_info << "CPU for Jacobian distribut.  (loc/max/min/imbal): ";
      }
      oomph_info << t_local << " " << t_max << " " << t_min << " " << imbalance
                 << "%\n\n";
    }
  }
 
#endif
 
 
  //================================================================
  /// Get the full Jacobian by finite differencing
  //================================================================
  void Problem::get_fd_jacobian(DoubleVector& residuals,
                                DenseMatrix<double>& jacobian)
  {
    // Approximate the full (dense) Jacobian with one-sided finite
    // differences: every dof is incremented in turn, the residuals are
    // recomputed and the resulting difference quotient fills one column.
    // On exit, residuals contains the residuals at the unperturbed state
    // and jacobian has been resized to ndof() x ndof().

#ifdef OOMPH_HAS_MPI

    // Issue a warning if the problem has been distributed
    if (Problem_has_been_distributed)
    {
      OomphLibWarning("This is unlikely to work with a distributed problem",
                      " Problem::get_fd_jacobian()",
                      OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // Increment applied to each dof when differencing
    const double fd_step = 1.0e-8;

    // Total number of unknowns in the problem
    const unsigned long n_unknown = ndof();

    // Residuals evaluated at the perturbed state
    DoubleVector perturbed_residuals;

    // Residuals at the current (reference) state
    get_residuals(residuals);

    // Resize the matrix every time; speed is not a concern here
    jacobian.resize(n_unknown, n_unknown);

    // One column of the Jacobian per dof
    for (unsigned long col = 0; col < n_unknown; ++col)
    {
      // Remember the current value and bump the dof
      const double saved_value = *Dof_pt[col];
      *Dof_pt[col] = saved_value + fd_step;

      // Run the Newton-solve action hooks so the rest of the problem is
      // updated as though the modified dofs were a computed solution
      actions_before_newton_solve();
      actions_before_newton_convergence_check();
      actions_after_newton_solve();

      // Residuals at the perturbed state
      get_residuals(perturbed_residuals);

      // Forward difference quotient for every equation
      for (unsigned long row = 0; row < n_unknown; ++row)
      {
        jacobian(row, col) =
          (perturbed_residuals[row] - residuals[row]) / fd_step;
      }

      // Put the original value back
      *Dof_pt[col] = saved_value;
    }

    // All dofs are back at their original values: run the hooks once more
    // so the problem returns to the state it was in
    actions_before_newton_solve();
    actions_before_newton_convergence_check();
    actions_after_newton_solve();
  }
 
  //======================================================================
  /// Get derivative of the residuals vector wrt a global parameter
  /// This is required in continuation problems
  //=======================================================================
  void Problem::get_derivative_wrt_global_parameter(double* const& parameter_pt,
                                                    DoubleVector& result)
  {
    // If we are doing the calculation analytically then call the appropriate
    // handler and then calling get_residuals
    if (is_dparameter_calculated_analytically(parameter_pt))
    {
      // Locally cache pointer to assembly handler
      AssemblyHandler* const old_assembly_handler_pt = Assembly_handler_pt;
      // Create a new assembly handler that replaces get_residuals by
      // get_dresiduals_dparameter for each element
      Assembly_handler_pt =
        new ParameterDerivativeHandler(old_assembly_handler_pt, parameter_pt);
      // Get the residuals, which will be dresiduals by dparameter
      this->get_residuals(result);
      // Delete the parameter derivative handler
      delete Assembly_handler_pt;
      // Reset the assembly handler to the original handler
      Assembly_handler_pt = old_assembly_handler_pt;
 
      /*AssemblyHandler* const assembly_handler_pt = Assembly_handler_pt;
      //Loop over all the elements
      unsigned long Element_pt_range = Mesh_pt->nelement();
      for(unsigned long e=0;e<Element_pt_range;e++)
       {
        //Get the pointer to the element
        GeneralisedElement* elem_pt = Mesh_pt->element_pt(e);
        //Find number of dofs in the element
        unsigned n_element_dofs = assembly_handler_pt->ndof(elem_pt);
        //Set up an array
        Vector<double> element_residuals(n_element_dofs);
        //Fill the array
        assembly_handler_pt->get_dresiduals_dparameter(elem_pt,parameter_pt,
                                                       element_residuals);
        //Now loop over the dofs and assign values to global Vector
        for(unsigned l=0;l<n_element_dofs;l++)
         {
          result[assembly_handler_pt->eqn_number(elem_pt,l)]
           += element_residuals[l];
          }
          }*/
 
      // for(unsigned n=0;n<n_dof;n++)
      // {std::cout << "BLA " << n << " " <<  result[n] << "\n";}
    }
    // Otherwise use the finite difference default
    else
    {
      // Get the (global) residuals and store in the result vector
      get_residuals(result);
 
      // Storage for the new residuals
      DoubleVector newres;
 
      // Increase the global parameter
      const double FD_step = 1.0e-8;
 
      // Store the current value of the parameter
      double param_value = *parameter_pt;
 
      // Increase the parameter
      *parameter_pt += FD_step;
 
      // Do any possible updates
      actions_after_change_in_global_parameter(parameter_pt);
 
      // Get the new residuals
      get_residuals(newres);
 
      // Find the number of local rows
      //(I think it's a global vector, so that should be fine)
      const unsigned ndof_local = result.nrow_local();
 
      // Do the finite differencing in the local variables
      for (unsigned n = 0; n < ndof_local; ++n)
      {
        result[n] = (newres[n] - result[n]) / FD_step;
      }
 
      // Reset the value of the parameter
      *parameter_pt = param_value;
 
      // Do any possible updates
      actions_after_change_in_global_parameter(parameter_pt);
    }
  }
 
 
  //======================================================================
  /// Return the product of the global hessian (derivative of Jacobian
  /// matrix  with respect to all variables) with
  /// an eigenvector, Y, and any number of other specified vectors C
  /// (d(J_{ij})/d u_{k}) Y_{j} C_{k}.
  /// This function is used in assembling and solving the augmented systems
  /// associated with bifurcation tracking.
  /// The default implementation is to use finite differences at the global
  /// level.
  //========================================================================
  void Problem::get_hessian_vector_products(
    DoubleVectorWithHaloEntries const& Y,
    Vector<DoubleVectorWithHaloEntries> const& C,
    Vector<DoubleVectorWithHaloEntries>& product)
  {
    // Y:       vector contracted with the column index of the Jacobian
    //          derivative.
    // C:       set of direction vectors; one product is formed per entry.
    // product: output. product[i] is rebuilt with the distribution of Y and
    //          receives (d(J_{ij})/d u_{k}) Y_{j} C[i]_{k}. The vector is
    //          indexed for i < C.size() and is not resized here, so it must
    //          already hold at least that many entries.

    // How many vector products must we construct
    const unsigned n_vec = C.size();

    // Cache the assembly handler
    AssemblyHandler* const assembly_handler_pt = Assembly_handler_pt;

    // Rebuild the results vectors and initialise to zero
    // use the same distribution of the vector Y
    for (unsigned i = 0; i < n_vec; i++)
    {
      product[i].build(Y.distribution_pt(), 0.0);
      product[i].initialise(0.0);
    }

// Setup the halo schemes for the result
#ifdef OOMPH_HAS_MPI
    if (Problem_has_been_distributed)
    {
      for (unsigned i = 0; i < n_vec; i++)
      {
        product[i].build_halo_scheme(this->Halo_scheme_pt);
      }
    }
#endif

    // If we are doing the calculation analytically then call the appropriate
    // handler
    // A better way to do this is probably to hook into the get_residuals
    // framework but with a different member function of the assembly
    // handler
    if (this->are_hessian_products_calculated_analytically())
    {
      // Loop over all the elements
      unsigned long Element_pt_range = Mesh_pt->nelement();
      for (unsigned long e = 0; e < Element_pt_range; e++)
      {
        // Get the pointer to the element
        GeneralisedElement* elem_pt = Mesh_pt->element_pt(e);
// Do not loop over halo elements
#ifdef OOMPH_HAS_MPI
        if (!elem_pt->is_halo())
        {
#endif
          // Find number of dofs in the element
          unsigned n_var = assembly_handler_pt->ndof(elem_pt);
          // Set up a matrix for the input and output
          Vector<double> Y_local(n_var);
          DenseMatrix<double> C_local(n_vec, n_var);
          DenseMatrix<double> product_local(n_vec, n_var);

          // Translate the global input vectors into the local storage
          // Probably horribly inefficient, but otherwise things get really
          // messy at the elemental level
          for (unsigned l = 0; l < n_var; l++)
          {
            // Cache the global equation number
            const unsigned long eqn_number =
              assembly_handler_pt->eqn_number(elem_pt, l);

            Y_local[l] = Y.global_value(eqn_number);
            for (unsigned i = 0; i < n_vec; i++)
            {
              C_local(i, l) = C[i].global_value(eqn_number);
            }
          }

          // Let the handler compute the elemental products
          assembly_handler_pt->get_hessian_vector_products(
            elem_pt, Y_local, C_local, product_local);

          // Add the local results into the global vectors
          for (unsigned l = 0; l < n_var; l++)
          {
            const unsigned long eqn_number =
              assembly_handler_pt->eqn_number(elem_pt, l);

            for (unsigned i = 0; i < n_vec; i++)
            {
              product[i].global_value(eqn_number) += product_local(i, l);
            }
          }
#ifdef OOMPH_HAS_MPI
        }
#endif
      }
    }
    // Otherwise calculate using finite differences by
    // perturbing the jacobian along a particular direction
    else
    {
      // Cache the finite difference step
      // (Alice: My bifurcation tracking converges better with this FD_step
      // as 1.0e-5. The default value remains at 1.0e-8.)
      const double FD_step = FD_step_used_in_get_hessian_vector_products;

      // We can now construct our multipliers
      const unsigned n_dof_local = this->Dof_distribution_pt->nrow_local();

      // Prepare to scale: largest absolute value of the locally stored dofs
      // and of the locally stored entries of each C[i]
      double dof_length = 0.0;
      Vector<double> C_length(n_vec, 0.0);

      for (unsigned n = 0; n < n_dof_local; n++)
      {
        if (std::fabs(this->dof(n)) > dof_length)
        {
          dof_length = std::fabs(this->dof(n));
        }
      }

      // C is assumed to have the same distribution as the dofs
      for (unsigned i = 0; i < n_vec; i++)
      {
        for (unsigned n = 0; n < n_dof_local; n++)
        {
          if (std::fabs(C[i][n]) > C_length[i])
          {
            C_length[i] = std::fabs(C[i][n]);
          }
        }
      }

      // Now combine the information across processors, if distributed
#ifdef OOMPH_HAS_MPI
      if (Problem_has_been_distributed)
      {
        // Pack all the lengths into one buffer so that a single reduction
        // suffices. The size is only known at run time, so use a Vector
        // rather than a (non-standard) variable-length array.
        const unsigned n_length = n_vec + 1;
        Vector<double> all_length(n_length);
        all_length[0] = dof_length;
        for (unsigned i = 0; i < n_vec; i++)
        {
          all_length[i + 1] = C_length[i];
        }

        // Do the MPI call: element-wise maximum over all processors
        Vector<double> all_length_reduce(n_length);
        MPI_Allreduce(&all_length[0],
                      &all_length_reduce[0],
                      n_length,
                      MPI_DOUBLE,
                      MPI_MAX,
                      this->communicator_pt()->mpi_comm());

        // Read out the information
        dof_length = all_length_reduce[0];
        for (unsigned i = 0; i < n_vec; i++)
        {
          C_length[i] = all_length_reduce[i + 1];
        }
      }
#endif

      // Form the multipliers:
      // C_mult[i] = (dof_length / C_length[i] + FD_step) * FD_step
      Vector<double> C_mult(n_vec, 0.0);
      for (unsigned i = 0; i < n_vec; i++)
      {
        if (C_length[i] == 0.0)
        {
          // C[i] vanishes identically, so the ratio above is undefined
          // (inf or NaN) and would poison the dofs when they are perturbed.
          // The perturbation C_mult[i] * C[i] is zero whatever the
          // multiplier, so any finite non-zero value yields the correct
          // (zero) product.
          C_mult[i] = FD_step;
        }
        else
        {
          C_mult[i] = dof_length / C_length[i];
          C_mult[i] += FD_step;
          C_mult[i] *= FD_step;
        }
      }


      // Dummy vector to stand in the place of the residuals
      Vector<double> dummy_res;

      // Calculate the product of the jacobian matrices, etc by looping over the
      // elements
      const unsigned long n_element = this->mesh_pt()->nelement();
      for (unsigned long e = 0; e < n_element; e++)
      {
        GeneralisedElement* elem_pt = this->mesh_pt()->element_pt(e);
        // Ignore halo elements
#ifdef OOMPH_HAS_MPI
        if (!elem_pt->is_halo())
        {
#endif
          // Number of dofs in this element
          unsigned n_var = assembly_handler_pt->ndof(elem_pt);
          // Resize the dummy residuals vector
          dummy_res.resize(n_var);
          // Allocate storage for the unperturbed jacobian matrix
          DenseMatrix<double> jac(n_var);
          // Get unperturbed jacobian
          assembly_handler_pt->get_jacobian(elem_pt, dummy_res, jac);

          // Backup the dofs
          Vector<double> dof_bac(n_var);
          for (unsigned n = 0; n < n_var; n++)
          {
            unsigned eqn_number = assembly_handler_pt->eqn_number(elem_pt, n);
            dof_bac[n] = *this->global_dof_pt(eqn_number);
          }

          // Now loop over all vectors C
          for (unsigned i = 0; i < n_vec; i++)
          {
            // Perturb the dofs by the appropriate vector
            for (unsigned n = 0; n < n_var; n++)
            {
              unsigned eqn_number = assembly_handler_pt->eqn_number(elem_pt, n);
              // Perturb by vector C[i]
              *this->global_dof_pt(eqn_number) +=
                C_mult[i] * C[i].global_value(eqn_number);
            }
            actions_before_newton_convergence_check();

            // Allocate storage for the perturbed jacobian
            DenseMatrix<double> jac_C(n_var);

            // Now get the new jacobian
            assembly_handler_pt->get_jacobian(elem_pt, dummy_res, jac_C);

            // Reset the dofs
            for (unsigned n = 0; n < n_var; n++)
            {
              unsigned eqn_number = assembly_handler_pt->eqn_number(elem_pt, n);
              *this->global_dof_pt(eqn_number) = dof_bac[n];
            }
            actions_before_newton_convergence_check();

            // Now work out the products: difference of the two jacobians
            // applied to Y, divided by the size of the perturbation
            for (unsigned n = 0; n < n_var; n++)
            {
              unsigned eqn_number = assembly_handler_pt->eqn_number(elem_pt, n);
              double prod_c = 0.0;
              for (unsigned m = 0; m < n_var; m++)
              {
                unsigned unknown = assembly_handler_pt->eqn_number(elem_pt, m);
                prod_c += (jac_C(n, m) - jac(n, m)) * Y.global_value(unknown);
              }
              product[i].global_value(eqn_number) += prod_c / C_mult[i];
            }
          }
#ifdef OOMPH_HAS_MPI
        }
#endif
      } // End of loop over elements
    }

    // If we have a distributed problem then gather all
    // values
#ifdef OOMPH_HAS_MPI
    if (Problem_has_been_distributed)
    {
      // Sum all values if distributed
      for (unsigned i = 0; i < n_vec; i++)
      {
        product[i].sum_all_halo_and_haloed_values();
      }
    }
#endif
  }
 
 
  //==================================================================
  /// Solve the eigenproblem
  //==================================================================
  void Problem::solve_eigenproblem(const unsigned& n_eval,
                                   Vector<std::complex<double>>& alpha,
                                   Vector<double>& beta,
                                   Vector<DoubleVector>& eigenvector_real,
                                   Vector<DoubleVector>& eigenvector_imag,
                                   const bool& make_timesteppers_steady)
  {
    // If the boolean flag is steady, then make all the timesteppers steady
    // before solving the eigenproblem. This will "switch off" the
    // time-derivative terms in the jacobian matrix
    if (make_timesteppers_steady)
    {
      // Find out how many timesteppers there are
      const unsigned n_time_steppers = ntime_stepper();
 
      // Vector of bools to store the is_steady status of the various
      // timesteppers when we came in here
      std::vector<bool> was_steady(n_time_steppers);
 
      // Loop over them all and make them (temporarily) static
      for (unsigned i = 0; i < n_time_steppers; i++)
      {
        was_steady[i] = time_stepper_pt(i)->is_steady();
        time_stepper_pt(i)->make_steady();
      }
 
      const bool do_adjoint_problem = false;
      // Call the Eigenproblem for the eigensolver
      Eigen_solver_pt->solve_eigenproblem(this,
                                          n_eval,
                                          alpha,
                                          beta,
                                          eigenvector_real,
                                          eigenvector_imag,
                                          do_adjoint_problem);
 
      // Reset the is_steady status of all timesteppers that
      // weren't already steady when we came in here and reset their
      // weights
      for (unsigned i = 0; i < n_time_steppers; i++)
      {
        if (!was_steady[i])
        {
          time_stepper_pt(i)->undo_make_steady();
        }
      }
    }
    // Otherwise if we don't want to make the problem steady, just
    // assemble and solve the eigensystem
    else
    {
      const bool do_adjoint_problem = false;
      // Call the Eigenproblem for the eigensolver
      Eigen_solver_pt->solve_eigenproblem(this,
                                          n_eval,
                                          alpha,
                                          beta,
                                          eigenvector_real,
                                          eigenvector_imag,
                                          do_adjoint_problem);
    }
  }
 
 
  //==================================================================
  /// Solve the eigenproblem
  //==================================================================
  void Problem::solve_eigenproblem(const unsigned& n_eval,
                                   Vector<std::complex<double>>& eigenvalue,
                                   Vector<DoubleVector>& eigenvector_real,
                                   Vector<DoubleVector>& eigenvector_imag,
                                   const bool& make_timesteppers_steady)
  {
    // If the boolean flag is steady, then make all the timesteppers steady
    // before solving the eigenproblem. This will "switch off" the
    // time-derivative terms in the jacobian matrix
    if (make_timesteppers_steady)
    {
      // Find out how many timesteppers there are
      const unsigned n_time_steppers = ntime_stepper();
 
      // Vector of bools to store the is_steady status of the various
      // timesteppers when we came in here
      std::vector<bool> was_steady(n_time_steppers);
 
      // Loop over them all and make them (temporarily) static
      for (unsigned i = 0; i < n_time_steppers; i++)
      {
        was_steady[i] = time_stepper_pt(i)->is_steady();
        time_stepper_pt(i)->make_steady();
      }
 
      const bool do_adjoint_problem = false;
      // Call the Eigenproblem for the eigensolver
      Eigen_solver_pt->solve_eigenproblem(this,
                                          n_eval,
                                          eigenvalue,
                                          eigenvector_real,
                                          eigenvector_imag,
                                          do_adjoint_problem);
 
      // Reset the is_steady status of all timesteppers that
      // weren't already steady when we came in here and reset their
      // weights
      for (unsigned i = 0; i < n_time_steppers; i++)
      {
        if (!was_steady[i])
        {
          time_stepper_pt(i)->undo_make_steady();
        }
      }
    }
    // Otherwise if we don't want to make the problem steady, just
    // assemble and solve the eigensystem
    else
    {
      const bool do_adjoint_problem = false;
      // Call the Eigenproblem for the eigensolver
      Eigen_solver_pt->solve_eigenproblem(this,
                                          n_eval,
                                          eigenvalue,
                                          eigenvector_real,
                                          eigenvector_imag,
                                          do_adjoint_problem);
    }
  }
 
 
  //==================================================================
  /// Solve the adjoint eigenproblem
  //==================================================================
  void Problem::solve_adjoint_eigenproblem(
    const unsigned& n_eval,
    Vector<std::complex<double>>& eigenvalue,
    Vector<DoubleVector>& eigenvector_real,
    Vector<DoubleVector>& eigenvector_imag,
    const bool& make_timesteppers_steady)
  {
    // If the boolean flag is steady, then make all the timesteppers steady
    // before solving the eigenproblem. This will "switch off" the
    // time-derivative terms in the jacobian matrix
    if (make_timesteppers_steady)
    {
      // Find out how many timesteppers there are
      const unsigned n_time_steppers = ntime_stepper();
 
      // Vector of bools to store the is_steady status of the various
      // timesteppers when we came in here
      std::vector<bool> was_steady(n_time_steppers);
 
      // Loop over them all and make them (temporarily) static
      for (unsigned i = 0; i < n_time_steppers; i++)
      {
        was_steady[i] = time_stepper_pt(i)->is_steady();
        time_stepper_pt(i)->make_steady();
      }
 
      const bool do_adjoint_problem = true;
      // Call the Eigenproblem for the eigensolver
      Eigen_solver_pt->solve_eigenproblem(this,
                                          n_eval,
                                          eigenvalue,
                                          eigenvector_real,
                                          eigenvector_imag,
                                          do_adjoint_problem);
 
      // Reset the is_steady status of all timesteppers that
      // weren't already steady when we came in here and reset their
      // weights
      for (unsigned i = 0; i < n_time_steppers; i++)
      {
        if (!was_steady[i])
        {
          time_stepper_pt(i)->undo_make_steady();
        }
      }
    }
    // Otherwise if we don't want to make the problem steady, just
    // assemble and solve the eigensystem
    else
    {
      const bool do_adjoint_problem = true;
      // Call the Eigenproblem for the eigensolver
      Eigen_solver_pt->solve_eigenproblem(this,
                                          n_eval,
                                          eigenvalue,
                                          eigenvector_real,
                                          eigenvector_imag,
                                          do_adjoint_problem);
    }
  }
 
  //===================================================================
  /// Get the matrices required to solve an eigenproblem
  /// WARNING: temporarily this method only works with non-distributed
  /// matrices
  //===================================================================
  void Problem::get_eigenproblem_matrices(CRDoubleMatrix& mass_matrix,
                                          CRDoubleMatrix& main_matrix,
                                          const double& shift)
  {
    // Assemble the two matrices of the eigenproblem in compressed row
    // format. An EigenProblemHandler (constructed with the shift) is
    // installed as the assembly handler for the duration of the assembly;
    // the first assembled matrix becomes main_matrix and the second becomes
    // mass_matrix. The previous assembly handler is reinstated on exit.
    //
    // Which assembly routine is used depends on the build/run mode:
    // 1) Serial build, or MPI build running on one processor: serial
    //    sparse assembly
    // 2) MPI build on several processors with a distributed target
    //    distribution: parallel assembly straight into that distribution
    // 3) MPI build on several processors with a non-distributed target
    //    distribution: parallel assembly into a temporary distribution,
    //    followed by a redistribution to the target


#ifdef PARANOID
    if (mass_matrix.distribution_built() && main_matrix.distribution_built())
    {
      // Check that the distribution of the mass matrix and jacobian match
      if (!(*mass_matrix.distribution_pt() == *main_matrix.distribution_pt()))
      {
        std::ostringstream error_stream;
        error_stream
          << "The distributions of the jacobian and mass matrix are\n"
          << "not the same and they must be.\n";
        throw OomphLibError(
          error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
      }

      // Both matrices must have as many rows as the problem has dofs
      if (mass_matrix.nrow() != this->ndof())
      {
        std::ostringstream error_stream;
        error_stream
          << "mass_matrix has a distribution, but the number of rows is not "
          << "equal to the number of degrees of freedom in the problem.";
        throw OomphLibError(
          error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
      }

      if (main_matrix.nrow() != this->ndof())
      {
        std::ostringstream error_stream;
        error_stream
          << "main_matrix has a distribution, but the number of rows is not "
          << "equal to the number of degrees of freedom in the problem.";
        throw OomphLibError(
          error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
      }
    }
    // If only one of the two matrices has its distribution built, then
    // complain
    else if (main_matrix.distribution_built() !=
             mass_matrix.distribution_built())
    {
      std::ostringstream error_stream;
      error_stream << "The distribution of the jacobian and mass matrix must "
                   << "both be setup or both not setup";
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // Store the old assembly handler
    AssemblyHandler* old_assembly_handler_pt = Assembly_handler_pt;
    // Now setup the eigenproblem handler, pass in the value of the shift
    Assembly_handler_pt = new EigenProblemHandler(shift);

    // Prepare the storage formats: two entries each, index 0 is used for
    // the main matrix and index 1 for the mass matrix (see the build calls
    // below)
    Vector<int*> column_or_row_index(2);
    Vector<int*> row_or_column_start(2);
    Vector<double*> value(2);
    Vector<unsigned> nnz(2);
    // Empty vector of residuals pointers: residuals are not used in these
    // problems
    Vector<double*> residuals_vectors(0);

    // determine the distribution for the jacobian (main matrix)
    // IF the jacobian has distribution setup then use that
    // ELSE let create_new_linear_algebra_distribution() provide one
    // Either way dist_pt is deleted at the end of this function
    LinearAlgebraDistribution* dist_pt = 0;
    if (main_matrix.distribution_built())
    {
      dist_pt = new LinearAlgebraDistribution(main_matrix.distribution_pt());
    }
    else
    {
      create_new_linear_algebra_distribution(dist_pt);
    }


    // The matrix is in compressed row format
    bool compressed_row_flag = true;

#ifdef OOMPH_HAS_MPI
    // Only one processor: use the serial assembly even in an MPI build
    if (Communicator_pt->nproc() == 1)
    {
#endif

      sparse_assemble_row_or_column_compressed(column_or_row_index,
                                               row_or_column_start,
                                               value,
                                               nnz,
                                               residuals_vectors,
                                               compressed_row_flag);

      // The main matrix is the first entry
      // NOTE(review): build_without_copy presumably hands the raw arrays
      // over to the matrix (they are not freed in this function) -- confirm
      main_matrix.build(dist_pt);
      main_matrix.build_without_copy(dist_pt->nrow(),
                                     nnz[0],
                                     value[0],
                                     column_or_row_index[0],
                                     row_or_column_start[0]);
      // The mass matrix is the second entry
      mass_matrix.build(dist_pt);
      mass_matrix.build_without_copy(dist_pt->nrow(),
                                     nnz[1],
                                     value[1],
                                     column_or_row_index[1],
                                     row_or_column_start[1]);
#ifdef OOMPH_HAS_MPI
    }
    else
    {
      // Several processors and a distributed target: assemble directly
      // into the target distribution
      if (dist_pt->distributed())
      {
        parallel_sparse_assemble(dist_pt,
                                 column_or_row_index,
                                 row_or_column_start,
                                 value,
                                 nnz,
                                 residuals_vectors);
        // The main matrix is the first entry
        main_matrix.build(dist_pt);
        main_matrix.build_without_copy(dist_pt->nrow(),
                                       nnz[0],
                                       value[0],
                                       column_or_row_index[0],
                                       row_or_column_start[0]);
        // The mass matrix is the second entry
        mass_matrix.build(dist_pt);
        mass_matrix.build_without_copy(dist_pt->nrow(),
                                       nnz[1],
                                       value[1],
                                       column_or_row_index[1],
                                       row_or_column_start[1]);
      }
      // Several processors but a non-distributed target: assemble into a
      // temporary distribution with the same number of rows, then
      // redistribute each matrix to the target
      else
      {
        // NOTE(review): the final "true" argument presumably requests a
        // distributed layout -- confirm against LinearAlgebraDistribution
        LinearAlgebraDistribution* temp_dist_pt =
          new LinearAlgebraDistribution(Communicator_pt, dist_pt->nrow(), true);
        parallel_sparse_assemble(temp_dist_pt,
                                 column_or_row_index,
                                 row_or_column_start,
                                 value,
                                 nnz,
                                 residuals_vectors);
        // The main matrix is the first entry
        main_matrix.build(temp_dist_pt);
        main_matrix.build_without_copy(dist_pt->nrow(),
                                       nnz[0],
                                       value[0],
                                       column_or_row_index[0],
                                       row_or_column_start[0]);
        main_matrix.redistribute(dist_pt);
        // The mass matrix is the second entry
        mass_matrix.build(temp_dist_pt);
        mass_matrix.build_without_copy(dist_pt->nrow(),
                                       nnz[1],
                                       value[1],
                                       column_or_row_index[1],
                                       row_or_column_start[1]);
        mass_matrix.redistribute(dist_pt);
        // The temporary distribution is no longer needed
        delete temp_dist_pt;
      }
    }
#endif

    // Clean up the distribution created above
    delete dist_pt;

    // Delete the eigenproblem handler
    delete Assembly_handler_pt;
    // Reset the assembly handler to the original handler
    Assembly_handler_pt = old_assembly_handler_pt;
  }
 
 
  //=======================================================================
  /// Store the current values of the dofs
  //=======================================================================
  void Problem::store_current_dof_values()
  {
    // If memory has not been allocated, then allocated memory for the saved
    // dofs
    if (Saved_dof_pt == 0)
    {
      Saved_dof_pt = new Vector<double>;
    }
 
#ifdef OOMPH_HAS_MPI
    // If the problem is distributed I have to do something different
    if (Problem_has_been_distributed)
    {
      // How many entries do we store locally?
      const unsigned n_row_local = Dof_distribution_pt->nrow_local();
 
      // Resize the vector
      Saved_dof_pt->resize(n_row_local);
 
      // Back 'em up
      for (unsigned i = 0; i < n_row_local; i++)
      {
        (*Saved_dof_pt)[i] = *(this->Dof_pt[i]);
      }
    }
    // Otherwise just store all the dofs
    else
#endif
    {
      // Find the number of dofs
      unsigned long n_dof = ndof();
 
      // Resize the vector
      Saved_dof_pt->resize(n_dof);
 
      // Transfer the values over
      for (unsigned long n = 0; n < n_dof; n++)
      {
        (*Saved_dof_pt)[n] = dof(n);
      }
    }
  }
 
  //====================================================================
  /// Restore the saved dofs
  //====================================================================
  void Problem::restore_dof_values()
  {
    // Check that we can do this
    if (Saved_dof_pt == 0)
    {
      throw OomphLibError(
        "There are no stored values, use store_current_dof_values()\n",
        OOMPH_CURRENT_FUNCTION,
        OOMPH_EXCEPTION_LOCATION);
    }
 
 
#ifdef OOMPH_HAS_MPI
    // If the problem is distributed I have to do something different
    if (Problem_has_been_distributed)
    {
      // How many entries do we store locally?
      const unsigned n_row_local = Dof_distribution_pt->nrow_local();
 
      if (Saved_dof_pt->size() != n_row_local)
      {
        throw OomphLibError("The number of stored values is not equal to the "
                            "current number of dofs\n",
                            OOMPH_CURRENT_FUNCTION,
                            OOMPH_EXCEPTION_LOCATION);
      }
 
      // Transfer the values over
      for (unsigned long n = 0; n < n_row_local; n++)
      {
        *(this->Dof_pt[n]) = (*Saved_dof_pt)[n];
      }
    }
    // Otherwise just restore all the dofs
    else
#endif
    {
      // Find the number of dofs
      unsigned long n_dof = ndof();
 
      if (Saved_dof_pt->size() != n_dof)
      {
        throw OomphLibError("The number of stored values is not equal to the "
                            "current number of dofs\n",
                            OOMPH_CURRENT_FUNCTION,
                            OOMPH_EXCEPTION_LOCATION);
      }
 
      // Transfer the values over
      for (unsigned long n = 0; n < n_dof; n++)
      {
        dof(n) = (*Saved_dof_pt)[n];
      }
    }
 
    // Delete the memory
    delete Saved_dof_pt;
    Saved_dof_pt = 0;
  }
 
  //======================================================================
  /// Overwrite the dofs with the entries of the eigenvector passed in.
  /// The eigenvector must have as many rows as the problem has dofs,
  /// otherwise an OomphLibError is thrown. A copy of the vector is
  /// redistributed to the dof distribution and its locally stored
  /// entries are assigned to the dofs; when compiled with MPI the dofs
  /// are synchronised afterwards.
  //======================================================================
  void Problem::assign_eigenvector_to_dofs(DoubleVector& eigenvector)
  {
    // The global size of the eigenvector has to match the number of dofs
    const unsigned long n_total = ndof();
    if (eigenvector.nrow() != n_total)
    {
      std::ostringstream error_message;
      error_message << "Eigenvector has size " << eigenvector.nrow()
                    << ", not equal to the number of dofs in the problem,"
                    << n_total << std::endl;

      throw OomphLibError(
        error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }

    // Work on a copy so that the caller's vector keeps its distribution;
    // bring the copy into the distribution used by the dofs
    DoubleVector local_copy(eigenvector);
    local_copy.redistribute(this->Dof_distribution_pt);

    // Transfer the locally stored entries into the dofs
    const unsigned long n_local = local_copy.nrow_local();
    for (unsigned long i = 0; i < n_local; ++i)
    {
      dof(i) = local_copy[i];
    }

// Make the new values known on the other processors
#ifdef OOMPH_HAS_MPI
    this->synchronise_all_dofs();
#endif
  }
 
 
  //======================================================================
  /// Add the eigenvector passed to the function to the dofs with
  /// magnitude epsilon, i.e. each locally stored dof is incremented by
  /// epsilon times the corresponding eigenvector entry.
  ///
  /// \param epsilon     Scaling factor applied to the eigenvector.
  /// \param eigenvector Vector whose global number of rows must equal
  ///                    ndof(); it is not modified. A copy of it is
  ///                    redistributed to the dof distribution and that
  ///                    copy is what gets added to the dofs.
  ///
  /// Throws an OomphLibError if the eigenvector has the wrong size.
  /// When compiled with MPI the dofs are synchronised afterwards.
  //======================================================================
  void Problem::add_eigenvector_to_dofs(const double& epsilon,
                                        const DoubleVector& eigenvector)
  {
    const unsigned long n_dof = ndof();
    // Check that the eigenvector has the correct size
    if (eigenvector.nrow() != n_dof)
    {
      std::ostringstream error_message;
      error_message << "Eigenvector has size " << eigenvector.nrow()
                    << ", not equal to the number of dofs in the problem,"
                    << n_dof << std::endl;

      throw OomphLibError(
        error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }

    // Ensure that the eigenvector distribution matches the dof distribution
    // Copy vector
    DoubleVector eigenvector_dof = eigenvector;
    // Redistribute the copy to the dof distribution
    eigenvector_dof.redistribute(this->Dof_distribution_pt);

    // Loop over the dofs and add the eigenvector.
    // Only use local values, and take them from the redistributed copy so
    // that local index n refers to the same row as dof(n), regardless of
    // how the vector that was passed in is distributed.
    const unsigned long n_local = eigenvector_dof.nrow_local();
    for (unsigned long n = 0; n < n_local; n++)
    {
      dof(n) += epsilon * eigenvector_dof[n];
    }
// Of course we now need to synchronise
#ifdef OOMPH_HAS_MPI
    this->synchronise_all_dofs();
#endif
  }
 
 
  //================================================================
  /// General Newton solver. Requires only a convergence tolerance.
  /// The linear solver takes a pointer to the problem (which defines
  /// the Jacobian \b J and the residual Vector \b r) and returns
  /// the solution \b x of the system
  /// \f[ {\bf J} {\bf x} = - \bf{r} \f].
  ///
  /// NOTE(review): the update step below *subtracts* the vector returned
  /// by the linear solver from the dofs, so the sign convention in the
  /// formula above should be confirmed against the linear solver.
  ///
  /// Outline of the iteration as implemented here:
  /// - actions_before_newton_solve() is called once, then for each
  ///   iteration actions_before_newton_step(), the linear solve, the
  ///   update of the dofs, actions_after_newton_step() and
  ///   actions_before_newton_convergence_check().
  /// - If Problem_is_nonlinear is set, the residuals are recomputed after
  ///   each step and the iteration stops once dx.max() drops below
  ///   Newton_solver_tolerance. If it is not set, no residuals are
  ///   recomputed and the loop ends after a single step.
  /// - A NewtonSolverError is thrown if the maximum residual exceeds
  ///   Max_residuals or if Max_newton_iterations steps have been taken
  ///   without convergence.
  /// - With Use_globally_convergent_newton_method the update is done by
  ///   globally_convergent_line_search(); this option throws an
  ///   OomphLibError if MPI has been initialised.
  /// - If the problem has no dofs, the action functions are called and the
  ///   function returns without solving anything.
  ///
  /// The maximum residual of every convergence check is appended to
  /// Max_res and the number of linear solves is recorded in
  /// Nnewton_iter_taken.
  //================================================================
  void Problem::newton_solve()
  {
    // Initialise timers
    double total_linear_solver_time = 0.0;
    double t_start = TimingHelpers::timer();
    // Forget the residual history of any previous solve
    Max_res.clear();

    // Find total number of dofs
    unsigned long n_dofs = ndof();

    // Set up the Vector to hold the solution. Note that the same vector
    // is also used to hold the residuals during the convergence checks.
    DoubleVector dx;

    //-----Variables for the globally convergent Newton method------

    // Set up the vector to hold the gradient
    DoubleVector gradient;

    // Other variables: half the sum of the squared residuals and the
    // maximum step length handed to the line search
    double half_residual_squared = 0.0;
    double max_step = 0.0;

    //--------------------------------------------------------------

    // Set the counter (number of passes through the Newton loop)
    unsigned count = 0;
    // Set the loop flag; the loop terminates once this is set to zero
    unsigned LOOP_FLAG = 1;

    if (Use_globally_convergent_newton_method)
    {
#ifdef OOMPH_HAS_MPI
      // Break if running in parallel
      if (MPI_Helpers::mpi_has_been_initialised())
      {
        std::ostringstream error_stream;
        error_stream << "Globally convergent Newton method has not been "
                     << "implemented in parallel yet!" << std::endl;
        throw OomphLibError(
          error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
      }
#endif

      // Get gradient
      Linear_solver_pt->enable_computation_of_gradient();
      // Reset the gradient (clear it), since the number of dofs and
      // hence the size of the DoubleVector might have changed
      Linear_solver_pt->reset_gradient();
    }

    // Update anything that needs updating
    actions_before_newton_solve();

    // Reset number of Newton iterations taken
    Nnewton_iter_taken = 0;

    // Now do the Newton loop
    do
    {
      count++;

      // Do any updates that are required
      actions_before_newton_step();


      // No degrees of freedom? What are you solving for?
      if (n_dofs == 0)
      {
        oomph_info << std::endl << std::endl << std::endl;
        oomph_info << "This is a bit bizarre: The problem has no dofs."
                   << std::endl;
        oomph_info
          << "I'll just return from the Newton solver without doing anything."
          << std::endl;

        // Do any updates that would have been performed
        // NOTE(review): actions_before_newton_convergence_check() is called
        // twice here (before and after actions_after_newton_step());
        // confirm whether the first call is intentional.
        actions_before_newton_convergence_check();
        actions_after_newton_step();
        actions_before_newton_convergence_check();
        actions_after_newton_solve();

        oomph_info << "I hope this is what you intended me to do..."
                   << std::endl;
        oomph_info << std::endl
                   << "Note: All actions_...() functions were called"
                   << std::endl;
        oomph_info << std::endl << "      before returning." << std::endl;
        oomph_info << std::endl << std::endl << std::endl;
        return;
      }

      // Calculate initial residuals (first pass through the loop only)
      if (count == 1)
      {
        // Is the problem nonlinear? If not ignore the pre-iteration
        // convergence check.
        if (Problem_is_nonlinear)
        {
#ifdef OOMPH_HAS_MPI
          // Synchronise the solution on different processors (on each submesh)
          this->synchronise_all_dofs();
#endif

          actions_before_newton_convergence_check();
          // At this point dx is used to hold the residuals
          dx.clear();
          get_residuals(dx);

          // Get half of squared residual and find maximum step length
          // for step length control
          if (Use_globally_convergent_newton_method)
          {
            half_residual_squared = 0.0;
            // Sum of the squares of the current dof values
            double sum = 0.0;
            for (unsigned i = 0; i < n_dofs; i++)
            {
              sum += (*Dof_pt[i]) * (*Dof_pt[i]);
              half_residual_squared += dx[i] * dx[i];
            }
            half_residual_squared *= 0.5;
            // Maximum step: 100 times the larger of the norm of the dofs
            // and the number of dofs
            max_step = 100.0 * std::max(sqrt(sum), double(n_dofs));
          }

          // Get maximum residuals
          double maxres = dx.max();
          Max_res.push_back(maxres);

          if (!Shut_up_in_newton_solve)
          {
            // Let's output the residuals
            // unsigned n_row_local = dx.distribution_pt()->nrow_local();
            // unsigned first_row = dx.distribution_pt()->first_row();
            // for(unsigned n=0;n<n_row_local;n++)
            //{
            // oomph_info << "residual: " << n + first_row << " " << dx[n] <<
            // "\n";
            //}

            oomph_info << "\nInitial Maximum residuals " << maxres << std::endl;
          }

          // Already converged before taking any step: leave the loop, unless
          // the user has requested that at least one step is always taken.
          // The "continue" jumps to the while-test at the end of the loop,
          // which fails because LOOP_FLAG has been set to zero.
          if ((maxres < Newton_solver_tolerance) &&
              (!Always_take_one_newton_step))
          {
            LOOP_FLAG = 0;
            continue;
          }
        }
        else
        {
          if (!Shut_up_in_newton_solve)
          {
            oomph_info
              << "Linear problem -- convergence in one iteration assumed."
              << std::endl;
          }
        }
      }


      // Increment number of Newton iterations taken
      Nnewton_iter_taken++;

      // Initialise timer for linear solver
      double t_solver_start = TimingHelpers::timer();

      // Now do the linear solve -- recycling Jacobian if requested
      if (Jacobian_reuse_is_enabled && Jacobian_has_been_computed)
      {
        if (!Shut_up_in_newton_solve)
        {
          oomph_info << "Not recomputing Jacobian! " << std::endl;
        }

        // If we're doing the first iteration and the problem is nonlinear,
        // the residuals have already been computed above during the
        // initial convergence check. Otherwise compute them here.
        if ((count != 1) || (!Problem_is_nonlinear)) get_residuals(dx);

        // Backup residuals: the copy acts as the right-hand side so that
        // dx can receive the result of the resolve
        DoubleVector resid(dx);

        // Resolve
        Linear_solver_pt->resolve(resid, dx);
      }
      else
      {
        if (Jacobian_reuse_is_enabled)
        {
          if (!Shut_up_in_newton_solve)
          {
            oomph_info << "Enabling resolve" << std::endl;
          }
          Linear_solver_pt->enable_resolve();
        }
        // Full linear solve; the result is returned in dx
        Linear_solver_pt->solve(this, dx);
        // Remember that a Jacobian is now available for later reuse
        Jacobian_has_been_computed = true;
      }

      // End of linear solver
      double t_solver_end = TimingHelpers::timer();
      total_linear_solver_time += t_solver_end - t_solver_start;

      if (!Shut_up_in_newton_solve)
      {
        oomph_info << std::endl;
        oomph_info << "Time for linear solver (ndof=" << n_dofs << "): "
                   << TimingHelpers::convert_secs_to_formatted_string(
                        t_solver_end - t_solver_start)
                   << std::endl
                   << std::endl;
      }

      // Subtract the new values from the true dofs: first bring dx into
      // the distribution of the dofs so that local indices agree
      dx.redistribute(Dof_distribution_pt);
      double* dx_pt = dx.values_pt();
      unsigned ndof_local = Dof_distribution_pt->nrow_local();

      if (Use_globally_convergent_newton_method)
      {
        // Get the gradient
        Linear_solver_pt->get_gradient(gradient);

        // Flip the sign of dx so that the line search can *add* multiples
        // of it to the old dof values
        for (unsigned i = 0; i < ndof_local; i++)
        {
          dx_pt[i] *= -1.0;
        }

        // Update with steplength control
        // Keep a copy of the dofs before the update
        Vector<double> unknowns_old(ndof_local);

        for (unsigned i = 0; i < ndof_local; i++)
        {
          unknowns_old[i] = *Dof_pt[i];
        }

        // The line search updates the dofs and overwrites
        // half_residual_squared with the value at the new point
        double half_residual_squared_old = half_residual_squared;
        globally_convergent_line_search(unknowns_old,
                                        half_residual_squared_old,
                                        gradient,
                                        dx,
                                        half_residual_squared,
                                        max_step);
      }
      // direct Newton update
      else
      {
        for (unsigned l = 0; l < ndof_local; l++)
        {
          *Dof_pt[l] -= Relaxation_factor * dx_pt[l];
        }
      }
#ifdef OOMPH_HAS_MPI
      // Synchronise the solution on different processors (on each submesh)
      this->synchronise_all_dofs();
#endif

      // Do any updates that are required
      actions_after_newton_step();
      actions_before_newton_convergence_check();

      // Maximum residuals; stays at zero for a linear problem so that the
      // convergence test below is passed after a single step
      double maxres = 0.0;
      // If the user has declared that the Problem is linear
      // we ignore the convergence check
      if (Problem_is_nonlinear)
      {
        // Get the maximum residuals
        // maxres = std::fabs(*std::max_element(dx.begin(),dx.end(),
        //                                    AbsCmp<double>()));
        // oomph_info << "Maxres correction " << maxres << "\n";

        // Calculate the new residuals
        dx.clear();
        get_residuals(dx);

        // Get the maximum residuals
        maxres = dx.max();
        Max_res.push_back(maxres);

        if (!Shut_up_in_newton_solve)
        {
          oomph_info << "Newton Step " << count << ": Maximum residuals "
                     << maxres << std::endl
                     << std::endl;
        }
      }

      // If we have converged jump straight to the test at the end of the loop
      if (maxres < Newton_solver_tolerance)
      {
        LOOP_FLAG = 0;
        continue;
      }

      // This section will not be reached if we have converged already
      // If the maximum residual is too large or the maximum number
      // of iterations has been reached, give up
      if ((maxres > Max_residuals) || (count == Max_newton_iterations))
      {
        // Print a warning -- regardless of what the throw does
        if (maxres > Max_residuals)
        {
          oomph_info << "Max. residual (" << Max_residuals
                     << ") has been exceeded in Newton solver." << std::endl;
        }
        if (count == Max_newton_iterations)
        {
          oomph_info << "Reached max. number of iterations ("
                     << Max_newton_iterations << ") in Newton solver."
                     << std::endl;
        }
        // Now throw...
        throw NewtonSolverError(count, maxres);
      }

    } while (LOOP_FLAG);

    // Now update anything that needs updating
    actions_after_newton_solve();

    // Finalise/doc timings
    if (!Shut_up_in_newton_solve)
    {
      oomph_info << std::endl;
      oomph_info << "Total time for linear solver (ndof=" << n_dofs << "): "
                 << TimingHelpers::convert_secs_to_formatted_string(
                      total_linear_solver_time)
                 << std::endl;
    }

    double t_end = TimingHelpers::timer();
    double total_time = t_end - t_start;

    if (!Shut_up_in_newton_solve)
    {
      oomph_info << "Total time for Newton solver (ndof=" << n_dofs << "): "
                 << TimingHelpers::convert_secs_to_formatted_string(total_time)
                 << std::endl;
    }
    // Report the share of the time spent outside the linear solver; guard
    // against a division by zero if the timer did not advance
    if (total_time > 0.0)
    {
      if (!Shut_up_in_newton_solve)
      {
        oomph_info << "Time outside linear solver        : "
                   << (total_time - total_linear_solver_time) / total_time *
                        100.0
                   << " %" << std::endl;
      }
    }
    else
    {
      if (!Shut_up_in_newton_solve)
      {
        oomph_info << "Time outside linear solver        : "
                   << "[too fast]" << std::endl;
      }
    }
    if (!Shut_up_in_newton_solve) oomph_info << std::endl;
  }
 
  //========================================================================
  /// Helper function for the globally convergent Newton solver:
  /// backtracking line search along the direction newton_dir, starting
  /// from the dof values x_old. On return the dofs hold either an
  /// accepted trial point x_old + lambda * newton_dir or, if the step
  /// length fell below its lower bound, the original values x_old.
  ///
  /// \param x_old  Dof values before the step.
  /// \param half_residual_squared_old  Half the sum of the squared
  ///        residuals at x_old.
  /// \param gradient  Gradient used to compute the initial slope
  ///        (its dot product with newton_dir).
  /// \param newton_dir  Search direction; scaled in place if its norm
  ///        exceeds stpmax.
  /// \param half_residual_squared  Output: half the sum of the squared
  ///        residuals at the last trial point that was evaluated.
  /// \param stpmax  Maximum permitted length of the step.
  ///
  /// NOTE(review): all loops run over ndof() entries, so x_old, gradient
  /// and newton_dir are assumed to hold at least that many (locally
  /// accessible) values -- confirm against the caller.
  /// NOTE(review): the structure looks like the classic backtracking line
  /// search with quadratic/cubic interpolation of the step length --
  /// confirm the intended reference.
  //========================================================================
  void Problem::globally_convergent_line_search(
    const Vector<double>& x_old,
    const double& half_residual_squared_old,
    DoubleVector& gradient,
    DoubleVector& newton_dir,
    double& half_residual_squared,
    const double& stpmax)
  {
    // Fraction of the decrease predicted by the initial slope that a trial
    // step must achieve in order to be accepted
    const double min_fct_decrease = 1.0e-4;
    // Tolerance used to define the smallest admissible step length
    double convergence_tol_on_x = 1.0e-16;
    // Function value and step length of the previous trial (used by the
    // cubic model below)
    double f_aux = 0.0;
    double lambda_aux = 0.0;
    double proposed_lambda;
    unsigned long n_dof = ndof();

    // Euclidean norm of the search direction
    double sum = 0.0;
    for (unsigned i = 0; i < n_dof; i++)
    {
      sum += newton_dir[i] * newton_dir[i];
    }
    sum = sqrt(sum);
    // Scale the direction down if the full step would be longer than stpmax
    if (sum > stpmax)
    {
      for (unsigned i = 0; i < n_dof; i++)
      {
        newton_dir[i] *= stpmax / sum;
      }
    }
    // Initial slope: dot product of the gradient and the search direction
    double slope = 0.0;
    for (unsigned i = 0; i < n_dof; i++)
    {
      slope += gradient[i] * newton_dir[i];
    }
    // A non-negative slope means the direction is not a descent direction.
    // Only a warning object is created here; nothing is thrown and the
    // search carries on.
    if (slope >= 0.0)
    {
      std::ostringstream warn_message;
      warn_message << "WARNING: Non-negative slope, probably due to a "
                   << " roundoff \nproblem in the linesearch: slope=" << slope
                   << "\n";
      OomphLibWarning(warn_message.str(),
                      "Problem::globally_convergent_line_search()",
                      OOMPH_EXCEPTION_LOCATION);
    }
    // Largest change of any dof in a full step, relative to max(|x_old|,1)
    double test = 0.0;
    for (unsigned i = 0; i < n_dof; i++)
    {
      double temp =
        std::fabs(newton_dir[i]) / std::max(std::fabs(x_old[i]), 1.0);
      if (temp > test) test = temp;
    }
    // Smallest step length that is still considered meaningful.
    // NOTE(review): if every entry of newton_dir is zero, test remains 0.0
    // and this is a division by zero -- confirm that this cannot happen or
    // that the resulting behaviour is intended.
    double lambda_min = convergence_tol_on_x / test;
    // Always try the full step first
    double lambda = 1.0;
    while (true)
    {
      // Move the dofs to the trial point
      for (unsigned i = 0; i < n_dof; i++)
      {
        *Dof_pt[i] = x_old[i] + lambda * newton_dir[i];
      }

      // Evaluate current residuals
      DoubleVector residuals;
      get_residuals(residuals);
      half_residual_squared = 0.0;
      for (unsigned i = 0; i < n_dof; i++)
      {
        half_residual_squared += residuals[i] * residuals[i];
      }
      half_residual_squared *= 0.5;

      // Step length has become too small: restore the old dof values and
      // give up. Note that half_residual_squared still holds the value
      // computed at the rejected trial point.
      if (lambda < lambda_min)
      {
        for (unsigned i = 0; i < n_dof; i++) *Dof_pt[i] = x_old[i];

        std::ostringstream warn_message;
        warn_message << "WARNING: Line search converged on x only!\n";
        OomphLibWarning(warn_message.str(),
                        "Problem::globally_convergent_line_search()",
                        OOMPH_EXCEPTION_LOCATION);
        return;
      }
      // Sufficient decrease achieved: accept the trial point
      else if (half_residual_squared <=
               half_residual_squared_old + min_fct_decrease * lambda * slope)
      {
        oomph_info << "Returning from linesearch with lambda=" << lambda
                   << std::endl;
        return;
      }
      // Otherwise backtrack: propose a shorter step
      else
      {
        // First backtrack (after the full step): step length from a
        // formula involving the old value, the slope and the new value
        if (lambda == 1.0)
        {
          proposed_lambda =
            -slope /
            (2.0 * (half_residual_squared - half_residual_squared_old - slope));
        }
        // Subsequent backtracks: use the two most recent trial points
        // (lambda and lambda_aux) to build a cubic with coefficients
        // a_poly and b_poly
        else
        {
          double r1 =
            half_residual_squared - half_residual_squared_old - lambda * slope;
          double r2 = f_aux - half_residual_squared_old - lambda_aux * slope;
          double a_poly =
            (r1 / (lambda * lambda) - r2 / (lambda_aux * lambda_aux)) /
            (lambda - lambda_aux);
          double b_poly = (-lambda_aux * r1 / (lambda * lambda) +
                           lambda * r2 / (lambda_aux * lambda_aux)) /
                          (lambda - lambda_aux);
          // Cubic coefficient vanishes: fall back to the quadratic formula
          if (a_poly == 0.0)
          {
            proposed_lambda = -slope / (2.0 * b_poly);
          }
          else
          {
            double discriminant = b_poly * b_poly - 3.0 * a_poly * slope;
            // Negative discriminant: simply halve the step
            if (discriminant < 0.0)
            {
              proposed_lambda = 0.5 * lambda;
            }
            // Two algebraically equivalent expressions, selected by the
            // sign of b_poly
            else if (b_poly <= 0.0)
            {
              proposed_lambda = (-b_poly + sqrt(discriminant)) / (3.0 * a_poly);
            }
            else
            {
              proposed_lambda = -slope / (b_poly + sqrt(discriminant));
            }
          }
          // Never shrink by less than a factor of two. This cap is only
          // applied in this branch, not after the first backtrack.
          if (proposed_lambda > 0.5 * lambda)
          {
            proposed_lambda = 0.5 * lambda;
          }
        }
      }
      // Remember the current trial for the next cubic model
      lambda_aux = lambda;
      f_aux = half_residual_squared;
      // Never shrink by more than a factor of ten
      lambda = std::max(proposed_lambda, 0.1 * lambda);
    }
  }
 
 
  //========================================================================
  /// Solve a steady problem, in the context of an overall unsteady problem.
  /// This is achieved by setting the weights in the timesteppers to be zero
  /// which has the effect of rendering them steady timesteppers
  /// The optional argument max_adapt specifies the max. number of
  /// adaptations of all refineable submeshes are performed to
  /// achieve the the error targets specified in the refineable submeshes.
  //========================================================================
  void Problem::steady_newton_solve(unsigned const& max_adapt)
  {
    // Find out how many timesteppers there are
    unsigned n_time_steppers = ntime_stepper();
 
    // Vector of bools to store the is_steady status of the various
    // timesteppers when we came in here
    std::vector<bool> was_steady(n_time_steppers);
 
    // Loop over them all and make them (temporarily) static
    for (unsigned i = 0; i < n_time_steppers; i++)
    {
      was_steady[i] = time_stepper_pt(i)->is_steady();
      time_stepper_pt(i)->make_steady();
    }
 
    try
    {
      // Solve the non-linear problem with Newton's method
      if (max_adapt == 0)
      {
        newton_solve();
      }
      else
      {
        newton_solve(max_adapt);
      }
    }
    // Catch any exceptions thrown in the Newton solver
    catch (NewtonSolverError& error)
    {
      oomph_info << std::endl
                 << "USER-DEFINED ERROR IN NEWTON SOLVER " << std::endl;
      // Check whether it's the linear solver
      if (error.linear_solver_error())
      {
        oomph_info << "ERROR IN THE LINEAR SOLVER" << std::endl;
      }
      // Check to see whether we have reached Max_iterations
      else if (error.iterations() == Max_newton_iterations)
      {
        oomph_info << "MAXIMUM NUMBER OF ITERATIONS (" << error.iterations()
                   << ") REACHED WITHOUT CONVERGENCE " << std::endl;
      }
      // If not, it must be that we have exceeded the maximum residuals
      else
      {
        oomph_info << "MAXIMUM RESIDUALS: " << error.maxres()
                   << " EXCEEDS PREDEFINED MAXIMUM " << Max_residuals
                   << std::endl;
      }
 
      // Die horribly!!
      std::ostringstream error_stream;
      error_stream << "Error occured in Newton solver. " << std::endl;
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
 
 
    // Reset the is_steady status of all timesteppers that
    // weren't already steady when we came in here and reset their
    // weights
    for (unsigned i = 0; i < n_time_steppers; i++)
    {
      if (!was_steady[i])
      {
        time_stepper_pt(i)->undo_make_steady();
      }
    }
 
    // Since we performed a steady solve, the history values
    // now have to be set as if we had performed an impulsive start from
    // the current solution. This ensures that the time-derivatives
    // evaluate to zero even now that the timesteppers have been
    // reactivated.
    assign_initial_values_impulsive();
  }
 
  //===========================================================================
  /// Perform a basic continuation step using Newton's method. The governing
  /// parameter of the problem is passed as a pointer to the routine. The
  /// number of Newton steps taken is returned
  //==========================================================================
  unsigned Problem::newton_solve_continuation(double* const& parameter_pt)
  {
    // Set up memory for z
    // unsigned long n_dofs = ndof();
    // LinearAlgebraDistribution dist(Communicator_pt,n_dofs,false);
    // DoubleVector z(&dist,0.0);
    DoubleVector z;
    // Call the solver
    return newton_solve_continuation(parameter_pt, z);
  }
 
 
  //===================================================================
  /// This function performs a basic continuation step using the Newton method.
  /// The number of Newton steps taken is returned, to be used in any
  /// external step-size control routines.
  /// The governing parameter of the problem is passed as a pointer to the
  /// routine, as is the sign of the Jacobian and a Vector in which
  /// to store the derivatives wrt the parameter, if required.
  //==================================================================
  unsigned Problem::newton_solve_continuation(double* const& parameter_pt,
                                              DoubleVector& z)
  {
    // Find the total number of dofs
    // unsigned long n_dofs = ndof();
 
    // Find the local number of dofs
    unsigned ndof_local = Dof_distribution_pt->nrow_local();
 
    // create the distribution (not distributed)
    // LinearAlgebraDistribution dist(this->communicator_pt(),n_dofs,false);
 
    // Assign memory for solutions of the equations
    // DoubleVector y(&dist,0.0);
    DoubleVector y;
 
    // Assign memory for the dot products of the uderivatives and y and z
    double uderiv_dot_y = 0.0, uderiv_dot_z = 0.0;
    // Set and initialise the counter
    unsigned count = 0;
    // Set the loop flag
    unsigned LOOP_FLAG = 1;
 
    // Update anything that needs updating
    actions_before_newton_solve();
 
    // Check the arc-length constraint
    double arc_length_constraint_residual = 0.0;
 
    // Are we storing the matrix in the linear solve
    bool enable_resolve = Linear_solver_pt->is_resolve_enabled();
 
    // For this problem, we must store the residuals
    Linear_solver_pt->enable_resolve();
 
    // Now do the Newton loop
    do
    {
      count++;
 
      // Do any updates that are required
      actions_before_newton_step();
 
      // Calculate initial residuals
      if (count == 1)
      {
#ifdef OOMPH_HAS_MPI
        // Synchronise the solution on different processors (on each submesh)
        this->synchronise_all_dofs();
#endif
 
        actions_before_newton_convergence_check();
        y.clear();
        get_residuals(y);
        // Get maximum residuals, using our own abscmp function
        double maxres = y.max();
 
        // Assemble the residuals for the arc-length step
        arc_length_constraint_residual = 0.0;
        // Add the variables
        for (unsigned long l = 0; l < ndof_local; l++)
        {
          arc_length_constraint_residual +=
            dof_derivative(l) * (*Dof_pt[l] - dof_current(l));
        }
 
        // Now reduce if we have been distributed
#ifdef OOMPH_HAS_MPI
        double arc_length_cons_res2 = arc_length_constraint_residual;
        if ((Dof_distribution_pt->distributed()) &&
            (Dof_distribution_pt->communicator_pt()->nproc() > 1))
        {
          MPI_Allreduce(&arc_length_constraint_residual,
                        &arc_length_cons_res2,
                        1,
                        MPI_DOUBLE,
                        MPI_SUM,
                        Dof_distribution_pt->communicator_pt()->mpi_comm());
        }
        arc_length_constraint_residual = arc_length_cons_res2;
#endif
 
        arc_length_constraint_residual *= Theta_squared;
        arc_length_constraint_residual +=
          Parameter_derivative * (*parameter_pt - Parameter_current) -
          Ds_current;
 
        // Is it the max
        if (std::fabs(arc_length_constraint_residual) > maxres)
        {
          maxres = std::fabs(arc_length_constraint_residual);
        }
 
        // Find the max
        if (!Shut_up_in_newton_solve)
        {
          oomph_info << "Initial Maximum residuals " << maxres << std::endl;
        }
 
        // If we are below the Tolerance, then return immediately
        if ((maxres < Newton_solver_tolerance) &&
            (!Always_take_one_newton_step))
        {
          LOOP_FLAG = 0;
          count = 0;
          continue;
        }
      }
 
      // If it's the block hopf solver we need to solve for both rhs's
      // simultaneously. This is because the block decomposition involves
      // solves with two different matrices and storing both at once to
      // allow general resolves would be more expensive than necessary.
      if (dynamic_cast<BlockHopfLinearSolver*>(Linear_solver_pt))
      {
        // Get the vector dresiduals/dparameter
        z.clear();
        get_derivative_wrt_global_parameter(parameter_pt, z);
 
        // Copy rhs vector into local storage so it doesn't get overwritten
        // if the linear solver decides to initialise the solution vector, say,
        // which it's quite entitled to do!
        DoubleVector input_z(z);
 
        // Solve the system for the two right-hand sides.
        dynamic_cast<BlockHopfLinearSolver*>(Linear_solver_pt)
          ->solve_for_two_rhs(this, y, input_z, z);
      }
      // Otherwise
      else
      {
        // Solve the standard problem
        Linear_solver_pt->solve(this, y);
 
        // Get the vector dresiduals/dparameter
        z.clear();
        get_derivative_wrt_global_parameter(parameter_pt, z);
 
        // Copy rhs vector into local storage so it doesn't get overwritten
        // if the linear solver decides to initialise the solution vector, say,
        // which it's quite entitled to do!
        DoubleVector input_z(z);
 
        // Redistribute the RHS to match the linear solver
        // input_z.redistribute(Linear_solver_pt->distribution_pt());
        // Do not clear z because we assume that it has dR/dparam
        z.clear();
        // Now resolve the system with the new RHS
        Linear_solver_pt->resolve(input_z, z);
      }
 
      // Redistribute the results into the natural distribution
      y.redistribute(Dof_distribution_pt);
      z.redistribute(Dof_distribution_pt);
 
      // Now we need to calculate dparam, for which we must calculate the
      // dot product of the derivatives and y and z
      // Reset these values to zero
      uderiv_dot_y = 0.0;
      uderiv_dot_z = 0.0;
      // Now calculate the dot products of the derivative and the solutions
      // of the linear system
      // Cache pointers to the data in the distributed vectors
      double* const y_pt = y.values_pt();
      double* const z_pt = z.values_pt();
      for (unsigned long l = 0; l < ndof_local; l++)
      {
        uderiv_dot_y += dof_derivative(l) * y_pt[l];
        uderiv_dot_z += dof_derivative(l) * z_pt[l];
      }
 
      // Now reduce if we have been distributed
#ifdef OOMPH_HAS_MPI
      // Create send and receive arrays of size two
      double uderiv_dot[2];
      double uderiv_dot2[2];
      uderiv_dot[0] = uderiv_dot_y;
      uderiv_dot[1] = uderiv_dot_z;
      uderiv_dot2[0] = uderiv_dot_y;
      uderiv_dot2[1] = uderiv_dot_z;
      // Now reduce both together
      if ((Dof_distribution_pt->distributed()) &&
          (Dof_distribution_pt->communicator_pt()->nproc() > 1))
      {
        MPI_Allreduce(uderiv_dot,
                      uderiv_dot2,
                      2,
                      MPI_DOUBLE,
                      MPI_SUM,
                      Dof_distribution_pt->communicator_pt()->mpi_comm());
      }
      uderiv_dot_y = uderiv_dot2[0];
      uderiv_dot_z = uderiv_dot2[1];
#endif
 
      // Now scale the results
      uderiv_dot_y *= Theta_squared;
      uderiv_dot_z *= Theta_squared;
 
      // The set the change in the parameter, given by the pseudo-arclength
      // equation. Note that here we are assuming that the arc-length
      // equation is always exactly zero,
      // which seems to work OK, and saves on some storage.
      // In fact, it's more subtle than that. If we include this
      // proper residual then we will have to solve the eigenproblem.
      // This will make the solver more robust and *should* be done
      // ... at some point.
      double dparam = (arc_length_constraint_residual - uderiv_dot_y) /
                      (Parameter_derivative - uderiv_dot_z);
 
      // Set the new value of the parameter
      *parameter_pt -= dparam;
 
      // Update the values of the other degrees of freedom
      for (unsigned long l = 0; l < ndof_local; l++)
      {
        *Dof_pt[l] -= y_pt[l] - dparam * z_pt[l];
      }
 
      // Calculate the new residuals
#ifdef OOMPH_HAS_MPI
      // Synchronise the solution on different processors (on each submesh)
      this->synchronise_all_dofs();
#endif
 
      // Do any updates that are required
      actions_after_newton_step();
      actions_before_newton_convergence_check();
 
      y.clear();
      get_residuals(y);
 
      // Get the maximum residuals
      double maxres = y.max();
 
      // Assemble the residuals for the arc-length step
      arc_length_constraint_residual = 0.0;
      // Add the variables
      for (unsigned long l = 0; l < ndof_local; l++)
      {
        arc_length_constraint_residual +=
          dof_derivative(l) * (*Dof_pt[l] - dof_current(l));
      }
 
      // Now reduce if we have been distributed
#ifdef OOMPH_HAS_MPI
      double arc_length_cons_res2 = arc_length_constraint_residual;
      if ((Dof_distribution_pt->distributed()) &&
          (Dof_distribution_pt->communicator_pt()->nproc() > 1))
      {
        MPI_Allreduce(&arc_length_constraint_residual,
                      &arc_length_cons_res2,
                      1,
                      MPI_DOUBLE,
                      MPI_SUM,
                      Dof_distribution_pt->communicator_pt()->mpi_comm());
      }
      arc_length_constraint_residual = arc_length_cons_res2;
#endif
 
      arc_length_constraint_residual *= Theta_squared;
      arc_length_constraint_residual +=
        Parameter_derivative * (*parameter_pt - Parameter_current) - Ds_current;
 
      // Is it the max
      if (std::fabs(arc_length_constraint_residual) > maxres)
      {
        maxres = std::fabs(arc_length_constraint_residual);
      }
 
      if (!Shut_up_in_newton_solve)
      {
        oomph_info << "Continuation Step " << count << ":  Maximum residuals "
                   << maxres << "\n";
      }
 
      // If we have converged jump straight to the test at the end of the loop
      if (maxres < Newton_solver_tolerance)
      {
        LOOP_FLAG = 0;
        continue;
      }
 
      // This section will not be reached if we have converged already
      // If the maximum number of residuals is too high or the maximum number
      // of iterations has been reached
      if ((maxres > Max_residuals) || (count == Max_newton_iterations))
      {
        throw NewtonSolverError(count, maxres);
      }
 
    } while (LOOP_FLAG);
 
    // Now update anything that needs updating
    actions_after_newton_solve();
 
    // Reset the storage of the matrix on the linear solver to what it was
    // on entry to this routine
    if (enable_resolve)
    {
      Linear_solver_pt->enable_resolve();
    }
    else
    {
      Linear_solver_pt->disable_resolve();
    }
 
    // Return the number of Newton Steps taken
    return count;
  }
 
  //=========================================================================
  /// A function to calculate the derivatives wrt the arc-length. This version
  /// of the function actually does a linear solve so that the derivatives
  /// are calculated "exactly" rather than using the values at the Newton
  /// step just before convergence. This is only necessary in spatially adaptive
  /// problems, in which the number of degrees of freedom changes and so
  /// the appropriate derivatives must be calculated for the new variables.
  //=========================================================================
  void Problem::calculate_continuation_derivatives(double* const& parameter_pt)
  {
    // Find the number of degrees of freedom in the problem
    const unsigned long n_dofs = ndof();
 
    // create a non-distributed z vector
    LinearAlgebraDistribution dist(Communicator_pt, n_dofs, false);
 
    // Assign memory for solutions of the equations
    DoubleVector z(&dist, 0.0);
 
    // If it's the block hopf solver need to solve for both RHS
    // at once, but this would all be alleviated if we have the solve
    // for the non-residuals RHS.
    if (dynamic_cast<BlockHopfLinearSolver*>(Linear_solver_pt))
    {
      // Get the vector dresiduals/dparameter
      get_derivative_wrt_global_parameter(parameter_pt, z);
 
      // Copy rhs vector into local storage so it doesn't get overwritten
      // if the linear solver decides to initialise the solution vector, say,
      // which it's quite entitled to do!
      DoubleVector dummy(&dist, 0.0), input_z(z);
 
      // Solve for the two RHSs
      dynamic_cast<BlockHopfLinearSolver*>(Linear_solver_pt)
        ->solve_for_two_rhs(this, dummy, input_z, z);
    }
    // Otherwise we can use the normal resolve
    else
    {
      // Save the status before entry to this routine
      bool enable_resolve = Linear_solver_pt->is_resolve_enabled();
 
      // We need to do resolves
      Linear_solver_pt->enable_resolve();
 
      // Solve the standard problem, we only want to make sure that
      // we factorise the matrix, if it has not been factorised. We shall
      // ignore the return value of z.
      Linear_solver_pt->solve(this, z);
 
      // Get the vector dresiduals/dparameter
      get_derivative_wrt_global_parameter(parameter_pt, z);
 
 
      // Copy rhs vector into local storage so it doesn't get overwritten
      // if the linear solver decides to initialise the solution vector, say,
      // which it's quite entitled to do!
      DoubleVector input_z(z);
 
      // Now resolve the system with the new RHS and overwrite the solution
      Linear_solver_pt->resolve(input_z, z);
 
      // Restore the storage status of the linear solver
      if (enable_resolve)
      {
        Linear_solver_pt->enable_resolve();
      }
      else
      {
        Linear_solver_pt->disable_resolve();
      }
    }
 
    // Now, we can calculate the derivatives, etc
    calculate_continuation_derivatives(z);
  }
 
  //=======================================================================
  /// A function to calculate the derivatives with respect to the arc-length
  /// required for continuation. The argument is the solution of the
  /// linear system,
  /// Jz = dR/dparameter, that gives du/dparameter and the direction
  /// output from the newton_solve_continuation function.
  ///
  /// \param z Solution of the linear system; forwarded unchanged to
  ///        calculate_continuation_derivatives_helper().
  ///
  /// If Scale_arc_length is set, Theta_squared is rescaled using
  /// Desired_proportion_of_arc_length and the derivatives are recomputed
  /// with the new value. Desired_proportion_of_arc_length is used as a
  /// divisor and is not checked here.
  //===================================================================
  void Problem::calculate_continuation_derivatives(const DoubleVector& z)
  {
    // First pass with the current value of Theta_squared
    calculate_continuation_derivatives_helper(z);

    // Scale the value of theta if the control flag is set
    if (Scale_arc_length)
    {
      // The rescaling divides by (1 - Parameter_derivative^2). The helper
      // may flip the sign of Parameter_derivative, so the denominator
      // vanishes for -1.0 as well as for +1.0: test the square, not the
      // value itself.
      const double param_deriv_sq = Parameter_derivative * Parameter_derivative;
      if (param_deriv_sq != 1.0)
      {
        Theta_squared *= (param_deriv_sq / Desired_proportion_of_arc_length) *
                         ((1.0 - Desired_proportion_of_arc_length) /
                          (1.0 - param_deriv_sq));

        // Recalculate the continuation derivatives with the new scaled values
        calculate_continuation_derivatives_helper(z);
      }
    }
  }
 
  //=======================================================================
  /// A function to calculate the derivatives with respect to the arc-length
  /// required for continuation using finite differences.
  ///
  /// \param parameter_pt Pointer to the continuation parameter; forwarded
  ///        unchanged to calculate_continuation_derivatives_fd_helper().
  ///
  /// If Scale_arc_length is set, Theta_squared is rescaled using
  /// Desired_proportion_of_arc_length and the derivatives are recomputed
  /// with the new value. Desired_proportion_of_arc_length is used as a
  /// divisor and is not checked here.
  //===================================================================
  void Problem::calculate_continuation_derivatives_fd(
    double* const& parameter_pt)
  {
    // First pass with the current value of Theta_squared
    calculate_continuation_derivatives_fd_helper(parameter_pt);

    // Scale the value of theta if the control flag is set
    if (Scale_arc_length)
    {
      // The rescaling divides by (1 - Parameter_derivative^2). The helper
      // sets Parameter_derivative to a signed, normalised quantity, so the
      // denominator vanishes for -1.0 as well as for +1.0: test the square,
      // not the value itself.
      const double param_deriv_sq = Parameter_derivative * Parameter_derivative;
      if (param_deriv_sq != 1.0)
      {
        Theta_squared *= (param_deriv_sq / Desired_proportion_of_arc_length) *
                         ((1.0 - Desired_proportion_of_arc_length) /
                          (1.0 - param_deriv_sq));

        // Recalculate the continuation derivatives with the new scaled values
        calculate_continuation_derivatives_fd_helper(parameter_pt);
      }
    }
  }
 
  //======================================================================
  /// Report whether the pointer parameter_pt refers to memory that is a
  /// value in a Data object used within the problem. The search covers,
  /// in this order: the global data, the data of the (global) mesh and
  /// the spine data of any mesh that is a SpineMesh.
  ///
  /// \param parameter_pt The address to look for.
  /// \return true as soon as any of the searches finds the address,
  ///         false if none of them does.
  //======================================================================
  bool Problem::does_pointer_correspond_to_problem_data(
    double* const& parameter_pt)
  {
    // 1. Global data
    const unsigned n_global_data = Global_data_pt.size();
    for (unsigned d = 0; d < n_global_data; ++d)
    {
      if (Global_data_pt[d]->does_pointer_correspond_to_value(parameter_pt))
      {
        return true;
      }
    }

    // 2. Data stored in the mesh
    if (Mesh_pt->does_pointer_correspond_to_mesh_data(parameter_pt))
    {
      return true;
    }

    // 3. Spine data, which is queried separately on each SpineMesh
    const unsigned n_mesh = this->nsub_mesh();

    // No sub meshes: the only candidate is the mesh itself
    if (n_mesh == 0)
    {
      SpineMesh* const spine_mesh_pt = dynamic_cast<SpineMesh*>(Mesh_pt);
      if (spine_mesh_pt &&
          spine_mesh_pt->does_pointer_correspond_to_spine_data(parameter_pt))
      {
        return true;
      }
      return false;
    }

    // Otherwise every sub mesh that is a SpineMesh has to be checked
    for (unsigned m = 0; m < n_mesh; m++)
    {
      SpineMesh* const spine_mesh_pt = dynamic_cast<SpineMesh*>(Sub_mesh_pt[m]);
      if (spine_mesh_pt &&
          spine_mesh_pt->does_pointer_correspond_to_spine_data(parameter_pt))
      {
        return true;
      }
    }

    // Not found anywhere, so the address is not stored in the problem
    return false;
  }
 
 
  //=======================================================================
  /// A private helper function to
  /// calculate the derivatives with respect to the arc-length
  /// required for continuation. The arguments is the solution of the
  /// linear system,
  /// Jz = dR/dparameter, that gives du/dparameter and the direction
  /// output from the newton_solve_continuation function. The derivatives
  /// are stored in the ContinuationParameters namespace.
  //===================================================================
  void Problem::calculate_continuation_derivatives_helper(const DoubleVector& z)
  {
    // Find the number of degrees of freedom in the problem
    // unsigned long n_dofs = ndof();
    // Find the number of local dofs in the problem
    const unsigned long ndof_local = Dof_distribution_pt->nrow_local();
 
    // Work out the continuation direction
    // The idea is that (du/ds)_{old} . (du/ds)_{new} >= 0
    // if the direction is to remain the same.
    // du/ds_{new} = [dlambda/ds; du/ds] = [dlambda/ds ; - dlambda/ds z]
    // so (du/ds)_{new} . (du/ds)_{old}
    // = dlambda/ds [1 ; - z] . [ Parameter_derivative ; Dof_derivatives]
    // = dlambda/ds (Parameter_derivative - Dof_derivative . z)
 
    // Create a local copy of z that can be redistributed without breaking
    // the constness of z
    DoubleVector local_z(z);
 
    // Redistribute z so that it has the (natural) dof distribution
    local_z.redistribute(Dof_distribution_pt);
 
    // Calculate the local contribution to the Continuation direction
    Continuation_direction = 0.0;
    // Cache the pointer to z
    double* const local_z_pt = local_z.values_pt();
    for (unsigned long l = 0; l < ndof_local; l++)
    {
      Continuation_direction -= dof_derivative(l) * local_z_pt[l];
    }
 
    // Now reduce if we have been distributed
#ifdef OOMPH_HAS_MPI
    double cont_dir2 = Continuation_direction;
    if ((Dof_distribution_pt->distributed()) &&
        (Dof_distribution_pt->communicator_pt()->nproc() > 1))
    {
      MPI_Allreduce(&Continuation_direction,
                    &cont_dir2,
                    1,
                    MPI_DOUBLE,
                    MPI_SUM,
                    Dof_distribution_pt->communicator_pt()->mpi_comm());
    }
    Continuation_direction = cont_dir2;
#endif
 
    // Add parameter derivative
    Continuation_direction += Parameter_derivative;
 
    // Calculate the magnitude of the du/ds Vector
 
    // Note that actually, we are usually approximating by using the value at
    // newton step just before convergence, which saves one additional
    // Newton solve.
 
    // First calculate the magnitude of du/dparameter, chi
    double chi = local_z.dot(local_z);
 
    // Calculate the current derivative of the parameter wrt the arc-length
    Parameter_derivative = 1.0 / sqrt(1.0 + Theta_squared * chi);
 
    // If the dot product of the current derivative wrt the Direction
    // is less than zero, switch the sign of the derivative to ensure
    // smooth continuation
    if (Parameter_derivative * Continuation_direction < 0.0)
    {
      Parameter_derivative *= -1.0;
    }
 
    // Resize the derivatives array, if necessary
    if (!Use_continuation_timestepper)
    {
      if (Dof_derivative.size() != ndof_local)
      {
        Dof_derivative.resize(ndof_local, 0.0);
      }
    }
    // Calculate the new derivatives wrt the arc-length
    for (unsigned long l = 0; l < ndof_local; l++)
    {
      // This comes from the formulation J u_dot + dr/dlambda  lambda_dot = 0
      // on the curve and then it follows that.
      dof_derivative(l) = -Parameter_derivative * local_z_pt[l];
    }
  }
 
  //=======================================================================
  /// A private helper function to
  /// calculate the derivatives with respect to the arc-length
  /// required for continuation using finite differences.
  //===================================================================
  void Problem::calculate_continuation_derivatives_fd_helper(
    double* const& parameter_pt)
  {
    // Find the number of values
    // const unsigned long n_dofs = this->ndof();
    // Find the number of local dofs in the problem
    const unsigned long ndof_local = Dof_distribution_pt->nrow_local();
 
    // Temporary storage for the finite-difference approximation to the helper
    Vector<double> z(ndof_local);
    double length = 0.0;
    // Calculate the change in values and contribution to total length
    for (unsigned long l = 0; l < ndof_local; l++)
    {
      z[l] = (*Dof_pt[l] - Dof_current[l]) / Ds_current;
      length += Theta_squared * z[l] * z[l];
    }
 
    // Reduce if parallel
#ifdef OOMPH_HAS_MPI
    double length2 = length;
    if ((Dof_distribution_pt->distributed()) &&
        (Dof_distribution_pt->communicator_pt()->nproc() > 1))
    {
      MPI_Allreduce(&length,
                    &length2,
                    1,
                    MPI_DOUBLE,
                    MPI_SUM,
                    Dof_distribution_pt->communicator_pt()->mpi_comm());
    }
    length = length2;
#endif
 
    // Calculate change in parameter
    double Z = (*parameter_pt - Parameter_current) / Ds_current;
    length += Z * Z;
 
    // Scale the approximations to the derivatives
    length = sqrt(length);
    for (unsigned long l = 0; l < ndof_local; l++)
    {
      dof_derivative(l) = z[l] / length;
    }
    Parameter_derivative = Z / length;
  }
 
 
  /// Virtual function that is used to symmetrise the problem so that
  /// the current solution exactly satisfies any symmetries within the system.
  /// Used when adaptively solving pitchfork detection problems, when small
  /// asymmetries in the coarse solution can be magnified,
  /// leading to very inaccurate answers on the fine mesh.
  /// This is always problem-specific and must be filled in by the user.
  /// This default version does nothing except issue a warning.
  void Problem::symmetrise_eigenfunction_for_adaptive_pitchfork_tracking()
  {
    // Complete text of the warning, assembled from adjacent literals
    const std::string warning_text =
      "Warning: This function is called after spatially adapting the\n"
      "eigenfunction associated with a pitchfork bifurcation and should\n"
      "ensure that the exact (anti-)symmetries of problem are enforced\n"
      "within that eigenfunction. It is problem specific and must be\n"
      "filled in by the user if required.\n"
      "A sign of problems is if the slack paramter gets too large and\n"
      "if the solution at the Pitchfork is not symmetric.\n";

    // Constructing the (temporary) warning object is what issues it
    OomphLibWarning(
      warning_text,
      "Problem::symmetrise_eigenfunction_for_adaptive_pitchfork_tracking()",
      OOMPH_EXCEPTION_LOCATION);
  }
 
  //====================================================================
  /// Return the pointer to the parameter that is used in the bifurcation
  /// detection, as reported by the current assembly handler. If we are
  /// not tracking a bifurcation then an error will be thrown by the
  /// AssemblyHandler.
  //====================================================================
  double* Problem::bifurcation_parameter_pt() const
  {
    // The assembly handler is the only object that knows the answer
    double* const tracked_parameter_pt =
      Assembly_handler_pt->bifurcation_parameter_pt();
    return tracked_parameter_pt;
  }
 
  //====================================================================
  /// Obtain the eigenfunction calculated as part of a bifurcation
  /// tracking process. If we are not tracking a bifurcation then an
  /// error will be thrown by the AssemblyHandler.
  ///
  /// \param eigenfunction Output argument, filled in by the current
  ///        assembly handler.
  //======================================================================
  void Problem::get_bifurcation_eigenfunction(
    Vector<DoubleVector>& eigenfunction)
  {
    // Pure delegation: the current assembly handler does all the work
    this->Assembly_handler_pt->get_eigenfunction(eigenfunction);
  }
 
  //============================================================
  /// Activate the fold tracking system by changing the assembly
  /// handler and initialising it using the parameter addressed
  /// by parameter_pt.
  //============================================================
  void Problem::activate_fold_tracking(double* const& parameter_pt,
                                       const bool& block_solve)
  {
    // Reset the assembly handler to default
    reset_assembly_handler_to_default();
    // Set the new assembly handler. Note that the constructor actually
    // solves the original problem to get some initial conditions, but
    // this is OK because the RHS is always evaluated before assignment.
    Assembly_handler_pt = new FoldHandler(this, parameter_pt);
 
    // If we are using a block solver, we must set the linear solver pointer
    // to the block fold solver. The present linear solver is
    // used by the block solver and so must be passed as an argument.
    // The destructor of the Fold handler returns the linear
    // solver to the original non-block version.
    if (block_solve)
    {
      Linear_solver_pt = new AugmentedBlockFoldLinearSolver(Linear_solver_pt);
    }
  }
 
  //===============================================================
  /// Activate the generic bifurcation ///tracking system by changing the
  /// assembly handler and initialising it using the parameter addressed by
  /// parameter_pt.
  //============================================================
  void Problem::activate_bifurcation_tracking(double* const& parameter_pt,
                                              const DoubleVector& eigenvector,
                                              const bool& block_solve)
  {
    // Reset the assembly handler to default
    reset_assembly_handler_to_default();
    // Set the new assembly handler. Note that the constructor actually
    // solves the original problem to get some initial conditions, but
    // this is OK because the RHS is always evaluated before assignment.
    Assembly_handler_pt = new FoldHandler(this, parameter_pt, eigenvector);
 
    // If we are using a block solver, we must set the linear solver pointer
    // to the block fold solver. The present linear solver is
    // used by the block solver and so must be passed as an argument.
    // The destructor of the Fold handler returns the linear
    // solver to the original non-block version.
    if (block_solve)
    {
      Linear_solver_pt = new AugmentedBlockFoldLinearSolver(Linear_solver_pt);
    }
  }
 
 
  //===============================================================
  /// Activate the generic bifurcation ///tracking system by changing the
  /// assembly handler and initialising it using the parameter addressed by
  /// parameter_pt.
  //============================================================
  void Problem::activate_bifurcation_tracking(double* const& parameter_pt,
                                              const DoubleVector& eigenvector,
                                              const DoubleVector& normalisation,
                                              const bool& block_solve)
  {
    // Reset the assembly handler to default
    reset_assembly_handler_to_default();
    // Set the new assembly handler. Note that the constructor actually
    // solves the original problem to get some initial conditions, but
    // this is OK because the RHS is always evaluated before assignment.
    Assembly_handler_pt =
      new FoldHandler(this, parameter_pt, eigenvector, normalisation);
 
    // If we are using a block solver, we must set the linear solver pointer
    // to the block fold solver. The present linear solver is
    // used by the block solver and so must be passed as an argument.
    // The destructor of the Fold handler returns the linear
    // solver to the original non-block version.
    if (block_solve)
    {
      Linear_solver_pt = new AugmentedBlockFoldLinearSolver(Linear_solver_pt);
    }
  }
 
 
  //==================================================================
  /// Activate the pitchfork tracking system by changing the assembly
  /// handler and initialising it using the parameter addressed
  /// by parameter_pt and a symmetry vector. The boolean flag is
  /// used to specify whether a block solver is used, default is true.
  //===================================================================
  void Problem::activate_pitchfork_tracking(double* const& parameter_pt,
                                            const DoubleVector& symmetry_vector,
                                            const bool& block_solve)
  {
    // Reset the assembly handler to default
    reset_assembly_handler_to_default();
 
    // Set the new assembly handler. Note that the constructor actually
    // solves the original problem to get some initial conditions, but
    // this is OK because the RHS is always evaluated before assignment.
    Assembly_handler_pt = new PitchForkHandler(
      this, this->assembly_handler_pt(), parameter_pt, symmetry_vector);
 
    // If we are using a block solver, we must set the linear solver pointer
    // to the block pitchfork solver. The present linear solver is
    // used by the block solver and so must be passed as an argument.
    // The destructor of the PitchFork handler returns the linear
    // solver to the original non-block version.
    if (block_solve)
    {
      Linear_solver_pt = new BlockPitchForkLinearSolver(Linear_solver_pt);
    }
  }
 
 
  //============================================================
  /// Activate the hopf tracking system by changing the assembly
  /// handler and initialising it using the parameter addressed
  /// by parameter_pt.
  //============================================================
  void Problem::activate_hopf_tracking(double* const& parameter_pt,
                                       const bool& block_solve)
  {
    // Reset the assembly handler to default
    reset_assembly_handler_to_default();
    // Set the new assembly handler. Note that the constructor actually
    // solves the original problem to get some initial conditions, but
    // this is OK because the RHS is always evaluated before assignment.
    Assembly_handler_pt = new HopfHandler(this, parameter_pt);
 
    // If we are using a block solver, we must set the linear solver pointer
    // to the block hopf solver. The present linear solver is
    // used by the block solver and so must be passed as an argument.
    // The destructor of the Hopf handler returns the linear
    // solver to the original non-block version.
    if (block_solve)
    {
      Linear_solver_pt = new BlockHopfLinearSolver(Linear_solver_pt);
    }
  }
 
 
  //============================================================
  /// Activate the hopf tracking system by changing the assembly
  /// handler and initialising it using the parameter addressed
  /// by parameter_pt and the frequency and null vectors
  /// specified.
  //============================================================
  void Problem::activate_hopf_tracking(double* const& parameter_pt,
                                       const double& omega,
                                       const DoubleVector& null_real,
                                       const DoubleVector& null_imag,
                                       const bool& block_solve)
  {
    // Reset the assembly handler to default
    reset_assembly_handler_to_default();
    // Set the new assembly handler. Note that the constructor actually
    // solves the original problem to get some initial conditions, but
    // this is OK because the RHS is always evaluated before assignment.
    Assembly_handler_pt =
      new HopfHandler(this, parameter_pt, omega, null_real, null_imag);
 
    // If we are using a block solver, we must set the linear solver pointer
    // to the block hopf solver. The present linear solver is
    // used by the block solver and so must be passed as an argument.
    // The destructor of the Hopf handler returns the linear
    // solver to the original non-block version.
    if (block_solve)
    {
      Linear_solver_pt = new BlockHopfLinearSolver(Linear_solver_pt);
    }
  }
 
 
  //===============================================================
  /// Reset the assembly handler to default: if the current handler is
  /// not the default one, it is deleted and the default one is put
  /// back in its place. Otherwise nothing happens.
  //===============================================================
  void Problem::reset_assembly_handler_to_default()
  {
    // The default handler is already installed (and must never be deleted)
    if (Assembly_handler_pt == Default_assembly_handler_pt)
    {
      return;
    }

    // Destroy the non-default handler ...
    delete Assembly_handler_pt;

    // ... and fall back to the default one
    Assembly_handler_pt = Default_assembly_handler_pt;
  }
 
  //===================================================================
  /// This function takes one step of length ds in pseudo-arclength.The
  /// argument parameter_pt is a pointer to the parameter (global variable)
  /// that is being traded for arc-length. The function returns the next desired
  /// arc-length according to criteria based upon the desired number of Newton
  /// Iterations per solve.
  //=====================================================================
  double Problem::arc_length_step_solve(double* const& parameter_pt,
                                        const double& ds,
                                        const unsigned& max_adapt)
  {
    // First check that we shouldn't use the other interface
    // by checking that the parameter isn't already stored as data
    if (does_pointer_correspond_to_problem_data(parameter_pt))
    {
      std::ostringstream error_message;
      error_message
        << "The parameter addressed by " << parameter_pt << " with the value "
        << *parameter_pt
        << "\n is supposed to be used for arc-length contiunation,\n"
        << " but it is stored in a Data object used by the problem.\n\n"
        << "This is bad for two reasons:\n"
        << "1. If it's a variable in the problem, it must already have an\n"
           "associated equation, so it can't be used for continuation;\n"
        << "2. The problem data will be reorganised in memory during "
           "continuation,\n"
        << "   which means that the pointer will become invalid.\n\n"
        << "If you are sure that this is what you want to do you must:\n"
        << "A. Ensure that the value is pinned (don't worry we'll shout again "
           "if not)\n"
        << "B. Use the alternative interface\n"
        << "   Problem::arc_length_step_solve(Data*,unsigned,...)\n"
        << "   which uses a pointer to the data object and not the raw double "
           "pointer."
        << std::endl;
      throw OomphLibError(
        error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
 
 
    // If we are using the continuation timestepper
    if (Use_continuation_timestepper)
    {
      // Has the timestepper already been added to the problem
      bool continuation_time_stepper_added = false;
      const unsigned n_time_steppers = this->ntime_stepper();
      for (unsigned i = 0; i < n_time_steppers; i++)
      {
        if (this->time_stepper_pt(i) == &Continuation_time_stepper)
        {
          continuation_time_stepper_added = true;
          break;
        }
      }
 
      // If not add it
      if (!continuation_time_stepper_added)
      {
        oomph_info << "Adding the continuation time stepper\n";
        this->add_time_stepper_pt(&Continuation_time_stepper);
      }
 
      // Need to treat case of eigenproblems and bifurcation detection/tracking
      // here
 
      // Backup the current timesteppers for each mesh!
 
 
      // If an arc length step has not been taken then set the timestepper
      if (!Arc_length_step_taken)
      {
        // Set the continuation timestepper for all data in the problem
        oomph_info << this->set_timestepper_for_all_data(
                        &Continuation_time_stepper)
                   << " equation numbers allocated for continuation\n";
      }
 
    } // End of continuation time stepper case
 
 
    // Just call the helper function (parameter is not from data)
    return arc_length_step_solve_helper(parameter_pt, ds, max_adapt);
  }
 
 
  //===================================================================
  /// Take one step of length ds in pseudo-arclength. The parameter
  /// (global variable) that is being traded for arc-length is held in a
  /// Data object: data_pt points to the Data and data_index selects the
  /// value within it.
  ///
  /// \param data_pt Data object that holds the continuation parameter.
  /// \param data_index Index of the parameter's value within data_pt;
  ///        that value must be pinned.
  /// \param ds Arc-length step, forwarded to the helper.
  /// \param max_adapt Forwarded unchanged to the helper.
  /// \return Whatever arc_length_step_solve_helper() returns; according to
  ///         the original banner this is the next desired arc-length,
  ///         chosen according to criteria based upon the desired number of
  ///         Newton iterations per solve.
  /// \throws OomphLibError if the selected value is not pinned.
  //=====================================================================
  double Problem::arc_length_step_solve(Data* const& data_pt,
                                        const unsigned& data_index,
                                        const double& ds,
                                        const unsigned& max_adapt)
  {
    // An unpinned value is already a variable in the problem and so cannot
    // be used for continuation
    const bool value_is_pinned = data_pt->is_pinned(data_index);
    if (!value_is_pinned)
    {
      std::ostringstream error_stream;
      error_stream << "The value at index " << data_index;
      error_stream << " in the data object to be used for continuation\n"
                   << "is not pinned, which means that it is already a\n"
                   << "variable in the problem "
                   << "and cannot be used for continuation.\n\n";
      error_stream << "Please correct your formulation by either:\n"
                   << "A. Pinning the value"
                   << "\n or \n"
                   << "B. Using a different parameter for continuation"
                   << std::endl;
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }

    // Extra set-up is only needed with the continuation timestepper
    if (Use_continuation_timestepper)
    {
      // Search the problem's timesteppers for the continuation timestepper,
      // stopping at the first match
      bool already_registered = false;
      const unsigned n_stepper = this->ntime_stepper();
      for (unsigned s = 0; (s < n_stepper) && (!already_registered); s++)
      {
        already_registered =
          (this->time_stepper_pt(s) == &Continuation_time_stepper);
      }

      // Register it if it was not found
      if (!already_registered)
      {
        oomph_info << "Adding the continuation time stepper\n";
        this->add_time_stepper_pt(&Continuation_time_stepper);
      }

      // Need to treat case of eigenproblems and bifurcation detection/tracking
      // here

      // Backup the current timesteppers for each mesh!

      // Before the first arc-length step, switch all data in the problem
      // over to the continuation timestepper and report the value returned
      if (!Arc_length_step_taken)
      {
        oomph_info << this->set_timestepper_for_all_data(
                        &Continuation_time_stepper)
                   << " equation numbers allocated for continuation\n";
      }
    }

    // Take the address of the value only now, i.e. after any change of
    // timestepper above. (Original note, ALH: the helper is not expected to
    // change the data storage again, because that has already happened
    // above.)
    double* continuation_value_pt = data_pt->value_pt(data_index);

    return arc_length_step_solve_helper(continuation_value_pt, ds, max_adapt);
  }
 
  //======================================================================
  /// Private helper function that is used to set the appropriate
  /// pinned values for continuation. If the data is pinned, then its
  /// current value is always the same as the original value and
  /// the derivative is always zero. If these are not set properly
  /// then interpolation and projection in spatial adaptivity will
  /// not give the best answers.
  //=====================================================================
  void Problem::set_consistent_pinned_values_for_continuation()
  {
    // Start with the global mesh: hand it the continuation time stepper so
    // that the mesh can set the consistent pinned values itself
    Mesh_pt->set_consistent_pinned_values_for_continuation(
      &Continuation_time_stepper);

    // Spine meshes offer an additional, spine-specific function that has to
    // be called separately. Which mesh pointers have to be inspected depends
    // on whether the problem has any sub meshes.
    const unsigned n_sub_mesh = this->nsub_mesh();
    if (n_sub_mesh > 0)
    {
      // Several meshes: test each sub mesh in turn and treat those that
      // turn out to be spine meshes
      for (unsigned m = 0; m < n_sub_mesh; m++)
      {
        SpineMesh* const spine_mesh_pt =
          dynamic_cast<SpineMesh*>(Sub_mesh_pt[m]);
        if (spine_mesh_pt != 0)
        {
          spine_mesh_pt->set_consistent_pinned_spine_values_for_continuation(
            &Continuation_time_stepper);
        }
      }
    }
    else
    {
      // No sub meshes: the global mesh is the only candidate
      SpineMesh* const spine_mesh_pt = dynamic_cast<SpineMesh*>(Mesh_pt);
      if (spine_mesh_pt != 0)
      {
        spine_mesh_pt->set_consistent_pinned_spine_values_for_continuation(
          &Continuation_time_stepper);
      }
    }

    // Finally let the continuation time stepper set the consistent pinned
    // values of every global data object
    const unsigned n_global_data = Global_data_pt.size();
    for (unsigned d = 0; d < n_global_data; d++)
    {
      Continuation_time_stepper.set_consistent_pinned_values(Global_data_pt[d]);
    }
  }
 
 
  //===================================================================
  /// This function takes one step of length ds in pseudo-arclength. The
  /// argument parameter_pt is a pointer to the parameter (global variable)
  /// that is being traded for arc-length. The function returns the next desired
  /// arc-length according to criteria based upon the desired number of Newton
  /// Iterations per solve.
  //=====================================================================
  double Problem::arc_length_step_solve_helper(double* const& parameter_pt,
                                               const double& ds,
                                               const unsigned& max_adapt)
  {
    // Overview of what happens in here:
    //  1. All timesteppers are made steady; their previous status is
    //     remembered and restored before returning.
    //  2. Up to max_adapt + 1 solves are performed; adapt() is called before
    //     every solve except the first, and the loop stops early once no
    //     elements were refined or unrefined.
    //  3. Within each solve, the step Ds_current is reduced by a factor of
    //     2/3 and the solve is retried whenever the Newton solver fails to
    //     converge or an inverted element is reported. An OomphLibError is
    //     thrown if |Ds_current| drops below Minimum_ds or if the linear
    //     solver itself fails.
    //  4. Afterwards the continuation derivatives are recomputed and
    //     (optionally) a change of sign of the Jacobian is reported.
    //  5. The value returned is the suggested next step length: the bisection
    //     step if bisection is active, otherwise Ds_current scaled up (x1.5),
    //     down (x2/3) or unchanged depending on how the largest Newton
    //     iteration count compares to Desired_newton_iterations_ds.

    //----------------------MAKE THE PROBLEM STEADY-----------------------
    // Loop over the timesteppers and make them (temporarily) steady.
    // We can only do continuation for steady problems!
    unsigned n_time_steppers = ntime_stepper();
    // Vector of bools to store the is_steady status of the various
    // timesteppers when we came in here
    std::vector<bool> was_steady(n_time_steppers);

    // Loop over them all and make them (temporarily) static
    for (unsigned i = 0; i < n_time_steppers; i++)
    {
      was_steady[i] = time_stepper_pt(i)->is_steady();
      time_stepper_pt(i)->make_steady();
    }


    // Max number of solves: the initial solve plus one per adaptation
    unsigned max_solve = max_adapt + 1;
    // Largest number of Newton iterations returned by any of the solves in
    // the adaptation loop (used below to choose the next step length)
    unsigned max_count_in_adapt_loop = 0;


    //----SET UP MEMORY FOR QUANTITIES THAT ARE REQUIRED OUTSIDE THE LOOP----

    // Assign memory for solutions of the equations Jz = du/dparameter
    // This is needed here (outside the loop), so that we can save on
    // one linear solve when calculating the derivatives wrt the arc-length
    DoubleVector z;


    // Store sign of the Jacobian, used for bifurcation detection
    // If this is the first time that we are calling the arc-length solver,
    // this should not be used.
    int previous_sign = Sign_of_jacobian;

    // Flag to indicate a sign change
    bool SIGN_CHANGE = false;


    // Adaptation loop
    for (unsigned isolve = 0; isolve < max_solve; ++isolve)
    {
      // Only adapt after the first solve has been done
      if (isolve > 0)
      {
        unsigned n_refined;
        unsigned n_unrefined;

        // Adapt problem
        adapt(n_refined, n_unrefined);

#ifdef OOMPH_HAS_MPI
        // Adaptation only converges if ALL the processes have no
        // refinement or unrefinement to perform, so sum the counts over
        // all processes if the problem has been distributed
        unsigned total_refined = 0;
        unsigned total_unrefined = 0;
        if (Problem_has_been_distributed)
        {
          MPI_Allreduce(&n_refined,
                        &total_refined,
                        1,
                        MPI_UNSIGNED,
                        MPI_SUM,
                        this->communicator_pt()->mpi_comm());
          n_refined = total_refined;
          MPI_Allreduce(&n_unrefined,
                        &total_unrefined,
                        1,
                        MPI_UNSIGNED,
                        MPI_SUM,
                        this->communicator_pt()->mpi_comm());
          n_unrefined = total_unrefined;
        }
#endif

        // Report the outcome of the adaptation; note that the statement is
        // terminated inside the preprocessor branches below
        oomph_info << "---> " << n_refined << " elements were refined, and "
                   << n_unrefined << " were unrefined"
#ifdef OOMPH_HAS_MPI
                   << ", in total (over all processors).\n";
#else
                   << ".\n";
#endif


        // Check convergence of adaptation cycle: nothing changed, so there
        // is no need for a further solve
        if ((n_refined == 0) && (n_unrefined == 0))
        {
          oomph_info << "\n \n Solution is fully converged in "
                     << "Problem::newton_solver(). \n \n ";
          break;
        }
      }

      //----------SAVE THE INITIAL VALUES, IN CASE THE STEP FAILS-----------

      // Find the number of local dofs
      unsigned ndof_local = Dof_distribution_pt->nrow_local();

      // Only need to do this in the first loop
      if (isolve == 0)
      {
        // The explicit storage vectors are only resized when the
        // continuation timestepper is not in use
        if (!Use_continuation_timestepper)
        {
          // Safety check, set up the array of dof derivatives, if necessary
          // The distribution is the same as the (natural) distribution of the
          // dofs
          if (Dof_derivative.size() != ndof_local)
          {
            Dof_derivative.resize(ndof_local, 0.0);
          }

          // Safety check, set up the array of current values, if necessary
          // Again the distribution reflects the (natural) distribution of the
          // dofs
          if (Dof_current.size() != ndof_local)
          {
            Dof_current.resize(ndof_local);
          }
        }

        // Save the current value of the parameter
        Parameter_current = *parameter_pt;

        // Save the current values of the degrees of freedom
        for (unsigned long l = 0; l < ndof_local; l++)
        {
          dof_current(l) = *Dof_pt[l];
        }

        // Set the value of ds_current to the requested step length
        Ds_current = ds;
      }

      // Counter for the number of newton steps
      unsigned count = 0;

      // Flag to indicate that the step was rejected and has to be repeated
      bool STEP_REJECTED = false;


      // Set the appropriate initial conditions for the pinned data
      if (Use_continuation_timestepper)
      {
        this->set_consistent_pinned_values_for_continuation();
      }

      // Loop around the step in arc-length
      do
      {
        // Check that the step has not fallen below the minimum tolerance
        if (std::fabs(Ds_current) < Minimum_ds)
        {
          std::ostringstream error_message;
          error_message << "DESIRED ARC-LENGTH STEP " << Ds_current
                        << " HAS FALLEN BELOW MINIMUM TOLERANCE, " << Minimum_ds
                        << std::endl;

          throw OomphLibError(error_message.str(),
                              OOMPH_CURRENT_FUNCTION,
                              OOMPH_EXCEPTION_LOCATION);
        }

        // Assume that we shall accept the step
        STEP_REJECTED = false;

        // Set initial value of the parameter: the stored value plus the
        // stored derivative times the current step length
        *parameter_pt = Parameter_current + Parameter_derivative * Ds_current;

        // Perform any actions...
        actions_after_parameter_increase(parameter_pt);

        // Recompute the step length from the parameter value that is now
        // stored, in case the function called above has modified it
        Ds_current = (*parameter_pt - Parameter_current) / Parameter_derivative;

        // Loop over the (local) variables and set their initial values
        for (unsigned long l = 0; l < ndof_local; l++)
        {
          *Dof_pt[l] = dof_current(l) + dof_derivative(l) * Ds_current;
        }

        // Actually do the newton solve stage for the continuation problem
        try
        {
          count = newton_solve_continuation(parameter_pt, z);
        }
        // Catch any exceptions thrown in the Newton solver
        catch (NewtonSolverError& error)
        {
          // Check whether it's the linear solver; this is treated as fatal
          if (error.linear_solver_error())
          {
            std::ostringstream error_stream;
            error_stream << std::endl
                         << "USER-DEFINED ERROR IN NEWTON SOLVER " << std::endl;
            oomph_info << "ERROR IN THE LINEAR SOLVER" << std::endl;
            throw OomphLibError(error_stream.str(),
                                OOMPH_CURRENT_FUNCTION,
                                OOMPH_EXCEPTION_LOCATION);
          }
          // Otherwise mark the step as having failed
          else
          {
            oomph_info << "STEP REJECTED DUE TO NEWTON SOLVER --- TRYING AGAIN"
                       << std::endl;
            STEP_REJECTED = true;
            // Let's take a smaller step
            Ds_current *= (2.0 / 3.0);
          }
        }
        // Inverted elements are handled like a failed Newton solve
        catch (InvertedElementError const& error)
        {
          oomph_info
            << "STEP REJECTED DUE TO INVERTED ELEMENTS --- TRYING AGAIN"
            << std::endl;
          STEP_REJECTED = true;
          // Let's take a smaller step
          Ds_current *= (2.0 / 3.0);
        }
      } while (STEP_REJECTED); // continue until a step is accepted

      // Keep track of the largest iteration count seen so far
      if (count > max_count_in_adapt_loop)
      {
        max_count_in_adapt_loop = count;
      }
    } /// end of adaptation loop

    // Only recalculate the derivatives if there has been a Newton solve
    // If not, the previous values should be close enough
    if (max_count_in_adapt_loop > 0)
    {
      //--------------------CHECK FOR POTENTIAL BIFURCATIONS-------------
      if (Bifurcation_detection)
      {
        // If the sign of the jacobian is zero issue a warning
        if (Sign_of_jacobian == 0)
        {
          std::string error_message =
            "The sign of the jacobian is zero after a linear solve\n";
          error_message += "Either the matrix is singular (unlikely),\n";
          error_message += "or the linear solver cannot compute the "
                           "determinant of the matrix;\n";
          error_message += "e.g. an iterative linear solver.\n";
          error_message +=
            "If the latter, bifurcation detection must be via an eigensolver\n";
          OomphLibWarning(error_message,
                          "Problem::arc_length_step_solve",
                          OOMPH_EXCEPTION_LOCATION);
        }
        // If this is the first step, we cannot rely on the previous value
        // of the jacobian so set the previous sign to the present sign
        if (!Arc_length_step_taken)
        {
          previous_sign = Sign_of_jacobian;
        }
        // If we have detected a sign change in the last converged Jacobian,
        // it must be a turning point or bifurcation
        if (Sign_of_jacobian != previous_sign)
        {
          // There has been, at least, one sign change
          First_jacobian_sign_change = true;

          // The sign has changed this time
          SIGN_CHANGE = true;

          // Calculate the dot product of the approximate null vector
          // of the Jacobian matrix ((badly) approximated by z)
          // and the vectors of derivatives of the residuals wrt the
          // global parameter
          // If this is small it is a bifurcation rather than a turning point.
          // (The computation is currently disabled; the code is retained
          // below for reference.)
          // Get the derivative wrt global parameter
          // DoubleVector dparam;
          // get_derivative_wrt_global_parameter(parameter_pt,dparam);
          // Calculate the dot product
          // double dot=0.0;
          // for(unsigned long n=0;n<n_dofs;++n) {dot += dparam[n]*z[n];}
          // z.dot(dparam);

          // Write the output message
          std::ostringstream message;
          message
            << "-----------------------------------------------------------";
          message << std::endl
                  << "SIGN CHANGE IN DETERMINANT OF JACOBIAN: " << std::endl;
          message << "BIFURCATION OR TURNING POINT DETECTED BETWEEN "
                  << Parameter_current << " AND " << *parameter_pt << std::endl;
          // message << "APPROXIMATE DOT PRODUCT : " << dot << "," << std::endl;
          // message << "IF CLOSE TO ZERO WE HAVE A BIFURCATION; ";
          // message << "OTHERWISE A TURNING POINT" << std::endl;
          message
            << "-----------------------------------------------------------"
            << std::endl;

          // Write the message to standard output
          oomph_info << message.str();

          // Open the information file for appending (the file is called
          // "bifurcation_info" and is opened relative to the current
          // working directory)
          std::ofstream bifurcation_info("bifurcation_info",
                                         std::ios_base::app);
          // Write the message to the file
          bifurcation_info << message.str();
          bifurcation_info.close();
        }
      }

      // Calculate the derivatives required for the next stage of continuation
      // In this we pass the last value of z (i.e. approximation)
      if (!Use_finite_differences_for_continuation_derivatives)
      {
        calculate_continuation_derivatives(z);
      }
      // Or use finite differences
      else
      {
        calculate_continuation_derivatives_fd(parameter_pt);
      }

      // If it's the first step then the value of the next step should
      // be the change in parameter divided by the parameter derivative
      // to obtain approximately the same parameter change
      if (!Arc_length_step_taken)
      {
        Ds_current = (*parameter_pt - Parameter_current) / Parameter_derivative;
      }

      // We have taken our first step
      Arc_length_step_taken = true;
    }
    // If there has not been a newton step then we still need to estimate
    // the derivatives in the arc length direction
    else
    {
      // Default is to calculate the continuation derivatives by solving the
      // linear system. We must do this to ensure that the derivatives are in
      // sync. It could lead to problems near turning points when we should
      // really be solving an eigenproblem, but seems OK so far!

      // Save the current sign of the jacobian
      int temp_sign = Sign_of_jacobian;

      // Calculate the continuation derivatives, which includes a solve
      // of the linear system if not using finite differences
      if (!Use_finite_differences_for_continuation_derivatives)
      {
        calculate_continuation_derivatives(parameter_pt);
      }
      // Otherwise use finite differences
      else
      {
        calculate_continuation_derivatives_fd(parameter_pt);
      }

      // Reset the sign of the jacobian, just in case the sign has changed when
      // solving the continuation derivatives. The sign change will be picked
      // up on the next continuation step.
      Sign_of_jacobian = temp_sign;
    }

    // Reset the is_steady status of all timesteppers that
    // weren't already steady when we came in here and reset their
    // weights
    for (unsigned i = 0; i < n_time_steppers; i++)
    {
      if (!was_steady[i])
      {
        time_stepper_pt(i)->undo_make_steady();
      }
    }

    // If we are trying to find a bifurcation and the first sign change
    // has occurred, use bisection
    if ((Bifurcation_detection) && (Bisect_to_find_bifurcation) &&
        (First_jacobian_sign_change))
    {
      // If there has been a sign change we need to halve the step size
      // and reverse the direction
      if (SIGN_CHANGE)
      {
        Ds_current *= -0.5;
      }
      // Otherwise
      else
      {
        // The size of the bracketed interval is always
        // 2ds - Ds_current (this will work even if the original step failed)
        // We want our new step size to be half this
        Ds_current = ds - 0.5 * Ds_current;
      }
      // Return the desired value of the step
      return Ds_current;
    }

    // If fewer than the desired number of Newton Iterations, increase the step
    if (max_count_in_adapt_loop < Desired_newton_iterations_ds)
    {
      return Ds_current * 1.5;
    }
    // If more than the desired number of Newton Iterations, reduce the step
    if (max_count_in_adapt_loop > Desired_newton_iterations_ds)
    {
      return Ds_current * (2.0 / 3.0);
    }
    // Otherwise return the step just taken
    return Ds_current;
  }
 
 
  //=======================================================================
  /// Take an explicit timestep of size dt
  //======================================================================
  void Problem::explicit_timestep(const double& dt, const bool& shift_values)
  {
    // dt:           size of the explicit timestep; it is stored as the
    //               current dt (if the Time object stores any dts) and is
    //               passed on to the explicit time stepper.
    // shift_values: if true, shift_time_values() is called before the step.
    // In PARANOID builds an OomphLibError is thrown if no explicit time
    // stepper has been set.
#ifdef PARANOID
    if (this->explicit_time_stepper_pt() == 0)
    {
      // Argument order is (description, function name, location), as used
      // by every other OomphLibError thrown in this file
      throw OomphLibError("Explicit time stepper pointer is null in problem.",
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // Firstly we shift the time values
    if (shift_values)
    {
      shift_time_values();
    }
    // Set the current value of dt, if we can (i.e. only if the Time object
    // has storage for at least one dt)
    if (time_pt()->ndt() > 0)
    {
      time_pt()->dt() = dt;
    }

    // Take the explicit step
    this->explicit_time_stepper_pt()->timestep(this, dt);
  }
 
 
  //========================================================================
  /// Do one timestep of size dt using Newton's method with the specified
  /// tolerance and linear solver defined as member data of the Problem class.
  /// This will be the most commonly used version
  /// of unsteady_newton_solve, in which the time values are always shifted.
  /// This does not include any kind of adaptivity. If the solution fails to
  /// converge the program will end.
  //========================================================================
  void Problem::unsteady_newton_solve(const double& dt)
  {
    // Delegate to the two-argument version and ask it to shift the time
    // values before the step is taken
    const bool shift_values = true;
    unsteady_newton_solve(dt, shift_values);
  }
 
  //========================================================================
  /// Do one timestep forward of size dt using Newton's method with the
  /// specified tolerance and linear solver defined via member data of the
  /// Problem class.
  /// The boolean flag shift_values is used to control whether the time values
  /// should be shifted or not.
  //========================================================================
  void Problem::unsteady_newton_solve(const double& dt,
                                      const bool& shift_values)
  {
    // Shift the time values and the dts, according to the control flag
    if (shift_values)
    {
      shift_time_values();
    }
 
    // Advance global time and set current value of dt
    time_pt()->time() += dt;
    time_pt()->dt() = dt;
 
    // Find out how many timesteppers there are
    unsigned n_time_steppers = ntime_stepper();
 
    // Loop over them all and set the weights
    for (unsigned i = 0; i < n_time_steppers; i++)
    {
      time_stepper_pt(i)->set_weights();
    }
 
    // Run the individual timesteppers actions before timestep. These need to
    // be before the problem's actions_before_implicit_timestep so that the
    // boundary conditions are set consistently.
    for (unsigned i = 0; i < n_time_steppers; i++)
    {
      time_stepper_pt(i)->actions_before_timestep(this);
    }
 
    // Now update anything that needs updating before the timestep
    // This could be time-dependent boundary conditions, for example.
    actions_before_implicit_timestep();
 
    try
    {
      // Solve the non-linear problem for this timestep with Newton's method
      newton_solve();
    }
    // Catch any exceptions thrown in the Newton solver
    catch (NewtonSolverError& error)
    {
      oomph_info << std::endl
                 << "USER-DEFINED ERROR IN NEWTON SOLVER " << std::endl;
      // Check whether it's the linear solver
      if (error.linear_solver_error())
      {
        oomph_info << "ERROR IN THE LINEAR SOLVER" << std::endl;
      }
      // Check to see whether we have reached Max_iterations
      else if (error.iterations() == Max_newton_iterations)
      {
        oomph_info << "MAXIMUM NUMBER OF ITERATIONS (" << error.iterations()
                   << ") REACHED WITHOUT CONVERGENCE " << std::endl;
      }
      // If not, it must be that we have exceeded the maximum residuals
      else
      {
        oomph_info << "MAXIMUM RESIDUALS: " << error.maxres()
                   << " EXCEEDS PREDEFINED MAXIMUM " << Max_residuals
                   << std::endl;
      }
      // Die horribly!!
      std::ostringstream error_stream;
      error_stream << "Error occured in unsteady Newton solver. " << std::endl;
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
 
    // Run the individual timesteppers actions, these need to be before the
    // problem's actions_after_implicit_timestep so that the time step is
    // finished before the problem does any auxiliary calculations (e.g. in
    // semi-implicit micromagnetics the calculation of magnetostatic field).
    for (unsigned i = 0; i < n_time_steppers; i++)
    {
      time_stepper_pt(i)->actions_after_timestep(this);
    }
 
 
    // Now update anything that needs updating after the timestep
    actions_after_implicit_timestep();
    actions_after_implicit_timestep_and_error_estimation();
  }
 
  //=======================================================================
  /// Attempt to take one timestep forward using dt_desired. The error control
  /// parameter, epsilon, is used to specify the desired approximate value of
  /// the global error norm per timestep. The routine returns an
  /// estimate of the next value of dt that should be taken.
  //=======================================================================
  double Problem::adaptive_unsteady_newton_solve(const double& dt_desired,
                                                 const double& epsilon)
  {
    // Hand over to the three-argument version; with this interface the time
    // values are always shifted
    const bool shift_values = true;
    const double dt_next =
      adaptive_unsteady_newton_solve(dt_desired, epsilon, shift_values);
    return dt_next;
  }
 
 
  //=======================================================================
  /// Attempt to take  one timestep forward using the dt_desired.
  /// This is the driver for a number of adaptive solvers. If the solution
  /// fails to converge at a given timestep, the routine will automatically
  /// halve the time step and try again, until the time step falls below the
  /// specified minimum value. The routine returns an estimate
  /// of the next value of dt that should be taken.
  /// Timestep is also rejected if the  error estimate post-solve
  /// (computed by global_temporal_error_norm()) exceeds epsilon.
  /// This behaviour can be over-ruled by setting the protected
  /// boolean Problem::Keep_temporal_error_below_tolerance to false.
  //========================================================================
  double Problem::adaptive_unsteady_newton_solve(const double& dt_desired,
                                                 const double& epsilon,
                                                 const bool& shift_values)
  {
    // First, we need to backup the existing dofs, in case the timestep is
    // rejected
 
    // Find total number of dofs on current processor
    unsigned n_dof_local = dof_distribution_pt()->nrow_local();
 
    // Now set up a Vector to hold current values
    Vector<double> dofs_current(n_dof_local);
 
    // Load values into dofs_current
    for (unsigned i = 0; i < n_dof_local; i++) dofs_current[i] = dof(i);
 
    // Store the time
    double time_current = time_pt()->time();
 
    // Flag to detect whether the timestep has been rejected or not
    bool reject_timestep = 0;
 
    // Flag to detect whether any of the timesteppers are adaptive
    unsigned adaptive_flag = 0;
 
    // The value of the actual timestep, by default the same as desired timestep
    double dt_actual = dt_desired;
 
    // Find out whether any of the timesteppers are adaptive
    unsigned n_time_steppers = ntime_stepper();
    for (unsigned i = 0; i < n_time_steppers; i++)
    {
      if (time_stepper_pt(i)->adaptive_flag())
      {
        adaptive_flag = 1;
        break;
      }
    }
 
    // Shift the time_values according to the control flag
    if (shift_values)
    {
      shift_time_values();
    }
 
    // This loop surrounds the adaptive time-stepping and will not be broken
    // until a timestep is accepted
    do
    {
      // Initially we assume that this step will succeed and that this dt
      // value is ok.
      reject_timestep = 0;
      double dt_rescaling_factor = 1.0;
 
      // Set the new time and value of dt
      time_pt()->time() += dt_actual;
      time_pt()->dt() = dt_actual;
 
      // Loop over all timesteppers and set the weights and predictor weights
      for (unsigned i = 0; i < n_time_steppers; i++)
      {
        // If the time_stepper is non-adaptive, this will be zero
        time_stepper_pt(i)->set_predictor_weights();
        time_stepper_pt(i)->set_weights();
      }
 
      // Now calculate the predicted values for the all data and all positions
      calculate_predictions();
 
      // Run the individual timesteppers actions before timestep. These need to
      // be before the problem's actions_before_implicit_timestep so that the
      // boundary conditions are set consistently.
      for (unsigned i = 0; i < n_time_steppers; i++)
      {
        time_stepper_pt(i)->actions_before_timestep(this);
      }
 
      // Do any updates/boundary conditions changes here
      actions_before_implicit_timestep();
 
      // Attempt to solve the non-linear system
      try
      {
        // Solve the non-linear problem at this timestep
        newton_solve();
      }
      // Catch any exceptions thrown
      catch (NewtonSolverError& error)
      {
        // If it's a solver error then die
        if (error.linear_solver_error() ||
            Time_adaptive_newton_crash_on_solve_fail)
        {
          std::string error_message = "USER-DEFINED ERROR IN NEWTON SOLVER\n";
          error_message += "ERROR IN THE LINEAR SOLVER\n";
 
          // Die
          throw OomphLibError(
            error_message, OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
        }
        else
        {
          // Reject the timestep, if we have an exception
          oomph_info << "TIMESTEP REJECTED DUE TO THE NEWTON SOLVER"
                     << std::endl;
          reject_timestep = true;
 
          // Half the time step
          dt_rescaling_factor = Timestep_reduction_factor_after_nonconvergence;
        }
      }
      catch (InvertedElementError const& error)
      {
        /// Reject the timestep, if we have an exception
        oomph_info << "TIMESTEP REJECTED DUE TO INVERTED ELEMENTS" << std::endl;
        reject_timestep = true;
 
        /// Half the time step
        dt_rescaling_factor = Timestep_reduction_factor_after_nonconvergence;
      }
 
      // Run the individual timesteppers actions, these need to be before the
      // problem's actions_after_implicit_timestep so that the time step is
      // finished before the problem does any auxiliary calculations (e.g. in
      // semi-implicit micromagnetics the calculation of magnetostatic field).
      for (unsigned i = 0; i < n_time_steppers; i++)
      {
        time_stepper_pt(i)->actions_after_timestep(this);
      }
 
      // Update anything that needs updating after the timestep
      actions_after_implicit_timestep();
 
      // If we have an adapative timestepper (and we haven't already failed)
      // then calculate the error estimate and rescaling factor.
      if (adaptive_flag && !reject_timestep)
      {
        // Once timestep has been accepted can do fancy error processing
        // Set the error weights
        for (unsigned i = 0; i < n_time_steppers; i++)
        {
          time_stepper_pt(i)->set_error_weights();
        }
 
        // Get a global error norm to use in adaptivity (as specified by the
        // problem sub-class writer). Prevent a divide by zero if the solution
        // gives very close to zero error. Error norm should never be negative
        // but use absolute value just in case.
        double error = std::max(std::abs(global_temporal_error_norm()), 1e-12);
 
        // Target error that we wish our next timestep to approximately produce
        // as a factor of the maximum error tolerance
        double target_error = Target_error_safety_factor * epsilon;
 
        // Calculate the scaling factor
        dt_rescaling_factor = std::pow(
          (target_error / error), (1.0 / (1.0 + time_stepper_pt()->order())));
 
        oomph_info
          << "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n"
          << "Estimated timestepping error is " << error << "\n"
          << "Timestep scaling factor is " << dt_rescaling_factor << "\n";
 
 
        // Do we have to do it again?
        if (error > epsilon)
        {
          oomph_info << "Estimated timestepping error " << error
                     << " exceeds tolerance " << epsilon << "\n";
          if (Keep_temporal_error_below_tolerance)
          {
            oomph_info << "    --> rejecting timestep.\n";
            reject_timestep = true;
          }
          else
          {
            oomph_info << "    ...but we're not rejecting the timestep\n";
          }
          oomph_info
            << "Note: This behaviour can be adjusted by changing the\n"
            << "protected boolean\n"
            << "    Problem::Keep_temporal_error_below_tolerance\n\n"
            << "Also, if you are noticing that many of your timesteps result\n"
            << "in error > tolerance, try reducing the target error with\n"
            << "respect to the error tolerance by reducing the value of\n"
            << "Target_error_safety_factor from its default value of 1.0\n"
            << "using the access function\n"
            << "    target_error_safety_factor() = 0.5 (e.g.)\n"
            << "The default strategy (Target_error_safety_factor=1.0) tries\n"
            << "to suggest a timestep which will produce an error equal to\n"
            << "the error tolerance `epsilon` which risks error > tolerance\n"
            << "quite often. Setting the safety factor to too small a value\n"
            << "will make the timesteps unnecessarily small; too large will\n"
            << "not address the issue -- neither is optimal and a problem\n"
            << "dependent compromise is needed.\n"
            << "for more info see:\n"
            << " Mayr et al. (2018), p5,9, DOI:10.1016/j.finel.2017.12.002\n"
            << " Harrier et al. (1993), p168, ISBN:978-3-540-56670-0\n"
            << " Söderlind (2002), (2.7) on p5, DOI:10.1023/A:1021160023092\n";
        }
        oomph_info
          << "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n"
          << std::endl;
 
 
      } // End of if adaptive flag
 
 
      // Calculate the next time step size and check it's ok
      // ============================================================
 
      // Calculate the possible next time step, if no error conditions
      // trigger.
      double new_dt_candidate = dt_rescaling_factor * dt_actual;
 
      // Check that the scaling factor is within the allowed range
      if (dt_rescaling_factor > DTSF_max_increase)
      {
        oomph_info << "Tried to increase dt by the ratio "
                   << dt_rescaling_factor << " which is above the maximum ("
                   << DTSF_max_increase
                   << "). Attempting to increase by the maximum ratio instead."
                   << std::endl;
        new_dt_candidate = DTSF_max_increase * dt_actual;
      }
      // If we have already rejected the timestep then don't do this check
      // because DTSF will definitely be too small.
      else if ((!reject_timestep) && (dt_rescaling_factor <= DTSF_min_decrease))
      {
        // Handle this special case where we want to continue anyway (usually
        // Minimum_dt_but_still_proceed = -1 so this has no effect).
        if (new_dt_candidate < Minimum_dt_but_still_proceed)
        {
          oomph_info
            << "Warning: Adaptation of timestep to ensure satisfaction\n"
            << "         of error bounds during adaptive timestepping\n"
            << "         would lower dt below \n"
            << "         Problem::Minimum_dt_but_still_proceed="
            << Minimum_dt_but_still_proceed << "\n"
            << "         ---> We're continuing with present timestep.\n"
            << std::endl;
          dt_rescaling_factor = 1.0;
          // ??ds shouldn't we set new_dt_candidate =
          // Minimum_dt_but_still_proceed here, rather than not changing dt at
          // all?
        }
        else
        {
          // Otherwise reject
          oomph_info << "Timestep would decrease by " << dt_rescaling_factor
                     << " which is less than the minimum scaling factor "
                     << DTSF_min_decrease << std::endl;
          oomph_info << "TIMESTEP REJECTED" << std::endl;
          reject_timestep = 1;
        }
      }
 
      // Now check that the new dt is within the allowed range
      if (new_dt_candidate > Maximum_dt)
      {
        oomph_info << "Tried to increase dt to " << new_dt_candidate
                   << " which is above the maximum (" << Maximum_dt
                   << "). I increased it to the maximum value instead.";
        dt_actual = Maximum_dt;
      }
      else if (new_dt_candidate < Minimum_dt)
      {
        std::ostringstream err;
        err << "Tried to reduce dt to " << new_dt_candidate
            << " which is less than the minimum dt (" << Minimum_dt << ")."
            << std::endl;
        throw OomphLibError(
          err.str(), OOMPH_EXCEPTION_LOCATION, OOMPH_CURRENT_FUNCTION);
      }
      else
      {
        dt_actual = new_dt_candidate;
      }
 
 
      actions_after_implicit_timestep_and_error_estimation();
 
 
      // If we are rejecting this attempt then revert the dofs etc.
      if (reject_timestep)
      {
        // Reset the time
        time_pt()->time() = time_current;
 
        // Reload the dofs
        unsigned ni = dofs_current.size();
        for (unsigned i = 0; i < ni; i++)
        {
          dof(i) = dofs_current[i];
        }
 
#ifdef OOMPH_HAS_MPI
        // Synchronise the solution on different processors (on each submesh)
        this->synchronise_all_dofs();
#endif
 
        // Call all "after" actions, e.g. to handle mesh updates
        actions_after_newton_step();
        actions_before_newton_convergence_check();
        actions_after_newton_solve();
        actions_after_implicit_timestep();
        actions_after_implicit_timestep_and_error_estimation();
      }
 
    }
    // Keep this loop going until we accept the timestep
    while (reject_timestep);
 
    // Once the timestep has been accepted, return the time step that should be
    // used next time.
    return dt_actual;
  }
 
 
  //=======================================================================
  /// Private helper function to perform
  /// unsteady "doubly" adaptive Newton solve: Does temporal
  /// adaptation first, i.e. we try to do a timestep with an increment
  /// of dt, and adjusting dt until the solution on the given mesh satisfies
  /// the temporal error measure with tolerance epsilon. Following
  /// this, we do up to max_adapt spatial adaptions (without
  /// re-examining the temporal error). If first==true, the initial conditions
  /// are re-assigned after the mesh adaptations.
  /// Shifting of time can be suppressed by overwriting the
  /// default value of shift (true). [Shifting must be done
  /// if first_timestep==true because we're constantly re-assigning
  /// the initial conditions; if first_timestep==true and shift==false
  /// shifting is performed anyway and a warning is issued.
  /// Pseudo-Boolean flag suppress_resolve_after_spatial_adapt [0: false;
  /// 1: true] does what it says.]
  //========================================================================
  double Problem::doubly_adaptive_unsteady_newton_solve_helper(
    const double& dt_desired,
    const double& epsilon,
    const unsigned& max_adapt,
    const unsigned& suppress_resolve_after_spatial_adapt_flag,
    const bool& first,
    const bool& shift_values)
  {
    // Store the initial time
    double initial_time = time_pt()->time();
 
    // Take adaptive timestep, adjusting dt until tolerance is satisfied
    double new_dt =
      adaptive_unsteady_newton_solve(dt_desired, epsilon, shift_values);
    double dt_taken = time_pt()->dt();
    oomph_info << "Accepted solution taken with timestep: " << dt_taken
               << std::endl;
 
 
    // Bail out straightaway if no spatial adaptation allowed
    if (max_adapt == 0)
    {
      oomph_info << "No spatial refinement allowed; max_adapt=0\n";
      return new_dt;
    }
 
    // Adapt problem/mesh
    unsigned n_refined = 0;
    unsigned n_unrefined = 0;
    adapt(n_refined, n_unrefined);
 
    // Check if mesh has been adapted on other processors
    Vector<int> total_ref_count(2);
    total_ref_count[0] = n_refined;
    total_ref_count[1] = n_unrefined;
 
 
#ifdef OOMPH_HAS_MPI
    if (Problem_has_been_distributed)
    {
      // Sum n_refine across all processors
      Vector<int> ref_count(2);
      ref_count[0] = n_refined;
      ref_count[1] = n_unrefined;
      MPI_Allreduce(&ref_count[0],
                    &total_ref_count[0],
                    2,
                    MPI_INT,
                    MPI_SUM,
                    communicator_pt()->mpi_comm());
    }
#endif
 
 
    // Re-solve the problem if the adaptation has changed anything
    if ((total_ref_count[0] != 0) || (total_ref_count[1] != 0))
    {
      if (suppress_resolve_after_spatial_adapt_flag == 1)
      {
        oomph_info << "Mesh was adapted but re-solve has been suppressed."
                   << std::endl;
      }
      else
      {
        oomph_info
          << "Mesh was adapted --> we'll re-solve for current timestep."
          << std::endl;
 
        // Reset time to what it was when we entered here
        // because it will be incremented again by dt_taken.
        time_pt()->time() = initial_time;
 
        // Shift the timesteps? No! They've been shifted already when we
        // called the solve with pure temporal adaptivity...
        bool shift = false;
 
        // Reset the inital condition on refined meshes
        if (first)
        {
          // Reset default set_initial_condition has been called flag to false
          Default_set_initial_condition_called = false;
 
          // Reset the initial conditions
          oomph_info << "Re-assigning initial condition at time="
                     << time_pt()->time() << std::endl;
          set_initial_condition();
 
          // This is the first timestep so shifting
          // has to be done following the assignment of initial conditions,
          // providing the default set_initial_condition function has not
          // been called.
          // In fact, unsteady_newton_solve(...) does that automatically.
          // We're changing the flag here to avoid warning messages.
          if (!Default_set_initial_condition_called)
          {
            shift = true;
          }
        }
 
        // Now take the step again on the refined mesh, using the same
        // timestep as used before.
        unsteady_newton_solve(dt_taken, max_adapt, first, shift);
      }
    }
    else
    {
      oomph_info << "Mesh wasn't adapted --> we'll accept spatial refinement."
                 << std::endl;
    }
 
    return new_dt;
  }
 
 
  //========================================================================
  /// Initialise the previous values of the variables for time stepping
  /// corresponding to an impulsive start. Previous history for all data
  /// is generated by the appropriate timesteppers. Previous nodal
  /// positions are simply copied backwards.
  //========================================================================
  void Problem::assign_initial_values_impulsive()
  {
    // Assign the impulsive values in the "master" mesh
    Mesh_pt->assign_initial_values_impulsive();
 
    // Loop over global data
    unsigned Nglobal = Global_data_pt.size();
    for (unsigned iglobal = 0; iglobal < Nglobal; iglobal++)
    {
      Global_data_pt[iglobal]
        ->time_stepper_pt()
        ->assign_initial_values_impulsive(Global_data_pt[iglobal]);
    }
  }
 
 
  //=======================================================================
  /// Assign the values for an impulsive start and also set the initial
  /// values of the previous dts to both be dt
  //======================================================================
  void Problem::assign_initial_values_impulsive(const double& dt)
  {
    // First initialise the dts and set the weights
    initialise_dt(dt);
    // Now call assign_initial_values_impulsive
    assign_initial_values_impulsive();
  }
 
  //=======================================================================
  /// Return the current value of continuous time. If no Time object
  /// has been assigned, then throw an error
  //======================================================================
  double& Problem::time()
  {
    // Guard: without a Time object there is no continuous time to hand out
    if (Time_pt == 0)
    {
      throw OomphLibError("Time object has not been set",
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);
    }

    // Read/write access to the time stored in the Time object
    return Time_pt->time();
  }
 
  //=======================================================================
  /// Return the current value of continuous time. If no Time object
  /// has been assigned, then throw an error. Const version.
  //======================================================================
  double Problem::time() const
  {
    // Guard: without a Time object there is no continuous time to report
    if (Time_pt == 0)
    {
      throw OomphLibError("Time object has not been set",
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);
    }

    // Return the time stored in the Time object by value
    return Time_pt->time();
  }
 
 
  //=======================================================================
  /// Set all problem data to have the same timestepper (timestepper_pt).
  /// This is mainly used in continuation and bifurcation detection problems
  /// in which case the total number of unknowns may change and the changes
  /// to the underlying memory layout means that the Dof_pt must be
  /// reallocated. Thus, the function calls assign_eqn_numbers() and returns
  /// the number of new equation numbers.
  //=========================================================================
  unsigned long Problem::set_timestepper_for_all_data(
    TimeStepper* const& time_stepper_pt, const bool& preserve_existing_data)
  {
    // Nodal and elemental data: handled in one go via the master mesh
    Mesh_pt->set_nodal_and_elemental_time_stepper(time_stepper_pt,
                                                  preserve_existing_data);

    // Mesh-level time stepper data: stored per mesh, so visit every
    // sub-mesh if there are any; otherwise the master mesh is the only one
    const unsigned n_sub_mesh = this->nsub_mesh();
    if (n_sub_mesh > 0)
    {
      for (unsigned m = 0; m < n_sub_mesh; ++m)
      {
        this->Sub_mesh_pt[m]->set_mesh_level_time_stepper(
          time_stepper_pt, preserve_existing_data);
      }
    }
    else
    {
      Mesh_pt->set_mesh_level_time_stepper(time_stepper_pt,
                                           preserve_existing_data);
    }

    // Global data are not part of any mesh: switch them over one by one
    const unsigned n_global_data = Global_data_pt.size();
    for (unsigned g = 0; g < n_global_data; ++g)
    {
      Global_data_pt[g]->set_time_stepper(time_stepper_pt,
                                          preserve_existing_data);
    }

#ifdef OOMPH_HAS_MPI
    // Flag that this path is not well tested for distributed problems
    if (Problem_has_been_distributed)
    {
      const std::string warning_message =
        "This has not been comprehensively tested for distributed problems.\n"
        "I'm sure that I need to worry about external halo and external "
        "elements.\n";
      OomphLibWarning(
        warning_message, OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // Changing the time stepper reallocates storage, so the existing dof
    // pointers are stale: renumber the equations and report the new count
    const unsigned long n_eqn = this->assign_eqn_numbers();
    return n_eqn;
  }
 
 
  //========================================================================
  /// Shift all time-dependent data along for next timestep.
  //========================================================================
  void Problem::shift_time_values()
  {
    // Move the values of dt in the Time object
    Time_pt->shift_dt();
 
    // Only shift time values in the "master" mesh, otherwise things will
    // get shifted twice in complex problems
    Mesh_pt->shift_time_values();
 
    // Shift global data with their own timesteppers
    unsigned Nglobal = Global_data_pt.size();
    for (unsigned iglobal = 0; iglobal < Nglobal; iglobal++)
    {
      Global_data_pt[iglobal]->time_stepper_pt()->shift_time_values(
        Global_data_pt[iglobal]);
    }
  }
 
 
  //========================================================================
  /// Calculate the predictions of all variables in problem
  //========================================================================
  void Problem::calculate_predictions()
  {
// Check that if we have multiple time steppers none of them want to
// predict by calling an explicit timestepper (as opposed to doing
// something like an explicit step by combining known history values, as
// done in BDF).
#ifdef PARANOID
    if (Time_stepper_pt.size() != 1)
    {
      for (unsigned j = 0; j < Time_stepper_pt.size(); j++)
      {
        if (time_stepper_pt()->predict_by_explicit_step())
        {
          std::string err = "Prediction by explicit step only works for "
                            "problems with a simple time";
          err += "stepper. I think implementing anything more general will";
          err += "require a rewrite of explicit time steppers. - David";
          throw OomphLibError(
            err, OOMPH_EXCEPTION_LOCATION, OOMPH_CURRENT_FUNCTION);
        }
      }
    }
#endif
 
 
    // Predict using an explicit timestepper (don't do it if adaptive = false
    // because pointers probably aren't set up).
    if (time_stepper_pt()->predict_by_explicit_step() &&
        time_stepper_pt()->adaptive_flag())
    {
      // Copy the time stepper's predictor pt into problem's explicit time
      // stepper pt (unless problem already has its own explicit time
      // stepper).
      ExplicitTimeStepper* ets_pt = time_stepper_pt()->explicit_predictor_pt();
#ifdef PARANOID
      if (ets_pt == 0)
      {
        std::string err = "Requested predictions by explicit step but explicit";
        err += " predictor pt is null.";
        throw OomphLibError(
          err, OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
      }
 
      if ((explicit_time_stepper_pt() != ets_pt) &&
          (explicit_time_stepper_pt() != 0))
      {
        throw OomphLibError("Problem has explicit time stepper other than "
                            "predictor, not sure how to handle this yet ??ds",
                            OOMPH_EXCEPTION_LOCATION,
                            OOMPH_CURRENT_FUNCTION);
      }
#endif
      explicit_time_stepper_pt() = ets_pt;
 
      // Backup dofs and time
      store_current_dof_values();
 
#ifdef PARANOID
      double backup_time = time();
#endif
 
      // Move time back so that we are at the start of the timestep (as
      // explicit_timestep functions expect). This is needed because the
      // predictor calculations are done after unsteady newton solve has
      // started, and it has already moved time forwards.
      double dt = time_pt()->dt();
      time() -= dt;
 
      // Explicit step
      this->explicit_timestep(dt, false);
 
      // Copy predicted dofs and time to their storage slots.
      set_dofs(time_stepper_pt()->predictor_storage_index(), Dof_pt);
      time_stepper_pt()->update_predicted_time(time());
 
      // Check we got the times right
#ifdef PARANOID
      if (std::abs(time() - backup_time) > 1e-12)
      {
        using namespace StringConversion;
        std::string err = "Predictor landed at the wrong time!";
        err += " Expected time " + to_string(backup_time, 14) + " but got ";
        err += to_string(time(), 14);
        throw OomphLibError(
          err, OOMPH_EXCEPTION_LOCATION, OOMPH_CURRENT_FUNCTION);
      }
#endif
 
      // Restore dofs and time
      restore_dof_values();
    }
 
    // Otherwise we can do predictions in a more object oriented way using
    // whatever timestepper the data provides (this is the normal case).
    else
    {
      // Calculate all predictions in the "master" mesh
      Mesh_pt->calculate_predictions();
 
      // Calculate predictions for global data with their own timesteppers
      unsigned Nglobal = Global_data_pt.size();
      for (unsigned iglobal = 0; iglobal < Nglobal; iglobal++)
      {
        Global_data_pt[iglobal]->time_stepper_pt()->calculate_predicted_values(
          Global_data_pt[iglobal]);
      }
    }
 
    // If requested then copy the predicted value into the current time data
    // slots, ready for the newton solver to use as an initial guess.
    if (use_predictor_values_as_initial_guess())
    {
      // Not sure I know enough about distributed problems to implement
      // this. Probably you just need to loop over ndof_local or something,
      // but I can't really test it...
#ifdef OOMPH_HAS_MPI
      if (distributed())
      {
        throw OomphLibError("Not yet implemented for distributed problems",
                            OOMPH_EXCEPTION_LOCATION,
                            OOMPH_CURRENT_FUNCTION);
      }
#endif
 
      // With multiple time steppers this is much more complex becuase you
      // need to check the time stepper for each data to get the
      // predictor_storage_index(). Do-able if you need it though.
      if (Time_stepper_pt.size() != 1)
      {
        std::string err = "Not implemented for multiple time steppers";
        throw OomphLibError(
          err, OOMPH_EXCEPTION_LOCATION, OOMPH_CURRENT_FUNCTION);
      }
 
      // Get predicted values
      DoubleVector predicted_dofs;
      get_dofs(time_stepper_pt()->predictor_storage_index(), predicted_dofs);
 
      // Update dofs at current step
      for (unsigned i = 0; i < ndof(); i++)
      {
        dof(i) = predicted_dofs[i];
      }
    }
  }
 
  //======================================================================
  /// Enable recycling of the mass matrix in explicit timestepping
  /// schemes. Useful for timestepping on fixed meshes when you want
  /// to avoid the linear solve phase.
  //=====================================================================
  void Problem::enable_mass_matrix_reuse()
  {
    Mass_matrix_reuse_is_enabled = true;
    Mass_matrix_has_been_computed = false;
 
    // If we have a discontinuous formulation set the elements to reuse
    // their own mass matrices
    if (Discontinuous_element_formulation)
    {
      const unsigned n_element = Problem::mesh_pt()->nelement();
      // Loop over the other elements
      for (unsigned e = 0; e < n_element; e++)
      {
        // Cache the element
        DGElement* const elem_pt =
          dynamic_cast<DGElement*>(Problem::mesh_pt()->element_pt(e));
        elem_pt->enable_mass_matrix_reuse();
      }
    }
  }
 
  //======================================================================
  /// Turn off the recycling of the mass matrix in explicit
  /// time-stepping schemes
  //======================================================================
  void Problem::disable_mass_matrix_reuse()
  {
    Mass_matrix_reuse_is_enabled = false;
    Mass_matrix_has_been_computed = false;
 
    // If we have a discontinuous formulation set the element-level
    // function
    if (Discontinuous_element_formulation)
    {
      const unsigned n_element = Problem::mesh_pt()->nelement();
      // Loop over the other elements
      for (unsigned e = 0; e < n_element; e++)
      {
        // Cache the element
        DGElement* const elem_pt =
          dynamic_cast<DGElement*>(Problem::mesh_pt()->element_pt(e));
        elem_pt->disable_mass_matrix_reuse();
      }
    }
  }
 
 
  //=========================================================================
  /// Copy Data values, nodal positions etc from specified problem.
  /// Note: This is not a copy constructor. We assume that the current
  /// and the "original" problem have both been created by calling
  /// the same problem constructor so that all Data objects,
  /// time steppers etc. in the two problems are completely independent.
  /// This function copies the nodal, internal and global values
  /// and the time parameters from the original problem into "this"
  /// one. This functionality is required, e.g. for
  /// multigrid computations.
  //=========================================================================
  void Problem::copy(Problem* orig_problem_pt)
  {
    // Copies, in this order: time and timestep history, nodes (per mesh),
    // global data, and the internal data of all elements. The counts of
    // the two problems are compared before anything is indexed in the
    // original; an OomphLibError is thrown on any mismatch.

    // Copy time
    //----------

    // Flag to indicate that orig problem is unsteady problem
    const bool unsteady_flag = (orig_problem_pt->time_pt() != 0);

    // Copy current time and previous time increments for proper unsteady run
    if (unsteady_flag)
    {
      oomph_info << "Copying an unsteady problem." << std::endl;

      // This problem needs a Time object of its own to receive the values
      if (this->time_pt() == 0)
      {
        throw OomphLibError(
          "Original problem is unsteady but the copy has no Time object.\n",
          OOMPH_CURRENT_FUNCTION,
          OOMPH_EXCEPTION_LOCATION);
      }

      // Current time
      this->time_pt()->time() = orig_problem_pt->time_pt()->time();

      // Timesteps
      const unsigned n_dt = orig_problem_pt->time_pt()->ndt();
      time_pt()->resize(n_dt);
      for (unsigned i = 0; i < n_dt; i++)
      {
        time_pt()->dt(i) = orig_problem_pt->time_pt()->dt(i);
      }

      // The timestep history has changed: reset the weights of all time
      // steppers
      const unsigned n_time_steppers = ntime_stepper();
      for (unsigned i = 0; i < n_time_steppers; i++)
      {
        time_stepper_pt(i)->set_weights();
      }
    }

    // Number of meshes to process: a problem without sub-meshes is
    // treated as having a single mesh, accessed as mesh_pt(0)
    unsigned nmesh = nsub_mesh();
    if (nmesh == 0) nmesh = 1;

    // Both problems must have the same number of meshes, otherwise the
    // loops below would index meshes the original doesn't have
    unsigned nmesh_orig = orig_problem_pt->nsub_mesh();
    if (nmesh_orig == 0) nmesh_orig = 1;
    if (nmesh != nmesh_orig)
    {
      std::ostringstream error_message;
      error_message << "Number of (sub)meshes in copy " << nmesh
                    << " not equal to the number in the original "
                    << nmesh_orig << std::endl;

      throw OomphLibError(
        error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }

    // Copy nodes
    //-----------

    // Loop over submeshes:
    for (unsigned m = 0; m < nmesh; m++)
    {
      // Find number of nodes in present mesh
      const unsigned long n_node = mesh_pt(m)->nnode();

      // Check # of nodes:
      const unsigned long n_node_orig = orig_problem_pt->mesh_pt(m)->nnode();
      if (n_node != n_node_orig)
      {
        std::ostringstream error_message;
        error_message << "Number of nodes in copy " << n_node
                      << " not equal to the number in the original "
                      << n_node_orig << std::endl;

        throw OomphLibError(error_message.str(),
                            OOMPH_CURRENT_FUNCTION,
                            OOMPH_EXCEPTION_LOCATION);
      }

      // Loop over the nodes
      for (unsigned long i = 0; i < n_node; i++)
      {
        // Try to cast to elastic node
        SolidNode* el_node_pt =
          dynamic_cast<SolidNode*>(mesh_pt(m)->node_pt(i));
        if (el_node_pt != 0)
        {
          SolidNode* el_node_orig_pt =
            dynamic_cast<SolidNode*>(orig_problem_pt->mesh_pt(m)->node_pt(i));

          // The matching node in the original must be a SolidNode too
          if (el_node_orig_pt == 0)
          {
            std::ostringstream error_message;
            error_message << "Node " << i << " in mesh " << m
                          << " is a SolidNode in the copy but not in the "
                          << "original" << std::endl;

            throw OomphLibError(error_message.str(),
                                OOMPH_CURRENT_FUNCTION,
                                OOMPH_EXCEPTION_LOCATION);
          }
          el_node_pt->copy(el_node_orig_pt);
        }
        else
        {
          mesh_pt(m)->node_pt(i)->copy(orig_problem_pt->mesh_pt(m)->node_pt(i));
        }
      }
    }


    // Copy global data:
    //------------------

    // Number of global data
    const unsigned n_global = Global_data_pt.size();

    // Check # of global data in orig problem
    const unsigned long n_global_orig = orig_problem_pt->nglobal_data();
    if (n_global != n_global_orig)
    {
      std::ostringstream error_message;
      error_message << "Number of global data in copy " << n_global
                    << " not equal to the number in the original "
                    << n_global_orig << std::endl;

      throw OomphLibError(
        error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }

    for (unsigned iglobal = 0; iglobal < n_global; iglobal++)
    {
      Global_data_pt[iglobal]->copy(orig_problem_pt->global_data_pt(iglobal));
    }


    // Copy internal data of elements:
    //--------------------------------

    // Loop over submeshes:
    for (unsigned m = 0; m < nmesh; m++)
    {
      // Check # of elements before indexing into the original's mesh
      const unsigned long n_element = mesh_pt(m)->nelement();
      const unsigned long n_element_orig =
        orig_problem_pt->mesh_pt(m)->nelement();
      if (n_element != n_element_orig)
      {
        std::ostringstream error_message;
        error_message << "Number of elements in copy " << n_element
                      << " not equal to the number in the original "
                      << n_element_orig << std::endl;

        throw OomphLibError(error_message.str(),
                            OOMPH_CURRENT_FUNCTION,
                            OOMPH_EXCEPTION_LOCATION);
      }

      // Loop over elements and deal with internal data
      for (unsigned long e = 0; e < n_element; e++)
      {
        GeneralisedElement* el_pt = mesh_pt(m)->element_pt(e);
        GeneralisedElement* el_orig_pt =
          orig_problem_pt->mesh_pt(m)->element_pt(e);

        // Check # of internals (also when the copy has none, so that a
        // mismatch in that direction is reported as well)
        const unsigned n_internal = el_pt->ninternal_data();
        const unsigned long n_internal_orig = el_orig_pt->ninternal_data();
        if (n_internal != n_internal_orig)
        {
          std::ostringstream error_message;
          error_message << "Number of internal data in copy " << n_internal
                        << " not equal to the number in the original "
                        << n_internal_orig << std::endl;

          throw OomphLibError(error_message.str(),
                              OOMPH_CURRENT_FUNCTION,
                              OOMPH_EXCEPTION_LOCATION);
        }

        for (unsigned i = 0; i < n_internal; i++)
        {
          el_pt->internal_data_pt(i)->copy(el_orig_pt->internal_data_pt(i));
        }
      }
    }
  }
 
  //=========================================================================
  /// Make and return a pointer to the copy of the problem. A virtual
  /// function that must be filled in by the user if they wish to perform
  /// adaptive refinement in bifurcation tracking or in multigrid problems.
  /// ALH: WILL NOT BE NECESSARY IN BIFURCATION TRACKING IN LONG RUN...
  //=========================================================================
  Problem* Problem::make_copy()
  {
    // This base-class version never returns: it always throws, to tell the
    // user that they have to provide their own overload.
    const std::string error_message =
      "This function must be overloaded in your specific problem, and must\n"
      "create an exact copy of your problem. Usually this will be achieved\n"
      "by a call to the constructor with exactly the same arguments as "
      "used\n";

    throw OomphLibError(
      error_message, OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
  }
 
 
  //=========================================================================
  /// Dump refinement pattern of all refineable meshes and all  generic
  /// Problem data to file for restart.
  //=========================================================================
  void Problem::dump(std::ofstream& dump_file) const
  {
    // Number of submeshes?
    unsigned n_mesh = nsub_mesh();
 
    dump_file << std::max(unsigned(1), n_mesh) << " # number of (sub)meshes "
              << std::endl;
 
    // Single mesh:
    //------------
    if (n_mesh == 0)
    {
      // Dump level of refinement before pruning
      if (TreeBasedRefineableMeshBase* mmesh_pt =
            dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(0)))
      {
        dump_file << mmesh_pt->uniform_refinement_level_when_pruned()
                  << " # uniform refinement when pruned " << std::endl;
      }
      else
      {
        dump_file << 0 << " # (fake) uniform refinement when pruned "
                  << std::endl;
      }
      dump_file << 9999 << " # test flag for end of sub-meshes " << std::endl;
    }
 
    // Multiple submeshes
    //------------------
    else
    {
      // Loop over submeshes to dump level of refinement before pruning
      for (unsigned imesh = 0; imesh < n_mesh; imesh++)
      {
        if (TreeBasedRefineableMeshBase* mmesh_pt =
              dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(imesh)))
        {
          dump_file << mmesh_pt->uniform_refinement_level_when_pruned()
                    << " # uniform refinement when pruned " << std::endl;
        }
        else
        {
          dump_file << 0 << " # (fake) uniform refinement when pruned "
                    << std::endl;
        }
      }
      dump_file << 9999 << " # test flag for end of sub-meshes " << std::endl;
    }
 
#ifdef OOMPH_HAS_MPI
 
    const int my_rank = this->communicator_pt()->my_rank();
 
    // Record destination of all base elements
    unsigned n = Base_mesh_element_pt.size();
    Vector<int> local_base_element_processor(n, -1);
    Vector<int> base_element_processor(n, -1);
    for (unsigned e = 0; e < n; e++)
    {
      GeneralisedElement* el_pt = Base_mesh_element_pt[e];
      if (el_pt != 0)
      {
        if (!el_pt->is_halo())
        {
          local_base_element_processor[e] = my_rank;
        }
      }
    }
 
 
    // Get target for all base elements by reduction
    if (Problem_has_been_distributed)
    {
      // Check that the base elements have been associated to a processor
      // (the Base_mesh_elemen_pt is only used for structured meshes,
      // therefore, if there are no ustructured meshes as part of the
      // problem this container will be empty)
      if (n > 0)
      {
        MPI_Allreduce(&local_base_element_processor[0],
                      &base_element_processor[0],
                      n,
                      MPI_INT,
                      MPI_MAX,
                      this->communicator_pt()->mpi_comm());
      }
    }
    else
    {
      // All the same...
      base_element_processor = local_base_element_processor;
    }
 
 
    dump_file << n << " # Number of base elements; partitioning follows.\n";
    for (unsigned e = 0; e < n; e++)
    {
      dump_file << base_element_processor[e] << "\n";
    }
    dump_file << "8888 #test flag for end of base element distribution\n";
 
#endif
 
    // Single mesh:
    //------------
    if (n_mesh == 0)
    {
      // Dump single mesh refinement pattern (if mesh is refineable)
      if (TreeBasedRefineableMeshBase* mmesh_pt =
            dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(0)))
      {
        mmesh_pt->dump_refinement(dump_file);
      }
#ifdef OOMPH_HAS_TRIANGLE_LIB
      // Dump triangle mesh TriangulateIO which represents mesh topology
      TriangleMeshBase* mmesh_pt = dynamic_cast<TriangleMeshBase*>(mesh_pt(0));
      if (mmesh_pt != 0 && mmesh_pt->use_triangulateio_restart())
      {
#ifdef OOMPH_HAS_MPI
        // Check if the mesh is distributed, if that is the case then
        // additional info. needs to be saved
        if (mmesh_pt->is_mesh_distributed())
        {
          // Dump the info. related with the distribution of the mesh
          mmesh_pt->dump_distributed_info_for_restart(dump_file);
        }
#endif
        mmesh_pt->dump_triangulateio(dump_file);
      }
#endif
    }
 
    // Multiple submeshes
    //------------------
    else
    {
      // Loop over submeshes
      for (unsigned imesh = 0; imesh < n_mesh; imesh++)
      {
        // Dump single mesh refinement pattern (if mesh is refineable)
        if (TreeBasedRefineableMeshBase* mmesh_pt =
              dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(imesh)))
        {
          mmesh_pt->dump_refinement(dump_file);
        }
#ifdef OOMPH_HAS_TRIANGLE_LIB
        // Dump triangle mesh TriangulateIO which represents mesh topology
        TriangleMeshBase* mmesh_pt =
          dynamic_cast<TriangleMeshBase*>(mesh_pt(imesh));
        if (mmesh_pt != 0 && mmesh_pt->use_triangulateio_restart())
        {
#ifdef OOMPH_HAS_MPI
          // Check if the mesh is distributed, if that is the case then
          // additional info. needs to be saved
          if (mmesh_pt->is_mesh_distributed())
          {
            // Dump the info. related with the distribution of the mesh
            mmesh_pt->dump_distributed_info_for_restart(dump_file);
          }
#endif
          mmesh_pt->dump_triangulateio(dump_file);
        }
#endif
      } // End of loop over submeshes
    }
 
 
    // Dump time
    // ---------
 
    // Flag to indicate unsteady run
    bool unsteady_flag = (time_pt() != 0);
    dump_file << unsteady_flag << " # bool flag for unsteady" << std::endl;
 
    // Current time and previous time increments for proper unsteady run
    if (unsteady_flag)
    {
      // Current time
      dump_file << time_pt()->time() << " # Time " << std::endl;
      // Timesteps
      unsigned n_dt = time_pt()->ndt();
      dump_file << n_dt << " # Number of timesteps " << std::endl;
      for (unsigned i = 0; i < n_dt; i++)
      {
        dump_file << time_pt()->dt(i) << " # dt " << std::endl;
      }
    }
    // Dummy time and previous time increments for steady run
    else
    {
      // Current time
      dump_file << "0.0 # Dummy time from steady run " << std::endl;
      // Timesteps
      dump_file << "0 # Dummy number of timesteps from steady run" << std::endl;
    }
 
    // Loop over submeshes and dump their data
    unsigned nmesh = nsub_mesh();
    if (nmesh == 0) nmesh = 1;
    for (unsigned m = 0; m < nmesh; m++)
    {
      mesh_pt(m)->dump(dump_file);
    }
 
    // Dump global data
 
    // Loop over global data
    unsigned Nglobal = Global_data_pt.size();
    dump_file << Nglobal << " # number of global Data items " << std::endl;
    for (unsigned iglobal = 0; iglobal < Nglobal; iglobal++)
    {
      Global_data_pt[iglobal]->dump(dump_file);
      dump_file << std::endl;
    }
  }
 
  //=========================================================================
  /// Read refinement pattern of all refineable meshes and refine them
  /// accordingly, then read all Data and nodal position info from
  /// file for restart. Return flag to indicate if the restart was from
  /// steady or unsteady solution.
  //=========================================================================
  void Problem::read(std::ifstream& restart_file, bool& unsteady_restart)
  {
    // Check if the file is actually open as it won't be if it doesn't
    // exist! In that case we're almost certainly restarting the run on
    // a larger number of processors than the restart data was produced.
    // Say so and return
    bool restart_file_is_open = true;
    if (!restart_file.is_open())
    {
      std::ostringstream warn_message;
      warn_message << "Restart file isn't open -- I'm assuming that this is\n";
      warn_message << "because we're restarting on a larger number of\n";
      warn_message << "processor than were in use when the restart data was \n";
      warn_message << "dumped.\n";
      OomphLibWarning(
        warn_message.str(), "Problem::read()", OOMPH_EXCEPTION_LOCATION);
      restart_file_is_open = false;
    }
 
    // Number of (sub)meshes?
    unsigned n_mesh = std::max(unsigned(1), nsub_mesh());
 
    std::string input_string;
 
    // Read line up to termination sign
    getline(restart_file, input_string, '#');
 
    // Ignore rest of line
    restart_file.ignore(80, '\n');
 
    // Read in number of sub-meshes
    unsigned n_submesh_read;
    n_submesh_read = std::atoi(input_string.c_str());
 
#ifdef PARANOID
    if (restart_file_is_open)
    {
      if (n_submesh_read != n_mesh)
      {
        std::ostringstream error_message;
        error_message
          << "Number of sub-meshes specified in restart file, "
          << n_submesh_read << " doesn't \n match the my number of sub-meshes,"
          << n_mesh << std::endl
          << "Make sure all sub-meshes have been added to the global mesh\n"
          << "when calling the Problem::dump() function.\n";
        throw OomphLibError(error_message.str(),
                            OOMPH_CURRENT_FUNCTION,
                            OOMPH_EXCEPTION_LOCATION);
      }
    }
#else
    // Suppress comiler warnings about non-used variable
    n_submesh_read++;
    n_submesh_read--;
#endif
 
 
    // Read levels of refinement before pruning
#ifdef OOMPH_HAS_MPI
    bool refine_and_prune_required = false;
#endif
    Vector<unsigned> nrefinement_for_mesh(n_mesh);
    for (unsigned i = 0; i < n_mesh; i++)
    {
      // Read line up to termination sign
      getline(restart_file, input_string, '#');
 
      // Ignore rest of line
      restart_file.ignore(80, '\n');
 
      // Convert
      nrefinement_for_mesh[i] = std::atoi(input_string.c_str());
 
      // Get pointer to sub-mesh in incarnation as tree-based refineable mesh
      TreeBasedRefineableMeshBase* ref_mesh_pt =
        dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(i));
 
      // If it's not a tree-based refineable mesh, ignore the following
      if (ref_mesh_pt == 0)
      {
        if (nrefinement_for_mesh[i] != 0)
        {
          std::ostringstream error_stream;
          error_stream << "Nonzero uniform-refinement-when-pruned specified\n"
                       << "even though mesh is not tree-based. Odd. May want\n"
                       << "to check this carefully before disabling this \n"
                       << "warning/error -- most likely if/when we start to\n"
                       << "prune unstructured meshes [though I can't see why\n"
                       << "we would want to do this, given that they are \n"
                       << "currently totally re-generated...]\n";
          throw OomphLibError(error_stream.str(),
                              OOMPH_CURRENT_FUNCTION,
                              OOMPH_EXCEPTION_LOCATION);
        }
      }
      else
      {
        // Get min and max refinement level
        unsigned local_min_ref = 0;
        unsigned local_max_ref = 0;
        ref_mesh_pt->get_refinement_levels(local_min_ref, local_max_ref);
 
        // Overall min refinement level over all meshes
        unsigned min_ref = local_min_ref;
 
#ifdef OOMPH_HAS_MPI
        if (Problem_has_been_distributed)
        {
          // Reconcile between processors: If (e.g. following
          // distribution/pruning) the mesh has no elements on this
          // processor) then ignore its contribution to the poll of
          // max/min refinement levels
          int int_local_min_ref = local_min_ref;
          if (ref_mesh_pt->nelement() == 0)
          {
            int_local_min_ref = INT_MAX;
          }
          int int_min_ref = 0;
          MPI_Allreduce(&int_local_min_ref,
                        &int_min_ref,
                        1,
                        MPI_INT,
                        MPI_MIN,
                        Communicator_pt->mpi_comm());
 
          // Overall min refinement level over all meshes
          min_ref = unsigned(int_min_ref);
        }
#endif
 
        // Need to refine less
        if (nrefinement_for_mesh[i] >= min_ref)
        {
          nrefinement_for_mesh[i] -= min_ref;
        }
      }
 
#ifdef OOMPH_HAS_MPI
      if (nrefinement_for_mesh[i] > 0)
      {
        refine_and_prune_required = true;
      }
#endif
    }
 
 
    // Reconcile overall need to refine and prune (even empty
    // processors have to participate in some communication!)
#ifdef OOMPH_HAS_MPI
    if (Problem_has_been_distributed)
    {
      unsigned local_req_flag = 0;
      unsigned req_flag = 0;
      if (refine_and_prune_required)
      {
        local_req_flag = 1;
      }
      MPI_Allreduce(&local_req_flag,
                    &req_flag,
                    1,
                    MPI_UNSIGNED,
                    MPI_MAX,
                    Communicator_pt->mpi_comm());
      refine_and_prune_required = false;
      if (req_flag == 1)
      {
        refine_and_prune_required = true;
      }
 
      // If refine and prune is required make number of uniform
      // refinements for each mesh consistent otherwise code
      // hangs on "empty" processors for which no restart file exists
      if (refine_and_prune_required)
      {
        // This is what we have locally
        Vector<unsigned> local_nrefinement_for_mesh(nrefinement_for_mesh);
        // Synchronise over all processors with max operation
        MPI_Allreduce(&local_nrefinement_for_mesh[0],
                      &nrefinement_for_mesh[0],
                      n_mesh,
                      MPI_UNSIGNED,
                      MPI_MAX,
                      Communicator_pt->mpi_comm());
 
#ifdef PARANOID
        // Check it: Reconciliation should only be required for
        // for processors on which no restart file was opened and
        // for which the meshes are therefore empty
        bool fail = false;
        std::ostringstream error_message;
        error_message << "Number of uniform refinements was not consistent \n"
                      << "for following meshes during restart on processor \n"
                      << "on which restart file could be opened:\n";
        for (unsigned i = 0; i < n_mesh; i++)
        {
          if ((local_nrefinement_for_mesh[i] != nrefinement_for_mesh[i]) &&
              restart_file_is_open)
          {
            fail = true;
            error_message << "Sub-mesh: " << i << "; local nrefinement: "
                          << local_nrefinement_for_mesh[i] << " "
                          << "; global/synced nrefinement: "
                          << nrefinement_for_mesh[i] << "\n";
          }
        }
        if (fail)
        {
          OomphLibWarning(
            error_message.str(), "Problem::read()", OOMPH_EXCEPTION_LOCATION);
        }
#endif
      }
    }
#endif
 
    // Read line up to termination sign
    getline(restart_file, input_string, '#');
 
    // Ignore rest of line
    restart_file.ignore(80, '\n');
 
    // Check flag that indicates that we've read the final data
    unsigned tmp;
    tmp = std::atoi(input_string.c_str());
 
#ifdef PARANOID
    if (restart_file_is_open)
    {
      if (tmp != 9999)
      {
        std::ostringstream error_message;
        error_message
          << "Error in reading restart data: Uniform refinement when pruned \n"
          << "flags should be followed by 9999.\n";
        throw OomphLibError(error_message.str(),
                            OOMPH_CURRENT_FUNCTION,
                            OOMPH_EXCEPTION_LOCATION);
      }
    }
 
#else
    // Suppress comiler warnings about non-used variable
    tmp++;
    tmp--;
#endif
 
 
#ifdef OOMPH_HAS_MPI
 
    // Refine and prune if required
    if (refine_and_prune_required)
    {
      refine_uniformly(nrefinement_for_mesh);
      prune_halo_elements_and_nodes();
    }
 
    // target_domain_for_local_non_halo_element[e] contains the number
    // of the domain [0,1,...,nproc-1] to which non-halo element e on THE
    // CURRENT PROCESSOR ONLY has been assigned. The order of the non-halo
    // elements is the same as in the Problem's mesh, with the halo
    // elements being skipped.
    Vector<unsigned> target_domain_for_local_non_halo_element;
 
    // If a restart file has been generated using code compiled without MPI
    // then it will not have any of the base element data.
    // If we try to read in that file with code that has been compied using
    // MPI, even if running only one processor, then it will fail here.
    // The ideal fix is to edit the restart file so that it contains the two
    // lines
    //
    // 0 # Number of base elements; partitioning follows.
    // 8888 # Test flag for end of base element distribution
    //
    // after the end of the sub-meshes, but before the number of elements
    // However, we can determine that this is the problem if n_base = 0,
    // so there is a little bit of logic below to catch this case
 
    // Store current location in the file (before we are about to read
    // in either the base mesh or number of elements of the first mesh)
    std::streampos position_before_base_element = restart_file.tellg();
    // Boolean flag used to set whether to read in base element info
    bool read_in_base_element_info = true;
 
    // Read line up to termination sign
    getline(restart_file, input_string, '#');
 
    // Ignore rest of line
    restart_file.ignore(80, '\n');
 
    // Get number of base elements as recorded
    unsigned n_base_element_read_in = atoi(input_string.c_str());
    unsigned nbase = Base_mesh_element_pt.size();
    if (restart_file_is_open)
    {
      if (n_base_element_read_in != nbase)
      {
        // If we have zero base elements the problem could be that the
        // restart file was generated without MPI. Issue a warning
        // and continue anyway
        if (nbase == 0)
        {
          std::ostringstream warn_message;
          warn_message
            << "The number of base elements in the mesh is 0,\n"
            << " but the restart file indicates that there are "
            << n_base_element_read_in << ".\n"
            << "This could be because the restart file was \n"
            << "generated by using code without MPI.\n"
            << "\n"
            << "The best fix is to include two additional lines\n"
            << "in the restart file: \n\n"
            << "0 # Number of base elements; partitioning follows.\n"
            << "8888 # Test flag for end of base element distribution\n"
            << "\n"
            << "These lines go after the flag 9999 that indicates\n"
            << "the end of the submesh information.\n"
            << "\n"
            << "The file will now continue to be read assuming that\n"
            << "the base element information is not present.\n"
            << "If you get strange results then please look carefully\n"
            << "at the restart file. The safest thing to do is to \n"
            << "ensure that the restart file was generated by code\n"
            << "compiled and run with the same parallel options.\n";
          OomphLibWarning(warn_message.str(),
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);
          // Set the skip flag to true
          // and rewind the file pointer
          read_in_base_element_info = false;
          restart_file.seekg(position_before_base_element);
        }
        // Otherwise throw a hard error
        else
        {
          std::ostringstream error_message;
          error_message << "About to read " << n_base_element_read_in
                        << " base elements \n"
                        << "though we only have " << nbase
                        << " base elements in mesh.\n";
          throw OomphLibError(error_message.str(),
                              OOMPH_CURRENT_FUNCTION,
                              OOMPH_EXCEPTION_LOCATION);
        }
      }
    }
 
    // Read in the remaning base element information, if necessary
    if (read_in_base_element_info == true)
    {
      // Read in target_domain_for_base_element[e] for all base elements
      Vector<unsigned> target_domain_for_base_element(nbase);
      for (unsigned e = 0; e < nbase; e++)
      {
        // Read line
        getline(restart_file, input_string);
 
        // Get target domain
        target_domain_for_base_element[e] = atoi(input_string.c_str());
      }
 
      // Read line up to termination sign
      getline(restart_file, input_string, '#');
 
      // Ignore rest of line
      restart_file.ignore(80, '\n');
 
      // Check flag that indicates that we've read the final data
      tmp = std::atoi(input_string.c_str());
 
 
#ifdef PARANOID
      if (restart_file_is_open)
      {
        if (tmp != 8888)
        {
          std::ostringstream error_message;
          error_message
            << "Error in reading restart data: Target proc for base elements \n"
            << "should be followed by 8888.\n";
          throw OomphLibError(error_message.str(),
                              OOMPH_CURRENT_FUNCTION,
                              OOMPH_EXCEPTION_LOCATION);
        }
      }
#endif
 
      // Loop over all elements (incl. any FaceElements) and assign
      // target domain for all local non-halo elements and check if
      // load balancing is required -- no need to do this if problem is
      // not distributed.
      unsigned load_balance_required_flag = 0;
      if (Problem_has_been_distributed)
      {
        // Working with TreeBasedRefineableMeshBase mesh
        unsigned local_load_balance_required_flag = 0;
        if (dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(0)))
        {
          const int my_rank = this->communicator_pt()->my_rank();
          unsigned nel = mesh_pt()->nelement();
          for (unsigned e = 0; e < nel; e++)
          {
            GeneralisedElement* el_pt = mesh_pt()->element_pt(e);
            if (!el_pt->is_halo())
            {
              // Get element number (plus one) in base element enumeration
              unsigned el_number_in_base_mesh_plus_one =
                Base_mesh_element_number_plus_one[el_pt];
 
              // If it's zero then we haven't found it, it may be a FaceElement
              // (in which case we move it to the same processor as its bulk
              // element
              if (el_number_in_base_mesh_plus_one == 0)
              {
                FaceElement* face_el_pt = dynamic_cast<FaceElement*>(el_pt);
                if (face_el_pt != 0)
                {
                  // Get corresponding bulk element
                  FiniteElement* bulk_el_pt = face_el_pt->bulk_element_pt();
 
                  // Use its element number (plus one) in base element
                  // enumeration
                  el_number_in_base_mesh_plus_one =
                    Base_mesh_element_number_plus_one[bulk_el_pt];
 
                  // If this is zero too we have a problem
                  if (el_number_in_base_mesh_plus_one == 0)
                  {
                    throw OomphLibError(
                      "el_number_in_base_mesh_plus_one=0 for bulk",
                      "Problem::read()",
                      OOMPH_EXCEPTION_LOCATION);
                  }
                }
              }
 
              // If we've made it here then we're not dealing with a
              // FaceElement but with an element that doesn't exist locally
              // --> WTF?
              if (el_number_in_base_mesh_plus_one == 0)
              {
                throw OomphLibError("el_number_in_base_mesh_plus_one=0",
                                    OOMPH_CURRENT_FUNCTION,
                                    OOMPH_EXCEPTION_LOCATION);
              }
 
              // Assign target domain for next local non-halo element in
              // the order in which it's encountered in the global mesh
              target_domain_for_local_non_halo_element.push_back(
                target_domain_for_base_element[el_number_in_base_mesh_plus_one -
                                               1]);
 
              // Do elements on this processor to be moved elsewhere?
              if (int(target_domain_for_base_element
                        [el_number_in_base_mesh_plus_one - 1]) != my_rank)
              {
                local_load_balance_required_flag = 1;
              }
            }
          }
 
        } // if (working with TreeBasedRefineableMeshBase mesh)
 
        // Get overall need to load balance by max
        MPI_Allreduce(&local_load_balance_required_flag,
                      &load_balance_required_flag,
                      1,
                      MPI_UNSIGNED,
                      MPI_MAX,
                      this->communicator_pt()->mpi_comm());
      }
 
      // Do we need to load balance?
      if (load_balance_required_flag == 1)
      {
        oomph_info << "Doing load balancing after pruning\n";
        DocInfo doc_info;
        doc_info.disable_doc();
        bool report_stats = false;
        load_balance(
          doc_info, report_stats, target_domain_for_local_non_halo_element);
        oomph_info << "Done load balancing after pruning\n";
      }
      else
      {
        oomph_info << "No need for load balancing after pruning\n";
      }
    } // End of read in base element information
#endif
 
 
    // Boolean to record if any unstructured bulk meshes have
    // been read in (and therefore completely re-generated, with new
    // elements and nodes) from disk
    bool have_read_unstructured_mesh = false;
 
    // Call the actions before adaptation
    actions_before_adapt();
 
    // If there are unstructured meshes in the problem we need
    // to strip out any face elements that are attached to them
    // because restart of unstructured meshes re-creates their elements
    // and nodes from scratch, leading to dangling pointers from the
    // face elements to the old elements and nodes. This function is
    // virtual and (practically) empty in the Problem base class
    // but toggles a flag to indicate that it has been called. We can then
    // issue a warning below, prompting the user to consider overloading it
    // if the problem is found to contain unstructured bulk meshes.
    // Warning can be ignored if the bulk mesh is not associated with any
    // face elements.
    Empty_actions_before_read_unstructured_meshes_has_been_called = false;
    actions_before_read_unstructured_meshes();
 
    // Update number of submeshes
    n_mesh = nsub_mesh();
 
    // Single mesh:
    //------------
    if (n_mesh == 0)
    {
      // Refine single mesh (if it's refineable)
      if (TreeBasedRefineableMeshBase* mmesh_pt =
            dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(0)))
      {
        // When we get in here the problem has been constructed
        // by the constructor and the mesh is its original unrefined
        // form.
        // RefineableMeshBase::refine(...) reads the refinement pattern from the
        // specified file and performs refinements until the mesh has
        // reached the same level of refinement as the mesh that existed
        // when the problem was dumped to disk.
        mmesh_pt->refine(restart_file);
      }
#ifdef OOMPH_HAS_TRIANGLE_LIB
      // Regenerate mesh from triangulate IO if it's a triangular mesh
      TriangleMeshBase* mmesh_pt = dynamic_cast<TriangleMeshBase*>(mesh_pt(0));
      if (mmesh_pt != 0 && mmesh_pt->use_triangulateio_restart())
      {
#ifdef OOMPH_HAS_MPI
        // Check if the mesh is distributed, if that is the case then
        // additional info. needs to be read
        if (mmesh_pt->is_mesh_distributed())
        {
          // Dump the info. related with the distribution of the mesh
          mmesh_pt->read_distributed_info_for_restart(restart_file);
        }
#endif
        // The function reads the TriangulateIO data structure from the dump
        // file and then completely regenerates the mesh using the
        // data structure
        mmesh_pt->remesh_from_triangulateio(restart_file);
        have_read_unstructured_mesh = true;
#ifdef OOMPH_HAS_MPI
        // Check if the mesh is distributed, if that is the case then we
        // need to re-establish the halo/haloed scheme (similar as in the
        // RefineableTriangleMesh::adapt() method)
        if (mmesh_pt->is_mesh_distributed())
        {
          mmesh_pt->reestablish_distribution_info_for_restart(
            this->communicator_pt(), restart_file);
        }
#endif
        // Still left to update the polylines representation, that is performed
        // later since the nodes positions may still change when reading info.
        // for the mesh, see below
      }
#endif
    }
 
    // Multiple submeshes
    //------------------
    else
    {
      // Loop over submeshes
      for (unsigned imesh = 0; imesh < n_mesh; imesh++)
      {
        // Refine single mesh (if its refineable)
        if (TreeBasedRefineableMeshBase* mmesh_pt =
              dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(imesh)))
        {
          // When we get in here the problem has been constructed
          // by the constructor and the mesh is its original unrefined
          // form.
          // RefineableMeshBase::refine(...) reads the refinement pattern from
          // the specified file and performs refinements until the mesh has
          // reached the same level of refinement as the mesh that existed
          // when the problem was dumped to disk.
          mmesh_pt->refine(restart_file);
        }
#ifdef OOMPH_HAS_TRIANGLE_LIB
        // Regenerate mesh from triangulate IO if it's a triangular mesh
        TriangleMeshBase* mmesh_pt =
          dynamic_cast<TriangleMeshBase*>(mesh_pt(imesh));
        if (mmesh_pt != 0 && mmesh_pt->use_triangulateio_restart())
        {
#ifdef OOMPH_HAS_MPI
          // Check if the mesh is distributed, if that is the case then
          // additional info. needs to be read
          if (mmesh_pt->is_mesh_distributed())
          {
            // Dump the info. related with the distribution of the mesh
            mmesh_pt->read_distributed_info_for_restart(restart_file);
          }
#endif
          // The function reads the TriangulateIO data structure from the dump
          // file and then completely regenerates the mesh using the
          // data structure
          mmesh_pt->remesh_from_triangulateio(restart_file);
          have_read_unstructured_mesh = true;
 
#ifdef OOMPH_HAS_MPI
          // Check if the mesh is distributed, if that is the case then we
          // need to re-establish the halo/haloed scheme (similar as in the
          // RefineableTriangleMesh::adapt() method)
          if (mmesh_pt->is_mesh_distributed())
          {
            mmesh_pt->reestablish_distribution_info_for_restart(
              this->communicator_pt(), restart_file);
          }
#endif
          // Still left to update the polylines representation, that is
          // performed later since the nodes positions may still change when
          // reading info. for the mesh, see below
        }
#endif
      } // End of loop over submeshes
 
 
      // Rebuild the global mesh
      rebuild_global_mesh();
    }
 
    // Any actions after adapt
    actions_after_adapt();
 
    // Re-attach face elements (or whatever else needs to be done
    // following the total re-generation of the unstructured meshes
    Empty_actions_after_read_unstructured_meshes_has_been_called = false;
    actions_after_read_unstructured_meshes();
 
 
    // Issue warning:
    if (!Suppress_warning_about_actions_before_read_unstructured_meshes)
    {
      if (have_read_unstructured_mesh)
      {
        if (Empty_actions_before_read_unstructured_meshes_has_been_called ||
            Empty_actions_after_read_unstructured_meshes_has_been_called)
        {
          std::ostringstream warn_message;
          warn_message
            << "I've just read in some unstructured meshes and have, in\n"
            << "the process, totally re-generated their nodes and elements.\n"
            << "This may create dangling pointers that still point to the\n"
            << "old nodes and elements, e.g. because FaceElements were\n"
            << "attached to these meshes or pointers to nodes and elements\n"
            << "were stored somewhere. FaceElements should therefore be\n"
            << "removed before reading in these meshes, using an overloaded\n"
            << "version of the function\n\n"
            << "   Problem::actions_before_read_unstructured_meshes()\n\n"
            << "and then re-attached using an overloaded version of\n\n"
            << "   Problem::actions_after_read_unstructured_meshes().\n\n"
            << "The required content of these functions is likely to be "
               "similar\n"
            << "to the Problem::actions_before_adapt() and \n"
            << "Problem::actions_after_adapt() that would be required in\n"
            << "a spatially adaptive computation. If these functions already\n"
            << "exist and perform the required actions, the \n"
            << "actions_before/after_read_unstructured_meshes() functions\n"
            << "can remain empty because the former are called automatically.\n"
            << "In this case, this warning my be suppressed by setting the\n"
            << "public boolean\n\n"
            << "   "
               "Problem::Suppress_warning_about_actions_before_read_"
               "unstructured_meshes\n\n"
            << "to true." << std::endl;
          OomphLibWarning(warn_message.str(),
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);
        }
      }
    }
 
    // Setup equation numbering scheme
    oomph_info << "\nNumber of equations in Problem::read(): "
               << assign_eqn_numbers() << std::endl
               << std::endl;
    // Read time info
    //---------------
    unsigned local_unsteady_restart_flag = 0;
    double local_time = -DBL_MAX;
    unsigned local_n_dt = 0;
#ifdef OOMPH_HAS_MPI
    unsigned local_sync_needed_flag = 0;
#endif
    Vector<double> local_dt;
 
    if (restart_file.is_open())
    {
      oomph_info << "Restart file exists" << std::endl;
#ifdef OOMPH_HAS_MPI
      local_sync_needed_flag = 0;
#endif
      // Read line up to termination sign
      getline(restart_file, input_string, '#');
 
      // Ignore rest of line
      restart_file.ignore(80, '\n');
 
      // Is the restart data from an unsteady run?
      local_unsteady_restart_flag = atoi(input_string.c_str());
 
      // Read line up to termination sign
      getline(restart_file, input_string, '#');
 
      // Ignore rest of line
      restart_file.ignore(80, '\n');
 
      // Read in initial time and set
      local_time = atof(input_string.c_str());
 
      // Read line up to termination sign
      getline(restart_file, input_string, '#');
 
      // Ignore rest of line
      restart_file.ignore(80, '\n');
 
      // Read & set number of timesteps
      local_n_dt = atoi(input_string.c_str());
      local_dt.resize(local_n_dt);
 
      // Read in timesteps:
      for (unsigned i = 0; i < local_n_dt; i++)
      {
        // Read line up to termination sign
        getline(restart_file, input_string, '#');
 
        // Ignore rest of line
        restart_file.ignore(80, '\n');
 
        // Read in initial time and set
        double prev_dt = atof(input_string.c_str());
        local_dt[i] = prev_dt;
      }
    }
    else
    {
      oomph_info << "Restart file does not exist" << std::endl;
#ifdef OOMPH_HAS_MPI
      local_sync_needed_flag = 1;
#endif
    }
 
 
    // No prepare global values, possibly via sync
    Vector<double> dt;
 
    // Do we need to sync?
    unsigned sync_needed_flag = 0;
 
#ifdef OOMPH_HAS_MPI
    if (Problem_has_been_distributed)
    {
      // Get need to sync by max
      MPI_Allreduce(&local_sync_needed_flag,
                    &sync_needed_flag,
                    1,
                    MPI_UNSIGNED,
                    MPI_MAX,
                    this->communicator_pt()->mpi_comm());
    }
#endif
 
    // Synchronise
    if (sync_needed_flag == 1)
    {
#ifdef OOMPH_HAS_MPI
 
 
#ifdef PARANOID
      if (!Problem_has_been_distributed)
      {
        std::ostringstream error_message;
        error_message << "Synchronisation of temporal restart data \n"
                      << "required even though Problem hasn't been distributed "
                         "-- very odd!\n";
        throw OomphLibError(error_message.str(),
                            OOMPH_CURRENT_FUNCTION,
                            OOMPH_EXCEPTION_LOCATION);
      }
#endif
 
      // Get unsteady restart flag by max-based reduction
      unsigned unsteady_restart_flag = 0;
      MPI_Allreduce(&local_unsteady_restart_flag,
                    &unsteady_restart_flag,
                    1,
                    MPI_UNSIGNED,
                    MPI_MAX,
                    this->communicator_pt()->mpi_comm());
 
      // So, is it an unsteady restart?
      unsteady_restart = false;
      if (unsteady_restart_flag == 1)
      {
        unsteady_restart = true;
 
        // Get time by max
        double time = -DBL_MAX;
        MPI_Allreduce(&local_time,
                      &time,
                      1,
                      MPI_DOUBLE,
                      MPI_MAX,
                      this->communicator_pt()->mpi_comm());
        time_pt()->time() = time;
 
        // Get number of timesteps by max-based reduction
        unsigned n_dt = 0;
        MPI_Allreduce(&local_n_dt,
                      &n_dt,
                      1,
                      MPI_UNSIGNED,
                      MPI_MAX,
                      this->communicator_pt()->mpi_comm());
 
        // Resize whatever needs resizing
        time_pt()->resize(n_dt);
        dt.resize(n_dt);
        if (local_dt.size() == 0)
        {
          local_dt.resize(n_dt, -DBL_MAX);
        }
 
        // Get timesteps increments by max-based reduction
        MPI_Allreduce(&local_dt[0],
                      &dt[0],
                      n_dt,
                      MPI_DOUBLE,
                      MPI_MAX,
                      this->communicator_pt()->mpi_comm());
      }
 
#else
 
      std::ostringstream error_message;
      error_message
        << "Synchronisation of temporal restart data \n"
        << "required even though we don't have mpi support -- very odd!\n";
      throw OomphLibError(
        error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
 
#endif
    }
    // No sync needed -- just copy across
    else
    {
      unsteady_restart = false;
      if (local_unsteady_restart_flag == 1)
      {
        unsteady_restart = true;
        time_pt()->time() = local_time;
        time_pt()->resize(local_n_dt);
        dt.resize(local_n_dt);
        for (unsigned i = 0; i < local_n_dt; i++)
        {
          dt[i] = local_dt[i];
        }
      }
    }
 
    // Initialise timestep -- also sets the weights for all timesteppers
    // in the problem.
    if (unsteady_restart) initialise_dt(dt);
 
    // Loop over submeshes:
    unsigned nmesh = nsub_mesh();
    if (nmesh == 0) nmesh = 1;
    for (unsigned m = 0; m < nmesh; m++)
    {
      //    //---------------------------------------------------------
      //    // Keep this commented out code around to debug restarts
      //    //---------------------------------------------------------
      //    std::ofstream some_file;
      //    char filename[100];
      //    snprintf(filename, sizeof(filename), "read_mesh%i_on_proc%i.dat",m,
      //            this->communicator_pt()->my_rank());
      //    some_file.open(filename);
      //    mesh_pt(m)->output(some_file);
      //    some_file.close();
 
      //    snprintf(filename, sizeof(filename),
      //    "read_mesh%i_with_haloes_on_proc%i.dat",m,
      //            this->communicator_pt()->my_rank());
      //    mesh_pt(m)->enable_output_of_halo_elements();
      //    some_file.open(filename);
      //    mesh_pt(m)->output(some_file);
      //    mesh_pt(m)->disable_output_of_halo_elements();
      //    some_file.close();
      //    oomph_info << "Doced mesh " << m << " before reading\n";
 
      //    snprintf(filename, sizeof(filename),
      //    "read_nodes_mesh%i_on_proc%i.dat",m,
      //            this->communicator_pt()->my_rank());
      //    some_file.open(filename);
      //    unsigned nnod=mesh_pt(m)->nnode();
      //    for (unsigned j=0;j<nnod;j++)
      //     {
      //      Node* nod_pt=mesh_pt(m)->node_pt(j);
      //      unsigned n=nod_pt->ndim();
      //      for (unsigned i=0;i<n;i++)
      //       {
      //        some_file << nod_pt->x(i) << " ";
      //       }
      //      some_file << nod_pt->is_halo() << " "
      //                << nod_pt->nvalue() << " "
      //                << nod_pt->hang_code() << "\n";
      //     }
      //    some_file.close();
      //    oomph_info << "Doced mesh " << m << " before reading\n";
      //    //---------------------------------------------------------
      //    // End keep this commented out code around to debug restarts
      //    //---------------------------------------------------------
 
      mesh_pt(m)->read(restart_file);
 
#ifdef OOMPH_HAS_TRIANGLE_LIB
      // Here update the polyline representation if working with
      // triangle base meshes
      if (TriangleMeshBase* mmesh_pt =
            dynamic_cast<TriangleMeshBase*>(mesh_pt(m)))
      {
        // In charge of updating the polylines representation to the
        // current refinement/unrefinement level after restart, it
        // also update the shared boundaries in case of working with a
        // distributed mesh
        mmesh_pt->update_polyline_representation_from_restart();
      }
#endif // #ifdef OOMPH_HAS_TRIANGLE_LIB
    }
 
    // Read global data:
    //------------------
 
    // Number of global data
    unsigned Nglobal = Global_data_pt.size();
 
    // Read line up to termination sign
    getline(restart_file, input_string, '#');
 
    // Ignore rest of line
    restart_file.ignore(80, '\n');
 
    // Check # of global data:
    unsigned long check_nglobal = atoi(input_string.c_str());
 
 
    if (restart_file_is_open)
    {
      if (check_nglobal != Nglobal)
      {
        std::ostringstream error_message;
        error_message << "The number of global data " << Nglobal
                      << " is not equal to that specified in the input file "
                      << check_nglobal << std::endl;
 
        throw OomphLibError(error_message.str(),
                            OOMPH_CURRENT_FUNCTION,
                            OOMPH_EXCEPTION_LOCATION);
      }
    }
 
    for (unsigned iglobal = 0; iglobal < Nglobal; iglobal++)
    {
      Global_data_pt[iglobal]->read(restart_file);
    }
  }
 
  //===================================================================
  /// Set all timesteps to the same value, dt, and assign
  /// weights for all timesteppers in the problem.
  //===================================================================
  void Problem::initialise_dt(const double& dt)
  {
    // Initialise the timesteps in the Problem's time object
    Time_pt->initialise_dt(dt);
 
    // Find out how many timesteppers there are
    unsigned n_time_steppers = ntime_stepper();
 
    // Loop over them all and set the weights
    for (unsigned i = 0; i < n_time_steppers; i++)
    {
      time_stepper_pt(i)->set_weights();
      if (time_stepper_pt(i)->adaptive_flag())
      {
        time_stepper_pt(i)->set_error_weights();
      }
    }
  }
 
  //=========================================================================
  /// Set the value of the timesteps to be equal to the values passed in
  /// a vector and assign weights for all timesteppers in the problem
  //========================================================================
  void Problem::initialise_dt(const Vector<double>& dt)
  {
    // Initialise the timesteps in the Problem's time object
    Time_pt->initialise_dt(dt);
 
    // Find out how many timesteppers there are
    unsigned n_time_steppers = ntime_stepper();
 
    // Loop over them all and set the weights
    for (unsigned i = 0; i < n_time_steppers; i++)
    {
      time_stepper_pt(i)->set_weights();
      if (time_stepper_pt(i)->adaptive_flag())
      {
        time_stepper_pt(i)->set_error_weights();
      }
    }
  }
 
  //========================================================
  /// Self-test: Check meshes and global data. Return 0 for OK
  //========================================================
  unsigned Problem::self_test()
  {
    // Initialise
    bool passed = true;
 
    // Are there any submeshes?
    unsigned Nmesh = nsub_mesh();
 
    // Just one mesh: Check it
    if (Nmesh == 0)
    {
      if (mesh_pt()->self_test() != 0)
      {
        passed = false;
        oomph_info
          << "\n ERROR: Failed Mesh::self_test() for single mesh in problem"
          << std::endl;
      }
    }
    // Loop over all submeshes and check them
    else
    {
      for (unsigned imesh = 0; imesh < Nmesh; imesh++)
      {
        if (mesh_pt(imesh)->self_test() != 0)
        {
          passed = false;
          oomph_info << "\n ERROR: Failed Mesh::self_test() for mesh imesh"
                     << imesh << std::endl;
        }
      }
    }
 
 
    // Check global data
    unsigned Nglobal = Global_data_pt.size();
    for (unsigned iglobal = 0; iglobal < Nglobal; iglobal++)
    {
      if (Global_data_pt[iglobal]->self_test() != 0)
      {
        passed = false;
        oomph_info
          << "\n ERROR: Failed Data::self_test() for global data iglobal"
          << iglobal << std::endl;
      }
    }
 
 
#ifdef OOMPH_HAS_MPI
 
    if (Problem_has_been_distributed)
    {
      // Note: This throws an error if it fails so no return is required.
      DocInfo tmp_doc_info;
      tmp_doc_info.disable_doc();
      check_halo_schemes(tmp_doc_info);
    }
 
#endif
 
    // Return verdict
    if (passed)
    {
      return 0;
    }
    else
    {
      return 1;
    }
  }
 
  //====================================================================
  /// A function that is used to adapt a bifurcation-tracking
  /// problem, which requires separate interpolation of the
  /// associated eigenfunction. The error measure is chosen to be
  /// a suitable combination of the errors in the base flow and the
  /// eigenfunction. The bifurcation type is passed as an argument
  //=====================================================================
  void Problem::bifurcation_adapt_helper(unsigned& n_refined,
                                         unsigned& n_unrefined,
                                         const unsigned& bifurcation_type,
                                         const bool& actually_adapt)
  {
    // Storage for eigenfunction from the problem
    Vector<DoubleVector> eigenfunction;
    // Get the eigenfunction from the problem
    this->get_bifurcation_eigenfunction(eigenfunction);
 
    // Get the bifurcation parameter
    double* parameter_pt = this->bifurcation_parameter_pt();
 
    // Get the frequency parameter if tracking a Hopf bifurcation
    double omega = 0.0;
    // If we're tracking a Hopf then also get the frequency
    if (bifurcation_type == 3)
    {
      omega = dynamic_cast<HopfHandler*>(assembly_handler_pt())->omega();
    }
 
    // If we're tracking a Pitchfork get the slack parameter (Hack)
    double sigma = 0.0;
    if (bifurcation_type == 2)
    {
      sigma = this->dof(this->ndof() - 1);
    }
 
    // We can now deactivate the bifurcation tracking in the problem
    // to restore the degrees of freedom to the unaugmented value
    this->deactivate_bifurcation_tracking();
 
    // Next, we create copies of the present problem
    // The number of copies depends on the number of eigenfunctions
    // One copy for each eigenfunction
    const unsigned n_copies = eigenfunction.size();
    Copy_of_problem_pt.resize(n_copies);
 
    // Loop over the number of copies
    for (unsigned c = 0; c < n_copies; c++)
    {
      // If we don't already have a copy
      if (Copy_of_problem_pt[c] == 0)
      {
        // Create the copy
        Copy_of_problem_pt[c] = this->make_copy();
 
        // Refine the copy to the same level as the current problem
 
        // Find number of submeshes
        const unsigned N_mesh = Copy_of_problem_pt[c]->nsub_mesh();
        // If there is only one mesh
        if (N_mesh == 0)
        {
          // Can we refine the mesh
          if (TreeBasedRefineableMeshBase* mmesh_pt =
                dynamic_cast<TreeBasedRefineableMeshBase*>(
                  Copy_of_problem_pt[c]->mesh_pt(0)))
          {
            // Is the adapt flag set
            if (mmesh_pt->is_adaptation_enabled())
            {
              // Now get the original problem's mesh if it's refineable
              if (TreeBasedRefineableMeshBase* original_mesh_pt =
                    dynamic_cast<TreeBasedRefineableMeshBase*>(
                      this->mesh_pt(0)))
              {
                mmesh_pt->refine_base_mesh_as_in_reference_mesh(
                  original_mesh_pt);
              }
              else
              {
                oomph_info
                  << "Info/Warning: Mesh in orginal problem is not refineable."
                  << std::endl;
              }
            }
            else
            {
              oomph_info << "Info/Warning: Mesh adaptation is disabled in copy."
                         << std::endl;
            }
          }
          else
          {
            oomph_info << "Info/Warning: Mesh cannot be adapted in copy."
                       << std::endl;
          }
        } // End of single mesh case
        // Otherwise loop over the submeshes
        else
        {
          for (unsigned m = 0; m < N_mesh; m++)
          {
            // Can we refine the submesh
            if (TreeBasedRefineableMeshBase* mmesh_pt =
                  dynamic_cast<TreeBasedRefineableMeshBase*>(
                    Copy_of_problem_pt[c]->mesh_pt(m)))
            {
              // Is the adapt flag set
              if (mmesh_pt->is_adaptation_enabled())
              {
                // Now get the original problem's mesh
                if (TreeBasedRefineableMeshBase* original_mesh_pt =
                      dynamic_cast<TreeBasedRefineableMeshBase*>(
                        this->mesh_pt(m)))
                {
                  mmesh_pt->refine_base_mesh_as_in_reference_mesh(
                    original_mesh_pt);
                }
                else
                {
                  oomph_info << "Info/Warning: Mesh in orginal problem is not "
                                "refineable."
                             << std::endl;
                }
              }
              else
              {
                oomph_info
                  << "Info/Warning: Mesh adaptation is disabled in copy."
                  << std::endl;
              }
            }
            else
            {
              oomph_info << "Info/Warning: Mesh cannot be adapted in copy."
                         << std::endl;
            }
          }
          // rebuild the global mesh in the copy
          Copy_of_problem_pt[c]->rebuild_global_mesh();
 
        } // End of multiple mesh case
 
        // Must call actions after adapt
        Copy_of_problem_pt[c]->actions_after_adapt();
 
        // Assign the equation numbers to the copy (quietly)
        (void)Copy_of_problem_pt[c]->assign_eqn_numbers();
      }
    } // End of creation of copies
 
 
    // Now check some numbers
    for (unsigned c = 0; c < n_copies; c++)
    {
      // Check that the dofs match for each copy
#ifdef PARANOID
      // If the problems don't match then complain
      if (Copy_of_problem_pt[c]->ndof() != this->ndof())
      {
        std::ostringstream error_stream;
        error_stream << "Number of unknowns in the problem copy " << c << " "
                     << "not equal to number in the original:\n"
                     << this->ndof() << " (original) "
                     << Copy_of_problem_pt[c]->ndof() << " (copy)\n";
 
        throw OomphLibError(
          error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
      }
#endif
 
      // Assign the eigenfunction(s) to the copied problems
      Copy_of_problem_pt[c]->assign_eigenvector_to_dofs(eigenfunction[c]);
      // Set all pinned values to zero
      Copy_of_problem_pt[c]->set_pinned_values_to_zero();
    }
 
    // Symmetrise the problem if we are solving a pitchfork
    if (bifurcation_type == 2)
    {
      Copy_of_problem_pt[0]
        ->symmetrise_eigenfunction_for_adaptive_pitchfork_tracking();
    }
 
    // Find error estimates based on current problem and eigenproblem
    // Now we need to get the error estimates for both problems.
    Vector<Vector<double>> base_error, eigenfunction_error;
    this->get_all_error_estimates(base_error);
    // Loop over the copies
    for (unsigned c = 0; c < n_copies; c++)
    {
      // Get the error estimates for the copy
      Copy_of_problem_pt[c]->get_all_error_estimates(eigenfunction_error);
 
      // Find the number of meshes
      unsigned n_mesh = base_error.size();
 
#ifdef PARANOID
      if (n_mesh != eigenfunction_error.size())
      {
        std::ostringstream error_stream;
        error_stream << "Problems do not have the same number of meshes\n"
                     << "Base : " << n_mesh
                     << " : Eigenproblem : " << eigenfunction_error.size()
                     << "\n";
        throw OomphLibError(
          error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
      }
#endif
 
      for (unsigned m = 0; m < n_mesh; m++)
      {
        // Check the number of elements is the same
        unsigned n_element = base_error[m].size();
#ifdef PARANOID
        if (n_element != eigenfunction_error[m].size())
        {
          std::ostringstream error_stream;
          error_stream << "Mesh " << m
                       << " does not have the same number of elements in the "
                          "two problems:\n"
                       << "Base: " << n_element
                       << " :  Eigenproblem: " << eigenfunction_error[m].size()
                       << "\n";
          throw OomphLibError(error_stream.str(),
                              OOMPH_CURRENT_FUNCTION,
                              OOMPH_EXCEPTION_LOCATION);
        }
#endif
        // Now add all the error esimates together
        for (unsigned e = 0; e < n_element; e++)
        {
          // Add the error estimates (lazy)
          base_error[m][e] += eigenfunction_error[m][e];
        }
      }
    } // End of loop over copies
 
    // Then refine all problems based on the combined measure
    // if we are actually adapting (not just estimating the errors)
    if (actually_adapt)
    {
      this->adapt_based_on_error_estimates(n_refined, n_unrefined, base_error);
      for (unsigned c = 0; c < n_copies; c++)
      {
        Copy_of_problem_pt[c]->adapt_based_on_error_estimates(
          n_refined, n_unrefined, base_error);
      }
      // Symmetrise the problem (again) if we are solving for a pitchfork
      if (bifurcation_type == 2)
      {
        Copy_of_problem_pt[0]
          ->symmetrise_eigenfunction_for_adaptive_pitchfork_tracking();
      }
 
      // Now get the refined guess for the eigenvector
      for (unsigned c = 0; c < n_copies; c++)
      {
        Copy_of_problem_pt[c]->get_dofs(eigenfunction[c]);
      }
    }
 
    // Reactivate the tracking
    switch (bifurcation_type)
    {
        // Fold tracking
      case 1:
        this->activate_fold_tracking(parameter_pt);
        break;
 
        // Pitchfork
      case 2:
        this->activate_pitchfork_tracking(parameter_pt, eigenfunction[0]);
        // reset the slack parameter
        this->dof(this->ndof() - 1) = sigma;
        break;
 
        // Hopf
      case 3:
        this->activate_hopf_tracking(
          parameter_pt, omega, eigenfunction[0], eigenfunction[1]);
        break;
 
      default:
        std::ostringstream error_stream;
        error_stream << "Bifurcation type " << bifurcation_type
                     << " not known\n"
                     << "1: Fold, 2: Pitchfork, 3: Hopf\n";
        throw OomphLibError(
          error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
  }
 
 
  //====================================================================
  /// A function that is used to document the errors when
  /// adapting a bifurcation-tracking
  /// problem, which requires separate interpolation of the
  /// associated eigenfunction. The error measure is chosen to be
  /// a suitable combination of the errors in the base flow and the
  /// eigenfunction. The bifurcation type is passed as an argument
  //=====================================================================
  void Problem::bifurcation_adapt_doc_errors(const unsigned& bifurcation_type)
  {
    // Dummy counters, only needed to satisfy the helper's interface.
    // They are initialised so that no indeterminate values are ever
    // passed on by reference.
    unsigned n_refined = 0;
    unsigned n_unrefined = 0;

    // Just call the bifurcation helper without actually adapting
    // (final argument, actually_adapt, is false)
    bifurcation_adapt_helper(n_refined, n_unrefined, bifurcation_type, false);
  }
 
 
  //========================================================================
  /// Adapt problem:
  /// Perform mesh adaptation for (all) refineable (sub)mesh(es),
  /// based on their own error estimates and the target errors specified
  /// in the mesh(es). Following mesh adaptation,
  /// update global mesh, and re-assign equation numbers.
  /// Return # of refined/unrefined elements. On return from this
  /// function, Problem can immediately be solved again.
  //======================================================================
  void Problem::adapt(unsigned& n_refined, unsigned& n_unrefined)
  {
    double t_start_total = 0.0;
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_start_total = TimingHelpers::timer();
    }
 
    // Get the bifurcation type
    int bifurcation_type = this->Assembly_handler_pt->bifurcation_type();
 
    bool continuation_problem = false;
 
    // If we have continuation data then we need to project that across to the
    // new mesh
    if (!Use_continuation_timestepper)
    {
      if (Dof_derivative.size() != 0)
      {
        continuation_problem = true;
      }
    }
 
    // If we are tracking a bifurcation then call the bifurcation adapt function
    if (bifurcation_type != 0)
    {
      this->bifurcation_adapt_helper(n_refined, n_unrefined, bifurcation_type);
      // Return immediately
      return;
    }
 
    if (continuation_problem)
    {
      // Create a copy of the problem
      Copy_of_problem_pt.resize(2);
      // If we don't already have a copy
      for (unsigned c = 0; c < 2; c++)
      {
        if (Copy_of_problem_pt[c] == 0)
        {
          // Create the copy
          Copy_of_problem_pt[c] = this->make_copy();
 
          // Refine the copy to the same level as the current problem
          // Must call actions before adapt
          Copy_of_problem_pt[c]->actions_before_adapt();
 
          // Find number of submeshes
          const unsigned N_mesh = Copy_of_problem_pt[c]->nsub_mesh();
 
          // If there is only one mesh
          if (N_mesh == 0)
          {
            // Can we refine the mesh
            if (TreeBasedRefineableMeshBase* mmesh_pt =
                  dynamic_cast<TreeBasedRefineableMeshBase*>(
                    Copy_of_problem_pt[c]->mesh_pt(0)))
            {
              // Is the adapt flag set
              if (mmesh_pt->is_adaptation_enabled())
              {
                // Now get the original problem's mesh if it's refineable
                if (TreeBasedRefineableMeshBase* original_mesh_pt =
                      dynamic_cast<TreeBasedRefineableMeshBase*>(
                        this->mesh_pt(0)))
                {
                  if (dynamic_cast<SolidMesh*>(original_mesh_pt) != 0)
                  {
                    oomph_info
                      << "Info/Warning: Adaptive Continuation is broken in "
                      << "SolidElement" << std::endl;
                  }
                  mmesh_pt->refine_base_mesh_as_in_reference_mesh(
                    original_mesh_pt);
                }
                else
                {
                  oomph_info << "Info/Warning: Mesh in orginal problem is not "
                                "refineable."
                             << std::endl;
                }
              }
              else
              {
                oomph_info
                  << "Info/Warning: Mesh adaptation is disabled in copy."
                  << std::endl;
              }
            }
            else if (TriangleMeshBase* tmesh_pt =
                       dynamic_cast<TriangleMeshBase*>(
                         Copy_of_problem_pt[c]->mesh_pt(0)))
            {
              if (TriangleMeshBase* original_mesh_pt =
                    dynamic_cast<TriangleMeshBase*>(this->mesh_pt(0)))
              {
                if (dynamic_cast<SolidMesh*>(original_mesh_pt) != 0)
                {
                  oomph_info
                    << "Info/Warning: Adaptive Continuation is broken in "
                    << "SolidElement" << std::endl;
                }
 
                // Remesh using the triangulateIO of the base mesh
                // Done via a file, so a bit hacky but this will be
                // superseded very soon
                std::ofstream tri_dump("triangle_mesh.dmp");
                original_mesh_pt->dump_triangulateio(tri_dump);
                tri_dump.close();
                std::ifstream tri_read("triangle_mesh.dmp");
                tmesh_pt->remesh_from_triangulateio(tri_read);
                tri_read.close();
 
 
                // Set the nodes to be at the same positions
                // as the original just in case the
                // triangulatio is out of sync with the real data
                const unsigned n_node = original_mesh_pt->nnode();
                for (unsigned n = 0; n < n_node; ++n)
                {
                  Node* const nod_pt = original_mesh_pt->node_pt(n);
                  Node* const new_node_pt = tmesh_pt->node_pt(n);
                  unsigned n_dim = nod_pt->ndim();
                  for (unsigned i = 0; i < n_dim; ++i)
                  {
                    new_node_pt->x(i) = nod_pt->x(i);
                  }
                }
              }
              else
              {
                oomph_info
                  << "Info/warning: Original Mesh is not TriangleBased\n"
                  << "... but the copy is!" << std::endl;
              }
            }
            else
            {
              oomph_info << "Info/Warning: Mesh cannot be adapted in copy."
                         << std::endl;
            }
          } // End of single mesh case
          // Otherwise loop over the submeshes
          else
          {
            for (unsigned m = 0; m < N_mesh; m++)
            {
              // Can we refine the submesh
              if (TreeBasedRefineableMeshBase* mmesh_pt =
                    dynamic_cast<TreeBasedRefineableMeshBase*>(
                      Copy_of_problem_pt[c]->mesh_pt(m)))
              {
                // Is the adapt flag set
                if (mmesh_pt->is_adaptation_enabled())
                {
                  // Now get the original problem's mesh
                  if (TreeBasedRefineableMeshBase* original_mesh_pt =
                        dynamic_cast<TreeBasedRefineableMeshBase*>(
                          this->mesh_pt(m)))
                  {
                    if (dynamic_cast<SolidMesh*>(original_mesh_pt) != 0)
                    {
                      oomph_info
                        << "Info/Warning: Adaptive Continuation is broken in "
                        << "SolidElement" << std::endl;
                    }
 
                    mmesh_pt->refine_base_mesh_as_in_reference_mesh(
                      original_mesh_pt);
                  }
                  else
                  {
                    oomph_info << "Info/Warning: Mesh in orginal problem is "
                                  "not refineable."
                               << std::endl;
                  }
                }
                else
                {
                  oomph_info
                    << "Info/Warning: Mesh adaptation is disabled in copy."
                    << std::endl;
                }
              }
              else if (TriangleMeshBase* tmesh_pt =
                         dynamic_cast<TriangleMeshBase*>(
                           Copy_of_problem_pt[c]->mesh_pt(m)))
              {
                if (TriangleMeshBase* original_mesh_pt =
                      dynamic_cast<TriangleMeshBase*>(this->mesh_pt(m)))
                {
                  if (dynamic_cast<SolidMesh*>(original_mesh_pt) != 0)
                  {
                    oomph_info
                      << "Info/Warning: Adaptive Continuation is broken in "
                      << "SolidElement" << std::endl;
                  }
 
                  // Remesh using the triangulateIO of the base mesh
                  // Done via a file, so a bit hacky but this will be
                  // superseded very soon
                  std::ofstream tri_dump("triangle_mesh.dmp");
                  original_mesh_pt->dump_triangulateio(tri_dump);
                  tri_dump.close();
                  std::ifstream tri_read("triangle_mesh.dmp");
                  tmesh_pt->remesh_from_triangulateio(tri_read);
                  tri_read.close();
 
                  // Set the nodes to be at the same positions
                  // as the original just in case the
                  // triangulatio is out of sync with the real data
                  const unsigned n_node = original_mesh_pt->nnode();
                  for (unsigned n = 0; n < n_node; ++n)
                  {
                    Node* const nod_pt = original_mesh_pt->node_pt(n);
                    Node* const new_node_pt = tmesh_pt->node_pt(n);
                    unsigned n_dim = nod_pt->ndim();
                    for (unsigned i = 0; i < n_dim; ++i)
                    {
                      new_node_pt->x(i) = nod_pt->x(i);
                    }
                  }
                }
                else
                {
                  oomph_info
                    << "Info/warning: Original Mesh is not TriangleBased\n"
                    << "... but the copy is!" << std::endl;
                }
              }
              else
              {
                oomph_info << "Info/Warning: Mesh cannot be adapted in copy."
                           << std::endl;
              }
            }
 
 
            // Must call actions after adapt
            Copy_of_problem_pt[c]->actions_after_adapt();
 
            // rebuild the global mesh in the copy
            Copy_of_problem_pt[c]->rebuild_global_mesh();
 
          } // End of multiple mesh case
 
          // Must call actions after adapt
          Copy_of_problem_pt[c]->actions_after_adapt();
 
          // Assign the equation numbers to the copy (quietly)
          (void)Copy_of_problem_pt[c]->assign_eqn_numbers();
        }
 
        // Check that the dofs match for each copy
#ifdef PARANOID
        // If the problems don't match then complain
        if (Copy_of_problem_pt[c]->ndof() != this->ndof())
        {
          std::ostringstream error_stream;
          error_stream << "Number of unknowns in the problem copy " << c << " "
                       << "not equal to number in the original:\n"
                       << this->ndof() << " (original) "
                       << Copy_of_problem_pt[c]->ndof() << " (copy)\n";
 
          throw OomphLibError(error_stream.str(),
                              OOMPH_CURRENT_FUNCTION,
                              OOMPH_EXCEPTION_LOCATION);
        }
#endif
      }
 
      // Need to set the Dof derivatives to the copied problem
      // Assign the eigenfunction(s) to the copied problems
      unsigned ndof_local = Dof_distribution_pt->nrow_local();
      for (unsigned i = 0; i < ndof_local; i++)
      {
        Copy_of_problem_pt[0]->dof(i) = this->dof_derivative(i);
        Copy_of_problem_pt[1]->dof(i) = this->dof_current(i);
      }
      // Set all pinned values to zero
      Copy_of_problem_pt[0]->set_pinned_values_to_zero();
      // Don't need to for the current dofs that are actually the dofs
 
      // Now adapt
      Vector<Vector<double>> base_error;
      this->get_all_error_estimates(base_error);
      this->adapt_based_on_error_estimates(n_refined, n_unrefined, base_error);
      Copy_of_problem_pt[0]->adapt_based_on_error_estimates(
        n_refined, n_unrefined, base_error);
      Copy_of_problem_pt[1]->adapt_based_on_error_estimates(
        n_refined, n_unrefined, base_error);
 
      // Now sort out the Dof pointer
      ndof_local = Dof_distribution_pt->nrow_local();
      if (Dof_derivative.size() != ndof_local)
      {
        Dof_derivative.resize(ndof_local, 0.0);
      }
      if (Dof_current.size() != ndof_local)
      {
        Dof_current.resize(ndof_local, 0.0);
      }
      for (unsigned i = 0; i < ndof_local; i++)
      {
        Dof_derivative[i] = Copy_of_problem_pt[0]->dof(i);
        Dof_current[i] = Copy_of_problem_pt[1]->dof(i);
      }
      // Return immediately
      return;
    }
 
    oomph_info << std::endl << std::endl;
    oomph_info << "Adapting problem:" << std::endl;
    oomph_info << "=================" << std::endl;
 
    double t_start = 0.0;
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_start = TimingHelpers::timer();
    }
 
    // Call the actions before adaptation
    actions_before_adapt();
 
    double t_end = 0.0;
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info << "Time for actions before adapt: " << t_end - t_start
                 << std::endl;
      t_start = TimingHelpers::timer();
    }
 
    // Initialise counters
    n_refined = 0;
    n_unrefined = 0;
 
    // Number of submeshes?
    unsigned Nmesh = nsub_mesh();
 
    // Single mesh:
    //------------
    if (Nmesh == 0)
    {
      // Refine single mesh if possible
      if (RefineableMeshBase* mmesh_pt =
            dynamic_cast<RefineableMeshBase*>(mesh_pt(0)))
      {
        if (mmesh_pt->is_adaptation_enabled())
        {
          double t_start = TimingHelpers::timer();
 
          // Get pointer to error estimator
          ErrorEstimator* error_estimator_pt =
            mmesh_pt->spatial_error_estimator_pt();
 
#ifdef PARANOID
          if (error_estimator_pt == 0)
          {
            throw OomphLibError("Error estimator hasn't been set yet",
                                OOMPH_CURRENT_FUNCTION,
                                OOMPH_EXCEPTION_LOCATION);
          }
#endif
 
          // Get error for all elements
          Vector<double> elemental_error(mmesh_pt->nelement());
 
          if (mmesh_pt->doc_info_pt() == 0)
          {
            error_estimator_pt->get_element_errors(mesh_pt(0), elemental_error);
          }
          else
          {
            error_estimator_pt->get_element_errors(
              mesh_pt(0), elemental_error, *mmesh_pt->doc_info_pt());
          }
 
          // Store max./min actual error
          mmesh_pt->max_error() = std::fabs(*std::max_element(
            elemental_error.begin(), elemental_error.end(), AbsCmp<double>()));
 
          mmesh_pt->min_error() = std::fabs(*std::min_element(
            elemental_error.begin(), elemental_error.end(), AbsCmp<double>()));
 
          oomph_info << "\n Max/min error: " << mmesh_pt->max_error() << " "
                     << mmesh_pt->min_error() << std::endl
                     << std::endl;
 
 
          if (Global_timings::Doc_comprehensive_timings)
          {
            t_end = TimingHelpers::timer();
            oomph_info << "Time for error estimation: " << t_end - t_start
                       << std::endl;
            t_start = TimingHelpers::timer();
          }
 
          // Adapt mesh
          mmesh_pt->adapt(elemental_error);
 
          // Add to counters
          n_refined += mmesh_pt->nrefined();
          n_unrefined += mmesh_pt->nunrefined();
 
          if (Global_timings::Doc_comprehensive_timings)
          {
            t_end = TimingHelpers::timer();
            oomph_info << "Time for complete mesh adaptation "
                       << "(but excluding comp of error estimate): "
                       << t_end - t_start << std::endl;
            t_start = TimingHelpers::timer();
          }
        }
        else
        {
          oomph_info << "Info/Warning: Mesh adaptation is disabled."
                     << std::endl;
        }
      }
      else
      {
        oomph_info << "Info/Warning: Mesh cannot be adapted" << std::endl;
      }
    }
    // Multiple submeshes
    //------------------
    else
    {
      // Loop over submeshes
      for (unsigned imesh = 0; imesh < Nmesh; imesh++)
      {
        // Refine single mesh uniformly if possible
        if (RefineableMeshBase* mmesh_pt =
              dynamic_cast<RefineableMeshBase*>(mesh_pt(imesh)))
        {
          double t_start = TimingHelpers::timer();
 
          // Get pointer to error estimator
          ErrorEstimator* error_estimator_pt =
            mmesh_pt->spatial_error_estimator_pt();
 
#ifdef PARANOID
          if (error_estimator_pt == 0)
          {
            throw OomphLibError("Error estimator hasn't been set yet",
                                OOMPH_CURRENT_FUNCTION,
                                OOMPH_EXCEPTION_LOCATION);
          }
#endif
 
          if (mmesh_pt->is_adaptation_enabled())
          {
            // Get error for all elements
            Vector<double> elemental_error(mmesh_pt->nelement());
            if (mmesh_pt->doc_info_pt() == 0)
            {
              error_estimator_pt->get_element_errors(mesh_pt(imesh),
                                                     elemental_error);
            }
            else
            {
              error_estimator_pt->get_element_errors(
                mesh_pt(imesh), elemental_error, *mmesh_pt->doc_info_pt());
            }
 
            // Store max./min error if the mesh has any elements
            if (mesh_pt(imesh)->nelement() > 0)
            {
              mmesh_pt->max_error() =
                std::fabs(*std::max_element(elemental_error.begin(),
                                            elemental_error.end(),
                                            AbsCmp<double>()));
 
              mmesh_pt->min_error() =
                std::fabs(*std::min_element(elemental_error.begin(),
                                            elemental_error.end(),
                                            AbsCmp<double>()));
            }
 
            oomph_info << "\n Max/min error: " << mmesh_pt->max_error() << " "
                       << mmesh_pt->min_error() << std::endl;
 
 
            if (Global_timings::Doc_comprehensive_timings)
            {
              t_end = TimingHelpers::timer();
              oomph_info << "Time for error estimation: " << t_end - t_start
                         << std::endl;
              t_start = TimingHelpers::timer();
            }
 
            // Adapt mesh
            mmesh_pt->adapt(elemental_error);
 
            // Add to counters
            n_refined += mmesh_pt->nrefined();
            n_unrefined += mmesh_pt->nunrefined();
 
 
            if (Global_timings::Doc_comprehensive_timings)
            {
              t_end = TimingHelpers::timer();
              oomph_info << "Time for complete mesh adaptation "
                         << "(but excluding comp of error estimate): "
                         << t_end - t_start << std::endl;
              t_start = TimingHelpers::timer();
            }
          }
          else
          {
            oomph_info << "Info/Warning: Mesh adaptation is disabled."
                       << std::endl;
          }
        }
        else
        {
          oomph_info << "Info/Warning: Mesh cannot be adapted." << std::endl;
        }
 
      } // End of loop over submeshes
 
      // Rebuild the global mesh
      rebuild_global_mesh();
    }
 
 
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info << "Total time for actual adaptation "
                 << "(all meshes; incl error estimates): " << t_end - t_start
                 << std::endl;
      t_start = TimingHelpers::timer();
    }
 
    // Any actions after adapt
    actions_after_adapt();
 
 
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info << "Time for actions after adapt: " << t_end - t_start
                 << std::endl;
      t_start = TimingHelpers::timer();
 
      oomph_info << "About to start re-assigning eqn numbers "
                 << "with Problem::assign_eqn_numbers() at end of "
                 << "Problem::adapt().\n";
    }
 
    // Attach the boundary conditions to the mesh
    oomph_info << "\nNumber of equations: " << assign_eqn_numbers() << std::endl
               << std::endl;
 
 
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info << "Time for re-assigning eqn numbers with "
                 << "Problem::assign_eqn_numbers() at end of Problem::adapt(): "
                 << t_end - t_start << std::endl;
      oomph_info << "Total time for adapt: " << t_end - t_start_total
                 << std::endl;
    }
  }
 
  //========================================================================
  /// p-adapt problem:
  /// Perform p-adaptation for (all) refineable (sub)mesh(es), based on
  /// their own error estimates and the target errors specified in the
  /// mesh(es). Following mesh adaptation, update the global mesh (only if
  /// the problem has submeshes) and re-assign the equation numbers.
  /// Return # of refined/unrefined elements in n_refined/n_unrefined.
  /// On return from this function, Problem can immediately be solved again.
  ///
  /// If the assembly handler reports a non-zero bifurcation type, the work
  /// is handed over to bifurcation_adapt_helper(...) and the function
  /// returns immediately.
  ///
  /// If PARANOID is defined, an OomphLibError is thrown when the spatial
  /// error estimator of a mesh that is about to be processed has not been
  /// set.
  //========================================================================
  void Problem::p_adapt(unsigned& n_refined, unsigned& n_unrefined)
  {
    // Stopwatch for the whole function (only read if timings are documented)
    double t_start_total = 0.0;
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_start_total = TimingHelpers::timer();
    }

    // Get the bifurcation type
    int bifurcation_type = this->Assembly_handler_pt->bifurcation_type();

    // If we are tracking a bifurcation then call the bifurcation adapt function
    if (bifurcation_type != 0)
    {
      this->bifurcation_adapt_helper(n_refined, n_unrefined, bifurcation_type);
      // Return immediately
      return;
    }

    oomph_info << std::endl << std::endl;
    oomph_info << "p-adapting problem:" << std::endl;
    oomph_info << "===================" << std::endl;

    // Problem-level stopwatch for the individual phases
    double t_start = 0.0;
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_start = TimingHelpers::timer();
    }

    // Call the actions before adaptation
    actions_before_adapt();

    double t_end = 0.0;
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info << "Time for actions before adapt: " << t_end - t_start
                 << std::endl;
      t_start = TimingHelpers::timer();
    }

    // Initialise counters
    n_refined = 0;
    n_unrefined = 0;

    // Number of submeshes?
    unsigned Nmesh = nsub_mesh();

    // Single mesh:
    //------------
    if (Nmesh == 0)
    {
      // p-adapt single mesh if possible
      if (RefineableMeshBase* mmesh_pt =
            dynamic_cast<RefineableMeshBase*>(mesh_pt(0)))
      {
        if (mmesh_pt->is_p_adaptation_enabled())
        {
          // Per-mesh stopwatch. Deliberately separate from the problem-level
          // t_start so that the "total time for actual adaptation" reported
          // further down still covers the complete adaptation phase.
          double t_start_mesh = TimingHelpers::timer();

          // Get pointer to error estimator
          ErrorEstimator* error_estimator_pt =
            mmesh_pt->spatial_error_estimator_pt();

#ifdef PARANOID
          if (error_estimator_pt == 0)
          {
            throw OomphLibError("Error estimator hasn't been set yet",
                                OOMPH_CURRENT_FUNCTION,
                                OOMPH_EXCEPTION_LOCATION);
          }
#endif

          // Get error for all elements
          Vector<double> elemental_error(mmesh_pt->nelement());

          if (mmesh_pt->doc_info_pt() == 0)
          {
            error_estimator_pt->get_element_errors(mesh_pt(0), elemental_error);
          }
          else
          {
            error_estimator_pt->get_element_errors(
              mesh_pt(0), elemental_error, *mmesh_pt->doc_info_pt());
          }

          // Store max./min actual error if the mesh has any elements:
          // for an empty range std::max_element/std::min_element return
          // the end iterator, which must not be dereferenced. (Same guard
          // as in the multiple-submesh branch below.)
          if (mesh_pt(0)->nelement() > 0)
          {
            mmesh_pt->max_error() =
              std::fabs(*std::max_element(elemental_error.begin(),
                                          elemental_error.end(),
                                          AbsCmp<double>()));

            mmesh_pt->min_error() =
              std::fabs(*std::min_element(elemental_error.begin(),
                                          elemental_error.end(),
                                          AbsCmp<double>()));
          }

          oomph_info << "\n Max/min error: " << mmesh_pt->max_error() << " "
                     << mmesh_pt->min_error() << std::endl
                     << std::endl;


          if (Global_timings::Doc_comprehensive_timings)
          {
            t_end = TimingHelpers::timer();
            oomph_info << "Time for error estimation: " << t_end - t_start_mesh
                       << std::endl;
            t_start_mesh = TimingHelpers::timer();
          }

          // p-adapt mesh
          mmesh_pt->p_adapt(elemental_error);

          // Add to counters
          n_refined += mmesh_pt->nrefined();
          n_unrefined += mmesh_pt->nunrefined();

          if (Global_timings::Doc_comprehensive_timings)
          {
            t_end = TimingHelpers::timer();
            oomph_info << "Time for complete mesh adaptation "
                       << "(but excluding comp of error estimate): "
                       << t_end - t_start_mesh << std::endl;
            t_start_mesh = TimingHelpers::timer();
          }
        }
        else
        {
          oomph_info << "Info/Warning: Mesh adaptation is disabled."
                     << std::endl;
        }
      }
      else
      {
        oomph_info << "Info/Warning: Mesh cannot be adapted" << std::endl;
      }
    }
    // Multiple submeshes
    //------------------
    else
    {
      // Loop over submeshes
      for (unsigned imesh = 0; imesh < Nmesh; imesh++)
      {
        // p-adapt this submesh if possible
        if (RefineableMeshBase* mmesh_pt =
              dynamic_cast<RefineableMeshBase*>(mesh_pt(imesh)))
        {
          // Per-submesh stopwatch (see comment in the single-mesh branch)
          double t_start_mesh = TimingHelpers::timer();

          // Get pointer to error estimator
          ErrorEstimator* error_estimator_pt =
            mmesh_pt->spatial_error_estimator_pt();

#ifdef PARANOID
          if (error_estimator_pt == 0)
          {
            throw OomphLibError("Error estimator hasn't been set yet",
                                OOMPH_CURRENT_FUNCTION,
                                OOMPH_EXCEPTION_LOCATION);
          }
#endif

          if (mmesh_pt->is_p_adaptation_enabled())
          {
            // Get error for all elements
            Vector<double> elemental_error(mmesh_pt->nelement());
            if (mmesh_pt->doc_info_pt() == 0)
            {
              error_estimator_pt->get_element_errors(mesh_pt(imesh),
                                                     elemental_error);
            }
            else
            {
              error_estimator_pt->get_element_errors(
                mesh_pt(imesh), elemental_error, *mmesh_pt->doc_info_pt());
            }

            // Store max./min error if the mesh has any elements
            if (mesh_pt(imesh)->nelement() > 0)
            {
              mmesh_pt->max_error() =
                std::fabs(*std::max_element(elemental_error.begin(),
                                            elemental_error.end(),
                                            AbsCmp<double>()));

              mmesh_pt->min_error() =
                std::fabs(*std::min_element(elemental_error.begin(),
                                            elemental_error.end(),
                                            AbsCmp<double>()));
            }

            oomph_info << "\n Max/min error: " << mmesh_pt->max_error() << " "
                       << mmesh_pt->min_error() << std::endl;


            if (Global_timings::Doc_comprehensive_timings)
            {
              t_end = TimingHelpers::timer();
              oomph_info << "Time for error estimation: "
                         << t_end - t_start_mesh << std::endl;
              t_start_mesh = TimingHelpers::timer();
            }

            // p-adapt mesh
            mmesh_pt->p_adapt(elemental_error);

            // Add to counters
            n_refined += mmesh_pt->nrefined();
            n_unrefined += mmesh_pt->nunrefined();


            if (Global_timings::Doc_comprehensive_timings)
            {
              t_end = TimingHelpers::timer();
              oomph_info << "Time for complete mesh adaptation "
                         << "(but excluding comp of error estimate): "
                         << t_end - t_start_mesh << std::endl;
              t_start_mesh = TimingHelpers::timer();
            }
          }
          else
          {
            oomph_info << "Info/Warning: Mesh adaptation is disabled."
                       << std::endl;
          }
        }
        else
        {
          oomph_info << "Info/Warning: Mesh cannot be adapted." << std::endl;
        }

      } // End of loop over submeshes

      // Rebuild the global mesh
      rebuild_global_mesh();
    }


    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info << "Total time for actual adaptation "
                 << "(all meshes; incl error estimates): " << t_end - t_start
                 << std::endl;
      t_start = TimingHelpers::timer();
    }

    // Any actions after adapt
    actions_after_adapt();


    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info << "Time for actions after adapt: " << t_end - t_start
                 << std::endl;
      t_start = TimingHelpers::timer();

      oomph_info << "About to start re-assigning eqn numbers "
                 << "with Problem::assign_eqn_numbers() at end of "
                 << "Problem::adapt().\n";
    }

    // Re-assign the equation numbers and report how many there are
    oomph_info << "\nNumber of equations: " << assign_eqn_numbers() << std::endl
               << std::endl;


    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info << "Time for re-assigning eqn numbers with "
                 << "Problem::assign_eqn_numbers() at end of Problem::adapt(): "
                 << t_end - t_start << std::endl;
      oomph_info << "Total time for adapt: " << t_end - t_start_total
                 << std::endl;
    }
  }
 
  //========================================================================
  /// Perform mesh adaptation for (all) refineable (sub)mesh(es),
  /// based on the error estimates in elemental_error
  /// and the target errors specified
  /// in the mesh(es). Following mesh adaptation,
  /// update global mesh, and re-assign equation numbers.
  /// Return # of refined/unrefined elements. On return from this
  /// function, Problem can immediately be solved again.
  //========================================================================
  void Problem::adapt_based_on_error_estimates(
    unsigned& n_refined,
    unsigned& n_unrefined,
    Vector<Vector<double>>& elemental_error)
  {
    oomph_info << std::endl << std::endl;
    oomph_info << "Adapting problem:" << std::endl;
    oomph_info << "=================" << std::endl;
 
    // Call the actions before adaptation
    actions_before_adapt();
 
    // Initialise counters
    n_refined = 0;
    n_unrefined = 0;
 
    // Number of submeshes?
    unsigned Nmesh = nsub_mesh();
 
    // Single mesh:
    //------------
    if (Nmesh == 0)
    {
      // Refine single mesh uniformly if possible
      if (RefineableMeshBase* mmesh_pt =
            dynamic_cast<RefineableMeshBase*>(Problem::mesh_pt(0)))
      {
        if (mmesh_pt->is_adaptation_enabled())
        {
          // Adapt mesh
          mmesh_pt->adapt(elemental_error[0]);
 
          // Add to counters
          n_refined += mmesh_pt->nrefined();
          n_unrefined += mmesh_pt->nunrefined();
        }
        else
        {
          oomph_info << "Info/Warning: Mesh adaptation is disabled."
                     << std::endl;
        }
      }
      else
      {
        oomph_info << "Info/Warning: Mesh cannot be adapted" << std::endl;
      }
    }
 
    // Multiple submeshes
    //------------------
    else
    {
      // Loop over submeshes
      for (unsigned imesh = 0; imesh < Nmesh; imesh++)
      {
        // Refine single mesh uniformly if possible
        if (RefineableMeshBase* mmesh_pt =
              dynamic_cast<RefineableMeshBase*>(Problem::mesh_pt(imesh)))
        {
          if (mmesh_pt->is_adaptation_enabled())
          {
            // Adapt mesh
            mmesh_pt->adapt(elemental_error[imesh]);
 
            // Add to counters
            n_refined += mmesh_pt->nrefined();
            n_unrefined += mmesh_pt->nunrefined();
          }
          else
          {
            oomph_info << "Info/Warning: Mesh adaptation is disabled."
                       << std::endl;
          }
        }
        else
        {
          oomph_info << "Info/Warning: Mesh cannot be adapted." << std::endl;
        }
 
      } // End of loop over submeshes
 
      // Rebuild the global mesh
      rebuild_global_mesh();
    }
 
    // Any actions after adapt
    actions_after_adapt();
 
    // Attach the boundary conditions to the mesh
    oomph_info << "\nNumber of equations: " << assign_eqn_numbers() << std::endl
               << std::endl;
  }
 
 
  //========================================================================
  /// Return the error estimates computed by (all) refineable
  /// (sub)mesh(es) in the elemental_error structure, which consists of
  /// a vector of elemental errors for each (sub)mesh. elemental_error is
  /// resized to the number of submeshes (or to 1 if the problem has no
  /// submeshes); entry i is only (re)computed if (sub)mesh i is refineable
  /// and has adaptation enabled. For those meshes the max./min. error
  /// stored in the mesh is updated too, provided the mesh has any elements.
  ///
  /// If PARANOID is defined, an OomphLibError is thrown when the spatial
  /// error estimator of a mesh that is about to be processed has not been
  /// set.
  //========================================================================
  void Problem::get_all_error_estimates(Vector<Vector<double>>& elemental_error)
  {
    // Number of submeshes?
    const unsigned Nmesh = nsub_mesh();

    // Single mesh:
    //------------
    if (Nmesh == 0)
    {
      // There is only one mesh
      elemental_error.resize(1);
      // Compute the errors for the single mesh if it is refineable
      if (RefineableMeshBase* mmesh_pt =
            dynamic_cast<RefineableMeshBase*>(Problem::mesh_pt(0)))
      {
        // If we can adapt the mesh
        if (mmesh_pt->is_adaptation_enabled())
        {
          // Get pointer to error estimator
          ErrorEstimator* error_estimator_pt =
            mmesh_pt->spatial_error_estimator_pt();

#ifdef PARANOID
          if (error_estimator_pt == 0)
          {
            throw OomphLibError("Error estimator hasn't been set yet",
                                OOMPH_CURRENT_FUNCTION,
                                OOMPH_EXCEPTION_LOCATION);
          }
#endif

          // Get error for all elements
          elemental_error[0].resize(mmesh_pt->nelement());
          // Are we documenting the errors or not
          if (mmesh_pt->doc_info_pt() == 0)
          {
            error_estimator_pt->get_element_errors(Problem::mesh_pt(0),
                                                   elemental_error[0]);
          }
          else
          {
            error_estimator_pt->get_element_errors(Problem::mesh_pt(0),
                                                   elemental_error[0],
                                                   *mmesh_pt->doc_info_pt());
          }

          // Store max./min actual error if the mesh has any elements:
          // for an empty range std::max_element/std::min_element return
          // the end iterator, which must not be dereferenced.
          if (mmesh_pt->nelement() > 0)
          {
            mmesh_pt->max_error() =
              std::fabs(*std::max_element(elemental_error[0].begin(),
                                          elemental_error[0].end(),
                                          AbsCmp<double>()));

            mmesh_pt->min_error() =
              std::fabs(*std::min_element(elemental_error[0].begin(),
                                          elemental_error[0].end(),
                                          AbsCmp<double>()));
          }

          oomph_info << "\n Max/min error: " << mmesh_pt->max_error() << " "
                     << mmesh_pt->min_error() << std::endl;
        }
        else
        {
          oomph_info << "Info/Warning: Mesh adaptation is disabled."
                     << std::endl;
        }
      }
      else
      {
        oomph_info << "Info/Warning: Mesh cannot be adapted" << std::endl;
      }
    }

    // Multiple submeshes
    //------------------
    else
    {
      // Resize to the number of submeshes
      elemental_error.resize(Nmesh);

      // Loop over submeshes
      for (unsigned imesh = 0; imesh < Nmesh; imesh++)
      {
        // Compute the errors for this submesh if it is refineable
        if (RefineableMeshBase* mmesh_pt =
              dynamic_cast<RefineableMeshBase*>(Problem::mesh_pt(imesh)))
        {
          // Get pointer to error estimator
          ErrorEstimator* error_estimator_pt =
            mmesh_pt->spatial_error_estimator_pt();

#ifdef PARANOID
          if (error_estimator_pt == 0)
          {
            throw OomphLibError("Error estimator hasn't been set yet",
                                OOMPH_CURRENT_FUNCTION,
                                OOMPH_EXCEPTION_LOCATION);
          }
#endif
          // If we can adapt the mesh
          if (mmesh_pt->is_adaptation_enabled())
          {
            // Get error for all elements
            elemental_error[imesh].resize(mmesh_pt->nelement());
            if (mmesh_pt->doc_info_pt() == 0)
            {
              error_estimator_pt->get_element_errors(Problem::mesh_pt(imesh),
                                                     elemental_error[imesh]);
            }
            else
            {
              error_estimator_pt->get_element_errors(Problem::mesh_pt(imesh),
                                                     elemental_error[imesh],
                                                     *mmesh_pt->doc_info_pt());
            }

            // Store max./min error if the mesh has any elements (a submesh
            // without elements would otherwise lead to a dereferenced end
            // iterator)
            if (mmesh_pt->nelement() > 0)
            {
              mmesh_pt->max_error() =
                std::fabs(*std::max_element(elemental_error[imesh].begin(),
                                            elemental_error[imesh].end(),
                                            AbsCmp<double>()));

              mmesh_pt->min_error() =
                std::fabs(*std::min_element(elemental_error[imesh].begin(),
                                            elemental_error[imesh].end(),
                                            AbsCmp<double>()));
            }

            oomph_info << "\n Max/min error: " << mmesh_pt->max_error() << " "
                       << mmesh_pt->min_error() << std::endl;
          }
          else
          {
            oomph_info << "Info/Warning: Mesh adaptation is disabled."
                       << std::endl;
          }
        }
        else
        {
          oomph_info << "Info/Warning: Mesh cannot be adapted." << std::endl;
        }

      } // End of loop over submeshes
    }
  }
 
  //========================================================================
  /// Compute the elemental error estimates for (all) refineable
  /// (sub)mesh(es), store the max. and min. error in each such mesh (if
  /// it has any elements) and write them to oomph_info. Non-refineable
  /// meshes are skipped silently.
  ///
  /// If the assembly handler reports a non-zero bifurcation type, the work
  /// is handed over to bifurcation_adapt_doc_errors(...) instead.
  ///
  /// If PARANOID is defined, an OomphLibError is thrown when a refineable
  /// mesh has no spatial error estimator.
  //========================================================================
  void Problem::doc_errors(DocInfo& doc_info)
  {
    // Get the bifurcation type
    int bifurcation_type = this->Assembly_handler_pt->bifurcation_type();
    // If we are tracking a bifurcation then call the bifurcation adapt function
    if (bifurcation_type != 0)
    {
      this->bifurcation_adapt_doc_errors(bifurcation_type);
      // Return immediately
      return;
    }

    // Number of submeshes?
    unsigned Nmesh = nsub_mesh();

    // Single mesh:
    //------------
    if (Nmesh == 0)
    {
      // Is the single mesh refineable?
      if (RefineableMeshBase* mmesh_pt =
            dynamic_cast<RefineableMeshBase*>(mesh_pt(0)))
      {
        // Get pointer to error estimator
        ErrorEstimator* error_estimator_pt =
          mmesh_pt->spatial_error_estimator_pt();

#ifdef PARANOID
        if (error_estimator_pt == 0)
        {
          throw OomphLibError("Error estimator hasn't been set yet",
                              OOMPH_CURRENT_FUNCTION,
                              OOMPH_EXCEPTION_LOCATION);
        }
#endif

        // Get error for all elements; only hand the DocInfo object to the
        // estimator if documentation is enabled in it
        Vector<double> elemental_error(mmesh_pt->nelement());
        if (!doc_info.is_doc_enabled())
        {
          error_estimator_pt->get_element_errors(mesh_pt(0), elemental_error);
        }
        else
        {
          error_estimator_pt->get_element_errors(
            mesh_pt(0), elemental_error, doc_info);
        }

        // Store max./min actual error if the mesh has any elements:
        // for an empty range std::max_element/std::min_element return
        // the end iterator, which must not be dereferenced. (Same guard
        // as in the multiple-submesh branch below.)
        if (mesh_pt(0)->nelement() > 0)
        {
          mmesh_pt->max_error() =
            std::fabs(*std::max_element(elemental_error.begin(),
                                        elemental_error.end(),
                                        AbsCmp<double>()));

          mmesh_pt->min_error() =
            std::fabs(*std::min_element(elemental_error.begin(),
                                        elemental_error.end(),
                                        AbsCmp<double>()));
        }

        oomph_info << "\n Max/min error: " << mmesh_pt->max_error() << " "
                   << mmesh_pt->min_error() << std::endl;
      }
    }

    // Multiple submeshes
    //------------------
    else
    {
      // Loop over submeshes
      for (unsigned imesh = 0; imesh < Nmesh; imesh++)
      {
        // Is this submesh refineable?
        if (RefineableMeshBase* mmesh_pt =
              dynamic_cast<RefineableMeshBase*>(mesh_pt(imesh)))
        {
          // Get pointer to error estimator
          ErrorEstimator* error_estimator_pt =
            mmesh_pt->spatial_error_estimator_pt();

#ifdef PARANOID
          if (error_estimator_pt == 0)
          {
            throw OomphLibError("Error estimator hasn't been set yet",
                                OOMPH_CURRENT_FUNCTION,
                                OOMPH_EXCEPTION_LOCATION);
          }
#endif

          // Get error for all elements.
          // NOTE(review): unlike the single-mesh branch, this branch uses
          // the submesh's own doc_info_pt() and never touches the doc_info
          // argument -- confirm that this is intended.
          Vector<double> elemental_error(mmesh_pt->nelement());
          if (mmesh_pt->doc_info_pt() == 0)
          {
            error_estimator_pt->get_element_errors(mesh_pt(imesh),
                                                   elemental_error);
          }
          else
          {
            error_estimator_pt->get_element_errors(
              mesh_pt(imesh), elemental_error, *mmesh_pt->doc_info_pt());
          }

          // Store max./min error if the mesh has any elements
          if (mesh_pt(imesh)->nelement() > 0)
          {
            mmesh_pt->max_error() =
              std::fabs(*std::max_element(elemental_error.begin(),
                                          elemental_error.end(),
                                          AbsCmp<double>()));

            mmesh_pt->min_error() =
              std::fabs(*std::min_element(elemental_error.begin(),
                                          elemental_error.end(),
                                          AbsCmp<double>()));
          }

          oomph_info << "\n Max/min error: " << mmesh_pt->max_error() << " "
                     << mmesh_pt->min_error() << std::endl;
        }

      } // End of loop over submeshes
    }
  }
 
  //========================================================================
  /// Refine (one and only!) mesh by splitting the elements identified
  /// by their numbers relative to the problems' only mesh, then rebuild
  /// the problem.
  //========================================================================
  void Problem::refine_selected_elements(
    const Vector<unsigned>& elements_to_be_refined)
  {
    actions_before_adapt();
 
    // Number of submeshes?
    unsigned Nmesh = nsub_mesh();
 
    // Single mesh:
    if (Nmesh == 0)
    {
      // Refine single mesh if possible
      if (TreeBasedRefineableMeshBase* mmesh_pt =
            dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(0)))
      {
        mmesh_pt->refine_selected_elements(elements_to_be_refined);
      }
      else
      {
        oomph_info << "Info/Warning: Mesh cannot be refined " << std::endl;
      }
    }
    // Multiple submeshes
    else
    {
      std::ostringstream error_message;
      error_message << "Problem::refine_selected_elements(...) only works for\n"
                    << "multiple-mesh problems if you specify the mesh\n"
                    << "number in the function argument before the Vector,\n"
                    << "or a Vector of Vectors for each submesh.\n"
                    << std::endl;
      throw OomphLibError(
        error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
 
    // Any actions after the adapatation phase
    actions_after_adapt();
 
    // Attach the boundary conditions to the mesh
    oomph_info << "Number of equations: " << assign_eqn_numbers() << std::endl;
  }
 
  //========================================================================
  /// Refine (one and only!) mesh by splitting the elements identified
  /// by their pointers, then rebuild the problem.
  //========================================================================
  void Problem::refine_selected_elements(
    const Vector<RefineableElement*>& elements_to_be_refined_pt)
  {
    actions_before_adapt();
 
    // Number of submeshes?
    unsigned Nmesh = nsub_mesh();
 
    // Single mesh:
    if (Nmesh == 0)
    {
      // Refine single mesh if possible
      if (TreeBasedRefineableMeshBase* mmesh_pt =
            dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(0)))
      {
        mmesh_pt->refine_selected_elements(elements_to_be_refined_pt);
      }
      else
      {
        oomph_info << "Info/Warning: Mesh cannot be refined " << std::endl;
      }
    }
    // Multiple submeshes
    else
    {
      std::ostringstream error_message;
      error_message << "Problem::refine_selected_elements(...) only works for\n"
                    << "multiple-mesh problems if you specify the mesh\n"
                    << "number in the function argument before the Vector,\n"
                    << "or a Vector of Vectors for each submesh.\n"
                    << std::endl;
      throw OomphLibError(
        error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
 
    // Any actions after the adapatation phase
    actions_after_adapt();
 
    // Do equation numbering
    oomph_info << "Number of equations: " << assign_eqn_numbers() << std::endl;
  }
 
  //========================================================================
  /// Refine specified submesh by splitting the elements identified
  /// by their numbers relative to the specified mesh, then rebuild the problem.
  //========================================================================
  void Problem::refine_selected_elements(
    const unsigned& i_mesh, const Vector<unsigned>& elements_to_be_refined)
  {
    actions_before_adapt();
 
    // Number of submeshes?
    unsigned n_mesh = nsub_mesh();
 
    if (i_mesh >= n_mesh)
    {
      std::ostringstream error_message;
      error_message << "Problem only has " << n_mesh
                    << " submeshes. Cannot refine submesh " << i_mesh
                    << std::endl;
      throw OomphLibError(
        error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
 
    // Refine single mesh if possible
    if (TreeBasedRefineableMeshBase* mmesh_pt =
          dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(i_mesh)))
    {
      mmesh_pt->refine_selected_elements(elements_to_be_refined);
    }
    else
    {
      oomph_info << "Info/Warning: Mesh cannot be refined " << std::endl;
    }
 
    if (n_mesh > 1)
    {
      // Rebuild the global mesh
      rebuild_global_mesh();
    }
 
    // Any actions after the adapatation phase
    actions_after_adapt();
 
    // Do equation numbering
    oomph_info << "Number of equations: " << assign_eqn_numbers() << std::endl;
  }
 
 
  //========================================================================
  /// Refine specified submesh by splitting the elements identified
  /// by their pointers, then rebuild the problem.
  //========================================================================
  void Problem::refine_selected_elements(
    const unsigned& i_mesh,
    const Vector<RefineableElement*>& elements_to_be_refined_pt)
  {
    actions_before_adapt();
 
    // Number of submeshes?
    unsigned n_mesh = nsub_mesh();
 
    if (i_mesh >= n_mesh)
    {
      std::ostringstream error_message;
      error_message << "Problem only has " << n_mesh
                    << " submeshes. Cannot refine submesh " << i_mesh
                    << std::endl;
      throw OomphLibError(
        error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
 
    // Refine single mesh if possible
    if (TreeBasedRefineableMeshBase* mmesh_pt =
          dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(i_mesh)))
    {
      mmesh_pt->refine_selected_elements(elements_to_be_refined_pt);
    }
    else
    {
      oomph_info << "Info/Warning: Mesh cannot be refined " << std::endl;
    }
 
    if (n_mesh > 1)
    {
      // Rebuild the global mesh
      rebuild_global_mesh();
    }
 
    // Any actions after the adapatation phase
    actions_after_adapt();
 
    // Do equation numbering
    oomph_info << "Number of equations: " << assign_eqn_numbers() << std::endl;
  }
 
  //========================================================================
  /// Refine all submeshes by splitting the elements identified by their
  /// numbers relative to each submesh in a Vector of Vectors, then
  /// rebuild the problem.
  //========================================================================
  void Problem::refine_selected_elements(
    const Vector<Vector<unsigned>>& elements_to_be_refined)
  {
    actions_before_adapt();
 
    // Number of submeshes?
    unsigned n_mesh = nsub_mesh();
 
    // Refine all submeshes if possible
    for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
    {
      if (TreeBasedRefineableMeshBase* mmesh_pt =
            dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(i_mesh)))
      {
        mmesh_pt->refine_selected_elements(elements_to_be_refined[i_mesh]);
      }
      else
      {
        oomph_info << "Info/Warning: Mesh cannot be refined " << std::endl;
      }
    }
 
    // Rebuild the global mesh
    rebuild_global_mesh();
 
    // Any actions after the adapatation phase
    actions_after_adapt();
 
    // Do equation numbering
    oomph_info << "Number of equations: " << assign_eqn_numbers() << std::endl;
  }
 
  //========================================================================
  /// Refine all submeshes by splitting the elements identified by their
  /// pointers within each submesh in a Vector of Vectors, then
  /// rebuild the problem.
  //========================================================================
  void Problem::refine_selected_elements(
    const Vector<Vector<RefineableElement*>>& elements_to_be_refined_pt)
  {
    actions_before_adapt();
 
    // Number of submeshes?
    unsigned n_mesh = nsub_mesh();
 
    // Refine all submeshes if possible
    for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
    {
      if (TreeBasedRefineableMeshBase* mmesh_pt =
            dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(i_mesh)))
      {
        mmesh_pt->refine_selected_elements(elements_to_be_refined_pt[i_mesh]);
      }
      else
      {
        oomph_info << "Info/Warning: Mesh cannot be refined " << std::endl;
      }
    }
 
    // Rebuild the global mesh
    rebuild_global_mesh();
 
    // Any actions after the adapatation phase
    actions_after_adapt();
 
    // Do equation numbering
    oomph_info << "Number of equations: " << assign_eqn_numbers() << std::endl;
  }
 
  //========================================================================
  /// p-refine (one and only!) mesh by refining the elements identified
  /// by their numbers relative to the problem's only mesh, then rebuild
  /// the problem.
  //========================================================================
  void Problem::p_refine_selected_elements(
    const Vector<unsigned>& elements_to_be_refined)
  {
    actions_before_adapt();
 
    // Number of submeshes?
    unsigned Nmesh = nsub_mesh();
 
    // Single mesh:
    if (Nmesh == 0)
    {
      // Refine single mesh if possible
      if (TreeBasedRefineableMeshBase* mmesh_pt =
            dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(0)))
      {
        mmesh_pt->p_refine_selected_elements(elements_to_be_refined);
      }
      else
      {
        oomph_info << "Info/Warning: Mesh cannot be refined " << std::endl;
      }
    }
    // Multiple submeshes
    else
    {
      std::ostringstream error_message;
      error_message
        << "Problem::p_refine_selected_elements(...) only works for\n"
        << "multiple-mesh problems if you specify the mesh\n"
        << "number in the function argument before the Vector,\n"
        << "or a Vector of Vectors for each submesh.\n"
        << std::endl;
      throw OomphLibError(
        error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
 
    // Any actions after the adapatation phase
    actions_after_adapt();
 
    // Attach the boundary conditions to the mesh
    oomph_info << "Number of equations: " << assign_eqn_numbers() << std::endl;
  }
 
  //========================================================================
  /// p-refine (one and only!) mesh by refining the elements identified
  /// by their pointers, then rebuild the problem.
  //========================================================================
  void Problem::p_refine_selected_elements(
    const Vector<PRefineableElement*>& elements_to_be_refined_pt)
  {
    actions_before_adapt();
 
    // Number of submeshes?
    unsigned Nmesh = nsub_mesh();
 
    // Single mesh:
    if (Nmesh == 0)
    {
      // Refine single mesh if possible
      if (TreeBasedRefineableMeshBase* mmesh_pt =
            dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(0)))
      {
        mmesh_pt->p_refine_selected_elements(elements_to_be_refined_pt);
      }
      else
      {
        oomph_info << "Info/Warning: Mesh cannot be refined " << std::endl;
      }
    }
    // Multiple submeshes
    else
    {
      std::ostringstream error_message;
      error_message
        << "Problem::p_refine_selected_elements(...) only works for\n"
        << "multiple-mesh problems if you specify the mesh\n"
        << "number in the function argument before the Vector,\n"
        << "or a Vector of Vectors for each submesh.\n"
        << std::endl;
      throw OomphLibError(
        error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
 
    // Any actions after the adapatation phase
    actions_after_adapt();
 
    // Do equation numbering
    oomph_info << "Number of equations: " << assign_eqn_numbers() << std::endl;
  }
 
  //========================================================================
  /// p-refine specified submesh by refining the elements identified
  /// by their numbers relative to the specified mesh, then rebuild the problem.
  //========================================================================
  void Problem::p_refine_selected_elements(
    const unsigned& i_mesh, const Vector<unsigned>& elements_to_be_refined)
  {
    OomphLibWarning(
      "p-refinement for multiple submeshes has not yet been tested.",
      "Problem::p_refine_selected_elements()",
      OOMPH_EXCEPTION_LOCATION);
 
    actions_before_adapt();
 
    // Number of submeshes?
    unsigned n_mesh = nsub_mesh();
 
    if (i_mesh >= n_mesh)
    {
      std::ostringstream error_message;
      error_message << "Problem only has " << n_mesh
                    << " submeshes. Cannot p-refine submesh " << i_mesh
                    << std::endl;
      throw OomphLibError(
        error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
 
    // Refine single mesh if possible
    if (TreeBasedRefineableMeshBase* mmesh_pt =
          dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(i_mesh)))
    {
      mmesh_pt->p_refine_selected_elements(elements_to_be_refined);
    }
    else
    {
      oomph_info << "Info/Warning: Mesh cannot be refined " << std::endl;
    }
 
    if (n_mesh > 1)
    {
      // Rebuild the global mesh
      rebuild_global_mesh();
    }
 
    // Any actions after the adapatation phase
    actions_after_adapt();
 
    // Do equation numbering
    oomph_info << "Number of equations: " << assign_eqn_numbers() << std::endl;
  }
 
 
  //========================================================================
  /// p-refine specified submesh by refining the elements identified
  /// by their pointers, then rebuild the problem.
  //========================================================================
  void Problem::p_refine_selected_elements(
    const unsigned& i_mesh,
    const Vector<PRefineableElement*>& elements_to_be_refined_pt)
  {
    OomphLibWarning(
      "p-refinement for multiple submeshes has not yet been tested.",
      "Problem::p_refine_selected_elements()",
      OOMPH_EXCEPTION_LOCATION);
 
    actions_before_adapt();
 
    // Number of submeshes?
    unsigned n_mesh = nsub_mesh();
 
    if (i_mesh >= n_mesh)
    {
      std::ostringstream error_message;
      error_message << "Problem only has " << n_mesh
                    << " submeshes. Cannot p-refine submesh " << i_mesh
                    << std::endl;
      throw OomphLibError(
        error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
 
    // Refine single mesh if possible
    if (TreeBasedRefineableMeshBase* mmesh_pt =
          dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(i_mesh)))
    {
      mmesh_pt->p_refine_selected_elements(elements_to_be_refined_pt);
    }
    else
    {
      oomph_info << "Info/Warning: Mesh cannot be refined " << std::endl;
    }
 
    if (n_mesh > 1)
    {
      // Rebuild the global mesh
      rebuild_global_mesh();
    }
 
    // Any actions after the adapatation phase
    actions_after_adapt();
 
    // Do equation numbering
    oomph_info << "Number of equations: " << assign_eqn_numbers() << std::endl;
  }
 
  //========================================================================
  /// p-refine all submeshes by refining the elements identified by their
  /// numbers relative to each submesh in a Vector of Vectors, then
  /// rebuild the problem.
  //========================================================================
  void Problem::p_refine_selected_elements(
    const Vector<Vector<unsigned>>& elements_to_be_refined)
  {
    OomphLibWarning(
      "p-refinement for multiple submeshes has not yet been tested.",
      "Problem::p_refine_selected_elements()",
      OOMPH_EXCEPTION_LOCATION);
 
    actions_before_adapt();
 
    // Number of submeshes?
    unsigned n_mesh = nsub_mesh();
 
    // Refine all submeshes if possible
    for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
    {
      if (TreeBasedRefineableMeshBase* mmesh_pt =
            dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(i_mesh)))
      {
        mmesh_pt->p_refine_selected_elements(elements_to_be_refined[i_mesh]);
      }
      else
      {
        oomph_info << "Info/Warning: Mesh cannot be refined " << std::endl;
      }
    }
 
    // Rebuild the global mesh
    rebuild_global_mesh();
 
    // Any actions after the adapatation phase
    actions_after_adapt();
 
    // Do equation numbering
    oomph_info << "Number of equations: " << assign_eqn_numbers() << std::endl;
  }
 
  //========================================================================
  /// p-refine all submeshes by refining the elements identified by their
  /// pointers within each submesh in a Vector of Vectors, then
  /// rebuild the problem.
  //========================================================================
  void Problem::p_refine_selected_elements(
    const Vector<Vector<PRefineableElement*>>& elements_to_be_refined_pt)
  {
    OomphLibWarning(
      "p-refinement for multiple submeshes has not yet been tested.",
      "Problem::p_refine_selected_elements()",
      OOMPH_EXCEPTION_LOCATION);
 
    actions_before_adapt();
 
    // Number of submeshes?
    unsigned n_mesh = nsub_mesh();
 
    // Refine all submeshes if possible
    for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
    {
      if (TreeBasedRefineableMeshBase* mmesh_pt =
            dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(i_mesh)))
      {
        mmesh_pt->p_refine_selected_elements(elements_to_be_refined_pt[i_mesh]);
      }
      else
      {
        oomph_info << "Info/Warning: Mesh cannot be refined " << std::endl;
      }
    }
 
    // Rebuild the global mesh
    rebuild_global_mesh();
 
    // Any actions after the adapatation phase
    actions_after_adapt();
 
    // Do equation numbering
    oomph_info << "Number of equations: " << assign_eqn_numbers() << std::endl;
  }
 
 
  //========================================================================
  /// Helper function to do compound refinement of (all) refineable
  /// (sub)mesh(es) uniformly as many times as specified in vector and
  /// rebuild problem; doc refinement process. Set boolean argument
  /// to true if you want to prune immediately after refining the meshes
  /// individually.
  //========================================================================
  void Problem::refine_uniformly_aux(const Vector<unsigned>& nrefine_for_mesh,
                                     DocInfo& doc_info,
                                     const bool& prune)
  {
    double t_start = 0.0;
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_start = TimingHelpers::timer();
    }
 
    actions_before_adapt();
 
    double t_end = 0.0;
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info
        << "Time for actions before adapt in Problem::refine_uniformly_aux(): "
        << t_end - t_start << std::endl;
      t_start = TimingHelpers::timer();
    }
 
    // Number of submeshes?
    unsigned n_mesh = nsub_mesh();
 
    // Single mesh:
    if (n_mesh == 0)
    {
      // Refine single mesh uniformly if possible
      if (RefineableMeshBase* mmesh_pt =
            dynamic_cast<RefineableMeshBase*>(mesh_pt(0)))
      {
        unsigned nref = nrefine_for_mesh[0];
        for (unsigned i = 0; i < nref; i++)
        {
          mmesh_pt->refine_uniformly(doc_info);
        }
      }
      else
      {
        oomph_info << "Info/Warning: Mesh cannot be refined uniformly "
                   << std::endl;
      }
    }
    // Multiple submeshes
    else
    {
      // Loop over submeshes
      for (unsigned imesh = 0; imesh < n_mesh; imesh++)
      {
        // Refine i-th submesh uniformly if possible
        if (RefineableMeshBase* mmesh_pt =
              dynamic_cast<RefineableMeshBase*>(mesh_pt(imesh)))
        {
          unsigned nref = nrefine_for_mesh[imesh];
          for (unsigned i = 0; i < nref; i++)
          {
            mmesh_pt->refine_uniformly(doc_info);
          }
        }
        else
        {
          oomph_info << "Info/Warning: Cannot refine mesh " << imesh
                     << std::endl;
        }
      }
      // Rebuild the global mesh
      rebuild_global_mesh();
    }
 
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info << "Time for mesh-level mesh refinement in "
                 << "Problem::refine_uniformly_aux(): " << t_end - t_start
                 << std::endl;
      t_start = TimingHelpers::timer();
    }
 
    // Any actions after the adaptation phase
    actions_after_adapt();
 
 
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info
        << "Time for actions after adapt  Problem::refine_uniformly_aux(): "
        << t_end - t_start << std::endl;
      t_start = TimingHelpers::timer();
    }
 
 
#ifdef OOMPH_HAS_MPI
 
    // Prune it?
    if (prune)
    {
      // Note: This calls assign eqn numbers already...
      Bypass_increase_in_dof_check_during_pruning = true;
      prune_halo_elements_and_nodes();
      Bypass_increase_in_dof_check_during_pruning = false;
 
      if (Global_timings::Doc_comprehensive_timings)
      {
        t_end = TimingHelpers::timer();
        oomph_info << "Time for Problem::prune_halo_elements_and_nodes() in "
                   << "Problem::refine_uniformly_aux(): " << t_end - t_start
                   << std::endl;
      }
    }
    else
#else
    if (prune)
    {
      std::ostringstream error_message;
      error_message
        << "Requested pruning in serial build. Ignoring the request.\n";
      OomphLibWarning(error_message.str(),
                      "Problem::refine_uniformly_aux()",
                      OOMPH_EXCEPTION_LOCATION);
    }
#endif
    {
      // Do equation numbering
      oomph_info
        << "Number of equations after Problem::refine_uniformly_aux(): "
        << assign_eqn_numbers() << std::endl;
 
      if (Global_timings::Doc_comprehensive_timings)
      {
        t_end = TimingHelpers::timer();
        oomph_info << "Time for Problem::assign_eqn_numbers() in "
                   << "Problem::refine_uniformly_aux(): " << t_end - t_start
                   << std::endl;
      }
    }
  }
 
 
  //========================================================================
  /// Helper function to do compound p-refinement of (all) p-refineable
  /// (sub)mesh(es) uniformly as many times as specified in vector and
  /// rebuild problem; doc refinement process. Set boolean argument
  /// to true if you want to prune immediately after refining the meshes
  /// individually.
  //========================================================================
  void Problem::p_refine_uniformly_aux(const Vector<unsigned>& nrefine_for_mesh,
                                       DocInfo& doc_info,
                                       const bool& prune)
  {
    double t_start = 0.0;
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_start = TimingHelpers::timer();
    }
 
    actions_before_adapt();
 
    double t_end = 0.0;
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info << "Time for actions before adapt in "
                    "Problem::p_refine_uniformly_aux(): "
                 << t_end - t_start << std::endl;
      t_start = TimingHelpers::timer();
    }
 
    // Number of submeshes?
    unsigned n_mesh = nsub_mesh();
 
    // Single mesh:
    if (n_mesh == 0)
    {
      // Refine single mesh uniformly if possible
      if (RefineableMeshBase* mmesh_pt =
            dynamic_cast<RefineableMeshBase*>(mesh_pt(0)))
      {
        unsigned nref = nrefine_for_mesh[0];
        for (unsigned i = 0; i < nref; i++)
        {
          mmesh_pt->p_refine_uniformly(doc_info);
        }
      }
      else
      {
        oomph_info << "Info/Warning: Mesh cannot be p-refined uniformly "
                   << std::endl;
      }
    }
    // Multiple submeshes
    else
    {
      OomphLibWarning(
        "p-refinement for multiple submeshes has not yet been tested.",
        "Problem::p_refine_uniformly_aux()",
        OOMPH_EXCEPTION_LOCATION);
 
      // Loop over submeshes
      for (unsigned imesh = 0; imesh < n_mesh; imesh++)
      {
        // Refine i-th submesh uniformly if possible
        if (RefineableMeshBase* mmesh_pt =
              dynamic_cast<RefineableMeshBase*>(mesh_pt(imesh)))
        {
          unsigned nref = nrefine_for_mesh[imesh];
          for (unsigned i = 0; i < nref; i++)
          {
            mmesh_pt->p_refine_uniformly(doc_info);
          }
        }
        else
        {
          oomph_info << "Info/Warning: Cannot p-refine mesh " << imesh
                     << std::endl;
        }
      }
      // Rebuild the global mesh
      rebuild_global_mesh();
    }
 
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info << "Time for mesh-level mesh refinement in "
                 << "Problem::p_refine_uniformly_aux(): " << t_end - t_start
                 << std::endl;
      t_start = TimingHelpers::timer();
    }
 
    // Any actions after the adaptation phase
    actions_after_adapt();
 
 
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info
        << "Time for actions after adapt  Problem::p_refine_uniformly_aux(): "
        << t_end - t_start << std::endl;
      t_start = TimingHelpers::timer();
    }
 
 
#ifdef OOMPH_HAS_MPI
 
    // Prune it?
    if (prune)
    {
      // Note: This calls assign eqn numbers already...
      Bypass_increase_in_dof_check_during_pruning = true;
      prune_halo_elements_and_nodes();
      Bypass_increase_in_dof_check_during_pruning = false;
 
      if (Global_timings::Doc_comprehensive_timings)
      {
        t_end = TimingHelpers::timer();
        oomph_info << "Time for Problem::prune_halo_elements_and_nodes() in "
                   << "Problem::p_refine_uniformly_aux(): " << t_end - t_start
                   << std::endl;
      }
    }
    else
#else
    if (prune)
    {
      std::ostringstream error_message;
      error_message
        << "Requested pruning in serial build. Ignoring the request.\n";
      OomphLibWarning(error_message.str(),
                      "Problem::p_refine_uniformly_aux()",
                      OOMPH_EXCEPTION_LOCATION);
    }
#endif
    {
      // Do equation numbering
      oomph_info
        << "Number of equations after Problem::p_refine_uniformly_aux(): "
        << assign_eqn_numbers() << std::endl;
 
      if (Global_timings::Doc_comprehensive_timings)
      {
        t_end = TimingHelpers::timer();
        oomph_info << "Time for Problem::assign_eqn_numbers() in "
                   << "Problem::p_refine_uniformly_aux(): " << t_end - t_start
                   << std::endl;
      }
    }
  }
 
  //========================================================================
  /// Refine submesh i_mesh uniformly and rebuild problem;
  /// doc refinement process.
  //========================================================================
  void Problem::refine_uniformly(const unsigned& i_mesh, DocInfo& doc_info)
  {
    actions_before_adapt();
 
#ifdef PARANOID
    // Number of submeshes?
    if (i_mesh >= nsub_mesh())
    {
      std::ostringstream error_message;
      error_message << "imesh " << i_mesh
                    << " is greater than the number of sub meshes "
                    << nsub_mesh() << std::endl;
 
      throw OomphLibError(
        error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif
 
    // Refine single mesh uniformly if possible
    if (RefineableMeshBase* mmesh_pt =
          dynamic_cast<RefineableMeshBase*>(mesh_pt(i_mesh)))
    {
      mmesh_pt->refine_uniformly(doc_info);
    }
    else
    {
      oomph_info << "Info/Warning: Mesh cannot be refined uniformly "
                 << std::endl;
    }
 
    // Rebuild the global mesh
    rebuild_global_mesh();
 
    // Any actions after the adaptation phase
    actions_after_adapt();
 
    // Do equation numbering
    oomph_info << "Number of equations: " << assign_eqn_numbers() << std::endl;
  }
 
  //========================================================================
  /// p-refine submesh i_mesh uniformly and rebuild problem;
  /// doc refinement process.
  //========================================================================
  void Problem::p_refine_uniformly(const unsigned& i_mesh, DocInfo& doc_info)
  {
    // i_mesh:   index of the submesh that is to be p-refined uniformly
    // doc_info: passed on to the mesh-level p-refinement

    // Any actions before the adaptation phase
    actions_before_adapt();

#ifdef PARANOID
    // Make sure that the requested submesh exists
    if (i_mesh >= nsub_mesh())
    {
      std::ostringstream error_message;
      error_message << "imesh " << i_mesh
                    << " is greater than the number of sub meshes "
                    << nsub_mesh() << std::endl;

      throw OomphLibError(
        error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // p-refine the specified submesh uniformly if possible
    if (RefineableMeshBase* mmesh_pt =
          dynamic_cast<RefineableMeshBase*>(mesh_pt(i_mesh)))
    {
      mmesh_pt->p_refine_uniformly(doc_info);
    }
    else
    {
      // Report the operation that was actually requested (p-refinement),
      // using the same wording as p_refine_uniformly_aux()
      oomph_info << "Info/Warning: Mesh cannot be p-refined uniformly "
                 << std::endl;
    }

    // Rebuild the global mesh
    rebuild_global_mesh();

    // Any actions after the adaptation phase
    actions_after_adapt();

    // Do equation numbering
    oomph_info << "Number of equations: " << assign_eqn_numbers() << std::endl;
  }
 
 
  //========================================================================
  /// Unrefine (all) refineable (sub)mesh(es) uniformly and rebuild problem.
  /// Return 0 for success,
  /// 1 for failure (if unrefinement has reached the coarsest permitted
  /// level)
  //========================================================================
  unsigned Problem::unrefine_uniformly()
  {
    // Call actions_before_adapt()
    actions_before_adapt();
 
    // Has unrefinement been successful?
    unsigned success_flag = 0;
 
    // Number of submeshes?
    unsigned n_mesh = nsub_mesh();
 
    // Single mesh:
    if (n_mesh == 0)
    {
      // Unrefine single mesh uniformly if possible
      if (RefineableMeshBase* mmesh_pt =
            dynamic_cast<RefineableMeshBase*>(mesh_pt(0)))
      {
        success_flag += mmesh_pt->unrefine_uniformly();
      }
      else
      {
        oomph_info << "Info/Warning: Mesh cannot be unrefined uniformly "
                   << std::endl;
      }
    }
    // Multiple submeshes
    else
    {
      // Loop over submeshes
      for (unsigned imesh = 0; imesh < n_mesh; imesh++)
      {
        // Unrefine i-th submesh uniformly if possible
        if (RefineableMeshBase* mmesh_pt =
              dynamic_cast<RefineableMeshBase*>(mesh_pt(imesh)))
        {
          success_flag += mmesh_pt->unrefine_uniformly();
        }
        else
        {
          oomph_info << "Info/Warning: Cannot unrefine mesh " << imesh
                     << std::endl;
        }
      }
      // Rebuild the global mesh
      rebuild_global_mesh();
    }
 
    // Any actions after the adaptation phase
    actions_after_adapt();
 
    // Do equation numbering
    oomph_info << " Number of equations: " << assign_eqn_numbers() << std::endl;
 
    // Judge success
    if (success_flag > 0)
    {
      return 1;
    }
    else
    {
      return 0;
    }
  }
 
  //========================================================================
  /// Unrefine submesh i_mesh uniformly and rebuild problem.
  /// Return 0 for success,
  /// 1 for failure (if unrefinement has reached the coarsest permitted
  /// level)
  //========================================================================
  unsigned Problem::unrefine_uniformly(const unsigned& i_mesh)
  {
    actions_before_adapt();
 
    // Has unrefinement been successful?
    unsigned success_flag = 0;
 
#ifdef PARANOID
    // Number of submeshes?
    if (i_mesh >= nsub_mesh())
    {
      std::ostringstream error_message;
      error_message << "imesh " << i_mesh
                    << " is greater than the number of sub meshes "
                    << nsub_mesh() << std::endl;
 
      throw OomphLibError(
        error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif
 
    // Unrefine single mesh uniformly if possible
    if (RefineableMeshBase* mmesh_pt =
          dynamic_cast<RefineableMeshBase*>(mesh_pt(i_mesh)))
    {
      success_flag += mmesh_pt->unrefine_uniformly();
    }
    else
    {
      oomph_info << "Info/Warning: Mesh cannot be unrefined uniformly "
                 << std::endl;
    }
 
    // Rebuild the global mesh
    rebuild_global_mesh();
 
    // Any actions after the adaptation phase
    actions_after_adapt();
 
    // Do equation numbering
    oomph_info << "Number of equations: " << assign_eqn_numbers() << std::endl;
 
    // Judge success
    if (success_flag > 0)
    {
      return 1;
    }
    else
    {
      return 0;
    }
  }
 
 
  //========================================================================
  /// p-unrefine (all) p-refineable (sub)mesh(es) uniformly and rebuild problem;
  /// doc refinement process.
  //========================================================================
  void Problem::p_unrefine_uniformly(DocInfo& doc_info)
  {
    // doc_info: passed on to the mesh-level p-unrefinement.
    // Throws an OomphLibError if the problem has submeshes (untested).

    // Any actions before the adaptation phase
    actions_before_adapt();

    // A problem without submeshes only has its single mesh
    const unsigned n_sub_mesh = nsub_mesh();

    if (n_sub_mesh != 0)
    {
      // Multiple submeshes: not tested, so refuse to continue
      throw OomphLibError("This functionality has not yet been tested.",
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);

      // NOTE: The remainder of this branch is unreachable because of the
      // unconditional throw above.
      for (unsigned m = 0; m < n_sub_mesh; m++)
      {
        // p-unrefine m-th submesh uniformly if possible
        RefineableMeshBase* const refineable_mesh_pt =
          dynamic_cast<RefineableMeshBase*>(mesh_pt(m));

        if (refineable_mesh_pt != nullptr)
        {
          refineable_mesh_pt->p_unrefine_uniformly(doc_info);
        }
        else
        {
          oomph_info << "Info/Warning: Cannot p-unrefine mesh " << m
                     << std::endl;
        }
      }

      // Rebuild the global mesh
      rebuild_global_mesh();
    }
    else
    {
      // Single mesh: p-unrefine it if it is refineable
      RefineableMeshBase* const refineable_mesh_pt =
        dynamic_cast<RefineableMeshBase*>(mesh_pt(0));

      if (refineable_mesh_pt != nullptr)
      {
        refineable_mesh_pt->p_unrefine_uniformly(doc_info);
      }
      else
      {
        oomph_info << "Info/Warning: Mesh cannot be p-unrefined uniformly "
                   << std::endl;
      }
    }

    // Any actions after the adaptation phase
    actions_after_adapt();

    // Do equation numbering
    oomph_info << "Number of equations: " << assign_eqn_numbers() << std::endl;
  }
 
  //========================================================================
  /// p-unrefine submesh i_mesh uniformly and rebuild problem;
  /// doc refinement process.
  //========================================================================
  void Problem::p_unrefine_uniformly(const unsigned& i_mesh, DocInfo& doc_info)
  {
    actions_before_adapt();
 
#ifdef PARANOID
    // Number of submeshes?
    if (i_mesh >= nsub_mesh())
    {
      std::ostringstream error_message;
      error_message << "imesh " << i_mesh
                    << " is greater than the number of sub meshes "
                    << nsub_mesh() << std::endl;
 
      throw OomphLibError(
        error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif
 
    // Refine single mesh uniformly if possible
    if (RefineableMeshBase* mmesh_pt =
          dynamic_cast<RefineableMeshBase*>(mesh_pt(i_mesh)))
    {
      mmesh_pt->p_unrefine_uniformly(doc_info);
    }
    else
    {
      oomph_info << "Info/Warning: Mesh cannot be p-unrefined uniformly "
                 << std::endl;
    }
 
    // Rebuild the global mesh
    rebuild_global_mesh();
 
    // Any actions after the adaptation phase
    actions_after_adapt();
 
    // Do equation numbering
    oomph_info << "Number of equations: " << assign_eqn_numbers() << std::endl;
  }
 
 
  //========================================================================
  /// Do one timestep, dt, forward  using Newton's method with specified
  /// tolerance and linear solver specified via member data.
  /// Keep adapting on all meshes to criteria specified in
  /// these meshes (up to max_adapt adaptations are performed).
  /// If first_timestep==true, re-set initial conditions after mesh adaptation.
  /// Shifting of time can be suppressed by overwriting the
  /// default value of shift (true). [Shifting must be done
  /// if first_timestep==true because we're constantly re-assigning
  /// the initial conditions; if first_timestep==true and shift==false
  /// shifting is performed anyway and a warning is issued.]
  //========================================================================
  void Problem::unsteady_newton_solve(const double& dt,
                                      const unsigned& max_adapt,
                                      const bool& first_timestep,
                                      const bool& shift)
  {
    // Working copy of the caller's shift request; it may be overridden below
    bool shift_it = shift;

    // Asking for "no shift" on the first timestep is inconsistent (unless the
    // default set_initial_condition() was used): shift anyway and tell the
    // user about it.
    if (first_timestep && !shift && !Default_set_initial_condition_called)
    {
      shift_it = true;
      oomph_info
        << "\n\n===========================================================\n";
      oomph_info << "                  ********  WARNING *********** \n";
      oomph_info
        << "===========================================================\n";
      oomph_info << "Problem::unsteady_newton_solve() called with "
                 << std::endl;
      oomph_info << "first_timestep: " << first_timestep << std::endl;
      oomph_info << "shift: " << shift << std::endl;
      oomph_info << "This doesn't make sense (shifting does have to be done"
                 << std::endl;
      oomph_info << "since we're constantly re-assigning the initial conditions"
                 << std::endl;
      oomph_info
        << "\n===========================================================\n\n";
    }

    // Time at the start of this step; restored before every re-solve
    const double initial_time = time_pt()->time();

    // One initial solve plus at most max_adapt re-solves on adapted meshes
    const unsigned max_solve = max_adapt + 1;

    // Adaptation loop
    //----------------
    for (unsigned isolve = 0; isolve < max_solve; isolve++)
    {
      // Adaptation needs a solution, so it is skipped on the first pass
      if (isolve > 0)
      {
        unsigned n_refined;
        unsigned n_unrefined;

        // Adapt the problem; the counters are filled in by adapt()
        adapt(n_refined, n_unrefined);

#ifdef OOMPH_HAS_MPI
        // In a distributed problem the adaptation has only converged if
        // NO process refined or unrefined anything: sum over all processes
        unsigned global_refined = 0;
        unsigned global_unrefined = 0;
        if (Problem_has_been_distributed)
        {
          MPI_Allreduce(&n_refined,
                        &global_refined,
                        1,
                        MPI_UNSIGNED,
                        MPI_SUM,
                        this->communicator_pt()->mpi_comm());
          n_refined = global_refined;
          MPI_Allreduce(&n_unrefined,
                        &global_unrefined,
                        1,
                        MPI_UNSIGNED,
                        MPI_SUM,
                        this->communicator_pt()->mpi_comm());
          n_unrefined = global_unrefined;
        }
#endif

        oomph_info << "---> " << n_refined << " elements were refined, and "
                   << n_unrefined << " were unrefined, in total." << std::endl;

        // Nothing changed in the mesh: the adaptation cycle has converged
        const bool mesh_was_changed = (n_refined != 0) || (n_unrefined != 0);
        if (!mesh_was_changed)
        {
          oomph_info << "\n \n Solution is fully converged in "
                     << "Problem::unsteady_newton_solver() \n \n ";
          break;
        }

        // Wind the global time back to the start of this timestep
        time_pt()->time() = initial_time;

        // On the first timestep the initial condition is re-imposed on the
        // adapted mesh. Because the time has just been reset, this happens
        // at the initial time rather than at initial time + dt.
        if (first_timestep)
        {
          // Clear the flag so we can detect below whether the default
          // set_initial_condition() is the one that gets called
          Default_set_initial_condition_called = false;

          oomph_info << "Re-setting initial condition " << std::endl;
          set_initial_condition();

          // The default set_initial_condition() does not re-assign the
          // initial condition, so the timevalues must not be shifted
          if (Default_set_initial_condition_called)
          {
            shift_it = false;
          }
        }
      }

      // Timevalues are shifted (subject to shift_it) on the very first pass,
      // and on every pass of a first timestep because the initial condition
      // is re-assigned above. On any other pass the shift has already been
      // done and must not be repeated.
      if ((isolve != 0) && !first_timestep)
      {
        shift_it = false;
      }

      // Now do the actual unsteady timestep
      Problem::unsteady_newton_solve(dt, shift_it);

      if (isolve == max_solve - 1)
      {
        oomph_info
          << std::endl
          << "----------------------------------------------------------"
          << std::endl
          << "Reached max. number of adaptations in \n"
          << "Problem::unsteady_newton_solver().\n"
          << "----------------------------------------------------------"
          << std::endl
          << std::endl;
      }

    } // End of adaptation loop
  }
 
 
  //========================================================================
  /// Adaptive Newton solver.
  /// The linear solver takes a pointer to the problem (which defines
  /// the Jacobian \b J and the residual Vector \b r) and returns
  /// the solution \b x of the system
  /// \f[ {\bf J} {\bf x} = - {\bf r} \f].
  /// Performs at most max_adapt adaptations on all meshes.
  //========================================================================
  void Problem::newton_solve(const unsigned& max_adapt)
  {
    // One initial solve plus at most max_adapt re-solves on adapted meshes
    const unsigned max_solve = max_adapt + 1;

    // Adaptation loop
    //----------------
    for (unsigned isolve = 0; isolve < max_solve; isolve++)
    {
      // Adaptation needs a solution, so it is skipped on the first pass
      if (isolve > 0)
      {
        unsigned n_refined;
        unsigned n_unrefined;

        // Adapt the problem; the counters are filled in by adapt()
        adapt(n_refined, n_unrefined);

#ifdef OOMPH_HAS_MPI
        // In a distributed problem the adaptation has only converged if
        // NO process refined or unrefined anything: sum over all processes
        unsigned total_refined = 0;
        unsigned total_unrefined = 0;
        if (Problem_has_been_distributed)
        {
          MPI_Allreduce(&n_refined,
                        &total_refined,
                        1,
                        MPI_UNSIGNED,
                        MPI_SUM,
                        this->communicator_pt()->mpi_comm());
          n_refined = total_refined;
          MPI_Allreduce(&n_unrefined,
                        &total_unrefined,
                        1,
                        MPI_UNSIGNED,
                        MPI_SUM,
                        this->communicator_pt()->mpi_comm());
          n_unrefined = total_unrefined;
        }
#endif

        oomph_info << "---> " << n_refined << " elements were refined, and "
                   << n_unrefined << " were unrefined"
#ifdef OOMPH_HAS_MPI
                   << ", in total (over all processors).\n";
#else
                   << ".\n";
#endif

        // Nothing changed in the mesh: the adaptation cycle has converged
        if ((n_refined == 0) && (n_unrefined == 0))
        {
          oomph_info << "\n \n Solution is fully converged in "
                     << "Problem::newton_solver(). \n \n ";
          break;
        }
      }

      // Do actual solve
      //----------------
      // Note: actions_before/after_newton_solve() are not called here
      // because the non-adaptive newton_solve() already calls them.
      try
      {
        // Solve the non-linear problem with Newton's method
        newton_solve();
      }
      // Translate a Newton solver failure into a diagnostic message followed
      // by a generic OomphLibError
      catch (NewtonSolverError& error)
      {
        oomph_info << std::endl
                   << "USER-DEFINED ERROR IN NEWTON SOLVER " << std::endl;
        // Check to see whether we have reached Max_iterations
        if (error.iterations() == Max_newton_iterations)
        {
          oomph_info << "MAXIMUM NUMBER OF ITERATIONS (" << error.iterations()
                     << ") REACHED WITHOUT CONVERGENCE " << std::endl;
        }
        // If not, it must be that we have exceeded the maximum residuals
        else
        {
          // The leading space keeps the residual value separate from the
          // text that follows it
          oomph_info << "MAXIMUM RESIDUALS: " << error.maxres()
                     << " EXCEEDS PREDEFINED MAXIMUM " << Max_residuals
                     << std::endl;
        }

        // Die horribly!!
        std::ostringstream error_stream;
        error_stream << "Error occured in adaptive Newton solver. "
                     << std::endl;
        throw OomphLibError(error_stream.str(),
                            OOMPH_CURRENT_FUNCTION,
                            OOMPH_EXCEPTION_LOCATION);
      }

      if (isolve == max_solve - 1)
      {
        oomph_info
          << std::endl
          << "----------------------------------------------------------"
          << std::endl
          << "Reached max. number of adaptations in \n"
          << "Problem::newton_solver().\n"
          << "----------------------------------------------------------"
          << std::endl
          << std::endl;
      }

    } // End of adaptation loop
  }
 
  //========================================================================
  /// Delete any external storage for any submeshes
  /// NB this would ordinarily take place within the adaptation procedure
  /// for each submesh (See RefineableMesh::adapt_mesh(...)), but there
  /// are instances where the actions_before/after_adapt routines are used
  /// and no adaptive routines are called in between (e.g. when doc-ing
  /// errors at the end of an adaptive newton solver)
  //========================================================================
  void Problem::delete_all_external_storage()
  {
    // Number of submeshes
    unsigned n_mesh = nsub_mesh();
 
    // External storage will only exist if there is more than one (sub)mesh
    if (n_mesh > 1)
    {
      for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
      {
        mesh_pt(i_mesh)->delete_all_external_storage();
      }
    }
  }
 
 
#ifdef OOMPH_HAS_MPI
 
  //====================================================================
  /// Get all the halo data stored on this processor and store pointers
  /// to the data in a map, indexed by the global eqn number
  //====================================================================
  void Problem::get_all_halo_data(std::map<unsigned, double*>& map_of_halo_data)
  {
    // Halo data is stored in the meshes, so the work is delegated to them
    const unsigned n_sub_mesh = this->nsub_mesh();

    // No submeshes: the main mesh is the only one to query
    if (n_sub_mesh == 0)
    {
      mesh_pt()->get_all_halo_data(map_of_halo_data);
      return;
    }

    // Otherwise every submesh adds its entries to the same map
    for (unsigned m = 0; m < n_sub_mesh; ++m)
    {
      mesh_pt(m)->get_all_halo_data(map_of_halo_data);
    }
  }
 
 
  //========================================================================
  /// Check the halo/haloed/shared node/element schemes.
  //========================================================================
  void Problem::check_halo_schemes(DocInfo& doc_info)
  {
    // The actual checks are mesh-based; this is only a wrapper that calls
    // them for the single mesh or for each submesh, setting a matching
    // label in doc_info first.
    const unsigned n_sub_mesh = nsub_mesh();

    // Single mesh only
    if (n_sub_mesh == 0)
    {
      oomph_info << "Checking halo schemes on single mesh" << std::endl;
      doc_info.label() = "_one_and_only_mesh_";
      mesh_pt()->check_halo_schemes(doc_info,
                                    Max_permitted_error_for_halo_check);
      return;
    }

    // There are submeshes: check them one by one
    for (unsigned m = 0; m < n_sub_mesh; m++)
    {
      oomph_info << "Checking halo schemes on submesh " << m
                 << std::endl;

      // Label of the form "_mesh<index>_"
      std::stringstream label_stream;
      label_stream << "_mesh" << m << "_";
      doc_info.label() = label_stream.str();

      mesh_pt(m)->check_halo_schemes(doc_info,
                                     Max_permitted_error_for_halo_check);
    }
  }
 
 
  //========================================================================
  /// Synchronise all dofs by calling the appropriate synchronisation
  /// routines for all meshes and the assembly handler
  //========================================================================
  void Problem::synchronise_all_dofs()
  {
    // First pass: "normal" halos only (do_halos=true, do_external_halos=false)
    this->synchronise_dofs(true, false);

    // Second pass: external halos only
    // (do_halos=false, do_external_halos=true)
    this->synchronise_dofs(false, true);

    // Finally perform any synchronisation required by the assembly handler
    this->assembly_handler_pt()->synchronise();
  }
 
 
  //========================================================================
  /// Synchronise the degrees of freedom by overwriting
  /// the haloed values with their non-halo counterparts held
  /// on other processors. Bools control if we deal with data associated with
  /// external halo/ed elements/nodes or the "normal" halo/ed ones.
  //========================================================================
  void Problem::synchronise_dofs(const bool& do_halos,
                                 const bool& do_external_halos)
  {
    // Do we have submeshes?
    unsigned n_mesh_loop = 1;
    unsigned nmesh = nsub_mesh();
    if (nmesh > 0)
    {
      n_mesh_loop = nmesh;
    }
 
    // Local storage for number of processors and current processor
    const int n_proc = this->communicator_pt()->nproc();
 
    // If only one processor then return
    if (n_proc == 1)
    {
      return;
    }
 
    const int my_rank = this->communicator_pt()->my_rank();
 
    // Storage for number of data to be sent to each processor
    Vector<int> send_n(n_proc, 0);
 
    // Storage for all values to be sent to all processors
    Vector<double> send_data;
 
    // Start location within send_data for data to be sent to each processor
    Vector<int> send_displacement(n_proc, 0);
 
    // Loop over all processors
    for (int rank = 0; rank < n_proc; rank++)
    {
      // Set the offset for the current processor
      send_displacement[rank] = send_data.size();
 
      // Don't bother to do anything if the processor in the loop is the
      // current processor
      if (rank != my_rank)
      {
        // Deal with sub-meshes one-by-one if required
        Mesh* my_mesh_pt = 0;
 
        // Loop over submeshes
        for (unsigned imesh = 0; imesh < n_mesh_loop; imesh++)
        {
          if (nmesh == 0)
          {
            my_mesh_pt = mesh_pt();
          }
          else
          {
            my_mesh_pt = mesh_pt(imesh);
          }
 
          if (do_halos)
          {
            // How many of my nodes are haloed by the processor whose values
            // are updated?
            unsigned n_nod = my_mesh_pt->nhaloed_node(rank);
            for (unsigned n = 0; n < n_nod; n++)
            {
              // Add the data for each haloed node to the vector
              my_mesh_pt->haloed_node_pt(rank, n)->add_values_to_vector(
                send_data);
            }
 
            // Now loop over haloed elements and prepare to add their
            // internal data to the big vector to be sent
            Vector<GeneralisedElement*> haloed_elem_pt =
              my_mesh_pt->haloed_element_pt(rank);
            unsigned nelem_haloed = haloed_elem_pt.size();
            for (unsigned e = 0; e < nelem_haloed; e++)
            {
              haloed_elem_pt[e]->add_internal_data_values_to_vector(send_data);
            }
          }
 
          if (do_external_halos)
          {
            // How many of my nodes are externally haloed by the processor whose
            // values are updated?  NB these nodes are on the external mesh.
            unsigned n_ext_nod = my_mesh_pt->nexternal_haloed_node(rank);
            for (unsigned n = 0; n < n_ext_nod; n++)
            {
              // Add data from each external haloed node to the vector
              my_mesh_pt->external_haloed_node_pt(rank, n)
                ->add_values_to_vector(send_data);
            }
 
            // Now loop over haloed elements and prepare to send internal data
            unsigned next_elem_haloed =
              my_mesh_pt->nexternal_haloed_element(rank);
            for (unsigned e = 0; e < next_elem_haloed; e++)
            {
              my_mesh_pt->external_haloed_element_pt(rank, e)
                ->add_internal_data_values_to_vector(send_data);
            }
          }
        } // end of loop over meshes
      }
 
      // Find the number of data added to the vector
      send_n[rank] = send_data.size() - send_displacement[rank];
    }
 
 
    // Storage for the number of data to be received from each processor
    Vector<int> receive_n(n_proc, 0);
 
    // Now send numbers of data to be sent between all processors
    MPI_Alltoall(&send_n[0],
                 1,
                 MPI_INT,
                 &receive_n[0],
                 1,
                 MPI_INT,
                 this->communicator_pt()->mpi_comm());
 
    // We now prepare the data to be received
    // by working out the displacements from the received data
    Vector<int> receive_displacement(n_proc, 0);
    int receive_data_count = 0;
    for (int rank = 0; rank < n_proc; ++rank)
    {
      // Displacement is number of data received so far
      receive_displacement[rank] = receive_data_count;
      receive_data_count += receive_n[rank];
    }
 
    // Now resize the receive buffer for all data from all processors
    // Make sure that it has a size of at least one
    if (receive_data_count == 0)
    {
      ++receive_data_count;
    }
    Vector<double> receive_data(receive_data_count);
 
    // Make sure that the send buffer has size at least one
    // so that we don't get a segmentation fault
    if (send_data.size() == 0)
    {
      send_data.resize(1);
    }
 
    // Now send the data between all the processors
    MPI_Alltoallv(&send_data[0],
                  &send_n[0],
                  &send_displacement[0],
                  MPI_DOUBLE,
                  &receive_data[0],
                  &receive_n[0],
                  &receive_displacement[0],
                  MPI_DOUBLE,
                  this->communicator_pt()->mpi_comm());
 
    // Now use the received data to update the halo nodes
    for (int send_rank = 0; send_rank < n_proc; send_rank++)
    {
      // Don't bother to do anything for the processor corresponding to the
      // current processor or if no data were received from this processor
      if ((send_rank != my_rank) && (receive_n[send_rank] != 0))
      {
        // Counter for the data within the large array
        unsigned count = receive_displacement[send_rank];
 
        // Deal with sub-meshes one-by-one if required
        Mesh* my_mesh_pt = 0;
 
        // Loop over submeshes
        for (unsigned imesh = 0; imesh < n_mesh_loop; imesh++)
        {
          if (nmesh == 0)
          {
            my_mesh_pt = mesh_pt();
          }
          else
          {
            my_mesh_pt = mesh_pt(imesh);
          }
 
          if (do_halos)
          {
            // How many of my nodes are halos whose non-halo counter
            // parts live on processor send_rank?
            unsigned n_nod = my_mesh_pt->nhalo_node(send_rank);
            for (unsigned n = 0; n < n_nod; n++)
            {
              // Read in values for each halo node
              my_mesh_pt->halo_node_pt(send_rank, n)
                ->read_values_from_vector(receive_data, count);
            }
 
            // Get number of halo elements whose non-halo is
            // on process send_rank
            Vector<GeneralisedElement*> halo_elem_pt =
              my_mesh_pt->halo_element_pt(send_rank);
 
            unsigned nelem_halo = halo_elem_pt.size();
            for (unsigned e = 0; e < nelem_halo; e++)
            {
              halo_elem_pt[e]->read_internal_data_values_from_vector(
                receive_data, count);
            }
          }
 
          if (do_external_halos)
          {
            // How many of my nodes are external halos whose external non-halo
            // counterparts live on processor send_rank?
            unsigned n_ext_nod = my_mesh_pt->nexternal_halo_node(send_rank);
 
            // Copy into the values of the external halo nodes
            // on the present processors
            for (unsigned n = 0; n < n_ext_nod; n++)
            {
              // Read the data from the array into each halo node
              my_mesh_pt->external_halo_node_pt(send_rank, n)
                ->read_values_from_vector(receive_data, count);
            }
 
            // Get number of halo elements whose non-halo is
            // on process send_rank
            unsigned next_elem_halo =
              my_mesh_pt->nexternal_halo_element(send_rank);
            for (unsigned e = 0; e < next_elem_halo; e++)
            {
              my_mesh_pt->external_halo_element_pt(send_rank, e)
                ->read_internal_data_values_from_vector(receive_data, count);
            }
          }
 
        } // end of loop over meshes
      }
    } // End of data is received
  } // End of synchronise
 
 
  //========================================================================
  ///  Synchronise equation numbers and return the total
  /// number of degrees of freedom in the overall problem
  //========================================================================
  long Problem::synchronise_eqn_numbers(const bool& assign_local_eqn_numbers)
  {
    // number of equations on this processor, which at this stage is only known
    // by counting the number of dofs that have been added to the problem
    unsigned my_n_eqn = Dof_pt.size();
 
    // my rank
    unsigned my_rank = Communicator_pt->my_rank();
 
    // number of processors
    unsigned nproc = Communicator_pt->nproc();
 
    //  // Time alternative communication
    //  Vector<unsigned> n_eqn(nproc);
    //  {
    //   double t_start = TimingHelpers::timer();
 
    //   // Gather numbers of equations (enumerated independently on all procs)
    //   MPI_Allgather(&my_n_eqn,1,MPI_UNSIGNED,&n_eqn[0],
    //                 1,MPI_INT,Communicator_pt->mpi_comm());
 
    //   double t_end = TimingHelpers::timer();
    //   oomph_info << "Time for allgather-based exchange of eqn numbers: "
    //              << t_end-t_start << std::endl;
    //  }
 
    double t_start = TimingHelpers::timer();
 
    // send my_n_eqn to with rank greater than my_rank
    unsigned n_send = nproc - my_rank - 1;
    Vector<MPI_Request> send_req(n_send);
    for (unsigned p = my_rank + 1; p < nproc; p++)
    {
      MPI_Isend(&my_n_eqn,
                1,
                MPI_UNSIGNED,
                p,
                0,
                Communicator_pt->mpi_comm(),
                &send_req[p - my_rank - 1]);
    }
 
    // recv n_eqn from processors with rank less than my_rank
    Vector<unsigned> n_eqn_on_proc(my_rank);
    for (unsigned p = 0; p < my_rank; p++)
    {
      MPI_Recv(&n_eqn_on_proc[p],
               1,
               MPI_UNSIGNED,
               p,
               0,
               Communicator_pt->mpi_comm(),
               MPI_STATUS_IGNORE);
    }
 
    double t_end = 0.0;
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info << "Time for send and receive stuff: " << t_end - t_start
                 << std::endl;
      t_start = TimingHelpers::timer();
    }
 
    // determine the number of equation on processors with rank
    // less than my_rank
    unsigned my_eqn_num_base = 0;
    for (unsigned p = 0; p < my_rank; p++)
    {
      my_eqn_num_base += n_eqn_on_proc[p];
      //   if (n_eqn_on_proc[p]!=n_eqn[p])
      //     {
      //      std::cout << "proc " << my_rank << "clash in eqn numbers: "
      //                << p << " " << n_eqn_on_proc[p] << " " << n_eqn[p]
      //                << std::endl;
      //     }
    }
 
    // Loop over all internal data (on elements) and bump up their
    // equation numbers if they exist
    unsigned nelem = mesh_pt()->nelement();
    for (unsigned e = 0; e < nelem; e++)
    {
      GeneralisedElement* el_pt = mesh_pt()->element_pt(e);
 
      unsigned nintern_data = el_pt->ninternal_data();
      for (unsigned iintern = 0; iintern < nintern_data; iintern++)
      {
        Data* int_data_pt = el_pt->internal_data_pt(iintern);
        unsigned nval = int_data_pt->nvalue();
        for (unsigned ival = 0; ival < nval; ival++)
        {
          int old_eqn_number = int_data_pt->eqn_number(ival);
          if (old_eqn_number >= 0) // i.e. it's being used
          {
            // Bump up eqn number
            int new_eqn_number = old_eqn_number + my_eqn_num_base;
            int_data_pt->eqn_number(ival) = new_eqn_number;
          }
        }
      }
    }
 
    // Loop over all nodes on current processor and bump up their
    // equation numbers if they're not pinned!
    unsigned nnod = mesh_pt()->nnode();
    for (unsigned j = 0; j < nnod; j++)
    {
      Node* nod_pt = mesh_pt()->node_pt(j);
 
      // loop over ALL eqn numbers - variable number of values
      unsigned nval = nod_pt->nvalue();
 
      for (unsigned ival = 0; ival < nval; ival++)
      {
        int old_eqn_number = nod_pt->eqn_number(ival);
        // Include all eqn numbers
        if (old_eqn_number >= 0)
        {
          // Bump up eqn number
          int new_eqn_number = old_eqn_number + my_eqn_num_base;
          nod_pt->eqn_number(ival) = new_eqn_number;
        }
      }
 
      // Is this a solid node? If so, need to bump up its equation number(s)
      SolidNode* solid_nod_pt = dynamic_cast<SolidNode*>(nod_pt);
 
      if (solid_nod_pt != 0)
      {
        // Find equation numbers
        unsigned nval = solid_nod_pt->variable_position_pt()->nvalue();
        for (unsigned ival = 0; ival < nval; ival++)
        {
          int old_eqn_number =
            solid_nod_pt->variable_position_pt()->eqn_number(ival);
          // include all eqn numbers
 
          if (old_eqn_number >= 0)
          {
            // Bump up eqn number
            int new_eqn_number = old_eqn_number + my_eqn_num_base;
            solid_nod_pt->variable_position_pt()->eqn_number(ival) =
              new_eqn_number;
          }
        }
      }
    }
 
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info << "Time for bumping: " << t_end - t_start << std::endl;
      t_start = TimingHelpers::timer();
    }
 
 
    // Now copy the haloed eqn numbers across
    // This has to include the internal data equation numbers as well
    // as the solid node equation numbers
    bool do_halos = true;
    bool do_external_halos = false;
    copy_haloed_eqn_numbers_helper(do_halos, do_external_halos);
 
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info << "Time for copy_haloed_eqn_numbers_helper for halos: "
                 << t_end - t_start << std::endl;
      t_start = TimingHelpers::timer();
    }
 
    // Now do external halo stuff
    do_halos = false;
    do_external_halos = true;
    copy_haloed_eqn_numbers_helper(do_halos, do_external_halos);
 
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info
        << "Time for copy_haloed_eqn_numbers_helper for external halos: "
        << t_end - t_start << std::endl;
      t_start = TimingHelpers::timer();
    }
 
    // Now the global equation numbers have been updated.
    //---------------------------------------------------
    // Setup the local equation numbers again.
    //----------------------------------------
    if (assign_local_eqn_numbers)
    {
      // Loop over the submeshes: Note we need to call the submeshes' own
      // assign_*_eqn_number() otherwise we miss additional functionality
      // that is implemented (e.g.) in SolidMeshes!
      unsigned n_sub_mesh = nsub_mesh();
      if (n_sub_mesh == 0)
      {
        mesh_pt()->assign_local_eqn_numbers(Store_local_dof_pt_in_elements);
      }
      else
      {
        for (unsigned i = 0; i < n_sub_mesh; i++)
        {
          mesh_pt(i)->assign_local_eqn_numbers(Store_local_dof_pt_in_elements);
        }
      }
    }
 
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info << "Time for assign_local_eqn_numbers in sync: "
                 << t_end - t_start << std::endl;
      t_start = TimingHelpers::timer();
    }
 
    // wait for the sends to complete
    if (n_send > 0)
    {
      Vector<MPI_Status> send_status(n_send);
      MPI_Waitall(n_send, &send_req[0], &send_status[0]);
    }
 
    if (Global_timings::Doc_comprehensive_timings)
    {
      t_end = TimingHelpers::timer();
      oomph_info << "Time for waitall: " << t_end - t_start << std::endl;
      t_start = TimingHelpers::timer();
    }
 
    // build the Dof distribution pt
    Dof_distribution_pt->build(Communicator_pt, my_eqn_num_base, my_n_eqn);
 
    // and return the total number of equations in the problem
    return (long)Dof_distribution_pt->nrow();
  }
 
 
  //=======================================================================
  /// A private helper function to
  /// copy the haloed equation numbers into the halo equation numbers,
  /// either for the problem's one and only mesh or for all of its
  /// submeshes. Bools control if we deal with data associated with
  /// external halo/ed elements/nodes or the "normal" halo/ed ones.
  //===================================================================
  void Problem::copy_haloed_eqn_numbers_helper(const bool& do_halos,
                                               const bool& do_external_halos)
  {
    // Do we have submeshes?
    unsigned n_mesh_loop = 1;
    unsigned nmesh = nsub_mesh();
    if (nmesh > 0)
    {
      n_mesh_loop = nmesh;
    }
 
    // Storage for number of processors and current processor
    int n_proc = this->communicator_pt()->nproc();
 
    // If only one processor then return
    if (n_proc == 1)
    {
      return;
    }
    int my_rank = this->communicator_pt()->my_rank();
 
    // Storage for number of data to be sent to each processor
    Vector<int> send_n(n_proc, 0);
    // Storage for all equation numbers to be sent to all processors
    Vector<long> send_data;
    // Start location within send_data for data to be sent to each processor
    Vector<int> send_displacement(n_proc, 0);
 
 
    // Loop over all processors whose eqn numbers are to be updated
    for (int rank = 0; rank < n_proc; rank++)
    {
      // Set the displacement of the current processor in the loop
      send_displacement[rank] = send_data.size();
 
      // If I'm not the processor whose halo eqn numbers are updated,
      // some of my nodes may be haloed: Stick their
      // eqn numbers into the vector
      if (rank != my_rank)
      {
        // Deal with sub-meshes one-by-one if required
        Mesh* my_mesh_pt = 0;
 
        // Loop over submeshes
        for (unsigned imesh = 0; imesh < n_mesh_loop; imesh++)
        {
          if (nmesh == 0)
          {
            my_mesh_pt = mesh_pt();
          }
          else
          {
            my_mesh_pt = mesh_pt(imesh);
          }
 
          if (do_halos)
          {
            // Add equation numbers for each haloed node
            unsigned n_nod = my_mesh_pt->nhaloed_node(rank);
            for (unsigned n = 0; n < n_nod; n++)
            {
              my_mesh_pt->haloed_node_pt(rank, n)->add_eqn_numbers_to_vector(
                send_data);
            }
 
            // Add the equation numbers associated with internal data
            // in the haloed elements
            Vector<GeneralisedElement*> haloed_elem_pt =
              my_mesh_pt->haloed_element_pt(rank);
            unsigned nelem_haloed = haloed_elem_pt.size();
            for (unsigned e = 0; e < nelem_haloed; e++)
            {
              haloed_elem_pt[e]->add_internal_eqn_numbers_to_vector(send_data);
            }
          }
 
          if (do_external_halos)
          {
            // Add equation numbers associated with external haloed nodes
            unsigned n_ext_nod = my_mesh_pt->nexternal_haloed_node(rank);
            for (unsigned n = 0; n < n_ext_nod; n++)
            {
              my_mesh_pt->external_haloed_node_pt(rank, n)
                ->add_eqn_numbers_to_vector(send_data);
            }
 
            // Add the equation numbers associated with internal data in
            // each external haloed element
            unsigned next_elem_haloed =
              my_mesh_pt->nexternal_haloed_element(rank);
            for (unsigned e = 0; e < next_elem_haloed; e++)
            {
              // how many internal data values for this element?
              my_mesh_pt->external_haloed_element_pt(rank, e)
                ->add_internal_eqn_numbers_to_vector(send_data);
            }
          }
 
        } // end of loop over meshes
      }
 
      // Find the number of data added to the vector by this processor
      send_n[rank] = send_data.size() - send_displacement[rank];
    }
 
    // Storage for the number of data to be received from each processor
    Vector<int> receive_n(n_proc, 0);
 
    // Communicate all numbers of data to be sent between all processors
    MPI_Alltoall(&send_n[0],
                 1,
                 MPI_INT,
                 &receive_n[0],
                 1,
                 MPI_INT,
                 this->communicator_pt()->mpi_comm());
 
    // We now prepare the data to be received
    // by working out the displacements from the received data
    Vector<int> receive_displacement(n_proc, 0);
    int receive_data_count = 0;
    for (int rank = 0; rank < n_proc; ++rank)
    {
      // Displacement is number of data received so far
      receive_displacement[rank] = receive_data_count;
      receive_data_count += receive_n[rank];
    }
 
    // Now resize the receive buffer
    // Make sure that it has a size of at least one
    if (receive_data_count == 0)
    {
      ++receive_data_count;
    }
    Vector<long> receive_data(receive_data_count);
 
    // Make sure that the send buffer has size at least one
    // so that we don't get a segmentation fault
    if (send_data.size() == 0)
    {
      send_data.resize(1);
    }
 
    // Now send the data between all the processors
    MPI_Alltoallv(&send_data[0],
                  &send_n[0],
                  &send_displacement[0],
                  MPI_LONG,
                  &receive_data[0],
                  &receive_n[0],
                  &receive_displacement[0],
                  MPI_LONG,
                  this->communicator_pt()->mpi_comm());
 
 
    // Loop over all other processors to receive their
    // eqn numbers
    for (int send_rank = 0; send_rank < n_proc; send_rank++)
    {
      // Don't do anything for the processor corresponding to the
      // current processor or if no data were received from this processor
      if ((send_rank != my_rank) && (receive_n[send_rank] != 0))
      {
        // Counter for the data within the large array
        unsigned count = receive_displacement[send_rank];
 
        // Deal with sub-meshes one-by-one if required
        Mesh* my_mesh_pt = 0;
 
        // Loop over submeshes
        for (unsigned imesh = 0; imesh < n_mesh_loop; imesh++)
        {
          if (nmesh == 0)
          {
            my_mesh_pt = mesh_pt();
          }
          else
          {
            my_mesh_pt = mesh_pt(imesh);
          }
 
          if (do_halos)
          {
            // How many of my nodes are halos whose non-halo counter
            // parts live on processor send_rank?
            unsigned n_nod = my_mesh_pt->nhalo_node(send_rank);
            for (unsigned n = 0; n < n_nod; n++)
            {
              // Generalise to variable number of values per node
              my_mesh_pt->halo_node_pt(send_rank, n)
                ->read_eqn_numbers_from_vector(receive_data, count);
            }
 
            // Get number of halo elements whose non-halo is on
            // process send_rank
            Vector<GeneralisedElement*> halo_elem_pt =
              my_mesh_pt->halo_element_pt(send_rank);
            unsigned nelem_halo = halo_elem_pt.size();
            for (unsigned e = 0; e < nelem_halo; e++)
            {
              halo_elem_pt[e]->read_internal_eqn_numbers_from_vector(
                receive_data, count);
            }
          }
 
          if (do_external_halos)
          {
            // How many of my nodes are external halos whose external non-halo
            // counterparts live on processor send_rank?
            unsigned n_ext_nod = my_mesh_pt->nexternal_halo_node(send_rank);
            for (unsigned n = 0; n < n_ext_nod; n++)
            {
              my_mesh_pt->external_halo_node_pt(send_rank, n)
                ->read_eqn_numbers_from_vector(receive_data, count);
            }
 
            // Get number of external halo elements whose external haloed
            // counterpart is on process send_rank
            unsigned next_elem_halo =
              my_mesh_pt->nexternal_halo_element(send_rank);
            for (unsigned e = 0; e < next_elem_halo; e++)
            {
              my_mesh_pt->external_halo_element_pt(send_rank, e)
                ->read_internal_eqn_numbers_from_vector(receive_data, count);
            }
          }
 
        } // end of loop over meshes
      }
    } // End of loop over processors
  }
 
  //==========================================================================
  /// Balance the load of a (possibly non-uniformly refined) problem that has
  /// already been distributed, by re-distributing elements over the processors.
  /// Produce explicit stats of the load balancing process if the boolean
  /// report_stats is set to true, and doc various bits of data (mainly for
  /// debugging) in the directory specified by the DocInfo object. If any
  /// processor passes a non-empty
  /// input_target_domain_for_local_non_halo_element, that externally
  /// prescribed partitioning is used instead of one computed via METIS.
  //==========================================================================
  void Problem::load_balance(
    DocInfo& doc_info,
    const bool& report_stats,
    const Vector<unsigned>& input_target_domain_for_local_non_halo_element)
  {
    double start_t = TimingHelpers::timer();
 
    // Number of processes
    const unsigned n_proc = this->communicator_pt()->nproc();
 
    // Don't do anything if this is a single-process job
    if (n_proc == 1)
    {
      if (report_stats)
      {
        std::ostringstream warn_message;
        warn_message << "WARNING: You've tried to load balance a problem over\n"
                     << "only one processor: ignoring your request.\n";
        OomphLibWarning(warn_message.str(),
                        "Problem::load_balance()",
                        OOMPH_EXCEPTION_LOCATION);
      }
    }
    // Multiple processors
    else
    {
      // This will only work if the problem has already been distributed
      if (!Problem_has_been_distributed)
      {
        // Throw an error
        std::ostringstream error_stream;
        error_stream << "You have called Problem::load_balance()\n"
                     << "on a non-distributed problem. This doesn't\n"
                     << "make sense -- go distribute your problem first."
                     << std::endl;
        throw OomphLibError(
          error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
      }
 
      // Timings
      double t_start = 0.0;
      double t_metis = 0.0;
      double t_partition = 0.0;
      double t_distribute = 0.0;
      double t_refine = 0.0;
      double t_copy_solution = 0.0;
 
      if (report_stats)
      {
        t_start = TimingHelpers::timer();
      }
 
 
#ifdef PARANOID
      unsigned old_ndof = ndof();
#endif
 
      // Store pointers to the old mesh(es) so we retain a handle
      //---------------------------------------------------------
      // to them for deletion
      //---------------------
      Vector<Mesh*> old_mesh_pt;
      unsigned n_mesh = nsub_mesh();
      if (n_mesh == 0)
      {
        // Resize the container
        old_mesh_pt.resize(1);
        old_mesh_pt[0] = mesh_pt();
      }
      else
      {
        // Resize the container
        old_mesh_pt.resize(n_mesh);
        for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
        {
          old_mesh_pt[i_mesh] = mesh_pt(i_mesh);
        }
      }
 
 
      // Partition the global mesh in its current state
      //-----------------------------------------------
 
      // target_domain_for_local_non_halo_element[e] contains the number
      // of the domain [0,1,...,nproc-1] to which non-halo element e on THE
      // CURRENT PROCESSOR ONLY has been assigned. The order of the non-halo
      // elements is the same as in the Problem's mesh, with the halo
      // elements being skipped.
      Vector<unsigned> target_domain_for_local_non_halo_element;
 
      // Do any of the processors want to go through externally imposed
      // partitioning? If so, we'd better do it here too (even if the processor
      // is empty, e.g. following a restart on a larger number of procs) or
      // we hang.
      unsigned local_ntarget =
        input_target_domain_for_local_non_halo_element.size();
      unsigned global_ntarget = 0;
      MPI_Allreduce(&local_ntarget,
                    &global_ntarget,
                    1,
                    MPI_UNSIGNED,
                    MPI_MAX,
                    Communicator_pt->mpi_comm());
 
      // External prescribed partitioning
      if (global_ntarget > 0)
      {
        target_domain_for_local_non_halo_element =
          input_target_domain_for_local_non_halo_element;
      }
      else
      {
        // Metis does not always produce repeatable results, which is
        // a disaster for validation runs -- this bypasses metis and
        // comes up with a stupid but repeatable partitioning.
        if (Use_default_partition_in_load_balance)
        {
          // Bypass METIS to perform the partitioning
          unsigned objective = 0;
          bool bypass_metis = true;
          METIS::partition_distributed_mesh(
            this,
            objective,
            target_domain_for_local_non_halo_element,
            bypass_metis);
        }
        else
        {
          // Use METIS to perform the partitioning
          unsigned objective = 0;
          METIS::partition_distributed_mesh(
            this, objective, target_domain_for_local_non_halo_element);
        }
      }
 
      if (report_stats)
      {
        t_metis = TimingHelpers::timer();
      }
 
      // Setup map linking element with target domain
      std::map<GeneralisedElement*, unsigned>
        target_domain_for_local_non_halo_element_map;
      unsigned n_elem = mesh_pt()->nelement();
      unsigned count_non_halo_el = 0;
      for (unsigned e = 0; e < n_elem; e++)
      {
        GeneralisedElement* el_pt = mesh_pt()->element_pt(e);
        if (!el_pt->is_halo())
        {
          target_domain_for_local_non_halo_element_map[el_pt] =
            target_domain_for_local_non_halo_element[count_non_halo_el];
          count_non_halo_el++;
        }
      }
 
      // Load balancing is equivalent to distribution so call the
      // appropriate "actions before". NOTE: This acts on the
      // current, refined, distributed, etc. problem object
      // before it's being wiped. This step is therefore not
      // a duplicate of the call below, which acts on the
      // new, not-yet refined, distributed etc. problem!
      actions_before_distribute();
 
      // Re-setup target domains for remaining elements (FaceElements
      // are likely to have been stripped out in actions_before_distribute())
      n_elem = mesh_pt()->nelement();
      target_domain_for_local_non_halo_element.clear();
      target_domain_for_local_non_halo_element.reserve(n_elem);
      count_non_halo_el = 0;
      for (unsigned e = 0; e < n_elem; e++)
      {
        GeneralisedElement* el_pt = mesh_pt()->element_pt(e);
        if (!el_pt->is_halo())
        {
          target_domain_for_local_non_halo_element.push_back(
            target_domain_for_local_non_halo_element_map[el_pt]);
        }
      } // for (e < n_elem)
 
      // Re-setup the number of sub-meshes since some of them may have
      // been stripped out in actions_before_distribute(), but save the
      // number of old sub-meshes
      const unsigned n_old_sub_meshes = n_mesh;
      n_mesh = nsub_mesh();
 
      // Now get the target domains for each of the submeshes, we only
      // get the target domains for the nonhalo elements
      Vector<Vector<unsigned>> target_domain_for_local_non_halo_element_submesh(
        n_mesh);
      // If we have no sub-meshes then we do not need to copy the target areas
      // of the submeshes
      if (n_mesh != 0)
      {
        // Counter to copy the target domains from the global vector
        unsigned count_td = 0;
        for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
        {
          // Get the number of elements (considering halo elements)
          const unsigned nsub_ele = mesh_pt(i_mesh)->nelement();
          // Now copy that number of data from the global target domains
          for (unsigned i = 0; i < nsub_ele; i++)
          {
            // Get the element
            GeneralisedElement* ele_pt = mesh_pt(i_mesh)->element_pt(i);
            // ... and check if it is a nonhalo element
            if (!ele_pt->is_halo())
            {
              // Get the target domain for the current element
              const unsigned target_domain =
                target_domain_for_local_non_halo_element[count_td++];
              // Add the target domain for the nonhalo element in the
              // submesh
              target_domain_for_local_non_halo_element_submesh[i_mesh]
                .push_back(target_domain);
            } // if (!ele_pt->is_halo())
          } // for (i < nsub_ele)
        } // for (imesh < n_mesh)
 
#ifdef PARANOID
        // Check that the total number of copied data is the same as the
        // total number of nonhalo elements in the (sub)-mesh(es)
        const unsigned ntarget_domain =
          target_domain_for_local_non_halo_element.size();
        if (count_td != ntarget_domain)
        {
          std::ostringstream error_stream;
          error_stream
            << "The number of nonhalo elements (" << count_td
            << ") found in (all)\n"
            << "the (sub)-mesh(es) is different from the number of target "
               "domains\n"
            << "(" << ntarget_domain << ") for the nonhalo elements.\n"
            << "Please ensure that you called the rebuild_global_mesh() method "
            << "after the\npossible deletion of FaceElements in "
            << "actions_before_distribute()!!!\n\n";
          throw OomphLibError(error_stream.str(),
                              "Problem::load_balance()",
                              OOMPH_EXCEPTION_LOCATION);
        } // if (count_td != ntarget_domain)
#endif
 
      } // if (n_mesh != 0)
 
      // Check if we have different type of submeshes (unstructured
      // and/or structured). Identify to which type each submesh belongs.
      // If we have only one mesh then identify to which type that mesh
      // belongs.
 
      // The load balancing strategy acts on the structured meshes first
      // and then on the unstructured meshes
 
      // Vector to temporarily store pointers to unstructured meshes
      // (TriangleMeshBase)
      Vector<TriangleMeshBase*> unstructured_mesh_pt;
      std::vector<bool> is_unstructured_mesh;
 
      // Flag to indicate that there are unstructured meshes as part of
      // the problem
      bool are_there_unstructured_meshes = false;
 
      // We have only one mesh
      if (n_mesh == 0)
      {
        // Check if it is a TriangleMeshBase mesh
        if (TriangleMeshBase* tri_mesh_pt =
              dynamic_cast<TriangleMeshBase*>(old_mesh_pt[0]))
        {
          // Add the pointer to the unstructured meshes container
          unstructured_mesh_pt.push_back(tri_mesh_pt);
          // Indicate that it is an unstructured mesh
          is_unstructured_mesh.push_back(true);
          // Indicate that there are unstructured meshes as part of the
          // problem
          are_there_unstructured_meshes = true;
        }
        else
        {
          // Add the pointer to the unstructured meshes container (null
          // pointer)
          unstructured_mesh_pt.push_back(tri_mesh_pt);
          // Indicate that it is not an unstructured mesh
          is_unstructured_mesh.push_back(false);
        }
      } // if (n_mesh == 0)
      else // We have sub-meshes
      {
        // Check which sub-meshes are unstructured meshes (work with the
        // old sub-meshes number)
        for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
        {
          // Is it a TriangleMeshBase mesh
          if (TriangleMeshBase* tri_mesh_pt =
                dynamic_cast<TriangleMeshBase*>(old_mesh_pt[i_mesh]))
          {
            // Add the pointer to the unstructured meshes container
            unstructured_mesh_pt.push_back(tri_mesh_pt);
            // Indicate that it is an unstructured mesh
            is_unstructured_mesh.push_back(true);
            // Indicate that there are unstructured meshes as part of the
            // problem
            are_there_unstructured_meshes = true;
          }
          else
          {
            // Add the pointer to the unstructured meshes container (null
            // pointer)
            unstructured_mesh_pt.push_back(tri_mesh_pt);
            // Indicate that it is not an unstructured mesh
            is_unstructured_mesh.push_back(false);
          }
        } // for (i_mesh < n_mesh)
      } // else if (n_mesh == 0) // We have sub-meshes
 
      // Extract data to be sent to various processors after the
      //--------------------------------------------------------
      // problem has been rebuilt/re-distributed
      //----------------------------------------
 
      // Storage for number of data to be sent to each processor
      Vector<int> send_n(n_proc, 0);
 
      // Storage for all values to be sent to all processors
      Vector<double> send_data;
 
      // Start location within send_data for data to be sent to each processor
      Vector<int> send_displacement(n_proc, 0);
 
      // Old and new domains for each base element (available for all, for
      // convenience)
      Vector<unsigned> old_domain_for_base_element;
      Vector<unsigned> new_domain_for_base_element;
 
      // Flat-packed refinement info, labeled by id of locally
      // available root elements
      std::map<unsigned, Vector<unsigned>> flat_packed_refinement_info_for_root;
 
      // Max. level of refinement
      unsigned max_refinement_level_overall = 0;
 
      // Prepare the input for the get_data...() method, only copy the
      // data from the structured meshes, TreeBaseMesh meshes
      Vector<unsigned>
        target_domain_for_local_non_halo_element_in_structured_mesh;
      if (n_mesh == 0)
      {
        // Check if the mesh is a structured mesh
        if (!is_unstructured_mesh[0])
        {
          const unsigned nele_mesh =
            target_domain_for_local_non_halo_element.size();
          for (unsigned e = 0; e < nele_mesh; e++)
          {
            const unsigned target_domain =
              target_domain_for_local_non_halo_element[e];
            target_domain_for_local_non_halo_element_in_structured_mesh
              .push_back(target_domain);
          } // for (e < nele_mesh)
        } // if (!is_unstructured_mesh[0])
      } // if (n_mesh == 0)
      else
      {
        // Copy the target domains from the structured meshes only
        for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
        {
          // Check if the mesh is a structured mesh
          if (!is_unstructured_mesh[i_mesh])
          {
            const unsigned nele_sub_mesh =
              target_domain_for_local_non_halo_element_submesh[i_mesh].size();
            for (unsigned e = 0; e < nele_sub_mesh; e++)
            {
              const unsigned target_domain =
                target_domain_for_local_non_halo_element_submesh[i_mesh][e];
              target_domain_for_local_non_halo_element_in_structured_mesh
                .push_back(target_domain);
            } // for (e < nele_sub_mesh)
          } // if (!is_triangle_mesh_base[i_mesh])
        } // for (i_mesh < n_mesh)
      } // else if (n_mesh == 0)
 
      // Extract data from current problem
      // sorted into data to be sent to various processors after
      // rebuilding the meshes in a load-balanced form
      get_data_to_be_sent_during_load_balancing(
        target_domain_for_local_non_halo_element_in_structured_mesh,
        send_n,
        send_data,
        send_displacement,
        old_domain_for_base_element,
        new_domain_for_base_element,
        max_refinement_level_overall);
 
      // Extract flat-packed refinement pattern
      get_flat_packed_refinement_pattern_for_load_balancing(
        old_domain_for_base_element,
        new_domain_for_base_element,
        max_refinement_level_overall,
        flat_packed_refinement_info_for_root);
 
      if (report_stats)
      {
        t_partition = TimingHelpers::timer();
        oomph_info << "CPU for partition calculation for roots: "
                   << t_partition - t_metis << std::endl;
      }
 
 
      // Flush and delete old submeshes and null the global mesh
      //--------------------------------------------------------
      // and rebuild the new (not yet distributed, refined etc.) mesh
      //-------------------------------------------------------------
      // that will be distributed in the new, improved way determined
      //-------------------------------------------------------------
      // by METIS
      //---------
      Vector<unsigned> pruned_refinement_level(
        std::max(int(n_old_sub_meshes), 1));
      if (n_mesh == 0)
      {
        pruned_refinement_level[0] = 0;
        TreeBasedRefineableMeshBase* ref_mesh_pt =
          dynamic_cast<TreeBasedRefineableMeshBase*>(old_mesh_pt[0]);
        if (ref_mesh_pt != 0)
        {
          pruned_refinement_level[0] =
            ref_mesh_pt->uniform_refinement_level_when_pruned();
        }
 
        // If the mesh is an unstructured mesh (TriangleMeshBase mesh)
        // then we should not delete it since the load balance strategy
        // requires the mesh
 
        // Delete the mesh if it is not an unstructured mesh
        if (!is_unstructured_mesh[0])
        {
          delete old_mesh_pt[0];
          old_mesh_pt[0] = 0;
        } // if (!is_unstructured_mesh[0])
      } // if (n_mesh==0)
      else
      {
        // Loop over the number of old meshes (required to delete the
        // pointers of structured meshes in the old_mesh_pt structure)
        for (unsigned i_mesh = 0; i_mesh < n_old_sub_meshes; i_mesh++)
        {
          pruned_refinement_level[i_mesh] = 0;
          TreeBasedRefineableMeshBase* ref_mesh_pt =
            dynamic_cast<TreeBasedRefineableMeshBase*>(old_mesh_pt[i_mesh]);
          if (ref_mesh_pt != 0)
          {
            pruned_refinement_level[i_mesh] =
              ref_mesh_pt->uniform_refinement_level_when_pruned();
          }
 
          // If the mesh is an unstructured mesh (TriangleMeshBase mesh)
          // then we should NOT delete it since the load balance strategy
          // requires the mesh
 
          // Delete the mesh if it is not an unstructured mesh
          if (!is_unstructured_mesh[i_mesh])
          {
            delete old_mesh_pt[i_mesh];
            old_mesh_pt[i_mesh] = 0;
          } // if (!is_unstructured_mesh[i_mesh])
 
        } // for (i_mesh<n_mesh)
 
        // Empty storage for sub-meshes
        flush_sub_meshes();
 
        // Flush the storage for nodes and elements in compound mesh
        // (they've already been deleted in the sub-meshes)
        mesh_pt()->flush_element_and_node_storage();
 
        // Kill
        delete mesh_pt();
        mesh_pt() = 0;
      } // else if (n_mesh==0)
 
      bool some_mesh_has_been_pruned = false;
      unsigned n = pruned_refinement_level.size();
      for (unsigned i = 0; i < n; i++)
      {
        if (pruned_refinement_level[i] > 0) some_mesh_has_been_pruned = true;
      }
 
      // (Re-)build the new mesh(es) -- this must get the problem into the
      // state it was in when it was first distributed!
      build_mesh();
 
      // Has one of the meshes been pruned; if so refine to the
      // common refinement level
      if (some_mesh_has_been_pruned)
      {
        // Do actions before adapt
        actions_before_adapt();
 
        // Re-assign number of submeshes -- when this was first
        // set, the problem may have had face meshes that have now
        // disappeared.
        n_mesh = nsub_mesh();
 
        // Now adapt meshes manually
        if (n_mesh == 0)
        {
          TreeBasedRefineableMeshBase* ref_mesh_pt =
            dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt());
          if (ref_mesh_pt != 0)
          {
            // Get min and max refinement level
            unsigned local_min_ref = 0;
            unsigned local_max_ref = 0;
            ref_mesh_pt->get_refinement_levels(local_min_ref, local_max_ref);
 
            // Reconcile between processors: If (e.g. following
            // distribution/pruning) the mesh has no elements on this
            // processor, then ignore its contribution to the poll of
            // max/min refinement levels
            int int_local_min_ref = local_min_ref;
            if (ref_mesh_pt->nelement() == 0)
            {
              int_local_min_ref = INT_MAX;
            }
            int int_min_ref = 0;
            MPI_Allreduce(&int_local_min_ref,
                          &int_min_ref,
                          1,
                          MPI_INT,
                          MPI_MIN,
                          Communicator_pt->mpi_comm());
 
            // Overall min refinement level of this mesh over all processors
            unsigned min_ref = unsigned(int_min_ref);
 
            // Refine as many times as required to get refinement up to
            // uniform refinement level after last prune
            unsigned nref = pruned_refinement_level[0] - min_ref;
            oomph_info << "Refining one-and-only mesh uniformly " << nref
                       << " times\n";
            for (unsigned i = 0; i < nref; i++)
            {
              ref_mesh_pt->refine_uniformly();
            }
          }
        }
        else
        {
          for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
          {
            TreeBasedRefineableMeshBase* ref_mesh_pt =
              dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(i_mesh));
            if (ref_mesh_pt != 0)
            {
              // Get min and max refinement level
              unsigned local_min_ref = 0;
              unsigned local_max_ref = 0;
              ref_mesh_pt->get_refinement_levels(local_min_ref, local_max_ref);
 
              // Reconcile between processors: If (e.g. following
              // distribution/pruning) the mesh has no elements on this
              // processor, then ignore its contribution to the poll of
              // max/min refinement levels
              int int_local_min_ref = local_min_ref;
              if (ref_mesh_pt->nelement() == 0)
              {
                int_local_min_ref = INT_MAX;
              }
              int int_min_ref = 0;
              MPI_Allreduce(&int_local_min_ref,
                            &int_min_ref,
                            1,
                            MPI_INT,
                            MPI_MIN,
                            Communicator_pt->mpi_comm());
 
              // Overall min refinement level of this mesh over all processors
              unsigned min_ref = unsigned(int_min_ref);
 
              // Refine as many times as required to get refinement up to
              // uniform refinement level after last prune
              unsigned nref = pruned_refinement_level[i_mesh] - min_ref;
              oomph_info << "Refining sub-mesh " << i_mesh << " uniformly "
                         << nref << " times\n";
              for (unsigned i = 0; i < nref; i++)
              {
                ref_mesh_pt->refine_uniformly();
              }
            }
          }
          // Rebuild the global mesh
          rebuild_global_mesh();
        }
 
        // Do actions after adapt
        actions_after_adapt();
 
        // Re-assign number of submeshes -- when this was first
        // set, the problem may have had face meshes that have now
        // disappeared.
        n_mesh = nsub_mesh();
      } // if (some_mesh_has_been_pruned)
 
 
      // Perform any actions before distribution but now for the new mesh
      // NOTE: This does NOT replicate the actions_before_distribute()
      // call made above for the previous mesh!
      actions_before_distribute();
 
      // Do some book-keeping
      //---------------------
 
      // Re-assign number of submeshes -- when this was first
      // set, the problem may have had face meshes that have now
      // disappeared.
      n_mesh = nsub_mesh();
 
      // The submeshes, if they exist, need to know their own element
      // domains.
      // NOTE: This vector only stores the target domains or the
      // element partition for structured meshes
      Vector<Vector<unsigned>> submesh_element_partition(n_mesh);
      if (n_mesh != 0)
      {
        unsigned count = 0;
        for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
        {
          // Only work with structured meshes
          if (!is_unstructured_mesh[i_mesh])
          {
            // Get the number of element in the mesh
            const unsigned nsub_ele = mesh_pt(i_mesh)->nelement();
            submesh_element_partition[i_mesh].resize(nsub_ele);
            for (unsigned e = 0; e < nsub_ele; e++)
            {
              submesh_element_partition[i_mesh][e] =
                new_domain_for_base_element[count++];
            } // for (e<nsub_elem)
          } // if (sub_mesh_pt!=0)
        } // for (i_mesh<n_mesh)
 
#ifdef PARANOID
        const unsigned nnew_domain_for_base_element =
          new_domain_for_base_element.size();
        if (count != nnew_domain_for_base_element)
        {
          std::ostringstream error_stream;
          error_stream
            << "The number of READ target domains for nonhalo elements\n"
            << " is (" << count << "), but the number of target domains for\n"
            << "nonhalo elements is (" << nnew_domain_for_base_element
            << ")!\n";
          throw OomphLibError(error_stream.str(),
                              "Problem::load_balance()",
                              OOMPH_EXCEPTION_LOCATION);
        }
#endif
 
      } // if (n_mesh!=0)
 
      // Setup the map between "root" element and number in global mesh
      // again
 
      // This map is only established for structured meshes, so we
      // need to check the type of mesh here
      if (n_mesh == 0)
      {
        // Check if the one-and-only mesh is a structured mesh
        if (!is_unstructured_mesh[0])
        {
          const unsigned n_ele = mesh_pt()->nelement();
          Base_mesh_element_pt.resize(n_ele);
          Base_mesh_element_number_plus_one.clear();
          for (unsigned e = 0; e < n_ele; e++)
          {
            GeneralisedElement* el_pt = mesh_pt()->element_pt(e);
            Base_mesh_element_number_plus_one[el_pt] = e + 1;
            Base_mesh_element_pt[e] = el_pt;
          } // for (e<n_ele)
        } // if (!is_triangle_mesh_base[0])
      } // if (n_mesh==0)
      else
      {
        // If we have submeshes then we only add those elements that
        // belong to structured meshes, but first compute the number of
        // total elements in the structured sub-meshes
        unsigned nglobal_element = 0;
        for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
        {
          // Check if mesh is a structured mesh
          if (!is_unstructured_mesh[i_mesh])
          {
            nglobal_element += mesh_pt(i_mesh)->nelement();
          } // if (!is_triangle_mesh_base[i_mesh])
        } // for (i_mesh<n_mesh)
 
        // Once computed the number of elements, then resize the
        // structure
        Base_mesh_element_pt.resize(nglobal_element);
        Base_mesh_element_number_plus_one.clear();
        unsigned counter_base = 0;
        for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
        {
          // Check if mesh is a structured mesh
          if (!is_unstructured_mesh[i_mesh])
          {
            const unsigned n_ele = mesh_pt(i_mesh)->nelement();
            for (unsigned e = 0; e < n_ele; e++)
            {
              GeneralisedElement* el_pt = mesh_pt(i_mesh)->element_pt(e);
              Base_mesh_element_number_plus_one[el_pt] = counter_base + 1;
              Base_mesh_element_pt[counter_base] = el_pt;
              // Increase the global element number
              counter_base++;
            } // for (e<n_ele)
          } // if (!is_triangle_mesh_base[i_mesh])
        } // for (i_mesh<n_mesh)
 
#ifdef PARANOID
        if (counter_base != nglobal_element)
        {
          std::ostringstream error_stream;
          error_stream << "The number of global elements (" << nglobal_element
                       << ") is not the same as the number of\nadded elements ("
                       << counter_base << ") to the Base_mesh_element_pt data "
                       << "structure!!!\n\n";
          throw OomphLibError(error_stream.str(),
                              "Problem::load_balance()",
                              OOMPH_EXCEPTION_LOCATION);
        } // if (counter_base != nglobal_element)
#endif // #ifdef PARANOID
 
      } // else if (n_mesh==0)
 
      // Storage for the number of face elements in the base mesh --
      // element is identified by number of bulk element and face index
      // so we can reconstruct it if and when the FaceElements have been wiped
      // in actions_before_distribute().
      // NOTE: Not really clear (any more) why this is required. Typically
      //       FaceElements get wiped in actions_before_distribute() so
      //       at this point there shouldn't be any of them left.
      //       This is certainly the case in all our currently existing
      //       test codes. However, I'm too scared to take this out
      //       in case it does matter (we're not insisting that FaceElements
      //       are always removed in actions_before_distribute()...).
      std::map<unsigned, std::map<int, unsigned>> face_element_number;
      unsigned n_element = mesh_pt()->nelement();
      for (unsigned e = 0; e < n_element; e++)
      {
        FaceElement* face_el_pt =
          dynamic_cast<FaceElement*>(mesh_pt()->finite_element_pt(e));
        if (face_el_pt != 0)
        {
#ifdef PARANOID
          std::stringstream info;
          info << "================================================\n";
          info << "INFO: I've come across a FaceElement while \n";
          info << "       load-balancing a problem. \n";
          info << "================================================\n";
          oomph_info << info.str() << std::endl;
#endif
          FiniteElement* bulk_elem_pt = face_el_pt->bulk_element_pt();
          unsigned e_bulk = Base_mesh_element_number_plus_one[bulk_elem_pt];
#ifdef PARANOID
          if (e_bulk == 0)
          {
            throw OomphLibError("Base_mesh_element_number_plus_one[...]=0",
                                OOMPH_CURRENT_FUNCTION,
                                OOMPH_EXCEPTION_LOCATION);
          }
#endif
          e_bulk -= 1;
          int face_index = face_el_pt->face_index();
          face_element_number[e_bulk][face_index] = e;
        }
      }
 
      // Distribute the (sub)meshes
      //---------------------------
      Vector<GeneralisedElement*> deleted_element_pt;
      if (n_mesh == 0)
      {
        // Only distribute (load balance strategy) if this is a
        // structured mesh
        if (!is_unstructured_mesh[0])
        {
#ifdef PARANOID
          if (mesh_pt()->nelement() != new_domain_for_base_element.size())
          {
            std::ostringstream error_stream;
            error_stream << "Distributing one-and-only mesh containing "
                         << mesh_pt()->nelement() << " elements with info for "
                         << new_domain_for_base_element.size() << std::endl;
            throw OomphLibError(error_stream.str(),
                                OOMPH_CURRENT_FUNCTION,
                                OOMPH_EXCEPTION_LOCATION);
          }
#endif
 
          if (report_stats)
          {
            oomph_info << "Distributing one and only mesh\n"
                       << "------------------------------" << std::endl;
          }
 
          // No pre-set distribution from restart that may leave some
          // processors empty so no need to overrule deletion of elements
          bool overrule_keep_as_halo_element_status = false;
 
          mesh_pt()->distribute(this->communicator_pt(),
                                new_domain_for_base_element,
                                deleted_element_pt,
                                doc_info,
                                report_stats,
                                overrule_keep_as_halo_element_status);
 
        } // if (!is_unstructured_mesh[0])
 
      } // if (n_mesh==0)
      else // There are submeshes, "distribute" each one separately
      {
        // Rebuild the mesh only if one of the meshes was modified
        bool need_to_rebuild_mesh = false;
        for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
        {
          // Perform the load balancing based on distribution in the
          // structured meshes only
          if (!is_unstructured_mesh[i_mesh])
          {
            if (report_stats)
            {
              oomph_info << "Distributing submesh " << i_mesh << " of "
                         << n_mesh << " in total\n"
                         << "---------------------------------------------"
                         << std::endl;
            }
 
            // Set the doc_info number to reflect the submesh
            doc_info.number() = i_mesh;
 
            // No pre-set distribution from restart that may leave some
            // processors empty so no need to overrule deletion of elements
            bool overrule_keep_as_halo_element_status = false;
            mesh_pt(i_mesh)->distribute(this->communicator_pt(),
                                        submesh_element_partition[i_mesh],
                                        deleted_element_pt,
                                        doc_info,
                                        report_stats,
                                        overrule_keep_as_halo_element_status);
 
            // Set the flag to rebuild the global mesh
            need_to_rebuild_mesh = true;
 
          } // if (!is_unstructured_mesh[i_mesh])
 
        } // for (i_mesh<n_mesh)
 
        if (need_to_rebuild_mesh)
        {
          // Rebuild the global mesh
          rebuild_global_mesh();
        } // if (need_to_rebuild_mesh)
 
      } // else if (n_mesh==0)
 
      // Null out information associated with deleted elements
      unsigned n_del = deleted_element_pt.size();
      for (unsigned e = 0; e < n_del; e++)
      {
        GeneralisedElement* el_pt = deleted_element_pt[e];
        unsigned old_el_number = Base_mesh_element_number_plus_one[el_pt] - 1;
        Base_mesh_element_number_plus_one[el_pt] = 0;
        Base_mesh_element_pt[old_el_number] = 0;
      }
 
      // Has one of the meshes been pruned before distribution? If so
      // then prune here now
      if (some_mesh_has_been_pruned)
      {
        Vector<GeneralisedElement*> deleted_element_pt;
        if (n_mesh == 0)
        {
          TreeBasedRefineableMeshBase* ref_mesh_pt =
            dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt());
          if (ref_mesh_pt != 0)
          {
            ref_mesh_pt->prune_halo_elements_and_nodes(
              deleted_element_pt, doc_info, report_stats);
          }
        }
        else
        {
          for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
          {
            TreeBasedRefineableMeshBase* ref_mesh_pt =
              dynamic_cast<TreeBasedRefineableMeshBase*>(mesh_pt(i_mesh));
            if (ref_mesh_pt != 0)
            {
              ref_mesh_pt->prune_halo_elements_and_nodes(
                deleted_element_pt, doc_info, report_stats);
            }
          }
          // Rebuild the global mesh
          rebuild_global_mesh();
        }
 
        // Null out information associated with deleted elements
        unsigned n_del = deleted_element_pt.size();
        for (unsigned e = 0; e < n_del; e++)
        {
          GeneralisedElement* el_pt = deleted_element_pt[e];
          unsigned old_el_number = Base_mesh_element_number_plus_one[el_pt] - 1;
          Base_mesh_element_number_plus_one[el_pt] = 0;
          Base_mesh_element_pt[old_el_number] = 0;
        }
 
        // Setup the map between "root" element and number in global mesh again
        setup_base_mesh_info_after_pruning();
      }
 
      if (report_stats)
      {
        t_distribute = TimingHelpers::timer();
        oomph_info << "CPU for build and distribution of new mesh(es): "
                   << t_distribute - t_partition << std::endl;
      }
 
 
      // Send refinement info to other processors
      //-----------------------------------------
 
      // Storage for refinement pattern:  Given ID of root element,
      // root_element_id, and current refinement level, level, the e-th entry in
      // refinement_info_for_root_elements[root_element_id][level][e] is equal
      // to 2 if the e-th element (using the enumeration when the mesh has been
      // refined to the level-th level) is to be refined during the next
      // refinement; it's 1 if it's not to be refined, and 0 if the element
      // does not exist at that level.
      Vector<Vector<Vector<unsigned>>> refinement_info_for_root_elements;
 
 
      // Send refinement information between processors, using flat-packed
      // information accumulated earlier
      send_refinement_info_helper(old_domain_for_base_element,
                                  new_domain_for_base_element,
                                  max_refinement_level_overall,
                                  flat_packed_refinement_info_for_root,
                                  refinement_info_for_root_elements);
 
      // Refine each mesh based upon refinement information stored for each root
      //------------------------------------------------------------------------
      refine_distributed_base_mesh(refinement_info_for_root_elements,
                                   max_refinement_level_overall);
 
      if (report_stats)
      {
        t_refine = TimingHelpers::timer();
        oomph_info << "CPU for refinement of base mesh: "
                   << t_refine - t_distribute << std::endl;
      }
 
      // NOTE: The following two calls are important e.g. when
      //       FaceElements that resize nodes are attached/detached
      //       after/before adaptation. If we don't attach them
      //       on the newly built/refined mesh, there isn't enough
      //       storage for the nodal values that are sent around
      //       (in a flat-packed format) resulting in total disaster.
      //       So we attach them first, but then immediately strip
      //       them out again because the FaceElements themselves
      //       will have been stripped out before distribution/adaptation.
 
      // Do actions after adapt because we have just adapted the mesh.
      actions_after_adapt();
 
      // Now strip it back out to get problem into the same state
      // it was in when data to be sent was recorded.
      actions_before_adapt();
 
      // Send the stored values in each root from the old mesh into the new mesh
      //------------------------------------------------------------------------
      send_data_to_be_sent_during_load_balancing(
        send_n, send_data, send_displacement);
 
      // If there are unstructured meshes here we perform the load
      // balancing of those meshes
      if (are_there_unstructured_meshes)
      {
        // Delete any storage of external elements and nodes
        this->delete_all_external_storage();
 
        if (n_mesh == 0)
        {
          // Before doing the load balancing delete the mesh created
          // when build_mesh() was called, and restore the pointer to
          // the old mesh
 
          // It MUST be an unstructured mesh, otherwise we should not be
          // here
          if (is_unstructured_mesh[0])
          {
            // Delete the new created mesh
            delete mesh_pt();
            // Re-assign the pointer to the old mesh
            this->mesh_pt() = old_mesh_pt[0];
          } // if (is_unstructured_mesh[0])
#ifdef PARANOID
          else
          {
            std::ostringstream error_stream;
            error_stream << "The only one mesh in the problem is not an "
                            "unstructured mesh,\n"
                         << "but the flag 'are_there_unstructures_meshes' ("
                         << are_there_unstructured_meshes
                         << ") was turned on,\n"
                         << "this is weird. Please check for any  condition "
                            "that may have\n"
                         << "turned on this flag!!!!\n\n";
            throw OomphLibError(error_stream.str(),
                                "Problem::load_balance()",
                                OOMPH_EXCEPTION_LOCATION);
          }
#endif
 
          unstructured_mesh_pt[0]->load_balance(
            target_domain_for_local_non_halo_element);
        } // if (n_mesh == 0)
        else
        {
          // Before doing the load balancing delete the meshes created
          // when build_mesh() was called, and restore the pointers to
          // the old meshes
          for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
          {
            if (is_unstructured_mesh[i_mesh])
            {
              // Delete the new created mesh
              delete mesh_pt(i_mesh);
              // Now point it to nothing
              mesh_pt(i_mesh) = 0;
              // ... and re-assign the pointer to the old mesh
              this->mesh_pt(i_mesh) = old_mesh_pt[i_mesh];
            } // if (is_unstructured_mesh[i_mesh])
 
          } // for (i_mesh<n_mesh)
 
          // Empty storage for sub-meshes
          // flush_sub_meshes();
 
          // Flush the storage for nodes and elements in compound mesh
          // (they've already been deleted in the sub-meshes)
          mesh_pt()->flush_element_and_node_storage();
 
          // Now we can proceed with the load balancing
          for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
          {
            if (is_unstructured_mesh[i_mesh])
            {
              // Get the number of elements in the "i_mesh" (the old one)
              const unsigned n_element = old_mesh_pt[i_mesh]->nelement();
 
              // Perform the load balancing if there are elements in the
              // mesh. We check for this case because the meshes created
              // from face elements have been cleaned in
              // "actions_before_distribute()"
              if (n_element > 0 && is_unstructured_mesh[i_mesh])
              {
                unstructured_mesh_pt[i_mesh]->load_balance(
                  target_domain_for_local_non_halo_element_submesh[i_mesh]);
              } // if (n_element > 0)
            } // if (is_unstructured_mesh[i_mesh)]
          } // for (i_mesh < n_mesh)
 
          // Rebuild the global mesh
          rebuild_global_mesh();
 
        } // else if (n_mesh == 0)
 
      } // if (are_there_unstructured_meshes)
 
      if (report_stats)
      {
        t_copy_solution = TimingHelpers::timer();
        oomph_info << "CPU for transferring solution to new mesh(es): "
                   << t_copy_solution - t_refine << std::endl;
        oomph_info << "CPU for load balancing: " << t_copy_solution - t_start
                   << std::endl;
      }
 
      // Do actions after distribution
      actions_after_distribute();
 
      // Re-assign equation numbers
#ifdef PARANOID
      unsigned n_dof = assign_eqn_numbers();
#else
      assign_eqn_numbers();
#endif
 
      if (report_stats)
      {
        oomph_info
          << "Total number of elements on this processor after load balance: "
          << mesh_pt()->nelement() << std::endl;
 
        oomph_info << "Number of non-halo elements on this processor after "
                      "load balance: "
                   << mesh_pt()->nnon_halo_element() << std::endl;
      }
 
#ifdef PARANOID
      if (n_dof != old_ndof)
      {
        std::ostringstream error_stream;
        error_stream
          << "Number of dofs in load_balance() has changed from " << old_ndof
          << " to " << n_dof << "\n"
          << "Check that you've implemented any necessary "
             "actions_before/after\n"
          << "adapt/distribute functions, e.g. to pin redundant pressure dofs"
          << " etc.\n";
        throw OomphLibError(
          error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
      }
#endif
    }
 
    // Finally synchronise all dofs to allow halo check to pass
    synchronise_all_dofs();
 
    double end_t = TimingHelpers::timer();
    oomph_info << "Time for load_balance() [sec]    : " << end_t - start_t
               << std::endl;
  }
 
 
  //==========================================================================
  /// Send refinement information between processors
  //==========================================================================
  void Problem::send_refinement_info_helper(
    Vector<unsigned>& old_domain_for_base_element,
    Vector<unsigned>& new_domain_for_base_element,
    const unsigned& max_refinement_level_overall,
    std::map<unsigned, Vector<unsigned>>& flat_packed_refinement_info_for_root,
    Vector<Vector<Vector<unsigned>>>& refinement_info_for_root_elements)
  {
    // Number of processes etc.
    const int n_proc = this->communicator_pt()->nproc();
    const int my_rank = this->communicator_pt()->my_rank();
 
    // Make space
    unsigned n_base_element = old_domain_for_base_element.size();
    refinement_info_for_root_elements.resize(n_base_element);
 
    // Make space for list of domains that the refinement info
    // is to be forwarded to
    std::map<unsigned, Vector<unsigned>> halo_domain_of_haloed_base_element;
 
    // Find out haloed elements in new, redistributed problem
    //-------------------------------------------------------
 
    // halo_domains[e][j] = j-th halo domain associated with (haloed) element e
    std::map<unsigned, Vector<unsigned>> halo_domains;
 
    // Loop over sub meshes
    unsigned n_sub_mesh = nsub_mesh();
    unsigned max_mesh = std::max(n_sub_mesh, unsigned(1));
    for (unsigned i_mesh = 0; i_mesh < max_mesh; i_mesh++)
    {
      // Choose the right mesh
      Mesh* my_mesh_pt = 0;
      if (n_sub_mesh == 0)
      {
        my_mesh_pt = mesh_pt();
      }
      else
      {
        my_mesh_pt = mesh_pt(i_mesh);
      }
 
      // Only work with structured meshes
      TriangleMeshBase* sub_mesh_pt =
        dynamic_cast<TriangleMeshBase*>(mesh_pt(i_mesh));
      if (!(sub_mesh_pt != 0))
      {
        // Loop over processors to find haloed elements -- need to
        // send their refinement patterns to the processors that hold
        // their halo counterparts!
        for (int p = 0; p < n_proc; p++)
        {
          Vector<GeneralisedElement*> haloed_elem_pt =
            my_mesh_pt->haloed_element_pt(p);
          unsigned nhaloed = haloed_elem_pt.size();
          for (unsigned h = 0; h < nhaloed; h++)
          {
            // This element must send its refinement information to processor p
            unsigned e = Base_mesh_element_number_plus_one[haloed_elem_pt[h]];
#ifdef PARANOID
            if (e == 0)
            {
              throw OomphLibError("Base_mesh_element_number_plus_one[...]=0",
                                  OOMPH_CURRENT_FUNCTION,
                                  OOMPH_EXCEPTION_LOCATION);
            }
#endif
            e -= 1;
            halo_domains[e].push_back(p);
          }
        }
      } // if (!(sub_mesh_pt!=0))
    } // for (i_mesh<max_mesh)
 
    // Accumulate relevant flat-packed refinement data to be sent to
    //--------------------------------------------------------------
    // various processors
    //-------------------
 
    // Map to accumulate unsigned data to be sent to each processor
    // (map for sparsity)
    std::map<unsigned, Vector<unsigned>> data_for_proc;
 
    // Number of base elements to be sent to specified domain
    Vector<unsigned> nbase_elements_for_proc(n_proc, 0);
 
    // Total number of entries in send vector
    unsigned count = 0;
 
    // Loop over all base elements
    //----------------------------
    for (unsigned e = 0; e < n_base_element; e++)
    {
      // Is it one of mine (i.e. was it a non-halo element on this
      //----------------------------------------------------------
      // processor before re-distribution, and do I therefore hold
      //----------------------------------------------------------
      // refinement information for it)?
      //--------------------------------
      if (int(old_domain_for_base_element[e]) == my_rank)
      {
        // Where does it go?
        unsigned new_domain = new_domain_for_base_element[e];
 
        // Keep counting
        nbase_elements_for_proc[new_domain]++;
 
        // If it stays local, deal with it here
        if (int(new_domain) == my_rank)
        {
          // Record on which other procs/domains the refinement info for
          // this element is required because it's haloed.
          unsigned nhalo = halo_domains[e].size();
          halo_domain_of_haloed_base_element[e].resize(nhalo);
          for (unsigned j = 0; j < nhalo; j++)
          {
            halo_domain_of_haloed_base_element[e][j] = halo_domains[e][j];
          }
 
          // Provide storage for refinement pattern
          refinement_info_for_root_elements[e].resize(
            max_refinement_level_overall);
 
#ifdef PARANOID
          // Get number of additional data sent for check
          unsigned n_additional_data =
            flat_packed_refinement_info_for_root[e].size();
#endif
 
          // Get number of tree nodes
          unsigned n_tree_nodes = flat_packed_refinement_info_for_root[e][0];
 
          // Counter for entries to be processed locally
          unsigned local_count = 1; // (have already processed zero-th entry)
 
          // Loop over levels and number of nodes in tree
          for (unsigned level = 0; level < max_refinement_level_overall;
               level++)
          {
            for (unsigned ee = 0; ee < n_tree_nodes; ee++)
            {
              // Element exists at this level
              if (flat_packed_refinement_info_for_root[e][local_count] == 1)
              {
                local_count++;
 
                // Element should be refined
                if (flat_packed_refinement_info_for_root[e][local_count] == 1)
                {
                  refinement_info_for_root_elements[e][level].push_back(2);
                  local_count++;
                }
                // Element should not be refined
                else
                {
                  refinement_info_for_root_elements[e][level].push_back(1);
                  local_count++;
                }
              }
              // Element does not exist at this level
              else
              {
                refinement_info_for_root_elements[e][level].push_back(0);
                local_count++;
              }
            }
          }
 
#ifdef PARANOID
          if (n_additional_data != local_count)
          {
            std::stringstream error_message;
            error_message << "Number of additional data: " << n_additional_data
                          << " doesn't match that actually send: "
                          << local_count << std::endl;
            throw OomphLibError(error_message.str(),
                                OOMPH_CURRENT_FUNCTION,
                                OOMPH_EXCEPTION_LOCATION);
          }
#endif
        }
        // Element in question is not one of mine so prepare for sending
        //--------------------------------------------------------------
        else
        {
          // Make space
          unsigned current_size = data_for_proc[new_domain].size();
          unsigned n_additional_data =
            flat_packed_refinement_info_for_root[e].size();
          data_for_proc[new_domain].reserve(current_size + n_additional_data +
                                            2);
 
          // Keep counting
          count += n_additional_data + 2;
 
          // Add base element number
          data_for_proc[new_domain].push_back(e);
 
#ifdef PARANOID
          // Add number of flat-packed instructions to follow
          data_for_proc[new_domain].push_back(n_additional_data);
#endif
 
          // Add flat packed refinement data
          for (unsigned j = 0; j < n_additional_data; j++)
          {
            data_for_proc[new_domain].push_back(
              flat_packed_refinement_info_for_root[e][j]);
          }
        }
      }
    }
 
 
    // Now do the actual send/receive
    //-------------------------------
 
    // Storage for number of data to be sent to each processor
    Vector<int> send_n(n_proc, 0);
 
    // Storage for all values to be sent to all processors
    Vector<unsigned> send_data;
    send_data.reserve(count);
 
    // Start location within send_data for data to be sent to each processor
    Vector<int> send_displacement(n_proc, 0);
 
    // Loop over all processors
    for (int rank = 0; rank < n_proc; rank++)
    {
      // Set the offset for the current processor
      send_displacement[rank] = send_data.size();
 
      // Don't bother to do anything if the processor in the loop is the
      // current processor
      if (rank != my_rank)
      {
        // Record how many base elements are to be sent
        send_data.push_back(nbase_elements_for_proc[rank]);
 
        // Add data
        unsigned n_data = data_for_proc[rank].size();
        for (unsigned j = 0; j < n_data; j++)
        {
          send_data.push_back(data_for_proc[rank][j]);
        }
      }
 
      // Find the number of data added to the vector
      send_n[rank] = send_data.size() - send_displacement[rank];
    }
 
    // Storage for the number of data to be received from each processor
    Vector<int> receive_n(n_proc, 0);
 
    // Now send numbers of data to be sent between all processors
    MPI_Alltoall(&send_n[0],
                 1,
                 MPI_INT,
                 &receive_n[0],
                 1,
                 MPI_INT,
                 this->communicator_pt()->mpi_comm());
 
    // We now prepare the data to be received
    // by working out the displacements from the received data
    Vector<int> receive_displacement(n_proc, 0);
    int receive_data_count = 0;
    for (int rank = 0; rank < n_proc; ++rank)
    {
      // Displacement is number of data received so far
      receive_displacement[rank] = receive_data_count;
      receive_data_count += receive_n[rank];
    }
 
    // Now resize the receive buffer for all data from all processors
    // Make sure that it has a size of at least one
    if (receive_data_count == 0)
    {
      ++receive_data_count;
    }
    Vector<unsigned> receive_data(receive_data_count);
 
    // Make sure that the send buffer has size at least one
    // so that we don't get a segmentation fault
    if (send_data.size() == 0)
    {
      send_data.resize(1);
    }
 
    // Now send the data between all the processors
    MPI_Alltoallv(&send_data[0],
                  &send_n[0],
                  &send_displacement[0],
                  MPI_UNSIGNED,
                  &receive_data[0],
                  &receive_n[0],
                  &receive_displacement[0],
                  MPI_UNSIGNED,
                  this->communicator_pt()->mpi_comm());
 
 
    // Now use the received data to update
    //-----------------------------------
    for (int send_rank = 0; send_rank < n_proc; send_rank++)
    {
      // Don't bother to do anything for the processor corresponding to the
      // current processor or if no data were received from this processor
      if ((send_rank != my_rank) && (receive_n[send_rank] != 0))
      {
        // Counter for the data within the large array
        unsigned count = receive_displacement[send_rank];
 
        // Loop over base elements
        unsigned nbase_element = receive_data[count];
        count++;
        for (unsigned b = 0; b < nbase_element; b++)
        {
          //  Get base element number
          unsigned base_element_number = receive_data[count];
          count++;
 
          // Record on which other procs/domains the refinement info for
          // this element is required because it's haloed.
          unsigned nhalo = halo_domains[base_element_number].size();
          halo_domain_of_haloed_base_element[base_element_number].resize(nhalo);
          for (unsigned j = 0; j < nhalo; j++)
          {
            halo_domain_of_haloed_base_element[base_element_number][j] =
              halo_domains[base_element_number][j];
          }
 
          // Provide storage for refinement pattern
          refinement_info_for_root_elements[base_element_number].resize(
            max_refinement_level_overall);
 
          // Get number of flat-packed instructions to follow
          // (only used for check)
#ifdef PARANOID
          unsigned n_additional_data = receive_data[count];
          count++;
 
          // Counter for number of additional data (validation only)
          unsigned check_count = 0;
#endif
 
          // Get number of tree nodes
          unsigned n_tree_nodes = receive_data[count];
          count++;
 
#ifdef PARANOID
          check_count++;
#endif
 
          // Loop over levels and number of nodes in tree
          for (unsigned level = 0; level < max_refinement_level_overall;
               level++)
          {
            for (unsigned e = 0; e < n_tree_nodes; e++)
            {
              // Element exists at this level
              if (receive_data[count] == 1)
              {
                count++;
 
#ifdef PARANOID
                check_count++;
#endif
 
                // Element should be refined
                if (receive_data[count] == 1)
                {
                  refinement_info_for_root_elements[base_element_number][level]
                    .push_back(2);
                  count++;
 
#ifdef PARANOID
                  check_count++;
#endif
                }
                // Element should not be refined
                else
                {
                  refinement_info_for_root_elements[base_element_number][level]
                    .push_back(1);
                  count++;
 
#ifdef PARANOID
                  check_count++;
#endif
                }
              }
              // Element does not exist at this level
              else
              {
                refinement_info_for_root_elements[base_element_number][level]
                  .push_back(0);
                count++;
 
#ifdef PARANOID
                check_count++;
#endif
              }
            }
          }
 
#ifdef PARANOID
          if (n_additional_data != check_count)
          {
            std::stringstream error_message;
            error_message << "Number of additional data: " << n_additional_data
                          << " doesn't match that actually send: "
                          << check_count << std::endl;
            throw OomphLibError(error_message.str(),
                                OOMPH_CURRENT_FUNCTION,
                                OOMPH_EXCEPTION_LOCATION);
          }
#endif
        }
      }
    }
 
 
    // Now send the fully assembled refinement info to halo elements
    //---------------------------------------------------------------
    {
      // Accumulate data to be sent
      //---------------------------
 
      // Map to accumulate data to be sent to other procs
      // (map for sparsity)
      std::map<unsigned, Vector<unsigned>> data_for_proc;
 
      // Number of base elements to be sent to specified domain
      Vector<unsigned> nbase_elements_for_proc(n_proc, 0);
 
      // Loop over all haloed root elements and find out which
      // processors they have haloes on
      for (std::map<unsigned, Vector<unsigned>>::iterator it =
             halo_domain_of_haloed_base_element.begin();
           it != halo_domain_of_haloed_base_element.end();
           it++)
      {
        // Get base element number
        unsigned base_element_number = (*it).first;
 
        // Loop over target domains
        Vector<unsigned> domains = (*it).second;
        unsigned nd = domains.size();
        for (unsigned jd = 0; jd < nd; jd++)
        {
          // Actual number of domain
          unsigned d = domains[jd];
 
          // Keep counting number of base elemements for domain
          nbase_elements_for_proc[d]++;
 
          // Write base element number
          data_for_proc[d].push_back(base_element_number);
 
          // Write refinement info in flat-packed form
          for (unsigned level = 0; level < max_refinement_level_overall;
               level++)
          {
            // Number of entries at each level
            unsigned n =
              refinement_info_for_root_elements[base_element_number][level]
                .size();
            data_for_proc[d].push_back(n);
            for (unsigned j = 0; j < n; j++)
            {
              data_for_proc[d].push_back(
                refinement_info_for_root_elements[base_element_number][level]
                                                 [j]);
            }
          }
        }
      }
 
 
      // Do the actual send
      //-------------------
 
      // Storage for number of data to be sent to each processor
      Vector<int> send_n(n_proc, 0);
 
      // Storage for all values to be sent to all processors
      Vector<unsigned> send_data;
      send_data.reserve(count);
 
      // Start location within send_data for data to be sent to each processor
      Vector<int> send_displacement(n_proc, 0);
 
      // Loop over all processors
      for (int rank = 0; rank < n_proc; rank++)
      {
        // Set the offset for the current processor
        send_displacement[rank] = send_data.size();
 
        // Don't bother to do anything if the processor in the loop is the
        // current processor
        if (rank != my_rank)
        {
          // Record how many base elements are to be sent
          send_data.push_back(nbase_elements_for_proc[rank]);
 
          // Add data
          unsigned n_data = data_for_proc[rank].size();
          for (unsigned j = 0; j < n_data; j++)
          {
            send_data.push_back(data_for_proc[rank][j]);
          }
        }
        // Find the number of data added to the vector
        send_n[rank] = send_data.size() - send_displacement[rank];
      }
 
      // Storage for the number of data to be received from each processor
      Vector<int> receive_n(n_proc, 0);
 
      // Now send numbers of data to be sent between all processors
      MPI_Alltoall(&send_n[0],
                   1,
                   MPI_INT,
                   &receive_n[0],
                   1,
                   MPI_INT,
                   this->communicator_pt()->mpi_comm());
 
      // We now prepare the data to be received
      // by working out the displacements from the received data
      Vector<int> receive_displacement(n_proc, 0);
      int receive_data_count = 0;
      for (int rank = 0; rank < n_proc; ++rank)
      {
        // Displacement is number of data received so far
        receive_displacement[rank] = receive_data_count;
        receive_data_count += receive_n[rank];
      }
 
      // Now resize the receive buffer for all data from all processors
      // Make sure that it has a size of at least one
      if (receive_data_count == 0)
      {
        ++receive_data_count;
      }
      Vector<unsigned> receive_data(receive_data_count);
 
      // Make sure that the send buffer has size at least one
      // so that we don't get a segmentation fault
      if (send_data.size() == 0)
      {
        send_data.resize(1);
      }
 
      // Now send the data between all the processors
      MPI_Alltoallv(&send_data[0],
                    &send_n[0],
                    &send_displacement[0],
                    MPI_UNSIGNED,
                    &receive_data[0],
                    &receive_n[0],
                    &receive_displacement[0],
                    MPI_UNSIGNED,
                    this->communicator_pt()->mpi_comm());
 
 
      // Now use the received data
      //------------------------
      for (int send_rank = 0; send_rank < n_proc; send_rank++)
      {
        // Don't bother to do anything for the processor corresponding to the
        // current processor or if no data were received from this processor
        if ((send_rank != my_rank) && (receive_n[send_rank] != 0))
        {
          // Counter for the data within the large array
          unsigned count = receive_displacement[send_rank];
 
          // Read number of base elements
          unsigned nbase_element = receive_data[count];
          count++;
 
          for (unsigned e = 0; e < nbase_element; e++)
          {
            // Read base element number
            unsigned base_element_number = receive_data[count];
            count++;
 
            // Provide storage for refinement pattern
            refinement_info_for_root_elements[base_element_number].resize(
              max_refinement_level_overall);
 
            // Read refinement info in flat-packed form
            for (unsigned level = 0; level < max_refinement_level_overall;
                 level++)
            {
              // Read number of entries at each level
              unsigned n = receive_data[count];
              count++;
 
              // Read entries
              for (unsigned j = 0; j < n; j++)
              {
                refinement_info_for_root_elements[base_element_number][level]
                  .push_back(receive_data[count]);
                count++;
              }
            }
          }
        }
      }
    }
  }
 
  //==========================================================================
  /// Load balance helper routine: Send data to other
  /// processors during load balancing, then unpack the data received
  /// from the other processors into the local nodes and elements.
  /// - send_n: Input, number of data to be sent to each processor
  /// - send_data: Input, storage for all values to be sent to all processors
  ///   (resized to length one if it is empty, so that the address of its
  ///   first entry can be passed to MPI)
  /// - send_displacement: Input, start location within send_data for data to
  ///   be sent to each processor
  void Problem::send_data_to_be_sent_during_load_balancing(
    Vector<int>& send_n,
    Vector<double>& send_data,
    Vector<int>& send_displacement)
  {
    // Communicator info: look the communicator up once and reuse it for
    // all MPI calls below
    OomphCommunicator* comm_pt = this->communicator_pt();
    const int n_proc = comm_pt->nproc();

    // Storage for the number of data to be received from each processor
    Vector<int> receive_n(n_proc, 0);

    // Exchange the numbers of data to be sent between all processors
    MPI_Alltoall(
      &send_n[0], 1, MPI_INT, &receive_n[0], 1, MPI_INT, comm_pt->mpi_comm());

    // Work out where the data from each processor starts in the receive
    // buffer: the displacement is the number of data received so far
    Vector<int> receive_displacement(n_proc, 0);
    int receive_data_count = 0;
    for (int rank = 0; rank < n_proc; ++rank)
    {
      receive_displacement[rank] = receive_data_count;
      receive_data_count += receive_n[rank];
    }

    // Receive buffer for all data from all processors. Make sure that it
    // has a size of at least one so that &receive_data[0] is valid
    const int receive_buffer_size =
      (receive_data_count == 0) ? 1 : receive_data_count;
    Vector<double> receive_data(receive_buffer_size);

    // Likewise make sure that the send buffer has size at least one
    // so that we don't get a segmentation fault
    if (send_data.size() == 0)
    {
      send_data.resize(1);
    }

    // Now send the data between all the processors
    MPI_Alltoallv(&send_data[0],
                  &send_n[0],
                  &send_displacement[0],
                  MPI_DOUBLE,
                  &receive_data[0],
                  &receive_n[0],
                  &receive_displacement[0],
                  MPI_DOUBLE,
                  comm_pt->mpi_comm());

    // Unpack the received data, one sending processor at a time.
    // NOTE: We do have to loop over our own processor number to process
    //       the data locally.
    for (int send_rank = 0; send_rank < n_proc; send_rank++)
    {
      // Don't bother to do anything if no data were received from this
      // processor
      if (receive_n[send_rank] == 0)
      {
        continue;
      }

      // Only do each node once per sending processor: nodal data is read
      // from the stream only the first time a node is encountered
      std::map<Node*, bool> node_done;

      // Counter for the data within the large array; every read below
      // advances it, so the order of the reads defines the data layout
      unsigned count = receive_displacement[send_rank];

      // How many batches are there for current rank
      const unsigned nbatch = static_cast<unsigned>(receive_data[count]);
      count++;

      // Loop over batches (containing leaves associated with root elements)
      for (unsigned b = 0; b < nbatch; b++)
      {
        // How many elements were received for this batch?
        const unsigned nel = static_cast<unsigned>(receive_data[count]);
        count++;

        // Get the unique base/root element number of this batch
        // in unrefined mesh
        const unsigned base_el_no =
          static_cast<unsigned>(receive_data[count]);
        count++;

        // Get pointer to base/root element from reverse lookup scheme
        GeneralisedElement* root_el_pt = Base_mesh_element_pt[base_el_no];

        // Vector for pointers to associated elements in batch
        Vector<GeneralisedElement*> batch_el_pt;

        // Is it a refineable element?
        RefineableElement* ref_root_el_pt =
          dynamic_cast<RefineableElement*>(root_el_pt);
        if (ref_root_el_pt != 0)
        {
          // Get all leaves associated with this base/root element
          Vector<Tree*> all_leaf_nodes_pt;
          ref_root_el_pt->tree_pt()->stick_leaves_into_vector(
            all_leaf_nodes_pt);

          // How many leaves are there?
          const unsigned n_leaf = all_leaf_nodes_pt.size();

#ifdef PARANOID
          if (n_leaf != nel)
          {
            std::ostringstream error_message;
            error_message
              << "Number of leaves: " << n_leaf << " "
              << " doesn't match number of elements sent in batch: " << nel
              << "\n";
            throw OomphLibError(error_message.str(),
                                OOMPH_CURRENT_FUNCTION,
                                OOMPH_EXCEPTION_LOCATION);
          }
#endif

          // The batch consists of the element objects stored at the leaves
          batch_el_pt.resize(n_leaf);
          for (unsigned e = 0; e < n_leaf; e++)
          {
            batch_el_pt[e] = all_leaf_nodes_pt[e]->object_pt();
          }
        }
        // Not refineable -- the batch contains just the root element itself
        else
        {
#ifdef PARANOID
          if (1 != nel)
          {
            std::ostringstream error_message;
            error_message
              << "Non-refineable root element should only be associated with"
              << " one element but nel=" << nel << "\n";
            throw OomphLibError(error_message.str(),
                                OOMPH_CURRENT_FUNCTION,
                                OOMPH_EXCEPTION_LOCATION);
          }
#endif
          batch_el_pt.push_back(root_el_pt);
        }

        // Now loop over all elements in batch
        for (unsigned e = 0; e < nel; e++)
        {
          GeneralisedElement* el_pt = batch_el_pt[e];

          // FE? Only finite elements have nodes whose data must be read
          FiniteElement* fe_pt = dynamic_cast<FiniteElement*>(el_pt);
          if (fe_pt != 0)
          {
            // Loop over nodes
            const unsigned nnod = fe_pt->nnode();
            for (unsigned j = 0; j < nnod; j++)
            {
              Node* nod_pt = fe_pt->node_pt(j);

              // Single lookup: a node not seen before is default-inserted
              // as "false" and immediately marked as done
              bool& already_read = node_done[nod_pt];
              if (already_read)
              {
                continue;
              }
              already_read = true;

              // Read number of values (as double) to allow for resizing
              // before read (req'd in case we store data that
              // got introduced by attaching FaceElements to bulk)
              const unsigned nval =
                static_cast<unsigned>(receive_data[count]);
              count++;

#ifdef PARANOID
              // Does the size match?
              if (nval < nod_pt->nvalue())
              {
                std::ostringstream error_message;
                error_message
                  << "Node has more values, namely " << nod_pt->nvalue()
                  << ", than we're about to receive, namely " << nval
                  << ". Something's wrong!\n";
                throw OomphLibError(error_message.str(),
                                    OOMPH_CURRENT_FUNCTION,
                                    OOMPH_EXCEPTION_LOCATION);
              }
#endif

#ifdef PARANOID
              // Check if it's been sent as a boundary node.
              // NOTE(review): this entry is only consumed in PARANOID
              // builds, so the sender presumably only writes it in
              // PARANOID builds too -- confirm against the code that
              // assembles send_data.
              unsigned is_boundary_node =
                static_cast<unsigned>(receive_data[count]);
              count++;
#endif

              // Check if it's actually a boundary node
              BoundaryNodeBase* bnod_pt =
                dynamic_cast<BoundaryNodeBase*>(nod_pt);
              if (bnod_pt != 0)
              {
#ifdef PARANOID
                // Check if local and received status are consistent
                if (is_boundary_node != 1)
                {
                  std::ostringstream error_message;
                  error_message << "Local node is boundary node but "
                                   "information sent is\n"
                                << "for non-boundary node\n";
                  throw OomphLibError(error_message.str(),
                                      OOMPH_CURRENT_FUNCTION,
                                      OOMPH_EXCEPTION_LOCATION);
                }
#endif

                // Do we have entries in the map?
                const unsigned n_entry =
                  static_cast<unsigned>(receive_data[count]);
                count++;
                if (n_entry > 0)
                {
                  // Create storage, if it doesn't already exist, for the
                  // map that will contain the position of the first entry
                  // of this face element's additional values,
                  if (
                    bnod_pt
                      ->index_of_first_value_assigned_by_face_element_pt() ==
                    0)
                  {
                    bnod_pt
                      ->index_of_first_value_assigned_by_face_element_pt() =
                      new std::map<unsigned, unsigned>;
                  }

                  // Get pointer to the map of indices associated with
                  // additional values created by face elements
                  std::map<unsigned, unsigned>* map_pt =
                    bnod_pt
                      ->index_of_first_value_assigned_by_face_element_pt();

                  // Loop over number of entries in map
                  for (unsigned i = 0; i < n_entry; i++)
                  {
                    // Read out pairs (key first, then mapped value)...
                    const unsigned first =
                      static_cast<unsigned>(receive_data[count]);
                    count++;
                    const unsigned second =
                      static_cast<unsigned>(receive_data[count]);
                    count++;

                    // ...and assign
                    (*map_pt)[first] = second;
                  }
                }
              }
#ifdef PARANOID
              // Not a boundary node
              else
              {
                // Check if local and received status are consistent
                if (is_boundary_node != 0)
                {
                  std::ostringstream error_message;
                  error_message << "Local node is not a boundary node but "
                                   "information \n"
                                << "sent is for boundary node.\n";
                  throw OomphLibError(error_message.str(),
                                      OOMPH_CURRENT_FUNCTION,
                                      OOMPH_EXCEPTION_LOCATION);
                }
              }
#endif

              // Do we have to resize? This can happen if node was
              // resized (due to a FaceElement that hasn't been attached
              // yet here) when the send data was written. If so make space
              // for the data here
              if (nval > nod_pt->nvalue())
              {
                nod_pt->resize(nval);
              }

              // Now read the actual values; count is passed along so that
              // reading continues from the current position afterwards
              nod_pt->read_values_from_vector(receive_data, count);
            }
          }

          // Now add internal data (done for every element in the batch,
          // whether or not it is a FiniteElement)
          el_pt->read_internal_data_values_from_vector(receive_data, count);
        }
      }
    }

    // Now that this is done, we need to synchronise dofs to get
    // the halo element and node values correct
    bool do_halos = true;
    bool do_external_halos = false;
    this->synchronise_dofs(do_halos, do_external_halos);

    // Now rebuild global mesh if required, i.e. if there are sub-meshes;
    // in that case the external halos are synchronised first
    if (nsub_mesh() != 0)
    {
      do_halos = false;
      do_external_halos = true;
      this->synchronise_dofs(do_halos, do_external_halos);
      rebuild_global_mesh();
    }
  }
 
 
  //==========================================================================
  /// Load balance helper routine: Get data to be sent to other
  /// processors during load balancing and other information about
  /// re-distribution.
  /// - target_domain_for_local_non_halo_element: Input, generated by METIS.
  ///   target_domain_for_local_non_halo_element[e] contains the number
  ///   of the domain [0,1,...,nproc-1] to which non-halo element e on THE
  ///   CURRENT PROCESSOR ONLY has been assigned. The order of the non-halo
  ///   elements is the same as in the Problem's mesh, with the halo
  ///   elements being skipped.
  /// - send_n: Output, number of data to be sent to each processor
  /// - send_data: Output, storage for all values to be sent to all processors
  /// - send_displacement: Output, start location within send_data for data to
  ///   be sent to each processor
  /// - max_refinement_level_overall: Output, max. refinement level of any
  ///   element
  //==========================================================================
  void Problem::get_data_to_be_sent_during_load_balancing(
    const Vector<unsigned>& target_domain_for_local_non_halo_element,
    Vector<int>& send_n,
    Vector<double>& send_data,
    Vector<int>& send_displacement,
    Vector<unsigned>& old_domain_for_base_element,
    Vector<unsigned>& new_domain_for_base_element,
    unsigned& max_refinement_level_overall)
  {
    // Communicator info
    OomphCommunicator* comm_pt = this->communicator_pt();
    const int n_proc = comm_pt->nproc();
    const int my_rank = this->communicator_pt()->my_rank();
 
    //------------------------------------------------------------------------
    // Overall strategy: Loop over all elements (in structured meshes),
    // identify their corresponding root elements and move all associated
    // leaves together, collecting the leaves in batches.
    // ------------------------------------------------------------------------
 
    // Map to store whether the root element has been visited yet
    std::map<RefineableElement*, bool> root_el_done;
 
#ifdef PARANOID
 
    // Map for checking if all elements associated with same root
    // have the same target processor
    std::map<RefineableElement*, unsigned> target_plus_one_for_root;
 
#endif
 
    // Storage for maximum refinement level
    unsigned max_refinement_level = 0;
 
    // Storage for (vector of) elements associated with target domain
    // (stored in map for sparsity): element_for_processor[d][e] is pointer
    // to e-th element that's supposed to move onto processor (domain) d.
    std::map<unsigned, Vector<GeneralisedElement*>> element_for_processor;
 
    // Storage for the number of elements in a specified batch of leaf
    // elements, all of which are associated with the same root/base element:
    // nelement_batch_for_processor[d][j] is the number of (leaf)
    // elements (all associated with the same root) to be moved together to
    // domain/processor d, in the j-th batch of elements.
    std::map<unsigned, Vector<unsigned>> nelement_batch_for_processor;
 
    // Storage for the unique number of the root element (in the unrefined
    // base mesh) whose leaves are moved together in a batch:
    // base_element_for_element_batch_for_processo[d][j] is the number of
    // unique number of the root element (in the unrefined
    // base mesh) of all leaf elements (associated with that root),
    // to be moved together to domain/processor d, in the j-th batch of
    // elements.
    std::map<unsigned, Vector<unsigned>>
      base_element_for_element_batch_for_processor;
 
    // Record old and new domains for non-halo root elements (will be
    // communicated globally). Initialise to -1 so we can use max
    // to extract the right one via MPI_Allreduce.
    // NOTE: We communicate these globally to facilitate distribution
    //       of refinement pattern. While the data itself can be
    //       sent point-to-point for non-halo elements,
    //       mesh refinement information also needs to be sent for
    //       halo elements which aren't known yet.
    unsigned n_base_element = Base_mesh_element_pt.size();
    Vector<int> old_domain_for_base_element_local(n_base_element, -1);
    Vector<int> new_domain_for_base_element_local(n_base_element, -1);
 
    // Loop over all non-halo elements on current processor and identify roots
    // -------------------------------------------------------------------
    // All leaf elements in associated tree (must!) get moved together
    //----------------------------------------------------------------
    unsigned count_non_halo_el = 0;
    // Get the number of submeshs, if there are no submeshes, then
    // increase the counter so that the loop below also work for the only
    // one mesh in the problem
    unsigned n_mesh = nsub_mesh();
    if (n_mesh == 0)
    {
      n_mesh = 1;
    }
    // We need to know if there are structure meshes (with elements) as
    // part of the problem in order to perform (or not) the proper
    // communications
    bool are_there_structured_meshes = false;
    // Go for the nonhalo elements only in the TreeBaseMeshes
    for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
    {
      // Only work with structured meshes
      TriangleMeshBase* sub_mesh_pt =
        dynamic_cast<TriangleMeshBase*>(mesh_pt(i_mesh));
      if (!(sub_mesh_pt != 0))
      {
        const unsigned nele = mesh_pt(i_mesh)->nelement();
        if (nele > 0)
        {
          // Change the flag to indicate that there are structured meshes
          // (with elements, because we may have meshes with face
          // elements and therefore zero elements at this point)
          are_there_structured_meshes = true;
        }
 
        for (unsigned e = 0; e < nele; e++)
        {
          GeneralisedElement* el_pt = mesh_pt(i_mesh)->element_pt(e);
          if (!el_pt->is_halo())
          {
            // New non-halo: Where is this element supposed to go to?
            //-------------------------------------------------------
            unsigned target_domain =
              target_domain_for_local_non_halo_element[count_non_halo_el];
 
            // Bump up counter for non-halo elements
            count_non_halo_el++;
 
            // Is it a root element? (It is, trivially, if it's not refineable)
            //------------------------------------------------------------------
            RefineableElement* ref_el_pt =
              dynamic_cast<RefineableElement*>(el_pt);
            if (ref_el_pt == 0)
            {
              // Not refineable so add element itself
              element_for_processor[target_domain].push_back(el_pt);
 
              // Number of elements associated with this root/base
              // element (just the element itself)
              nelement_batch_for_processor[target_domain].push_back(1);
 
              // This is the unique base/root element number in unrefined mesh
              unsigned element_number_in_base_mesh =
                Base_mesh_element_number_plus_one[el_pt];
#ifdef PARANOID
              if (element_number_in_base_mesh == 0)
              {
                throw OomphLibError("Base_mesh_element_number_plus_one[...]=0",
                                    OOMPH_CURRENT_FUNCTION,
                                    OOMPH_EXCEPTION_LOCATION);
              }
#endif
              element_number_in_base_mesh -= 1;
              base_element_for_element_batch_for_processor[target_domain]
                .push_back(element_number_in_base_mesh);
 
              /// Where do I come from, where do I go to?
              old_domain_for_base_element_local[element_number_in_base_mesh] =
                my_rank;
              new_domain_for_base_element_local[element_number_in_base_mesh] =
                target_domain;
            } // if (ref_el_pt==0)
            // It's not a root element so we package its leaves into a batch
            //--------------------------------------------------------------
            // of elements
            //------------
            else
            {
              // Get the root element
              RefineableElement* root_el_pt = ref_el_pt->root_element_pt();
 
              // Has this root been visited yet?
              if (!root_el_done[root_el_pt])
              {
                // Now we've done it
                root_el_done[root_el_pt] = true;
 
                // Unique number of root element in base mesh
                unsigned element_number_in_base_mesh =
                  Base_mesh_element_number_plus_one[root_el_pt];
#ifdef PARANOID
                if (element_number_in_base_mesh == 0)
                {
                  throw OomphLibError(
                    "Base_mesh_element_number_plus_one[...]=0",
                    OOMPH_CURRENT_FUNCTION,
                    OOMPH_EXCEPTION_LOCATION);
                }
#endif
                element_number_in_base_mesh -= 1;
 
                /// Where do I come from, where do I go to?
                old_domain_for_base_element_local[element_number_in_base_mesh] =
                  my_rank;
                new_domain_for_base_element_local[element_number_in_base_mesh] =
                  target_domain;
 
#ifdef PARANOID
                // Store target domain associated with this root element
                // (offset by one) to allow checking that all elements
                // with the same root move to the same processor
                target_plus_one_for_root[root_el_pt] = target_domain + 1;
#endif
 
                // Package all leaves into batch of elements
                Vector<Tree*> all_leaf_nodes_pt;
                root_el_pt->tree_pt()->stick_leaves_into_vector(
                  all_leaf_nodes_pt);
 
                // Number of leaves
                unsigned n_leaf = all_leaf_nodes_pt.size();
 
                // Number of elements associated with this root/base element
                // (all the leaves)
                nelement_batch_for_processor[target_domain].push_back(n_leaf);
 
                // Store the unique base/root element number in unrefined mesh
                base_element_for_element_batch_for_processor[target_domain]
                  .push_back(element_number_in_base_mesh);
 
                // Loop over leaves
                for (unsigned i_leaf = 0; i_leaf < n_leaf; i_leaf++)
                {
                  // Add element object at leaf
                  RefineableElement* leaf_el_pt =
                    all_leaf_nodes_pt[i_leaf]->object_pt();
                  element_for_processor[target_domain].push_back(leaf_el_pt);
 
                  // Monitor/update maximum refinement level
                  unsigned level = all_leaf_nodes_pt[i_leaf]->level();
                  if (level > max_refinement_level)
                  {
                    max_refinement_level = level;
                  }
                }
              }
 
#ifdef PARANOID
              // Root element has already been visited
              else
              {
                // We don't have to do anything with this element since it's
                // already been processed earlier, but check that it's scheduled
                // to go onto the same processor as its root.
                if ((target_plus_one_for_root[root_el_pt] - 1) != target_domain)
                {
                  std::ostringstream error_message;
                  error_message
                    << "All elements associated with same root must have "
                    << "same target. during load balancing\n";
                  throw OomphLibError(error_message.str(),
                                      OOMPH_CURRENT_FUNCTION,
                                      OOMPH_EXCEPTION_LOCATION);
                }
              }
#endif
            } // else if (ref_el_pt==0)
          } // if (!ele_pt->is_halo())
        } // for (e < nele)
      } // if (!(sub_mesh_pt!=0))
    } // for (i_mesh < n_mesh)
 
#ifdef PARANOID
    // Have we processed all target domains?
    if (target_domain_for_local_non_halo_element.size() != count_non_halo_el)
    {
      std::ostringstream error_message;
      error_message
        << "Have processed " << count_non_halo_el << " of "
        << target_domain_for_local_non_halo_element.size()
        << " target domains for local non-halo elelemts. \n "
        << "Very Odd -- we do (now) strip out the information for elements\n"
        << "that are removed in actions_before_distribute()...\n";
      throw OomphLibError(
        error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif
 
    // Determine max. refinement level and origin/destination scheme
    // -------------------------------------------------------------
    // for all root/base elements
    // --------------------------
 
    // Allreduce to work out max max refinement level across all processors
    max_refinement_level_overall = 0;
 
    // Only perform this communications if necessary (it means if there
    // are structured meshes as part of the problem)
    if (are_there_structured_meshes)
    {
      MPI_Allreduce(&max_refinement_level,
                    &max_refinement_level_overall,
                    1,
                    MPI_UNSIGNED,
                    MPI_MAX,
                    comm_pt->mpi_comm());
    } // if (are_there_structured_meshes)
 
    // Allreduce to tell everybody about the original and new domains
    // for root elements
    Vector<int> tmp_old_domain_for_base_element(n_base_element);
 
    // Only perform this communications if necessary (it means if there
    // are structured meshes as part of the problem)
    if (are_there_structured_meshes)
    {
      MPI_Allreduce(&old_domain_for_base_element_local[0],
                    &tmp_old_domain_for_base_element[0],
                    n_base_element,
                    MPI_INT,
                    MPI_MAX,
                    comm_pt->mpi_comm());
    } // if (are_there_structured_meshes)
 
    Vector<int> tmp_new_domain_for_base_element(n_base_element);
    // Only perform this communications if necessary (it means if there
    // are structured meshes as part of the problem)
    if (are_there_structured_meshes)
    {
      MPI_Allreduce(&new_domain_for_base_element_local[0],
                    &tmp_new_domain_for_base_element[0],
                    n_base_element,
                    MPI_INT,
                    MPI_MAX,
                    comm_pt->mpi_comm());
    } // if (are_there_structured_meshes)
 
    // Copy across (after optional sanity check)
    old_domain_for_base_element.resize(n_base_element);
    new_domain_for_base_element.resize(n_base_element);
    for (unsigned j = 0; j < n_base_element; j++)
    {
#ifdef PARANOID
      if (tmp_old_domain_for_base_element[j] == -1)
      {
        std::ostringstream error_message;
        error_message << "Old domain for base element " << j << ": "
                      << Base_mesh_element_pt[j]
                      << "or its incarnation as refineable el: "
                      << dynamic_cast<RefineableElement*>(
                           Base_mesh_element_pt[j])
                      << " which is of type "
                      << typeid(*Base_mesh_element_pt[j]).name()
                      << " does not\n"
                      << "appear to have been assigned by any processor\n";
        throw OomphLibError(error_message.str(),
                            OOMPH_CURRENT_FUNCTION,
                            OOMPH_EXCEPTION_LOCATION);
      }
#endif
      old_domain_for_base_element[j] = tmp_old_domain_for_base_element[j];
#ifdef PARANOID
      if (tmp_new_domain_for_base_element[j] == -1)
      {
        std::ostringstream error_message;
        error_message << "New domain for base element " << j
                      << "which is of type "
                      << typeid(*Base_mesh_element_pt[j]).name()
                      << " does not\n"
                      << "appear to have been assigned by any processor\n";
        throw OomphLibError(error_message.str(),
                            OOMPH_CURRENT_FUNCTION,
                            OOMPH_EXCEPTION_LOCATION);
      }
#endif
      new_domain_for_base_element[j] = tmp_new_domain_for_base_element[j];
    }
 
 
    // Loop over all processors and accumulate data to be sent
    //--------------------------------------------------------
    send_data.clear();
 
    // Only do each node once (per processor!)
    Vector<std::map<Node*, bool>> node_done(n_proc);
 
    // Loop over all processors. NOTE: We include current processor
    // since we have to refine local elements too -- store their data
    // in same data structure as the one used for off-processor elements.
    for (int rank = 0; rank < n_proc; rank++)
    {
      // Set the offset for the current processor
      send_displacement[rank] = send_data.size();
 
#ifdef PARANOID
      // Check that total number of elements processed matches those
      // in individual batches
      unsigned total_nel = element_for_processor[rank].size();
#endif
 
      // Counter for number of elements
      unsigned el_count = 0;
 
      // How many batches are there for current rank?
      unsigned nbatch = nelement_batch_for_processor[rank].size();
 
      // Add to vector of doubles to save on number of comms
      send_data.push_back(double(nbatch));
 
      // Loop over batches of elements associated with same root
      for (unsigned b = 0; b < nbatch; b++)
      {
        // How many elements are to be sent in this batch?
        unsigned nel = nelement_batch_for_processor[rank][b];
 
        // Get the unique number of the root element in unrefined mesh for
        // all the elements in this batch
        unsigned base_el_no =
          base_element_for_element_batch_for_processor[rank][b];
 
        // Add unsigneds to send data to minimise number of
        // communications
        send_data.push_back(double(nel));
        send_data.push_back(double(base_el_no));
 
        // Loop over batch of elements
        for (unsigned e = 0; e < nel; e++)
        {
          // Get element
          GeneralisedElement* el_pt = element_for_processor[rank][el_count];
 
          // FE?
          FiniteElement* fe_pt = dynamic_cast<FiniteElement*>(el_pt);
          if (fe_pt != 0)
          {
            // Loop over nodes
            unsigned nnod = fe_pt->nnode();
            for (unsigned j = 0; j < nnod; j++)
            {
              Node* nod_pt = fe_pt->node_pt(j);
 
              // Reconstruct the nodal values/position from the node's
              // possible hanging node representation to be on the safe side
              unsigned n_value = nod_pt->nvalue();
              unsigned nt = nod_pt->ntstorage();
              Vector<double> values(n_value);
              unsigned n_dim = nod_pt->ndim();
              Vector<double> position(n_dim);
 
              // Loop over all history values
              for (unsigned t = 0; t < nt; t++)
              {
                nod_pt->value(t, values);
                for (unsigned i = 0; i < n_value; i++)
                {
                  nod_pt->set_value(t, i, values[i]);
                }
                nod_pt->position(t, position);
                for (unsigned i = 0; i < n_dim; i++)
                {
                  nod_pt->x(t, i) = position[i];
                }
              }
 
 
              // Has the node already been done for current rank?
              if (!node_done[rank][nod_pt])
              {
                // Now it has been done
                node_done[rank][nod_pt] = true;
 
                // Store number of values (as double) to allow for resizing
                // before read (req'd in case we store data that
                // got introduced by attaching FaceElements to bulk)
                send_data.push_back(double(n_value));
 
                // Check if it's a boundary node
                BoundaryNodeBase* bnod_pt =
                  dynamic_cast<BoundaryNodeBase*>(nod_pt);
 
                // Not a boundary node
                if (bnod_pt == 0)
                {
#ifdef PARANOID
                  // Record status for checking
                  send_data.push_back(double(0));
#endif
                }
                // Yes it's a boundary node
                else
                {
#ifdef PARANOID
                  // Record status for checking
                  send_data.push_back(double(1));
#endif
                  // Get pointer to the map of indices associated with
                  // additional values created by face elements
                  std::map<unsigned, unsigned>* map_pt =
                    bnod_pt->index_of_first_value_assigned_by_face_element_pt();
 
                  // No additional values created
                  if (map_pt == 0)
                  {
                    send_data.push_back(double(0));
                  }
                  // Created additional values
                  else
                  {
                    // How many?
                    send_data.push_back(double(map_pt->size()));
 
                    // Loop over entries in map and add to send data
                    for (std::map<unsigned, unsigned>::iterator p =
                           map_pt->begin();
                         p != map_pt->end();
                         p++)
                    {
                      send_data.push_back(double((*p).first));
                      send_data.push_back(double((*p).second));
                    }
                  }
                }
 
                // Add the actual values
                nod_pt->add_values_to_vector(send_data);
              }
            }
          }
 
          // Now add internal data
          el_pt->add_internal_data_values_to_vector(send_data);
 
          // Bump up counter in long vector of elements
          el_count++;
        }
      }
 
 
#ifdef PARANOID
      // Check that total number of elements matches the total of those
      // in batches
      if (total_nel != el_count)
      {
        std::ostringstream error_message;
        error_message
          << "total_nel: " << total_nel << " "
          << " doesn't match total number of elements sent in batch: "
          << el_count << "\n";
        throw OomphLibError(error_message.str(),
                            OOMPH_CURRENT_FUNCTION,
                            OOMPH_EXCEPTION_LOCATION);
      }
#endif
 
      // Find the number of data added to the vector
      send_n[rank] = send_data.size() - send_displacement[rank];
    }
  }
 
 
  //==========================================================================
  /// Get flat-packed refinement pattern for each root element in current
  /// mesh (labeled by unique number of root element in unrefined base mesh).
  /// The vector stored for each root element contains the following
  /// information:
  /// - First entry: Number of tree nodes (not just leaves!) in refinement
  ///   tree emanating from this root [Zero if root element is not refineable]
  /// - Loop over all refinement levels
  ///   - Loop over all tree nodes (not just leaves!)
  ///     - If associated element exists when the mesh has been refined to
  ///       this level (either because it has been refined to this level or
  ///       because it's less refined): 1
  ///       - If the element is to be refined: 1; else: 0
  ///     - else (element doesn't exist when mesh is refined to this level
  ///       (because it's more refined): 0
  ///     .
  ///   .
  /// .
  //==========================================================================
  void Problem::get_flat_packed_refinement_pattern_for_load_balancing(
    const Vector<unsigned>& old_domain_for_base_element,
    const Vector<unsigned>& new_domain_for_base_element,
    const unsigned& max_refinement_level_overall,
    std::map<unsigned, Vector<unsigned>>& flat_packed_refinement_info_for_root)
  {
    // Map to store whether the root element has been visited yet
    std::map<RefineableElement*, bool> root_el_done;
 
    // Get the number of submeshs, if there are no submeshes, then
    // increase the counter so that the loop below also work for the only
    // one mesh in the problem
    unsigned n_mesh = nsub_mesh();
    if (n_mesh == 0)
    {
      n_mesh = 1;
    }
    // Go for the nonhalo elements only in the TreeBaseMeshes
    for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
    {
      // Only work with structured
      TriangleMeshBase* sub_mesh_pt =
        dynamic_cast<TriangleMeshBase*>(mesh_pt(i_mesh));
      if (!(sub_mesh_pt != 0))
      {
        const unsigned nele_submesh = mesh_pt(i_mesh)->nelement();
        for (unsigned e = 0; e < nele_submesh; e++)
        {
          // Get pointer to element
          GeneralisedElement* el_pt = mesh_pt(i_mesh)->element_pt(e);
 
          // Ignore halos
          if (!el_pt->is_halo())
          {
            // Is it refineable? No!
            RefineableElement* ref_el_pt =
              dynamic_cast<RefineableElement*>(el_pt);
            if (ref_el_pt == 0)
            {
              // The element is not refineable - stick a zero in refinement_info
              // indicating that there are no tree nodes following
              unsigned e = Base_mesh_element_number_plus_one[el_pt];
#ifdef PARANOID
              if (e == 0)
              {
                throw OomphLibError("Base_mesh_element_number_plus_one[...]=0",
                                    OOMPH_CURRENT_FUNCTION,
                                    OOMPH_EXCEPTION_LOCATION);
              }
#endif
              e -= 1;
              flat_packed_refinement_info_for_root[e].push_back(0);
            }
            // Refineable
            else
            {
              // Get the root element
              RefineableElement* root_el_pt = ref_el_pt->root_element_pt();
 
              // Has this root been visited yet?
              if (!root_el_done[root_el_pt])
              {
                // Get unique number of root element in base mesh
                unsigned root_element_number =
                  Base_mesh_element_number_plus_one[root_el_pt];
 
#ifdef PARANOID
                if (root_element_number == 0)
                {
                  throw OomphLibError(
                    "Base_mesh_element_number_plus_one[...]=0",
                    OOMPH_CURRENT_FUNCTION,
                    OOMPH_EXCEPTION_LOCATION);
                }
#endif
                root_element_number -= 1;
 
                // Get all the nodes associated with this root element
                Vector<Tree*> all_tree_nodes_pt;
                root_el_pt->tree_pt()->stick_all_tree_nodes_into_vector(
                  all_tree_nodes_pt);
 
                // How many tree nodes are there?
                unsigned n_tree_nodes = all_tree_nodes_pt.size();
                flat_packed_refinement_info_for_root[root_element_number]
                  .push_back(n_tree_nodes);
 
                // Loop over all levels
                for (unsigned current_level = 0;
                     current_level < max_refinement_level_overall;
                     current_level++)
                {
                  // Loop over all tree nodes
                  for (unsigned e = 0; e < n_tree_nodes; e++)
                  {
                    // What's the level of this tree node?
                    unsigned level = all_tree_nodes_pt[e]->level();
 
                    // Element exists at this refinement level of the mesh
                    // if it's at this level or it's at a lower level and a leaf
                    if ((level == current_level) ||
                        ((level < current_level) &&
                         (all_tree_nodes_pt[e]->is_leaf())))
                    {
                      flat_packed_refinement_info_for_root[root_element_number]
                        .push_back(1);
 
                      // If it's at this level, and not a leaf, then it will
                      // need to be refined in the new mesh
                      if ((level == current_level) &&
                          (!all_tree_nodes_pt[e]->is_leaf()))
                      {
                        flat_packed_refinement_info_for_root
                          [root_element_number]
                            .push_back(1);
                      }
                      // Element exists at this level and is a leaf so it
                      // doesn't have to be refined
                      else
                      {
                        flat_packed_refinement_info_for_root
                          [root_element_number]
                            .push_back(0);
                      }
                    }
                    // Element does not exist at this level so it doesn't have
                    // to be refined
                    else
                    {
                      flat_packed_refinement_info_for_root[root_element_number]
                        .push_back(0);
                    }
                  }
                }
                // Now we've done it
                root_el_done[root_el_pt] = true;
              }
            }
 
          } // if (!el_pt->is_halo())
        } // for (e < nele_submesh)
      } // if (!(sub_mesh_pt!=0))
    } // for (i_mesh < n_mesh)
  }
 
  //==========================================================================
  /// Load balance helper routine:  Function performs max_level_overall
  /// successive refinements of the problem's mesh(es) using the following
  /// procedure: Given ID of root element, root_element_id, and current
  /// refinement level, level, the e-th entry in
  /// refinement_info_for_root_elements[root_element_id][level][e] is equal
  /// to 2 if the e-th element (using the enumeration when the mesh has been
  /// refined to the level-th level) is to be refined during the next
  /// refinement; it's 1 if it's not to be refined.
  //==========================================================================
  void Problem::refine_distributed_base_mesh(
    Vector<Vector<Vector<unsigned>>>& refinement_info_for_root_elements,
    const unsigned& max_level_overall)
  {
    // Converts the per-root refinement codes (2: refine, 1: keep) into
    // per-level lists of element numbers and passes them to
    // refine_base_mesh() of every tree-based refineable mesh.

    // Visit each sub-mesh, or the single global mesh if there are none
    const unsigned n_sub_mesh = nsub_mesh();
    const unsigned max_mesh = std::max(n_sub_mesh, unsigned(1));
    for (unsigned i_mesh = 0; i_mesh < max_mesh; i_mesh++)
    {
      Mesh* my_mesh_pt = (n_sub_mesh == 0) ? mesh_pt() : mesh_pt(i_mesh);

      // The mesh is still unrefined, so every element currently in it
      // is a "base" (root) element
      const unsigned n_el_on_this_proc = my_mesh_pt->nelement();

      // to_be_refined_on_this_proc[level] lists the numbers of the
      // elements that are to be refined at that refinement level
      Vector<Vector<unsigned>> to_be_refined_on_this_proc(max_level_overall);

      // Running total, per level, of the elements present in the mesh;
      // can be accumulated since elements are enumerated tree by tree
      Vector<unsigned> el_count_on_this_proc(max_level_overall, 0);

      for (unsigned level = 0; level < max_level_overall; level++)
      {
        Vector<unsigned>& refine_list = to_be_refined_on_this_proc[level];
        unsigned& el_count = el_count_on_this_proc[level];

        // Go through the roots in the order in which the mesh stores them
        for (unsigned e = 0; e < n_el_on_this_proc; e++)
        {
          FiniteElement* el_pt = my_mesh_pt->finite_element_pt(e);

          // Unique number of this root in the base mesh
          unsigned root_el_no = Base_mesh_element_number_plus_one[el_pt];
#ifdef PARANOID
          if (root_el_no == 0)
          {
            throw OomphLibError("Base_mesh_element_number_plus_one[...]=0",
                                OOMPH_CURRENT_FUNCTION,
                                OOMPH_EXCEPTION_LOCATION);
          }
#endif
          root_el_no -= 1;

          // Nothing to do if this root has no refinement information
          // for the current level
          Vector<Vector<unsigned>>& info_for_root =
            refinement_info_for_root_elements[root_el_no];
          const unsigned n_refinements = info_for_root.size();
          if (level >= n_refinements)
          {
            continue;
          }

          // Interpret the refinement code of each entry at this level
          Vector<unsigned>& codes = info_for_root[level];
          const unsigned n_el = codes.size();
          for (unsigned ee = 0; ee < n_el; ee++)
          {
            switch (codes[ee])
            {
              case 2:
                // Element is to be refined at this level
                refine_list.push_back(el_count);
                el_count++;
                break;

              case 1:
                // Element exists but is not refined: just count it
                el_count++;
                break;

              default:
                // Any other code: entry does not contribute
                break;
            }
          }
        } // end of loop over (root) elements on this processor
      }

      // Perform the refinement if the mesh is tree-based refineable
      TreeBasedRefineableMeshBase* ref_mesh_pt =
        dynamic_cast<TreeBasedRefineableMeshBase*>(my_mesh_pt);
      if (ref_mesh_pt != 0)
      {
        ref_mesh_pt->refine_base_mesh(to_be_refined_on_this_proc);
      }
    }

    // With sub-meshes, the global mesh has to be rebuilt afterwards
    if (n_sub_mesh != 0)
    {
      rebuild_global_mesh();
    }
  }
 
 
  //====================================================================
  /// Helper function to re-setup the Base_mesh enumeration
  /// (used during load balancing) after pruning.
  //====================================================================
  void Problem::setup_base_mesh_info_after_pruning()
  {
    // For each mesh that is not a TriangleMeshBase, every processor sends
    // the Base_mesh_element_number_plus_one entries of its root haloed
    // elements to the processor they are haloed with. The receiver assigns
    // the numbers, in the order received, to its root halo elements with
    // the sending processor and registers those elements in
    // Base_mesh_element_pt.
    // NOTE(review): this relies on the root haloed elements on the sender
    // and the root halo elements on the receiver being stored in matching
    // order -- confirm against the halo scheme set-up.

    // Storage for number of processors and current processor
    int n_proc = this->communicator_pt()->nproc();
    int my_rank = this->communicator_pt()->my_rank();

    // Loop over sub meshes (or the single global mesh if there are none)
    unsigned n_sub_mesh = nsub_mesh();
    unsigned max_mesh = std::max(n_sub_mesh, unsigned(1));
    for (unsigned i_mesh = 0; i_mesh < max_mesh; i_mesh++)
    {
      // Choose the right mesh
      Mesh* my_mesh_pt = 0;
      if (n_sub_mesh == 0)
      {
        my_mesh_pt = mesh_pt();
      }
      else
      {
        my_mesh_pt = mesh_pt(i_mesh);
      }

      // Only work with structured meshes, i.e. skip any TriangleMeshBase
      TriangleMeshBase* sub_mesh_pt =
        dynamic_cast<TriangleMeshBase*>(my_mesh_pt);
      if (!(sub_mesh_pt != 0))
      {
        // Storage for number of data to be sent to each processor
        Vector<int> send_n(n_proc, 0);

        // Storage for all values to be sent to all processors
        Vector<unsigned> send_data;

        // Start location within send_data for data to be sent to each processor
        Vector<int> send_displacement(n_proc, 0);

        // Loop over all processors
        for (int rank = 0; rank < n_proc; rank++)
        {
          // Set the offset for the current processor
          send_displacement[rank] = send_data.size();

          // Don't bother to do anything if the processor in the loop is the
          // current processor
          if (rank != my_rank)
          {
            // Get root haloed elements with that processor
            Vector<GeneralisedElement*> root_haloed_elements_pt =
              my_mesh_pt->root_haloed_element_pt(rank);
            unsigned nel = root_haloed_elements_pt.size();

            // Store element numbers (offset by one) for send
            for (unsigned e = 0; e < nel; e++)
            {
              GeneralisedElement* el_pt = root_haloed_elements_pt[e];
              send_data.push_back(Base_mesh_element_number_plus_one[el_pt]);
            }
          }

          // Find the number of data added to the vector
          send_n[rank] = send_data.size() - send_displacement[rank];
        }

        // Storage for the number of data to be received from each processor
        Vector<int> receive_n(n_proc, 0);

        // Now send numbers of data to be sent between all processors
        MPI_Alltoall(&send_n[0],
                     1,
                     MPI_INT,
                     &receive_n[0],
                     1,
                     MPI_INT,
                     this->communicator_pt()->mpi_comm());

        // We now prepare the data to be received
        // by working out the displacements from the received data
        Vector<int> receive_displacement(n_proc, 0);
        int receive_data_count = 0;
        for (int rank = 0; rank < n_proc; ++rank)
        {
          // Displacement is number of data received so far
          receive_displacement[rank] = receive_data_count;
          receive_data_count += receive_n[rank];
        }

        // Now resize the receive buffer for all data from all processors
        // Make sure that it has a size of at least one
        if (receive_data_count == 0)
        {
          ++receive_data_count;
        }
        Vector<unsigned> receive_data(receive_data_count);

        // Make sure that the send buffer has size at least one
        // so that we don't get a segmentation fault when taking the
        // address of its first entry below
        if (send_data.size() == 0)
        {
          send_data.resize(1);
        }

        // Now send the data between all the processors
        MPI_Alltoallv(&send_data[0],
                      &send_n[0],
                      &send_displacement[0],
                      MPI_UNSIGNED,
                      &receive_data[0],
                      &receive_n[0],
                      &receive_displacement[0],
                      MPI_UNSIGNED,
                      this->communicator_pt()->mpi_comm());

        // Now use the received data to update the halo element numbers in
        // base mesh
        for (int send_rank = 0; send_rank < n_proc; send_rank++)
        {
          // Don't bother to do anything for the processor corresponding to the
          // current processor or if no data were received from this processor
          if ((send_rank != my_rank) && (receive_n[send_rank] != 0))
          {
            // Counter for the data within the large array
            unsigned count = receive_displacement[send_rank];

            // Get root halo elements with that processor
            Vector<GeneralisedElement*> root_halo_elements_pt =
              my_mesh_pt->root_halo_element_pt(send_rank);
            unsigned nel = root_halo_elements_pt.size();

            // Read in element numbers
            for (unsigned e = 0; e < nel; e++)
            {
              GeneralisedElement* el_pt = root_halo_elements_pt[e];
              unsigned el_number_plus_one = receive_data[count++];
#ifdef PARANOID
              // A zero means the sender had no base mesh number for its
              // haloed element; subtracting one below would wrap around
              // and index Base_mesh_element_pt out of range.
              if (el_number_plus_one == 0)
              {
                throw OomphLibError(
                  "Received Base_mesh_element_number_plus_one[...]=0 "
                  "for root halo element",
                  OOMPH_CURRENT_FUNCTION,
                  OOMPH_EXCEPTION_LOCATION);
              }
#endif
              Base_mesh_element_number_plus_one[el_pt] = el_number_plus_one;
              Base_mesh_element_pt[el_number_plus_one - 1] = el_pt;
            }
          }

        } // End of data is received

      } // if (!(sub_mesh_pt!=0))

    } // for (i_mesh<max_mesh)
  }
 
#endif
 
  /// Definition of the public static flag that allows suppression of the
  /// warning messages re reading in unstructured meshes during restart.
  /// Initialised to false.
  bool Problem::Suppress_warning_about_actions_before_read_unstructured_meshes =
    false;
 
 
} // namespace oomph