partitioning.cc
// LIC// ====================================================================
// LIC// This file forms part of oomph-lib, the object-oriented,
// LIC// multi-physics finite-element library, available
// LIC// at http://www.oomph-lib.org.
// LIC//
// LIC// Copyright (C) 2006-2025 Matthias Heil and Andrew Hazel
// LIC//
// LIC// This library is free software; you can redistribute it and/or
// LIC// modify it under the terms of the GNU Lesser General Public
// LIC// License as published by the Free Software Foundation; either
// LIC// version 2.1 of the License, or (at your option) any later version.
// LIC//
// LIC// This library is distributed in the hope that it will be useful,
// LIC// but WITHOUT ANY WARRANTY; without even the implied warranty of
// LIC// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// LIC// Lesser General Public License for more details.
// LIC//
// LIC// You should have received a copy of the GNU Lesser General Public
// LIC// License along with this library; if not, write to the Free Software
// LIC// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
// LIC// 02110-1301 USA.
// LIC//
// LIC// The authors may be contacted at oomph-lib@maths.man.ac.uk.
// LIC//
// LIC//====================================================================
#include <float.h>

#include "partitioning.h"
#include "mesh.h"
#include "refineable_mesh.h"

// For the new METIS API we need to use symbols defined in the standard
// header, which aren't available in the current frozen (old) version of
// METIS. Version 3 will (presumably) have this header in the include path
// as standard.
#include "metis.h"

namespace oomph
{
  //====================================================================
  /// Namespace for METIS graph partitioning routines
  //====================================================================
  namespace METIS
  {
    /// Default function that translates spatial
    /// error into weight for METIS partitioning (unit weight regardless
    /// of input).
    void default_error_to_weight_fct(const double& spatial_error,
                                     const double& max_error,
                                     const double& min_error,
                                     int& weight)
    {
      weight = 1;
    }

    /// Function pointer to function that translates spatial
    /// error into weight for METIS partitioning.
    ErrorToWeightFctPt Error_to_weight_fct_pt = &default_error_to_weight_fct;

  } // namespace METIS

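  // ---------------------------------------------------------------------
  // Usage sketch (added for exposition; not part of the library source):
  // Error_to_weight_fct_pt can be re-assigned to bias the partitioning.
  // A minimal, hypothetical custom weight function might look like this:
  //
  //   void my_error_to_weight_fct(const double& spatial_error,
  //                               const double& max_error,
  //                               const double& min_error,
  //                               int& weight)
  //   {
  //     // Map the error linearly onto integer weights in [1,10] so that
  //     // elements with large errors attract more weight in the partition
  //     weight = 1 + int(9.0 * (spatial_error - min_error) /
  //                      (max_error - min_error + DBL_EPSILON));
  //   }
  //
  //   // ...and then, before calling METIS::partition_mesh(...):
  //   METIS::Error_to_weight_fct_pt = &my_error_to_weight_fct;
  // ---------------------------------------------------------------------
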
  //==================================================================
  /// Partition mesh uniformly by dividing elements
  /// equally over the partitions, in the order
  /// in which they are returned by the problem.
  /// On return, element_domain[ielem] contains the number
  /// of the domain [0,1,...,ndomain-1] to which
  /// element ielem has been assigned.
  //==================================================================
  void METIS::uniform_partition_mesh(Problem* problem_pt,
                                     const unsigned& ndomain,
                                     Vector<unsigned>& element_domain)
  {
    // Number of elements
    unsigned nelem = problem_pt->mesh_pt()->nelement();

#ifdef PARANOID
    if (nelem != element_domain.size())
    {
      std::ostringstream error_stream;
      error_stream << "element_domain Vector has wrong length " << nelem << " "
                   << element_domain.size() << std::endl;

      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif


    // Uniform partitioning
    unsigned nel_per_domain = int(float(nelem) / float(ndomain));
    for (unsigned ielem = 0; ielem < nelem; ielem++)
    {
      unsigned idomain = unsigned(float(ielem) / float(nel_per_domain));
      element_domain[ielem] = idomain;
    }
  }
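
  // Usage sketch (added for exposition; not part of the library source):
  // to split the (single) mesh of a Problem "problem" into four domains:
  //
  //   Vector<unsigned> element_domain(problem.mesh_pt()->nelement());
  //   METIS::uniform_partition_mesh(&problem, 4, element_domain);
  //
  // On return, element_domain[e] holds the domain (0,...,3) that element
  // e has been assigned to.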


  //==================================================================
  /// Use METIS to assign each element to a domain.
  /// On return, element_domain[ielem] contains the number
  /// of the domain [0,1,...,ndomain-1] to which
  /// element ielem has been assigned.
  /// - objective=0: minimise edgecut.
  /// - objective=1: minimise total communications volume.
  /// .
  /// Partitioning is based on dual graph of mesh.
  //==================================================================
  void METIS::partition_mesh(Problem* problem_pt,
                             const unsigned& ndomain,
                             const unsigned& objective,
                             Vector<unsigned>& element_domain)
  {
    // Global mesh
    Mesh* mesh_pt = problem_pt->mesh_pt();

    // Number of elements
    unsigned nelem = mesh_pt->nelement();

#ifdef PARANOID
    if (nelem != element_domain.size())
    {
      std::ostringstream error_stream;
      error_stream << "element_domain Vector has wrong length " << nelem << " "
                   << element_domain.size() << std::endl;

      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // Setup dual graph
    //------------------

    // Start timer
    clock_t cpu_start = clock();

    // Container to collect all elements associated with given global eqn number
    std::map<unsigned, std::set<unsigned>> elements_connected_with_global_eqn;

    // Container for all unique global eqn numbers
    std::set<unsigned> all_global_eqns;

    // Loop over all elements
    for (unsigned e = 0; e < nelem; e++)
    {
      GeneralisedElement* el_pt = mesh_pt->element_pt(e);

      // Add all global eqn numbers
      unsigned ndof = el_pt->ndof();
      for (unsigned j = 0; j < ndof; j++)
      {
        // Get global eqn number
        unsigned eqn_number = el_pt->eqn_number(j);
        elements_connected_with_global_eqn[eqn_number].insert(e);
        all_global_eqns.insert(eqn_number);
      }
    }

    // Now reverse the lookup scheme to find out all elements
    // that are connected because they share the same global eqn
    Vector<std::set<unsigned>> connected_elements(nelem);

    // Counter for total number of entries in connected_elements structure
    unsigned count = 0;

    // Loop over all global eqns
    for (std::set<unsigned>::iterator it = all_global_eqns.begin();
         it != all_global_eqns.end();
         it++)
    {
      // Get set of elements connected with this data item
      std::set<unsigned> elements = elements_connected_with_global_eqn[*it];

      // Double loop over connected elements: Everybody's connected to
      // everybody
      for (std::set<unsigned>::iterator it1 = elements.begin();
           it1 != elements.end();
           it1++)
      {
        for (std::set<unsigned>::iterator it2 = elements.begin();
             it2 != elements.end();
             it2++)
        {
          if ((*it1) != (*it2))
          {
            connected_elements[(*it1)].insert(*it2);
          }
        }
      }
    }


    // Now convert into C-style packed array for interface with METIS
    idx_t* xadj = new idx_t[nelem + 1];
    Vector<idx_t> adjacency_vector;

    // Reserve (too much) space
    adjacency_vector.reserve(count);

    // Initialise counters
    unsigned ientry = 0;

    // Loop over all elements
    for (unsigned e = 0; e < nelem; e++)
    {
      // First entry for current element
      xadj[e] = ientry;

      // Loop over elements that are connected to current element
      typedef std::set<unsigned>::iterator IT;
      for (IT it = connected_elements[e].begin();
           it != connected_elements[e].end();
           it++)
      {
        // Copy into adjacency array
        adjacency_vector.push_back(*it);

        // We've just made another entry
        ientry++;
      }

      // Entry after last entry for current element:
      xadj[e + 1] = ientry;
    }
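
    // Illustration (added for exposition; not in the library source): for a
    // three-element mesh in which element 1 shares dofs with elements 0 and
    // 2 (and 0 and 2 don't touch), the packed arrays would be
    //
    //   adjacency_vector = {1, 0, 2, 1};   // neighbours of 0 | 1 | 2
    //   xadj             = {0, 1, 3, 4};   // element e's neighbours occupy
    //                                      // entries [xadj[e], xadj[e+1])
    //
    // i.e. the standard compressed-sparse-row (CSR) graph format that METIS
    // expects.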

    // End timer
    clock_t cpu_end = clock();

    // Doc
    double cpu0 = double(cpu_end - cpu_start) / CLOCKS_PER_SEC;
    oomph_info << "CPU time for setup of METIS data structures [nelem="
               << nelem << "]: " << cpu0 << " sec" << std::endl;


    // If the adjacency vector is empty then the elements are
    // actually unconnected (can happen in dummy problems where
    // each element only has internal data). In that case the
    // partition is irrelevant and we may as well distribute the
    // elements in round-robin fashion
    if (adjacency_vector.size() == 0)
    {
      unsigned n_proc = problem_pt->communicator_pt()->nproc();
      oomph_info
        << "Note: All elements in the Problem's Mesh appear to be\n"
        << "unconnected. This happens, e.g. if all elements only have\n"
        << "internal Data. Bypassing metis and distributing elements\n"
        << "in round-robin fashion amongst the " << n_proc << " processors."
        << std::endl;
      for (unsigned e = 0; e < nelem; e++)
      {
        element_domain[e] = e % n_proc;
      }
      return;
    }


    // Call METIS graph partitioner
    //-----------------------------

    // Start timer
    cpu_start = clock();

    // Number of vertices in graph
    idx_t nvertex = nelem;

    // No vertex weights
    idx_t* vwgt = 0;

    // No edge weights
    idx_t* adjwgt = 0;

    // Flag indicating that graph isn't weighted: 0; vertex weights only: 2
    // Note that wgtflag==2 requires nodal weights to be stored in vwgt.
    int wgtflag = 0;

    // Use C-style numbering (first array entry is zero)
    int numflag = 0;

    // Number of desired partitions
    idx_t nparts = ndomain;

    // Use default options
    idx_t options[METIS_NOPTIONS];
    METIS_SetDefaultOptions(options);

    switch (objective)
    {
      case 0:
        // Edge-cut minimization
        options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT;
        break;

      case 1:
        // communication volume minimisation
        options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_VOL;
        break;

      default:
        std::ostringstream error_stream;
        error_stream << "Wrong objective for METIS. objective = " << objective
                     << std::endl;

        throw OomphLibError(
          error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }

    // Number of cut edges in graph
    idx_t* edgecut = new idx_t[nelem];

    // Array containing the partition information
    idx_t* part = new idx_t[nelem];

    // Can we get an error estimate?

    unsigned n_mesh = problem_pt->nsub_mesh();

    if (n_mesh == 0)
    {
      RefineableMeshBase* mmesh_pt = dynamic_cast<RefineableMeshBase*>(mesh_pt);
      if (mmesh_pt != 0)
      {
        // Bias distribution?
        if (Error_to_weight_fct_pt != &default_error_to_weight_fct)
        {
          oomph_info
            << "Biasing element distribution via spatial error estimate\n";

          // Adjust flag and provide storage for weights
          wgtflag = 2;
          vwgt = new idx_t[nelem];

          // Get error for all elements
          Vector<double> elemental_error(nelem);
          mmesh_pt->spatial_error_estimator_pt()->get_element_errors(
            mesh_pt, elemental_error);

          double max_error =
            *(std::max_element(elemental_error.begin(), elemental_error.end()));
          double min_error =
            *(std::min_element(elemental_error.begin(), elemental_error.end()));

          // Bias weights
          int weight = 1;
          for (unsigned e = 0; e < nelem; e++)
          {
            // Translate error into weight
            Error_to_weight_fct_pt(
              elemental_error[e], max_error, min_error, weight);
            vwgt[e] = weight;
          }
        }
      }
    }
    else // There are submeshes
    {
      // Are any of the submeshes refineable?
      bool refineable_submesh_exists = false;
      // Vector to store "start and end point" for loops in submeshes
      Vector<unsigned> loop_helper(n_mesh + 1);
      loop_helper[0] = 0;

      // Loop over submeshes
      for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
      {
        // Store the end of the loop
        loop_helper[i_mesh + 1] =
          problem_pt->mesh_pt(i_mesh)->nelement() + loop_helper[i_mesh];

        RefineableMeshBase* mmesh_pt =
          dynamic_cast<RefineableMeshBase*>(problem_pt->mesh_pt(i_mesh));
        if (mmesh_pt != 0)
        {
          refineable_submesh_exists = true;
        }
      }

      // If a refineable submesh exists
      if (refineable_submesh_exists)
      {
        // Bias distribution?
        if (Error_to_weight_fct_pt != &default_error_to_weight_fct)
        {
          oomph_info
            << "Biasing element distribution via spatial error estimate\n";

          // Adjust flag and provide storage for weights
          wgtflag = 2;
          vwgt = new idx_t[nelem];

          // Loop over submeshes
          for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
          {
            RefineableMeshBase* mmesh_pt =
              dynamic_cast<RefineableMeshBase*>(problem_pt->mesh_pt(i_mesh));
            if (mmesh_pt != 0)
            {
              // Get error for all elements
              unsigned nsub_elem =
                problem_pt->mesh_pt(i_mesh)->nelement();
              Vector<double> elemental_error(nsub_elem);
              mmesh_pt->spatial_error_estimator_pt()->get_element_errors(
                problem_pt->mesh_pt(i_mesh), elemental_error);

              double max_error = *(std::max_element(elemental_error.begin(),
                                                    elemental_error.end()));
              double min_error = *(std::min_element(elemental_error.begin(),
                                                    elemental_error.end()));

              // Bias weights
              int weight = 1;
              unsigned start = loop_helper[i_mesh];
              unsigned end = loop_helper[i_mesh + 1];
              for (unsigned e = start; e < end; e++)
              {
                unsigned error_index = e - start;
                // Translate error into weight
                Error_to_weight_fct_pt(
                  elemental_error[error_index], max_error, min_error, weight);
                vwgt[e] = weight;
              }
            }
            else // This mesh is not refineable
            {
              // There's no error estimator, so use the default weight
              int weight = 1;
              unsigned start = loop_helper[i_mesh];
              unsigned end = loop_helper[i_mesh + 1];
              for (unsigned e = start; e < end; e++)
              {
                vwgt[e] = weight;
              }
            }
          }
        }
      }
    }

    // If ncon is the number of weights associated with each vertex, the array
    // vwgt contains n x ncon elements (where n is the number of vertices).
    idx_t ncon = 1;
    idx_t* vsize = NULL;
    real_t* tpwgts = NULL;
    real_t* ubvec = NULL;

    // Call partitioner
    // METIS_API(int)
    // METIS_PartGraphKway(idx_t * nvtxs,
    //                     idx_t * ncon,
    //                     idx_t * xadj,
    //                     idx_t * adjncy,
    //                     idx_t * vwgt,
    //                     idx_t * vsize,
    //                     idx_t * adjwgt,
    //                     idx_t * nparts,
    //                     real_t * tpwgts,
    //                     real_t * ubvec,
    //                     idx_t * options,
    //                     idx_t * edgecut,
    //                     idx_t * part);
    METIS_PartGraphKway(&nvertex,
                        &ncon,
                        xadj,
                        &adjacency_vector[0],
                        vwgt,
                        vsize,
                        adjwgt,
                        &nparts,
                        tpwgts,
                        ubvec,
                        options,
                        edgecut,
                        part);

#ifdef PARANOID
    std::vector<bool> done(nparts, false);
#endif

    // Copy across
    for (unsigned e = 0; e < nelem; e++)
    {
      element_domain[e] = part[e];
#ifdef PARANOID
      done[part[e]] = true;
#endif
    }


#ifdef PARANOID
    // Check
    std::ostringstream error_stream;
    bool shout = false;
    for (int p = 0; p < nparts; p++)
    {
      if (!done[p])
      {
        shout = true;
        error_stream << "No elements on processor " << p
                     << " when trying to partition " << nelem
                     << " elements over " << nparts << " processors!\n";
      }
    }
    if (shout)
    {
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif


    // End timer
    cpu_end = clock();

    // Doc
    double cpu1 = double(cpu_end - cpu_start) / CLOCKS_PER_SEC;
    oomph_info << "CPU time for METIS mesh partitioning [nelem="
               << nelem << "]: " << cpu1 << " sec" << std::endl;


    // Cleanup
    delete[] xadj;
    delete[] part;
    delete[] edgecut;
  }
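
  // Usage sketch (added for exposition; not part of the library source):
  // a METIS-based partition of a Problem "problem" over 4 domains,
  // minimising the edge cut (objective=0):
  //
  //   Vector<unsigned> element_domain(problem.mesh_pt()->nelement());
  //   METIS::partition_mesh(&problem, 4, 0, element_domain);
  //
  // Passing objective=1 would minimise the total communication volume
  // instead.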


#ifdef OOMPH_HAS_MPI


  //==================================================================
  /// Use METIS to assign each element in an already-distributed mesh
  /// to a domain. On return, element_domain_on_this_proc[e] contains the number
  /// of the domain [0,1,...,ndomain-1] to which non-halo element e on THE
  /// CURRENT PROCESSOR ONLY has been assigned. The order of the non-halo
  /// elements is the same as in the Problem's mesh, with the halo
  /// elements being skipped.
  /// Objective:
  /// - objective=0: minimise edgecut.
  /// - objective=1: minimise total communications volume.
  /// .
  /// The partitioning is based on the dof graph of the complete mesh,
  /// taking into account which global equation numbers are affected by
  /// each element and connecting elements which affect the same global
  /// equation number.
  /// Partitioning is done such that all elements associated with the
  /// same tree root move together. Non-refineable elements are
  /// treated as their own root elements. If the optional boolean
  /// flag is set to true (it defaults to false) each processor
  /// assigns a dumb-but-repeatable equidistribution of its non-halo
  /// elements over the domains and outputs the input that would have
  /// gone into METIS in the file metis_input_for_validation.dat
  //==================================================================
  void METIS::partition_distributed_mesh(
    Problem* problem_pt,
    const unsigned& objective,
    Vector<unsigned>& element_domain_on_this_proc,
    const bool& bypass_metis)
  {
    // Start timer
    clock_t cpu_start = clock();

    // Communicator
    OomphCommunicator* comm_pt = problem_pt->communicator_pt();

    // Number of processors / domains
    unsigned n_proc = comm_pt->nproc();
    unsigned my_rank = comm_pt->my_rank();

    // Global mesh
    Mesh* mesh_pt = problem_pt->mesh_pt();

    // Total number of elements (halo and nonhalo) on this proc
    unsigned n_elem = mesh_pt->nelement();

    // Get elemental assembly times
    Vector<double> elemental_assembly_time =
      problem_pt->elemental_assembly_time();

#ifdef PARANOID
    unsigned n = elemental_assembly_time.size();
    if ((n != 0) && (n != n_elem))
    {
      std::ostringstream error_stream;
      error_stream << "Number of elements doesn't match the \n"
                   << "number of elemental assembly times: " << n_elem << " "
                   << n << std::endl;
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // Can we base load balancing on assembly times?
    bool can_load_balance_on_assembly_times = false;
    if (elemental_assembly_time.size() != 0)
    {
      can_load_balance_on_assembly_times = true;
    }

    // Storage for global eqn numbers on current processor
    std::set<unsigned> global_eqns_on_this_proc;

    // Storage for pointers to root elements that are connected with given
    // eqn number -- assembled on local processor
    std::map<unsigned, std::set<GeneralisedElement*>>
      root_elements_connected_with_global_eqn;

    // Storage for long sequence of equation numbers as encountered
    // by the root elements on this processor
    Vector<unsigned> eqn_numbers_with_root_elements_on_this_proc;

    // Reserve number of elements x average/estimate (?) for number of dofs
    // per element
    eqn_numbers_with_root_elements_on_this_proc.reserve(n_elem * 9);

    // Storage for the number of eqn numbers associated with each
    // root element on this processor -- once this and the previous
    // container have been collected from all processors we're
    // able to reconstruct which root element (in the nominal "global" mesh)
    // is connected with which global equations
    Vector<unsigned> number_of_dofs_for_root_element;
    number_of_dofs_for_root_element.reserve(n_elem);

    // Ditto for number of "leaf" elements connected with each root
    Vector<unsigned> number_of_non_halo_elements_for_root_element;
    number_of_non_halo_elements_for_root_element.reserve(n_elem);

    // Ditto for total assembly time of "leaf" elements connected with each root
    Vector<double> total_assembly_time_for_root_element;
    total_assembly_time_for_root_element.reserve(n_elem);

    // Map storing the number of the root elements on this processor
    // (offset by one to bypass the zero default).
    std::map<GeneralisedElement*, unsigned> root_el_number_plus_one;

    // Loop over non-halo elements on this processor
    unsigned number_of_root_elements = 0;
    unsigned number_of_non_halo_elements = 0;
    for (unsigned e = 0; e < n_elem; e++)
    {
      double el_assembly_time = 0.0;
      GeneralisedElement* el_pt = mesh_pt->element_pt(e);
      if (!el_pt->is_halo())
      {
        if (can_load_balance_on_assembly_times)
        {
          el_assembly_time = elemental_assembly_time[e];
        }

        // Get the associated root element which is either...
        GeneralisedElement* root_el_pt = 0;
        RefineableElement* ref_el_pt = dynamic_cast<RefineableElement*>(el_pt);
        if (ref_el_pt != 0)
        {
          //...the actual root element
          root_el_pt = ref_el_pt->root_element_pt();
        }
        // ...or the element itself
        else
        {
          root_el_pt = el_pt;
        }

        // Have we already encountered this root element?
        // (offset of one to bypass the default return of zero)
        bool already_encountered = false;
        unsigned root_el_number = root_el_number_plus_one[root_el_pt];
        if (root_el_number == 0)
        {
          // This is a new one
          already_encountered = false;

          // Give it a number
          number_of_root_elements++;
          root_el_number_plus_one[root_el_pt] = number_of_root_elements;

          // Remove offset
          root_el_number = number_of_root_elements - 1;
        }
        else
        {
          // We've already visited this one before...
          already_encountered = true;

          // Remove offset
          root_el_number -= 1;
        }


        // Get global equation numbers of actual element
        unsigned n_dof = el_pt->ndof();
        for (unsigned i = 0; i < n_dof; i++)
        {
          unsigned eqn_no = el_pt->eqn_number(i);

          // Record which root elements are connected with this eqn number
          root_elements_connected_with_global_eqn[eqn_no].insert(
            root_el_pt);

          // Record all global eqn numbers on this processor
          global_eqns_on_this_proc.insert(eqn_no);

          // Add eqn number of the current element to the long sequence
          // of eqn numbers
          eqn_numbers_with_root_elements_on_this_proc.push_back(eqn_no);
        }

        // Now record how many equations are associated with the current
        // non-halo element
        if (!already_encountered)
        {
          number_of_dofs_for_root_element.push_back(n_dof);
          number_of_non_halo_elements_for_root_element.push_back(1);
          total_assembly_time_for_root_element.push_back(el_assembly_time);
        }
        else
        {
          number_of_dofs_for_root_element[root_el_number] += n_dof;
          number_of_non_halo_elements_for_root_element[root_el_number]++;
          total_assembly_time_for_root_element[root_el_number] +=
            el_assembly_time;
        }

        // Bump up number of non-halos
        number_of_non_halo_elements++;
      }
    }

    // Tell everybody how many root elements
    // are on each processor
    unsigned root_processor = 0;
    Vector<int> number_of_root_elements_on_each_proc(n_proc, 0);
    MPI_Allgather(&number_of_root_elements,
                  1,
                  MPI_INT,
                  &number_of_root_elements_on_each_proc[0],
                  1,
                  MPI_INT,
                  comm_pt->mpi_comm());


    // In the big sequence of concatenated root elements (enumerated
    // individually on the various processors) where do the root elements from a
    // given processor start? Also figure out how many root elements there are
    // in total by summing up their numbers
    unsigned total_number_of_root_elements = 0;
    Vector<int> start_index(n_proc, 0);
    for (unsigned i_proc = 0; i_proc < n_proc; i_proc++)
    {
      total_number_of_root_elements +=
        number_of_root_elements_on_each_proc[i_proc];
      if (i_proc != 0)
      {
        start_index[i_proc] = total_number_of_root_elements -
                              number_of_root_elements_on_each_proc[i_proc];
      }
      else
      {
        start_index[0] = 0;
      }
    }


    // How many global equations are held on this processor?
    int n_eqns_on_this_proc =
      eqn_numbers_with_root_elements_on_this_proc.size();

    // Gather this information for all processors:
    // n_eqns_on_each_proc[iproc] now contains the number of global
    // equations held on processor iproc.
    Vector<int> n_eqns_on_each_proc(n_proc, 0);
    MPI_Allgather(&n_eqns_on_this_proc,
                  1,
                  MPI_INT,
                  &n_eqns_on_each_proc[0],
                  1,
                  MPI_INT,
                  comm_pt->mpi_comm());


    // In the big sequence of equation numbers from the root elements
    // (enumerated individually on the various processors) where do the
    // equation numbers associated with the root elements from a given
    // processor start? Also figure out how long the sequence of equation
    // numbers is
    Vector<int> start_eqns_index(n_proc, 0);
    unsigned total_n_eqn = 0;
    for (unsigned i_proc = 0; i_proc < n_proc; i_proc++)
    {
      total_n_eqn += n_eqns_on_each_proc[i_proc];
      if (i_proc != 0)
      {
        start_eqns_index[i_proc] = total_n_eqn - n_eqns_on_each_proc[i_proc];
      }
      else
      {
        start_eqns_index[0] = 0;
      }
    }


    // Big vector that contains the number of dofs for each root element
    // (concatenated in processor-by-processor order)
    Vector<unsigned> number_of_dofs_for_global_root_element(
      total_number_of_root_elements);
    // Create at least one entry so we don't get a seg fault below
    if (number_of_dofs_for_global_root_element.size() == 0)
    {
      number_of_dofs_for_global_root_element.resize(1);
    }
    MPI_Gatherv(
      &number_of_dofs_for_root_element[0], // pointer to first entry in
                                           // vector to be gathered on root
      number_of_root_elements, // Number of entries to be sent
                               // from current processor
      MPI_UNSIGNED,
      &number_of_dofs_for_global_root_element[0], // Target -- this will
                                                  // store the concatenated
                                                  // vectors sent from
                                                  // everywhere
      &number_of_root_elements_on_each_proc[0], // Pointer to
                                                // vector containing
                                                // the length of the
                                                // vectors received
                                                // from elsewhere
      &start_index[0], // "offset" for storage of vector received
                       // from various processors in the global
                       // concatenated vector stored on root
      MPI_UNSIGNED,
      root_processor,
      comm_pt->mpi_comm());


    // ditto for number of non-halo elements associated with root element
    Vector<unsigned> number_of_non_halo_elements_for_global_root_element(
      total_number_of_root_elements);

    // Create at least one entry so we don't get a seg fault below
    if (number_of_non_halo_elements_for_global_root_element.size() == 0)
    {
      number_of_non_halo_elements_for_global_root_element.resize(1);
    }
    MPI_Gatherv(&number_of_non_halo_elements_for_root_element[0],
                // pointer to first entry in
                // vector to be gathered on root
                number_of_root_elements, // Number of entries to be sent
                                         // from current processor
                MPI_UNSIGNED,
                &number_of_non_halo_elements_for_global_root_element[0],
                // Target -- this will
                // store the concatenated
                // vectors sent from
                // everywhere
                &number_of_root_elements_on_each_proc[0], // Pointer to
                                                          // vector containing
                                                          // the length of the
                                                          // vectors received
                                                          // from elsewhere
                &start_index[0], // "offset" for storage of vector received
                                 // from various processors in the global
                                 // concatenated vector stored on root
                MPI_UNSIGNED,
                root_processor,
                comm_pt->mpi_comm());


    // ditto for assembly times elements associated with root element
    Vector<double> total_assembly_time_for_global_root_element(
      total_number_of_root_elements);

    // Create at least one entry so we don't get a seg fault below
    if (total_assembly_time_for_global_root_element.size() == 0)
    {
      total_assembly_time_for_global_root_element.resize(1);
    }
    MPI_Gatherv(&total_assembly_time_for_root_element[0],
                // pointer to first entry in
                // vector to be gathered on root
                number_of_root_elements, // Number of entries to be sent
                                         // from current processor
                MPI_DOUBLE,
                &total_assembly_time_for_global_root_element[0],
                // Target -- this will
                // store the concatenated
                // vectors sent from
                // everywhere
                &number_of_root_elements_on_each_proc[0], // Pointer to
                                                          // vector containing
                                                          // the length of the
                                                          // vectors received
                                                          // from elsewhere
                &start_index[0], // "offset" for storage of vector received
                                 // from various processors in the global
                                 // concatenated vector stored on root
                MPI_DOUBLE,
                root_processor,
                comm_pt->mpi_comm());


    // Big vector to store the long sequence of global equation numbers
    // associated with the long sequence of root elements
    Vector<unsigned> eqn_numbers_with_root_elements(total_n_eqn);

    // Create at least one entry so we don't get a seg fault below
    if (eqn_numbers_with_root_elements.size() == 0)
    {
      eqn_numbers_with_root_elements.resize(1);
    }
    MPI_Gatherv(&eqn_numbers_with_root_elements_on_this_proc[0],
                n_eqns_on_this_proc,
                MPI_UNSIGNED,
                &eqn_numbers_with_root_elements[0],
                &n_eqns_on_each_proc[0],
                &start_eqns_index[0],
                MPI_UNSIGNED,
                root_processor,
                comm_pt->mpi_comm());

    // Doc
    clock_t cpu_end = clock();

    double cpu0 = double(cpu_end - cpu_start) / CLOCKS_PER_SEC;
    oomph_info
      << "CPU time for global setup of METIS data structures [nroot_elem="
      << total_number_of_root_elements << "]: " << cpu0 << " sec" << std::endl;


    // Now the root processor has gathered all the data needed to establish
    // the root element connectivity (as in the serial case) so use METIS
    // to determine "partitioning" for non-uniformly refined mesh
    //----------------------------------------------------------------------

    // Vector to store target domain for each of the root elements (concatenated
    // in processor-by-processor order)
    Vector<unsigned> root_element_domain(total_number_of_root_elements, 0);
    if (my_rank == root_processor) //--
    {
      // Start timer
      clock_t cpu_start = clock();

      // Repeat the steps used in the serial code: Storage for
      // the global equations (on root processor)
      std::set<unsigned> all_global_eqns_root_processor;

      // Set of root elements (as enumerated in the processor-by-processor
      // order) associated with given global equation number
      std::map<unsigned, std::set<unsigned>>
        root_elements_connected_with_global_eqn_on_root_processor;

      // Retrace the steps of the serial code: Who's connected with who
      unsigned count_all = 0;
      for (unsigned e = 0; e < total_number_of_root_elements; e++)
      {
        unsigned n_eqn_no = number_of_dofs_for_global_root_element[e];
        for (unsigned n = 0; n < n_eqn_no; n++)
        {
          unsigned eqn_no = eqn_numbers_with_root_elements[count_all];
          count_all++;
          root_elements_connected_with_global_eqn_on_root_processor[eqn_no]
            .insert(e);
          all_global_eqns_root_processor.insert(eqn_no);
        }
      }

      // Number of domains
      unsigned ndomain = n_proc;

      // Now reverse the lookup scheme to find out all root elements
      // that are connected because they share the same global eqn
      Vector<std::set<unsigned>> connected_root_elements(
        total_number_of_root_elements);

      // Counter for total number of entries in connected_root_elements
      // structure
      unsigned count = 0;

      // Loop over all global eqns
      for (std::set<unsigned>::iterator it =
             all_global_eqns_root_processor.begin();
           it != all_global_eqns_root_processor.end();
           it++)
      {
        // Get set of root elements connected with this data item
        std::set<unsigned> root_elements =
          root_elements_connected_with_global_eqn_on_root_processor[*it];

        // Double loop over connected root elements: Everybody's connected to
        // everybody
        for (std::set<unsigned>::iterator it1 = root_elements.begin();
             it1 != root_elements.end();
             it1++)
        {
          for (std::set<unsigned>::iterator it2 = root_elements.begin();
               it2 != root_elements.end();
               it2++)
          {
            if ((*it1) != (*it2))
            {
              connected_root_elements[(*it1)].insert(*it2);
            }
          }
        }
      }

      // End timer
      clock_t cpu_end = clock();

      // Doc
      double cpu0b = double(cpu_end - cpu_start) / CLOCKS_PER_SEC;
      oomph_info << "CPU time for setup of connected elements (load balance) "
                    "[nroot_elem="
                 << total_number_of_root_elements << "]: " << cpu0b << " sec"
                 << std::endl;

      // Now convert into C-style packed array for interface with METIS
      cpu_start = clock();
      idx_t* xadj = new idx_t[total_number_of_root_elements + 1];
      Vector<idx_t> adjacency_vector;

      // Reserve (too much) space
      adjacency_vector.reserve(count);

      // Initialise counters
      unsigned ientry = 0;

      // Loop over all elements
      for (unsigned e = 0; e < total_number_of_root_elements; e++)
      {
        // First entry for current element
        xadj[e] = ientry;

        // Loop over elements that are connected to current element
        typedef std::set<unsigned>::iterator IT;
        for (IT it = connected_root_elements[e].begin();
             it != connected_root_elements[e].end();
             it++)
        {
          // Copy into adjacency array
          adjacency_vector.push_back(*it);

          // We've just made another entry
          ientry++;
        }

        // Entry after last entry for current element:
        xadj[e + 1] = ientry;
      }

      // End timer
      cpu_end = clock();

      // Doc
      double cpu0 = double(cpu_end - cpu_start) / CLOCKS_PER_SEC;
      oomph_info << "CPU time for setup of METIS data structures (load "
                    "balance) [nroot_elem="
                 << total_number_of_root_elements << "]: " << cpu0 << " sec"
                 << std::endl;

      // Call METIS graph partitioner
      //-----------------------------

      // Start timer
      cpu_start = clock();

      // Number of vertices in graph
      idx_t nvertex = total_number_of_root_elements;

      // No vertex weights
      idx_t* vwgt = 0;

      // No edge weights
      idx_t* adjwgt = 0;

      // Flag indicating that graph isn't weighted: 0; vertex weights only: 2
      // Note that wgtflag==2 requires nodal weights to be stored in vwgt.
      int wgtflag = 0;

      // Use C-style numbering (first array entry is zero)
      int numflag = 0;

      // Number of desired partitions
      idx_t nparts = ndomain;

      // Use default options
      idx_t options[METIS_NOPTIONS];
      METIS_SetDefaultOptions(options);


      switch (objective)
      {
        case 0:
          // Edge-cut minimization
          options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT;
          break;

        case 1:
          // communication volume minimisation
          options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_VOL;
          break;

        default:
          std::ostringstream error_stream;
          error_stream << "Wrong objective for METIS. objective = " << objective
                       << std::endl;

          throw OomphLibError(error_stream.str(),
                              OOMPH_CURRENT_FUNCTION,
                              OOMPH_EXCEPTION_LOCATION);
      }

      // Number of cut edges in graph
      idx_t* edgecut = new idx_t[total_number_of_root_elements];

      // Array containing the partition information
      idx_t* part = new idx_t[total_number_of_root_elements];

      // Now bias distribution by giving each root element
      // a weight equal to the number of elements associated with it

      // Adjust flag and provide storage for weights
      wgtflag = 2;
      vwgt = new idx_t[total_number_of_root_elements];


      // Load balance based on assembly times of all leaf
      // elements associated with root
      if (can_load_balance_on_assembly_times)
      {
        oomph_info << "Basing distribution on assembly times of elements\n";

        // Normalise
        double min_time = *(
          std::min_element(total_assembly_time_for_global_root_element.begin(),
                           total_assembly_time_for_global_root_element.end()));
#ifdef PARANOID
        if (min_time == 0.0)
        {
          std::ostringstream error_stream;
          error_stream << "Minimum assemble time for element is zero!\n";
          throw OomphLibError(error_stream.str(),
                              OOMPH_CURRENT_FUNCTION,
                              OOMPH_EXCEPTION_LOCATION);
        }
#endif

        // Bypass METIS (usually for validation) and use made-up but
        // repeatable timings
        if (bypass_metis)
        {
          for (unsigned e = 0; e < total_number_of_root_elements; e++)
          {
            vwgt[e] = e;
          }
        }
        else
        {
          for (unsigned e = 0; e < total_number_of_root_elements; e++)
          {
            // Use assembly times (relative to minimum) as weight
            vwgt[e] =
              int(total_assembly_time_for_global_root_element[e] / min_time);
          }
        }
      }
      // Load balance based on number of leaf elements associated with
      // root
      else
      {
        oomph_info << "Basing distribution on number of elements\n";
        for (unsigned e = 0; e < total_number_of_root_elements; e++)
        {
          vwgt[e] = number_of_non_halo_elements_for_global_root_element[e];
        }
      }

      // Bypass METIS (usually for validation)
      if (bypass_metis)
      {
        // Simple repeatable partition: Equidistribute root element
        for (unsigned e = 0; e < total_number_of_root_elements; e++)
        {
          // Simple repeatable partition: Equidistribute elements on each
          // processor
          part[e] = (n_proc - 1) -
                    unsigned(double(e) / double(total_number_of_root_elements) *
                             double(n_proc));
        }

        oomph_info
          << "Bypassing METIS for validation purposes.\n"
          << "Appending input for metis in metis_input_for_validation.dat\n";
        std::ofstream outfile;
        outfile.open("metis_input_for_validation.dat", std::ios_base::app);

        // Dump out relevant input to metis
        for (unsigned e = 0; e < total_number_of_root_elements + 1; e++)
        {
          outfile << xadj[e] << std::endl;
        }
        unsigned n = adjacency_vector.size();
        for (unsigned i = 0; i < n; i++)
        {
          outfile << adjacency_vector[i] << std::endl;
        }
        for (unsigned e = 0; e < total_number_of_root_elements; e++)
        {
          outfile << vwgt[e] << std::endl;
        }
        outfile.close();
      }
      // Actually use METIS (good but not always repeatable!)
      else
      {
        // METIS_PartGraphKway(&nvertex,
        //                     xadj,
        //                     &adjacency_vector[0],
        //                     vwgt,
        //                     adjwgt,
        //                     &wgtflag,
        //                     &numflag,
        //                     &nparts,
        //                     options,
        //                     edgecut,
        //                     part);

        // If ncon is the number of weights associated with each vertex, the
        // array vwgt contains n x ncon elements (where n is the number of
        // vertices).
        idx_t ncon = 1;
        idx_t* vsize = NULL;
        real_t* tpwgts = NULL;
        real_t* ubvec = NULL;

        // Call partitioner
        // METIS_API(int)
        // METIS_PartGraphKway(idx_t * nvtxs,
        //                     idx_t * ncon,
        //                     idx_t * xadj,
        //                     idx_t * adjncy,
        //                     idx_t * vwgt,
        //                     idx_t * vsize,
        //                     idx_t * adjwgt,
        //                     idx_t * nparts,
        //                     real_t * tpwgts,
        //                     real_t * ubvec,
        //                     idx_t * options,
        //                     idx_t * edgecut,
        //                     idx_t * part);
        METIS_PartGraphKway(&nvertex,
                            &ncon,
                            xadj,
                            &adjacency_vector[0],
                            vwgt,
                            vsize,
                            adjwgt,
                            &nparts,
                            tpwgts,
                            ubvec,
                            options,
                            edgecut,
                            part);
      }

      // Copy across
      Vector<unsigned> total_weight_on_proc(n_proc, 0);
      for (unsigned e = 0; e < total_number_of_root_elements; e++)
      {
        root_element_domain[e] = part[e];
        total_weight_on_proc[part[e]] += vwgt[e];
      }

      // Document success of partitioning
      for (unsigned j = 0; j < n_proc; j++)
      {
        oomph_info << "Total weight on proc " << j << " is "
                   << total_weight_on_proc[j] << std::endl;
      }

      // Doc
      double cpu1 = double(clock() - cpu_start) / CLOCKS_PER_SEC;
      oomph_info << "CPU time for METIS mesh partitioning [nroot_elem="
                 << total_number_of_root_elements << "]: " << cpu1 << " sec"
                 << std::endl;

      // Cleanup
      delete[] xadj;
      delete[] part;
      delete[] vwgt;
      delete[] edgecut;
    }

    // Now scatter things back to processors: root_element_domain[] contains
    // the target domain for all elements (concatenated in processor-by-
    // processor order on the root processor). Distribute this back
    // to the processors so that root_element_domain_on_this_proc[e] contains
    // the target domain for root element e (in whatever order the processor
    // decided to line up its root elements).
    cpu_start = clock();
    Vector<unsigned> root_element_domain_on_this_proc(number_of_root_elements);

    // Create at least one entry so we don't get a seg fault below
    if (root_element_domain_on_this_proc.size() == 0)
    {
      root_element_domain_on_this_proc.resize(1);
    }
    MPI_Scatterv(&root_element_domain[0],
                 &number_of_root_elements_on_each_proc[0],
                 &start_index[0],
                 MPI_UNSIGNED,
                 &root_element_domain_on_this_proc[0],
                 number_of_root_elements,
                 MPI_UNSIGNED,
                 root_processor,
                 comm_pt->mpi_comm());


    // Now translate back into target domain for the actual (non-root)
    // elements
    element_domain_on_this_proc.resize(number_of_non_halo_elements);
    unsigned count_non_halo = 0;
    for (unsigned e = 0; e < n_elem; e++)
    {
      GeneralisedElement* el_pt = mesh_pt->element_pt(e);
      if (!el_pt->is_halo())
      {
        // Get the associated root element which is either...
        GeneralisedElement* root_el_pt = 0;
        RefineableElement* ref_el_pt = dynamic_cast<RefineableElement*>(el_pt);
        if (ref_el_pt != 0)
        {
          //...the actual root element
          root_el_pt = ref_el_pt->root_element_pt();
        }
        // ...or the element itself
        else
        {
          root_el_pt = el_pt;
        }

        // Recover the root element number (offset by one)
        unsigned root_el_number = root_el_number_plus_one[root_el_pt] - 1;

        // Copy target domain across from root element
        element_domain_on_this_proc[count_non_halo] =
          root_element_domain_on_this_proc[root_el_number];

        // Bump up counter for non-root elements
        count_non_halo++;
      }
    }

#ifdef PARANOID
    if (count_non_halo != number_of_non_halo_elements)
    {
      std::ostringstream error_stream;
      error_stream << "Non-halo counts don't match: " << count_non_halo << " "
                   << number_of_non_halo_elements << std::endl;

      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // End timer
    cpu_end = clock();

    // Doc
    double cpu2 = double(cpu_end - cpu_start) / CLOCKS_PER_SEC;
    oomph_info << "CPU time for communication of partition to all processors "
                  "[nroot_elem="
               << total_number_of_root_elements << "]: " << cpu2 << " sec"
               << std::endl;
  }
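
  // Usage sketch (added for exposition; not part of the library source):
  // on an already-distributed Problem "problem", run on every processor:
  //
  //   Vector<unsigned> element_domain_on_this_proc;
  //   METIS::partition_distributed_mesh(&problem, 0,
  //                                     element_domain_on_this_proc);
  //
  // Each processor then holds the target domains for its own non-halo
  // elements only. The final bool argument defaults to false; when true,
  // METIS is bypassed in favour of a repeatable dummy partition for
  // validation runs.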


#endif

} // namespace oomph