preconditioner_array.cc
1// LIC// ====================================================================
2// LIC// This file forms part of oomph-lib, the object-oriented,
3// LIC// multi-physics finite-element library, available
4// LIC// at http://www.oomph-lib.org.
5// LIC//
6// LIC// Copyright (C) 2006-2025 Matthias Heil and Andrew Hazel
7// LIC//
8// LIC// This library is free software; you can redistribute it and/or
9// LIC// modify it under the terms of the GNU Lesser General Public
10// LIC// License as published by the Free Software Foundation; either
11// LIC// version 2.1 of the License, or (at your option) any later version.
12// LIC//
13// LIC// This library is distributed in the hope that it will be useful,
14// LIC// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// LIC// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16// LIC// Lesser General Public License for more details.
17// LIC//
18// LIC// You should have received a copy of the GNU Lesser General Public
19// LIC// License along with this library; if not, write to the Free Software
20// LIC// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21// LIC// 02110-1301 USA.
22// LIC//
23// LIC// The authors may be contacted at oomph-lib@maths.man.ac.uk.
24// LIC//
25// LIC//====================================================================
26
27// Config header
28#ifdef HAVE_CONFIG_H
29#include <oomph-lib-config.h>
30#endif
31
32// The preconditioner array is only useful if we have MPI; otherwise a dummy
33// implementation is used and this file doesn't need to implement anything
34// (see the header file).
35#ifdef OOMPH_HAS_MPI
36
37// oomph-lib includes
38#include "preconditioner_array.h"
39
40namespace oomph
41{
42 //============================================================================
43 /// Setup the preconditioners. Sets up each preconditioner in the
44 /// array for the corresponding matrix in the vector matrix_pt.
45 /// The number of preconditioners in the array is taken to be the length of
46 /// prec_pt.
47 //============================================================================
48 void PreconditionerArray::setup_preconditioners(
49 Vector<CRDoubleMatrix*> matrix_pt,
50 Vector<Preconditioner*> prec_pt,
51 const OomphCommunicator* comm_pt)
52 {
53 // clean memory
54 this->clean_up_memory();
55
56 // get the number of preconditioners in the array
57 Nprec = prec_pt.size();
58
59#ifdef PARANOID
60 // check that the preconditioners have been set
61 if (Nprec < 2)
62 {
63 std::ostringstream error_message;
64 error_message << "The PreconditionerArray requires at least 2 "
65 << "preconditioners";
66 throw OomphLibError(
67 error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
68 }
69 // first check that the vector matrix_pt is the correct length
70 if (matrix_pt.size() != Nprec)
71 {
72 std::ostringstream error_message;
73 error_message << "The same number of preconditioners and matrices must "
74 << "be passed to the setup_preconditioners(...).";
75 throw OomphLibError(
76 error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
77 }
78
79 // Resize the storage of the PARANOID check distributions
80 // Already cleared by clean_up_memory call at top of function
81 Distribution_pt.resize(Nprec);
82#endif
83
84 // for each matrix: PARANOID checks, and store a copy of the global communicator
85 for (unsigned i = 0; i < Nprec; i++)
86 {
87#ifdef PARANOID
88 // paranoid check that each matrix pointer is non-null and that
89 // the matrix has been built
90 if (matrix_pt[i] == 0)
91 {
92 std::ostringstream error_message;
93 error_message << "matrix_pt[" << i << "] = NULL.";
94 throw OomphLibError(error_message.str(),
95 OOMPH_CURRENT_FUNCTION,
96 OOMPH_EXCEPTION_LOCATION);
97 }
98
99 // check the matrix has been built
100 if (!matrix_pt[i]->built())
101 {
102 std::ostringstream error_message;
103 error_message << "Matrix " << i << " has not been built.";
104 throw OomphLibError(error_message.str(),
105 OOMPH_CURRENT_FUNCTION,
106 OOMPH_EXCEPTION_LOCATION);
107 }
108#endif
109
110 // check that all the matrices have the same communicator
111 // and store a copy of the communicator
112 if (i == 0)
113 {
114 Global_communicator_pt = new OomphCommunicator(
115 matrix_pt[i]->distribution_pt()->communicator_pt());
116 }
117
118#ifdef PARANOID
119 else
120 {
121 if (*Global_communicator_pt !=
122 *matrix_pt[i]->distribution_pt()->communicator_pt())
123 {
124 std::ostringstream error_message;
125 error_message << "All matrices must have the same communicator.";
126 throw OomphLibError(error_message.str(),
127 OOMPH_CURRENT_FUNCTION,
128 OOMPH_EXCEPTION_LOCATION);
129 }
130 }
131
132 // store a copy of the Distribution of each preconditioner for future
133 // PARANOID checks
134 Distribution_pt[i] =
135 new LinearAlgebraDistribution(matrix_pt[i]->distribution_pt());
136#endif
137 }
138
139 // number of processors
140 unsigned nproc = Global_communicator_pt->nproc();
141
142 // next compute the distribution of the preconditioner over the processors
143 // such that each preconditioner has an (as near as possible) equal number of
144 // processors
145 First_proc_for_prec.resize(Nprec);
146 Nproc_for_prec.resize(Nprec);
147
148 // compute the first processor for each preconditioner
149 for (unsigned p = 0; p < Nprec; p++)
150 {
151 First_proc_for_prec[p] = unsigned(double(p * nproc) / double(Nprec));
152 }
153
154 // compute the number of processors for each preconditioner
155 for (unsigned p = 0; p < Nprec - 1; p++)
156 {
157 Nproc_for_prec[p] = First_proc_for_prec[p + 1] - First_proc_for_prec[p];
158 }
159 Nproc_for_prec[Nprec - 1] = nproc - First_proc_for_prec[Nprec - 1];
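// (e.g. nproc=5 and Nprec=2 gives First_proc_for_prec=[0,2] and
// Nproc_for_prec=[2,3])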
160
161#ifdef PARANOID
162 // paranoid check that every preconditioner has more than one processor
163 for (unsigned p = 0; p < Nprec; p++)
164 {
165 if (Nproc_for_prec[p] == 0)
166 {
167 std::ostringstream error_message;
168 error_message << "We only have " << nproc << " processor[s]!\n"
169 << "This is not enough to perform the " << Nprec
170 << " block solves in parallel! Sorry! \n"
171 << "Please run this with more processors or disable the\n"
172 << "request for two-level paralellism.\n";
173 throw OomphLibError(error_message.str(),
174 OOMPH_CURRENT_FUNCTION,
175 OOMPH_EXCEPTION_LOCATION);
176 }
177 }
178#endif
179
180 // compute the color of this processor
181 Color = 0;
182 unsigned my_rank = Global_communicator_pt->my_rank();
183 while (!(First_proc_for_prec[Color] <= my_rank &&
184 my_rank < First_proc_for_prec[Color] + Nproc_for_prec[Color]))
185 {
186 Color++;
187 }
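// Color now identifies the preconditioner (and hence the group of
// processors) that this processor has been assigned to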
188
189 // create the local communicator for this preconditioner's group of processors
190 Local_communicator_pt = Global_communicator_pt->split(Color, my_rank);
191
192 // pointer for the local matrix on this processor
193 CRDoubleMatrix* local_matrix_pt = 0;
194
195 // resize storage for details of the data to be sent and received
196 First_row_for_proc.resize(Nprec);
197 Nrow_local_for_proc.resize(Nprec);
198 First_row_from_proc.resize(Nprec);
199 Nrow_local_from_proc.resize(Nprec);
200
201 // Vector of MPI_Requests - used for distributed matrices
202 Vector<MPI_Request> req;
203
204 // Counter for the number of requests used
205 unsigned c = 0;
206
207 // storage for the target distribution
208 Vector<Vector<unsigned>> target_first_row(Nprec);
209 Vector<Vector<unsigned>> target_nrow_local(Nprec);
210
211 // create storage for the nnz to be sent and received for each
212 // preconditioner
213 Vector<Vector<unsigned>> nnz_send(Nprec);
214 Vector<Vector<unsigned>> nnz_recv(Nprec);
215
216
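// The matrix of each preconditioner is now copied/redistributed onto the
// subset of processors assigned to it: target_first_row/target_nrow_local
// hold the target row distribution of each matrix, and nnz_send/nnz_recv
// the number of nonzeros this processor exchanges with every other
// processor. The methods below (selected by Method) differ only in how
// this MPI communication is organised.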
217 /////////////////////////////////////////////////////////////////////////////
218 /////////////////////////////////////////////////////////////////////////////
219 /////////////////////////////////////////////////////////////////////////////
220 /////////////////////////////////////////////////////////////////////////////
221
222
223 // METHOD 0
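// Method 0: exchange the nonzero counts with non-blocking sends and
// blocking receives, then ship values, column_index and row_start as
// three separate non-blocking sends per destination; the receiving
// processors assemble their local matrices in a second loop using
// blocking receives.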
224 if (Method == 0)
225 {
226 // for every distributed matrix work out the target distribution on its
227 // subset of processors, exchange the nonzero counts and post the data sends
228 for (unsigned i = 0; i < Nprec; i++)
229 {
230 // nothing needs to be communicated for a global (!distributed) matrix;
231 // for a distributed matrix work out what has to be sent where
232 if (matrix_pt[i]->distributed())
233 {
234 // first compute the distribution of this preconditioner on its subset
235 // of processors
236
237 // number of rows for this preconditioner
238 unsigned nrow = matrix_pt[i]->nrow();
239
240 // set up the target first_row and nrow_local for this preconditioner
241 target_first_row[i].resize(nproc);
242 target_nrow_local[i].resize(nproc);
243 unsigned nproc_local = Nproc_for_prec[i];
244 for (unsigned p = 0; p < nproc_local; p++)
245 {
246 int pp = First_proc_for_prec[i] + p;
247 target_first_row[i][pp] =
248 unsigned(double(p * nrow) / double(nproc_local));
249 }
250 for (unsigned p = 0; p < nproc_local - 1; p++)
251 {
252 int pp = First_proc_for_prec[i] + p;
253 target_nrow_local[i][pp] =
254 target_first_row[i][pp + 1] - target_first_row[i][pp];
255 }
256 unsigned last_local_proc = First_proc_for_prec[i] + nproc_local - 1;
257 target_nrow_local[i][last_local_proc] =
258 nrow - target_first_row[i][last_local_proc];
259
260 // get the details of the current distribution
261 Vector<unsigned> current_first_row(nproc);
262 Vector<unsigned> current_nrow_local(nproc);
263 for (unsigned p = 0; p < nproc; p++)
264 {
265 current_first_row[p] = matrix_pt[i]->first_row(p);
266 current_nrow_local[p] = matrix_pt[i]->nrow_local(p);
267 }
268
269 // resize storage for details of the data to be sent and received
270 First_row_for_proc[i].resize(nproc, 0);
271 Nrow_local_for_proc[i].resize(nproc, 0);
272 First_row_from_proc[i].resize(nproc, 0);
273 Nrow_local_from_proc[i].resize(nproc, 0);
274
275 // for every processor compute the first_row and nrow_local that will
276 // be sent and received by this processor
277 for (unsigned p = 0; p < nproc; p++)
278 {
279 // start with data to be sent
280 if ((target_first_row[i][p] <
281 (current_first_row[my_rank] + current_nrow_local[my_rank])) &&
282 (current_first_row[my_rank] <
283 (target_first_row[i][p] + target_nrow_local[i][p])))
284 {
285 First_row_for_proc[i][p] =
286 std::max(current_first_row[my_rank], target_first_row[i][p]);
287 Nrow_local_for_proc[i][p] =
288 std::min(
289 (current_first_row[my_rank] + current_nrow_local[my_rank]),
290 (target_first_row[i][p] + target_nrow_local[i][p])) -
291 First_row_for_proc[i][p];
292 }
293
294 // and data to be received
295 if ((target_first_row[i][my_rank] <
296 (current_first_row[p] + current_nrow_local[p])) &&
297 (current_first_row[p] < (target_first_row[i][my_rank] +
298 target_nrow_local[i][my_rank])))
299 {
300 First_row_from_proc[i][p] =
301 std::max(current_first_row[p], target_first_row[i][my_rank]);
302 Nrow_local_from_proc[i][p] =
303 std::min((current_first_row[p] + current_nrow_local[p]),
304 (target_first_row[i][my_rank] +
305 target_nrow_local[i][my_rank])) -
306 First_row_from_proc[i][p];
307 }
308 }
309
310 // resize nnz_send
311 nnz_send[i].resize(nproc);
312
313 // compute the number of nnzs to be sent
314 // and the number of send and receive requests to be made (nreq)
315 for (unsigned p = 0; p < nproc; p++)
316 {
317 if (Nrow_local_for_proc[i][p] != 0)
318 {
319 int* row_start = matrix_pt[i]->row_start();
320 unsigned k =
321 First_row_for_proc[i][p] - current_first_row[my_rank];
322 nnz_send[i][p] =
323 row_start[k + Nrow_local_for_proc[i][p]] - row_start[k];
324 }
325 }
326
327 // send nnz to be sent to each processor
328 for (unsigned p = 0; p < nproc; p++)
329 {
330 // don't mpi send to self
331 if (p != my_rank)
332 {
333 // non block send
334 if (Nrow_local_for_proc[i][p] != 0)
335 {
336 // send to other processors
337 int tag = this->compute_tag(nproc, my_rank, p, 0);
338 MPI_Request tr;
339 req.push_back(tr);
340 MPI_Isend(&nnz_send[i][p],
341 1,
342 MPI_UNSIGNED,
343 p,
344 tag,
345 Global_communicator_pt->mpi_comm(),
346 &req[c]);
347 c++;
348 }
349 }
350 }
351
352 // resize nnz_recv
353 nnz_recv[i].resize(nproc);
354
355 // receive nnz from other processors
356 for (unsigned pp = 0; pp < nproc; pp++)
357 {
358 // next processor to receive from
359 unsigned p = (nproc + my_rank - pp) % nproc;
360
361 // don't mpi receive from self
362 if (p != my_rank)
363 {
364 // blocking recv
365 if (Nrow_local_from_proc[i][p] != 0)
366 {
367 int tag = this->compute_tag(nproc, p, my_rank, 0);
368 MPI_Status stat;
369 unsigned nnz_temp;
370 MPI_Recv(&nnz_temp,
371 1,
372 MPI_UNSIGNED,
373 p,
374 tag,
375 Global_communicator_pt->mpi_comm(),
376 &stat);
377 nnz_recv[i][p] = nnz_temp;
378 }
379 }
380
381 // receive from self
382 else
383 {
384 nnz_recv[i][p] = nnz_send[i][p];
385 }
386 }
387
388 // get pointers to the underlying data in the current matrix
389 double* values_send = matrix_pt[i]->value();
390 int* row_start_send = matrix_pt[i]->row_start();
391 int* column_index_send = matrix_pt[i]->column_index();
392
393 // send and receive the contents of the vector
394 for (unsigned p = 0; p < nproc; p++)
395 {
396 // use mpi methods to send to and receive from all but my rank
397 if (p != my_rank)
398 {
399 // send
400 if (nnz_send[i][p] != 0)
401 {
402 // compute the offset for row_start
403 int offset_n =
404 First_row_for_proc[i][p] - current_first_row[my_rank];
405
406 // compute the offset for the values and column_index
407 int offset_nnz = row_start_send[offset_n];
408
409 // values
410 int tag = this->compute_tag(nproc, my_rank, p, 1);
411 MPI_Request tr1;
412 req.push_back(tr1);
413 MPI_Isend(values_send + offset_nnz,
414 int(nnz_send[i][p]),
415 MPI_DOUBLE,
416 p,
417 tag,
418 Global_communicator_pt->mpi_comm(),
419 &req[c]);
420 c++;
421
422 // column_index
423 tag = this->compute_tag(nproc, my_rank, p, 2);
424 MPI_Request tr2;
425 req.push_back(tr2);
426 MPI_Isend(column_index_send + offset_nnz,
427 int(nnz_send[i][p]),
428 MPI_INT,
429 p,
430 tag,
431 Global_communicator_pt->mpi_comm(),
432 &req[c]);
433 c++;
434
435 // row_start
436 tag = this->compute_tag(nproc, my_rank, p, 3);
437 MPI_Request tr3;
438 req.push_back(tr3);
439 MPI_Isend(row_start_send + offset_n,
440 int(Nrow_local_for_proc[i][p]),
441 MPI_INT,
442 p,
443 tag,
444 Global_communicator_pt->mpi_comm(),
445 &req[c]);
446 c++;
447 }
448 }
449 }
450 }
451 }
452
453 // for every matrix we assemble the duplicate of the matrix on fewer
454 // processors and setup the preconditioner
455 for (unsigned i = 0; i < Nprec; i++)
456 {
457 // if the matrix is global (!distributed) then just construct a copy
458 // on the subset of processors
459 if (!matrix_pt[i]->distributed())
460 {
461 oomph_info << "matrix not distributed" << std::endl;
462 // if this matrix is to be preconditioned by this processor
463 if (i == Color)
464 {
465 // create the local distribution for this matrix
466 LinearAlgebraDistribution* temp_dist_pt =
467 new LinearAlgebraDistribution(
468 Local_communicator_pt, matrix_pt[i]->nrow(), false);
469
470 // create the corresponding matrix
471 local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
472 delete temp_dist_pt; // (dist has now been copied)
473
474 // get pointers to the underlying data
475 double* values_pt = matrix_pt[i]->value();
476 int* column_index_pt = matrix_pt[i]->column_index();
477 int* row_start_pt = matrix_pt[i]->row_start();
478
479 // build the matrix without a copy of the data
480 local_matrix_pt->build_without_copy(matrix_pt[i]->ncol(),
481 matrix_pt[i]->nnz(),
482 values_pt,
483 column_index_pt,
484 row_start_pt);
485 }
486 }
487
488 // else we assemble a copy of the matrix distributed over a subset of
489 // processors
490 else
491 {
492 // number of rows for this preconditioner
493
494 // if we are assembling the matrix on this processor
495 if (i == Color)
496 {
497 // create the local distribution for this matrix
498 LinearAlgebraDistribution* temp_dist_pt =
499 new LinearAlgebraDistribution(Local_communicator_pt,
500 target_first_row[i][my_rank],
501 target_nrow_local[i][my_rank]);
502
503 // create the corresponding matrix
504 local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
505 delete temp_dist_pt; // (dist has now been copied)
506
507 // get the number of nnzs to be received from each processor
508
509 // total number of nnz to be received
510 unsigned nnz_total = 0;
511 for (unsigned p = 0; p < nproc; p++)
512 {
513 nnz_total += nnz_recv[i][p];
514 }
515
516 // compute nnz block start
517 Vector<unsigned> nnz_start_proc;
518 Vector<unsigned> nnz_start_index;
519 unsigned row_ptr = target_first_row[i][my_rank];
520 int p = 0;
521 unsigned nnz_ptr = 0;
522 for (p = 0; p < int(nproc); p++)
523 {
524 if (First_row_from_proc[i][p] == row_ptr &&
525 Nrow_local_from_proc[i][p] != 0 && nnz_ptr != nnz_total)
526 {
527 nnz_start_proc.push_back(p);
528 nnz_start_index.push_back(nnz_ptr);
529 nnz_ptr += nnz_recv[i][p];
530 row_ptr += Nrow_local_from_proc[i][p];
531 p = -1;
532 }
533 }
534
535 // storage for received data
536 double* values_recv = new double[nnz_total];
537 int* column_index_recv = new int[nnz_total];
538 int* row_start_recv = new int[target_nrow_local[i][my_rank] + 1];
539
540 // send and receive the contents of the vector
541 for (unsigned pp = 0; pp < nproc; pp++)
542 {
543 // next processor to receive from
544 unsigned p = (nproc + my_rank - pp) % nproc;
545
546 // use mpi methods to send to and receive from all but my rank
547 if (p != my_rank)
548 {
549 // just receive
550 if (nnz_recv[i][p] != 0)
551 {
552 // compute the offset for row_start
553 int offset_n =
554 First_row_from_proc[i][p] - target_first_row[i][my_rank];
555
556 // compute the offset for the values and column_index
557 unsigned k = 0;
558 while (nnz_start_proc[k] != p)
559 {
560 k++;
561 }
562 int offset_nnz = nnz_start_index[k];
563
564 // values
565 int tag = this->compute_tag(nproc, p, my_rank, 1);
566 MPI_Status stat1;
567 MPI_Recv(values_recv + offset_nnz,
568 int(nnz_recv[i][p]),
569 MPI_DOUBLE,
570 p,
571 tag,
572 Global_communicator_pt->mpi_comm(),
573 &stat1);
574
575 // column_index
576 tag = this->compute_tag(nproc, p, my_rank, 2);
577 MPI_Status stat2;
578 MPI_Recv(column_index_recv + offset_nnz,
579 int(nnz_recv[i][p]),
580 MPI_INT,
581 p,
582 tag,
583 Global_communicator_pt->mpi_comm(),
584 &stat2);
585
586 // row_start
587 tag = this->compute_tag(nproc, p, my_rank, 3);
588 MPI_Status stat3;
589 MPI_Recv(row_start_recv + offset_n,
590 int(Nrow_local_from_proc[i][p]),
591 MPI_INT,
592 p,
593 tag,
594 Global_communicator_pt->mpi_comm(),
595 &stat3);
596 }
597 }
598 // otherwise just copy from self
599 else
600 {
601 if (nnz_recv[i][p] != 0)
602 {
603 // get pointers to the underlying data in the current matrix
604 double* values_send = matrix_pt[i]->value();
605 int* row_start_send = matrix_pt[i]->row_start();
606 int* column_index_send = matrix_pt[i]->column_index();
607
608 // offset for row_start send to self
609 unsigned offset_n_send =
610 First_row_for_proc[i][my_rank] - matrix_pt[i]->first_row(p);
611 // offset for values and column_index send to self
612 unsigned offset_nnz_send = row_start_send[offset_n_send];
613
614 // offset for row_start receive from self
615 unsigned offset_n_recv = First_row_from_proc[i][my_rank] -
616 target_first_row[i][my_rank];
617
618 // offset for values and column_index receive from self
619 unsigned k = 0;
620 while (nnz_start_proc[k] != p)
621 {
622 k++;
623 }
624 unsigned offset_nnz_recv = nnz_start_index[k];
625
626 // and copy
627
628 // values and column_index
629 unsigned n_nnz = nnz_send[i][my_rank];
630 for (unsigned j = 0; j < n_nnz; j++)
631 {
632 values_recv[offset_nnz_recv + j] =
633 values_send[offset_nnz_send + j];
634 column_index_recv[offset_nnz_recv + j] =
635 column_index_send[offset_nnz_send + j];
636 }
637
638 // row start
639 unsigned n_n = Nrow_local_from_proc[i][my_rank];
640 for (unsigned j = 0; j < n_n; j++)
641 {
642 row_start_recv[offset_n_recv + j] =
643 row_start_send[offset_n_send + j];
644 }
645 }
646 }
647 }
648
649
650 // number of processors contributing to the local matrix on this
651 // processor
652
653 // update the row start
654 unsigned nproc_contrib = nnz_start_index.size();
655 for (unsigned j = 0; j < nproc_contrib; j++)
656 {
657 unsigned first = First_row_from_proc[i][nnz_start_proc[j]] -
658 target_first_row[i][my_rank];
659 unsigned last =
660 first + Nrow_local_from_proc[i][nnz_start_proc[j]];
661 unsigned nnz_inc = nnz_start_index[j] - row_start_recv[first];
662 for (unsigned k = first; k < last; k++)
663 {
664 row_start_recv[k] += nnz_inc;
665 }
666 }
667 row_start_recv[target_nrow_local[i][my_rank]] = int(nnz_total);
668
669 // build the matrix without a copy of the data
670 local_matrix_pt->build_without_copy(matrix_pt[i]->ncol(),
671 nnz_total,
672 values_recv,
673 column_index_recv,
674 row_start_recv);
675 }
676 }
677 }
678
679 // wait for all sends to complete
680 if (c != 0)
681 {
682 Vector<MPI_Status> stat(c);
683 MPI_Waitall(c, &req[0], &stat[0]);
684 }
685 }
686
687
688 /////////////////////////////////////////////////////////////////////////////
689 /////////////////////////////////////////////////////////////////////////////
690 /////////////////////////////////////////////////////////////////////////////
691 /////////////////////////////////////////////////////////////////////////////
692
693
694 // METHOD 1
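// Method 1: exchange the nonzero counts with non-blocking sends and
// receives, then pack values, column_index and row_start for each
// destination into a single derived MPI datatype (MPI_Type_create_struct)
// so that one message per destination suffices; the sends are posted
// before the matching receives.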
695 else if (Method == 1)
696 {
697 // temporary storage for nnz recv
698 unsigned* nnz_recv_temp = new unsigned[nproc * Nprec];
699 for (unsigned j = 0; j < nproc * Nprec; j++)
700 {
701 nnz_recv_temp[j] = 0;
702 }
703
704 // for every matrix we assemble the duplicate of the matrix on fewer
705 // processors and setup the preconditioner
706 for (unsigned i = 0; i < Nprec; i++)
707 {
708 // if the matrix is global (!distributed) then just construct a copy
709 // on the subset of processors
710 if (!matrix_pt[i]->distributed())
711 {
712 // if this matrix is to be preconditioned by this processor
713 if (i == Color)
714 {
715 // create the local distribution for this matrix
716 LinearAlgebraDistribution* temp_dist_pt =
717 new LinearAlgebraDistribution(
718 Local_communicator_pt, matrix_pt[i]->nrow(), false);
719
720 // create the corresponding matrix
721 local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
722 delete temp_dist_pt; // (dist has now been copied)
723
724 // get pointers to the underlying data
725 double* values_pt = matrix_pt[i]->value();
726 int* column_index_pt = matrix_pt[i]->column_index();
727 int* row_start_pt = matrix_pt[i]->row_start();
728
729 // build the matrix without a copy of the data
730 local_matrix_pt->build_without_copy(matrix_pt[i]->ncol(),
731 matrix_pt[i]->nnz(),
732 values_pt,
733 column_index_pt,
734 row_start_pt);
735 }
736 }
737
738 // otherwise the matrix is distributed and has to be redistributed over
739 // this preconditioner's subset of processors
740 else
741 {
742 // first compute the distribution of this preconditioner on its subset
743 // of processors
744
745 // number of rows for this preconditioner
746 unsigned nrow = matrix_pt[i]->nrow();
747
748 // set up the target first_row and nrow_local for this preconditioner
749 target_first_row[i].resize(nproc);
750 target_nrow_local[i].resize(nproc);
751 unsigned nproc_local = Nproc_for_prec[i];
752 for (unsigned p = 0; p < nproc_local; p++)
753 {
754 int pp = First_proc_for_prec[i] + p;
755 target_first_row[i][pp] =
756 unsigned(double(p * nrow) / double(nproc_local));
757 }
758 for (unsigned p = 0; p < nproc_local - 1; p++)
759 {
760 int pp = First_proc_for_prec[i] + p;
761 target_nrow_local[i][pp] =
762 target_first_row[i][pp + 1] - target_first_row[i][pp];
763 }
764 unsigned last_local_proc = First_proc_for_prec[i] + nproc_local - 1;
765 target_nrow_local[i][last_local_proc] =
766 nrow - target_first_row[i][last_local_proc];
767
768 // get the details of the current distribution
769 Vector<unsigned> current_first_row(nproc);
770 Vector<unsigned> current_nrow_local(nproc);
771 for (unsigned p = 0; p < nproc; p++)
772 {
773 current_first_row[p] = matrix_pt[i]->first_row(p);
774 current_nrow_local[p] = matrix_pt[i]->nrow_local(p);
775 }
776
777 // resize storage for details of the data to be sent and received
778 First_row_for_proc[i].resize(nproc, 0);
779 Nrow_local_for_proc[i].resize(nproc, 0);
780 First_row_from_proc[i].resize(nproc, 0);
781 Nrow_local_from_proc[i].resize(nproc, 0);
782
783 // for every processor compute the first_row and nrow_local that will
784 // be sent and received by this processor
785 for (unsigned p = 0; p < nproc; p++)
786 {
787 // start with data to be sent
788 if ((target_first_row[i][p] <
789 (current_first_row[my_rank] + current_nrow_local[my_rank])) &&
790 (current_first_row[my_rank] <
791 (target_first_row[i][p] + target_nrow_local[i][p])))
792 {
793 First_row_for_proc[i][p] =
794 std::max(current_first_row[my_rank], target_first_row[i][p]);
795 Nrow_local_for_proc[i][p] =
796 std::min(
797 (current_first_row[my_rank] + current_nrow_local[my_rank]),
798 (target_first_row[i][p] + target_nrow_local[i][p])) -
799 First_row_for_proc[i][p];
800 }
801
802 // and data to be received
803 if ((target_first_row[i][my_rank] <
804 (current_first_row[p] + current_nrow_local[p])) &&
805 (current_first_row[p] < (target_first_row[i][my_rank] +
806 target_nrow_local[i][my_rank])))
807 {
808 First_row_from_proc[i][p] =
809 std::max(current_first_row[p], target_first_row[i][my_rank]);
810 Nrow_local_from_proc[i][p] =
811 std::min((current_first_row[p] + current_nrow_local[p]),
812 (target_first_row[i][my_rank] +
813 target_nrow_local[i][my_rank])) -
814 First_row_from_proc[i][p];
815 }
816 }
817
818 // resize nnz_send
819 nnz_send[i].resize(nproc);
820
821 // compute the number of nnzs to be sent
822 // and the number of send and receive requests to be made (nreq)
823 for (unsigned p = 0; p < nproc; p++)
824 {
825 if (Nrow_local_for_proc[i][p] != 0)
826 {
827 int* row_start = matrix_pt[i]->row_start();
828 unsigned k =
829 First_row_for_proc[i][p] - current_first_row[my_rank];
830 nnz_send[i][p] =
831 row_start[k + Nrow_local_for_proc[i][p]] - row_start[k];
832 }
833 }
834
835 // resize nnz_recv
836 nnz_recv[i].resize(nproc);
837
838 // send nnz to be sent to each processor
839 for (unsigned p = 0; p < nproc; p++)
840 {
841 // send and recv
842
843 // don't mpi send to self
844 if (p != my_rank)
845 {
846 // non block send
847 if (Nrow_local_for_proc[i][p] != 0)
848 {
849 // send to other processors
850 int tag = this->compute_tag(nproc, my_rank, p, 0);
851 MPI_Request tr;
852 req.push_back(tr);
853 MPI_Isend(&nnz_send[i][p],
854 1,
855 MPI_UNSIGNED,
856 p,
857 tag,
858 Global_communicator_pt->mpi_comm(),
859 &req[c]);
860 c++;
861 }
862
863 // non blocking recv
864 if (Nrow_local_from_proc[i][p] != 0)
865 {
866 int tag = this->compute_tag(nproc, p, my_rank, 0);
867 MPI_Request tr;
868 req.push_back(tr);
869 MPI_Irecv(nnz_recv_temp + (i * nproc) + p,
870 1,
871 MPI_UNSIGNED,
872 p,
873 tag,
874 Global_communicator_pt->mpi_comm(),
875 &req[c]);
876 c++;
877 }
878 }
879 // receive from self
880 else
881 {
882 if (Nrow_local_for_proc[i][p] != 0)
883 {
884 nnz_recv_temp[(i * nproc) + p] = nnz_send[i][p];
885 }
886 }
887 }
888 }
889 }
890 if (c != 0)
891 {
892 Vector<MPI_Status> stat(c);
893 MPI_Waitall(c, &req[0], &stat[0]);
894 req.clear();
895 stat.clear();
896 }
897 c = 0;
898 for (unsigned i = 0; i < Nprec; i++)
899 {
900 for (unsigned p = 0; p < nproc; p++)
901 {
902 nnz_recv[i][p] = nnz_recv_temp[(i * nproc) + p];
903 }
904 }
905 delete[] nnz_recv_temp;
906
907 // get the number of nnzs to be received from each processor
908
909 // total number of nnz to be received
910 unsigned nnz_total = 0;
911 for (unsigned p = 0; p < nproc; p++)
912 {
913 nnz_total += nnz_recv[Color][p];
914 }
915
916 // compute nnz block start
917 Vector<unsigned> nnz_start_proc;
918 Vector<unsigned> nnz_start_index;
919 unsigned row_ptr = target_first_row[Color][my_rank];
920 int p = 0;
921 unsigned nnz_ptr = 0;
922 for (p = 0; p < int(nproc); p++)
923 {
924 if (First_row_from_proc[Color][p] == row_ptr &&
925 Nrow_local_from_proc[Color][p] != 0 && nnz_ptr != nnz_total)
926 {
927 nnz_start_proc.push_back(p);
928 nnz_start_index.push_back(nnz_ptr);
929 nnz_ptr += nnz_recv[Color][p];
930 row_ptr += Nrow_local_from_proc[Color][p];
931 p = -1;
932 }
933 }
934
935 // storage for derived datatypes
936 Vector<MPI_Datatype> datatypes;
937
938 // storage for received data
939 double* values_recv = new double[nnz_total];
940 int* column_index_recv = new int[nnz_total];
941 int* row_start_recv = new int[target_nrow_local[Color][my_rank] + 1];
942
943 ///////////////////////////////////////////////////////////////////////////
944 // SEND
945 ///////////////////////////////////////////////////////////////////////////
946 unsigned c_send = 0;
947 Vector<MPI_Request> send_req;
948
949 // for every matrix we assemble the duplicate of the matrix on fewer
950 // processors and setup the preconditioner
951 for (unsigned i = 0; i < Nprec; i++)
952 {
953 // get pointers to the underlying data in the current matrix
954 double* values_send = matrix_pt[i]->value();
955 int* row_start_send = matrix_pt[i]->row_start();
956 int* column_index_send = matrix_pt[i]->column_index();
957
958 // send and receive the contents of the vector
959 for (unsigned p = 0; p < nproc; p++)
960 {
961 // use mpi methods to send to and receive from all but my rank
962 if (p != my_rank)
963 {
964 // send
965 if (nnz_send[i][p] != 0)
966 {
967 // create 3 MPI contiguous datatypes
968 // + values
969 // + column_index
970 // + row_start
971
972 // values
973 MPI_Datatype datatype_values;
974 MPI_Type_contiguous(
975 int(nnz_send[i][p]), MPI_DOUBLE, &datatype_values);
976 MPI_Type_commit(&datatype_values);
977 datatypes.push_back(datatype_values);
978
979 // column index
980 MPI_Datatype datatype_column_index;
981 MPI_Type_contiguous(
982 int(nnz_send[i][p]), MPI_INT, &datatype_column_index);
983 MPI_Type_commit(&datatype_column_index);
984 datatypes.push_back(datatype_column_index);
985
986 // row start
987 MPI_Datatype datatype_row_start;
988 MPI_Type_contiguous(
989 int(Nrow_local_for_proc[i][p]), MPI_INT, &datatype_row_start);
990 MPI_Type_commit(&datatype_row_start);
991 datatypes.push_back(datatype_row_start);
992
993 // assemble the typelist
994 MPI_Datatype typelist[3];
995 typelist[0] = datatype_values;
996 typelist[1] = datatype_column_index;
997 typelist[2] = datatype_row_start;
998
999 // compute the offset for row_start
1000 int offset_n =
1001 First_row_for_proc[i][p] - matrix_pt[i]->first_row(my_rank);
1002
1003 // compute the offset for the values and column_index
1004 int offset_nnz = row_start_send[offset_n];
1005
1006 // next compute the displacements
1007 MPI_Aint displacements[3];
1008 MPI_Get_address(values_send + offset_nnz, &displacements[0]);
1009 MPI_Get_address(column_index_send + offset_nnz,
1010 &displacements[1]);
1011 MPI_Get_address(row_start_send + offset_n, &displacements[2]);
1012 for (int j = 2; j >= 0; j--)
1013 {
1014 displacements[j] -= displacements[0];
1015 }
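// (displacements are now relative to the values buffer, which is also the
// buffer address passed to MPI_Isend below, so one message carries all
// three arrays)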
1016
1017 // set the block lengths
1018 int block_length[3];
1019 block_length[0] = block_length[1] = block_length[2] = 1;
1020
1021 // now build the final datatype
1022 MPI_Datatype send_type;
1023 MPI_Type_create_struct(
1024 3, block_length, displacements, typelist, &send_type);
1025 MPI_Type_commit(&send_type);
1026 datatypes.push_back(send_type);
1027
1028 // send
1029 int tag = this->compute_tag(nproc, my_rank, p, 1);
1030 MPI_Request tr1;
1031 send_req.push_back(tr1);
1032 MPI_Isend(values_send + offset_nnz,
1033 1,
1034 send_type,
1035 p,
1036 tag,
1037 Global_communicator_pt->mpi_comm(),
1038 &send_req[c_send]);
1039 c_send++;
1040 }
1041 }
1042 }
1043 }
1044
1045 ///////////////////////////////////////////////////////////////////////////
1046 // RECV
1047 ///////////////////////////////////////////////////////////////////////////
1048 unsigned c_recv = 0;
1049 Vector<MPI_Request> recv_req;
1050
1051 // receive the contents of the vector
1052 for (unsigned p = 0; p < nproc; p++)
1053 {
1054 // use mpi methods to send to and receive from all but my rank
1055 if (p != my_rank)
1056 {
1057 // just receive
1058 if (nnz_recv[Color][p] != 0)
1059 {
1060 // create 3 MPI contiguous datatypes
1061 // + values
1062 // + column_index
1063 // + row_start
1064
1065 // values
1066 MPI_Datatype datatype_values;
1067 MPI_Type_contiguous(
1068 int(nnz_recv[Color][p]), MPI_DOUBLE, &datatype_values);
1069 MPI_Type_commit(&datatype_values);
1070 datatypes.push_back(datatype_values);
1071
1072 // column index
1073 MPI_Datatype datatype_column_index;
1074 MPI_Type_contiguous(
1075 int(nnz_recv[Color][p]), MPI_INT, &datatype_column_index);
1076 MPI_Type_commit(&datatype_column_index);
1077 datatypes.push_back(datatype_column_index);
1078
1079 // row start
1080 MPI_Datatype datatype_row_start;
1081 MPI_Type_contiguous(int(Nrow_local_from_proc[Color][p]),
1082 MPI_INT,
1083 &datatype_row_start);
1084 MPI_Type_commit(&datatype_row_start);
1085 datatypes.push_back(datatype_row_start);
1086
1087 // assemble the typelist
1088 MPI_Datatype typelist[3];
1089 typelist[0] = datatype_values;
1090 typelist[1] = datatype_column_index;
1091 typelist[2] = datatype_row_start;
1092
1093 // compute the offset for row_start
1094 int offset_n =
1095 First_row_from_proc[Color][p] - target_first_row[Color][my_rank];
1096
1097 // compute the offset for the values and column_index
1098 unsigned k = 0;
1099 while (nnz_start_proc[k] != p)
1100 {
1101 k++;
1102 }
1103 int offset_nnz = nnz_start_index[k];
1104
1105 // next compute the displacements
1106 MPI_Aint displacements[3];
1107 MPI_Get_address(values_recv + offset_nnz, &displacements[0]);
1108 MPI_Get_address(column_index_recv + offset_nnz, &displacements[1]);
1109 MPI_Get_address(row_start_recv + offset_n, &displacements[2]);
1110 for (int j = 2; j >= 0; j--)
1111 {
1112 displacements[j] -= displacements[0];
1113 }
1114
1115 // set the block lengths
1116 int block_length[3];
1117 block_length[0] = block_length[1] = block_length[2] = 1;
1118
1119 // now build the final datatype
1120 MPI_Datatype recv_type;
1121 MPI_Type_create_struct(
1122 3, block_length, displacements, typelist, &recv_type);
1123 MPI_Type_commit(&recv_type);
1124 datatypes.push_back(recv_type);
1125
1126 // recv
1127 int tag = this->compute_tag(nproc, p, my_rank, 1);
1128 MPI_Request tr1;
1129 recv_req.push_back(tr1);
1130 MPI_Irecv(values_recv + offset_nnz,
1131 1,
1132 recv_type,
1133 p,
1134 tag,
1135 Global_communicator_pt->mpi_comm(),
1136 &recv_req[c_recv]);
1137 c_recv++;
1138 }
1139 }
1140 }
1141
1142 // otherwise send to self (copy)
1143 if (nnz_recv[Color][my_rank] != 0)
1144 {
1145 // get pointers to the underlying data in the current matrix
1146 double* values_send = matrix_pt[Color]->value();
1147 int* row_start_send = matrix_pt[Color]->row_start();
1148 int* column_index_send = matrix_pt[Color]->column_index();
1149
1150 // offset for row_start send to self
1151 unsigned offset_n_send = First_row_for_proc[Color][my_rank] -
1152 matrix_pt[Color]->first_row(my_rank);
1153
1154 // offset for values and column_index send to self
1155 unsigned offset_nnz_send = row_start_send[offset_n_send];
1156
1157 // offset for row_start receive from self
1158 unsigned offset_n_recv = First_row_from_proc[Color][my_rank] -
1159 target_first_row[Color][my_rank];
1160
1161 // offset for values and column_index receive from self
1162 unsigned k = 0;
1163 while (nnz_start_proc[k] != my_rank)
1164 {
1165 k++;
1166 }
1167 unsigned offset_nnz_recv = nnz_start_index[k];
1168
1169 // and copy
1170
1171 // values and column_index
1172 unsigned n_nnz = nnz_send[Color][my_rank];
1173 for (unsigned j = 0; j < n_nnz; j++)
1174 {
1175 values_recv[offset_nnz_recv + j] = values_send[offset_nnz_send + j];
1176 column_index_recv[offset_nnz_recv + j] =
1177 column_index_send[offset_nnz_send + j];
1178 }
1179
1180 // row start
1181 unsigned n_n = Nrow_local_from_proc[Color][my_rank];
1182 for (unsigned j = 0; j < n_n; j++)
1183 {
1184 row_start_recv[offset_n_recv + j] = row_start_send[offset_n_send + j];
1185 }
1186 }
1187
1188 // create the local distribution for this matrix
1189 LinearAlgebraDistribution* temp_dist_pt =
1190 new LinearAlgebraDistribution(Local_communicator_pt,
1191 target_first_row[Color][my_rank],
1192 target_nrow_local[Color][my_rank]);
1193
1194 // create the corresponding matrix
1195 local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
1196 delete temp_dist_pt; // (dist has now been copied)
1197
1198 ///////////////////////////////////////////////////////////////////////////
1199 // and WAIT...
1200 ///////////////////////////////////////////////////////////////////////////
1201 if (c_recv != 0)
1202 {
1203 Vector<MPI_Status> recv_stat(c_recv);
1204 MPI_Waitall(c_recv, &recv_req[0], &recv_stat[0]);
1205 recv_req.clear();
1206 recv_stat.clear();
1207 }
1208
1209 // build the matrix
1210
1211 // update the row start
1212 unsigned nproc_contrib = nnz_start_index.size();
1213 for (unsigned j = 0; j < nproc_contrib; j++)
1214 {
1215 unsigned first = First_row_from_proc[Color][nnz_start_proc[j]] -
1216 target_first_row[Color][my_rank];
1217 unsigned last = first + Nrow_local_from_proc[Color][nnz_start_proc[j]];
1218 unsigned nnz_inc = nnz_start_index[j] - row_start_recv[first];
1219 for (unsigned k = first; k < last; k++)
1220 {
1221 row_start_recv[k] += nnz_inc;
1222 }
1223 }
1224 row_start_recv[target_nrow_local[Color][my_rank]] = int(nnz_total);
1225
1226 // build the matrix without a copy of the data
1227 local_matrix_pt->build_without_copy(matrix_pt[Color]->ncol(),
1228 nnz_total,
1229 values_recv,
1230 column_index_recv,
1231 row_start_recv);
1232
1233 // and finally wait for the sends
1234 if (c_send != 0)
1235 {
1236 Vector<MPI_Status> send_stat(c_send);
1237 MPI_Waitall(c_send, &send_req[0], &send_stat[0]);
1238 send_req.clear();
1239 send_stat.clear();
1240 }
1241
1242 // and clear the datatype
1243 unsigned ndatatypes = datatypes.size();
1244 for (unsigned i = 0; i < ndatatypes; i++)
1245 {
1246 MPI_Type_free(&datatypes[i]);
1247 }
1248 }
1249
1250
1251 /////////////////////////////////////////////////////////////////////////////
1252 /////////////////////////////////////////////////////////////////////////////
1253 /////////////////////////////////////////////////////////////////////////////
1254 /////////////////////////////////////////////////////////////////////////////
1255
1256
1257 // METHOD 2
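// Method 2: identical to Method 1 except that the non-blocking receives
// for this processor's preconditioner are posted before the sends.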
1258 else if (Method == 2)
1259 {
1260 // temporary storage for nnz recv
1261 unsigned* nnz_recv_temp = new unsigned[nproc * Nprec];
1262 for (unsigned j = 0; j < nproc * Nprec; j++)
1263 {
1264 nnz_recv_temp[j] = 0;
1265 }
1266
1267 // for every matrix we assemble the duplicate of the matrix on fewer
1268 // processors and setup the preconditioner
1269 for (unsigned i = 0; i < Nprec; i++)
1270 {
1271 // if the matrix is global (!distributed) then just construct a copy
1272 // on the subset of processors
1273 if (!matrix_pt[i]->distributed())
1274 {
1275 // if this matrix is to be preconditioned by this processor
1276 if (i == Color)
1277 {
1278 // create the local distribution for this matrix
1279 LinearAlgebraDistribution* temp_dist_pt =
1280 new LinearAlgebraDistribution(
1281 Local_communicator_pt, matrix_pt[i]->nrow(), false);
1282
1283 // create the corresponding matrix
1284 local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
1285 delete temp_dist_pt; // (dist has now been copied)
1286
1287 // get pointers to the underlying data
1288 double* values_pt = matrix_pt[i]->value();
1289 int* column_index_pt = matrix_pt[i]->column_index();
1290 int* row_start_pt = matrix_pt[i]->row_start();
1291
1292 // build the matrix without a copy of the data
1293 local_matrix_pt->build_without_copy(matrix_pt[i]->ncol(),
1294 matrix_pt[i]->nnz(),
1295 values_pt,
1296 column_index_pt,
1297 row_start_pt);
1298 }
1299 }
1300
1301 // otherwise the matrix is distributed and has to be redistributed over
1302 // this preconditioner's subset of processors
1303 else
1304 {
1305 // first compute the distribution of this preconditioner on its subset
1306 // of processors
1307
1308 // number of rows for this preconditioner
1309 unsigned nrow = matrix_pt[i]->nrow();
1310
1311 // set up the target first_row and nrow_local for this preconditioner
1312 target_first_row[i].resize(nproc);
1313 target_nrow_local[i].resize(nproc);
1314 unsigned nproc_local = Nproc_for_prec[i];
1315 for (unsigned p = 0; p < nproc_local; p++)
1316 {
1317 int pp = First_proc_for_prec[i] + p;
1318 target_first_row[i][pp] =
1319 unsigned(double(p * nrow) / double(nproc_local));
1320 }
1321 for (unsigned p = 0; p < nproc_local - 1; p++)
1322 {
1323 int pp = First_proc_for_prec[i] + p;
1324 target_nrow_local[i][pp] =
1325 target_first_row[i][pp + 1] - target_first_row[i][pp];
1326 }
1327 unsigned last_local_proc = First_proc_for_prec[i] + nproc_local - 1;
1328 target_nrow_local[i][last_local_proc] =
1329 nrow - target_first_row[i][last_local_proc];
1330
1331 // get the details of the current distribution
1332 Vector<unsigned> current_first_row(nproc);
1333 Vector<unsigned> current_nrow_local(nproc);
1334 for (unsigned p = 0; p < nproc; p++)
1335 {
1336 current_first_row[p] = matrix_pt[i]->first_row(p);
1337 current_nrow_local[p] = matrix_pt[i]->nrow_local(p);
1338 }
1339
1340 // resize storage for details of the data to be sent and received
1341 First_row_for_proc[i].resize(nproc, 0);
1342 Nrow_local_for_proc[i].resize(nproc, 0);
1343 First_row_from_proc[i].resize(nproc, 0);
1344 Nrow_local_from_proc[i].resize(nproc, 0);
1345
1346 // for every processor compute the first_row and nrow_local that will
1347 // be sent and received by this processor
1348 for (unsigned p = 0; p < nproc; p++)
1349 {
1350 // start with data to be sent
1351 if ((target_first_row[i][p] <
1352 (current_first_row[my_rank] + current_nrow_local[my_rank])) &&
1353 (current_first_row[my_rank] <
1354 (target_first_row[i][p] + target_nrow_local[i][p])))
1355 {
1356 First_row_for_proc[i][p] =
1357 std::max(current_first_row[my_rank], target_first_row[i][p]);
1358 Nrow_local_for_proc[i][p] =
1359 std::min(
1360 (current_first_row[my_rank] + current_nrow_local[my_rank]),
1361 (target_first_row[i][p] + target_nrow_local[i][p])) -
1362 First_row_for_proc[i][p];
1363 }
1364
1365 // and data to be received
1366 if ((target_first_row[i][my_rank] <
1367 (current_first_row[p] + current_nrow_local[p])) &&
1368 (current_first_row[p] < (target_first_row[i][my_rank] +
1369 target_nrow_local[i][my_rank])))
1370 {
1371 First_row_from_proc[i][p] =
1372 std::max(current_first_row[p], target_first_row[i][my_rank]);
1373 Nrow_local_from_proc[i][p] =
1374 std::min((current_first_row[p] + current_nrow_local[p]),
1375 (target_first_row[i][my_rank] +
1376 target_nrow_local[i][my_rank])) -
1377 First_row_from_proc[i][p];
1378 }
1379 }
1380
1381 // resize nnz_send
1382 nnz_send[i].resize(nproc);
1383
1384 // compute the number of nnzs to be sent
1385 // and the number of send and receive requests to be made (nreq)
1386 for (unsigned p = 0; p < nproc; p++)
1387 {
1388 if (Nrow_local_for_proc[i][p] != 0)
1389 {
1390 int* row_start = matrix_pt[i]->row_start();
1391 unsigned k =
1392 First_row_for_proc[i][p] - current_first_row[my_rank];
1393 nnz_send[i][p] =
1394 row_start[k + Nrow_local_for_proc[i][p]] - row_start[k];
1395 }
1396 }
1397
1398 // resize nnz_recv
1399 nnz_recv[i].resize(nproc);
1400
1401 // send nnz to be sent to each processor
1402 for (unsigned p = 0; p < nproc; p++)
1403 {
1404 // send and recv
1405
1406 // don't mpi send to self
1407 if (p != my_rank)
1408 {
1409 // non block send
1410 if (Nrow_local_for_proc[i][p] != 0)
1411 {
1412 // send to other processors
1413 int tag = this->compute_tag(nproc, my_rank, p, 0);
1414 MPI_Request tr;
1415 req.push_back(tr);
1416 MPI_Isend(&nnz_send[i][p],
1417 1,
1418 MPI_UNSIGNED,
1419 p,
1420 tag,
1421 Global_communicator_pt->mpi_comm(),
1422 &req[c]);
1423 c++;
1424 }
1425
1426 // non blocking recv
1427 if (Nrow_local_from_proc[i][p] != 0)
1428 {
1429 int tag = this->compute_tag(nproc, p, my_rank, 0);
1430 MPI_Request tr;
1431 req.push_back(tr);
1432 MPI_Irecv(nnz_recv_temp + (i * nproc) + p,
1433 1,
1434 MPI_UNSIGNED,
1435 p,
1436 tag,
1437 Global_communicator_pt->mpi_comm(),
1438 &req[c]);
1439 c++;
1440 }
1441 }
1442 // receive from self
1443 else
1444 {
1445 if (Nrow_local_for_proc[i][p] != 0)
1446 {
1447 nnz_recv_temp[(i * nproc) + p] = nnz_send[i][p];
1448 }
1449 }
1450 }
1451 }
1452 }
1453 if (c != 0)
1454 {
1455 Vector<MPI_Status> stat(c);
1456 MPI_Waitall(c, &req[0], &stat[0]);
1457 req.clear();
1458 stat.clear();
1459 c = 0;
1460 }
1461 for (unsigned i = 0; i < Nprec; i++)
1462 {
1463 for (unsigned p = 0; p < nproc; p++)
1464 {
1465 nnz_recv[i][p] = nnz_recv_temp[(i * nproc) + p];
1466 }
1467 }
1468 delete[] nnz_recv_temp;
1469
1470 // get the number of nnzs to be received from each processor
1471
1472 // total number of nnz to be received
1473 unsigned nnz_total = 0;
1474 for (unsigned p = 0; p < nproc; p++)
1475 {
1476 nnz_total += nnz_recv[Color][p];
1477 }
1478
1479 // compute nnz block start
1480 Vector<unsigned> nnz_start_proc;
1481 Vector<unsigned> nnz_start_index;
1482 unsigned row_ptr = target_first_row[Color][my_rank];
1483 int p = 0;
1484 unsigned nnz_ptr = 0;
1485 for (p = 0; p < int(nproc); p++)
1486 {
1487 if (First_row_from_proc[Color][p] == row_ptr &&
1488 Nrow_local_from_proc[Color][p] != 0 && nnz_ptr != nnz_total)
1489 {
1490 nnz_start_proc.push_back(p);
1491 nnz_start_index.push_back(nnz_ptr);
1492 nnz_ptr += nnz_recv[Color][p];
1493 row_ptr += Nrow_local_from_proc[Color][p];
1494 p = -1;
1495 }
1496 }
1497
1498 // storage for derived datatypes
1499 Vector<MPI_Datatype> datatypes;
1500
1501 // storage for received data
1502 double* values_recv = new double[nnz_total];
1503 int* column_index_recv = new int[nnz_total];
1504 int* row_start_recv = new int[target_nrow_local[Color][my_rank] + 1];
1505
1506 ///////////////////////////////////////////////////////////////////////////
1507 // RECV
1508 ///////////////////////////////////////////////////////////////////////////
1509 unsigned c_recv = 0;
1510 Vector<MPI_Request> recv_req;
1511
1512 // receive the contents of the vector
1513 for (unsigned p = 0; p < nproc; p++)
1514 {
1515 // use mpi methods to send to and receive from all but my rank
1516 if (p != my_rank)
1517 {
1518 // just receive
1519 if (nnz_recv[Color][p] != 0)
1520 {
1521 // create 3 MPI contiguous datatypes
1522 // + values
1523 // + column_index
1524 // + row_start
1525
1526 // values
1527 MPI_Datatype datatype_values;
1528 MPI_Type_contiguous(
1529 int(nnz_recv[Color][p]), MPI_DOUBLE, &datatype_values);
1530 MPI_Type_commit(&datatype_values);
1531 datatypes.push_back(datatype_values);
1532
1533 // column index
1534 MPI_Datatype datatype_column_index;
1535 MPI_Type_contiguous(
1536 int(nnz_recv[Color][p]), MPI_INT, &datatype_column_index);
1537 MPI_Type_commit(&datatype_column_index);
1538 datatypes.push_back(datatype_column_index);
1539
1540 // row start
1541 MPI_Datatype datatype_row_start;
1542 MPI_Type_contiguous(int(Nrow_local_from_proc[Color][p]),
1543 MPI_INT,
1544 &datatype_row_start);
1545 MPI_Type_commit(&datatype_row_start);
1546 datatypes.push_back(datatype_row_start);
1547
1548 // assemble the typelist
1549 MPI_Datatype typelist[3];
1550 typelist[0] = datatype_values;
1551 typelist[1] = datatype_column_index;
1552 typelist[2] = datatype_row_start;
1553
1554 // compute the offset for row_start
1555 int offset_n =
1556 First_row_from_proc[Color][p] - target_first_row[Color][my_rank];
1557
1558 // compute the offset for the values and column_index
1559 unsigned k = 0;
1560 while (nnz_start_proc[k] != p)
1561 {
1562 k++;
1563 }
1564 int offset_nnz = nnz_start_index[k];
1565
1566 // next compute the displacements
1567 MPI_Aint displacements[3];
1568 MPI_Get_address(values_recv + offset_nnz, &displacements[0]);
1569 MPI_Get_address(column_index_recv + offset_nnz, &displacements[1]);
1570 MPI_Get_address(row_start_recv + offset_n, &displacements[2]);
1571 for (int j = 2; j >= 0; j--)
1572 {
1573 displacements[j] -= displacements[0];
1574 }
1575
1576 // set the block lengths
1577 int block_length[3];
1578 block_length[0] = block_length[1] = block_length[2] = 1;
1579
1580 // now build the final datatype
1581 MPI_Datatype recv_type;
1582 MPI_Type_create_struct(
1583 3, block_length, displacements, typelist, &recv_type);
1584 MPI_Type_commit(&recv_type);
1585 datatypes.push_back(recv_type);
1586
1587 // recv
1588 int tag = this->compute_tag(nproc, p, my_rank, 1);
1589 MPI_Request tr1;
1590 recv_req.push_back(tr1);
1591 MPI_Irecv(values_recv + offset_nnz,
1592 1,
1593 recv_type,
1594 p,
1595 tag,
1596 Global_communicator_pt->mpi_comm(),
1597 &recv_req[c_recv]);
1598 c_recv++;
1599 }
1600 }
1601 }
1602
1603 ///////////////////////////////////////////////////////////////////////////
1604 // SEND
1605 ///////////////////////////////////////////////////////////////////////////
1606 unsigned c_send = 0;
1607 Vector<MPI_Request> send_req;
1608
1609 // for every matrix we assemble the duplicate of the matrix on fewer
1610 // processors and setup the preconditioner
1611 for (unsigned i = 0; i < Nprec; i++)
1612 {
1613 // get pointers to the underlying data in the current matrix
1614 double* values_send = matrix_pt[i]->value();
1615 int* row_start_send = matrix_pt[i]->row_start();
1616 int* column_index_send = matrix_pt[i]->column_index();
1617
1618 // send and receive the contents of the vector
1619 for (unsigned p = 0; p < nproc; p++)
1620 {
1621 // use mpi methods to send to and receive from all but my rank
1622 if (p != my_rank)
1623 {
1624 // send
1625 if (nnz_send[i][p] != 0)
1626 {
1627 // create 3 MPI contiguous datatypes
1628 // + values
1629 // + column_index
1630 // + row_start
1631
1632 // values
1633 MPI_Datatype datatype_values;
1634 MPI_Type_contiguous(
1635 int(nnz_send[i][p]), MPI_DOUBLE, &datatype_values);
1636 MPI_Type_commit(&datatype_values);
1637 datatypes.push_back(datatype_values);
1638
1639 // column index
1640 MPI_Datatype datatype_column_index;
1641 MPI_Type_contiguous(
1642 int(nnz_send[i][p]), MPI_INT, &datatype_column_index);
1643 MPI_Type_commit(&datatype_column_index);
1644 datatypes.push_back(datatype_column_index);
1645
1646 // row start
1647 MPI_Datatype datatype_row_start;
1648 MPI_Type_contiguous(
1649 int(Nrow_local_for_proc[i][p]), MPI_INT, &datatype_row_start);
1650 MPI_Type_commit(&datatype_row_start);
1651 datatypes.push_back(datatype_row_start);
1652
1653 // assemble the typelist
1654 MPI_Datatype typelist[3];
1655 typelist[0] = datatype_values;
1656 typelist[1] = datatype_column_index;
1657 typelist[2] = datatype_row_start;
1658
1659 // compute the offset for row_start
1660 int offset_n =
1661 First_row_for_proc[i][p] - matrix_pt[i]->first_row(my_rank);
1662
1663 // compute the offset for the values and column_index
1664 int offset_nnz = row_start_send[offset_n];
1665
1666 // next compute the displacements
1667 MPI_Aint displacements[3];
1668 MPI_Get_address(values_send + offset_nnz, &displacements[0]);
1669 MPI_Get_address(column_index_send + offset_nnz,
1670 &displacements[1]);
1671 MPI_Get_address(row_start_send + offset_n, &displacements[2]);
1672 for (int j = 2; j >= 0; j--)
1673 {
1674 displacements[j] -= displacements[0];
1675 }
1676
1677 // set the block lengths
1678 int block_length[3];
1679 block_length[0] = block_length[1] = block_length[2] = 1;
1680
1681 // now build the final datatype
1682 MPI_Datatype send_type;
1683 MPI_Type_create_struct(
1684 3, block_length, displacements, typelist, &send_type);
1685 MPI_Type_commit(&send_type);
1686 datatypes.push_back(send_type);
1687
1688 // send
1689 int tag = this->compute_tag(nproc, my_rank, p, 1);
1690 MPI_Request tr1;
1691 send_req.push_back(tr1);
1692 MPI_Isend(values_send + offset_nnz,
1693 1,
1694 send_type,
1695 p,
1696 tag,
1697 Global_communicator_pt->mpi_comm(),
1698 &send_req[c_send]);
1699 c_send++;
1700 }
1701 }
1702 }
1703 }
1704
1705 // otherwise send to self (copy)
1706 if (nnz_recv[Color][my_rank] != 0)
1707 {
1708 // get pointers to the underlying data in the current matrix
1709 double* values_send = matrix_pt[Color]->value();
1710 int* row_start_send = matrix_pt[Color]->row_start();
1711 int* column_index_send = matrix_pt[Color]->column_index();
1712
1713 // offset for row_start send to self
1714 unsigned offset_n_send = First_row_for_proc[Color][my_rank] -
1715 matrix_pt[Color]->first_row(my_rank);
1716
1717 // offset for values and column_index send to self
1718 unsigned offset_nnz_send = row_start_send[offset_n_send];
1719
1720 // offset for row_start receive from self
1721 unsigned offset_n_recv = First_row_from_proc[Color][my_rank] -
1722 target_first_row[Color][my_rank];
1723
1724 // offset for values and column_index receive from self
1725 unsigned k = 0;
1726 while (nnz_start_proc[k] != my_rank)
1727 {
1728 k++;
1729 }
1730 unsigned offset_nnz_recv = nnz_start_index[k];
1731
1732 // and copy
1733
1734 // values and column_index
1735 unsigned n_nnz = nnz_send[Color][my_rank];
1736 for (unsigned j = 0; j < n_nnz; j++)
1737 {
1738 values_recv[offset_nnz_recv + j] = values_send[offset_nnz_send + j];
1739 column_index_recv[offset_nnz_recv + j] =
1740 column_index_send[offset_nnz_send + j];
1741 }
1742
1743 // row start
1744 unsigned n_n = Nrow_local_from_proc[Color][my_rank];
1745 for (unsigned j = 0; j < n_n; j++)
1746 {
1747 row_start_recv[offset_n_recv + j] = row_start_send[offset_n_send + j];
1748 }
1749 }
1750
1751 // create the local distribution for this matrix
1752 LinearAlgebraDistribution* temp_dist_pt =
1753 new LinearAlgebraDistribution(Local_communicator_pt,
1754 target_first_row[Color][my_rank],
1755 target_nrow_local[Color][my_rank]);
1756
1757 // create the corresponding matrix
1758 local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
1759 delete temp_dist_pt; // (dist has now been copied)
1760
1761 ///////////////////////////////////////////////////////////////////////////
1762 // and WAIT...
1763 ///////////////////////////////////////////////////////////////////////////
1764 if (c_recv != 0)
1765 {
1766 Vector<MPI_Status> recv_stat(c_recv);
1767 MPI_Waitall(c_recv, &recv_req[0], &recv_stat[0]);
1768 recv_req.clear();
1769 recv_stat.clear();
1770 }
1771
1772 // build the matrix
1773
1774 // update the row start
1775 unsigned nproc_contrib = nnz_start_index.size();
1776 for (unsigned j = 0; j < nproc_contrib; j++)
1777 {
1778 unsigned first = First_row_from_proc[Color][nnz_start_proc[j]] -
1779 target_first_row[Color][my_rank];
1780 unsigned last = first + Nrow_local_from_proc[Color][nnz_start_proc[j]];
1781 unsigned nnz_inc = nnz_start_index[j] - row_start_recv[first];
1782 for (unsigned k = first; k < last; k++)
1783 {
1784 row_start_recv[k] += nnz_inc;
1785 }
1786 }
1787 row_start_recv[target_nrow_local[Color][my_rank]] = int(nnz_total);
1788
1789 // build the matrix without a copy of the data
1790 local_matrix_pt->build_without_copy(matrix_pt[Color]->ncol(),
1791 nnz_total,
1792 values_recv,
1793 column_index_recv,
1794 row_start_recv);
1795
1796 // and finally wait for the sends
1797 if (c_send != 0)
1798 {
1799 Vector<MPI_Status> send_stat(c_send);
1800 MPI_Waitall(c_send, &send_req[0], &send_stat[0]);
1801 send_req.clear();
1802 send_stat.clear();
1803 }
1804
1805 // and clear the datatype
1806 unsigned ndatatypes = datatypes.size();
1807 for (unsigned i = 0; i < ndatatypes; i++)
1808 {
1809 MPI_Type_free(&datatypes[i]);
1810 }
1811 }
1812
1813
1814 /////////////////////////////////////////////////////////////////////////////
1815 /////////////////////////////////////////////////////////////////////////////
1816 /////////////////////////////////////////////////////////////////////////////
1817 /////////////////////////////////////////////////////////////////////////////
1818
1819
1820 // METHOD 3
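// Method 3: as Method 1, but the nonzero counts are only sent with
// non-blocking sends here; they are gathered in a separate receive loop
// over the preconditioners below.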
1821 else if (Method == 3)
1822 {
1823 // temporary storage for nnz recv
1824 unsigned* nnz_recv_temp = new unsigned[nproc * Nprec];
1825 for (unsigned j = 0; j < nproc * Nprec; j++)
1826 {
1827 nnz_recv_temp[j] = 0;
1828 }
1829
1830 // for every matrix we assemble the duplicate of the matrix on fewer
1831 // processors and setup the preconditioner
1832 for (unsigned i = 0; i < Nprec; i++)
1833 {
1834 // if the matrix is global (!distributed) then just construct a copy
1835 // on the subset of processors
1836 if (!matrix_pt[i]->distributed())
1837 {
1838 // if this matrix is to be preconditioned by this processor
1839 if (i == Color)
1840 {
1841 // create the local distribution for this matrix
1842 LinearAlgebraDistribution* temp_dist_pt =
1843 new LinearAlgebraDistribution(
1844 Local_communicator_pt, matrix_pt[i]->nrow(), false);
1845
1846 // create the corresponding matrix
1847 local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
1848 delete temp_dist_pt; // (dist has now been copied)
1849
1850 // get pointers to the underlying data
1851 double* values_pt = matrix_pt[i]->value();
1852 int* column_index_pt = matrix_pt[i]->column_index();
1853 int* row_start_pt = matrix_pt[i]->row_start();
1854
1855 // build the matrix without a copy of the data
1856 local_matrix_pt->build_without_copy(matrix_pt[i]->ncol(),
1857 matrix_pt[i]->nnz(),
1858 values_pt,
1859 column_index_pt,
1860 row_start_pt);
1861 }
1862 }
1863
1864 // otherwise the matrix is distributed and has to be redistributed over
1865 // this preconditioner's subset of processors
1866 else
1867 {
1868 // first compute the distribution of this preconditioner on its subset
1869 // of processors
1870
1871 // number of rows for this preconditioner
1872 unsigned nrow = matrix_pt[i]->nrow();
1873
1874 // set up the target first_row and nrow_local for this preconditioner
1875 target_first_row[i].resize(nproc);
1876 target_nrow_local[i].resize(nproc);
1877 unsigned nproc_local = Nproc_for_prec[i];
1878 for (unsigned p = 0; p < nproc_local; p++)
1879 {
1880 int pp = First_proc_for_prec[i] + p;
1881 target_first_row[i][pp] =
1882 unsigned(double(p * nrow) / double(nproc_local));
1883 }
1884 for (unsigned p = 0; p < nproc_local - 1; p++)
1885 {
1886 int pp = First_proc_for_prec[i] + p;
1887 target_nrow_local[i][pp] =
1888 target_first_row[i][pp + 1] - target_first_row[i][pp];
1889 }
1890 unsigned last_local_proc = First_proc_for_prec[i] + nproc_local - 1;
1891 target_nrow_local[i][last_local_proc] =
1892 nrow - target_first_row[i][last_local_proc];
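// Illustrative example of the row split above: with nrow = 10 rows shared
// by nproc_local = 3 processors, target_first_row becomes {0, 3, 6} and
// target_nrow_local becomes {3, 3, 4}, i.e. processor p's block starts at
// p*nrow/nproc_local (rounded down) and the last processor in the subset
// absorbs the remainder.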
1893
1894 // get the details of the current distribution
1895 Vector<unsigned> current_first_row(nproc);
1896 Vector<unsigned> current_nrow_local(nproc);
1897 for (unsigned p = 0; p < nproc; p++)
1898 {
1899 current_first_row[p] = matrix_pt[i]->first_row(p);
1900 current_nrow_local[p] = matrix_pt[i]->nrow_local(p);
1901 }
1902
1903 // resize storage for details of the data to be sent and received
1904 First_row_for_proc[i].resize(nproc, 0);
1905 Nrow_local_for_proc[i].resize(nproc, 0);
1906 First_row_from_proc[i].resize(nproc, 0);
1907 Nrow_local_from_proc[i].resize(nproc, 0);
1908
1909 // for every processor compute the first_row and nrow_local that will
1910 // be sent and received by this processor
1911 for (unsigned p = 0; p < nproc; p++)
1912 {
1913 // start with data to be sent
1914 if ((target_first_row[i][p] <
1915 (current_first_row[my_rank] + current_nrow_local[my_rank])) &&
1916 (current_first_row[my_rank] <
1917 (target_first_row[i][p] + target_nrow_local[i][p])))
1918 {
1919 First_row_for_proc[i][p] =
1920 std::max(current_first_row[my_rank], target_first_row[i][p]);
1921 Nrow_local_for_proc[i][p] =
1922 std::min(
1923 (current_first_row[my_rank] + current_nrow_local[my_rank]),
1924 (target_first_row[i][p] + target_nrow_local[i][p])) -
1925 First_row_for_proc[i][p];
1926 }
1927
1928 // and data to be received
1929 if ((target_first_row[i][my_rank] <
1930 (current_first_row[p] + current_nrow_local[p])) &&
1931 (current_first_row[p] < (target_first_row[i][my_rank] +
1932 target_nrow_local[i][my_rank])))
1933 {
1934 First_row_from_proc[i][p] =
1935 std::max(current_first_row[p], target_first_row[i][my_rank]);
1936 Nrow_local_from_proc[i][p] =
1937 std::min((current_first_row[p] + current_nrow_local[p]),
1938 (target_first_row[i][my_rank] +
1939 target_nrow_local[i][my_rank])) -
1940 First_row_from_proc[i][p];
1941 }
1942 }
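// In other words: the rows to be sent to processor p are the overlap of
// the half-open ranges
// [current_first_row[my_rank], current_first_row[my_rank] + current_nrow_local[my_rank])
// and
// [target_first_row[i][p], target_first_row[i][p] + target_nrow_local[i][p]);
// the rows to be received are the analogous overlap with the roles of p
// and my_rank swapped. For example, if this processor currently holds
// rows [4,8) and processor p's target block is [6,12), rows [6,8) are
// sent: First_row_for_proc[i][p] = 6 and Nrow_local_for_proc[i][p] = 2.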
1943
1944 // resize nnz_send
1945 nnz_send[i].resize(nproc);
1946
1947 // compute the number of nnzs to be sent
1948 // to each processor
1949 for (unsigned p = 0; p < nproc; p++)
1950 {
1951 if (Nrow_local_for_proc[i][p] != 0)
1952 {
1953 int* row_start = matrix_pt[i]->row_start();
1954 unsigned k =
1955 First_row_for_proc[i][p] - current_first_row[my_rank];
1956 nnz_send[i][p] =
1957 row_start[k + Nrow_local_for_proc[i][p]] - row_start[k];
1958 }
1959 }
1960
1961 // resize nnz_recv
1962 nnz_recv[i].resize(nproc);
1963
1964 // send the nnz counts to the processors that will receive data from us
1965 for (unsigned p = 0; p < nproc; p++)
1966 {
1967 // send and recv
1968
1969 // don't MPI send to self
1970 if (p != my_rank)
1971 {
1972 // non block send
1973 if (Nrow_local_for_proc[i][p] != 0)
1974 {
1975 // send to other processors
1976 int tag = this->compute_tag(nproc, my_rank, p, 0);
1977 MPI_Request tr;
1978 req.push_back(tr);
1979 MPI_Isend(&nnz_send[i][p],
1980 1,
1981 MPI_UNSIGNED,
1982 p,
1983 tag,
1984 Global_communicator_pt->mpi_comm(),
1985 &req[c]);
1986 c++;
1987 }
1988 }
1989 // copy from self (no MPI needed)
1990 else
1991 {
1992 if (Nrow_local_for_proc[i][p] != 0)
1993 {
1994 nnz_recv_temp[(i * nproc) + p] = nnz_send[i][p];
1995 }
1996 }
1997 }
1998 }
1999 }
2000
2001 for (unsigned i = 0; i < Nprec; i++)
2002 {
2003 // resize nnz_recv
2004 nnz_recv[i].resize(nproc);
2005
2006 // receive nnz from other processors
2007 for (unsigned pp = 0; pp < nproc; pp++)
2008 {
2009 // next processor to receive from
2010 unsigned p = (nproc + my_rank - pp) % nproc;
2011
2012 // don't MPI receive from self
2013 if (p != my_rank)
2014 {
2015 // blocking recv
2016 if (Nrow_local_from_proc[i][p] != 0)
2017 {
2018 int tag = this->compute_tag(nproc, p, my_rank, 0);
2019 MPI_Status stat;
2020 unsigned nnz_temp;
2021 MPI_Recv(&nnz_temp,
2022 1,
2023 MPI_UNSIGNED,
2024 p,
2025 tag,
2026 Global_communicator_pt->mpi_comm(),
2027 &stat);
2028 nnz_recv[i][p] = nnz_temp;
2029 }
2030 }
2031
2032 // receive from self
2033 else
2034 {
2035 nnz_recv[i][p] = nnz_send[i][p];
2036 }
2037 }
2038 }
2039
2040 // get the number of nnzs to be received from each processor
2041
2042 // total number of nnz to be received
2043 unsigned nnz_total = 0;
2044 for (unsigned p = 0; p < nproc; p++)
2045 {
2046 nnz_total += nnz_recv[Color][p];
2047 }
2048
2049 // compute nnz block start
2050 Vector<unsigned> nnz_start_proc;
2051 Vector<unsigned> nnz_start_index;
2052 unsigned row_ptr = target_first_row[Color][my_rank];
2053 int p = 0;
2054 unsigned nnz_ptr = 0;
2055 for (p = 0; p < int(nproc); p++)
2056 {
2057 if (First_row_from_proc[Color][p] == row_ptr &&
2058 Nrow_local_from_proc[Color][p] != 0 && nnz_ptr != nnz_total)
2059 {
2060 nnz_start_proc.push_back(p);
2061 nnz_start_index.push_back(nnz_ptr);
2062 nnz_ptr += nnz_recv[Color][p];
2063 row_ptr += Nrow_local_from_proc[Color][p];
2064 p = -1;
2065 }
2066 }
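// The scan above orders the contributing processors by the position of
// their row blocks within this processor's target block and records where
// each contribution will start in the contiguous receive buffers; the
// index p is reset to -1 after every match because the contributors are
// not necessarily visited in row order. Illustrative example: if
// processor 3 supplies the first 5 local rows with 20 nonzeros and
// processor 1 supplies the next 4 rows with 12 nonzeros, then
// nnz_start_proc = {3, 1} and nnz_start_index = {0, 20}.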
2067
2068 // storage for derived datatypes
2069 Vector<MPI_Datatype> datatypes;
2070
2071 // storage for received data
2072 double* values_recv = new double[nnz_total];
2073 int* column_index_recv = new int[nnz_total];
2074 int* row_start_recv = new int[target_nrow_local[Color][my_rank] + 1];
2075
2076 ///////////////////////////////////////////////////////////////////////////
2077 // RECV
2078 ///////////////////////////////////////////////////////////////////////////
2079 unsigned c_recv = 0;
2080 Vector<MPI_Request> recv_req;
2081
2082 // receive the matrix data (values, column_index and row_start)
2083 for (unsigned p = 0; p < nproc; p++)
2084 {
2085 // use mpi methods to send to and receive from all but my rank
2086 if (p != my_rank)
2087 {
2088 // just receive
2089 if (nnz_recv[Color][p] != 0)
2090 {
2091 // create 3 MPI contiguous datatypes
2092 // + values
2093 // + column_index
2094 // + row_start
2095
2096 // values
2097 MPI_Datatype datatype_values;
2098 MPI_Type_contiguous(
2099 int(nnz_recv[Color][p]), MPI_DOUBLE, &datatype_values);
2100 MPI_Type_commit(&datatype_values);
2101 datatypes.push_back(datatype_values);
2102
2103 // column index
2104 MPI_Datatype datatype_column_index;
2105 MPI_Type_contiguous(
2106 int(nnz_recv[Color][p]), MPI_INT, &datatype_column_index);
2107 MPI_Type_commit(&datatype_column_index);
2108 datatypes.push_back(datatype_column_index);
2109
2110 // row start
2111 MPI_Datatype datatype_row_start;
2112 MPI_Type_contiguous(int(Nrow_local_from_proc[Color][p]),
2113 MPI_INT,
2114 &datatype_row_start);
2115 MPI_Type_commit(&datatype_row_start);
2116 datatypes.push_back(datatype_row_start);
2117
2118 // assemble the typelist
2119 MPI_Datatype typelist[3];
2120 typelist[0] = datatype_values;
2121 typelist[1] = datatype_column_index;
2122 typelist[2] = datatype_row_start;
2123
2124 // compute the offset for row_start
2125 int offset_n =
2126 First_row_from_proc[Color][p] - target_first_row[Color][my_rank];
2127
2128 // compute the offset for the values and column_index
2129 unsigned k = 0;
2130 while (nnz_start_proc[k] != p)
2131 {
2132 k++;
2133 }
2134 int offset_nnz = nnz_start_index[k];
2135
2136 // next compute the displacements
2137 MPI_Aint displacements[3];
2138 MPI_Get_address(values_recv + offset_nnz, &displacements[0]);
2139 MPI_Get_address(column_index_recv + offset_nnz, &displacements[1]);
2140 MPI_Get_address(row_start_recv + offset_n, &displacements[2]);
2141 for (int j = 2; j >= 0; j--)
2142 {
2143 displacements[j] -= displacements[0];
2144 }
2145
2146 // set the block lengths
2147 int block_length[3];
2148 block_length[0] = block_length[1] = block_length[2] = 1;
2149
2150 // now build the final datatype
2151 MPI_Datatype recv_type;
2152 MPI_Type_create_struct(
2153 3, block_length, displacements, typelist, &recv_type);
2154 MPI_Type_commit(&recv_type);
2155 datatypes.push_back(recv_type);
2156
2157 // recv
2158 int tag = this->compute_tag(nproc, p, my_rank, 1);
2159 MPI_Request tr1;
2160 recv_req.push_back(tr1);
2161 MPI_Irecv(values_recv + offset_nnz,
2162 1,
2163 recv_type,
2164 p,
2165 tag,
2166 Global_communicator_pt->mpi_comm(),
2167 &recv_req[c_recv]);
2168 c_recv++;
2169 }
2170 }
2171 }
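// Note: each incoming message delivers three segments (the values, the
// column indices and a slice of row_start) through a single MPI_Irecv;
// the struct datatype stores the three target addresses as displacements
// relative to values_recv + offset_nnz, so the data arrives directly in
// its final position and no unpacking buffer is required.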
2172
2173 ///////////////////////////////////////////////////////////////////////////
2174 // SEND
2175 ///////////////////////////////////////////////////////////////////////////
2176 unsigned c_send = 0;
2177 Vector<MPI_Request> send_req;
2178
2179 // for every matrix we assemble the duplicate of the matrix on fewer
2180 // processors and setup the preconditioner
2181 for (unsigned i = 0; i < Nprec; i++)
2182 {
2183 // get pointers to the underlying data in the current matrix
2184 double* values_send = matrix_pt[i]->value();
2185 int* row_start_send = matrix_pt[i]->row_start();
2186 int* column_index_send = matrix_pt[i]->column_index();
2187
2188 // send the matrix data to each processor that requires it
2189 for (unsigned p = 0; p < nproc; p++)
2190 {
2191 // use mpi methods to send to and receive from all but my rank
2192 if (p != my_rank)
2193 {
2194 // send
2195 if (nnz_send[i][p] != 0)
2196 {
2197 // create 3 MPI contiguous datatypes
2198 // + values
2199 // + column_index
2200 // + row_start
2201
2202 // values
2203 MPI_Datatype datatype_values;
2204 MPI_Type_contiguous(
2205 int(nnz_send[i][p]), MPI_DOUBLE, &datatype_values);
2206 MPI_Type_commit(&datatype_values);
2207 datatypes.push_back(datatype_values);
2208
2209 // column index
2210 MPI_Datatype datatype_column_index;
2211 MPI_Type_contiguous(
2212 int(nnz_send[i][p]), MPI_INT, &datatype_column_index);
2213 MPI_Type_commit(&datatype_column_index);
2214 datatypes.push_back(datatype_column_index);
2215
2216 // row start
2217 MPI_Datatype datatype_row_start;
2218 MPI_Type_contiguous(
2219 int(Nrow_local_for_proc[i][p]), MPI_INT, &datatype_row_start);
2220 MPI_Type_commit(&datatype_row_start);
2221 datatypes.push_back(datatype_row_start);
2222
2223 // assemble the typelist
2224 MPI_Datatype typelist[3];
2225 typelist[0] = datatype_values;
2226 typelist[1] = datatype_column_index;
2227 typelist[2] = datatype_row_start;
2228
2229 // compute the offset for row_start
2230 int offset_n =
2231 First_row_for_proc[i][p] - matrix_pt[i]->first_row(my_rank);
2232
2233 // compute the offset for the values and column_index
2234 int offset_nnz = row_start_send[offset_n];
2235
2236 // next compute the displacements
2237 MPI_Aint displacements[3];
2238 MPI_Get_address(values_send + offset_nnz, &displacements[0]);
2239 MPI_Get_address(column_index_send + offset_nnz,
2240 &displacements[1]);
2241 MPI_Get_address(row_start_send + offset_n, &displacements[2]);
2242 for (int j = 2; j >= 0; j--)
2243 {
2244 displacements[j] -= displacements[0];
2245 }
2246
2247 // set the block lengths
2248 int block_length[3];
2249 block_length[0] = block_length[1] = block_length[2] = 1;
2250
2251 // now build the final datatype
2252 MPI_Datatype send_type;
2253 MPI_Type_create_struct(
2254 3, block_length, displacements, typelist, &send_type);
2255 MPI_Type_commit(&send_type);
2256 datatypes.push_back(send_type);
2257
2258 // send
2259 int tag = this->compute_tag(nproc, my_rank, p, 1);
2260 MPI_Request tr1;
2261 send_req.push_back(tr1);
2262 MPI_Isend(values_send + offset_nnz,
2263 1,
2264 send_type,
2265 p,
2266 tag,
2267 Global_communicator_pt->mpi_comm(),
2268 &send_req[c_send]);
2269 c_send++;
2270 }
2271 }
2272 }
2273 }
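// The send side mirrors the receives: one struct datatype per destination
// describes the relevant slices of values, column_index and row_start in
// the existing matrix storage, so the data is sent in place without being
// copied into a separate send buffer.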
2274
2275 // now deal with the data this processor sends to itself (a plain copy)
2276 if (nnz_recv[Color][my_rank] != 0)
2277 {
2278 // get pointers to the underlying data in the current matrix
2279 double* values_send = matrix_pt[Color]->value();
2280 int* row_start_send = matrix_pt[Color]->row_start();
2281 int* column_index_send = matrix_pt[Color]->column_index();
2282
2283 // offset for row_start send to self
2284 unsigned offset_n_send = First_row_for_proc[Color][my_rank] -
2285 matrix_pt[Color]->first_row(my_rank);
2286
2287 // offset for values and column_index send to self
2288 unsigned offset_nnz_send = row_start_send[offset_n_send];
2289
2290 // offset for row_start receive from self
2291 unsigned offset_n_recv = First_row_from_proc[Color][my_rank] -
2292 target_first_row[Color][my_rank];
2293
2294 // offset for values and column_index receive from self
2295 unsigned k = 0;
2296 while (nnz_start_proc[k] != my_rank)
2297 {
2298 k++;
2299 }
2300 unsigned offset_nnz_recv = nnz_start_index[k];
2301
2302 // and copy
2303
2304 // values and column_index
2305 unsigned n_nnz = nnz_send[Color][my_rank];
2306 for (unsigned j = 0; j < n_nnz; j++)
2307 {
2308 values_recv[offset_nnz_recv + j] = values_send[offset_nnz_send + j];
2309 column_index_recv[offset_nnz_recv + j] =
2310 column_index_send[offset_nnz_send + j];
2311 }
2312
2313 // row start
2314 unsigned n_n = Nrow_local_from_proc[Color][my_rank];
2315 for (unsigned j = 0; j < n_n; j++)
2316 {
2317 row_start_recv[offset_n_recv + j] = row_start_send[offset_n_send + j];
2318 }
2319 }
2320
2321 // create the local distribution for this matrix
2322 LinearAlgebraDistribution* temp_dist_pt =
2323 new LinearAlgebraDistribution(Local_communicator_pt,
2324 target_first_row[Color][my_rank],
2325 target_nrow_local[Color][my_rank]);
2326
2327 // create the corresponding matrix
2328 local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
2329 delete temp_dist_pt; // (dist has now been copied)
2330
2331 ///////////////////////////////////////////////////////////////////////////
2332 // and WAIT...
2333 ///////////////////////////////////////////////////////////////////////////
2334 if (c_recv != 0)
2335 {
2336 Vector<MPI_Status> recv_stat(c_recv);
2337 MPI_Waitall(c_recv, &recv_req[0], &recv_stat[0]);
2338 recv_req.clear();
2339 recv_stat.clear();
2340 }
2341
2342 // build the matrix
2343
2344 // update the row start
2345 unsigned nproc_contrib = nnz_start_index.size();
2346 for (unsigned j = 0; j < nproc_contrib; j++)
2347 {
2348 unsigned first = First_row_from_proc[Color][nnz_start_proc[j]] -
2349 target_first_row[Color][my_rank];
2350 unsigned last = first + Nrow_local_from_proc[Color][nnz_start_proc[j]];
2351 unsigned nnz_inc = nnz_start_index[j] - row_start_recv[first];
2352 for (unsigned k = first; k < last; k++)
2353 {
2354 row_start_recv[k] += nnz_inc;
2355 }
2356 }
2357 row_start_recv[target_nrow_local[Color][my_rank]] = int(nnz_total);
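// The row_start entries received from each processor still count nonzeros
// from the start of that processor's original matrix, so each block is
// shifted to count from the start of the new local matrix instead.
// Illustrative example: if the block received from processor
// nnz_start_proc[j] should begin at nonzero index nnz_start_index[j] = 20
// but its first entry arrived as 57, every entry in that block is shifted
// down by 37. Finally the last entry of row_start is set to the total
// number of nonzeros, as required by compressed row storage.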
2358
2359 // build the matrix without a copy of the data
2360 local_matrix_pt->build_without_copy(matrix_pt[Color]->ncol(),
2361 nnz_total,
2362 values_recv,
2363 column_index_recv,
2364 row_start_recv);
2365
2366 // and finally wait for the sends
2367 if (c_send != 0)
2368 {
2369 Vector<MPI_Status> send_stat(c_send);
2370 MPI_Waitall(c_send, &send_req[0], &send_stat[0]);
2371 send_req.clear();
2372 send_stat.clear();
2373 }
2374
2375 // and free the derived datatypes
2376 unsigned ndatatypes = datatypes.size();
2377 for (unsigned i = 0; i < ndatatypes; i++)
2378 {
2379 MPI_Type_free(&datatypes[i]);
2380 }
2381 }
2382
2383 // now setup the preconditioner
2384 Preconditioner_pt = prec_pt[Color];
2385 Preconditioner_pt->setup(local_matrix_pt);
2386
2387 // clean up memory
2388 if (matrix_pt[0]->distributed())
2389 {
2390 delete local_matrix_pt;
2391 }
2392
2393 // delete the preconditioners not used on this processor
2394 for (unsigned i = 0; i < Nprec; i++)
2395 {
2396 if (i != Color)
2397 {
2398 delete prec_pt[i];
2399 }
2400 }
2401 } // end of setup_preconditioners()
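// A minimal usage sketch of the two methods defined in this file
// (illustrative only; the matrices, the preconditioners and
// communicator_pt are assumed to exist and to be fully built):
//
//   Vector<CRDoubleMatrix*> matrix_pt(2);
//   Vector<Preconditioner*> prec_pt(2);
//   // ... point matrix_pt[*] and prec_pt[*] at the block matrices and
//   //     the preconditioners to be applied to them ...
//   PreconditionerArray prec_array;
//   prec_array.setup_preconditioners(matrix_pt, prec_pt, communicator_pt);
//
//   // later, e.g. inside an outer block preconditioner:
//   Vector<DoubleVector> r(2);
//   Vector<DoubleVector> z(2);
//   // ... fill r[*] with the block residuals ...
//   prec_array.solve_preconditioners(r, z);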
2402
2403 //============================================================================
2404 /// Applies each preconditioner to the corresponding vector in
2405 /// r and z
2406 //=============================================================================
2407 void PreconditionerArray::solve_preconditioners(const Vector<DoubleVector>& r,
2408 Vector<DoubleVector>& z)
2409 {
2410#ifdef PARANOID
2411 // check that a preconditioner has been setup
2412 if (Preconditioner_pt == 0)
2413 {
2414 std::ostringstream error_message;
2415 error_message << "The preconditioners have not been setup.";
2416 throw OomphLibError(
2417 error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
2418 }
2419
2420 // check that r is the correct length
2421 if (r.size() != Nprec)
2422 {
2423 std::ostringstream error_message;
2424 error_message << "This PreconditionerArray has " << Nprec
2425 << " preconditioners but r only contains " << r.size()
2426 << " preconditioners.";
2427 throw OomphLibError(
2428 error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
2429 }
2430
2431 // check that z is the correct length
2432 if (z.size() != Nprec)
2433 {
2434 std::ostringstream error_message;
2435 error_message << "This PreconditionerArray has " << Nprec
2436 << " preconditioners but z only contains " << z.size()
2437 << " preconditioners.";
2438 throw OomphLibError(
2439 error_message.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
2440 }
2441 // check that each vector has the same distribution as the
2442 // corresponding matrix passed to setup_preconditioners(...)
2443 for (unsigned i = 0; i < Nprec; i++)
2444 {
2445 if (*r[i].distribution_pt() != *Distribution_pt[i])
2446 {
2447 std::ostringstream error_message;
2448 error_message << "The distribution of r[" << i << "] does not have the"
2449 << " the same distribution as the matrix_pt[" << i
2450 << "] that was passed to setup_preconditioners(...)";
2451 throw OomphLibError(error_message.str(),
2452 OOMPH_CURRENT_FUNCTION,
2453 OOMPH_EXCEPTION_LOCATION);
2454 }
2455 }
2456#endif
2457
2458 // the local r vector
2459 DoubleVector local_r(Preconditioner_pt->distribution_pt(), 0.0);
2460
2461 // number of processors
2462 unsigned nproc = Global_communicator_pt->nproc();
2463
2464 // cache my global rank
2465 unsigned my_rank = Global_communicator_pt->my_rank();
2466
2467 // send and receive requests
2468 Vector<MPI_Request> send_reqs;
2469 Vector<MPI_Request> recv_reqs;
2470
2471 // cache first_row
2472 unsigned first_row = Preconditioner_pt->first_row();
2473
2474 // local residual values for this processor
2475 double* local_r_values = local_r.values_pt();
2476
2477 // for every vector we assemble the duplicate of the vector on the
2478 // appropriate subset of processors
2479
2480 // first we post the non-blocking sends and recvs
2481 for (unsigned i = 0; i < Nprec; i++)
2482 {
2483 if (r[i].distributed())
2484 {
2485 // current first_row and nrow_local
2486 unsigned current_first_row = r[i].first_row();
2487
2488 // send and receive the contents of the vector
2489 for (unsigned p = 0; p < nproc; p++)
2490 {
2491 // use mpi methods to send to and receive from all but my rank
2492 if (p != my_rank)
2493 {
2494 // send
2495 if (Nrow_local_for_proc[i][p] != 0)
2496 {
2497 // compute the offset for the values
2498 int offset_n = First_row_for_proc[i][p] - current_first_row;
2499
2500 // send the values
2501 int tag = this->compute_tag(nproc, my_rank, p, 0);
2502 MPI_Request tr;
2503 MPI_Isend(const_cast<double*>(r[i].values_pt()) + offset_n,
2504 int(Nrow_local_for_proc[i][p]),
2505 MPI_DOUBLE,
2506 p,
2507 tag,
2508 Global_communicator_pt->mpi_comm(),
2509 &tr);
2510 send_reqs.push_back(tr);
2511 }
2512
2513 // recv
2514 if (Nrow_local_from_proc[i][p] != 0)
2515 {
2516 // compute the offset for the values
2517 int offset_n = First_row_from_proc[i][p] - first_row;
2518
2519 // receive the values
2520 int tag = this->compute_tag(nproc, p, my_rank, 0);
2521 MPI_Request tr;
2522 MPI_Irecv(local_r_values + offset_n,
2523 int(Nrow_local_from_proc[i][p]),
2524 MPI_DOUBLE,
2525 p,
2526 tag,
2527 Global_communicator_pt->mpi_comm(),
2528 &tr);
2529 recv_reqs.push_back(tr);
2530 }
2531 }
2532 }
2533 }
2534 }
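// At this point, for every distributed r[i], this processor has posted one
// non-blocking send per slice of its local part that another processor
// needs, and one non-blocking receive per slice of r[Color] that it needs
// but does not hold locally; the index tables built in
// setup_preconditioners() (First_row_for_proc, Nrow_local_for_proc, etc.)
// fully determine these slices. Non-distributed vectors are handled by the
// direct copy below.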
2535
2536
2537 // and now we send to self
2538 if (!r[Color].distributed())
2539 {
2540 // just copy to the new vector
2541 const double* r_pt = r[Color].values_pt();
2542 unsigned nrow_local = local_r.nrow_local();
2543 for (unsigned i = 0; i < nrow_local; i++)
2544 {
2545 local_r_values[i] = r_pt[i];
2546 }
2547 }
2548 else
2549 {
2550 // the incoming residual associated with the processor
2551 const double* r_pt = r[Color].values_pt();
2552
2553 // current first_row and nrow_local
2554 unsigned current_first_row = r[Color].first_row();
2555
2556 // cache first_row
2557 unsigned first_row = Preconditioner_pt->first_row();
2558
2559 // copy the slice this processor both holds and needs (send/receive to self)
2560 if (Nrow_local_from_proc[Color][my_rank] != 0)
2561 {
2562 // offset for values send to self
2563 unsigned offset_n_send =
2564 First_row_for_proc[Color][my_rank] - current_first_row;
2565
2566 // offset for values receive from self
2567 unsigned offset_n_recv =
2568 First_row_from_proc[Color][my_rank] - first_row;
2569
2570 // send/receive
2571 unsigned n_n = Nrow_local_from_proc[Color][my_rank];
2572 for (unsigned j = 0; j < n_n; j++)
2573 {
2574 local_r_values[offset_n_recv + j] = r_pt[offset_n_send + j];
2575 }
2576 }
2577 }
2578
2579 // wait for the receives to complete
2580 unsigned n_recv = recv_reqs.size();
2581 if (n_recv)
2582 {
2583 MPI_Waitall(n_recv, &recv_reqs[0], MPI_STATUS_IGNORE);
2584 }
2585 recv_reqs.clear();
2586
2587 // next solve
2588 // apply the local preconditioner
2589 DoubleVector local_z;
2590 Preconditioner_pt->preconditioner_solve(local_r, local_z);
2591 local_r.clear();
2592
2593 // the local z values
2594 double* local_z_values = local_z.values_pt();
2595
2596 // setup the vectors
2597 for (unsigned i = 0; i < Nprec; i++)
2598 {
2599 // if z[i] is not setup then set it up
2600 if (!z[i].built())
2601 {
2602 z[i].build(r[i].distribution_pt(), 0.0);
2603 }
2604 }
2605
2606 // now return the preconditioned values: post the non-blocking sends and recvs
2607 for (unsigned i = 0; i < Nprec; i++)
2608 {
2609 if (r[i].distributed())
2610 {
2611 // current first_row and nrow_local
2612 unsigned current_first_row = r[i].first_row();
2613
2614 // send and receive the contents of the vector
2615 for (unsigned p = 0; p < nproc; p++)
2616 {
2617 // use mpi methods to send to and receive from all but my rank
2618 if (p != my_rank)
2619 {
2620 // receive the preconditioned values for the rows that were sent to p
2621 if (Nrow_local_for_proc[i][p] != 0)
2622 {
2623 // compute the offset for the values
2624 int offset_n = First_row_for_proc[i][p] - current_first_row;
2625
2626 // receive the values
2627 int tag = this->compute_tag(nproc, my_rank, p, 0);
2628 MPI_Request tr;
2629 MPI_Irecv(z[i].values_pt() + offset_n,
2630 int(Nrow_local_for_proc[i][p]),
2631 MPI_DOUBLE,
2632 p,
2633 tag,
2634 Global_communicator_pt->mpi_comm(),
2635 &tr);
2636 recv_reqs.push_back(tr);
2637 }
2638
2639 // send
2640 if (Nrow_local_from_proc[i][p] != 0)
2641 {
2642 // compute the offset for the values
2643 int offset_n = First_row_from_proc[i][p] - first_row;
2644
2645 // send the locally computed values back to processor p
2646 int tag = this->compute_tag(nproc, p, my_rank, 0);
2647 MPI_Request tr;
2648 MPI_Isend(local_z_values + offset_n,
2649 int(Nrow_local_from_proc[i][p]),
2650 MPI_DOUBLE,
2651 p,
2652 tag,
2653 Global_communicator_pt->mpi_comm(),
2654 &tr);
2655 send_reqs.push_back(tr);
2656 }
2657 }
2658 }
2659 }
2660 // otherwise we need to share the results
2661 else
2662 {
2663 // number of processors associated with this preconditioner
2664 unsigned nproc_local = Local_communicator_pt->nproc();
2665
2666 // my "proc number" for this preconditioner
2667 unsigned my_local_rank = Local_communicator_pt->my_rank();
2668
2669 // sends to self completed later
2670 if (i != Color)
2671 {
2672 // post send requests
2673 for (unsigned j = my_local_rank; j < Nproc_for_prec[i];
2674 j += nproc_local)
2675 {
2676 int p = j + First_proc_for_prec[i];
2677 MPI_Request tr;
2678 MPI_Isend(local_z_values,
2679 z[Color].nrow(),
2680 MPI_DOUBLE,
2681 p,
2682 0,
2683 Global_communicator_pt->mpi_comm(),
2684 &tr);
2685 send_reqs.push_back(tr);
2686 }
2687
2688 // compute the processor number to recv from
2689 int p = my_local_rank;
2690 while ((p - int(Nproc_for_prec[i])) >= 0)
2691 {
2692 p -= Nproc_for_prec[i];
2693 }
2694 p += First_proc_for_prec[i];
2695
2696 // and recv
2697 MPI_Request tr;
2698 MPI_Irecv(z[i].values_pt(),
2699 z[i].nrow(),
2700 MPI_DOUBLE,
2701 p,
2702 0,
2703 Global_communicator_pt->mpi_comm(),
2704 &tr);
2705 recv_reqs.push_back(tr);
2706 }
2707 }
2708 }
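// Note that this exchange reverses the earlier scatter of r: for
// distributed vectors, rows recorded in the "for_proc" tables (originally
// sent to processor p) come back from p as preconditioned values, while
// rows recorded in the "from_proc" tables are returned to their owners.
// For non-distributed vectors every processor in a preconditioner's group
// holds the full solution, so each processor sends its full copy of
// z[Color] (round-robin over the local ranks) to processors in the other
// groups and receives one full copy of z[i] in return.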
2709
2710 // and now we send to self
2711 if (!r[Color].distributed())
2712 {
2713 // just copy to the new vector
2714 double* z_pt = z[Color].values_pt();
2715 unsigned nrow_local = local_z.nrow_local();
2716 for (unsigned i = 0; i < nrow_local; i++)
2717 {
2718 z_pt[i] = local_z_values[i];
2719 }
2720 }
2721 else
2722 {
2723 // the output vector associated with this processor
2724 double* z_pt = z[Color].values_pt();
2725
2726 // current first_row and nrow_local
2727 unsigned current_first_row = r[Color].first_row();
2728
2729 // cache first_row
2730 unsigned first_row = Preconditioner_pt->first_row();
2731
2732 // copy the slice computed locally back into the caller's z
2733 if (Nrow_local_from_proc[Color][my_rank] != 0)
2734 {
2735 // offset for values send to self
2736 unsigned offset_n_send =
2737 First_row_for_proc[Color][my_rank] - current_first_row;
2738
2739 // offset for values receive from self
2740 unsigned offset_n_recv =
2741 First_row_from_proc[Color][my_rank] - first_row;
2742
2743 // send/receive
2744 unsigned n_n = Nrow_local_from_proc[Color][my_rank];
2745 for (unsigned j = 0; j < n_n; j++)
2746 {
2747 z_pt[offset_n_send + j] = local_z_values[offset_n_recv + j];
2748 }
2749 }
2750 }
2751
2752
2753 // wait for the receives to complete
2754 n_recv = recv_reqs.size();
2755 if (n_recv)
2756 {
2757 MPI_Waitall(n_recv, &recv_reqs[0], MPI_STATUS_IGNORE);
2758 }
2759 recv_reqs.clear();
2760
2761 // wait for the sends to complete
2762 unsigned n_send = send_reqs.size();
2763 if (n_send)
2764 {
2765 MPI_Waitall(n_send, &send_reqs[0], MPI_STATUS_IGNORE);
2766 }
2767 send_reqs.clear();
2768 }
2769} // namespace oomph
2770
2771// End of "if we have mpi"
2772#endif