Actual source code: chwirut2.c
  1: /*
  2:    Include "petsctao.h" so that we can use TAO solvers.  Note that this
  3:    file automatically includes libraries such as:
  4:      petsc.h       - base PETSc routines   petscvec.h - vectors
  5:      petscsys.h    - system routines        petscmat.h - matrices
  6:      petscis.h     - index sets            petscksp.h - Krylov subspace methods
  7:      petscviewer.h - viewers               petscpc.h  - preconditioners
  9: */
 11: #include <petsctao.h>
 13: /*
 14: Description:   These data are the result of a NIST study involving
 15:                ultrasonic calibration.  The response variable is
 16:                ultrasonic response, and the predictor variable is
 17:                metal distance.
 19: Reference:     Chwirut, D., NIST (197?).
 20:                Ultrasonic Reference Block Study.
 21: */
 23: static char help[] = "Finds the nonlinear least-squares solution to the model \n\
 24:             y = exp[-b1*x]/(b2+b3*x)  +  e \n";
 26: #define NOBSERVATIONS 214
 27: #define NPARAMETERS   3
 29: #define DIE_TAG  2000
 30: #define IDLE_TAG 1000
 32: /* User-defined application context */
 33: typedef struct {
 34:   /* Working space */
 35:   PetscReal   t[NOBSERVATIONS]; /* array of independent variables of observation */
 36:   PetscReal   y[NOBSERVATIONS]; /* array of dependent variables */
 37:   PetscMPIInt size, rank;
 38: } AppCtx;
 40: /* User provided Routines */
 41: PetscErrorCode InitializeData(AppCtx *user);
 42: PetscErrorCode FormStartingPoint(Vec);
 43: PetscErrorCode EvaluateFunction(Tao, Vec, Vec, void *);
 44: PetscErrorCode TaskWorker(AppCtx *user);
 45: PetscErrorCode StopWorkers(AppCtx *user);
 46: PetscErrorCode RunSimulation(PetscReal *x, PetscInt i, PetscReal *f, AppCtx *user);
 48: /*--------------------------------------------------------------------*/
 49: int main(int argc, char **argv)
 50: {
 51:   Vec    x, f; /* solution, function */
 52:   Tao    tao;  /* Tao solver context */
 53:   AppCtx user; /* user-defined work context */
 55:   /* Initialize TAO and PETSc */
 56:   PetscFunctionBeginUser;
 57:   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
 58:   MPI_Comm_size(MPI_COMM_WORLD, &user.size);
 59:   MPI_Comm_rank(MPI_COMM_WORLD, &user.rank);
 60:   PetscCall(InitializeData(&user));
 62:   /* Run optimization on rank 0 */
 63:   if (user.rank == 0) {
 64:     /* Allocate vectors */
 65:     PetscCall(VecCreateSeq(PETSC_COMM_SELF, NPARAMETERS, &x));
 66:     PetscCall(VecCreateSeq(PETSC_COMM_SELF, NOBSERVATIONS, &f));
 68:     /* TAO code begins here */
 70:     /* Create TAO solver and set desired solution method */
 71:     PetscCall(TaoCreate(PETSC_COMM_SELF, &tao));
 72:     PetscCall(TaoSetType(tao, TAOPOUNDERS));
 74:     /* Set the function and Jacobian routines. */
 75:     PetscCall(FormStartingPoint(x));
 76:     PetscCall(TaoSetSolution(tao, x));
 77:     PetscCall(TaoSetResidualRoutine(tao, f, EvaluateFunction, (void *)&user));
 79:     /* Check for any TAO command line arguments */
 80:     PetscCall(TaoSetFromOptions(tao));
 82:     /* Perform the Solve */
 83:     PetscCall(TaoSolve(tao));
 85:     /* Free TAO data structures */
 86:     PetscCall(TaoDestroy(&tao));
 88:     /* Free PETSc data structures */
 89:     PetscCall(VecDestroy(&x));
 90:     PetscCall(VecDestroy(&f));
 91:     PetscCall(StopWorkers(&user));
 92:   } else {
 93:     PetscCall(TaskWorker(&user));
 94:   }
 95:   PetscCall(PetscFinalize());
 96:   return 0;
 97: }
 99: /*--------------------------------------------------------------------*/
100: PetscErrorCode EvaluateFunction(Tao tao, Vec X, Vec F, void *ptr)
101: {
102:   AppCtx    *user = (AppCtx *)ptr;
103:   PetscInt   i;
104:   PetscReal *x, *f;
106:   PetscFunctionBegin;
107:   PetscCall(VecGetArray(X, &x));
108:   PetscCall(VecGetArray(F, &f));
109:   if (user->size == 1) {
110:     /* Single processor */
111:     for (i = 0; i < NOBSERVATIONS; i++) PetscCall(RunSimulation(x, i, &f[i], user));
112:   } else {
113:     /* Multiprocessor main */
114:     PetscMPIInt tag;
115:     PetscInt    finishedtasks, next_task, checkedin;
116:     PetscReal   f_i = 0.0;
117:     MPI_Status  status;
119:     next_task     = 0;
120:     finishedtasks = 0;
121:     checkedin     = 0;
123:     while (finishedtasks < NOBSERVATIONS || checkedin < user->size - 1) {
124:       PetscCallMPI(MPI_Recv(&f_i, 1, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PETSC_COMM_WORLD, &status));
125:       if (status.MPI_TAG == IDLE_TAG) {
126:         checkedin++;
127:       } else {
128:         tag    = status.MPI_TAG;
129:         f[tag] = (PetscReal)f_i;
130:         finishedtasks++;
131:       }
133:       if (next_task < NOBSERVATIONS) {
134:         PetscCallMPI(MPI_Send(x, NPARAMETERS, MPIU_REAL, status.MPI_SOURCE, (PetscMPIInt)next_task, PETSC_COMM_WORLD));
135:         next_task++;
137:       } else {
138:         /* Send idle message */
139:         PetscCallMPI(MPI_Send(x, NPARAMETERS, MPIU_REAL, status.MPI_SOURCE, IDLE_TAG, PETSC_COMM_WORLD));
140:       }
141:     }
142:   }
143:   PetscCall(VecRestoreArray(X, &x));
144:   PetscCall(VecRestoreArray(F, &f));
145:   PetscCall(PetscLogFlops(6 * NOBSERVATIONS));
146:   PetscFunctionReturn(PETSC_SUCCESS);
147: }
149: /* ------------------------------------------------------------ */
150: PetscErrorCode FormStartingPoint(Vec X)
151: {
152:   PetscReal *x;
154:   PetscFunctionBegin;
155:   PetscCall(VecGetArray(X, &x));
156:   x[0] = 0.15;
157:   x[1] = 0.008;
158:   x[2] = 0.010;
159:   PetscCall(VecRestoreArray(X, &x));
160:   PetscFunctionReturn(PETSC_SUCCESS);
161: }
163: /* ---------------------------------------------------------------------- */
164: PetscErrorCode InitializeData(AppCtx *user)
165: {
166:   PetscReal *t = user->t, *y = user->y;
167:   PetscInt   i = 0;
169:   PetscFunctionBegin;
170:   y[i]   = 92.9000;
171:   t[i++] = 0.5000;
172:   y[i]   = 78.7000;
173:   t[i++] = 0.6250;
174:   y[i]   = 64.2000;
175:   t[i++] = 0.7500;
176:   y[i]   = 64.9000;
177:   t[i++] = 0.8750;
178:   y[i]   = 57.1000;
179:   t[i++] = 1.0000;
180:   y[i]   = 43.3000;
181:   t[i++] = 1.2500;
182:   y[i]   = 31.1000;
183:   t[i++] = 1.7500;
184:   y[i]   = 23.6000;
185:   t[i++] = 2.2500;
186:   y[i]   = 31.0500;
187:   t[i++] = 1.7500;
188:   y[i]   = 23.7750;
189:   t[i++] = 2.2500;
190:   y[i]   = 17.7375;
191:   t[i++] = 2.7500;
192:   y[i]   = 13.8000;
193:   t[i++] = 3.2500;
194:   y[i]   = 11.5875;
195:   t[i++] = 3.7500;
196:   y[i]   = 9.4125;
197:   t[i++] = 4.2500;
198:   y[i]   = 7.7250;
199:   t[i++] = 4.7500;
200:   y[i]   = 7.3500;
201:   t[i++] = 5.2500;
202:   y[i]   = 8.0250;
203:   t[i++] = 5.7500;
204:   y[i]   = 90.6000;
205:   t[i++] = 0.5000;
206:   y[i]   = 76.9000;
207:   t[i++] = 0.6250;
208:   y[i]   = 71.6000;
209:   t[i++] = 0.7500;
210:   y[i]   = 63.6000;
211:   t[i++] = 0.8750;
212:   y[i]   = 54.0000;
213:   t[i++] = 1.0000;
214:   y[i]   = 39.2000;
215:   t[i++] = 1.2500;
216:   y[i]   = 29.3000;
217:   t[i++] = 1.7500;
218:   y[i]   = 21.4000;
219:   t[i++] = 2.2500;
220:   y[i]   = 29.1750;
221:   t[i++] = 1.7500;
222:   y[i]   = 22.1250;
223:   t[i++] = 2.2500;
224:   y[i]   = 17.5125;
225:   t[i++] = 2.7500;
226:   y[i]   = 14.2500;
227:   t[i++] = 3.2500;
228:   y[i]   = 9.4500;
229:   t[i++] = 3.7500;
230:   y[i]   = 9.1500;
231:   t[i++] = 4.2500;
232:   y[i]   = 7.9125;
233:   t[i++] = 4.7500;
234:   y[i]   = 8.4750;
235:   t[i++] = 5.2500;
236:   y[i]   = 6.1125;
237:   t[i++] = 5.7500;
238:   y[i]   = 80.0000;
239:   t[i++] = 0.5000;
240:   y[i]   = 79.0000;
241:   t[i++] = 0.6250;
242:   y[i]   = 63.8000;
243:   t[i++] = 0.7500;
244:   y[i]   = 57.2000;
245:   t[i++] = 0.8750;
246:   y[i]   = 53.2000;
247:   t[i++] = 1.0000;
248:   y[i]   = 42.5000;
249:   t[i++] = 1.2500;
250:   y[i]   = 26.8000;
251:   t[i++] = 1.7500;
252:   y[i]   = 20.4000;
253:   t[i++] = 2.2500;
254:   y[i]   = 26.8500;
255:   t[i++] = 1.7500;
256:   y[i]   = 21.0000;
257:   t[i++] = 2.2500;
258:   y[i]   = 16.4625;
259:   t[i++] = 2.7500;
260:   y[i]   = 12.5250;
261:   t[i++] = 3.2500;
262:   y[i]   = 10.5375;
263:   t[i++] = 3.7500;
264:   y[i]   = 8.5875;
265:   t[i++] = 4.2500;
266:   y[i]   = 7.1250;
267:   t[i++] = 4.7500;
268:   y[i]   = 6.1125;
269:   t[i++] = 5.2500;
270:   y[i]   = 5.9625;
271:   t[i++] = 5.7500;
272:   y[i]   = 74.1000;
273:   t[i++] = 0.5000;
274:   y[i]   = 67.3000;
275:   t[i++] = 0.6250;
276:   y[i]   = 60.8000;
277:   t[i++] = 0.7500;
278:   y[i]   = 55.5000;
279:   t[i++] = 0.8750;
280:   y[i]   = 50.3000;
281:   t[i++] = 1.0000;
282:   y[i]   = 41.0000;
283:   t[i++] = 1.2500;
284:   y[i]   = 29.4000;
285:   t[i++] = 1.7500;
286:   y[i]   = 20.4000;
287:   t[i++] = 2.2500;
288:   y[i]   = 29.3625;
289:   t[i++] = 1.7500;
290:   y[i]   = 21.1500;
291:   t[i++] = 2.2500;
292:   y[i]   = 16.7625;
293:   t[i++] = 2.7500;
294:   y[i]   = 13.2000;
295:   t[i++] = 3.2500;
296:   y[i]   = 10.8750;
297:   t[i++] = 3.7500;
298:   y[i]   = 8.1750;
299:   t[i++] = 4.2500;
300:   y[i]   = 7.3500;
301:   t[i++] = 4.7500;
302:   y[i]   = 5.9625;
303:   t[i++] = 5.2500;
304:   y[i]   = 5.6250;
305:   t[i++] = 5.7500;
306:   y[i]   = 81.5000;
307:   t[i++] = .5000;
308:   y[i]   = 62.4000;
309:   t[i++] = .7500;
310:   y[i]   = 32.5000;
311:   t[i++] = 1.5000;
312:   y[i]   = 12.4100;
313:   t[i++] = 3.0000;
314:   y[i]   = 13.1200;
315:   t[i++] = 3.0000;
316:   y[i]   = 15.5600;
317:   t[i++] = 3.0000;
318:   y[i]   = 5.6300;
319:   t[i++] = 6.0000;
320:   y[i]   = 78.0000;
321:   t[i++] = .5000;
322:   y[i]   = 59.9000;
323:   t[i++] = .7500;
324:   y[i]   = 33.2000;
325:   t[i++] = 1.5000;
326:   y[i]   = 13.8400;
327:   t[i++] = 3.0000;
328:   y[i]   = 12.7500;
329:   t[i++] = 3.0000;
330:   y[i]   = 14.6200;
331:   t[i++] = 3.0000;
332:   y[i]   = 3.9400;
333:   t[i++] = 6.0000;
334:   y[i]   = 76.8000;
335:   t[i++] = .5000;
336:   y[i]   = 61.0000;
337:   t[i++] = .7500;
338:   y[i]   = 32.9000;
339:   t[i++] = 1.5000;
340:   y[i]   = 13.8700;
341:   t[i++] = 3.0000;
342:   y[i]   = 11.8100;
343:   t[i++] = 3.0000;
344:   y[i]   = 13.3100;
345:   t[i++] = 3.0000;
346:   y[i]   = 5.4400;
347:   t[i++] = 6.0000;
348:   y[i]   = 78.0000;
349:   t[i++] = .5000;
350:   y[i]   = 63.5000;
351:   t[i++] = .7500;
352:   y[i]   = 33.8000;
353:   t[i++] = 1.5000;
354:   y[i]   = 12.5600;
355:   t[i++] = 3.0000;
356:   y[i]   = 5.6300;
357:   t[i++] = 6.0000;
358:   y[i]   = 12.7500;
359:   t[i++] = 3.0000;
360:   y[i]   = 13.1200;
361:   t[i++] = 3.0000;
362:   y[i]   = 5.4400;
363:   t[i++] = 6.0000;
364:   y[i]   = 76.8000;
365:   t[i++] = .5000;
366:   y[i]   = 60.0000;
367:   t[i++] = .7500;
368:   y[i]   = 47.8000;
369:   t[i++] = 1.0000;
370:   y[i]   = 32.0000;
371:   t[i++] = 1.5000;
372:   y[i]   = 22.2000;
373:   t[i++] = 2.0000;
374:   y[i]   = 22.5700;
375:   t[i++] = 2.0000;
376:   y[i]   = 18.8200;
377:   t[i++] = 2.5000;
378:   y[i]   = 13.9500;
379:   t[i++] = 3.0000;
380:   y[i]   = 11.2500;
381:   t[i++] = 4.0000;
382:   y[i]   = 9.0000;
383:   t[i++] = 5.0000;
384:   y[i]   = 6.6700;
385:   t[i++] = 6.0000;
386:   y[i]   = 75.8000;
387:   t[i++] = .5000;
388:   y[i]   = 62.0000;
389:   t[i++] = .7500;
390:   y[i]   = 48.8000;
391:   t[i++] = 1.0000;
392:   y[i]   = 35.2000;
393:   t[i++] = 1.5000;
394:   y[i]   = 20.0000;
395:   t[i++] = 2.0000;
396:   y[i]   = 20.3200;
397:   t[i++] = 2.0000;
398:   y[i]   = 19.3100;
399:   t[i++] = 2.5000;
400:   y[i]   = 12.7500;
401:   t[i++] = 3.0000;
402:   y[i]   = 10.4200;
403:   t[i++] = 4.0000;
404:   y[i]   = 7.3100;
405:   t[i++] = 5.0000;
406:   y[i]   = 7.4200;
407:   t[i++] = 6.0000;
408:   y[i]   = 70.5000;
409:   t[i++] = .5000;
410:   y[i]   = 59.5000;
411:   t[i++] = .7500;
412:   y[i]   = 48.5000;
413:   t[i++] = 1.0000;
414:   y[i]   = 35.8000;
415:   t[i++] = 1.5000;
416:   y[i]   = 21.0000;
417:   t[i++] = 2.0000;
418:   y[i]   = 21.6700;
419:   t[i++] = 2.0000;
420:   y[i]   = 21.0000;
421:   t[i++] = 2.5000;
422:   y[i]   = 15.6400;
423:   t[i++] = 3.0000;
424:   y[i]   = 8.1700;
425:   t[i++] = 4.0000;
426:   y[i]   = 8.5500;
427:   t[i++] = 5.0000;
428:   y[i]   = 10.1200;
429:   t[i++] = 6.0000;
430:   y[i]   = 78.0000;
431:   t[i++] = .5000;
432:   y[i]   = 66.0000;
433:   t[i++] = .6250;
434:   y[i]   = 62.0000;
435:   t[i++] = .7500;
436:   y[i]   = 58.0000;
437:   t[i++] = .8750;
438:   y[i]   = 47.7000;
439:   t[i++] = 1.0000;
440:   y[i]   = 37.8000;
441:   t[i++] = 1.2500;
442:   y[i]   = 20.2000;
443:   t[i++] = 2.2500;
444:   y[i]   = 21.0700;
445:   t[i++] = 2.2500;
446:   y[i]   = 13.8700;
447:   t[i++] = 2.7500;
448:   y[i]   = 9.6700;
449:   t[i++] = 3.2500;
450:   y[i]   = 7.7600;
451:   t[i++] = 3.7500;
452:   y[i]   = 5.4400;
453:   t[i++] = 4.2500;
454:   y[i]   = 4.8700;
455:   t[i++] = 4.7500;
456:   y[i]   = 4.0100;
457:   t[i++] = 5.2500;
458:   y[i]   = 3.7500;
459:   t[i++] = 5.7500;
460:   y[i]   = 24.1900;
461:   t[i++] = 3.0000;
462:   y[i]   = 25.7600;
463:   t[i++] = 3.0000;
464:   y[i]   = 18.0700;
465:   t[i++] = 3.0000;
466:   y[i]   = 11.8100;
467:   t[i++] = 3.0000;
468:   y[i]   = 12.0700;
469:   t[i++] = 3.0000;
470:   y[i]   = 16.1200;
471:   t[i++] = 3.0000;
472:   y[i]   = 70.8000;
473:   t[i++] = .5000;
474:   y[i]   = 54.7000;
475:   t[i++] = .7500;
476:   y[i]   = 48.0000;
477:   t[i++] = 1.0000;
478:   y[i]   = 39.8000;
479:   t[i++] = 1.5000;
480:   y[i]   = 29.8000;
481:   t[i++] = 2.0000;
482:   y[i]   = 23.7000;
483:   t[i++] = 2.5000;
484:   y[i]   = 29.6200;
485:   t[i++] = 2.0000;
486:   y[i]   = 23.8100;
487:   t[i++] = 2.5000;
488:   y[i]   = 17.7000;
489:   t[i++] = 3.0000;
490:   y[i]   = 11.5500;
491:   t[i++] = 4.0000;
492:   y[i]   = 12.0700;
493:   t[i++] = 5.0000;
494:   y[i]   = 8.7400;
495:   t[i++] = 6.0000;
496:   y[i]   = 80.7000;
497:   t[i++] = .5000;
498:   y[i]   = 61.3000;
499:   t[i++] = .7500;
500:   y[i]   = 47.5000;
501:   t[i++] = 1.0000;
502:   y[i]   = 29.0000;
503:   t[i++] = 1.5000;
504:   y[i]   = 24.0000;
505:   t[i++] = 2.0000;
506:   y[i]   = 17.7000;
507:   t[i++] = 2.5000;
508:   y[i]   = 24.5600;
509:   t[i++] = 2.0000;
510:   y[i]   = 18.6700;
511:   t[i++] = 2.5000;
512:   y[i]   = 16.2400;
513:   t[i++] = 3.0000;
514:   y[i]   = 8.7400;
515:   t[i++] = 4.0000;
516:   y[i]   = 7.8700;
517:   t[i++] = 5.0000;
518:   y[i]   = 8.5100;
519:   t[i++] = 6.0000;
520:   y[i]   = 66.7000;
521:   t[i++] = .5000;
522:   y[i]   = 59.2000;
523:   t[i++] = .7500;
524:   y[i]   = 40.8000;
525:   t[i++] = 1.0000;
526:   y[i]   = 30.7000;
527:   t[i++] = 1.5000;
528:   y[i]   = 25.7000;
529:   t[i++] = 2.0000;
530:   y[i]   = 16.3000;
531:   t[i++] = 2.5000;
532:   y[i]   = 25.9900;
533:   t[i++] = 2.0000;
534:   y[i]   = 16.9500;
535:   t[i++] = 2.5000;
536:   y[i]   = 13.3500;
537:   t[i++] = 3.0000;
538:   y[i]   = 8.6200;
539:   t[i++] = 4.0000;
540:   y[i]   = 7.2000;
541:   t[i++] = 5.0000;
542:   y[i]   = 6.6400;
543:   t[i++] = 6.0000;
544:   y[i]   = 13.6900;
545:   t[i++] = 3.0000;
546:   y[i]   = 81.0000;
547:   t[i++] = .5000;
548:   y[i]   = 64.5000;
549:   t[i++] = .7500;
550:   y[i]   = 35.5000;
551:   t[i++] = 1.5000;
552:   y[i]   = 13.3100;
553:   t[i++] = 3.0000;
554:   y[i]   = 4.8700;
555:   t[i++] = 6.0000;
556:   y[i]   = 12.9400;
557:   t[i++] = 3.0000;
558:   y[i]   = 5.0600;
559:   t[i++] = 6.0000;
560:   y[i]   = 15.1900;
561:   t[i++] = 3.0000;
562:   y[i]   = 14.6200;
563:   t[i++] = 3.0000;
564:   y[i]   = 15.6400;
565:   t[i++] = 3.0000;
566:   y[i]   = 25.5000;
567:   t[i++] = 1.7500;
568:   y[i]   = 25.9500;
569:   t[i++] = 1.7500;
570:   y[i]   = 81.7000;
571:   t[i++] = .5000;
572:   y[i]   = 61.6000;
573:   t[i++] = .7500;
574:   y[i]   = 29.8000;
575:   t[i++] = 1.7500;
576:   y[i]   = 29.8100;
577:   t[i++] = 1.7500;
578:   y[i]   = 17.1700;
579:   t[i++] = 2.7500;
580:   y[i]   = 10.3900;
581:   t[i++] = 3.7500;
582:   y[i]   = 28.4000;
583:   t[i++] = 1.7500;
584:   y[i]   = 28.6900;
585:   t[i++] = 1.7500;
586:   y[i]   = 81.3000;
587:   t[i++] = .5000;
588:   y[i]   = 60.9000;
589:   t[i++] = .7500;
590:   y[i]   = 16.6500;
591:   t[i++] = 2.7500;
592:   y[i]   = 10.0500;
593:   t[i++] = 3.7500;
594:   y[i]   = 28.9000;
595:   t[i++] = 1.7500;
596:   y[i]   = 28.9500;
597:   t[i++] = 1.7500;
598:   PetscFunctionReturn(PETSC_SUCCESS);
599: }
601: PetscErrorCode TaskWorker(AppCtx *user)
602: {
603:   PetscReal   x[NPARAMETERS], f = 0.0;
604:   PetscMPIInt tag = IDLE_TAG;
605:   PetscInt    index;
606:   MPI_Status  status;
608:   PetscFunctionBegin;
609:   /* Send check-in message to rank-0 */
611:   PetscCallMPI(MPI_Send(&f, 1, MPIU_REAL, 0, IDLE_TAG, PETSC_COMM_WORLD));
612:   while (tag != DIE_TAG) {
613:     PetscCallMPI(MPI_Recv(x, NPARAMETERS, MPIU_REAL, 0, MPI_ANY_TAG, PETSC_COMM_WORLD, &status));
614:     tag = status.MPI_TAG;
615:     if (tag == IDLE_TAG) {
616:       PetscCallMPI(MPI_Send(&f, 1, MPIU_REAL, 0, IDLE_TAG, PETSC_COMM_WORLD));
617:     } else if (tag != DIE_TAG) {
618:       index = (PetscInt)tag;
619:       PetscCall(RunSimulation(x, index, &f, user));
620:       PetscCallMPI(MPI_Send(&f, 1, MPIU_REAL, 0, tag, PETSC_COMM_WORLD));
621:     }
622:   }
623:   PetscFunctionReturn(PETSC_SUCCESS);
624: }
626: PetscErrorCode RunSimulation(PetscReal *x, PetscInt i, PetscReal *f, AppCtx *user)
627: {
628:   PetscReal *t = user->t;
629:   PetscReal *y = user->y;
631:   PetscFunctionBeginUser;
632: #if defined(PETSC_USE_REAL_SINGLE)
633:   *f = y[i] - exp(-x[0] * t[i]) / (x[1] + x[2] * t[i]); /* expf() for single-precision breaks this example on Freebsd, Valgrind errors on Linux */
634: #else
635:   *f = y[i] - PetscExpScalar(-x[0] * t[i]) / (x[1] + x[2] * t[i]);
636: #endif
637:   PetscFunctionReturn(PETSC_SUCCESS);
638: }
640: PetscErrorCode StopWorkers(AppCtx *user)
641: {
642:   PetscInt   checkedin;
643:   MPI_Status status;
644:   PetscReal  f, x[NPARAMETERS];
646:   PetscFunctionBeginUser;
647:   checkedin = 0;
648:   while (checkedin < user->size - 1) {
649:     PetscCallMPI(MPI_Recv(&f, 1, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PETSC_COMM_WORLD, &status));
650:     checkedin++;
651:     PetscCall(PetscArrayzero(x, NPARAMETERS));
652:     PetscCallMPI(MPI_Send(x, NPARAMETERS, MPIU_REAL, status.MPI_SOURCE, DIE_TAG, PETSC_COMM_WORLD));
653:   }
654:   PetscFunctionReturn(PETSC_SUCCESS);
655: }
657: /*TEST
659:    build:
660:       requires: !complex
662:    test:
663:       nsize: 3
664:       requires: !single
665:       args: -tao_monitor_short -tao_max_it 100 -tao_type pounders -tao_gatol 1.e-5
667: TEST*/