Actual source code: ex26.c
  1: static char help[] = "Solves Laplacian with multigrid. Tests block API for PCMG\n\
  2:   -mx <xg>, where <xg> = number of grid points in the x-direction\n\
  3:   -my <yg>, where <yg> = number of grid points in the y-direction\n\
  4:   -Nx <npx>, where <npx> = number of processors in the x-direction\n\
  5:   -Ny <npy>, where <npy> = number of processors in the y-direction\n\n";
  7: /*  Modified from ~src/ksp/tests/ex19.c. Used for testing ML 6.2 interface.
  9:     This problem is modeled by
 10:     the partial differential equation
 12:             -Laplacian u  = g,  0 < x,y < 1,
 14:     with boundary conditions
 16:              u = 0  for  x = 0, x = 1, y = 0, y = 1.
 18:     A finite difference approximation with the usual 5-point stencil
 19:     is used to discretize the boundary value problem to obtain a linear
 20:     system of equations.
 22:     Usage: ./ex26 -ksp_monitor_short -pc_type ml
 23:            -mg_coarse_ksp_max_it 10
 24:            -mg_levels_1_ksp_max_it 10 -mg_levels_2_ksp_max_it 10
 25:            -mg_fine_ksp_max_it 10
 26: */
 28: #include <petscksp.h>
 29: #include <petscdm.h>
 30: #include <petscdmda.h>
 32: /* User-defined application contexts */
 33: typedef struct {
 34:   PetscInt mx, my;         /* number grid points in x and y direction */
 35:   Vec      localX, localF; /* local vectors with ghost region */
 36:   DM       da;
 37:   Vec      x, b, r; /* global vectors */
 38:   Mat      J;       /* Jacobian on grid */
 39:   Mat      A, P, R;
 40:   KSP      ksp;
 41: } GridCtx;
 43: static PetscErrorCode FormJacobian_Grid(GridCtx *, Mat);
 45: int main(int argc, char **argv)
 46: {
 47:   PetscInt    i, its, Nx = PETSC_DECIDE, Ny = PETSC_DECIDE, nlocal, nrhs = 1;
 48:   PetscScalar one = 1.0;
 49:   Mat         A, B, X;
 50:   GridCtx     fine_ctx;
 51:   KSP         ksp;
 52:   PetscBool   Brand = PETSC_FALSE, flg;
 54:   PetscFunctionBeginUser;
 55:   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
 56:   /* set up discretization matrix for fine grid */
 57:   fine_ctx.mx = 9;
 58:   fine_ctx.my = 9;
 59:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-mx", &fine_ctx.mx, NULL));
 60:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-my", &fine_ctx.my, NULL));
 61:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-nrhs", &nrhs, NULL));
 62:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-Nx", &Nx, NULL));
 63:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-Ny", &Ny, NULL));
 64:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-rand", &Brand, NULL));
 65:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Fine grid size %" PetscInt_FMT " by %" PetscInt_FMT "\n", fine_ctx.mx, fine_ctx.my));
 67:   /* Set up distributed array for fine grid */
 68:   PetscCall(DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_STAR, fine_ctx.mx, fine_ctx.my, Nx, Ny, 1, 1, NULL, NULL, &fine_ctx.da));
 69:   PetscCall(DMSetFromOptions(fine_ctx.da));
 70:   PetscCall(DMSetUp(fine_ctx.da));
 71:   PetscCall(DMCreateGlobalVector(fine_ctx.da, &fine_ctx.x));
 72:   PetscCall(VecDuplicate(fine_ctx.x, &fine_ctx.b));
 73:   PetscCall(VecGetLocalSize(fine_ctx.x, &nlocal));
 74:   PetscCall(DMCreateLocalVector(fine_ctx.da, &fine_ctx.localX));
 75:   PetscCall(VecDuplicate(fine_ctx.localX, &fine_ctx.localF));
 76:   PetscCall(DMCreateMatrix(fine_ctx.da, &A));
 77:   PetscCall(FormJacobian_Grid(&fine_ctx, A));
 79:   /* create linear solver */
 80:   PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
 81:   PetscCall(KSPSetDM(ksp, fine_ctx.da));
 82:   PetscCall(KSPSetDMActive(ksp, PETSC_FALSE));
 84:   /* set values for rhs vector */
 85:   PetscCall(VecSet(fine_ctx.b, one));
 87:   /* set options, then solve system */
 88:   PetscCall(KSPSetFromOptions(ksp)); /* calls PCSetFromOptions_ML if 'pc_type=ml' */
 89:   PetscCall(KSPSetOperators(ksp, A, A));
 90:   PetscCall(KSPSolve(ksp, fine_ctx.b, fine_ctx.x));
 91:   PetscCall(VecViewFromOptions(fine_ctx.x, NULL, "-debug"));
 92:   PetscCall(KSPGetIterationNumber(ksp, &its));
 93:   PetscCall(KSPGetIterationNumber(ksp, &its));
 94:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number of iterations = %" PetscInt_FMT "\n", its));
 96:   /* test multiple right-hand side */
 97:   PetscCall(MatCreateDense(PETSC_COMM_WORLD, nlocal, PETSC_DECIDE, fine_ctx.mx * fine_ctx.my, nrhs, NULL, &B));
 98:   PetscCall(MatSetOptionsPrefix(B, "rhs_"));
 99:   PetscCall(MatSetFromOptions(B));
100:   PetscCall(MatDuplicate(B, MAT_DO_NOT_COPY_VALUES, &X));
101:   if (Brand) {
102:     PetscCall(MatSetRandom(B, NULL));
103:   } else {
104:     PetscScalar *b;
106:     PetscCall(MatDenseGetArrayWrite(B, &b));
107:     for (i = 0; i < nlocal * nrhs; i++) b[i] = 1.0;
108:     PetscCall(MatDenseRestoreArrayWrite(B, &b));
109:     PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
110:     PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
111:   }
112:   PetscCall(KSPMatSolve(ksp, B, X));
113:   PetscCall(MatViewFromOptions(X, NULL, "-debug"));
115:   PetscCall(PetscObjectTypeCompare((PetscObject)ksp, KSPPREONLY, &flg));
116:   if ((flg || nrhs == 1) && !Brand) {
117:     PetscInt           n;
118:     const PetscScalar *xx, *XX;
120:     PetscCall(VecGetArrayRead(fine_ctx.x, &xx));
121:     PetscCall(MatDenseGetArrayRead(X, &XX));
122:     for (n = 0; n < nrhs; n++) {
123:       for (i = 0; i < nlocal; i++) {
124:         if (PetscAbsScalar(xx[i] - XX[nlocal * n + i]) > PETSC_SMALL) {
125:           PetscCall(PetscPrintf(PETSC_COMM_SELF, "[%d] Error local solve %" PetscInt_FMT ", entry %" PetscInt_FMT " -> %g + i %g != %g + i %g\n", PetscGlobalRank, n, i, (double)PetscRealPart(xx[i]), (double)PetscImaginaryPart(xx[i]), (double)PetscRealPart(XX[i]), (double)PetscImaginaryPart(XX[i])));
126:         }
127:       }
128:     }
129:     PetscCall(MatDenseRestoreArrayRead(X, &XX));
130:     PetscCall(VecRestoreArrayRead(fine_ctx.x, &xx));
131:   }
133:   /* free data structures */
134:   PetscCall(VecDestroy(&fine_ctx.x));
135:   PetscCall(VecDestroy(&fine_ctx.b));
136:   PetscCall(DMDestroy(&fine_ctx.da));
137:   PetscCall(VecDestroy(&fine_ctx.localX));
138:   PetscCall(VecDestroy(&fine_ctx.localF));
139:   PetscCall(MatDestroy(&A));
140:   PetscCall(MatDestroy(&B));
141:   PetscCall(MatDestroy(&X));
142:   PetscCall(KSPDestroy(&ksp));
144:   PetscCall(PetscFinalize());
145:   return 0;
146: }
148: PetscErrorCode FormJacobian_Grid(GridCtx *grid, Mat jac)
149: {
150:   PetscInt               i, j, row, mx, my, xs, ys, xm, ym, Xs, Ys, Xm, Ym, col[5];
151:   PetscInt               grow;
152:   const PetscInt        *ltog;
153:   PetscScalar            two = 2.0, one = 1.0, v[5], hx, hy, hxdhy, hydhx, value;
154:   ISLocalToGlobalMapping ltogm;
156:   PetscFunctionBeginUser;
157:   mx    = grid->mx;
158:   my    = grid->my;
159:   hx    = one / (PetscReal)(mx - 1);
160:   hy    = one / (PetscReal)(my - 1);
161:   hxdhy = hx / hy;
162:   hydhx = hy / hx;
164:   /* Get ghost points */
165:   PetscCall(DMDAGetCorners(grid->da, &xs, &ys, 0, &xm, &ym, 0));
166:   PetscCall(DMDAGetGhostCorners(grid->da, &Xs, &Ys, 0, &Xm, &Ym, 0));
167:   PetscCall(DMGetLocalToGlobalMapping(grid->da, <ogm));
168:   PetscCall(ISLocalToGlobalMappingGetIndices(ltogm, <og));
170:   /* Evaluate Jacobian of function */
171:   for (j = ys; j < ys + ym; j++) {
172:     row = (j - Ys) * Xm + xs - Xs - 1;
173:     for (i = xs; i < xs + xm; i++) {
174:       row++;
175:       grow = ltog[row];
176:       if (i > 0 && i < mx - 1 && j > 0 && j < my - 1) {
177:         v[0]   = -hxdhy;
178:         col[0] = ltog[row - Xm];
179:         v[1]   = -hydhx;
180:         col[1] = ltog[row - 1];
181:         v[2]   = two * (hydhx + hxdhy);
182:         col[2] = grow;
183:         v[3]   = -hydhx;
184:         col[3] = ltog[row + 1];
185:         v[4]   = -hxdhy;
186:         col[4] = ltog[row + Xm];
187:         PetscCall(MatSetValues(jac, 1, &grow, 5, col, v, INSERT_VALUES));
188:       } else if ((i > 0 && i < mx - 1) || (j > 0 && j < my - 1)) {
189:         value = .5 * two * (hydhx + hxdhy);
190:         PetscCall(MatSetValues(jac, 1, &grow, 1, &grow, &value, INSERT_VALUES));
191:       } else {
192:         value = .25 * two * (hydhx + hxdhy);
193:         PetscCall(MatSetValues(jac, 1, &grow, 1, &grow, &value, INSERT_VALUES));
194:       }
195:     }
196:   }
197:   PetscCall(ISLocalToGlobalMappingRestoreIndices(ltogm, <og));
198:   PetscCall(MatAssemblyBegin(jac, MAT_FINAL_ASSEMBLY));
199:   PetscCall(MatAssemblyEnd(jac, MAT_FINAL_ASSEMBLY));
200:   PetscFunctionReturn(PETSC_SUCCESS);
201: }
203: /*TEST
205:     test:
206:       args: -ksp_monitor_short
208:     test:
209:       suffix: 2
210:       args: -ksp_monitor_short
211:       nsize: 3
213:     test:
214:       suffix: ml_1
215:       args: -ksp_monitor_short -pc_type ml -mat_no_inode
216:       nsize: 3
217:       requires: ml
219:     test:
220:       suffix: ml_2
221:       args: -ksp_monitor_short -pc_type ml -mat_no_inode -ksp_max_it 3
222:       nsize: 3
223:       requires: ml
225:     test:
226:       suffix: ml_3
227:       args: -ksp_monitor_short -pc_type ml -mat_no_inode -pc_mg_type ADDITIVE -ksp_max_it 7
228:       nsize: 1
229:       requires: ml
231:     test:
232:       suffix: cycles
233:       nsize: {{1 2}}
234:       args: -ksp_view_final_residual -pc_type mg -mx 5 -my 5 -pc_mg_levels 3 -pc_mg_galerkin -ksp_monitor -mg_levels_ksp_type richardson -mg_levels_pc_type jacobi -pc_mg_type {{additive multiplicative full kaskade}separate output} -nrhs 1
236:     test:
237:       suffix: matcycles
238:       nsize: {{1 2}}
239:       args: -ksp_view_final_residual -ksp_type preonly -pc_type mg -mx 5 -my 5 -pc_mg_levels 3 -pc_mg_galerkin -ksp_monitor -mg_levels_ksp_type richardson -mg_levels_pc_type jacobi -pc_mg_type {{additive multiplicative full kaskade}separate output} -nrhs 7 -ksp_matsolve_batch_size {{4 7}separate output}
241:     test:
242:       requires: ml
243:       suffix: matcycles_ml
244:       nsize: {{1 2}}
245:       args: -ksp_view_final_residual -ksp_type preonly -pc_type ml -mx 5 -my 5 -ksp_monitor -mg_levels_ksp_type richardson -mg_levels_pc_type jacobi -pc_mg_type {{additive multiplicative full kaskade}separate output} -nrhs 7 -ksp_matsolve_batch_size {{4 7}separate output}
247:     testset:
248:       requires: hpddm
249:       args: -ksp_view_final_residual -ksp_type hpddm -pc_type mg -pc_mg_levels 3 -pc_mg_galerkin -mx 5 -my 5 -ksp_monitor -mg_levels_ksp_type richardson -mg_levels_pc_type jacobi -nrhs 7
250:       test:
251:         suffix: matcycles_hpddm_mg
252:         nsize: {{1 2}}
253:         args: -pc_mg_type {{additive multiplicative full kaskade}separate output} -ksp_matsolve_batch_size {{4 7}separate output}
254:       test:
255:         requires: !__float128 !__fp16
256:         suffix: hpddm_mg_mixed_precision
257:         nsize: 2
258:         output_file: output/ex26_matcycles_hpddm_mg_pc_mg_type-multiplicative_ksp_matsolve_batch_size-4.out
259:         args: -ksp_matsolve_batch_size 4 -ksp_hpddm_precision {{single double}shared output}
260:       test:
261:         requires: __float128
262:         suffix: hpddm_mg_mixed_precision___float128
263:         nsize: 2
264:         output_file: output/ex26_matcycles_hpddm_mg_pc_mg_type-multiplicative_ksp_matsolve_batch_size-4.out
265:         args: -ksp_matsolve_batch_size 4 -ksp_hpddm_precision {{double quadruple}shared output}
266:       test:
267:         requires: double defined(PETSC_HAVE_F2CBLASLAPACK___FLOAT128_BINDINGS)
268:         suffix: hpddm_mg_mixed_precision_double
269:         nsize: 2
270:         output_file: output/ex26_matcycles_hpddm_mg_pc_mg_type-multiplicative_ksp_matsolve_batch_size-4.out
271:         args: -ksp_matsolve_batch_size 4 -ksp_hpddm_precision quadruple
272:       test:
273:         requires: single defined(PETSC_HAVE_F2CBLASLAPACK___FP16_BINDINGS)
274:         suffix: hpddm_mg_mixed_precision_single
275:         nsize: 2
276:         output_file: output/ex26_matcycles_hpddm_mg_pc_mg_type-multiplicative_ksp_matsolve_batch_size-4.out
277:         args: -ksp_matsolve_batch_size 4 -ksp_hpddm_precision half -ksp_rtol 1e-3
279:     test:
280:       requires: hpddm
281:       nsize: {{1 2}}
282:       suffix: matcycles_hpddm_ilu
283:       args: -ksp_view_final_residual -ksp_type hpddm -pc_type redundant -redundant_pc_type ilu -mx 5 -my 5 -ksp_monitor -nrhs 7 -ksp_matsolve_batch_size {{4 7}separate output}
285: TEST*/