Actual source code: vpbjacobi.c


  2: /*
  3:    Include files needed for the variable size block PBJacobi preconditioner:
  4:      pcimpl.h - private include file intended for use by all preconditioners
  5: */

  7: #include <petsc/private/pcimpl.h>

  9: /*
 10:    Private context (data structure) for the VPBJacobi preconditioner.
 11: */
 12: typedef struct {
 13:   MatScalar *diag;
 14: } PC_VPBJacobi;

 16: static PetscErrorCode PCApply_VPBJacobi(PC pc,Vec x,Vec y)
 17: {
 18:   PC_VPBJacobi      *jac = (PC_VPBJacobi*)pc->data;
 19:   PetscInt          i,ncnt = 0;
 20:   const MatScalar   *diag = jac->diag;
 21:   PetscInt          ib,jb,bs;
 22:   const PetscScalar *xx;
 23:   PetscScalar       *yy,x0,x1,x2,x3,x4,x5,x6;
 24:   PetscInt          nblocks;
 25:   const PetscInt    *bsizes;

 27:   MatGetVariableBlockSizes(pc->pmat,&nblocks,&bsizes);
 28:   VecGetArrayRead(x,&xx);
 29:   VecGetArray(y,&yy);
 30:   for (i=0; i<nblocks; i++) {
 31:     bs = bsizes[i];
 32:     switch (bs) {
 33:     case 1:
 34:       yy[ncnt] = *diag*xx[ncnt];
 35:       break;
 36:     case 2:
 37:       x0         = xx[ncnt]; x1 = xx[ncnt+1];
 38:       yy[ncnt]   = diag[0]*x0 + diag[2]*x1;
 39:       yy[ncnt+1] = diag[1]*x0 + diag[3]*x1;
 40:       break;
 41:     case 3:
 42:       x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2];
 43:       yy[ncnt]   = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
 44:       yy[ncnt+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
 45:       yy[ncnt+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
 46:       break;
 47:     case 4:
 48:       x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3];
 49:       yy[ncnt]   = diag[0]*x0 + diag[4]*x1 + diag[8]*x2  + diag[12]*x3;
 50:       yy[ncnt+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2  + diag[13]*x3;
 51:       yy[ncnt+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3;
 52:       yy[ncnt+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3;
 53:       break;
 54:     case 5:
 55:       x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3]; x4 = xx[ncnt+4];
 56:       yy[ncnt]   = diag[0]*x0 + diag[5]*x1 + diag[10]*x2  + diag[15]*x3 + diag[20]*x4;
 57:       yy[ncnt+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2  + diag[16]*x3 + diag[21]*x4;
 58:       yy[ncnt+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
 59:       yy[ncnt+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
 60:       yy[ncnt+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
 61:       break;
 62:     case 6:
 63:       x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3]; x4 = xx[ncnt+4]; x5 = xx[ncnt+5];
 64:       yy[ncnt]   = diag[0]*x0 + diag[6]*x1  + diag[12]*x2  + diag[18]*x3 + diag[24]*x4 + diag[30]*x5;
 65:       yy[ncnt+1] = diag[1]*x0 + diag[7]*x1  + diag[13]*x2  + diag[19]*x3 + diag[25]*x4 + diag[31]*x5;
 66:       yy[ncnt+2] = diag[2]*x0 + diag[8]*x1  + diag[14]*x2  + diag[20]*x3 + diag[26]*x4 + diag[32]*x5;
 67:       yy[ncnt+3] = diag[3]*x0 + diag[9]*x1  + diag[15]*x2  + diag[21]*x3 + diag[27]*x4 + diag[33]*x5;
 68:       yy[ncnt+4] = diag[4]*x0 + diag[10]*x1 + diag[16]*x2  + diag[22]*x3 + diag[28]*x4 + diag[34]*x5;
 69:       yy[ncnt+5] = diag[5]*x0 + diag[11]*x1 + diag[17]*x2  + diag[23]*x3 + diag[29]*x4 + diag[35]*x5;
 70:       break;
 71:     case 7:
 72:       x0 = xx[ncnt]; x1 = xx[ncnt+1]; x2 = xx[ncnt+2]; x3 = xx[ncnt+3]; x4 = xx[ncnt+4]; x5 = xx[ncnt+5]; x6 = xx[ncnt+6];
 73:       yy[ncnt]   = diag[0]*x0 + diag[7]*x1  + diag[14]*x2  + diag[21]*x3 + diag[28]*x4 + diag[35]*x5 + diag[42]*x6;
 74:       yy[ncnt+1] = diag[1]*x0 + diag[8]*x1  + diag[15]*x2  + diag[22]*x3 + diag[29]*x4 + diag[36]*x5 + diag[43]*x6;
 75:       yy[ncnt+2] = diag[2]*x0 + diag[9]*x1  + diag[16]*x2  + diag[23]*x3 + diag[30]*x4 + diag[37]*x5 + diag[44]*x6;
 76:       yy[ncnt+3] = diag[3]*x0 + diag[10]*x1 + diag[17]*x2  + diag[24]*x3 + diag[31]*x4 + diag[38]*x5 + diag[45]*x6;
 77:       yy[ncnt+4] = diag[4]*x0 + diag[11]*x1 + diag[18]*x2  + diag[25]*x3 + diag[32]*x4 + diag[39]*x5 + diag[46]*x6;
 78:       yy[ncnt+5] = diag[5]*x0 + diag[12]*x1 + diag[19]*x2  + diag[26]*x3 + diag[33]*x4 + diag[40]*x5 + diag[47]*x6;
 79:       yy[ncnt+6] = diag[6]*x0 + diag[13]*x1 + diag[20]*x2  + diag[27]*x3 + diag[34]*x4 + diag[41]*x5 + diag[48]*x6;
 80:       break;
 81:     default:
 82:       for (ib=0; ib<bs; ib++) {
 83:         PetscScalar rowsum = 0;
 84:         for (jb=0; jb<bs; jb++) {
 85:           rowsum += diag[ib+jb*bs] * xx[ncnt+jb];
 86:         }
 87:         yy[ncnt+ib] = rowsum;
 88:       }
 89:     }
 90:     ncnt += bsizes[i];
 91:     diag += bsizes[i]*bsizes[i];
 92:   }
 93:   VecRestoreArrayRead(x,&xx);
 94:   VecRestoreArray(y,&yy);
 95:   return 0;
 96: }

 98: /* -------------------------------------------------------------------------- */
 99: static PetscErrorCode PCSetUp_VPBJacobi(PC pc)
100: {
101:   PC_VPBJacobi    *jac = (PC_VPBJacobi*)pc->data;
102:   Mat            A = pc->pmat;
103:   MatFactorError err;
104:   PetscInt       i,nsize = 0,nlocal;
105:   PetscInt       nblocks;
106:   const PetscInt *bsizes;

108:   MatGetVariableBlockSizes(pc->pmat,&nblocks,&bsizes);
109:   MatGetLocalSize(pc->pmat,&nlocal,NULL);
111:   if (!jac->diag) {
112:     for (i=0; i<nblocks; i++) nsize += bsizes[i]*bsizes[i];
113:     PetscMalloc1(nsize,&jac->diag);
114:   }
115:   MatInvertVariableBlockDiagonal(A,nblocks,bsizes,jac->diag);
116:   MatFactorGetError(A,&err);
117:   if (err) pc->failedreason = (PCFailedReason)err;
118:   pc->ops->apply = PCApply_VPBJacobi;
119:   return 0;
120: }
121: /* -------------------------------------------------------------------------- */
122: static PetscErrorCode PCDestroy_VPBJacobi(PC pc)
123: {
124:   PC_VPBJacobi    *jac = (PC_VPBJacobi*)pc->data;

126:   /*
127:       Free the private data structure that was hanging off the PC
128:   */
129:   PetscFree(jac->diag);
130:   PetscFree(pc->data);
131:   return 0;
132: }

134: /* -------------------------------------------------------------------------- */
135: /*MC
136:      PCVPBJACOBI - Variable size point block Jacobi preconditioner

138:    Notes:
139:      See PCJACOBI for point Jacobi preconditioning, PCPBJACOBI for fixed point block size, and PCBJACOBI for large size blocks

141:      This works for AIJ matrices

143:      Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
144:      is detected a PETSc error is generated.

146:      One must call MatSetVariableBlockSizes() to use this preconditioner

148:    Developer Notes:
149:      This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow
150:      the factorization to continue even after a zero pivot is found resulting in a Nan and hence
151:      terminating KSP with a KSP_DIVERGED_NANORIF allowing
152:      a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.

154:      Perhaps should provide an option that allows generation of a valid preconditioner
155:      even if a block is singular as the PCJACOBI does.

157:    Level: beginner

159: .seealso:  MatSetVariableBlockSizes(), PCCreate(), PCSetType(), PCType (for list of available types), PC, PCJACOBI, PCPBJACOBI, PCBJACOBI

161: M*/

163: PETSC_EXTERN PetscErrorCode PCCreate_VPBJacobi(PC pc)
164: {
165:   PC_VPBJacobi   *jac;

167:   /*
168:      Creates the private data structure for this preconditioner and
169:      attach it to the PC object.
170:   */
171:   PetscNewLog(pc,&jac);
172:   pc->data = (void*)jac;

174:   /*
175:      Initialize the pointers to vectors to ZERO; these will be used to store
176:      diagonal entries of the matrix for fast preconditioner application.
177:   */
178:   jac->diag = NULL;

180:   /*
181:       Set the pointers for the functions that are provided above.
182:       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
183:       are called, they will automatically call these functions.  Note we
184:       choose not to provide a couple of these functions since they are
185:       not needed.
186:   */
187:   pc->ops->apply               = PCApply_VPBJacobi;
188:   pc->ops->applytranspose      = NULL;
189:   pc->ops->setup               = PCSetUp_VPBJacobi;
190:   pc->ops->destroy             = PCDestroy_VPBJacobi;
191:   pc->ops->setfromoptions      = NULL;
192:   pc->ops->applyrichardson     = NULL;
193:   pc->ops->applysymmetricleft  = NULL;
194:   pc->ops->applysymmetricright = NULL;
195:   return 0;
196: }