Actual source code: gmreig.c


  2: #include <../src/ksp/ksp/impls/gmres/gmresimpl.h>
  3: #include <petscblaslapack.h>

  5: PetscErrorCode KSPComputeExtremeSingularValues_GMRES(KSP ksp,PetscReal *emax,PetscReal *emin)
  6: {
  7:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
  8:   PetscInt       n = gmres->it + 1,i,N = gmres->max_k + 2;
  9:   PetscBLASInt   bn, bN,lwork, idummy,lierr;
 10:   PetscScalar    *R        = gmres->Rsvd,*work = R + N*N,sdummy = 0;
 11:   PetscReal      *realpart = gmres->Dsvd;

 13:   PetscBLASIntCast(n,&bn);
 14:   PetscBLASIntCast(N,&bN);
 15:   PetscBLASIntCast(5*N,&lwork);
 16:   PetscBLASIntCast(N,&idummy);
 17:   if (n <= 0) {
 18:     *emax = *emin = 1.0;
 19:     return 0;
 20:   }
 21:   /* copy R matrix to work space */
 22:   PetscArraycpy(R,gmres->hh_origin,(gmres->max_k+2)*(gmres->max_k+1));

 24:   /* zero below diagonal garbage */
 25:   for (i=0; i<n; i++) R[i*N+i+1] = 0.0;

 27:   /* compute Singular Values */
 28:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 29: #if !defined(PETSC_USE_COMPLEX)
 30:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr));
 31: #else
 32:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,realpart+N,&lierr));
 33: #endif
 35:   PetscFPTrapPop();

 37:   *emin = realpart[n-1];
 38:   *emax = realpart[0];
 39:   return 0;
 40: }

 42: PetscErrorCode KSPComputeEigenvalues_GMRES(KSP ksp,PetscInt nmax,PetscReal *r,PetscReal *c,PetscInt *neig)
 43: {
 44: #if !defined(PETSC_USE_COMPLEX)
 45:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
 46:   PetscInt       n = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
 47:   PetscBLASInt   bn, bN, lwork, idummy, l-1;
 48:   PetscScalar    *R        = gmres->Rsvd,*work = R + N*N;
 49:   PetscScalar    *realpart = gmres->Dsvd,*imagpart = realpart + N,sdummy = 0;

 51:   PetscBLASIntCast(n,&bn);
 52:   PetscBLASIntCast(N,&bN);
 53:   PetscBLASIntCast(5*N,&lwork);
 54:   PetscBLASIntCast(N,&idummy);
 56:   *neig = n;

 58:   if (!n) return 0;

 60:   /* copy R matrix to work space */
 61:   PetscArraycpy(R,gmres->hes_origin,N*N);

 63:   /* compute eigenvalues */
 64:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 65:   PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr));
 67:   PetscFPTrapPop();
 68:   PetscMalloc1(n,&perm);
 69:   for (i=0; i<n; i++) perm[i] = i;
 70:   PetscSortRealWithPermutation(n,realpart,perm);
 71:   for (i=0; i<n; i++) {
 72:     r[i] = realpart[perm[i]];
 73:     c[i] = imagpart[perm[i]];
 74:   }
 75:   PetscFree(perm);
 76: #else
 77:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
 78:   PetscInt       n  = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
 79:   PetscScalar    *R = gmres->Rsvd,*work = R + N*N,*eigs = work + 5*N,sdummy;
 80:   PetscBLASInt   bn,bN,lwork,idummy,l-1;

 82:   PetscBLASIntCast(n,&bn);
 83:   PetscBLASIntCast(N,&bN);
 84:   PetscBLASIntCast(5*N,&lwork);
 85:   PetscBLASIntCast(N,&idummy);
 87:   *neig = n;

 89:   if (!n) return 0;

 91:   /* copy R matrix to work space */
 92:   PetscArraycpy(R,gmres->hes_origin,N*N);

 94:   /* compute eigenvalues */
 95:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 96:   PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,gmres->Dsvd,&lierr));
 98:   PetscFPTrapPop();
 99:   PetscMalloc1(n,&perm);
100:   for (i=0; i<n; i++) perm[i] = i;
101:   for (i=0; i<n; i++) r[i] = PetscRealPart(eigs[i]);
102:   PetscSortRealWithPermutation(n,r,perm);
103:   for (i=0; i<n; i++) {
104:     r[i] = PetscRealPart(eigs[perm[i]]);
105:     c[i] = PetscImaginaryPart(eigs[perm[i]]);
106:   }
107:   PetscFree(perm);
108: #endif
109:   return 0;
110: }

112: #if !defined(PETSC_USE_COMPLEX)
113: PetscErrorCode KSPComputeRitz_GMRES(KSP ksp,PetscBool ritz,PetscBool small,PetscInt *nrit,Vec S[],PetscReal *tetar,PetscReal *tetai)
114: {
115:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
116:   PetscInt       n = gmres->it + 1,N = gmres->max_k + 1,NbrRitz,nb=0;
117:   PetscInt       i,j,*perm;
118:   PetscReal      *H,*Q,*Ht;              /* H Hessenberg Matrix and Q matrix of eigenvectors of H*/
119:   PetscReal      *wr,*wi,*modul;       /* Real and imaginary part and modul of the Ritz values*/
120:   PetscReal      *SR,*work;
121:   PetscBLASInt   bn,bN,lwork,idummy;
122:   PetscScalar    *t,sdummy = 0;

124:   /* n: size of the Hessenberg matrix */
125:   if (gmres->fullcycle) n = N-1;
126:   /* NbrRitz: number of (harmonic) Ritz pairs to extract */
127:   NbrRitz = PetscMin(*nrit,n);

129:   /* Definition of PetscBLASInt for lapack routines*/
130:   PetscBLASIntCast(n,&bn);
131:   PetscBLASIntCast(N,&bN);
132:   PetscBLASIntCast(N,&idummy);
133:   PetscBLASIntCast(5*N,&lwork);
134:   /* Memory allocation */
135:   PetscMalloc1(bN*bN,&H);
136:   PetscMalloc1(bn*bn,&Q);
137:   PetscMalloc1(lwork,&work);
138:   PetscMalloc1(n,&wr);
139:   PetscMalloc1(n,&wi);

141:   /* copy H matrix to work space */
142:   if (gmres->fullcycle) {
143:     PetscArraycpy(H,gmres->hes_ritz,bN*bN);
144:   } else {
145:     PetscArraycpy(H,gmres->hes_origin,bN*bN);
146:   }

148:   /* Modify H to compute Harmonic Ritz pairs H = H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
149:   if (!ritz) {
150:     /* Transpose the Hessenberg matrix => Ht */
151:     PetscMalloc1(bn*bn,&Ht);
152:     for (i=0; i<bn; i++) {
153:       for (j=0; j<bn; j++) {
154:         Ht[i*bn+j] = H[j*bN+i];
155:       }
156:     }
157:     /* Solve the system H^T*t = h^2_{m+1,m}e_m */
158:     PetscCalloc1(bn,&t);
159:     /* t = h^2_{m+1,m}e_m */
160:     if (gmres->fullcycle) {
161:       t[bn-1] = PetscSqr(gmres->hes_ritz[(bn-1)*bN+bn]);
162:     } else {
163:       t[bn-1] = PetscSqr(gmres->hes_origin[(bn-1)*bN+bn]);
164:     }
165:     /* Call the LAPACK routine dgesv to compute t = H^{-T}*t */
166:     {
167:       PetscBLASInt info;
168:       PetscBLASInt nrhs = 1;
169:       PetscBLASInt *ipiv;
170:       PetscMalloc1(bn,&ipiv);
171:       PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn,&nrhs,Ht,&bn,ipiv,t,&bn,&info));
173:       PetscFree(ipiv);
174:       PetscFree(Ht);
175:     }
176:     /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
177:     for (i=0; i<bn; i++) H[(bn-1)*bn+i] += t[i];
178:     PetscFree(t);
179:   }

181:   /* Compute (harmonic) Ritz pairs */
182:   {
183:     PetscBLASInt info;
184:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
185:     PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","V",&bn,H,&bN,wr,wi,&sdummy,&idummy,Q,&bn,work,&lwork,&info));
187:   }
188:   /* sort the (harmonic) Ritz values */
189:   PetscMalloc1(n,&modul);
190:   PetscMalloc1(n,&perm);
191:   for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
192:   for (i=0; i<n; i++) perm[i] = i;
193:   PetscSortRealWithPermutation(n,modul,perm);
194:   /* count the number of extracted Ritz or Harmonic Ritz pairs (with complex conjugates) */
195:   if (small) {
196:     while (nb < NbrRitz) {
197:       if (!wi[perm[nb]]) nb += 1;
198:       else nb += 2;
199:     }
200:     PetscMalloc1(nb*n,&SR);
201:     for (i=0; i<nb; i++) {
202:       tetar[i] = wr[perm[i]];
203:       tetai[i] = wi[perm[i]];
204:       PetscArraycpy(&SR[i*n],&(Q[perm[i]*bn]),n);
205:     }
206:   } else {
207:     while (nb < NbrRitz) {
208:       if (wi[perm[n-nb-1]] == 0) nb += 1;
209:       else nb += 2;
210:     }
211:     PetscMalloc1(nb*n,&SR);
212:     for (i=0; i<nb; i++) {
213:       tetar[i] = wr[perm[n-nb+i]];
214:       tetai[i] = wi[perm[n-nb+i]];
215:       PetscArraycpy(&SR[i*n], &(Q[perm[n-nb+i]*bn]), n);
216:     }
217:   }
218:   PetscFree(modul);
219:   PetscFree(perm);

221:   /* Form the Ritz or Harmonic Ritz vectors S=VV*Sr,
222:     where the columns of VV correspond to the basis of the Krylov subspace */
223:   if (gmres->fullcycle) {
224:     for (j=0; j<nb; j++) {
225:       VecZeroEntries(S[j]);
226:       VecMAXPY(S[j],n,&SR[j*n],gmres->vecb);
227:     }
228:   } else {
229:     for (j=0; j<nb; j++) {
230:       VecZeroEntries(S[j]);
231:       VecMAXPY(S[j],n,&SR[j*n],&VEC_VV(0));
232:     }
233:   }
234:   *nrit = nb;
235:   PetscFree(H);
236:   PetscFree(Q);
237:   PetscFree(SR);
238:   PetscFree(wr);
239:   PetscFree(wi);
240:   return 0;
241: }
242: #endif