Actual source code: dvd_blas.c

  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2013, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.

  8:    SLEPc is free software: you can redistribute it and/or modify it under  the
  9:    terms of version 3 of the GNU Lesser General Public License as published by
 10:    the Free Software Foundation.

 12:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 13:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 14:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 15:    more details.

 17:    You  should have received a copy of the GNU Lesser General  Public  License
 18:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 19:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 20: */

 22: #include <slepc-private/vecimplslepc.h>
 23:  #include davidson.h

 25: PetscLogEvent SLEPC_SlepcDenseMatProd = 0;
 26: PetscLogEvent SLEPC_SlepcDenseNorm = 0;
 27: PetscLogEvent SLEPC_SlepcDenseCopy = 0;
 28: PetscLogEvent SLEPC_VecsMult = 0;

 30: void dvd_sum_local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *t);
 31: PetscErrorCode VecsMultS_copy_func(PetscScalar *out,PetscInt size_out,void *ptr);

 35: /*
 36:   Compute C <- a*A*B + b*C, where
 37:     ldC, the leading dimension of C,
 38:     ldA, the leading dimension of A,
 39:     rA, cA, rows and columns of A,
 40:     At, if true use the transpose of A instead,
 41:     ldB, the leading dimension of B,
 42:     rB, cB, rows and columns of B,
 43:     Bt, if true use the transpose of B instead
 44: */
 45: PetscErrorCode SlepcDenseMatProd(PetscScalar *C,PetscInt _ldC,PetscScalar b,PetscScalar a,const PetscScalar *A,PetscInt _ldA,PetscInt rA,PetscInt cA,PetscBool At,const PetscScalar *B,PetscInt _ldB,PetscInt rB,PetscInt cB,PetscBool Bt)
 46: {
 47:   PetscErrorCode  ierr;
 48:   PetscInt        tmp;
 49:   PetscBLASInt    m, n, k, ldA = _ldA, ldB = _ldB, ldC = _ldC;
 50:   const char      *N = "N", *T = "C", *qA = N, *qB = N;

 53:   if ((rA == 0) || (cB == 0)) return(0);

 58:   PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);

 60:   /* Transpose if needed */
 61:   if (At) tmp = rA, rA = cA, cA = tmp, qA = T;
 62:   if (Bt) tmp = rB, rB = cB, cB = tmp, qB = T;

 64:   /* Check size */
 65:   if (cA != rB) SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");

 67:   /* Do stub */
 68:   if ((rA == 1) && (cA == 1) && (cB == 1)) {
 69:     if (!At && !Bt) *C = *A * *B;
 70:     else if (At && !Bt) *C = PetscConj(*A) * *B;
 71:     else if (!At && Bt) *C = *A * PetscConj(*B);
 72:     else *C = PetscConj(*A) * PetscConj(*B);
 73:     m = n = k = 1;
 74:   } else {
 75:     m = rA; n = cB; k = cA;
 76:     PetscStackCallBLAS("BLASgemm",BLASgemm_(qA,qB,&m,&n,&k,&a,(PetscScalar*)A,&ldA,(PetscScalar*)B,&ldB,&b,C,&ldC));
 77:   }

 79:   PetscLogFlops(m*n*2*k);
 80:   PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
 81:   return(0);
 82: }

 86: /*
 87:   Compute C <- A*B, where
 88:     sC, structure of C,
 89:     ldC, the leading dimension of C,
 90:     sA, structure of A,
 91:     ldA, the leading dimension of A,
 92:     rA, cA, rows and columns of A,
 93:     At, if true use the transpose of A instead,
 94:     sB, structure of B,
 95:     ldB, the leading dimension of B,
 96:     rB, cB, rows and columns of B,
 97:     Bt, if true use the transpose of B instead
 98: */
 99: PetscErrorCode SlepcDenseMatProdTriang(PetscScalar *C,MatType_t sC,PetscInt ldC,const PetscScalar *A,MatType_t sA,PetscInt ldA,PetscInt rA,PetscInt cA,PetscBool At,const PetscScalar *B,MatType_t sB,PetscInt ldB,PetscInt rB,PetscInt cB,PetscBool Bt)
100: {
101:   PetscErrorCode  ierr;
102:   PetscInt        tmp;
103:   PetscScalar     one=1.0, zero=0.0;
104:   PetscBLASInt    rC, cC, _ldA = ldA, _ldB = ldB, _ldC = ldC;

107:   if ((rA == 0) || (cB == 0)) return(0);

112:   /* Transpose if needed */
113:   if (At) tmp = rA, rA = cA, cA = tmp;
114:   if (Bt) tmp = rB, rB = cB, cB = tmp;

116:   /* Check size */
117:   if (cA != rB) SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
118:   if (sB != 0) SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for B");

120:   /* Optimized version: trivial case */
121:   if ((rA == 1) && (cA == 1) && (cB == 1)) {
122:     if (!At && !Bt)     *C = *A * *B;
123:     else if (At && !Bt) *C = PetscConj(*A) * *B;
124:     else if (!At && Bt) *C = *A * PetscConj(*B);
125:     else if (At && Bt)  *C = PetscConj(*A) * PetscConj(*B);
126:     return(0);
127:   }

129:   /* Optimized versions: sA == 0 && sB == 0 */
130:   if ((sA == 0) && (sB == 0)) {
131:     if (At) tmp = rA, rA = cA, cA = tmp;
132:     if (Bt) tmp = rB, rB = cB, cB = tmp;
133:     SlepcDenseMatProd(C, ldC, 0.0, 1.0, A, ldA, rA, cA, At, B, ldB, rB, cB, Bt);
134:     return(0);
135:   }

137:   /* Optimized versions: A hermitian && (B not triang) */
138:   if (DVD_IS(sA,DVD_MAT_HERMITIAN) &&
139:       DVD_ISNOT(sB,DVD_MAT_UTRIANG) &&
140:       DVD_ISNOT(sB,DVD_MAT_LTRIANG)) {
141:     PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
142:     rC = rA; cC = cB;
143:     PetscStackCallBLAS("BLASsymm",BLASsymm_("L",DVD_ISNOT(sA,DVD_MAT_LTRIANG)?"U":"L",&rC,&cC,&one,(PetscScalar*)A,&_ldA,(PetscScalar*)B,&_ldB,&zero,C,&_ldC));
144:     PetscLogFlops(rA*cB*cA);
145:     PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
146:     return(0);
147:   }

149:   /* Optimized versions: B hermitian && (A not triang) */
150:   if (DVD_IS(sB,DVD_MAT_HERMITIAN) &&
151:       DVD_ISNOT(sA,DVD_MAT_UTRIANG) &&
152:       DVD_ISNOT(sA,DVD_MAT_LTRIANG)) {
153:     PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
154:     rC = rA; cC = cB;
155:     PetscStackCallBLAS("BLASsymm",BLASsymm_("R",DVD_ISNOT(sB,DVD_MAT_LTRIANG)?"U":"L",&rC,&cC,&one,(PetscScalar*)B,&_ldB,(PetscScalar*)A,&_ldA,&zero,C,&_ldC));
156:     PetscLogFlops(rA*cB*cA);
157:     PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
158:     return(0);
159:   }

161:   SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for A");
162: }

166: /*
167:   Normalize the columns of the matrix A, where
168:     ldA, the leading dimension of A,
169:     rA, cA, rows and columns of A.
170:   if eigi is given, the pairs of contiguous columns i i+1 such as eigi[i] != 0
171:   are normalized as being one column.
172: */
173: PetscErrorCode SlepcDenseNorm(PetscScalar *A,PetscInt ldA,PetscInt _rA,PetscInt cA,PetscScalar *eigi)
174: {
175:   PetscErrorCode  ierr;
176:   PetscInt        i;
177:   PetscScalar     norm, norm0;
178:   PetscBLASInt    rA = _rA, one=1;


184:   PetscLogEventBegin(SLEPC_SlepcDenseNorm,0,0,0,0);

186:   for (i=0;i<cA;i++) {
187:     if (eigi && eigi[i] != 0.0) {
188:       norm = BLASnrm2_(&rA, &A[i*ldA], &one);
189:       norm0 = BLASnrm2_(&rA, &A[(i+1)*ldA], &one);
190:       norm = 1.0/PetscSqrtScalar(norm*norm + norm0*norm0);
191:       PetscStackCallBLAS("BLASscal",BLASscal_(&rA, &norm, &A[i*ldA], &one));
192:       PetscStackCallBLAS("BLASscal",BLASscal_(&rA, &norm, &A[(i+1)*ldA], &one));
193:       i++;
194:     } else {
195:       norm = BLASnrm2_(&rA, &A[i*ldA], &one);
196:       norm = 1.0 / norm;
197:       PetscStackCallBLAS("BLASscal",BLASscal_(&rA, &norm, &A[i*ldA], &one));
198:     }
199:   }

201:   PetscLogEventEnd(SLEPC_SlepcDenseNorm,0,0,0,0);
202:   return(0);
203: }

207: /*
208:   Y <- X, where
209:   ldX, leading dimension of X,
210:   rX, cX, rows and columns of X
211:   ldY, leading dimension of Y
212: */
213: PetscErrorCode SlepcDenseCopy(PetscScalar *Y,PetscInt ldY,PetscScalar *X,PetscInt ldX,PetscInt rX,PetscInt cX)
214: {
215:   PetscErrorCode  ierr;
216:   PetscInt        i;


222:   if ((ldX < rX) || (ldY < rX)) SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");

224:   /* Quick exit */
225:   if (Y == X) {
226:     if (ldX != ldY) SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
227:     return(0);
228:   }

230:   PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);
231:   for (i=0;i<cX;i++) {
232:     PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
233:   }
234:   PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);
235:   return(0);
236: }

240: /*
241:   Y <- X, where
242:   ldX, leading dimension of X,
243:   rX, cX, rows and columns of X
244:   ldY, leading dimension of Y
245: */
246: PetscErrorCode SlepcDenseCopyTriang(PetscScalar *Y,MatType_t sY,PetscInt ldY,PetscScalar *X,MatType_t sX,PetscInt ldX,PetscInt rX,PetscInt cX)
247: {
248:   PetscErrorCode  ierr;
249:   PetscInt        i,j,c;


255:   if ((ldX < rX) || (ldY < rX)) SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");

257:   if (sY == 0 && sX == 0) {
258:     SlepcDenseCopy(Y, ldY, X, ldX, rX, cX);
259:     return(0);
260:   }

262:   if (rX != cX) SETERRQ(PETSC_COMM_SELF,1, "Rectangular matrices not supported");

264:   if (DVD_IS(sX,DVD_MAT_UTRIANG) &&
265:       DVD_ISNOT(sX,DVD_MAT_LTRIANG)) {        /* UpTr to ... */
266:     if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
267:         DVD_ISNOT(sY,DVD_MAT_LTRIANG))        /* ... UpTr, */
268:       c = 0;                                      /*     so copy */
269:     else if (DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
270:              DVD_IS(sY,DVD_MAT_LTRIANG))       /* ... LoTr, */
271:       c = 1;                                      /*     so transpose */
272:     else                                          /* ... Full, */
273:       c = 2;                                      /*     so reflect from up */
274:   } else if (DVD_ISNOT(sX,DVD_MAT_UTRIANG) &&
275:              DVD_IS(sX,DVD_MAT_LTRIANG)) {    /* LoTr to ... */
276:     if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
277:         DVD_ISNOT(sY,DVD_MAT_LTRIANG))        /* ... UpTr, */
278:       c = 1;                                      /*     so transpose */
279:     else if (DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
280:              DVD_IS(sY,DVD_MAT_LTRIANG))       /* ... LoTr, */
281:       c = 0;                                      /*     so copy */
282:     else                                          /* ... Full, */
283:       c = 3;                                      /*     so reflect fr. down */
284:   } else                                          /* Full to any, */
285:     c = 0;                                        /*     so copy */

287:   PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);

289:   switch (c) {
290:   case 0: /* copy */
291:     for (i=0;i<cX;i++) {
292:       PetscMemcpy(&Y[ldY*i],&X[ldX*i],sizeof(PetscScalar)*rX);
293:     }
294:     break;

296:   case 1: /* transpose */
297:     for (i=0;i<cX;i++)
298:       for (j=0;j<rX;j++)
299:         Y[ldY*j+i] = PetscConj(X[ldX*i+j]);
300:     break;

302:   case 2: /* reflection from up */
303:     for (i=0;i<cX;i++)
304:       for (j=0;j<PetscMin(i+1,rX);j++)
305:         Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
306:     break;

308:   case 3: /* reflection from down */
309:     for (i=0;i<cX;i++)
310:       for (j=i;j<rX;j++)
311:         Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
312:     break;
313:   }
314:   PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);
315:   return(0);
316: }

320: /*
321:   Compute Y[0..cM-1] <- alpha * X[0..cX-1] * M + beta * Y[0..cM-1],
322:   where X and Y are contiguous global vectors.
323: */
324: PetscErrorCode SlepcUpdateVectorsZ(Vec *Y,PetscScalar beta,PetscScalar alpha,Vec *X,PetscInt cX,const PetscScalar *M,PetscInt ldM,PetscInt rM,PetscInt cM)
325: {
326:   PetscErrorCode  ierr;

329:   SlepcUpdateVectorsS(Y,1,beta,alpha,X,cX,1,M,ldM,rM,cM);
330:   return(0);
331: }

335: /*
336:   Compute Y[0:dY:cM*dY-1] <- alpha * X[0:dX:cX-1] * M + beta * Y[0:dY:cM*dY-1],
337:   where X and Y are contiguous global vectors.
338: */
339: PetscErrorCode SlepcUpdateVectorsS(Vec *Y,PetscInt dY,PetscScalar beta,PetscScalar alpha,Vec *X,PetscInt cX,PetscInt dX,const PetscScalar *M,PetscInt ldM,PetscInt rM,PetscInt cM)
340: {
341:   PetscErrorCode    ierr;
342:   const PetscScalar *px;
343:   PetscScalar       *py;
344:   PetscInt          rX, rY, ldX, ldY, i, rcX;

347:   SlepcValidVecsContiguous(Y,cM*dY,1);
348:   SlepcValidVecsContiguous(X,cX,5);

351:   /* Compute the real number of columns */
352:   rcX = cX/dX;
353:   if (rcX != rM) SETERRQ(PetscObjectComm((PetscObject)*Y),1, "Matrix dimensions do not match");

355:   if ((rcX == 0) || (rM == 0) || (cM == 0)) return(0);
356:   else if ((Y + cM <= X) || (X + cX <= Y) ||
357:              ((X != Y) && ((PetscMax(dX,dY))%(PetscMin(dX,dY))!=0))) {
358:     /* If Y[0..cM-1] and X[0..cX-1] are not overlapped... */

360:     /* Get the dense matrices and dimensions associated to Y and X */
361:     VecGetLocalSize(X[0], &rX);
362:     VecGetLocalSize(Y[0], &rY);
363:     if (rX != rY) SETERRQ(PetscObjectComm((PetscObject)*Y),1, "The multivectors do not have the same dimension");
364:     VecGetArrayRead(X[0], &px);
365:     VecGetArray(Y[0], &py);

367:     /* Update the strides */
368:     ldX = rX*dX; ldY= rY*dY;

370:     /* Do operation */
371:     SlepcDenseMatProd(py, ldY, beta, alpha, px, ldX, rX, rcX,
372:                     PETSC_FALSE, M, ldM, rM, cM, PETSC_FALSE);

374:     VecRestoreArrayRead(X[0], &px);
375:     VecRestoreArray(Y[0], &py);
376:     for (i=1;i<cM;i++) {
377:       PetscObjectStateIncrease((PetscObject)Y[dY*i]);
378:     }

380:   } else if ((Y >= X) && (beta == 0.0) && (dY == dX)) {
381:     /* If not, call to SlepcUpdateVectors */
382:     SlepcUpdateStrideVectors(cX, X, Y-X, dX, Y-X+cM*dX, M-ldM*(Y-X),
383:                                     ldM, PETSC_FALSE);
384:     if (alpha != 1.0)
385:       for (i=0;i<cM;i++) {
386:         VecScale(Y[i],alpha);
387:       }
388:   } else SETERRQ(PetscObjectComm((PetscObject)*Y),1, "Unsupported case");
389:   return(0);
390: }

394: /*
395:   Compute X <- alpha * X[0:dX:cX-1] * M
396:   where X is a matrix with non-consecutive columns
397: */
398: PetscErrorCode SlepcUpdateVectorsD(Vec *X,PetscInt cX,PetscScalar alpha,const PetscScalar *M,PetscInt ldM,PetscInt rM,PetscInt cM,PetscScalar *work,PetscInt lwork)
399: {
401:   PetscScalar    **px, *Y, *Z;
402:   PetscInt       rX, i, j, rY, rY0, ldY;

405:   SlepcValidVecsContiguous(X,cX,1);

409:   if (cX != rM) SETERRQ(PetscObjectComm((PetscObject)*X),1, "Matrix dimensions do not match");

411:   rY = (lwork/2)/cX;
412:   if (rY <= 0) SETERRQ(PetscObjectComm((PetscObject)*X),1, "Insufficient work space given");
413:   Y = work; Z = &Y[cX*rY]; ldY = rY;

415:   if ((cX == 0) || (rM == 0) || (cM == 0)) return(0);

417:   /* Get the dense vectors associated to the columns of X */
418:   PetscMalloc(sizeof(Vec)*cX,&px);
419:   for (i=0;i<cX;i++) {
420:     VecGetArray(X[i],&px[i]);
421:   }
422:   VecGetLocalSize(X[0],&rX);

424:   for (i=0,rY0=0;i<rX;i+=rY0) {
425:     rY0 = PetscMin(rY, rX-i);

427:     /* Y <- X[i0:i1,:] */
428:     for (j=0;j<cX;j++) {
429:       SlepcDenseCopy(&Y[ldY*j], ldY, px[j]+i, rX, rY0, 1);
430:     }

432:     /* Z <- Y * M */
433:     SlepcDenseMatProd(Z, ldY, 0.0, alpha, Y, ldY, rY0, cX, PETSC_FALSE,
434:                                                  M, ldM, rM, cM, PETSC_FALSE);

436:     /* X <- Z */
437:     for (j=0;j<cM;j++) {
438:       SlepcDenseCopy(px[j]+i, rX, &Z[j*ldY], ldY, rY0, 1);
439:     }
440:   }

442:   for (i=0;i<cX;i++) {
443:     VecRestoreArray(X[i],&px[i]);
444:   }
445:   PetscFree(px);
446:   return(0);
447: }

451: /* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
452:                  [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
453:   where W = U' * V.
454:   workS0 and workS1 are an auxiliary scalar vector of size
455:   (eU-sU)*sV*(sU!=0)+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
456:   is needed, and of size eU*eV.
457: */
458: PetscErrorCode VecsMult(PetscScalar *M,MatType_t sM,PetscInt ldM,Vec *U,PetscInt sU,PetscInt eU,Vec *V,PetscInt sV,PetscInt eV,PetscScalar *workS0,PetscScalar *workS1)
459: {
460:   PetscErrorCode    ierr;
461:   PetscInt          ldU, ldV, i, j, k, ms = (eU-sU)*sV*(sU==0?0:1)+(eV-sV)*eU;
462:   const PetscScalar *pu, *pv;
463:   PetscScalar       *W, *Wr;

466:   /* Check if quick exit */
467:   if ((eU-sU == 0) || (eV-sV == 0)) return(0);

469:   SlepcValidVecsContiguous(U,eU,4);
470:   SlepcValidVecsContiguous(V,eV,7);

473:   /* Get the dense matrices and dimensions associated to U and V */
474:   VecGetLocalSize(U[0], &ldU);
475:   VecGetLocalSize(V[0], &ldV);
476:   if (ldU != ldV) SETERRQ(PetscObjectComm((PetscObject)*U),1, "Matrix dimensions do not match");
477:   VecGetArrayRead(U[0], &pu);
478:   VecGetArrayRead(V[0], &pv);

480:   if (workS0) {
482:     W = workS0;
483:   } else {
484:     PetscMalloc(sizeof(PetscScalar)*ms,&W);
485:   }

487:   PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);

489:   if ((sU == 0) && (sV == 0) && (eU == ldM)) {
490:     /* Use the smart memory usage version */

492:     /* W <- U' * V */
493:     SlepcDenseMatProdTriang(W, sM, eU,
494:                                    pu, 0, ldU, ldU, eU, PETSC_TRUE,
495:                                    pv, 0, ldV, ldV, eV, PETSC_FALSE);

497:     /* ReduceAll(W, SUM) */
498:     MPI_Allreduce(W, M, eU*eV, MPIU_SCALAR, MPIU_SUM,
499:                          PetscObjectComm((PetscObject)U[0]));
500:   /* Full M matrix */
501:   } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
502:              DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
503:     if (workS1) {
505:       Wr = workS1;
506:       if (PetscAbs(PetscMin(W-workS1, workS1-W)) < ms) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
507:     } else {
508:       PetscMalloc(sizeof(PetscScalar)*ms,&Wr);
509:     }

511:     /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
512:     if (sU > 0) {
513:       SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
514:                                pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
515:                                pv       , ldV, ldV, sV,    PETSC_FALSE);
516:     }

518:     /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
519:     SlepcDenseMatProd(W+(eU-sU)*sV*(sU > 0?1:0), eU, 0.0, 1.0,
520:                              pu,        ldU, ldU, eU,    PETSC_TRUE,
521:                              pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);

523:     /* ReduceAll(W, SUM) */
524:     MPI_Allreduce(W, Wr, ms, MPIU_SCALAR,
525:                       MPIU_SUM, PetscObjectComm((PetscObject)U[0]));

527:     /* M(...,...) <- W */
528:     k = 0;
529:     if (sU > 0) for (i=0; i<sV; i++)
530:       for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
531:     for (i=sV; i<eV; i++)
532:       for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];

534:     if (!workS1) {
535:       PetscFree(Wr);
536:     }

538:   /* Upper triangular M matrix */
539:   } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
540:              DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
541:     if (workS1) {
543:       Wr = workS1;
544:       if (PetscAbs(PetscMin(W-workS1,workS1-W)) < (eV-sV)*eU) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
545:     } else {
546:       PetscMalloc(sizeof(PetscScalar)*(eV-sV)*eU,&Wr);
547:     }

549:     /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
550:     SlepcDenseMatProd(W,         eU,  0.0, 1.0,
551:                              pu,        ldU, ldU, eU,    PETSC_TRUE,
552:                              pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);

554:     /* ReduceAll(W, SUM) */
555:     MPI_Allreduce(W, Wr, (eV-sV)*eU, MPIU_SCALAR, MPIU_SUM,
556:                          PetscObjectComm((PetscObject)U[0]));

558:     /* M(...,...) <- W */
559:     for (i=sV,k=0; i<eV; i++)
560:         for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];

562:     if (!workS1) {
563:       PetscFree(Wr);
564:     }

566:   /* Lower triangular M matrix */
567:   } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
568:              DVD_IS(sM,DVD_MAT_LTRIANG)) {
569:     if (workS1) {
571:       Wr = workS1;
572:       if (PetscMin(W - workS1, workS1 - W) < (eU-sU)*eV) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
573:     } else {
574:       PetscMalloc(sizeof(PetscScalar)*(eU-sU)*eV, &Wr);
575:     }

577:     /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
578:     SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
579:                              pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
580:                              pv       , ldV, ldV, eV,    PETSC_FALSE);

582:     /* ReduceAll(W, SUM) */
583:     MPI_Allreduce(W, Wr, (eU-sU)*eV, MPIU_SCALAR, MPIU_SUM,
584:                          PetscObjectComm((PetscObject)U[0]));

586:     /* M(...,...) <- W */
587:     for (i=0,k=0; i<eV; i++)
588:       for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];

590:     if (!workS1) {
591:       PetscFree(Wr);
592:     }
593:   }

595:   PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);

597:   if (!workS0) {
598:     PetscFree(W);
599:   }

601:   VecRestoreArrayRead(U[0],&pu);
602:   VecRestoreArrayRead(V[0],&pv);
603:   return(0);
604: }

608: /* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
609:                  [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
610:   where W = local_U' * local_V. Needs VecsMultIb for completing the operation!
611:   workS0 and workS1 are an auxiliary scalar vector of size
612:   (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
613:   is needed, and of size eU*eV.
614: */
615: PetscErrorCode VecsMultIa(PetscScalar *M,MatType_t sM,PetscInt ldM,Vec *U,PetscInt sU,PetscInt eU,Vec *V,PetscInt sV,PetscInt eV)
616: {
617:   PetscErrorCode  ierr;
618:   PetscInt        ldU, ldV;
619:   PetscScalar     *pu, *pv;

622:   /* Check if quick exit */
623:   if ((eU-sU == 0) || (eV-sV == 0)) return(0);

625:   SlepcValidVecsContiguous(U,eU,4);
626:   SlepcValidVecsContiguous(V,eV,7);

629:   /* Get the dense matrices and dimensions associated to U and V */
630:   VecGetLocalSize(U[0],&ldU);
631:   VecGetLocalSize(V[0],&ldV);
632:   if (ldU != ldV) SETERRQ(PetscObjectComm((PetscObject)*U),1, "Matrix dimensions do not match");
633:   VecGetArray(U[0],&pu);
634:   VecGetArray(V[0],&pv);

636:   if ((sU == 0) && (sV == 0) && (eU == ldM)) {
637:     /* M <- local_U' * local_V */
638:     SlepcDenseMatProdTriang(M, sM, eU,
639:                                    pu, 0, ldU, ldU, eU, PETSC_TRUE,
640:                                    pv, 0, ldV, ldV, eV, PETSC_FALSE);

642:   /* Full M matrix */
643:   } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
644:              DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
645:     /* M(sU:eU-1,0:sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
646:     SlepcDenseMatProd(&M[sU], ldM, 0.0, 1.0,
647:                              pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
648:                              pv       , ldV, ldV, sV,    PETSC_FALSE);

650:     /* M(0:eU-1,sV:eV-1) <- U(0:eU-1)' * V(sV:eV-1) */
651:     SlepcDenseMatProd(&M[ldM*sV], ldM, 0.0, 1.0,
652:                              pu,        ldU, ldU, eU,    PETSC_TRUE,
653:                              pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);

655:   /* Other structures */
656:   } else SETERRQ(PetscObjectComm((PetscObject)*U),1, "Matrix structure not supported");

658:   VecRestoreArray(U[0],&pu);
659:   PetscObjectStateDecrease((PetscObject)U[0]);
660:   VecRestoreArray(V[0],&pv);
661:   PetscObjectStateDecrease((PetscObject)V[0]);
662:   return(0);
663: }

667: /* Computes M <- nprocs*M
668:   where nprocs is the number of processors.
669: */
670: PetscErrorCode VecsMultIc(PetscScalar *M,MatType_t sM,PetscInt ldM,PetscInt rM,PetscInt cM,Vec V)
671: {
672:   PetscInt    i,j;
673:   PetscMPIInt n;

676:   /* Check if quick exit */
677:   if ((rM == 0) || (cM == 0)) return(0);

680:   if (sM != 0) SETERRQ(PetscObjectComm((PetscObject)V),1, "Matrix structure not supported");

682:   MPI_Comm_size(PetscObjectComm((PetscObject)V), &n);

684:   for (i=0;i<cM;i++)
685:     for (j=0;j<rM;j++)
686:       M[ldM*i+j] /= (PetscScalar)n;
687:   return(0);
688: }

692: /* Computes N <- Allreduce([ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ])
693:                           ([ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ])
694:   where W = U' * V.
695:   workS0 and workS1 are an auxiliary scalar vector of size
696:   (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
697:   is needed, and of size eU*eV.
698: */
699: PetscErrorCode VecsMultIb(PetscScalar *M,MatType_t sM,PetscInt ldM,PetscInt rM,PetscInt cM,PetscScalar *auxS,Vec V)
700: {
702:   PetscScalar    *W, *Wr;

705:   /* Check if quick exit */
706:   if ((rM == 0) || (cM == 0)) return(0);

709:   if (auxS) {
711:     W = auxS;
712:   } else {
713:     PetscMalloc(sizeof(PetscScalar)*rM*cM*2,&W);
714:   }
715:   Wr = W + rM*cM;

717:   PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);

719:   if (sM == 0) {
720:     /* W <- M */
721:     SlepcDenseCopy(W, rM, M, ldM, rM, cM);

723:     /* Wr <- ReduceAll(W, SUM) */
724:     MPI_Allreduce(W, Wr, rM*cM, MPIU_SCALAR, MPIU_SUM,
725:                          PetscObjectComm((PetscObject)V));

727:     /* M <- Wr */
728:     SlepcDenseCopy(M, ldM, Wr, rM, rM, cM);

730:   /* Other structures */
731:   } else SETERRQ(PetscObjectComm((PetscObject)V),1, "Matrix structure not supported");

733:   PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);

735:   if (!auxS) {
736:     PetscFree(W);
737:   }
738:   return(0);
739: }

743: /* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
744:                  [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
745:   where W = U' * V.
746:   r, a DvdReduction structure,
747:   sr, an structure DvdMult_copy_func.
748: */
749: PetscErrorCode VecsMultS(PetscScalar *M,MatType_t sM,PetscInt ldM,Vec *U,PetscInt sU,PetscInt eU,Vec *V,PetscInt sV,PetscInt eV,DvdReduction *r,DvdMult_copy_func *sr)
750: {
751:   PetscErrorCode    ierr;
752:   PetscInt          ldU, ldV, ms = (eU-sU)*sV*(sU==0?0:1)+(eV-sV)*eU;
753:   const PetscScalar *pu, *pv;
754:   PetscScalar       *W;

757:   /* Check if quick exit */
758:   if ((eU-sU == 0) || (eV-sV == 0)) return(0);

760:   SlepcValidVecsContiguous(U,eU,4);
761:   SlepcValidVecsContiguous(V,eV,7);

764:   /* Get the dense matrices and dimensions associated to U and V */
765:   VecGetLocalSize(U[0],&ldU);
766:   VecGetLocalSize(V[0],&ldV);
767:   if (ldU != ldV) SETERRQ(PetscObjectComm((PetscObject)*U),1, "Matrix dimensions do not match");
768:   VecGetArrayRead(U[0],&pu);
769:   VecGetArrayRead(V[0],&pv);

771:   PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);

773:   if ((sU == 0) && (sV == 0)) {
774:     /* Use the smart memory usage version */

776:     /* Add the reduction to r */
777:     SlepcAllReduceSum(r, eU*eV, VecsMultS_copy_func, sr, &W);

779:     /* W <- U' * V */
780:     SlepcDenseMatProdTriang(W, sM, eU,
781:                                    pu, 0, ldU, ldU, eU, PETSC_TRUE,
782:                                    pv, 0, ldV, ldV, eV, PETSC_FALSE);

784:     /* M <- ReduceAll(W, SUM) */
785:     sr->M = M;    sr->ld = ldM;
786:     sr->i0 = 0;   sr->i1 = eV;    sr->s0 = sU;    sr->e0 = eU;
787:                   sr->i2 = eV;

789:   /* Full M matrix */
790:   } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
791:              DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
792:     /* Add the reduction to r */
793:     SlepcAllReduceSum(r, ms, VecsMultS_copy_func, sr, &W);

795:     /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
796:     SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
797:                              pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
798:                              pv       , ldV, ldV, sV,    PETSC_FALSE);

800:     /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
801:     SlepcDenseMatProd(W+(eU-sU)*sV*(sU > 0?1:0), eU, 0.0, 1.0,
802:                              pu,        ldU, ldU, eU,    PETSC_TRUE,
803:                              pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);

805:     /* M <- ReduceAll(W, SUM) */
806:     sr->M = M;            sr->ld = ldM;
807:     sr->i0 = sU>0?0:sV;   sr->i1 = sV;    sr->s0 = sU;    sr->e0 = eU;
808:                           sr->i2 = eV;    sr->s1 = 0;     sr->e1 = eU;

810:   /* Upper triangular M matrix */
811:   } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
812:              DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
813:     /* Add the reduction to r */
814:     SlepcAllReduceSum(r, (eV-sV)*eU, VecsMultS_copy_func, sr, &W);

816:     /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
817:     SlepcDenseMatProd(W,         eU,  0.0, 1.0,
818:                              pu,        ldU, ldU, eU,    PETSC_TRUE,
819:                              pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);

821:     /* M <- ReduceAll(W, SUM) */
822:     sr->M = M;    sr->ld = ldM;
823:     sr->i0 = sV;  sr->i1 = eV;    sr->s0 = 0;     sr->e0 = eU;
824:                   sr->i2 = eV;

826:   /* Lower triangular M matrix */
827:   } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
828:              DVD_IS(sM,DVD_MAT_LTRIANG)) {
829:     /* Add the reduction to r */
830:     SlepcAllReduceSum(r, (eU-sU)*eV, VecsMultS_copy_func, sr, &W);

832:     /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
833:     SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
834:                              pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
835:                              pv       , ldV, ldV, eV,    PETSC_FALSE);

837:     /* ReduceAll(W, SUM) */
838:     sr->M = M;    sr->ld = ldM;
839:     sr->i0 = 0;   sr->i1 = eV;    sr->s0 = sU;    sr->e0 = eU;
840:                   sr->i2 = eV;
841:   }

843:   PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);

845:   VecRestoreArrayRead(U[0],&pu);
846:   VecRestoreArrayRead(V[0],&pv);
847:   return(0);
848: }

852: PetscErrorCode VecsMultS_copy_func(PetscScalar *out,PetscInt size_out,void *ptr)
853: {
854:   PetscInt          i, j, k;
855:   DvdMult_copy_func *sr = (DvdMult_copy_func*)ptr;


860:   for (i=sr->i0,k=0; i<sr->i1; i++)
861:     for (j=sr->ld*i+sr->s0; j<sr->ld*i+sr->e0; j++,k++) sr->M[j] = out[k];
862:   for (i=sr->i1; i<sr->i2; i++)
863:     for (j=sr->ld*i+sr->s1; j<sr->ld*i+sr->e1; j++,k++) sr->M[j] = out[k];

865:   if (k != size_out) SETERRQ(PETSC_COMM_SELF,1, "Wrong size");
866:   return(0);
867: }

871: /* Orthonormalize a chunk of parallel vector.
872:    NOTE: wS0 and wS1 must be of size n*n.
873: */
874: PetscErrorCode VecsOrthonormalize(Vec *V,PetscInt n,PetscScalar *wS0,PetscScalar *wS1)
875: {
876: #if defined(PETSC_MISSING_LAPACK_PBTRF)
878:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PBTRF - Lapack routine is unavailable");
879: #else
880:   PetscErrorCode  ierr;
881:   PetscBLASInt    nn = n, info, ld;
882:   PetscInt        ldV, i;
883:   PetscScalar     *H, *T, one=1.0, *pv;

886:   if (!wS0) {
887:     PetscMalloc(sizeof(PetscScalar)*n*n,&H);
888:   } else {
890:     H = wS0;
891:   }
892:   if (!wS1) {
893:     PetscMalloc(sizeof(PetscScalar)*n*n,&T);
894:   } else {
896:     T = wS1;
897:   }

899:   /* H <- V' * V */
900:   VecsMult(H, 0, n, V, 0, n, V, 0, n, T, NULL);

902:   /* H <- chol(H) */
903:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
904:   PetscStackCallBLAS("LAPACKpbtrf",LAPACKpbtrf_("U", &nn, &nn, H, &nn, &info));
905:   PetscFPTrapPop();
906:   if (info) SETERRQ1(PetscObjectComm((PetscObject)*V),PETSC_ERR_LIB, "Error in Lapack PBTRF %d", info);

908:   /* V <- V * inv(H) */
909:   VecGetLocalSize(V[0],&ldV);
910:   VecGetArray(V[0],&pv);
911:   ld = ldV;
912:   PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R", "U", "N", "N", &ld, &nn, &one, H, &nn, pv, &ld));
913:   VecRestoreArray(V[0],&pv);
914:   for (i=1;i<n;i++) {
915:     PetscObjectStateIncrease((PetscObject)V[i]);
916:   }

918:   if (!wS0) {
919:     PetscFree(H);
920:   }
921:   if (!wS1) {
922:     PetscFree(T);
923:   }
924:   return(0);
925: #endif
926: }

930: /*
931:   Sum up several arrays with only one call to MPIReduce.
932: */
933: PetscErrorCode SlepcAllReduceSumBegin(DvdReductionChunk *ops,PetscInt max_size_ops,PetscScalar *in,PetscScalar *out,PetscInt max_size_in,DvdReduction *r,MPI_Comm comm)
934: {

939:   r->in = in;
940:   r->out = out;
941:   r->size_in = 0;
942:   r->max_size_in = max_size_in;
943:   r->ops = ops;
944:   r->size_ops = 0;
945:   r->max_size_ops = max_size_ops;
946:   r->comm = comm;
947:   return(0);
948: }

952: PetscErrorCode SlepcAllReduceSum(DvdReduction *r,PetscInt size_in,DvdReductionPostF f,void *ptr,PetscScalar **in)
953: {
955:   *in = r->in + r->size_in;
956:   r->ops[r->size_ops].out = r->out + r->size_in;
957:   r->ops[r->size_ops].size_out = size_in;
958:   r->ops[r->size_ops].f = f;
959:   r->ops[r->size_ops].ptr = ptr;
960:   if (++(r->size_ops) > r->max_size_ops) SETERRQ(PETSC_COMM_SELF,1, "max_size_ops is not large enough");
961:   if ((r->size_in+= size_in) > r->max_size_in) SETERRQ(PETSC_COMM_SELF,1, "max_size_in is not large enough");
962:   return(0);
963: }

967: PetscErrorCode SlepcAllReduceSumEnd(DvdReduction *r)
968: {
969:   PetscErrorCode  ierr;
970:   PetscInt        i;

973:   /* Check if quick exit */
974:   if (r->size_ops == 0) return(0);

976:   /* Call the MPIAllReduce routine */
977:   MPI_Allreduce(r->in, r->out, r->size_in, MPIU_SCALAR, MPIU_SUM, r->comm);

979:   /* Call the postponed routines */
980:   for (i=0;i<r->size_ops;i++) {
981:     r->ops[i].f(r->ops[i].out, r->ops[i].size_out, r->ops[i].ptr);
982:   }

984:   /* Tag the operation as done */
985:   r->size_ops = 0;
986:   return(0);
987: }

991: /* auxS: size_cX+V_new_e */
992: PetscErrorCode dvd_orthV(IP ip,Vec *defl,PetscInt size_DS,Vec *cX,PetscInt size_cX,Vec *V,PetscInt V_new_s,PetscInt V_new_e,PetscScalar *auxS,PetscRandom rand)
993: {
994:   PetscErrorCode  ierr;
995:   PetscInt        i, j;
996:   PetscBool       lindep;
997:   PetscReal       norm;
998:   PetscScalar     *auxS0 = auxS;

1001:   /* Orthonormalize V with IP */
1002:   for (i=V_new_s;i<V_new_e;i++) {
1003:     for (j=0;j<3;j++) {
1004:       if (j>0) { SlepcVecSetRandom(V[i], rand); }
1005:       if (cX + size_cX == V) {
1006:         /* If cX and V are contiguous, orthogonalize in one step */
1007:         IPOrthogonalize(ip, size_DS, defl, size_cX+i, NULL, cX,
1008:                                V[i], auxS0, &norm, &lindep);
1009:       } else if (defl) {
1010:         /* Else orthogonalize first against defl, and then against cX and V */
1011:         IPOrthogonalize(ip, size_DS, defl, size_cX, NULL, cX,
1012:                                V[i], auxS0, NULL, &lindep);
1013:         if (!lindep) {
1014:           IPOrthogonalize(ip, 0, NULL, i, NULL, V,
1015:                                  V[i], auxS0, &norm, &lindep);
1016:         }
1017:       } else {
1018:         /* Else orthogonalize first against cX and then against V */
1019:         IPOrthogonalize(ip, size_cX, cX, i, NULL, V,
1020:                                V[i], auxS0, &norm, &lindep);
1021:       }
1022:       if (!lindep && (norm > PETSC_SQRT_MACHINE_EPSILON)) break;
1023:       PetscInfo1(ip,"Orthonormalization problems adding the vector %D to the searching subspace\n",i);
1024:     }
1025:     if (lindep || (norm < PETSC_SQRT_MACHINE_EPSILON)) SETERRQ(PetscObjectComm((PetscObject)ip),1, "Error during orthonormalization of eigenvectors");
1026:     VecScale(V[i],1.0/norm);
1027:   }
1028:   return(0);
1029: }

1033: /* auxS: size_cX+V_new_e+1 */
1034: PetscErrorCode dvd_BorthV_faster(IP ip,Vec *defl,Vec *BDS,PetscReal *BDSn,PetscInt size_DS,Vec *cX,Vec *BcX,PetscReal *BcXn,PetscInt size_cX,Vec *V,Vec *BV,PetscReal *BVn,PetscInt V_new_s,PetscInt V_new_e,PetscScalar *auxS,PetscRandom rand)
1035: {
1036:   PetscErrorCode  ierr;
1037:   PetscInt        i, j;
1038:   PetscBool       lindep;
1039:   PetscReal       norm;
1040:   PetscScalar     *auxS0 = auxS;

1043:   /* Orthonormalize V with IP */
1044:   for (i=V_new_s;i<V_new_e;i++) {
1045:     for (j=0;j<3;j++) {
1046:       if (j>0) { SlepcVecSetRandom(V[i],rand); }
1047:       if (cX + size_cX == V && BcX + size_cX == BV) {
1048:         /* If cX and V are contiguous, orthogonalize in one step */
1049:         IPBOrthogonalize(ip, size_DS, defl, BDS, BDSn, size_cX+i, NULL, cX, BcX, BcXn,
1050:                                V[i], BV[i], auxS0, &norm, &lindep);
1051:       } else if (defl) {
1052:         /* Else orthogonalize first against defl, and then against cX and V */
1053:         IPBOrthogonalize(ip, size_DS, defl, BDS, BDSn, size_cX, NULL, cX, BcX, BcXn,
1054:                                V[i], BV[i], auxS0, NULL, &lindep);
1055:         if (!lindep) {
1056:           IPBOrthogonalize(ip, 0, NULL, NULL, NULL, i, NULL, V, BV, BVn,
1057:                                   V[i], BV[i], auxS0, &norm, &lindep);
1058:         }
1059:       } else {
1060:         /* Else orthogonalize first against cX and then against V */
1061:         IPBOrthogonalize(ip, size_cX, cX, BcX, BcXn, i, NULL, V, BV, BVn,
1062:                                 V[i], BV[i], auxS0, &norm, &lindep);
1063:       }
1064:       if (!lindep && (PetscAbs(norm) > PETSC_SQRT_MACHINE_EPSILON)) break;
1065:       PetscInfo1(ip, "Orthonormalization problems adding the vector %d to the searching subspace\n", i);
1066:     }
1067:     if (lindep || (PetscAbs(norm) < PETSC_SQRT_MACHINE_EPSILON)) {
1068:         SETERRQ(PetscObjectComm((PetscObject)ip),1, "Error during the orthonormalization of the eigenvectors");
1069:     }
1070:     if (BVn) BVn[i] = norm > 0.0 ? 1.0 : -1.0;
1071:     norm = PetscAbs(norm);
1072:     VecScale(V[i],1.0/norm);
1073:     VecScale(BV[i],1.0/norm);
1074:   }
1075:   return(0);
1076: }

1080: /* auxS: size_cX+V_new_e+1 */
1081: PetscErrorCode dvd_BorthV_stable(IP ip,Vec *defl,PetscReal *BDSn,PetscInt size_DS,Vec *cX,PetscReal *BcXn,PetscInt size_cX,Vec *V,PetscReal *BVn,PetscInt V_new_s,PetscInt V_new_e,PetscScalar *auxS,PetscRandom rand)
1082: {
1083:   PetscErrorCode  ierr;
1084:   PetscInt        i, j;
1085:   PetscBool       lindep;
1086:   PetscReal       norm;
1087:   PetscScalar     *auxS0 = auxS;

1090:   /* Orthonormalize V with IP */
1091:   for (i=V_new_s;i<V_new_e;i++) {
1092:     for (j=0;j<3;j++) {
1093:       if (j>0) {
1094:         SlepcVecSetRandom(V[i],rand);
1095:         PetscInfo1(ip,"Orthonormalization problems adding the vector %d to the searching subspace\n",i);
1096:       }
1097:       /* Orthogonalize against the deflation, if needed */
1098:       if (defl) {
1099:         IPPseudoOrthogonalize(ip,size_DS,defl,BDSn,V[i],auxS0,NULL,&lindep);
1100:         if (lindep) continue;
1101:       }
1102:       /* If cX and V are contiguous, orthogonalize in one step */
1103:       if (cX + size_cX == V) {
1104:         IPPseudoOrthogonalize(ip,size_cX+i,cX,BcXn,V[i],auxS0,&norm,&lindep);
1105:       /* Else orthogonalize first against cX and then against V */
1106:       } else {
1107:         IPPseudoOrthogonalize(ip,size_cX,cX,BcXn,V[i],auxS0,NULL,&lindep);
1108:         if (lindep) continue;
1109:         IPPseudoOrthogonalize(ip,i,V,BVn,V[i],auxS0,&norm,&lindep);
1110:       }
1111:       if (!lindep && (PetscAbs(norm) > PETSC_MACHINE_EPSILON)) break;
1112:     }
1113:     if (lindep || (PetscAbs(norm) < PETSC_MACHINE_EPSILON)) {
1114:         SETERRQ(PetscObjectComm((PetscObject)ip),1, "Error during the orthonormalization of the eigenvectors");
1115:     }
1116:     if (BVn) BVn[i] = norm > 0.0 ? 1.0 : -1.0;
1117:     norm = PetscAbs(norm);
1118:     VecScale(V[i],1.0/norm);
1119:   }
1120:   return(0);
1121: }