#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mlApply.c,v 1.5 2000/01/10 03:20:33 knepley Exp $";
#endif
/*
   Defines the application of the multilevel preconditioner
*/
#include "src/sles/pc/pcimpl.h"    /*I "pc.h" I*/
#include "ml.h"

#undef  __FUNCT__
#define __FUNCT__ "DQMV"
int DQMV(char *trans, int numRows, PetscReal *Q, int ldQ, PetscReal *Tau, int numCols, PetscReal *x, PetscReal *y)
{
  /* Matvec with orthogonal matrix Q (elementary reflections)
     Assumes LAPACK representation : Q is lower triangular with diagonal entries = 1 (which are not stored)
  */
  PetscTruth isnormal, istrans;
  PetscReal  dot;
  int        start, end, step;
  int        i, j;
  int        ierr;

  PetscFunctionBegin;
  if (numCols > numRows) SETERRQ(PETSC_ERR_ARG_WRONG, "Number of reflectors cannot exceed the size of Q");

  ierr = PetscMemcpy(y, x, numRows * sizeof(double));                                                     CHKERRQ(ierr);
  ierr = PetscStrcasecmp(trans, "N", &isnormal);                                                          CHKERRQ(ierr);
  ierr = PetscStrcasecmp(trans, "T", &istrans);                                                           CHKERRQ(ierr);
  if (isnormal == PETSC_TRUE) {
    start = numCols-1;
    end   = -1;
    step  = -1;
  } else if (istrans == PETSC_TRUE) {
    start = 0;
    end   = numCols;
    step  = 1;
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG, "Application type must be 'N' or 'T', not %s", trans);
  }

  for(j = start; j != end; j += step)
  {
    if (Tau[j] != 0.0)
    {
      /* dot = v^T_j y */
      dot = y[j];
      for(i = j+1; i < numRows; i++)
        dot  += Q[j*ldQ+i]*y[i];
      /* dot = \tau v^T_j y */
      dot  *= Tau[j];
      /* y = (I - \tau v_j v^T_j) y */
      y[j] -= dot;
      for(i = j+1; i < numRows; i++)
        y[i] -= Q[j*ldQ+i]*dot;
    }
  }
  PetscLogFlops(numCols*(2 + numRows*2));
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PCMultiLevelApplyGradient"
/*@C PCMultiLevelApplyGradient
	This function applies the gradient to a vector.

  Input Parameters:
+ pc - The preconditioner context
- x  - The input vector

  Output Parameter:
. y  - The output vector

  Level: intermediate

.keywords multilevel
.seealso PCMultiLevelApplyGradientTrans, PCMultiLevelApplyP, PCMultiLevelApplyPTrans, PCMultiLevelApplyV,
         PCMultiLevelApplyVTrans, PCMultiLevelApplyDInv, PCMultiLevelApplyDInvTrans
@*/
int PCMultiLevelApplyGradient(PC pc, GVec x, GVec y)
{
  PC_Multilevel *ml;
  int            size, rows, cols;
  int            ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_COOKIE);
  PetscValidHeaderSpecific(x,  VEC_COOKIE);
  PetscValidHeaderSpecific(y,  VEC_COOKIE);
  if (pc->setupcalled < 2) {
    ierr = PCSetUp(pc);                                                                                   CHKERRQ(ierr);
  }

  ml = (PC_Multilevel *) pc->data;
#ifdef PETSC_USE_BOPT_g
  if (ml->useMath == PETSC_FALSE) {
    ierr = PCValidQ_Multilevel(pc);                                                                       CHKERRQ(ierr);
  }
#endif
  ierr = VecGetSize(y, &size);                                                                            CHKERRQ(ierr);
  if (ml->B != PETSC_NULL) {
    ierr = MatGetSize(ml->B, &rows, &cols);                                                               CHKERRQ(ierr);
  }
  if ((ml->B != PETSC_NULL) && (size == rows)) {
    ierr = MatMult(ml->B, x, y);                                                                          CHKERRQ(ierr);
  } else {
    ierr = GVecEvaluateOperatorGalerkinRectangular(y, x, ml->sOrder, ml->sLocOrder, ml->tOrder, ml->tLocOrder,
                                                   ml->gradOp, ml->gradAlpha, PETSC_NULL);
                                                                                                          CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PCMultiLevelApplyGradientTrans"
/*@C PCMultiLevelApplyGradientTrans
	This function applies the transpose of the
  gradient to a vector.

  Input Parameters:
+ pc - The preconditioner context
- x  - The input vector

  Output Parameter:
. y  - The output vector

  Level: intermediate

.keywords multilevel
.seealso PCMultiLevelApplyGradient, PCMultiLevelApplyP, PCMultiLevelApplyPTrans, PCMultiLevelApplyV,
         PCMultiLevelApplyVTrans, PCMultiLevelApplyDInv, PCMultiLevelApplyDInvTrans
@*/
int PCMultiLevelApplyGradientTrans(PC pc, GVec x, GVec y)
{
  PC_Multilevel *ml;
  int            size, rows, cols;
  int            ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_COOKIE);
  PetscValidHeaderSpecific(x,  VEC_COOKIE);
  PetscValidHeaderSpecific(y,  VEC_COOKIE);
  if (pc->setupcalled < 2) {
    ierr = PCSetUp(pc);                                                                                   CHKERRQ(ierr);
  }

  ml = (PC_Multilevel *) pc->data;
#ifdef PETSC_USE_BOPT_g
  if (ml->useMath == PETSC_FALSE) {
    ierr = PCValidQ_Multilevel(pc);                                                                       CHKERRQ(ierr);
  }
#endif
  ierr = VecGetSize(x, &size);                                                                            CHKERRQ(ierr);
  if (ml->B != PETSC_NULL) {
    ierr = MatGetSize(ml->B, &rows, &cols);                                                               CHKERRQ(ierr);
  }
  if ((ml->B != PETSC_NULL) && (size == rows)) {
    ierr = MatMultTranspose(ml->B, x, y);                                                                 CHKERRQ(ierr);
  } else {
    ierr = GVecEvaluateOperatorGalerkinRectangular(y, x, ml->tOrder, ml->tLocOrder, ml->sOrder, ml->sLocOrder,
                                                   ml->divOp, ml->gradAlpha, PETSC_NULL);
                                                                                                          CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* This is for just applying the interior portions of D^{-1} */
int PCMultiLevelApplyDInvLoc_Private(PC pc, GVec x, GVec y)
{
#ifdef HAVE_MATHEMATICA
  MLINK          link;
#endif
  PC_Multilevel *ml = (PC_Multilevel *) pc->data;
  double         zeroTol = ml->zeroTol;
  PetscScalar   *yArray;
  PetscReal     *invSingVals;
  int           *colIndices;
  int            level, part, col;
  int            ierr;

  PetscFunctionBegin;
  /* Apply D^{-1} which is block diagonal, so we just split the vector and apply each local matrix */
  if (ml->useMath == PETSC_FALSE)
  {
#ifdef PETSC_USE_BOPT_g
    ierr = PCValidQ_Multilevel(pc);                                                                       CHKERRQ(ierr);
#endif
    if (x != y)
      {ierr = VecCopy(x, y);                                                                              CHKERRQ(ierr);}
    /* Apply D^{-1} for each level */
    for(level = 0; level < ml->numLevels; level++)
    {
      /* Apply D^{-1} for each partition */
      for(part = 0; part < ml->numPartitions[level]; part++)
      {
        colIndices  = ml->colPartition[level][part];
        invSingVals = ml->factors[level][part][FACT_DINV];
        /* Here, null singular values are replaced by 1 instead of zero since these columns
           are carried to the next level */
        ierr = VecGetArray(y, &yArray);                                                                   CHKERRQ(ierr); 
        for(col = 0; col < ml->numPartitionCols[level][part]; col++)
          if (invSingVals[col] > zeroTol)
            yArray[colIndices[col]] *= invSingVals[col];
        ierr = VecRestoreArray(y, &yArray);                                                               CHKERRQ(ierr); 
        PetscLogFlops(ml->numPartitionCols[level][part]);
      }
    }
  }
  else
  {
#ifdef HAVE_MATHEMATICA
    /* The link to Mathematica */
    ierr = PetscViewerMathematicaGetLink(ml->mathViewer, &link);                                          CHKERRQ(ierr);

    /* vec1 = input vector */
    ierr = PetscViewerMathematicaSetName(ml->mathViewer, "vec1");                                         CHKERRQ(ierr);
    ierr = VecView(x, ml->mathViewer);                                                                    CHKERRQ(ierr);

    /* vec2 = DInverseApply[mattML,vec] */
    MLPutFunction(link, "EvaluatePacket", 1);
      MLPutFunction(link, "Set", 2);
        MLPutSymbol(link, "vec2");
        MLPutFunction(link, "DInverseApply", 2);
          MLPutSymbol(link, "mattML");
          MLPutSymbol(link, "vec1");
    MLEndPacket(link);
    /* Skip packets until ReturnPacket */
    ierr = PetscViewerMathematicaSkipPackets(ml->mathViewer, RETURNPKT);                                  CHKERRQ(ierr);
    /* Skip ReturnPacket */
    MLNewPacket(link);

    /* y = vec2 */
    ierr = PetscViewerMathematicaSetName(ml->mathViewer, "vec2");                                         CHKERRQ(ierr);
    ierr = PetscViewerMathematicaGetVector(ml->mathViewer, y);                                            CHKERRQ(ierr);
    ierr = PetscViewerMathematicaClearName(ml->mathViewer);                                               CHKERRQ(ierr);
#endif
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PCMultiLevelApplyDInv"
/*@C PCMultiLevelApplyDInv
	This function applies the inverse of D to a vector.

  Input Parameters:
+ pc - The preconditioner context
- x  - The input vector

  Output Parameter:
. y  - The output vector

  Level: intermediate

.keywords multilevel
.seealso PCMultiLevelApplyGradient, PCMultiLevelApplyGradientTrans, PCMultiLevelApplyP, PCMultiLevelApplyPTrans,
         PCMultiLevelApplyV, PCMultiLevelApplyVTrans, PCMultiLevelApplyDInvTrans
@*/
int PCMultiLevelApplyDInv(PC pc, GVec x, GVec y)
{
  PC_Multilevel *ml;
  PetscScalar   *rhsArray;
  int            numProcs, rank;
#ifdef PETSC_HAVE_PLAPACK
  PetscScalar    zero = 0.0;
  double         one  = 1.0;
  PLA_Obj        globalR = PETSC_NULL;
#else
  int            numRhs;
#endif
  int            ierr;

  PetscFunctionBegin;
  /* Setup the PC context */
  PetscValidHeaderSpecific(pc, PC_COOKIE);
  PetscValidHeaderSpecific(x,  VEC_COOKIE);
  PetscValidHeaderSpecific(y,  VEC_COOKIE);
  if (pc->setupcalled < 2)
  {
    ierr = PCSetUp(pc);                                                                                   CHKERRQ(ierr);
  }

  /* Scatter in interface vector now since D^{-1} zeros out null space rows */
  ml = (PC_Multilevel *) pc->data;
  ierr = MPI_Comm_size(pc->comm, &numProcs);                                                              CHKERRQ(ierr);
  ierr = MPI_Comm_rank(pc->comm, &rank);                                                                  CHKERRQ(ierr);
  if (numProcs > 1)
  {
    ierr = VecScatterBegin(x, ml->interfaceColRhs, INSERT_VALUES, SCATTER_FORWARD, ml->interfaceColScatter); CHKERRQ(ierr);
    ierr = VecScatterEnd(x, ml->interfaceColRhs, INSERT_VALUES, SCATTER_FORWARD, ml->interfaceColScatter);   CHKERRQ(ierr);
  }

  /* Apply interior portion of D^{-1} */
  ierr = PCMultiLevelApplyDInvLoc_Private(pc, x, y);                                                      CHKERRQ(ierr);

  /* Apply R^{-1} from the QR of the interface matrix */
#ifdef PETSC_HAVE_PLAPACK
  if (numProcs > 1)
  {
    /* Put result in y */
    ierr = VecGetArray(ml->interfaceColRhs, &rhsArray);                                                   CHKERRQ(ierr);
    ierr = PLA_Obj_set_to_zero(ml->PLArhsD);                                                              CHKERRQ(ierr);
    ierr = PLA_API_begin();                                                                               CHKERRQ(ierr);
    ierr = PLA_Obj_API_open(ml->PLArhsD);                                                                 CHKERRQ(ierr);
    ierr = PLA_API_axpy_vector_to_global(ml->numLocNullCols, &one, rhsArray, 1, ml->PLArhsD, ml->firstNullCol[rank]);
                                                                                                          CHKERRQ(ierr);
    ierr = PLA_Obj_API_close(ml->PLArhsD);                                                                CHKERRQ(ierr);
    ierr = PLA_API_end();                                                                                 CHKERRQ(ierr);
    /* Solve y <-- R^{-1} y */
    ierr = PLA_Obj_horz_split_2(ml->interfaceQR, ml->numNullCols, &globalR, PLA_DUMMY);                   CHKERRQ(ierr);
    ierr = PLA_Trsv(PLA_UPPER_TRIANGULAR, PLA_NO_TRANSPOSE, PLA_NONUNIT_DIAG, globalR, ml->PLArhsD);      CHKERRQ(ierr);
    ierr = PLA_Obj_free(&globalR);                                                                        CHKERRQ(ierr);
    /* Get result from y */
    ierr = VecSet(&zero, ml->interfaceColRhs);                                                            CHKERRQ(ierr);
    ierr = PLA_API_begin();                                                                               CHKERRQ(ierr);
    ierr = PLA_Obj_API_open(ml->PLArhsD);                                                                 CHKERRQ(ierr);
    ierr = PLA_API_axpy_global_to_vector(ml->numLocNullCols, &one, ml->PLArhsD, ml->firstNullCol[rank], rhsArray, 1);
                                                                                                          CHKERRQ(ierr);
    ierr = PLA_Obj_API_close(ml->PLArhsD);                                                                CHKERRQ(ierr);
    ierr = PLA_API_end();                                                                                 CHKERRQ(ierr);
    ierr = VecRestoreArray(ml->interfaceColRhs, &rhsArray);                                               CHKERRQ(ierr);
  }
#else
  if ((numProcs > 1) && (rank == 0))
  {
    numRhs = 1;
    ierr = VecGetArray(ml->interfaceColRhs, &rhsArray);                                                   CHKERRQ(ierr);
    LAtrtrs_("U", "N", "N", &ml->numNullCols, &numRhs, ml->interfaceQR, &ml->numInterfaceRows, rhsArray, &ml->numNullCols, &ierr);
                                                                                                          CHKERRQ(ierr);
    ierr = VecRestoreArray(ml->interfaceColRhs, &rhsArray);                                               CHKERRQ(ierr);
    PetscLogFlops((ml->numNullCols*(ml->numNullCols-1))/2 + 2*ml->numNullCols);
  }
#endif

  /* Scatter out interface vector */
  if (numProcs > 1)
  {
    ierr = VecScatterBegin(ml->interfaceColRhs, y, INSERT_VALUES, SCATTER_REVERSE, ml->interfaceColScatter); CHKERRQ(ierr);
    ierr = VecScatterEnd(ml->interfaceColRhs, y, INSERT_VALUES, SCATTER_REVERSE, ml->interfaceColScatter);   CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* This is for just applying the interior portions of D^{-T} */
int PCMultiLevelApplyDInvTransLoc_Private(PC pc, GVec x, GVec y)
{
#ifdef HAVE_MATHEMATICA
  MLINK          link;
#endif
  PC_Multilevel *ml = (PC_Multilevel *) pc->data;
  double         zeroTol = ml->zeroTol;
  PetscScalar   *yArray;
  PetscReal     *invSingVals;
  int           *colIndices;
  int            level, part, col;
  int            ierr;

  PetscFunctionBegin;
  /* Apply D^{-T} which is block diagonal, so we just split the vector and apply each local matrix */
  if (ml->useMath == PETSC_FALSE)
  {
#ifdef PETSC_USE_BOPT_g
    ierr = PCValidQ_Multilevel(pc);                                                                       CHKERRQ(ierr);
#endif
    if (x != y)
      {ierr = VecCopy(x, y);                                                                              CHKERRQ(ierr);}
    /* Apply D^{-T} for each level */
    for(level = ml->numLevels-1; level >= 0; level--)
    {
      /* Apply D^{-T} for each partition */
      for(part = 0; part < ml->numPartitions[level]; part++)
      {
        colIndices  = ml->colPartition[level][part];
        invSingVals = ml->factors[level][part][FACT_DINV];
        /* Here, null singular values are replaced by 1 instead of zero since these columns
           are carried to the next level */
        ierr = VecGetArray(y, &yArray);                                                                   CHKERRQ(ierr); 
        for(col = 0; col < ml->numPartitionCols[level][part]; col++)
          if (invSingVals[col] > zeroTol)
            yArray[colIndices[col]] *= invSingVals[col];
        ierr = VecRestoreArray(y, &yArray);                                                               CHKERRQ(ierr); 
        PetscLogFlops(ml->numPartitionCols[level][part]);
      }
    }
  }
  else
  {
#ifdef HAVE_MATHEMATICA
    /* The link to Mathematica */
    ierr = PetscViewerMathematicaGetLink(ml->mathViewer, &link);                                              CHKERRQ(ierr);

    /* vec1 = input vector */
    ierr = PetscViewerMathematicaSetName(ml->mathViewer, "vec1");                                             CHKERRQ(ierr);
    ierr = VecView(x, ml->mathViewer);                                                                    CHKERRQ(ierr);

    /* vec2 = DInverseApply[mattML,vec] */
    MLPutFunction(link, "EvaluatePacket", 1);
      MLPutFunction(link, "Set", 2);
        MLPutSymbol(link, "vec2");
        MLPutFunction(link, "DInverseTransposeApply", 2);
          MLPutSymbol(link, "mattML");
          MLPutSymbol(link, "vec1");
    MLEndPacket(link);
    /* Skip packets until ReturnPacket */
    ierr = PetscViewerMathematicaSkipPackets(ml->mathViewer, RETURNPKT);                                      CHKERRQ(ierr);
    /* Skip ReturnPacket */
    MLNewPacket(link);

    /* y = vec2 */
    ierr = PetscViewerMathematicaSetName(ml->mathViewer, "vec2");                                             CHKERRQ(ierr);
    ierr = PetscViewerMathematicaGetVector(ml->mathViewer, y);                                                CHKERRQ(ierr);
    ierr = PetscViewerMathematicaClearName(ml->mathViewer);                                                   CHKERRQ(ierr);
#endif
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PCMultiLevelApplyDInvTrans"
/*@C PCMultiLevelApplyDInvTrans
	This function applies the inverse transpose of D to a vector.

  Input Parameters:
+ pc - The preconditioner context
- x  - The input vector

  Output Parameter:
. y  - The output vector

  Level: intermediate

.keywords multilevel
.seealso PCMultiLevelApplyGradient, PCMultiLevelApplyGradientTrans, PCMultiLevelApplyP, PCMultiLevelApplyPTrans,
         PCMultiLevelApplyV, PCMultiLevelApplyVTrans, PCMultiLevelApplyDInv
@*/
int PCMultiLevelApplyDInvTrans(PC pc, GVec x, GVec y)
{
  PC_Multilevel *ml;
  PetscScalar   *rhsArray;
  int            numProcs, rank;
#ifdef PETSC_HAVE_PLAPACK
  PetscScalar    zero = 0.0;
  double         one  = 1.0;
  PLA_Obj        globalR = PETSC_NULL;
#else
  int            numRhs;
#endif
  int            ierr;

  PetscFunctionBegin;
  /* Setup the PC context */
  PetscValidHeaderSpecific(pc, PC_COOKIE);
  PetscValidHeaderSpecific(x,  VEC_COOKIE);
  PetscValidHeaderSpecific(y,  VEC_COOKIE);
  if (pc->setupcalled < 2)
  {
    ierr = PCSetUp(pc);                                                                                   CHKERRQ(ierr);
  }

  /* Scatter in interface vector now since D^{-T} zeros out null space rows */
  ml = (PC_Multilevel *) pc->data;
  ierr = MPI_Comm_size(pc->comm, &numProcs);                                                              CHKERRQ(ierr);
  ierr = MPI_Comm_rank(pc->comm, &rank);                                                                  CHKERRQ(ierr);
  if (numProcs > 1)
  {
    ierr = VecScatterBegin(x, ml->interfaceColRhs, INSERT_VALUES, SCATTER_FORWARD, ml->interfaceColScatter); CHKERRQ(ierr);
    ierr = VecScatterEnd(x, ml->interfaceColRhs, INSERT_VALUES, SCATTER_FORWARD, ml->interfaceColScatter);   CHKERRQ(ierr);
  }

  /* Apply interior portion of D^{-T} */
  ierr = PCMultiLevelApplyDInvTransLoc_Private(pc, x, y);                                                 CHKERRQ(ierr);

  /* Apply R^{-T} from the QR of the interface matrix */
#ifdef PETSC_HAVE_PLAPACK
  if (numProcs > 1)
  {
    /* Put result in y */
    ierr = VecGetArray(ml->interfaceColRhs, &rhsArray);                                                   CHKERRQ(ierr);
    ierr = PLA_Obj_set_to_zero(ml->PLArhsD);                                                              CHKERRQ(ierr);
    ierr = PLA_API_begin();                                                                               CHKERRQ(ierr);
    ierr = PLA_Obj_API_open(ml->PLArhsD);                                                                 CHKERRQ(ierr);
    ierr = PLA_API_axpy_vector_to_global(ml->numLocNullCols, &one, rhsArray, 1, ml->PLArhsD, ml->firstNullCol[rank]);
                                                                                                          CHKERRQ(ierr);
    ierr = PLA_Obj_API_close(ml->PLArhsD);                                                                CHKERRQ(ierr);
    ierr = PLA_API_end();                                                                                 CHKERRQ(ierr);
    /* Solve y <-- R^{-T} y */
    ierr = PLA_Obj_horz_split_2(ml->interfaceQR, ml->numNullCols, &globalR, PLA_DUMMY);                   CHKERRQ(ierr);
    ierr = PLA_Trsv(PLA_UPPER_TRIANGULAR, PLA_TRANSPOSE, PLA_NONUNIT_DIAG, globalR, ml->PLArhsD);         CHKERRQ(ierr);
    ierr = PLA_Obj_free(&globalR);                                                                        CHKERRQ(ierr);
    /* Get result from y */
    ierr = VecSet(&zero, ml->interfaceColRhs);                                                            CHKERRQ(ierr);
    ierr = PLA_API_begin();                                                                               CHKERRQ(ierr);
    ierr = PLA_Obj_API_open(ml->PLArhsD);                                                                 CHKERRQ(ierr);
    ierr = PLA_API_axpy_global_to_vector(ml->numLocNullCols, &one, ml->PLArhsD, ml->firstNullCol[rank], rhsArray, 1);
                                                                                                          CHKERRQ(ierr);
    ierr = PLA_Obj_API_close(ml->PLArhsD);                                                                CHKERRQ(ierr);
    ierr = PLA_API_end();                                                                                 CHKERRQ(ierr);
    ierr = VecRestoreArray(ml->interfaceColRhs, &rhsArray);                                               CHKERRQ(ierr);
  }
#else
  if ((numProcs > 1) && (rank == 0))
  {
    numRhs = 1;
    ierr = VecGetArray(ml->interfaceColRhs, &rhsArray);                                                   CHKERRQ(ierr);
    LAtrtrs_("U", "T", "N", &ml->numNullCols, &numRhs, ml->interfaceQR, &ml->numInterfaceRows, rhsArray, &ml->numNullCols, &ierr);
                                                                                                          CHKERRQ(ierr);
    ierr = VecRestoreArray(ml->interfaceColRhs, &rhsArray);                                               CHKERRQ(ierr);
    PetscLogFlops((ml->numNullCols*(ml->numNullCols-1))/2 + 2*ml->numNullCols);
  }
#endif

  /* Scatter out interface vector */
  if (numProcs > 1)
  {
    ierr = VecScatterBegin(ml->interfaceColRhs, y, INSERT_VALUES, SCATTER_REVERSE, ml->interfaceColScatter); CHKERRQ(ierr);
    ierr = VecScatterEnd(ml->interfaceColRhs, y, INSERT_VALUES, SCATTER_REVERSE, ml->interfaceColScatter);   CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PCMultiLevelApplyV"
/*@C PCMultiLevelApplyV
	This function applies V to a vector.

  Input Parameters:
+ pc - The preconditioner context
- x  - The input vector

  Output Parameter:
. y  - The output vector

  Level: intermediate

.keywords multilevel
.seealso PCMultiLevelApplyGradient, PCMultiLevelApplyGradientTrans, PCMultiLevelApplyP, PCMultiLevelApplyPTrans,
         PCMultiLevelApplyVTrans, PCMultiLevelApplyDInv, PCMultiLevelApplyDInvTrans
@*/
int PCMultiLevelApplyV(PC pc, GVec x, GVec y)
{
#ifdef HAVE_MATHEMATICA
  MLINK          link;
#endif
  PC_Multilevel *ml;
  PetscScalar   *yArray;
  PetscScalar   *colArray;
  PetscScalar   *colArray2;
  PetscReal     *VArray;
  int           *colIndices;
  PetscScalar    zero = 0.0;
  PetscScalar    one  = 1.0;
  int            dummy;
  int            level, part, dim, col;
  int            ierr;

  PetscFunctionBegin;
  /* Setup the PC context */
  PetscValidHeaderSpecific(pc, PC_COOKIE);
  PetscValidHeaderSpecific(x,  VEC_COOKIE);
  PetscValidHeaderSpecific(y,  VEC_COOKIE);
  if (pc->setupcalled < 2)
  {
    ierr = PCSetUp(pc);                                                                                   CHKERRQ(ierr);
  }

  /* Apply V which is block diagonal, so we just split the vector and apply each local matrix */
  ml = (PC_Multilevel *) pc->data;
  if (ml->useMath == PETSC_FALSE)
  {
#ifdef PETSC_USE_BOPT_g
    ierr = PCValidQ_Multilevel(pc);                                                                       CHKERRQ(ierr);
#endif
    if (x != y)
      {ierr = VecCopy(x, y);                                                                              CHKERRQ(ierr);}
    /* Apply V for each level */
    ierr = VecGetArray(y, &yArray);                                                                       CHKERRQ(ierr); 
    for(level = ml->numLevels-1; level >= 0; level--)
    {
      ierr = VecGetArray(ml->colReduceVecs[level],  &colArray);                                           CHKERRQ(ierr); 
      ierr = VecGetArray(ml->colReduceVecs2[level], &colArray2);                                          CHKERRQ(ierr); 
      /* Apply V for each partition */
      for(part = 0; part < ml->numPartitions[level]; part++)
      {
        colIndices = ml->colPartition[level][part];
        VArray     = ml->factors[level][part][FACT_V];
        dim        = ml->numPartitionCols[level][part];
        /* Scatter into work vector */
        for(col = 0; col < dim; col++)
          colArray[col] = yArray[colIndices[col]];
        dummy = 1;
        LAgemv_("T", &dim, &dim, &one, VArray, &dim, colArray, &dummy, &zero, colArray2, &dummy);
        /* Scatter from work vector */
        for(col = 0; col < dim; col++)
          yArray[colIndices[col]] = colArray2[col];
        PetscLogFlops(2*dim*dim - dim);
      }
      ierr = VecRestoreArray(ml->colReduceVecs[level],  &colArray);                                       CHKERRQ(ierr); 
      ierr = VecRestoreArray(ml->colReduceVecs2[level], &colArray2);                                      CHKERRQ(ierr); 
    }
    ierr = VecRestoreArray(y, &yArray);                                                                   CHKERRQ(ierr); 
  }
  else
  {
#ifdef HAVE_MATHEMATICA
    /* The link to Mathematica */
    ierr = PetscViewerMathematicaGetLink(ml->mathViewer, &link);                                              CHKERRQ(ierr);

    /* vec1 = input vector */
    ierr = PetscViewerMathematicaSetName(ml->mathViewer, "vec1");                                             CHKERRQ(ierr);
    ierr = VecView(x, ml->mathViewer);                                                                    CHKERRQ(ierr);

    /* vec2 = VApply[mattML,vec] */
    MLPutFunction(link, "EvaluatePacket", 1);
      MLPutFunction(link, "Set", 2);
        MLPutSymbol(link, "vec2");
        MLPutFunction(link, "VTransposeApply", 2);
          MLPutSymbol(link, "mattML");
          MLPutSymbol(link, "vec1");
    MLEndPacket(link);
    /* Skip packets until ReturnPacket */
    ierr = PetscViewerMathematicaSkipPackets(ml->mathViewer, RETURNPKT);                                      CHKERRQ(ierr);
    /* Skip ReturnPacket */
    MLNewPacket(link);

    /* y = vec2 */
    ierr = PetscViewerMathematicaSetName(ml->mathViewer, "vec2");                                             CHKERRQ(ierr);
    ierr = PetscViewerMathematicaGetVector(ml->mathViewer, y);                                                CHKERRQ(ierr);
    ierr = PetscViewerMathematicaClearName(ml->mathViewer);                                                   CHKERRQ(ierr);
#endif
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PCMultiLevelApplyVTrans"
/*@C PCMultiLevelApplyVTrans
	This function applies the transpose of V to a vector.

  Input Parameters:
+ pc - The preconditioner context
- x  - The input vector

  Output Parameter:
. y  - The output vector

  Level: intermediate

.keywords multilevel
.seealso PCMultiLevelApplyGradient, PCMultiLevelApplyGradientTrans, PCMultiLevelApplyP, PCMultiLevelApplyPTrans,
         PCMultiLevelApplyV, PCMultiLevelApplyDInv, PCMultiLevelApplyDInvTrans
@*/
int PCMultiLevelApplyVTrans(PC pc, GVec x, GVec y)
{
#ifdef HAVE_MATHEMATICA
  MLINK          link;
#endif
  PC_Multilevel *ml;
  PetscScalar   *yArray;
  PetscScalar   *colArray;
  PetscScalar   *colArray2;
  PetscReal     *VArray;
  int           *colIndices;
  PetscScalar    zero = 0.0;
  PetscScalar    one  = 1.0;
  int            dummy;
  int            level, part, dim, col;
  int            ierr;

  PetscFunctionBegin;
  /* Setup the PC context */
  PetscValidHeaderSpecific(pc, PC_COOKIE);
  PetscValidHeaderSpecific(x,  VEC_COOKIE);
  PetscValidHeaderSpecific(y,  VEC_COOKIE);
  if (pc->setupcalled < 2)
  {
    ierr = PCSetUp(pc);                                                                                   CHKERRQ(ierr);
  }

  /* Apply V^T which is block diagonal, so we just split the vector and apply each local matrix */
  ml = (PC_Multilevel *) pc->data;
  if (ml->useMath == PETSC_FALSE)
  {
#ifdef PETSC_USE_BOPT_g
    ierr = PCValidQ_Multilevel(pc);                                                                       CHKERRQ(ierr);
#endif
    if (x != y)
      {ierr = VecCopy(x, y);                                                                              CHKERRQ(ierr);}
    /* Apply V^T for each level */
    ierr = VecGetArray(y, &yArray);                                                                       CHKERRQ(ierr); 
    for(level = 0; level < ml->numLevels; level++)
    {
      ierr = VecGetArray(ml->colReduceVecs[level],  &colArray);                                           CHKERRQ(ierr); 
      ierr = VecGetArray(ml->colReduceVecs2[level], &colArray2);                                          CHKERRQ(ierr); 
      /* Apply V^T for each partition */
      for(part = 0; part < ml->numPartitions[level]; part++)
      {
        colIndices = ml->colPartition[level][part];
        VArray     = ml->factors[level][part][FACT_V];
        dim        = ml->numPartitionCols[level][part];
        /* Scatter into work vector */
        for(col = 0; col < dim; col++)
          colArray[col] = yArray[colIndices[col]];
        dummy = 1;
        LAgemv_("N", &dim, &dim, &one, VArray, &dim, colArray, &dummy, &zero, colArray2, &dummy);
        /* Scatter from work vector */
        for(col = 0; col < dim; col++)
          yArray[colIndices[col]] = colArray2[col];
        PetscLogFlops(2*dim*dim - dim);
      }
      ierr = VecRestoreArray(ml->colReduceVecs[level],  &colArray);                                       CHKERRQ(ierr); 
      ierr = VecRestoreArray(ml->colReduceVecs2[level], &colArray2);                                      CHKERRQ(ierr); 
    }
    ierr = VecRestoreArray(y, &yArray);                                                                   CHKERRQ(ierr); 
  }
  else
  {
#ifdef HAVE_MATHEMATICA
    /* The link to Mathematica */
    ierr = PetscViewerMathematicaGetLink(ml->mathViewer, &link);                                              CHKERRQ(ierr);

    /* vec1 = input vector */
    ierr = PetscViewerMathematicaSetName(ml->mathViewer, "vec1");                                             CHKERRQ(ierr);
    ierr = VecView(x, ml->mathViewer);                                                                    CHKERRQ(ierr);

    /* vec2 = VApply[mattML,vec] */
    MLPutFunction(link, "EvaluatePacket", 1);
      MLPutFunction(link, "Set", 2);
        MLPutSymbol(link, "vec2");
        MLPutFunction(link, "VApply", 2);
          MLPutSymbol(link, "mattML");
          MLPutSymbol(link, "vec1");
    MLEndPacket(link);
    /* Skip packets until ReturnPacket */
    ierr = PetscViewerMathematicaSkipPackets(ml->mathViewer, RETURNPKT);                                      CHKERRQ(ierr);
    /* Skip ReturnPacket */
    MLNewPacket(link);

    /* y = vec2 */
    ierr = PetscViewerMathematicaSetName(ml->mathViewer, "vec2");                                             CHKERRQ(ierr);
    ierr = PetscViewerMathematicaGetVector(ml->mathViewer, y);                                                CHKERRQ(ierr);
    ierr = PetscViewerMathematicaClearName(ml->mathViewer);                                                   CHKERRQ(ierr);
#endif
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PCMultiLevelApplyP"
/*@C PCMultiLevelApplyP
	This function applies P to a vector.

  Input Parameters:
+ pc - The preconditioner context
- x  - The input vector

  Output Parameter:
. y  - The output vector

  Level: intermediate

.keywords multilevel
.seealso PCMultiLevelApplyGradient, PCMultiLevelApplyGradientTrans, PCMultiLevelApplyPTrans, PCMultiLevelApplyP1,
         PCMultiLevelApplyP1Trans, PCMultiLevelApplyP2, PCMultiLevelApplyP2Trans, PCMultiLevelApplyV
         PCMultiLevelApplyVTrans, PCMultiLevelApplyDInv, PCMultiLevelApplyDInvTrans
@*/
int PCMultiLevelApplyP(PC pc, GVec x, GVec y)
{
#ifdef HAVE_MATHEMATICA
  MLINK          link;
#endif
  PC_Multilevel *ml;
  PetscScalar   *yArray;
  PetscScalar   *rhsArray;
  PetscScalar   *localWorkArray;
  PetscScalar   *interiorArray;
  PetscScalar   *interiorArray2;
  PetscScalar   *bdArray;
  PetscScalar   *colArray;
  PetscScalar   *colArray2;
  PetscReal     *UArray;
  PetscReal     *QRArray;
  PetscReal     *TAUArray;
  PetscReal     *invSingVals;
  PetscReal     *VArray;
  int           *rowIndices;
  int            numParts, numBdRows, numResRows;
  int            partOffset, locColVars;
  int            numProcs, rank;
  int            nullCol, rangeCol;
  PetscScalar    zero =  0.0;
  PetscScalar    one  =  1.0;
  int            dummy;
#ifndef PETSC_HAVE_PLAPACK
#ifdef PETSC_USE_DEBUG
  int            numInterfaceRows;
#endif
#endif
  int            level, part, dim, col, row;
  int            ierr;

  PetscFunctionBegin;
  /* Setup the PC context */
  PetscValidHeaderSpecific(pc, PC_COOKIE);
  PetscValidHeaderSpecific(x,  VEC_COOKIE);
  PetscValidHeaderSpecific(y,  VEC_COOKIE);
  if (pc->setupcalled < 2)
  {
    ierr = PCSetUp(pc);                                                                                   CHKERRQ(ierr);
  }

  /* Initialization */
  ml   = (PC_Multilevel *) pc->data;
  ierr = MPI_Comm_size(pc->comm, &numProcs);                                                              CHKERRQ(ierr);
  ierr = MPI_Comm_rank(pc->comm, &rank);                                                                  CHKERRQ(ierr);

  /* Copy x into y if necessary */
  if (x != y)
    {ierr = VecCopy(x, y);                                                                                CHKERRQ(ierr);}

  /* Calculate interface values */
  if (numProcs > 1)
  {
    ierr = PC_MLLogEventBegin(PC_ML_ApplySymmetricRightParallel, pc, x, y, 0);                            CHKERRQ(ierr);
#ifdef PETSC_HAVE_PLAPACK
    /* Get the interface vector and reduce interface columns */
    ierr = VecScatterBegin(y, ml->interfaceRhs, INSERT_VALUES, SCATTER_FORWARD, ml->interfaceScatter);    CHKERRQ(ierr);
    ierr = VecScatterEnd(y, ml->interfaceRhs, INSERT_VALUES, SCATTER_FORWARD, ml->interfaceScatter);      CHKERRQ(ierr);

    ierr = PC_MLLogEventBegin(PC_ML_ApplyQR, pc, x, y, 0);                                                CHKERRQ(ierr);
    ierr = VecGetArray(ml->interfaceRhs, &rhsArray);                                                      CHKERRQ(ierr);
    /* Put result in x */
    ierr = PLA_Obj_set_to_zero(ml->PLArhsP);                                                              CHKERRQ(ierr);
    ierr = PLA_API_begin();                                                                               CHKERRQ(ierr);
    ierr = PLA_Obj_API_open(ml->PLArhsP);                                                                 CHKERRQ(ierr);
    ierr = PLA_API_axpy_vector_to_global(ml->numLocInterfaceRows, &one, rhsArray, 1, ml->PLArhsP, ml->firstInterfaceRow[rank]);
                                                                                                          CHKERRQ(ierr);
    ierr = PLA_Obj_API_close(ml->PLArhsP);                                                                CHKERRQ(ierr);
    ierr = PLA_API_end();                                                                                 CHKERRQ(ierr);
    /* Apply x <-- Q x */
    ierr = PLA_Q_solve(PLA_SIDE_LEFT, PLA_TRANS, ml->interfaceQR, ml->interfaceTAU, ml->PLArhsP);         CHKERRQ(ierr);
    /* Get result from x */
    ierr = VecSet(&zero, ml->interfaceRhs);                                                               CHKERRQ(ierr);
    ierr = PLA_API_begin();                                                                               CHKERRQ(ierr);
    ierr = PLA_Obj_API_open(ml->PLArhsP);                                                                 CHKERRQ(ierr);
    ierr = PLA_API_axpy_global_to_vector(ml->numLocInterfaceRows, &one, ml->PLArhsP, ml->firstInterfaceRow[rank], rhsArray, 1);
                                                                                                          CHKERRQ(ierr);
    ierr = PLA_Obj_API_close(ml->PLArhsP);                                                                CHKERRQ(ierr);
    ierr = PLA_API_end();                                                                                 CHKERRQ(ierr);
    ierr = VecRestoreArray(ml->interfaceRhs, &rhsArray);                                                  CHKERRQ(ierr);
    ierr = PC_MLLogEventEnd(PC_ML_ApplyQR, pc, x, y, 0);                                                  CHKERRQ(ierr);

    /* Set the interface values */
    ierr = VecScatterBegin(ml->interfaceRhs, y, INSERT_VALUES, SCATTER_REVERSE, ml->interfaceScatter);    CHKERRQ(ierr);
    ierr = VecScatterEnd(ml->interfaceRhs, y, INSERT_VALUES, SCATTER_REVERSE, ml->interfaceScatter);      CHKERRQ(ierr);
#else
    /* Get the interface vector and reduce interface columns */
    ierr = VecScatterBegin(y, ml->locInterfaceRhs, INSERT_VALUES, SCATTER_FORWARD, ml->locInterfaceScatter);CHKERRQ(ierr);
    ierr = VecScatterEnd(y, ml->locInterfaceRhs, INSERT_VALUES, SCATTER_FORWARD, ml->locInterfaceScatter);  CHKERRQ(ierr);
    ierr = VecGetArray(ml->locInterfaceRhs, &rhsArray);                                                   CHKERRQ(ierr);

    ierr = PC_MLLogEventBegin(PC_ML_ApplyQR, pc, x, y, 0);                                                CHKERRQ(ierr);
    if (rank == 0)
    {
      /* Apply Q from the QR of the interface matrix */
      dummy = 1;
#ifdef PETSC_USE_DEBUG
      ierr = VecGetSize(ml->locInterfaceRhs, &numInterfaceRows);                                          CHKERRQ(ierr);
      if (numInterfaceRows != ml->numInterfaceRows) SETERRQ(PETSC_ERR_ARG_WRONG, "Invalid interface vector");
#endif
      LAormqr_("L", "N", &ml->numInterfaceRows, &dummy, &ml->numNullCols, ml->interfaceQR, &ml->numInterfaceRows,
             ml->interfaceTAU, rhsArray, &ml->numInterfaceRows, ml->work, &ml->workLen, &ierr);
                                                                                                          CHKERRQ(ierr);
      PetscLogFlops(ml->numNullCols*(2 + ml->numInterfaceRows*2));
    }
    ierr = PC_MLLogEventEnd(PC_ML_ApplyQR, pc, x, y, 0);                                                  CHKERRQ(ierr);

    /* Set the interface values */
    ierr = VecRestoreArray(ml->locInterfaceRhs, &rhsArray);                                               CHKERRQ(ierr);
    ierr = VecScatterBegin(ml->locInterfaceRhs, y, INSERT_VALUES, SCATTER_REVERSE, ml->locInterfaceScatter);CHKERRQ(ierr);
    ierr = VecScatterEnd(ml->locInterfaceRhs, y, INSERT_VALUES, SCATTER_REVERSE, ml->locInterfaceScatter);  CHKERRQ(ierr);

    /* Retrieve the local interface columns */
    ierr = VecScatterBegin(y, ml->interfaceRhs, INSERT_VALUES, SCATTER_FORWARD, ml->interfaceScatter);    CHKERRQ(ierr);
    ierr = VecScatterEnd(y, ml->interfaceRhs, INSERT_VALUES, SCATTER_FORWARD, ml->interfaceScatter);      CHKERRQ(ierr);
#endif

    /* Multiply x by B^T_I */
    ierr = MatMultTranspose(ml->interfaceB, ml->interfaceRhs, ml->colWorkVec);                            CHKERRQ(ierr);

    /* Multiply B_p^(IT) x by V^T_p */
    ierr = PCMultiLevelApplyVTrans(pc, ml->colWorkVec, ml->colWorkVec);                                   CHKERRQ(ierr);

    /* Multiply V^T_p B_p^(IT) x by -D_p^{-T} */
    ierr = PCMultiLevelApplyDInvTransLoc_Private(pc, ml->colWorkVec, ml->colWorkVec);                     CHKERRQ(ierr);

    /* Reduce rows */
    ierr = VecGetArray(ml->colWorkVec,         &localWorkArray);                                          CHKERRQ(ierr);
    ierr = VecGetArray(y,                      &yArray);                                                  CHKERRQ(ierr);
    ierr = VarOrderingGetLocalSize(ml->sOrder, &locColVars);                                              CHKERRQ(ierr);
    for(col = 0, nullCol = 0, rangeCol = 0; col < locColVars; col++)
      if (ml->nullCols[nullCol] == col)
        /* yArray[ml->nullCols[nullCol++]] -= localWorkArray[col]; */
        nullCol++;
      else
        yArray[ml->range[rangeCol++]]   -= localWorkArray[col];
    PetscLogFlops(rangeCol);
    if ((rangeCol != ml->localRank) || (nullCol != ml->numLocNullCols)) {
      SETERRQ(PETSC_ERR_ARG_WRONG, "Invalid range space");
    }
    ierr = VecRestoreArray(ml->colWorkVec, &localWorkArray);                                              CHKERRQ(ierr);
    ierr = VecRestoreArray(y,              &yArray);                                                      CHKERRQ(ierr);
    /* Here the new interface values are in y, and the old interior values */

    /* Scatter in interior values */
    ierr = VecScatterBegin(y, ml->interiorRhs, INSERT_VALUES, SCATTER_FORWARD, ml->interiorScatter);      CHKERRQ(ierr);
    ierr = VecScatterEnd(y, ml->interiorRhs, INSERT_VALUES, SCATTER_FORWARD, ml->interiorScatter);        CHKERRQ(ierr);
    ierr = PC_MLLogEventEnd(PC_ML_ApplySymmetricRightParallel, pc, x, y, 0);                              CHKERRQ(ierr);
  } else {
    ml->interiorRhs = y;
  }

  /* Apply P */
  if (ml->useMath == PETSC_FALSE)
  {
#ifdef PETSC_USE_BOPT_g
    ierr = PCValidQ_Multilevel(pc);                                                                       CHKERRQ(ierr);
#endif
    /* Apply P for each level */
    ierr = VecGetArray(ml->interiorRhs, &rhsArray);                                                       CHKERRQ(ierr); 
    interiorArray  = ml->interiorWork;
    interiorArray2 = ml->interiorWork2;
    for(level = ml->numLevels-1; level >= 0; level--)
    {
      numParts = ml->numPartitions[level];
      /* Scatter in boundary rows */
      ierr = VecGetArray(ml->bdReduceVecs[level], &bdArray);                                              CHKERRQ(ierr);
      rowIndices = ml->rowPartition[level][PART_ROW_BD][0];
      numBdRows  = ml->numPartitionRows[level][numParts];
      for(row = 0; row < numBdRows; row++)
        bdArray[row] = rhsArray[rowIndices[row]];
      /* Scatter in residual rows */
      rowIndices = ml->rowPartition[level][PART_ROW_RES][0];
      numResRows = ml->numPartitionRows[level][numParts+1];
      for(row = 0; row < numResRows; row++)
        bdArray[row+numBdRows] = rhsArray[rowIndices[row]];
      ierr = VecRestoreArray(ml->bdReduceVecs[level], &bdArray);                                          CHKERRQ(ierr);
      /* Create B^T_\Gamma x */
      ierr = MatMultTranspose(ml->grads[level], ml->bdReduceVecs[level], ml->colReduceVecs[level]);       CHKERRQ(ierr);
      if (numBdRows+numResRows == 0)
      {
        /* If ml->grads[level] has no rows, the default behavior is to leave ml->colReduceVecs[level] untouched */
        ierr = VecSet(&zero, ml->colReduceVecs[level]);                                                   CHKERRQ(ierr);
      }
      /* Reduce interior columns using / I   -D^{-T} V^T B^T_\Gamma \   and apply U
                                       \ 0              I           /                  */
      ierr = VecGetArray(ml->colReduceVecs[level],  &colArray);                                           CHKERRQ(ierr); 
      ierr = VecGetArray(ml->colReduceVecs2[level], &colArray2);                                          CHKERRQ(ierr); 
      for(part = 0, partOffset = 0; part < numParts; part++)
      {
        /* Apply V^T */
        VArray = ml->factors[level][part][FACT_V];
        dim    = ml->numPartitionCols[level][part];
        dummy  = 1;
        LAgemv_("N", &dim, &dim, &one, VArray, &dim, &colArray[partOffset], &dummy, &zero, colArray2, &dummy);
        PetscLogFlops(2*dim*dim - dim);
        partOffset += dim;
        /* Apply D^{-T} and reduce, since we take D as rectangular we must watch out for the dimension */
        rowIndices  = ml->rowPartition[level][PART_ROW_INT][part];
        invSingVals = ml->factors[level][part][FACT_DINV];
        dim         = PetscMin(ml->numPartitionCols[level][part], ml->numPartitionRows[level][part]);
        for(col = 0; col < dim; col++)
          rhsArray[rowIndices[col]] -= colArray2[col] * invSingVals[col];
        PetscLogFlops(2*dim);
        /* Apply U */
        UArray = ml->factors[level][part][FACT_U];
        dim    = ml->numPartitionRows[level][part];
        if (dim > 0) {
          col = ml->numPartitionCols[level][part];
          if (PCMultiLevelDoQR_Private(pc, dim, col) == PETSC_TRUE) {
            QRArray  = ml->factors[level][part][FACT_QR];
            TAUArray = ml->factors[level][part][FACT_TAU];
            /* Scatter into work vector */
            for(row = 0; row < dim; row++) interiorArray[row] = rhsArray[rowIndices[row]];
            dummy = 1;
            LAgemv_("N", &col, &col, &one, UArray, &col, interiorArray, &dummy, &zero, interiorArray2, &dummy);
            PetscLogFlops(2*col*col - col);
            ierr = PetscMemcpy(interiorArray2+col, interiorArray+col, (dim - col) * sizeof(double));      CHKERRQ(ierr);
            DQMV("N", dim, QRArray, dim, TAUArray, col, interiorArray2, interiorArray);
            /* Scatter from work vector */
            for(row = 0; row < dim; row++)
              rhsArray[rowIndices[row]] = interiorArray[row];
          }
          else
          {
            /* Scatter into work vector */
            for(row = 0; row < dim; row++)
              interiorArray[row] = rhsArray[rowIndices[row]];
            dummy = 1;
            LAgemv_("N", &dim, &dim, &one, UArray, &dim, interiorArray, &dummy, &zero, interiorArray2, &dummy);
            PetscLogFlops(2*dim*dim - dim);
            /* Scatter from work vector */
            for(row = 0; row < dim; row++)
              rhsArray[rowIndices[row]] = interiorArray2[row];
          }
        }
      }
      ierr = VecRestoreArray(ml->colReduceVecs[level],  &colArray);                                       CHKERRQ(ierr); 
      ierr = VecRestoreArray(ml->colReduceVecs2[level], &colArray2);                                      CHKERRQ(ierr); 
    }
    ierr = VecRestoreArray(ml->interiorRhs, &rhsArray);                                                   CHKERRQ(ierr);
  }
  else
  {
#ifdef HAVE_MATHEMATICA
    /* The link to Mathematica */
    ierr = PetscViewerMathematicaGetLink(ml->mathViewer, &link);                                              CHKERRQ(ierr);

    /* vec1 = input vector */
    ierr = PetscViewerMathematicaSetName(ml->mathViewer, "vec1");                                             CHKERRQ(ierr);
    ierr = VecView(ml->interiorRhs, ml->mathViewer);                                                      CHKERRQ(ierr);

    /* vec2 = PApply[mattML,vec] */
    MLPutFunction(link, "EvaluatePacket", 1);
      MLPutFunction(link, "Set", 2);
        MLPutSymbol(link, "vec2");
        MLPutFunction(link, "PApply", 2);
          MLPutSymbol(link, "mattML");
          MLPutSymbol(link, "vec1");
    MLEndPacket(link);
    /* Skip packets until ReturnPacket */
    ierr = PetscViewerMathematicaSkipPackets(ml->mathViewer, RETURNPKT);                                  CHKERRQ(ierr);
    /* Skip ReturnPacket */
    MLNewPacket(link);

    /* y = vec2 */
    ierr = PetscViewerMathematicaSetName(ml->mathViewer, "vec2");                                         CHKERRQ(ierr);
    ierr = PetscViewerMathematicaGetVector(ml->mathViewer,  ml->interiorRhs);                             CHKERRQ(ierr);
    ierr = PetscViewerMathematicaClearName(ml->mathViewer);                                               CHKERRQ(ierr);
#endif
  }

  /* Scatter back interior values */
  if (numProcs > 1)
  {
    ierr = PC_MLLogEventBegin(PC_ML_ApplySymmetricRightParallel, pc, x, y, 0);                            CHKERRQ(ierr);
    ierr = VecScatterBegin(ml->interiorRhs, y, INSERT_VALUES, SCATTER_REVERSE, ml->interiorScatter);      CHKERRQ(ierr);
    ierr = VecScatterEnd(ml->interiorRhs, y, INSERT_VALUES, SCATTER_REVERSE, ml->interiorScatter);        CHKERRQ(ierr);
    ierr = PC_MLLogEventEnd(PC_ML_ApplySymmetricRightParallel, pc, x, y, 0);                              CHKERRQ(ierr);
  }

  /* Scale by the diagonal of A */
  if (ml->diag != PETSC_NULL) {
    ierr = VecPointwiseMult(y, ml->diag, y);                                                              CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PCMultiLevelApplyPTrans"
/*@C PCMultiLevelApplyPTrans
	This function applies the transpose of P a vector.

  Input Parameters:
+ pc - The preconditioner context
- x  - The input vector

  Output Parameter:
. y  - The output vector

  Level: intermediate

.keywords multilevel
.seealso PCMultiLevelApplyGradient, PCMultiLevelApplyGradientTrans, PCMultiLevelApplyP, PCMultiLevelApplyP1,
         PCMultiLevelApplyP1Trans, PCMultiLevelApplyP2, PCMultiLevelApplyP2Trans, PCMultiLevelApplyV
         PCMultiLevelApplyVTrans, PCMultiLevelApplyDInv, PCMultiLevelApplyDInvTrans
@*/
int PCMultiLevelApplyPTrans(PC pc, GVec x, GVec y)
{
#ifdef HAVE_MATHEMATICA
  MLINK          link;
#endif
  PC_Multilevel *ml;
  PetscScalar   *yArray;
  PetscScalar   *rhsArray;
  PetscScalar   *localWorkArray;
  PetscScalar   *interiorArray;
  PetscScalar   *interiorArray2;
  PetscScalar   *bdArray;
  PetscScalar   *colArray;
  PetscScalar   *colArray2;
  PetscReal     *UArray;
  PetscReal     *QRArray;
  PetscReal     *TAUArray;
  PetscReal     *invSingVals;
  PetscReal     *VArray;
  int           *rowIndices;
  int            numParts, numBdRows, numResRows;
  int            partOffset, locColVars;
  int            numProcs, rank;
  int            nullCol, rangeCol;
  PetscScalar    minusOne = -1.0;
  PetscScalar    zero     =  0.0;
  PetscScalar    one      =  1.0;
  int            dummy;
#ifndef PETSC_HAVE_PLAPACK
#ifdef PETSC_USE_DEBUG
  int            numInterfaceRows;
#endif
#endif
  int            level, part, dim, row, col;
  int            ierr;

  PetscFunctionBegin;
  /* Setup the PC context */
  PetscValidHeaderSpecific(pc, PC_COOKIE);
  PetscValidHeaderSpecific(x,  VEC_COOKIE);
  PetscValidHeaderSpecific(y,  VEC_COOKIE);
  if (pc->setupcalled < 2)
  {
    ierr = PCSetUp(pc);                                                                                   CHKERRQ(ierr);
  }

  /* Initialization */
  ml   = (PC_Multilevel *) pc->data;
  ierr = MPI_Comm_size(pc->comm, &numProcs);                                                              CHKERRQ(ierr);
  ierr = MPI_Comm_rank(pc->comm, &rank);                                                                  CHKERRQ(ierr);

  if (ml->diag != PETSC_NULL) {
    /* Scale by the diagonal of A */
    ierr = VecPointwiseMult(x, ml->diag, y);                                                              CHKERRQ(ierr);
  } else if (x != y) {
    /* Copy x into y if necessary */
    ierr = VecCopy(x, y);                                                                                 CHKERRQ(ierr);
  }

  /* Scatter in interior values */
  if (numProcs > 1)
  {
    ierr = PC_MLLogEventBegin(PC_ML_ApplySymmetricLeftParallel, pc, x, y, 0);                             CHKERRQ(ierr);
    ierr = VecScatterBegin(y, ml->interiorRhs, INSERT_VALUES, SCATTER_FORWARD, ml->interiorScatter);      CHKERRQ(ierr);
    ierr = VecScatterEnd(y, ml->interiorRhs, INSERT_VALUES, SCATTER_FORWARD, ml->interiorScatter);        CHKERRQ(ierr);
    ierr = PC_MLLogEventEnd(PC_ML_ApplySymmetricLeftParallel, pc, x, y, 0);                               CHKERRQ(ierr);
  }
  else
  {
    ml->interiorRhs = y;
  }

  /* Apply P^T */
  if (ml->useMath == PETSC_FALSE)
  {
#ifdef PETSC_USE_BOPT_g
    ierr = PCValidQ_Multilevel(pc);                                                                       CHKERRQ(ierr);
#endif
    /* Apply P^T for each level */
    ierr = VecGetArray(ml->interiorRhs, &rhsArray);                                                       CHKERRQ(ierr); 
    interiorArray  = ml->interiorWork;
    interiorArray2 = ml->interiorWork2;
    for(level = 0; level < ml->numLevels; level++)
    {
      numParts = ml->numPartitions[level];
      ierr = VecGetArray(ml->colReduceVecs[level],  &colArray);                                           CHKERRQ(ierr); 
      ierr = VecGetArray(ml->colReduceVecs2[level], &colArray2);                                          CHKERRQ(ierr); 
      /* Apply U^T for each partition and form V D^{-1} */
      for(part = 0, partOffset = 0; part < numParts; part++)
      {
        /* Apply U^T */
        rowIndices = ml->rowPartition[level][PART_ROW_INT][part];
        UArray     = ml->factors[level][part][FACT_U];
        dim        = ml->numPartitionRows[level][part];
        if (dim  > 0)
        {
          col = ml->numPartitionCols[level][part];
          if (PCMultiLevelDoQR_Private(pc, dim, col) == PETSC_TRUE)
          {
            QRArray  = ml->factors[level][part][FACT_QR];
            TAUArray = ml->factors[level][part][FACT_TAU];
            /* Scatter into work vector */
            for(row = 0; row < dim; row++)
              interiorArray[row] = rhsArray[rowIndices[row]];
            DQMV("T", dim, QRArray, dim, TAUArray, col, interiorArray, interiorArray2);
            dummy = 1;
            LAgemv_("T", &col, &col, &one, UArray, &col, interiorArray2, &dummy, &zero, interiorArray, &dummy);
            PetscLogFlops(2*col*col - col);
            ierr = PetscMemcpy(interiorArray+col, interiorArray2+col, (dim - col) * sizeof(double));      CHKERRQ(ierr);
            /* Scatter from work vector */
            for(row = 0; row < dim; row++)
              rhsArray[rowIndices[row]] = interiorArray[row];
          }
          else
          {
            /* Scatter into work vector */
            for(row = 0; row < dim; row++)
              interiorArray[row] = rhsArray[rowIndices[row]];
            dummy = 1;
            LAgemv_("T", &dim, &dim, &one, UArray, &dim, interiorArray, &dummy, &zero, interiorArray2, &dummy);
            PetscLogFlops(2*dim*dim - dim);
            /* Scatter from work vector */
            for(row = 0; row < dim; row++)
              rhsArray[rowIndices[row]] = interiorArray2[row];
          }
        }
        /* Apply D^{-1}, since we take D as rectangular we must watch out for the dimension */
        invSingVals = ml->factors[level][part][FACT_DINV];
        dim         = PetscMin(ml->numPartitionCols[level][part], ml->numPartitionRows[level][part]);
        ierr = PetscMemzero(colArray2, ml->numPartitionCols[level][part] * sizeof(double));               CHKERRQ(ierr);
        for(col = 0; col < dim; col++)
          colArray2[col] = rhsArray[rowIndices[col]] * invSingVals[col];
        PetscLogFlops(dim);
        /* Apply V */
        VArray = ml->factors[level][part][FACT_V];
        dim    = ml->numPartitionCols[level][part];
        dummy = 1;
        LAgemv_("T", &dim, &dim, &one, VArray, &dim, colArray2, &dummy, &zero, &colArray[partOffset], &dummy);
        PetscLogFlops(2*dim*dim - dim);
        partOffset += dim;
      }
      ierr = VecRestoreArray(ml->colReduceVecs[level],  &colArray);                                       CHKERRQ(ierr); 
      ierr = VecRestoreArray(ml->colReduceVecs2[level], &colArray2);                                      CHKERRQ(ierr); 

      /* Reduce boundary columns using /          I           0 \
                                       \ -B_\Gamma V D^{-1}   I / */
      ierr = MatMult(ml->grads[level], ml->colReduceVecs[level], ml->bdReduceVecs[level]);                CHKERRQ(ierr);
      ierr = VecGetArray(ml->bdReduceVecs[level], &bdArray);                                              CHKERRQ(ierr);
      /* Update boundary rows */
      rowIndices = ml->rowPartition[level][PART_ROW_BD][0];
      numBdRows  = ml->numPartitionRows[level][numParts];
      for(row = 0; row < numBdRows; row++)
        rhsArray[rowIndices[row]] -= bdArray[row];
      /* Update residual rows */
      rowIndices = ml->rowPartition[level][PART_ROW_RES][0];
      numResRows = ml->numPartitionRows[level][numParts+1];
      for(row = 0; row < numResRows; row++)
        rhsArray[rowIndices[row]] -= bdArray[row+numBdRows];
      PetscLogFlops(numBdRows+numResRows);
      ierr = VecRestoreArray(ml->bdReduceVecs[level], &bdArray);                                          CHKERRQ(ierr);
    }
    ierr = VecRestoreArray(ml->interiorRhs, &rhsArray);                                                   CHKERRQ(ierr);
  }
  else
  {
#ifdef HAVE_MATHEMATICA
    /* The link to Mathematica */
    ierr = PetscViewerMathematicaGetLink(ml->mathViewer, &link);                                              CHKERRQ(ierr);

    /* vec1 = input vector */
    ierr = PetscViewerMathematicaSetName(ml->mathViewer, "vec1");                                             CHKERRQ(ierr);
    ierr = VecView(ml->interiorRhs, ml->mathViewer);                                                      CHKERRQ(ierr);

    /* vec2 = PApply[mattML,vec] */
    MLPutFunction(link, "EvaluatePacket", 1);
      MLPutFunction(link, "Set", 2);
        MLPutSymbol(link, "vec2");
        MLPutFunction(link, "PTransposeApply", 2);
          MLPutSymbol(link, "mattML");
          MLPutSymbol(link, "vec1");
    MLEndPacket(link);
    /* Skip packets until ReturnPacket */
    ierr = PetscViewerMathematicaSkipPackets(ml->mathViewer, RETURNPKT);                                      CHKERRQ(ierr);
    /* Skip ReturnPacket */
    MLNewPacket(link);

    /* y = vec2 */
    ierr = PetscViewerMathematicaSetName(ml->mathViewer, "vec2");                                         CHKERRQ(ierr);
    ierr = PetscViewerMathematicaGetVector(ml->mathViewer,  ml->interiorRhs);                             CHKERRQ(ierr);
    ierr = PetscViewerMathematicaClearName(ml->mathViewer);                                               CHKERRQ(ierr);
#endif
  }

  if (numProcs > 1)
  {
    ierr = PC_MLLogEventBegin(PC_ML_ApplySymmetricLeftParallel, pc, x, y, 0);                             CHKERRQ(ierr);
    /* Scatter back interior values */
    ierr = VecScatterBegin(ml->interiorRhs, y, INSERT_VALUES, SCATTER_REVERSE, ml->interiorScatter);      CHKERRQ(ierr);
    ierr = VecScatterEnd(ml->interiorRhs, y, INSERT_VALUES, SCATTER_REVERSE, ml->interiorScatter);        CHKERRQ(ierr);

    /* Calculate interface values */
    /* Apply (I - D^{-1} D): Scatter some of the interior of y into a column work vector */
    ierr = VecGetArray(y,                      &yArray);                                                  CHKERRQ(ierr);
    ierr = VecGetArray(ml->colWorkVec,         &localWorkArray);                                          CHKERRQ(ierr);
    ierr = VarOrderingGetLocalSize(ml->sOrder, &locColVars);                                              CHKERRQ(ierr);
    ierr = PetscMemzero(localWorkArray, locColVars * sizeof(PetscScalar));                                CHKERRQ(ierr);
    for(col = 0, nullCol = 0, rangeCol = 0; col < locColVars; col++)
      if (ml->nullCols[nullCol] == col)
        /* localWorkArray[col] = yArray[ml->nullCols[nullCol++]]; */
        nullCol++;
      else
        localWorkArray[col] = yArray[ml->range[rangeCol++]];
    if ((rangeCol != ml->localRank) || (nullCol != ml->numLocNullCols)) {
      SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid range space");
    }
    ierr = VecRestoreArray(y,                    &yArray);                                                CHKERRQ(ierr);
    ierr = VecRestoreArray(ml->colWorkVec,       &localWorkArray);                                        CHKERRQ(ierr);

    /* Multiply y by D_p^{-1} */
    ierr = PCMultiLevelApplyDInvLoc_Private(pc, ml->colWorkVec, ml->colWorkVec);                          CHKERRQ(ierr);

    /* Multiply (I - D_p^{-1} D_p) y by V_p */
    ierr = PCMultiLevelApplyV(pc, ml->colWorkVec, ml->colWorkVec);                                        CHKERRQ(ierr);

    /* Multiply V (I - D^{-1} D) y by -B_I */
    ierr = MatMult(ml->interfaceB, ml->colWorkVec, ml->interfaceRhs);                                     CHKERRQ(ierr);
    ierr = VecScale(&minusOne, ml->interfaceRhs);                                                         CHKERRQ(ierr);

#ifdef PETSC_HAVE_PLAPACK
    /* Reduce the local interface columns */
    ierr = VecScatterBegin(y, ml->interfaceRhs, ADD_VALUES, SCATTER_FORWARD, ml->interfaceScatter);       CHKERRQ(ierr);
    ierr = VecScatterEnd(y, ml->interfaceRhs, ADD_VALUES, SCATTER_FORWARD, ml->interfaceScatter);         CHKERRQ(ierr);

    ierr = PC_MLLogEventBegin(PC_ML_ApplyQR, pc, x, y, 0);                                                CHKERRQ(ierr);
    ierr = VecGetArray(ml->interfaceRhs, &rhsArray);                                                      CHKERRQ(ierr);
    /* Put result in x */
    ierr = PLA_Obj_set_to_zero(ml->PLArhsP);                                                              CHKERRQ(ierr);
    ierr = PLA_API_begin();                                                                               CHKERRQ(ierr);
    ierr = PLA_Obj_API_open(ml->PLArhsP);                                                                 CHKERRQ(ierr);
    ierr = PLA_API_axpy_vector_to_global(ml->numLocInterfaceRows, &one, rhsArray, 1, ml->PLArhsP, ml->firstInterfaceRow[rank]);
                                                                                                          CHKERRQ(ierr);
    ierr = PLA_Obj_API_close(ml->PLArhsP);                                                                CHKERRQ(ierr);
    ierr = PLA_API_end();                                                                                 CHKERRQ(ierr);
    /* Apply x <-- Q^T x */
    ierr = PLA_Q_solve(PLA_SIDE_LEFT, PLA_NO_TRANS, ml->interfaceQR, ml->interfaceTAU, ml->PLArhsP);      CHKERRQ(ierr);
    /* Get result from x */
    ierr = VecSet(&zero, ml->interfaceRhs);                                                               CHKERRQ(ierr);
    ierr = PLA_API_begin();                                                                               CHKERRQ(ierr);
    ierr = PLA_Obj_API_open(ml->PLArhsP);                                                                 CHKERRQ(ierr);
    ierr = PLA_API_axpy_global_to_vector(ml->numLocInterfaceRows, &one, ml->PLArhsP, ml->firstInterfaceRow[rank], rhsArray, 1);
                                                                                                          CHKERRQ(ierr);
    ierr = PLA_Obj_API_close(ml->PLArhsP);                                                                CHKERRQ(ierr);
    ierr = PLA_API_end();                                                                                 CHKERRQ(ierr);
    ierr = VecRestoreArray(ml->interfaceRhs, &rhsArray);                                                  CHKERRQ(ierr);
    ierr = PC_MLLogEventEnd(PC_ML_ApplyQR, pc, x, y, 0);                                                  CHKERRQ(ierr);

    /* Set the interface values */
    ierr = VecScatterBegin(ml->interfaceRhs, y, INSERT_VALUES, SCATTER_REVERSE, ml->interfaceScatter);    CHKERRQ(ierr);
    ierr = VecScatterEnd(ml->interfaceRhs, y, INSERT_VALUES, SCATTER_REVERSE, ml->interfaceScatter);      CHKERRQ(ierr);
#else
    /* Reduce the local interface columns */
    ierr = VecScatterBegin(ml->interfaceRhs, y, ADD_VALUES, SCATTER_REVERSE, ml->interfaceScatter);       CHKERRQ(ierr);
    ierr = VecScatterEnd(ml->interfaceRhs, y, ADD_VALUES, SCATTER_REVERSE, ml->interfaceScatter);         CHKERRQ(ierr);

    /* Get the interface vector */
    ierr = VecScatterBegin(y, ml->locInterfaceRhs, INSERT_VALUES, SCATTER_FORWARD, ml->locInterfaceScatter);CHKERRQ(ierr);
    ierr = VecScatterEnd(y, ml->locInterfaceRhs, INSERT_VALUES, SCATTER_FORWARD, ml->locInterfaceScatter);  CHKERRQ(ierr);
    ierr = VecGetArray(ml->locInterfaceRhs, &rhsArray);                                                   CHKERRQ(ierr);

    ierr = PC_MLLogEventBegin(PC_ML_ApplyQR, pc, x, y, 0);                                                CHKERRQ(ierr);
    if (rank == 0) {
      /* Apply Q^T from the QR of the interface matrix */
      dummy = 1;
#ifdef PETSC_USE_DEBUG
      ierr = VecGetSize(ml->locInterfaceRhs, &numInterfaceRows);                                          CHKERRQ(ierr);
      if (numInterfaceRows != ml->numInterfaceRows) SETERRQ(PETSC_ERR_ARG_WRONG, "Invalid interface vector");
#endif
      LAormqr_("L", "T", &ml->numInterfaceRows, &dummy, &ml->numNullCols, ml->interfaceQR, &ml->numInterfaceRows,
             ml->interfaceTAU, rhsArray, &ml->numInterfaceRows, ml->work, &ml->workLen, &ierr);
      PetscLogFlops(ml->numNullCols*(2 + ml->numInterfaceRows*2));
                                                                                                          CHKERRQ(ierr);
    }
    ierr = PC_MLLogEventEnd(PC_ML_ApplyQR, pc, x, y, 0);                                                  CHKERRQ(ierr);

    /* Set the interface values */
    ierr = VecRestoreArray(ml->locInterfaceRhs, &rhsArray);                                               CHKERRQ(ierr);
    ierr = VecScatterBegin(ml->locInterfaceRhs, y, INSERT_VALUES, SCATTER_REVERSE, ml->locInterfaceScatter);CHKERRQ(ierr);
    ierr = VecScatterEnd(ml->locInterfaceRhs, y, INSERT_VALUES, SCATTER_REVERSE, ml->locInterfaceScatter);  CHKERRQ(ierr);
#endif
    ierr = PC_MLLogEventEnd(PC_ML_ApplySymmetricLeftParallel, pc, x, y, 0);                               CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PCMultiLevelApplyP1"
/*@C PCMultiLevelApplyP1
	This function applies P_1, the projector on the range of B, to a vector.

  Input Parameters:
+ pc - The preconditioner context
- x  - The input vector

  Output Parameter:
. y  - The output vector

  Level: intermediate

.keywords multilevel
.seealso PCMultiLevelApplyGradient, PCMultiLevelApplyGradientTrans, PCMultiLevelApplyP, PCMultiLevelApplyPTrans,
         PCMultiLevelApplyP1Trans, PCMultiLevelApplyP2, PCMultiLevelApplyP2Trans, PCMultiLevelApplyV
         PCMultiLevelApplyVTrans, PCMultiLevelApplyDInv, PCMultiLevelApplyDInvTrans
@*/
int PCMultiLevelApplyP1(PC pc, GVec x, GVec y)
{
  PC_Multilevel *ml;
  PetscScalar    zero = 0.0;
  int            ierr;

  PetscFunctionBegin;
  /* Setup the PC context */
  PetscValidHeaderSpecific(pc, PC_COOKIE);
  PetscValidHeaderSpecific(x,  VEC_COOKIE);
  PetscValidHeaderSpecific(y,  VEC_COOKIE);
  if (pc->setupcalled < 2) {
    ierr = PCSetUp(pc);                                                                                   CHKERRQ(ierr);
  }

  /* Scatter the column vector x to the solution vector y */
  ml   = (PC_Multilevel *) pc->data;
  ierr = VecSet(&zero, y);                                                                                CHKERRQ(ierr);
  ierr = VecScatterBegin(x, y, INSERT_VALUES, SCATTER_REVERSE, ml->rangeScatter);                         CHKERRQ(ierr);
  ierr = VecScatterEnd(x, y, INSERT_VALUES, SCATTER_REVERSE, ml->rangeScatter);                           CHKERRQ(ierr);

  /* Apply P */
  ierr = PCMultiLevelApplyP(pc, y, y);                                                                    CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PCMultiLevelApplyP1Trans"
/*@C PCMultiLevelApplyP1Trans
	This function applies the transpose of P_1, the projector on the range of B, to a vector.

  Input Parameters:
+ pc - The preconditioner context
- x  - The input vector

  Output Parameter:
. y  - The output vector

  Level: intermediate

.keywords multilevel
.seealso PCMultiLevelApplyGradient, PCMultiLevelApplyGradientTrans, PCMultiLevelApplyP, PCMultiLevelApplyPTrans,
         PCMultiLevelApplyP1, PCMultiLevelApplyP2, PCMultiLevelApplyP2Trans, PCMultiLevelApplyV
         PCMultiLevelApplyVTrans, PCMultiLevelApplyDInv, PCMultiLevelApplyDInvTrans
@*/
int PCMultiLevelApplyP1Trans(PC pc, GVec x, GVec y)
{
  PC_Multilevel *ml;
  GVec           z;
  PetscScalar    zero = 0.0;
  int            ierr;

  PetscFunctionBegin;
  /* Setup the PC context */
  PetscValidHeaderSpecific(pc, PC_COOKIE);
  PetscValidHeaderSpecific(x,  VEC_COOKIE);
  PetscValidHeaderSpecific(y,  VEC_COOKIE);
  if (pc->setupcalled < 2)
  {
    ierr = PCSetUp(pc);                                                                                   CHKERRQ(ierr);
  }

  /* Apply P^T */
  ierr = VecDuplicate(x, &z);                                                                             CHKERRQ(ierr);
  ierr = PCMultiLevelApplyPTrans(pc, x, z);                                                               CHKERRQ(ierr);

  /* Scatter the solution vector z to the column vector y */
  ml = (PC_Multilevel *) pc->data;
  ierr = VecSet(&zero, y);                                                                                CHKERRQ(ierr);
  ierr = VecScatterBegin(z, y, INSERT_VALUES, SCATTER_FORWARD, ml->rangeScatter);                         CHKERRQ(ierr);
  ierr = VecScatterEnd(z, y, INSERT_VALUES, SCATTER_FORWARD, ml->rangeScatter);                           CHKERRQ(ierr);
  ierr = VecDestroy(z);                                                                                   CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PCMultiLevelApplyP2"
/*@C PCMultiLevelApplyP2
	This function applies P_2, the projector on the nullspace of B, to a vector.
  Note that this function is setup to take two vectors of the same size. The
  component of x and y in the range of B is zero on output.

  Input Parameters:
+ pc - The preconditioner context
- x  - The input vector

  Output Parameter:
. y  - The output vector

  Level: intermediate

.keywords multilevel
.seealso PCMultiLevelApplyGradient, PCMultiLevelApplyGradientTrans, PCMultiLevelApplyP, PCMultiLevelApplyPTrans,
         PCMultiLevelApplyP1, PCMultiLevelApplyP1Trans, PCMultiLevelApplyP2Trans, PCMultiLevelApplyV
         PCMultiLevelApplyVTrans, PCMultiLevelApplyDInv, PCMultiLevelApplyDInvTrans
@*/
int PCMultiLevelApplyP2(PC pc, GVec x, GVec y)
{
  PC_Multilevel *ml;
  PetscScalar   *xArray;
  int            row;
  int            ierr;

  PetscFunctionBegin;
  /* Setup the PC context */
  PetscValidHeaderSpecific(pc, PC_COOKIE);
  PetscValidHeaderSpecific(x,  VEC_COOKIE);
  PetscValidHeaderSpecific(y,  VEC_COOKIE);
  if (pc->setupcalled < 2)
  {
    ierr = PCSetUp(pc);                                                                                   CHKERRQ(ierr);
  }

  /* Zero out range space of B */
  ml = (PC_Multilevel *) pc->data;
  ierr = VecGetArray(x, &xArray);                                                                         CHKERRQ(ierr);
  for(row = 0; row < ml->rank; row++)
  {
    xArray[ml->range[row]] = 0.0;
  }
  ierr = VecRestoreArray(x, &xArray);                                                                     CHKERRQ(ierr);

  /* Apply P */
  ierr = PCMultiLevelApplyP(pc, x, y);                                                                    CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PCMultiLevelApplyP2Trans"
/*@C PCMultiLevelApplyP2Trans
	This function applies the transpose of P_2, the projector on the nullspace of B, to a vector.
  Note that this function is setup to take two vectors of the same size. The component of y in
  the range of B is zero on output.

  Input Parameters:
+ pc - The preconditioner context
- x  - The input vector

  Output Parameter:
. y  - The output vector

  Level: intermediate

.keywords multilevel
.seealso PCMultiLevelApplyGradient, PCMultiLevelApplyGradientTrans, PCMultiLevelApplyP, PCMultiLevelApplyPTrans,
         PCMultiLevelApplyP1, PCMultiLevelApplyP1Trans, PCMultiLevelApplyP2, PCMultiLevelApplyV
         PCMultiLevelApplyVTrans, PCMultiLevelApplyDInv, PCMultiLevelApplyDInvTrans
@*/
int PCMultiLevelApplyP2Trans(PC pc, GVec x, GVec y)
{
  PC_Multilevel *ml;
  PetscScalar   *yArray;
  int            row;
  int            ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_COOKIE);
  PetscValidHeaderSpecific(x,  VEC_COOKIE);
  PetscValidHeaderSpecific(y,  VEC_COOKIE);
  /* Apply P */
  ierr = PCMultiLevelApplyPTrans(pc, x, y);                                                               CHKERRQ(ierr);

  /* Zero out range space of B */
  ml = (PC_Multilevel *) pc->data;
  ierr = VecGetArray(y, &yArray);                                                                         CHKERRQ(ierr);
  for(row = 0; row < ml->rank; row++)
  {
    yArray[ml->range[row]] = 0.0;
  }
  ierr = VecRestoreArray(y, &yArray);                                                                     CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
