#ifndef POOMA_TRANSFORM_PETSC_H
#define POOMA_TRANSFORM_PETSC_H

// PETSc interfacing with POOMA using the PETSc DA interface.
//
// Copyright (c) 2004 by Richard Guenther <richard.guenther@uni-tuebingen.de>
//
// This file is in the public domain.

/** @file
 * @ingroup Utilities
 * @brief
 * Interfacing with the PETSc library of (non-)linear solvers.
 *
 * Interfacing supports the PETSc DA (distributed arrays) notion
 * for creating (non-)linear solvers for implicit finite difference
 * methods.  Using these wrappers you can fill your right-hand-side
 * vector from a POOMA engine and transfer the result-vector to
 * a POOMA engine.
 *
 * You are going to use the PetscDA class and its methods.
 * See examples/Solver/PETSc for how to use this.
 */

#include "Pooma/Arrays.h"
#include "petscda.h"


// Forward declaration only: the Field overloads of PoomaDA below name this
// template, but this header includes just Pooma/Arrays.h.
template <class MeshTag, class T, class EngineTag>
class Field;


namespace Pooma {


/**
 * Helper to convert DALocalInfo domain info to appropriate
 * Pooma Interval
 */

// Primary template is declared but never defined; only the explicit
// specializations for Dim = 1, 2 and 3 that follow can be instantiated.
template <int Dim>
struct PoomaDAGetDomain;

/// One-dimensional case: turn the (start, count) pairs stored in a
/// DALocalInfo into inclusive POOMA intervals [start, start+count-1].
template <>
struct PoomaDAGetDomain<1> {
  /// Interval built from the xs/xm fields of @a i.
  static Interval<1> innerDomain(DALocalInfo& i)
  {
    Interval<1> x(i.xs, i.xs+i.xm-1);
    return x;
  }

  /// Interval built from the gxs/gxm fields of @a i
  /// (PETSc documents the g-prefixed fields as including ghost points).
  static Interval<1> totalDomain(DALocalInfo& i)
  {
    Interval<1> gx(i.gxs, i.gxs+i.gxm-1);
    return gx;
  }
};

/// Two-dimensional case: each axis is converted from a (start, count)
/// pair in the DALocalInfo to an inclusive interval, then both axes are
/// combined into an Interval<2>.
template <>
struct PoomaDAGetDomain<2> {
  /// Domain built from the xs/xm and ys/ym fields of @a i.
  static Interval<2> innerDomain(DALocalInfo& i)
  {
    Interval<1> x(i.xs, i.xs+i.xm-1);
    Interval<1> y(i.ys, i.ys+i.ym-1);
    return Interval<2>(x, y);
  }

  /// Domain built from the gxs/gxm and gys/gym fields of @a i
  /// (PETSc documents the g-prefixed fields as including ghost points).
  static Interval<2> totalDomain(DALocalInfo& i)
  {
    Interval<1> gx(i.gxs, i.gxs+i.gxm-1);
    Interval<1> gy(i.gys, i.gys+i.gym-1);
    return Interval<2>(gx, gy);
  }
};

/// Three-dimensional case: each axis is converted from a (start, count)
/// pair in the DALocalInfo to an inclusive interval, then the three axes
/// are combined into an Interval<3>.
template <>
struct PoomaDAGetDomain<3> {
  /// Domain built from the xs/xm, ys/ym and zs/zm fields of @a i.
  static Interval<3> innerDomain(DALocalInfo& i)
  {
    Interval<1> x(i.xs, i.xs+i.xm-1);
    Interval<1> y(i.ys, i.ys+i.ym-1);
    Interval<1> z(i.zs, i.zs+i.zm-1);
    return Interval<3>(x, y, z);
  }

  /// Domain built from the gxs/gxm, gys/gym and gzs/gzm fields of @a i
  /// (PETSc documents the g-prefixed fields as including ghost points).
  static Interval<3> totalDomain(DALocalInfo& i)
  {
    Interval<1> gx(i.gxs, i.gxs+i.gxm-1);
    Interval<1> gy(i.gys, i.gys+i.gym-1);
    Interval<1> gz(i.gzs, i.gzs+i.gzm-1);
    return Interval<3>(gx, gy, gz);
  }
};



/**
 * Helper to ease brick-engine -> vector copy
 */

// Primary template is declared but never defined; only the explicit
// specializations for Dim = 1, 2 and 3 that follow can be instantiated.
template <int Dim>
struct PoomaDACopy;

template <>
struct PoomaDACopy<1> {
  template <class T>
  static
  void copy(Vec v, const Engine<1, T, Brick>& e)
  {
    PetscScalar *pa;
    VecGetArray(v, &pa);
    int idx=0;
    Interval<1> d(e.domain());
    for (int I=d.first(); I<=d.last(); ++I)
      pa[idx++] = e.read(I);
    VecRestoreArray(v, &pa);
  }
  template <class T>
  static
  void copy(const Engine<1, T, Brick>& e, Vec v)
  {
    PetscScalar *pa;
    VecGetArray(v, &pa);
    int idx=0;
    Interval<1> d(e.domain());
    for (int I=d.first(); I<=d.last(); ++I)
      e(I) = pa[idx++];
    VecRestoreArray(v, &pa);
  }
};

template <>
struct PoomaDACopy<2> {
  template <class T>
  static
  void copy(Vec v, const Engine<2, T, Brick>& e)
  {
    PetscScalar *pa;
    VecGetArray(v, &pa);
    int idx=0;
    Interval<2> d(e.domain());
    for (int J=d[1].first(); J<=d[1].last(); ++J)
      for (int I=d[0].first(); I<=d[0].last(); ++I)
	pa[idx++] = e.read(I, J);
    VecRestoreArray(v, &pa);
  }
  template <class T>
  static
  void copy(const Engine<2, T, Brick>& e, Vec v)
  {
    PetscScalar *pa;
    VecGetArray(v, &pa);
    int idx=0;
    Interval<2> d(e.domain());
    for (int J=d[1].first(); J<=d[1].last(); ++J)
      for (int I=d[0].first(); I<=d[0].last(); ++I)
	e(I, J) = pa[idx++];
    VecRestoreArray(v, &pa);
  }
};

template <>
struct PoomaDACopy<3> {
  template <class T>
  static
  void copy(Vec v, const Engine<3, T, Brick>& e)
  {
    PetscScalar *pa;
    VecGetArray(v, &pa);
    int idx=0;
    Interval<3> d(e.domain());
    for (int K=d[2].first(); K<=d[2].last(); ++K)
      for (int J=d[1].first(); J<=d[1].last(); ++J)
	for (int I=d[0].first(); I<=d[0].last(); ++I)
	  pa[idx++] = e.read(I, J, K);
    VecRestoreArray(v, &pa);
  }
  template <class T>
  static
  void copy(const Engine<3, T, Brick>& e, Vec v)
  {
    PetscScalar *pa;
    VecGetArray(v, &pa);
    int idx=0;
    Interval<3> d(e.domain());
    for (int K=d[2].first(); K<=d[2].last(); ++K)
      for (int J=d[1].first(); J<=d[1].last(); ++J)
	for (int I=d[0].first(); I<=d[0].last(); ++I)
	  e(I, J, K) = pa[idx++];
    VecRestoreArray(v, &pa);
  }
};



/**
 * Struct to wrap extra global information about a DA.
 */

template <int Dim>
struct PoomaDA {

  /// Creates a PETSc DA from the specified array/field/layout.
  /// Extra arguments are like DACreateNd, namely the periodicity
  /// and stencil type and the stencil width.

  template <class T, class EngineTag>
  PoomaDA(const Array<Dim, T, EngineTag> &a, DAPeriodicType pt, DAStencilType st, int sw)
  {
    initialize(a.physicalDomain(), pt, st, sw);
  }

  template <class MeshTag, class T, class EngineTag>
  PoomaDA(const Field<MeshTag, T, EngineTag> &f, DAPeriodicType pt, DAStencilType st, int sw)
  {
    initialize(f.physicalDomain(), pt, st, sw);
  }

  template <class Layout>
  PoomaDA(const Layout &l, DAPeriodicType pt, DAStencilType st, int sw)
  {
    initialize(l.innerDomain(), pt, st, sw);
  }

  ~PoomaDA()
  {
    delete[] info;
    DADestroy(da);
  }


  /// Can use this as PETSc DA type.

  operator DA() const { return da; }


  /// Access PeriodicType.

  DAPeriodicType periodicType() const { return info[0].pt; }

  /// Access StencilType.

  DAStencilType stencilType() const { return info[0].st; }


  /// Assign from POOMA engine to PETSc vector.

  template <class T, class EngineTag>
  void assign(Vec v, const Engine<Dim, T, EngineTag> &e);

  /// Assign from POOMA array to PETSc vector.

  template <class T, class EngineTag>
  void assign(Vec v, const Array<Dim, T, EngineTag> &a)
  {
    this->assign(v, a.engine());
  }

  /// Assign from POOMA field to PETSc vector.

  template <class MeshTag, class T, class EngineTag>
  void assign(Vec v, const Field<MeshTag, T, EngineTag> &f)
  {
    forEach(f, PerformUpdateTag(), NullCombine());
    this->assign(v, f.fieldEngine().engine());
  }


  /// Assign from PETSc vector to POOMA engine.

  template <class T, class EngineTag>
  void assign(const Engine<Dim, T, EngineTag> &e, Vec v);

  /// Assign from PETSc vector to POOMA array.

  template <class T, class EngineTag>
  void assign(const Array<Dim, T, EngineTag> &a, Vec v)
  {
    this->assign(a.engine(), v);
  }

  /// Assign from PETSc vector to POOMA field.

  template <class MeshTag, class T, class EngineTag>
  void assign(const Field<MeshTag, T, EngineTag> &f, Vec v)
  {
    this->assign(f.fieldEngine().engine(), v);
    f.notifyPostWrite();
  }

protected:
  void initialize(const Interval<Dim> &d, DAPeriodicType pt, DAStencilType st, int sw);


private:
  DA da;
  Loc<Dim> offset;
  int n;
  DALocalInfo *info;

};


/// Creates the PETSc DA for the given POOMA domain and collects the
/// DALocalInfo of every patch.
///
/// For every dimension that is not periodic according to @a pt the domain
/// handed to PETSc is enlarged by @a sw points on both sides and the
/// corresponding component of `offset` is set to @a sw; periodic dimensions
/// keep their size and an offset of 0.
///
/// @param d   POOMA domain the DA is derived from
/// @param pt  DA periodicity
/// @param st  DA stencil type (DACreate1d takes none, so unused for Dim == 1)
/// @param sw  stencil width
///
/// Insists that the number of DA patches equals Pooma::contexts().
template <int Dim>
void PoomaDA<Dim>::initialize(const Interval<Dim> &d, DAPeriodicType pt, DAStencilType st, int sw)
{
  offset = Loc<Dim>(0);
  Interval<Dim> domain = d;
  if (pt != DA_XPERIODIC
      && pt != DA_XYPERIODIC
      && pt != DA_XYZPERIODIC
      && pt != DA_XZPERIODIC) {
    domain[0] = growLeft(growRight(domain[0], sw), sw);
    offset[0] = Loc<1>(sw);
  }
  if (Dim > 1
      && pt != DA_YPERIODIC
      && pt != DA_XYPERIODIC
      && pt != DA_XYZPERIODIC
      && pt != DA_YZPERIODIC) {
    domain[1] = growLeft(growRight(domain[1], sw), sw);
    offset[1] = Loc<1>(sw);
  }
  if (Dim > 2
      && pt != DA_ZPERIODIC
      && pt != DA_XZPERIODIC
      && pt != DA_XYZPERIODIC
      && pt != DA_YZPERIODIC) {
    domain[2] = growLeft(growRight(domain[2], sw), sw);
    offset[2] = Loc<1>(sw);
  }

  // create DA
  if (Dim == 1) {
    DACreate1d(PETSC_COMM_WORLD,          /* MPI communicator */
               pt,                        /* grid periodicity */
               domain[0].size(),          /* global domain size */
               1,                         /* degrees of freedom */
               sw,                        /* stencil width */
               PETSC_NULL,                /* local domain sizes per dimension */
               &this->da);
  } else if (Dim == 2) {
    DACreate2d(PETSC_COMM_WORLD,          /* MPI communicator */
               pt,                        /* grid periodicity */
               st,                        /* stencil type */
               domain[0].size(),
               domain[1].size(),          /* global domain size */
               PETSC_DECIDE, PETSC_DECIDE,/* # processors */
               1,                         /* degrees of freedom */
               sw,                        /* stencil width */
               PETSC_NULL, PETSC_NULL,    /* local domain sizes per dimension */
               &this->da);
  } else if (Dim == 3) {
    DACreate3d(PETSC_COMM_WORLD,          /* MPI communicator */
               pt,                        /* grid periodicity */
               st,                        /* stencil type */
               domain[0].size(), domain[1].size(),
               domain[2].size(),          /* global domain size */
               PETSC_DECIDE, PETSC_DECIDE,
               PETSC_DECIDE,              /* # processors */
               1,                         /* degrees of freedom */
               sw,                        /* stencil width */
               PETSC_NULL, PETSC_NULL,
               PETSC_NULL,                /* local domain sizes per dimension */
               &this->da);
  }

  // Query the processor grid chosen by PETSc.  The counters default to 1
  // so the product stays well defined even if a lower-dimensional DA
  // leaves an unused direction untouched.  (Named distinctly so they do not
  // shadow the member n.)
  int procs_x = 1;
  int procs_y = 1;
  int procs_z = 1;
  DAGetInfo(this->da, PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL,
            &procs_x, &procs_y, &procs_z,
            PETSC_NULL, PETSC_NULL, PETSC_NULL, PETSC_NULL);
  this->n = procs_x*procs_y*procs_z;
  PInsist(Pooma::contexts() == this->n, "nr patches");

  // collect local information
  this->info = new DALocalInfo[this->n];
  DALocalInfo local_info;
  DAGetLocalInfo(this->da, &local_info);
  this->info[Pooma::context()] = local_info;

  // distribute local information
  // fixme - MPI_Allgather wrapper missing
  // The send buffer is a separate object on purpose: MPI does not allow the
  // send buffer of MPI_Allgather to alias a part of the receive buffer.
#if POOMA_MPI
  MPI_Allgather(&local_info, sizeof(DALocalInfo), MPI_CHAR,
                this->info, sizeof(DALocalInfo), MPI_CHAR,
                MPI_COMM_WORLD);
#endif
}

/// Assign from POOMA engine to PETSc vector.
///
/// For every DA patch i a Remote<Brick> array living on context i is
/// created over that patch's inner domain and filled from the matching view
/// of @a e (the patch domain shifted by `offset`).  Every context executes
/// the whole loop.  After all assignments have been evaluated, the brick
/// belonging to the calling context is copied into @a v.
///
/// @param v  destination PETSc vector
/// @param e  source POOMA engine
template <int Dim>
template <class T, class EngineTag>
void PoomaDA<Dim>::assign(Vec v, const Engine<Dim, T, EngineTag> &e)
{
  typedef Engine<Dim, T, EngineTag> Engine_t;
  typedef typename NewEngine<Engine_t, Interval<Dim> >::Type_t ViewEngine_t;

  // our local brick engine
  Engine<Dim, T, Brick> local_e;

  // loop over all DA patches
  for (int i=0; i<this->n; ++i) {
    // create DA patch context local pooma array
    Interval<Dim> lPatch(PoomaDAGetDomain<Dim>::innerDomain(this->info[i]));
    Array<Dim, T, Remote<Brick> > a;
    a.engine() = Engine<Dim, T, Remote<Brick> >(i, lPatch);
    Array<Dim, T, typename ViewEngine_t::Tag_t> e_array(ViewEngine_t(e, lPatch + this->offset));
    a = e_array;

    // remember local engine
    if (i == Pooma::context())
      local_e = a.engine().localEngine();
  }

  // the array assignments above must have completed before the brick is read
  Pooma::blockAndEvaluate();

  // do the copy locally
  PoomaDACopy<Dim>::copy(v, local_e);
}

/// Assign from PETSc vector to POOMA engine.
///
/// The calling context's share of @a v is first staged in a brick engine
/// sized to this context's inner DA domain.  Then, for every DA patch, a
/// Remote<Brick> array on the owning context is assigned to the matching
/// view of @a e (the patch domain shifted by `offset`); on the owning
/// context the remote array's local engine is replaced by the staged brick.
///
/// @param e  destination POOMA engine
/// @param v  source PETSc vector
template <int Dim>
template <class T, class EngineTag>
void PoomaDA<Dim>::assign(const Engine<Dim, T, EngineTag> &e, Vec v)
{
  typedef Engine<Dim, T, EngineTag> Engine_t;
  typedef typename NewEngine<Engine_t, Interval<Dim> >::Type_t ViewEngine_t;

  // staging brick covering the patch owned by this context
  Interval<Dim> own_box(PoomaDAGetDomain<Dim>::innerDomain(this->info[Pooma::context()]));
  Engine<Dim, T, Brick> staging(own_box);

  // Fill the staging brick from the vector.  The brick cannot simply wrap
  // the vector's storage because the array index ordering differs, hence
  // this extra copy.
  PoomaDACopy<Dim>::copy(staging, v);

  // visit every DA patch
  for (int ctx = 0; ctx < this->n; ++ctx) {
    // remote array over patch ctx, living on context ctx
    Interval<Dim> patch(PoomaDAGetDomain<Dim>::innerDomain(this->info[ctx]));
    Array<Dim, T, Remote<Brick> > remote;
    remote.engine() = Engine<Dim, T, Remote<Brick> >(ctx, patch);

    // on the owning context, substitute the staged data
    if (ctx == Pooma::context()) {
      remote.engine().localEngine() = staging;
    }

    // write the patch into the matching view of the destination engine
    Array<Dim, T, typename ViewEngine_t::Tag_t> target;
    target.engine() = ViewEngine_t(e, patch + this->offset);
    target = remote;
  }
}


} // namespace Pooma

#endif
