// -*- C++ -*-
//
// Copyright (C) 1998, 1999, 2000, 2002  Los Alamos National Laboratory,
// Copyright (C) 1998, 1999, 2000, 2002  CodeSourcery, LLC
//
// This file is part of FreePOOMA.
//
// FreePOOMA is free software; you can redistribute it and/or modify it
// under the terms of the Expat license.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Expat
// license for more details.
//
// You should have received a copy of the Expat license along with
// FreePOOMA; see the file LICENSE.
//

//-----------------------------------------------------------------------------
// JacobiInP2UnOpt is a Pooma II implementation of the Jacobi iterative solver
//-----------------------------------------------------------------------------

#ifndef POOMA_BENCHMARKS_SOLVERS_JACOBI_JACOBIINP2_H
#define POOMA_BENCHMARKS_SOLVERS_JACOBI_JACOBIINP2_H

// include files

#include "Pooma/Arrays.h"
#include "Utilities/Benchmark.h"

class JacobiInP2UnOpt : public Implementation {
public:
  
  typedef Array<2, double, Brick> Array2D;
  
  //---------------------------------------------------------------------------
  // We are a PoomaII implementation.

  const char* type() const { return P2Type(); }

  //---------------------------------------------------------------------------
  // We need to initialize the problem for a specific size.

  void initialize(int n) {
    // set array sizes
    Interval<1> I(1,n), J(1,n);
    Interval<2> newDomain(I,J);
    x0_m.initialize(newDomain);
    x1_m.initialize(newDomain);
    b_m.initialize(newDomain);

    // save problem size
    n_m = n;
  }

  //---------------------------------------------------------------------------
  // Runs the benchmark.

  void run() {
    // reset array values
    x0_m = 0.0;
    x1_m = 0.0;
    Pooma::blockAndEvaluate();
    x0_m(n_m/2,n_m/2) = 1.0;

    // set up for Jacobi iteration
    iters_m = 0;
    double dx = 1.0 / (n_m-1);
    double dt = 0.1 * dx;
    Interval<1> I(2,n_m-1), J(2,n_m-1);

    // The coefficients for the linear operator.
    double A2 = 1.0/(6.0*dx*dx);
    double A1 = 4.0*A2;
    double A0 = 4.0*(A1+A2);
    double h0 = 1.0 - dt*theta_s*A0;
    double h1 = dt*theta_s*A1;
    double h2 = dt*theta_s*A2;

    // The scaling factor for getting the constant offset part of 
    // the update from the right hand side of the equation.
    double dtth = dt*(1.0-theta_s);
    double e0 = 1.0 + dtth*A0;
    double e1 = dtth*A1;
    double e2 = dtth*A2;
    double c0 = phi_s/(1.0+dtth*A0);

    // The coefficients for the update matrix.
    double gf = phi_s*dtth/(1.0+dtth*A0);
    double g0 = 1.0-phi_s;
    double g1 = gf*A1;
    double g2 = gf*A2;
  
    for (double t=0; t<tfinal_s; t+=2*dt) {
      // Calculate the right hand side.
      b_m(I,J) = h0 * x0_m(I,J) + 
                 h1 * (x0_m(I-1,J) + x0_m(I+1,J) +
                       x0_m(I,J+1) + x0_m(I,J-1)) +
                 h2 * (x0_m(I-1,J-1) + x0_m(I+1,J-1) +
                       x0_m(I-1,J+1) + x0_m(I+1,J+1));
      double err;
      do {
        // Do a double iteration to use the allocated memory efficiently.
        x1_m(I,J) = c0 * b_m(I,J) + g0 * x0_m(I,J) +
	            g1 * (x0_m(I-1,J) + x0_m(I+1,J) +
                          x0_m(I,J+1) + x0_m(I,J-1)) +
	            g2 * (x0_m(I-1,J-1) + x0_m(I+1,J-1) +
                          x0_m(I-1,J+1) + x0_m(I+1,J+1));

        x0_m(I,J) = c0 * b_m(I,J) + g0 * x1_m(I,J) +
	            g1 * (x1_m(I-1,J) + x1_m(I+1,J) +
                          x1_m(I,J+1) + x1_m(I,J-1)) +
	            g2 * (x1_m(I-1,J-1) + x1_m(I+1,J-1) +
                          x1_m(I-1,J+1) + x1_m(I+1,J+1));
	Pooma::blockAndEvaluate();

        // Calculate the residual.
        // Since we don't have reductions yet, do this by hand.
        err = 0;
        for (int j=2; j<=n_m-1; ++j) {
	  for (int i=2; i<=n_m-1; ++i) {
	    double e = -b_m(i,j) + e0 * x0_m(i,j) -
	               e1 * (x0_m(i-1,j) + x0_m(i+1,j) +
                             x0_m(i,j+1) + x0_m(i,j-1)) -
	               e2 * (x0_m(i-1,j-1) + x0_m(i+1,j-1) +
                             x0_m(i-1,j+1) + x0_m(i+1,j+1));
	      
	    err += e*e;
	  }
        }
        ++iters_m;
      } while (err>1e-4);  
      
    }

    Pooma::blockAndEvaluate();

    // save check value
    check_m = x0_m(n_m/2,n_m/2);
    // tally up the flops
    flops_m = 11 * ((double)n_m - 2) * ((double)n_m - 2) * int(tfinal_s/(2*(0.1/((double)n_m-1)))) +
              iters_m * 41 * ((double)n_m - 2) * ((double)n_m - 2);
  }    

  //---------------------------------------------------------------------------
  // Prints out the check value for this case.

  double resultCheck() const { return check_m; }
  
  //---------------------------------------------------------------------------
  // Returns the number of flops.

  double opCount() const { return flops_m; }

private:

  //---------------------------------------------------------------------------
  // Arrays.
  
  Array2D x0_m, x1_m, b_m;
  
  //---------------------------------------------------------------------------
  // Parameters.
  
  static const double tfinal_s, theta_s, phi_s;
  
  //---------------------------------------------------------------------------
  // Problem size.
  
  int n_m;
  
  //---------------------------------------------------------------------------
  // Check value.
  
  double check_m;

  //---------------------------------------------------------------------------
  // Iteration count
  
  int iters_m;

  //---------------------------------------------------------------------------
  // Flop count
  
  double flops_m;
};

// initialize static variables
// NOTE(review): these are non-inline definitions living in a header, so
// including this header from more than one translation unit would define
// them more than once.

// Upper bound of the time loop in run(): stepping continues while t is
// below this value.
const double JacobiInP2UnOpt::tfinal_s = 0.5;
// Weighting of the time step: theta_s scales dt in the coefficients that
// build the right hand side, and (1 - theta_s) scales dt in the
// coefficients of the operator that is solved and checked by the residual.
const double JacobiInP2UnOpt::theta_s = 0.5;
// Sweep weighting: phi_s scales the right hand side and neighbor terms of
// each sweep, while (1 - phi_s) multiplies the previous value at the point
// itself (zero with the value chosen here).
const double JacobiInP2UnOpt::phi_s = 1.0;



//-----------------------------------------------------------------------------
// Stencil NinePoint
//-----------------------------------------------------------------------------

class NinePoint
{
public:
  // a0 weights the point itself, a1 the four neighbors that share an edge
  // with it, and a2 the four diagonal neighbors.
  NinePoint(double a0, double a1, double a2)
    : a0_m(a0),
      a1_m(a1),
      a2_m(a2)
  { }

  // Returns the weighted nine-point sum of x around (i,j).  A must provide
  // an Element_t type and be callable as x(i,j).
  template<class A>
  typename A::Element_t
  operator()(const A& x, int i, int j) const
  {
    typedef typename A::Element_t Value_t;

    const int lo_i = i - 1;
    const int hi_i = i + 1;
    const int lo_j = j - 1;
    const int hi_j = j + 1;

    const Value_t center  = x(i, j);
    const Value_t edges   = x(hi_i, j) + x(lo_i, j) +
                            x(i, hi_j) + x(i, lo_j);
    const Value_t corners = x(hi_i, hi_j) + x(lo_i, hi_j) +
                            x(hi_i, lo_j) + x(lo_i, lo_j);

    return a0_m * center + a1_m * edges + a2_m * corners;
  }

  // The stencil reaches one point below and one point above the evaluation
  // point.  The argument is ignored, so the same extent is reported for
  // every value passed in.
  int lowerExtent(int) const { return 1; }
  int upperExtent(int) const { return 1; }

private:
  // Weights for the center, edge and corner points respectively.
  double a0_m;
  double a1_m;
  double a2_m;
};

//-----------------------------------------------------------------------------
// JacobiInP2Opt is a Pooma II Stencil implementation of the Jacobi solver
//-----------------------------------------------------------------------------

class JacobiInP2Opt : public Implementation {
public:
  
  typedef Array<2, double, Brick> Array2D;
  
  //---------------------------------------------------------------------------
  // We are a PoomaII implementation.

  const char* type() const { return P2Type(); }
  const char* qualification() const { return "StencilObj"; }

  //---------------------------------------------------------------------------
  // We need to initialize the problem for a specific size.

  void initialize(int n) {
    // set array sizes
    Interval<1> I(1,n), J(1,n);
    Interval<2> newDomain(I,J);
    x0_m.initialize(newDomain);
    x1_m.initialize(newDomain);
    b_m.initialize(newDomain);

    // save problem size
    n_m = n;
  }

  //---------------------------------------------------------------------------
  // Runs the benchmark.

  void run() {
    // reset array values
    x0_m = 0.0;
    x1_m = 0.0;
    Pooma::blockAndEvaluate();
    x0_m(n_m/2,n_m/2) = 1.0;

    // set up for Jacobi iteration
    iters_m = 0;
    double dx = 1.0 / (n_m-1);
    double dt = 0.1 * dx;
    Interval<1> I(2,n_m-1), J(2,n_m-1);

    // The coefficients for the linear operator.
    double A2 = 1.0/(6.0*dx*dx);
    double A1 = 4.0*A2;
    double A0 = 4.0*(A1+A2);
    double h0 = 1.0 - dt*theta_s*A0;
    double h1 = dt*theta_s*A1;
    double h2 = dt*theta_s*A2;

    // The scaling factor for getting the constant offset part of 
    // the update from the right hand side of the equation.
    double dtth = dt*(1.0-theta_s);
    double e0 = 1.0 + dtth*A0;
    double e1 = dtth*A1;
    double e2 = dtth*A2;
    double c0 = phi_s/(1.0+dtth*A0);

    // The coefficients for the update matrix.
    double gf = phi_s*dtth/(1.0+dtth*A0);
    double g0 = 1.0-phi_s;
    double g1 = gf*A1;
    double g2 = gf*A2;
  
    for (double t=0; t<tfinal_s; t+=2*dt) {
      // Calculate the right hand side.
      NinePoint ninePointH(h0,h1,h2);
      b_m(I,J) = Stencil<NinePoint>(ninePointH)(x0_m);
      double err;
      do {
	NinePoint ninePointG(g0,g1,g2);
        // Do a double iteration to use the allocated memory efficiently.
        x1_m(I,J) = c0 * b_m(I,J) + Stencil<NinePoint>(ninePointG)(x0_m);
        x0_m(I,J) = c0 * b_m(I,J) + Stencil<NinePoint>(ninePointG)(x1_m);

        Pooma::blockAndEvaluate();

        // Calculate the residual.
        // Since we don't have reductions yet, do this by hand.
        err = 0;
        for (int j=2; j<=n_m-1; ++j) {
	  for (int i=2; i<=n_m-1; ++i) {
	    double e = -b_m(i,j) + e0 * x0_m(i,j) -
	               e1 * (x0_m(i-1,j) + x0_m(i+1,j) +
                             x0_m(i,j+1) + x0_m(i,j-1)) -
	               e2 * (x0_m(i-1,j-1) + x0_m(i+1,j-1) +
                             x0_m(i-1,j+1) + x0_m(i+1,j+1));
	      
	    err += e*e;
	  }
        }
        ++iters_m;
      } while (err>1e-4);  
      
    }

    Pooma::blockAndEvaluate();

    // save check value
    check_m = x0_m(n_m/2,n_m/2);
    // tally up the flops
    flops_m = 11 * ((double)n_m - 2) * ((double)n_m - 2) * int(tfinal_s/(2*(0.1/((double)n_m-1)))) +
              iters_m * 41 * ((double)n_m - 2) * ((double)n_m - 2);
  }    

  //---------------------------------------------------------------------------
  // Prints out the check value for this case.

  double resultCheck() const { return check_m; }
  
  //---------------------------------------------------------------------------
  // Returns the number of flops.

  double opCount() const { return flops_m; }

private:

  //---------------------------------------------------------------------------
  // Arrays.
  
  Array2D x0_m, x1_m, b_m;
  
  //---------------------------------------------------------------------------
  // Parameters.
  
  static const double tfinal_s, theta_s, phi_s;
  
  //---------------------------------------------------------------------------
  // Problem size.
  
  int n_m;
  
  //---------------------------------------------------------------------------
  // Check value.
  
  double check_m;

  //---------------------------------------------------------------------------
  // Iteration count
  
  int iters_m;

  //---------------------------------------------------------------------------
  // Flop count
  
  double flops_m;
};

// initialize static variables
// NOTE(review): these are non-inline definitions living in a header, so
// including this header from more than one translation unit would define
// them more than once.

// Upper bound of the time loop in run(): stepping continues while t is
// below this value.
const double JacobiInP2Opt::tfinal_s = 0.5;
// Weighting of the time step: theta_s scales dt in the coefficients that
// build the right hand side, and (1 - theta_s) scales dt in the
// coefficients of the operator that is solved and checked by the residual.
const double JacobiInP2Opt::theta_s = 0.5;
// Sweep weighting: phi_s scales the right hand side and neighbor terms of
// each sweep, while (1 - phi_s) multiplies the previous value at the point
// itself (zero with the value chosen here).
const double JacobiInP2Opt::phi_s = 1.0;

#endif // POOMA_BENCHMARKS_SOLVERS_JACOBI_JACOBIINP2_H

// ACL:rcsinfo
// ----------------------------------------------------------------------
// $RCSfile: JacobiInP2.h,v $   $Author: richard $
// $Revision: 1.25 $   $Date: 2004/11/01 18:15:17 $
// ----------------------------------------------------------------------
// ACL:rcsinfo
