/* -*- C -*- */
/*
   Copyright (C) 1998, 1999, 2000, 2002  Los Alamos National Laboratory,
   Copyright (C) 1998, 1999, 2000, 2002  CodeSourcery, LLC

   This file is part of FreePOOMA.

   FreePOOMA is free software; you can redistribute it and/or modify it
   under the terms of the Expat license.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Expat
   license for more details.

   You should have received a copy of the Expat license along with
   FreePOOMA; see the file LICENSE.
 */

/* include files */

#include "Pooma/Configuration.h"


/* wrapper for integrated C/C++ compiler */

#ifdef __cplusplus
extern "C" {
#endif

/* Helper functions for dot products. */

/*
 * Inner product of two n-by-n grids stored in flat arrays with stride n
 * (entry (i, j) lives at index i + n * j).
 *
 * Only interior points (1 <= i, j <= n-2) contribute; the outermost ring
 * of each grid is skipped.  Returns 0.0 when n < 3, because there is no
 * interior in that case.
 */
double dot(double* a, double* b, int n) {
  const int last = n - 2;
  double sum = 0.0;
  int row, col;

  for (row = 1; row <= last; row++) {
    /* Offset of the first entry of this grid row. */
    const int base = n * row;

    for (col = 1; col <= last; col++) {
      sum += a[base + col] * b[base + col];
    }
  }

  return sum;
}

/*
 * Sum of squares of the interior entries of an n-by-n grid stored in a
 * flat array with stride n (entry (i, j) lives at index i + n * j).
 *
 * Only points with 1 <= i, j <= n-2 are visited; the outermost ring is
 * skipped.  Returns 0.0 when n < 3, because there is no interior then.
 */
double autodot(double* a, int n) {
  const int last = n - 2;
  double sum = 0.0;
  int row, col;

  for (row = 1; row <= last; row++) {
    /* Offset of the first entry of this grid row. */
    const int base = n * row;

    for (col = 1; col <= last; col++) {
      sum += a[base + col] * a[base + col];
    }
  }

  return sum;
}

/* C implementation of CG solver. */

/*
 * Conjugate-gradient iteration on an n-by-n grid held in flat arrays with
 * stride n (entry (i, j) lives at index i + n * j).
 *
 * The operator applied to a grid v at an interior point (i, j) is
 *   h2i * (4*v(i,j) - v(i-1,j) - v(i+1,j) - v(i,j-1) - v(i,j+1))
 * with h2i = (n-1)*(n-1).
 *
 *   f      right-hand side; only read.
 *   x      starting guess on entry, updated iterate on exit.
 *   d      work array: search direction.
 *   q      work array: operator applied to d.
 *   r      work array: residual f - A x.
 *   n      grid extent in each direction.
 *   iters  receives the loop counter at exit.  The counter starts at 1 and
 *          is incremented after every pass, so the stored value is the
 *          number of passes performed plus one.
 *
 * The loop keeps running while (r.r)/(f.f) > tol and no more than max_iter
 * passes have been made.  Both inner products cover interior points only
 * (see autodot), and f.f is replaced by 1 when it is exactly zero.
 *
 * NOTE(review): only the interior entries of r, d and q are initialised
 * here, yet the stencil reads the edge entries of x and d, and the update
 * loops touch every entry of x, r and d (reading q's edges as well).
 * Presumably the caller hands in d, q and r with zeroed edges -- confirm
 * against the caller.
 */
void
runCGAInC(double* f, double* restrict x, double* restrict d,
          double* restrict q, double* restrict r, int n, int* iters)
{
  const double tol=1.0E-10;
  const int max_iter=10000;
  const int last = n - 2;                  /* last interior index */
  const int cells = (n > 0) ? n * n : 0;   /* entries in the full grid */
  int row, col, k, pass, h2i;
  double rr, rr_next, step, mix, f_norm2, rel;

  h2i = (n-1)*(n-1);
  f_norm2 = autodot(f, n);

  /* Initial residual r = f - A x on the interior; d starts equal to r. */
  for (row = 1; row <= last; row++) {
    for (col = 1; col <= last; col++) {
      k = col + n * row;
      r[k] = f[k] - h2i *
        (4*x[k] - x[k - 1] - x[k + 1] - x[k - n] - x[k + n]);
      d[k] = r[k];
      q[k] = 0.0;
    }
  }

  /* Avoid dividing by zero when the right-hand side has no interior part. */
  if (f_norm2 == 0.0) f_norm2 = 1;
  rr = autodot(r, n);
  rel = rr / f_norm2;

  pass = 1;
  while (pass <= max_iter && rel > tol) {
    /* q = A d on the interior. */
    for (row = 1; row <= last; row++) {
      for (col = 1; col <= last; col++) {
        k = col + n * row;
        q[k] = h2i *
          (4*d[k] - d[k - 1] - d[k + 1] - d[k - n] - d[k + n]);
      }
    }

    /* Step along d; this update runs over every grid entry, edges too. */
    step = rr / dot(d, q, n);
    for (k = 0; k < cells; k++) {
      x[k] += step * d[k];
      r[k] -= step * q[k];
    }

    /* New search direction, again over the whole grid. */
    rr_next = autodot(r, n);
    mix = rr_next/rr;
    for (k = 0; k < cells; k++) {
      d[k] = r[k] + mix * d[k];
    }

    rr = rr_next;
    rel = rr / f_norm2;
    pass++;
  }

  *iters = pass;
}

#ifdef __cplusplus
}
#endif

/* ACL:rcsinfo */
/* ----------------------------------------------------------------------
 * $RCSfile: CGAInC.c,v $   $Author: richard $
 * $Revision: 1.11 $   $Date: 2004/11/01 18:15:18 $
 * ----------------------------------------------------------------------
 */
/* ACL:rcsinfo */
