/* Drip - a transcoder for Unix
 * Copyright (C) 2001-2003 Jarl van Katwijk
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */


/*
 *
 * 1) Code taken from transcode, a linux video stream processing tool
 * 2) Code taken from the xine project:
 * Copyright (C) 2001 the xine project
 *
 * transcode is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * xine is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
 *
 * Deinterlace routines by Miguel Freitas
 * based on DScaler project sources (deinterlace.sourceforge.net)
 *
 * Currently only available for Xv driver and MMX extensions
 *
 */

#include "config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <inttypes.h>
#include "deinterlace.hh"
#include "fast_memcpy.hh"

typedef struct MyFilterData
{
  int bShowDeinterlacedAreaOnly;
  int bBlend;
  // int iThresholdBlend; // here we start blending
int iThreshold;         // here we start interpolating TODO FIXME
int iEdgeDetect;
}
MyFilterData;

MyFilterData myfd;

gboolean deinterlacer_init(void) {
  myfd.bShowDeinterlacedAreaOnly = 0;

  myfd.bBlend = 0;
  // myfd->bBlend = 1; there should be a another threshold for us to know from which
  // threshold to begin with blending up to the next when we start interpolating
  // that would give us much better results and  better resolution within the
  // interlacing area

  myfd.iThreshold  = 50;
  myfd.iEdgeDetect = 25;

  return TRUE;
}


/*
   DeinterlaceFieldBob algorithm
   Based on Virtual Dub plugin by Gunnar Thalin
   MMX asm version from dscaler project (deinterlace.sourceforge.net)
   Linux version for Xine player by Miguel Freitas
*/

void deinterlace_apply(guint8 *pdst, guint8 *psrc, gint width, gint height ) {

#ifdef HAVE_MMX  // the x86 assembly code here.. non x86 cpu's use the C version
  gint Line;
  long long* YVal1;
  long long* YVal2;
  long long* YVal3;
  long long* Dest;
  guint8* pEvenLines = psrc;
  guint8* pOddLines = psrc+width;
  gint LineLength = width;
  gint Pitch = width * 2;
  gint IsOdd = 1;
  glong EdgeDetect = 625;
  glong JaggieThreshold = 37;//73;

  gint n;

  unsigned long long qwEdgeDetect;
  unsigned long long qwThreshold;
  const unsigned char Mask[8] = {0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe};
  const unsigned char YMask[8] = {0xff,0x00,0xff,0x00,0xff,0x00,0xff,0x00};


  qwEdgeDetect = EdgeDetect;
  qwEdgeDetect += (qwEdgeDetect << 48) + (qwEdgeDetect << 32) + (qwEdgeDetect << 16);
  qwThreshold = JaggieThreshold;
  qwThreshold += (qwThreshold << 48) + (qwThreshold << 32) + (qwThreshold << 16);


  // copy first even line no matter what, and the first odd line if we're
  // processing an odd field.
  fast_memcpy(pdst, pEvenLines, LineLength);
  if (IsOdd)
    fast_memcpy(pdst + LineLength, pOddLines, LineLength);

  height = height / 2;
  for (Line = 0; Line < height - 1; ++Line) {
    if (IsOdd) {
      YVal1 = (long long *)(pOddLines + Line * Pitch);
      YVal2 = (long long *)(pEvenLines + (Line + 1) * Pitch);
      YVal3 = (long long *)(pOddLines + (Line + 1) * Pitch);
      Dest = (long long *)(pdst + (Line * 2 + 2) * LineLength);
    } else {
      YVal1 = (long long *)(pEvenLines + Line * Pitch);
      YVal2 = (long long *)(pOddLines + Line * Pitch);
      YVal3 = (long long *)(pEvenLines + (Line + 1) * Pitch);
      Dest = (long long *)(pdst + (Line * 2 + 1) * LineLength);
    }

    // For ease of reading, the comments below assume that we're operating on an odd
    // field (i.e., that bIsOdd is true).  The exact same processing is done when we
    // operate on an even field, but the roles of the odd and even fields are reversed.
    // It's just too cumbersome to explain the algorithm in terms of "the next odd
    // line if we're doing an odd field, or the next even line if we're doing an
    // even field" etc.  So wherever you see "odd" or "even" below, keep in mind that
    // half the time this function is called, those words' meanings will invert.

    // Copy the odd line to the overlay verbatim.
    fast_memcpy((char *)Dest + LineLength, YVal3, LineLength);

    n = LineLength >> 3;
    while( n-- ) {
      movq_m2r (*YVal1++, mm0);
      movq_m2r (*YVal2++, mm1);
      movq_m2r (*YVal3++, mm2);

      // get intensities in mm3 - 4
      movq_r2r ( mm0, mm3 );
      movq_r2r ( mm1, mm4 );
      movq_r2r ( mm2, mm5 );

      pand_m2r ( *(unsigned long long*)(&YMask), mm3 );
      pand_m2r ( *(unsigned long long*)(&YMask), mm4 );
      pand_m2r ( *(unsigned long long*)(&YMask), mm5 );

      // get average in mm0
      pand_m2r ( *(unsigned long long*)(&Mask), mm0 );
      pand_m2r ( *(unsigned long long*)(&Mask), mm2 );
      psrlw_i2r ( 01, mm0 );
      psrlw_i2r ( 01, mm2 );
      paddw_r2r ( mm2, mm0 );

      // work out (O1 - E) * (O2 - E) / 2 - EdgeDetect * (O1 - O2) ^ 2 >> 12
      // result will be in mm6

      psrlw_i2r ( 01, mm3 );
      psrlw_i2r ( 01, mm4 );
      psrlw_i2r ( 01, mm5 );

      movq_r2r ( mm3, mm6 );
      psubw_r2r ( mm4, mm6 );	//mm6 = O1 - E

      movq_r2r ( mm5, mm7 );
      psubw_r2r ( mm4, mm7 );	//mm7 = O2 - E

      pmullw_r2r ( mm7, mm6 );		// mm6 = (O1 - E) * (O2 - E)

      movq_r2r ( mm3, mm7 );
      psubw_r2r ( mm5, mm7 );		// mm7 = (O1 - O2)
      pmullw_r2r ( mm7, mm7 );	// mm7 = (O1 - O2) ^ 2
      psrlw_i2r ( 12, mm7 );		// mm7 = (O1 - O2) ^ 2 >> 12
      pmullw_m2r ( *&qwEdgeDetect, mm7 );// mm7  = EdgeDetect * (O1 - O2) ^ 2 >> 12

      psubw_r2r ( mm7, mm6 );      // mm6 is what we want

      pcmpgtw_m2r ( *&qwThreshold, mm6 );

      movq_r2r ( mm6, mm7 );

      pand_r2r ( mm6, mm0 );

      pandn_r2r ( mm1, mm7 );

      por_r2r ( mm0, mm7 );

      movq_r2m ( mm7, *Dest++ );
    }
  }

  // Copy last odd line if we're processing an even field.
  if (! IsOdd) {
    fast_memcpy(pdst+(height*2-1)*LineLength, pOddLines+(height-1)*Pitch, LineLength);
  }

  // clear out the MMX registers ready for doing floating point
  // again
  emms();


#else  // non x86 assembly deinterlace code
#warning Using general C deinterlacer

    register gint bShowDeinterlacedAreaOnly = myfd.bShowDeinterlacedAreaOnly;
    static gint y0, y1, y2, y3;
    static guchar *psrc1, *psrc2, *psrc3, *pdst1;
    register gint iInterlaceValue0, iInterlaceValue1, iInterlaceValue2;
    register gint x, y;
    register gint y_line;

    // with initialization

    static gint picsize = 0;
    static guchar *dest = NULL;
    register guchar *y_dst, *y_src;
    register gint bBlend = myfd.bBlend;
    register gint iThreshold = myfd.iThreshold;
    register gint iEdgeDetect = myfd.iEdgeDetect;


    if (picsize != (width*height)) {
        deinterlacer_init();
        if (!dest) {
            picsize = (gint)(width*height);
            dest = (guchar*)malloc(picsize);
        }
    }
    y_dst = dest;    // dst y pointer
    // we should not change u,v because one u, v value stands for
    // 2 pixels per 2 lines = 4 pixel and we don't want to change
    // the color of
    y_line  = width;
    y_src = psrc;

    iThreshold = (iThreshold * iThreshold)>>2;
    // We don't want an integer overflow in the  interlace calculation.
    if (iEdgeDetect > 180) iEdgeDetect = 180;
        iEdgeDetect = iEdgeDetect * iEdgeDetect;

    y1 = 0;		// Avoid compiler warning. The value is not used.
    for (x=0; x<width; x++) {
        psrc3 = y_src + x;
        y3    = *psrc3;
        psrc2 = psrc3 + y_line;
        y2 = *psrc2;
        pdst1 = y_dst + x;
        iInterlaceValue1 = iInterlaceValue2 = 0;
        for (y=0; y<=height; y++) {
            psrc1 = psrc2;
            psrc2 = psrc3;
            psrc3 = psrc3 + y_line;
            y0 = y1;
            y1 = y2;
            y2 = y3;
            if (y < height - 1) {
                y3 = *psrc3;
            } else {
                y3 = y1;
            }

            iInterlaceValue0 = iInterlaceValue1;
            iInterlaceValue1 = iInterlaceValue2;

            if (y < height)
                iInterlaceValue2 = ((y1-y2)*(y3-y2)-((iEdgeDetect*(y1-y3)*(y1-y3))>>12))*10;
            else
                iInterlaceValue2 = 0;

            if (y>0) {			
                if (iInterlaceValue0 + iInterlaceValue1>1 + iInterlaceValue2 > iThreshold) {
                    if (bBlend) { 
                        *pdst1 = (guchar)((y0 + y1>1 + y2) >> 2);
                    } else {
                        // this method seems to work better than blending if the
                        // quality is pretty bad and the half pics don't fit together
                        if ((y % 2)==1) {  // if odd simply copy the value
                            *pdst1 = *psrc1;
                        } else { // even interpolate the even line (upper + lower)/2
                            *pdst1 = (guchar)((y0 + y2) >> 1);
                        }
                    } 
                } else {
                    // so we went below the treshold and therefore we don't have to 
                    // change anything
                    if (bShowDeinterlacedAreaOnly) {
                        // this is for testing to see how we should tune the treshhold
                        // and shows as the things that haven't change because the 
                        // threshhold was to low?? (or shows that everything is ok :-)
                        *pdst1 = 0; // blank the point and so the interlac area
                    } else {
                        *pdst1 = *psrc1;
                    }
                }
	        pdst1 = pdst1 + y_line;
            }
	}
    }
#endif

    return;
}

