// This file is part of the pdr/pdx project.
// Copyright (C) 2010 Torsten Mueller, Bern, Switzerland
//
// This program is free software: you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation, either version 2 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

#include "../libpdrx/common.h"

using namespace std;
using namespace boost;
using namespace boost::posix_time;
using namespace boost::gregorian;
using namespace boost::program_options;

#include "../libpdrx/datatypes.h"
#include "../libpdrx/config.h"
#include "../libpdrx/encoding.h"
#include "db.h"
#include "in_impl.h"

//=== CSVFile ==============================================================
// Constructs the CSV input. The option key is handed on unchanged to the
// FileInputImpl base class together with the fixed string "csv"
// (NOTE(review): presumably the name or file extension of this input
// type - confirm against FileInputImpl). An empty option key makes
// ProcessFile read the un-prefixed options "ctrl_line" and "encoding".
CSVFile::CSVFile (const string& option_key)
	: FileInputImpl(option_key, "csv")
{
}

	// the names of the collections declared by a control line, in the
	// order of their appearance
	typedef vector<string> Collections;

	// this function parses a control line and does three things:
	// - extract the collection names and put them into a vector
	// - build a regular expression to parse whole data lines
	// - find the index of the datetime in the data lines
	//
	// parameters:
	// - line: the column declarations, a list of names separated by
	//   ',', ';' or a tab; an empty name declares a column that is
	//   matched but not captured
	// - collections: receives the non-empty names in order
	// - expr: receives the regular expression, it contains exactly one
	//   capturing group per entry in collections
	// - datetime_index: receives the index of the first entry named
	//   "datetime" in collections
	//
	// throws an Xception if no column is named "datetime"
	static void evaluate_control_line (const string& line, Collections& collections, string& expr, size_t& datetime_index)
	{
		// all three results are rebuilt from scratch; the index has to
		// be reset too, otherwise a second control line would silently
		// keep the index that was found for an earlier one
		collections.clear();
		expr.clear();
		datetime_index = (size_t)-1;

		string s(line);

		// the cast keeps isspace defined for chars with a negative value
		while (!s.empty() && isspace((unsigned char)s[0]))
			s.erase(0, 1);

		while (!s.empty())
		{
			// cut the next column name off the front of s, without a
			// separator (p1 == npos) this takes the whole rest
			const string::size_type p1 = s.find_first_of(",;\t");
			string c(s, 0, p1);
			trim(c);
			s.erase(0, p1);

			if (p1 != string::npos)
			{
				// s starts with the separator now
				const char separator = s[0];
				if (!c.empty())
				{
					collections.push_back(c);
					// a value is either free of separators or quoted
					expr += string("((?:[^") + separator + "]*)|(?:\".*\"))";
				}
				else
					expr += ".*";
				expr += string("[") + separator + ']'; // the concrete separator
				s.erase(0, 1);
				while (!s.empty() && isspace((unsigned char)s[0]))
					s.erase(0, 1);
			}
			else
			{
				// the last column takes the rest of the data line
				if (!c.empty())
				{
					collections.push_back(c);
					expr += "(.*)";
				}
				else
					expr += ".*";
			}
		}

		// the first column named "datetime" carries the timestamp
		for (size_t i = 0; i < collections.size(); i++)
		{
			if (collections[i] == "datetime")
			{
				datetime_index = i;
				break;
			}
		}
		if (datetime_index == (size_t)-1)
			throw Xception("missing datetime column");
	}

	// this function parses a data line using a given regular expression,
	// the result is a vector of CollectionElements that can be used in
	// a database insert transaction
	static void evaluate_data_line (const string& line, const string& expr, Collections& collections, size_t datetime_index, Database::CollectionElements& elements, Database& database)
	{
		// match the whole line against the built regex
		regex rx_line(expr);
		smatch mr;
		if (!regex_match(line, mr, rx_line))
			throw Xception("line format error");

		// extract the timestamp
		ptime timestamp(not_a_date_time);
		string s(mr[datetime_index + 1]);
		trim(s);
		try
		{
			if (s.find('T') != string::npos)
				timestamp = from_iso_string(s);
			else
				timestamp = lexical_cast<ptime>(s);
		}
		catch (bad_lexical_cast)
		{
			throw Xception(format("value error, %s is not a datetime") % s);
		}

		// extract the data values for the declared collections
		for (size_t i = 0; i < collections.size(); i++)
		{
			if (i == datetime_index)
				continue;

			// s is a data value as text
			s = mr[i + 1];
			trim(s);
			if (s[0] == '"' && s[s.length() - 1] == '"')
			{
				s.erase(0, 1);
				s.erase(s.length() - 1, 1);
			}
			if (s.empty())
				continue;

			// now we make an any from s according to the type
			// of the collection in the database
			any a;
			switch (database.GetCollectionType(collections[i]))
			{
				case 'n':
				{
					try
					{
						a = lexical_cast<double>(s);
					}
					catch (bad_lexical_cast)
					{
						throw Xception(format("value error, %s is not a double") % s);
					}
					break;
				}
				case 'r':
				{
					try
					{
						a = lexical_cast<Ratio>(s);
					}
					catch (bad_lexical_cast)
					{
						throw Xception(format("value error, %s is not a Ratio") % s);
					}
					break;
				}
				default:
				{
					a = s;
					break;
				}
			}

			// everything went ok, put a CollectionElement into
			// the vector
			elements.push_back(Database::CollectionElement(collections[i], timestamp, a));
		}
	}

// Reads a CSV file line by line and appends a CollectionElement to
// elements for every data value found.
//
// The layout of the data lines is described by a control line. If this
// input has no option key and the option "ctrl_line" is set, that control
// line is used for the whole file and control lines inside the file are
// skipped like comments. Otherwise every line of the form "# pdr ..."
// inside the file (re)defines the layout for the data lines that follow.
// Data lines in front of the first control line are ignored.
//
// parameters:
// - config: delivers the options "ctrl_line" and "encoding" or, with an
//   option key, "<option key>.encoding"
// - database: is asked for the collection types
// - ifs: the opened input file
// - elements: receives the parsed values
//
// throws an Xception on a malformed control or data line
void CSVFile::ProcessFile (const Config& config, Database& database, ifstream& ifs, Database::CollectionElements& elements) const throw (Xception)
{
	Collections collections;
	string expr;
	size_t datetime_index = (size_t)-1;
	bool had_ctrl_line = false;

	// get configuration data
	Poco::TextEncoding* pEncoding = NULL;
	if (m_option_key.empty())
	{
		const string& ctrl_line = config.GetStringOption("ctrl_line");
		if (!ctrl_line.empty())
		{
			evaluate_control_line(ctrl_line, collections, expr, datetime_index);
			had_ctrl_line = true;
		}
		pEncoding = &GetEncoding(config.GetStringOption("encoding"));
	}
	else
		pEncoding = &GetEncoding(config.GetStringOption(m_option_key + ".encoding"));

	// parse line by line; the stream itself is tested and not good():
	// reading a last line without a terminating newline sets eofbit,
	// good() would be false then and that line would get lost
	string line;
	while (getline(ifs, line))
	{
		trim(line);
		line = ConvertFrom(line, *pEncoding);

		// ignore an empty line
		if (line.empty())
			continue;

		// check for a control line, but only if the configuration
		// did not already provide one
		if (!had_ctrl_line)
		{
			static const regex rx_ctrl("^(#[ \t]*pdr[ \t]+).*$");
			smatch mr;
			if (regex_match(line, mr, rx_ctrl))
			{
				// cut off the "# pdr " marker, the rest are the
				// column declarations
				line.erase(0, mr[1].length());
				evaluate_control_line(line, collections, expr, datetime_index);
				continue;
			}
		}

		// ignore a comment line
		if (line[0] == '#')
			continue;

		// otherwise evaluate a data line, this needs the expression
		// built from a control line
		if (!expr.empty())
			evaluate_data_line(line, expr, collections, datetime_index, elements, database);
	}
}
