// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
// Mobius Forensic Toolkit
// Copyright (C) 2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020 Eduardo Aguiar
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the
// Free Software Foundation; either version 2, or (at your option) any later
// version.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
// Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
#include <mobius/charset.h>
#include <mobius/ref_counter.h>
#include <mobius/exception_posix.inc>
#include <unordered_map>
#include <stdexcept>
#include <memory>
#include <iconv.h>

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief helper class wrapping an iconv conversion descriptor
//!
//! The copy and move operations are member-wise, so copies of an object
//! carry the same descriptor value. The destructor closes the descriptor
//! only when the reference counter reports that the object is unique.
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
class Iconv
{
public:
  // construction and destruction
  Iconv (const std::string& f_charset, const std::string& t_charset);
  ~Iconv ();

  // compiler generated (member-wise) copy and move operations
  Iconv (const Iconv& other) = default;
  Iconv (Iconv&& other) = default;
  Iconv& operator= (const Iconv& other) = default;
  Iconv& operator= (Iconv&& other) = default;

  // charset conversion
  mobius::bytearray conv (const mobius::bytearray& str) const;

private:
  //! conversion descriptor. Holds (iconv_t) -1 until iconv_open succeeds
  iconv_t cd_ = (iconv_t) -1;

  //! reference counter queried by the destructor before closing cd_
  mobius::ref_counter counter_;
};

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief constructor. Open a conversion descriptor
//! \param f_charset charset of the input data (source)
//! \param t_charset charset of the output data (destination)
//! \throw std::invalid_argument if iconv_open fails
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
Iconv::Iconv (const std::string& f_charset, const std::string& t_charset)
{
  // iconv_open takes the destination charset first, then the source charset
  const char *to_code = t_charset.c_str ();
  const char *from_code = f_charset.c_str ();

  cd_ = iconv_open (to_code, from_code);

  // (iconv_t) -1 is the failure value returned by iconv_open
  const bool opened = (cd_ != (iconv_t) -1);

  if (!opened)
    throw std::invalid_argument (mobius::MOBIUS_EXCEPTION_POSIX);
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief destructor. Close the iconv descriptor when this object is the
//! only one left according to the reference counter
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
Iconv::~Iconv ()
{
  // Leave the descriptor open unless the counter reports this object as unique
  if (!counter_.is_unique ())
    return;

  iconv_close (cd_);
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief Convert bytearray from one charset to another
//! \param str input bytearray
//! \return converted bytearray
//! \note Conversion is best-effort: if iconv stops early (invalid or
//! incomplete input sequence, output buffer exhausted), the bytes converted
//! up to that point are returned and no exception is thrown.
//! \note Each call is independent: the conversion state is reset before the
//! conversion and the final shift sequence, if any, is appended to the output.
//! \todo simplify output bytearray creation
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
mobius::bytearray
Iconv::conv (const mobius::bytearray& str) const
{
  // Extra room for output that is not proportional to the input size, such
  // as a byte order mark or the closing shift sequence of a stateful charset.
  // Without it, a 1-byte input converted to UTF-32 with BOM (8 bytes) would
  // not fit into the insize * 6 estimate.
  constexpr size_t slack = 16;

  size_t insize = str.size ();
  size_t outsize = insize * 6 + slack;

  // iconv declares its input as char **, but it only reads from that buffer
  char *inbuf = const_cast <char *> (reinterpret_cast <const char *> (str.data ()));
  auto outbuf = std::make_unique<char[]> (outsize);
  char *p_out = outbuf.get ();

  // Conversion objects are cached and reused by conv_charset, so put the
  // descriptor back into its initial state. Otherwise, state left behind by
  // a previous call would leak into this one.
  ::iconv (cd_, nullptr, nullptr, nullptr, nullptr);

  // Return value intentionally not checked: see best-effort note above.
  // p_out always points just past the last byte successfully written.
  ::iconv (cd_, &inbuf, &insize, &p_out, &outsize);

  // Flush: emit the sequence that returns a stateful target charset to its
  // initial shift state. Writes nothing for stateless charsets.
  ::iconv (cd_, nullptr, nullptr, &p_out, &outsize);

  return mobius::bytearray (reinterpret_cast <const uint8_t *> (outbuf.get ()), p_out - outbuf.get ());
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief file-local cache of charset conversion objects, indexed by a
//! string key
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
using iconv_list_t = std::unordered_map <std::string, Iconv>;

namespace
{
iconv_list_t iconv_list;
} // namespace

namespace mobius
{
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief convert a bytearray from "charset" to UTF-8
//! \param data bytearray
//! \param charset charset
//! \return string encoded as UTF-8
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
std::string
conv_charset_to_utf8 (const mobius::bytearray& data, const std::string& charset)
{
  bytearray output = conv_charset (data, charset, "UTF-8");

  // strip trailings '\0'
  auto size = output.size ();
  while (size > 0 && output[size-1] == '\0')
    --size;

  return std::string (reinterpret_cast <const char *>(output.data ()), size);
}

// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
//! \brief convert a bytearray charset to UTF-8
//! \param data bytearray
//! \param f_charset from charset
//! \param t_charset to charset
//! \return new bytearray
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
mobius::bytearray
conv_charset (const mobius::bytearray& data, const std::string& f_charset, const std::string& t_charset)
{
  std::string key = f_charset + '|' + t_charset;

  auto iter = iconv_list.find (key);

  if (iter == iconv_list.end ())
    {
      auto p = iconv_list.emplace (key, Iconv (f_charset, t_charset));
      iter = p.first;
    }

  return iter->second.conv (data);
}

} // namespace mobius
