/*
 * Copyright (C) 2000-2025 the xine project
 *
 * This file is part of xine, a unix video player.
 *
 * xine is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * xine is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA
 *
 */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <errno.h>
#include <string.h>
#include <iconv.h>

#ifdef HAVE_LANGINFO_CODESET
#include <langinfo.h>
#endif

#include <assert.h>
#include <pthread.h>

#include "_xitk.h"
#include "recode.h"

/* State of one converter as created by xitk_recode_init (). */
struct xitk_recode_s {
  /* Conversion descriptor obtained from iconv_open (). */
  iconv_t iconv_handle;
  /* Nonzero when conversions are serialized through the mutex below. */
  int use_mutex;
  /* Only initialized (and destroyed) when use_mutex is nonzero. */
  pthread_mutex_t mutex;
};

/*
 * Fixed-size scratch for a normalized encoding name. The same 12 bytes are
 * visible as characters (s) and as three 32-bit words (w), so all characters
 * can be case-folded with three word operations.
 */
typedef union {
  char s[12];
  uint32_t w[3];
} _xitk_s4_t;

/*
 * Per-byte write-pointer advance for _xitk_shorten_enc (): 1 keeps the byte,
 * 0 drops it. Dropped are 0x00..0x1f (NUL and control characters) and
 * '-' (0x2d); everything else is kept.
 */
static const uint8_t _xitk_stab[256] = {
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
};

/*
 * Normalize an encoding name for comparison, e.g. "ISO-8859-15" -> "iso885915".
 *
 * d: receives the result; unused trailing bytes are zero, so two results can
 *    be compared with memcmp () over the whole union.
 * s: encoding name; NULL is treated like the empty name.
 *
 * Bytes flagged 0 in _xitk_stab are dropped, then every byte that has bit 0x40
 * set also gets bit 0x20 set, which maps 'A'..'Z' to 'a'..'z'. At most
 * sizeof (d->s) - 1 characters are kept so that d->s is always NUL terminated
 * and can be used as a C string.
 */
static void _xitk_shorten_enc (_xitk_s4_t *d, const char *s) {
  /* Stop one byte early: the final byte keeps the 0 from the memset below. */
  char *q = d->s, *e = q + sizeof (d->s) - 1;
  uint32_t u;

  memset (d, 0, sizeof (*d));
  if (!s)
    return;
  do {
    /* Always store, but only advance for kept bytes; a dropped byte is
     * overwritten by the next one (at the latest by the terminating NUL). */
    *q = *s;
    q += _xitk_stab[*(const uint8_t *)s];
    s++;
  } while (s[-1] && (q < e));
  /* Case fold 4 characters at a time; 0x40 >> 1 stays within its own byte. */
  for (u = 0; u < sizeof (d->w) / sizeof (d->w[0]); u++)
    d->w[u] |= (d->w[u] & 0x40404040) >> 1;
}

/*
 * Encoding names referenced by index from the _xitk_langs table.
 * Index 0 is the empty name; a 0 index terminates a candidate list there.
 * Do not reorder: the numbers in the comments below are the indices in use.
 * The element size of 12 fits the longest name (11 characters) plus its NUL.
 */
static const char _xitk_enc[][12] = {
  /* Spelling synchronized with the output of $ iconv --list. */
  /*  0 */ "",
  /*  1 */ "ISO-8859-1",
  /*  2 */ "ISO-8859-6",
  /*  3 */ "UTF-8",
  /*  4 */ "CP1251",
  /*  5 */ "ISO-8859-2",
  /*  6 */ "ISO-8859-15",
  /*  7 */ "ISO-8859-14",
  /*  8 */ "ISO-8859-7",
  /*  9 */ "ISO-8859-8",
  /* 10 */ "EUC-JP",
  /* 11 */ "UJIS",
  /* 12 */ "EUC",
  /* 13 */ "GEORGIAN-PS",
  /* 14 */ "EUC-KR",
  /* 15 */ "ISO-8859-13",
  /* 16 */ "ISO-8859-5",
  /* 17 */ "ISO-8859-3",
  /* 18 */ "KOI8-R",
  /* 19 */ "KOI8-U",
  /* 20 */ "KOI8-T",
  /* 21 */ "TIS-620",
  /* 22 */ "ISO-8859-9",
  /* 23 */ "TCVN",
  /* 24 */ "CP1255",
  /* 25 */ "GB18030",
  /* 26 */ "GB2312",
  /* 27 */ "GBK",
  /* 28 */ "BIG5-HKSCS",
  /* 29 */ "BIG5",
  /* 30 */ "EUC-TW",
};

/*
 * Locale name -> candidate encodings.
 *
 * lang: the first 5 characters of a locale name ("xx_YY").
 * indx: up to 3 indices into _xitk_enc in order of preference, terminated by 0.
 *
 * Entries MUST stay sorted ascending by lang (plain byte order) because
 * _get_first_lang_locale () does a binary search here. The final "" entry
 * has an empty candidate list and is what the search yields on a miss.
 */
static const struct { char lang[6]; uint8_t indx[4]; } _xitk_langs[] = {
  { "af_ZA", {  1 }},

  { "ar_AE", {  2 }}, { "ar_BH", {  2 }}, { "ar_DZ", {  2 }}, { "ar_EG", {  2 }},
  { "ar_IN", {  3 }}, { "ar_IQ", {  2 }}, { "ar_JO", {  2 }}, { "ar_KW", {  2 }},
  { "ar_LB", {  2 }}, { "ar_LY", {  2 }}, { "ar_MA", {  2 }}, { "ar_OM", {  2 }},
  { "ar_QA", {  2 }}, { "ar_SA", {  2 }}, { "ar_SD", {  2 }}, { "ar_SY", {  2 }},
  { "ar_TN", {  2 }}, { "ar_YE", {  2 }},

  { "be_BY", {  4 }},
  { "bg_BG", {  4 }},
  { "br_FR", {  1 }},
  { "bs_BA", {  5 }},

  { "ca_ES", {  1, 6 }}, { "cs_CZ", {  5 }},

  { "cy_GB", {  7 }},
  { "da_DK", {  1 }},

  /* de_LU uses Latin-1/Latin-9 like the other euro-zone locales (cf. fr_LU). */
  { "de_AT", {  1, 6 }}, { "de_BE", {  1, 6 }},
  { "de_CH", {  1 }},    { "de_DE", {  1, 6 }}, { "de_LU", {  1, 6 }},

  { "el_GR", {  8 }},

  { "en_AU", {  1 }}, { "en_BW", {  1 }}, { "en_CA", {  1 }},    { "en_DK", {  1 }},
  { "en_GB", {  1 }}, { "en_HK", {  1 }}, { "en_IE", {  1, 6 }}, { "en_IN", {  3 }},
  { "en_NZ", {  1 }}, { "en_PH", {  1 }}, { "en_SG", {  1 }},    { "en_US", {  1 }},
  { "en_ZA", {  1 }}, { "en_ZW", {  1 }},

  { "es_AR", {  1 }}, { "es_BO", {  1 }}, { "es_CL", {  1 }}, { "es_CO", {  1 }},
  { "es_CR", {  1 }}, { "es_DO", {  1 }}, { "es_EC", {  1 }}, { "es_ES", {  1, 6 }},
  { "es_GT", {  1 }}, { "es_HN", {  1 }}, { "es_MX", {  1 }}, { "es_NI", {  1 }},
  { "es_PA", {  1 }}, { "es_PE", {  1 }}, { "es_PR", {  1 }}, { "es_PY", {  1 }},
  { "es_SV", {  1 }}, { "es_US", {  1 }}, { "es_UY", {  1 }}, { "es_VE", {  1 }},

  { "et_EE", {  1 }},

  { "eu_ES", {  1, 6 }},

  { "fa_IR", {  3 }},

  { "fi_FI", {  1, 6 }},

  { "fo_FO", {  1 }},

  { "fr_BE", {  1, 6 }}, { "fr_CA", {  1 }}, { "fr_CH", {  1 }}, { "fr_FR", {  1, 6 }},
  { "fr_LU", {  1, 6 }},

  { "ga_IE", {  1, 6 }},

  { "gl_ES", {  1, 6 }},

  { "gv_GB", {  1 }},
  { "he_IL", {  9 }},
  { "hi_IN", {  3 }},
  { "hr_HR", {  5 }},
  { "hu_HU", {  5 }},
  { "id_ID", {  1 }},
  { "is_IS", {  1 }},

  { "it_CH", {  1 }}, { "it_IT", {  1, 6 }},

  { "iw_IL", {  9 }},

  { "ja_JP", {  3, 10, 11 }}, { "japan", { 12 }},

  { "ka_GE", { 13 }},
  { "kl_GL", {  1 }},

  { "ko_KR", { 14, 3 }}, { "korea", { 12 }},

  { "kw_GB", {  1 }},
  { "lt_LT", { 15 }},
  { "lv_LV", { 15 }},
  { "mi_NZ", { 15 }},
  { "mk_MK", { 16 }},
  { "mr_IN", {  3 }},
  { "ms_MY", {  1 }},
  { "mt_MT", { 17 }},
  { "nb_NO", {  1 }},

  { "nl_BE", {  1, 6 }}, { "nl_NL", {  1, 6 }},

  { "nn_NO", {  1 }},
  { "no_NO", {  1 }},
  { "oc_FR", {  1 }},
  { "pl_PL", {  5 }},

  { "pt_BR", {  1 }}, { "pt_PT", {  1, 6 }},

  { "ro_RO", {  5 }},

  { "ru_RU", { 16, 18 }}, { "ru_UA", { 19 }},

  { "se_NO", {  3 }},
  { "sk_SK", {  5 }},
  { "sl_SI", {  5 }},
  { "sq_AL", {  1 }},

  { "sr_YU", {  5, 16 }},

  { "sv_FI", {  1, 6 }}, { "sv_SE", {  1 }},

  { "ta_IN", {  3 }},
  { "te_IN", {  3 }},
  { "tg_TJ", { 20 }},
  { "th_TH", { 21 }},
  { "tl_PH", {  1 }},
  { "tr_TR", { 22 }},
  { "uk_UA", { 19 }},
  { "ur_PK", {  3 }},
  { "uz_UZ", {  1 }},

  { "vi_VN", { 23, 3 }},

  { "wa_BE", {  1, 6 }},

  { "yi_US", { 24 }},

  { "zh_CN", { 25, 26, 27 }}, { "zh_HK", { 28 }},
  { "zh_TW", { 29, 30 }},

  /* Terminator; also the "not found" result of the lookup. */
  { "", { 0 }}
};

/*
 * Look up the candidate encoding list for a locale name.
 *
 * Only the first 5 characters of lcal are compared; a shorter name is
 * matched as a prefix. A binary search over the sorted _xitk_langs table
 * finds the lowest matching entry. An empty name or a miss yields the
 * (empty) list of the table's terminating entry.
 */
static const uint8_t *_get_first_lang_locale (const char *lcal) {
  const unsigned int last = sizeof (_xitk_langs) / sizeof (_xitk_langs[0]) - 1;
  unsigned int lo = 0, hi = last, hit = last;
  size_t len = xitk_find_byte (lcal, 0);

  if (len == 0)
    return _xitk_langs[last].indx;
  if (len > 5)
    len = 5;

  do {
    const unsigned int mid = (lo + hi) >> 1;
    const int cmp = strncmp (lcal, _xitk_langs[mid].lang, len);

    if (cmp > 0) {
      /* Wanted entry sorts after mid. */
      lo = mid + 1;
      continue;
    }
    /* Remember a match, then keep looking for an earlier one. */
    if (cmp == 0)
      hit = mid;
    hi = mid;
  } while (lo != hi);

  return _xitk_langs[hit].indx;
}

/*
 * Determine the character encoding of the user's locale.
 *
 * dest, dlen: caller buffer that may receive a copy of the encoding name.
 *
 * Returns dest (name copied there), a pointer to a static entry of _xitk_enc,
 * or NULL when no encoding could be determined.
 *
 * Order of attempts:
 *  1. nl_langinfo (CODESET) if available, unless it reports an "ANSI" name.
 *  2. The ".codeset" part of LC_ALL / LC_MESSAGES / LANG (first non-empty one).
 *  3. The first table candidate for that locale name that iconv can open.
 */
static const char *xitk_get_system_encoding (char *dest, size_t dlen) {
  static const char * const vars[] = { "LC_ALL", "LC_MESSAGES", "LANG" };
  const char *rv, *start;
  const uint8_t *indx;
  unsigned int u;

#ifdef HAVE_LANGINFO_CODESET
  rv = nl_langinfo (CODESET);
  if (rv && !strstr (rv, "ANSI")) {
    strlcpy (dest, rv, dlen);
    return dest;
  }
#endif

  /*
   * guess locale codeset according to shell variables
   * when nl_langinfo(CODESET) isn't available or working.
   * A variable that is set but empty counts as unset, so that
   * e.g. LC_ALL="" does not hide a usable LANG.
   */
  rv = NULL;
  for (u = 0; u < sizeof (vars) / sizeof (vars[0]); u++) {
    const char *v = getenv (vars[u]);
    if (v && v[0]) {
      rv = v;
      break;
    }
  }
  if (!rv)
    return NULL;

  /* "xx_YY.codeset@modifier": take the text between '.' and '@' or the end. */
  start = strchr (rv, '.');
  if (start && dlen) { /* dlen == 0 would make dlen - 1 wrap around. */
    size_t l = xitk_find_0_or_byte (++start, '@');
    if (l > dlen - 1)
      l = dlen - 1;
    if (l > 1) {
      memcpy (dest, start, l);
      dest[l] = 0;
      return dest;
    }
  }

  /* No explicit codeset: probe the table candidates for this locale. */
  for (indx = _get_first_lang_locale (rv); *indx; indx++) {
    iconv_t ih = iconv_open (_xitk_enc[*indx], "ASCII");
    if (ih == (iconv_t)-1)
      continue;
    iconv_close (ih);
    return _xitk_enc[*indx];
  }
  return NULL;
}

/*
 * Create a converter from src_encoding to dst_encoding.
 *
 * xitk:         supplies the verbosity level; NULL selects verbosity 2.
 * src_encoding: source encoding name, or NULL for the system encoding.
 * dst_encoding: destination encoding name, or NULL for the system encoding.
 * threadsafe:   nonzero to serialize conversions with a mutex.
 *
 * Returns a converter to be released with xitk_recode_done (), or NULL when
 * no conversion is needed (both names normalize to the same string), when an
 * encoding could not be determined, when iconv rejects the pair, or when
 * memory / mutex setup fails.
 */
xitk_recode_t *xitk_recode_init (xitk_t *xitk, const char *src_encoding, const char *dst_encoding, int threadsafe) {
  int verbose = xitk ? xitk->verbosity : 2;
  xitk_recode_t *xrt = NULL;
  _xitk_s4_t s41, s42;
  char buf[80];

#if 0 /** << TEST. */
  {
    uint32_t u;
    for (u = 0; u < sizeof (_xitk_enc) / sizeof (_xitk_enc[0]); u++) {
      _xitk_shorten_enc (&s41, _xitk_enc[u]);
      printf ("  %s -> %s\n", _xitk_enc[u], s41.s);
    }
  }
#endif

  /* NULL is a legal argument here; never hand it to "%s". */
  if (verbose >= 2)
    printf ("xitk.recode.init (%s, %s).\n",
      src_encoding ? src_encoding : "(null)", dst_encoding ? dst_encoding : "(null)");

  if (!src_encoding || !dst_encoding) {
    const char *se = xitk_get_system_encoding (buf, sizeof (buf));
    if (!src_encoding)
      src_encoding = se;
    if (!dst_encoding)
      dst_encoding = se;
  }

  /* The system encoding lookup may have failed; then there is nothing to open. */
  if (src_encoding && dst_encoding) {
    _xitk_shorten_enc (&s41, src_encoding);
    _xitk_shorten_enc (&s42, dst_encoding);
    /* Same normalized name ("UTF-8" vs "utf8"): no converter needed. */
    if (memcmp (&s41, &s42, sizeof (s41))) {
      iconv_t ih = iconv_open (dst_encoding, src_encoding);
      /* Retry with the normalized spellings if the given ones are unknown. */
      if (ih == (iconv_t)-1)
        ih = iconv_open (dst_encoding = s42.s, src_encoding = s41.s);
      if (ih != (iconv_t)-1) {
        xrt = malloc (sizeof (*xrt));
        if (xrt) {
          xrt->iconv_handle = ih;
          xrt->use_mutex = threadsafe;
          if (threadsafe && pthread_mutex_init (&xrt->mutex, NULL)) {
            free (xrt);
            xrt = NULL;
          }
        }
        if (!xrt)
          iconv_close (ih);
      }
    }
  }

  if (verbose >= 2)
    printf ("xitk.recode.init (%s, %s) = %p.\n",
      src_encoding ? src_encoding : "(null)", dst_encoding ? dst_encoding : "(null)", (void *)xrt);
  return xrt;
}

/*
 * Release a converter created by xitk_recode_init () and clear the caller's
 * pointer. Both a NULL xrt and a NULL *xrt are accepted.
 */
void xitk_recode_done (xitk_recode_t **xrt) {
  xitk_recode_t *conv;

  if (!xrt)
    return;

  conv = *xrt;
  printf ("xitk.recode.done (%p).\n", (void *)conv);
  if (!conv)
    return;

  iconv_close (conv->iconv_handle);
  /* The mutex only exists when the converter was created thread safe. */
  if (conv->use_mutex)
    pthread_mutex_destroy (&conv->mutex);
  free (conv);
  *xrt = NULL;
}

/*
 * Convert s->src (s->ssize bytes) with converter xrt.
 *
 * On return s->res / s->rsize describe the result, which is one of:
 *  - s->src itself (no converter, empty input, or out of memory),
 *  - the caller supplied s->buf, if it holds at least 2 * s->ssize bytes,
 *  - a malloc ()ed buffer, to be released with xitk_recode2_done ().
 * The result is not NUL terminated by this function.
 *
 * The output buffer starts at twice the input size and is enlarged whenever
 * iconv reports E2BIG, so conversions that expand by more than 2x are not
 * cut short. Any other iconv error ends the conversion with what has been
 * produced so far.
 */
void xitk_recode2_do (xitk_recode_t *xrt, xitk_recode_string_t *s) {
  if (!s)
    return;
  do {
    size_t slen, dsize;
    char *r, *w, *heap = NULL; /* heap: non-NULL while the result is malloc ()ed. */

    if (!xrt || !s->src || !s->ssize)
      break;

    slen = s->ssize;
    dsize = 2 * slen;
    if (s->buf && (dsize <= s->bsize)) {
      s->res = w = s->buf;
      dsize = s->bsize;
    } else {
      s->res = w = heap = malloc (dsize);
      if (!w)
        break;
    }

    if (xrt->use_mutex)
      pthread_mutex_lock (&xrt->mutex);
    r = (char *)s->src; /** << iconv.inbuf should really be const char **. */
    /* NULL in/out buffers: reset the conversion state. */
    iconv (xrt->iconv_handle, NULL, &slen, NULL, &dsize);
    while (slen) {
      size_t used, cap;
      char *grown;

      if (iconv (xrt->iconv_handle, &r, &slen, &w, &dsize) != (size_t)-1)
        continue;
      if (errno != E2BIG)
        break; /* invalid or incomplete input: keep what we have. */

      /* Output full: move to a strictly larger heap buffer and go on. */
      used = (size_t)((const char *)w - s->res);
      cap = 2 * (used + dsize) + 16;
      grown = heap ? realloc (heap, cap) : malloc (cap);
      if (!grown)
        break; /* the old buffer is still valid; result is truncated. */
      if (!heap)
        memcpy (grown, s->res, used); /* was the caller's s->buf. */
      s->res = heap = grown;
      w = grown + used;
      dsize = cap - used;
    }
    if (xrt->use_mutex)
      pthread_mutex_unlock (&xrt->mutex);
    s->rsize = (const char *)w - s->res;
    return;
  } while (0);

  /* fallback: dest = src. */
  s->res = s->src;
  s->rsize = s->ssize;
}

/*
 * Release the result of xitk_recode2_do (). The result buffer is freed only
 * when it is neither the source string nor the caller supplied s->buf;
 * s->res is cleared in every case. xrt is unused.
 */
void xitk_recode2_done (xitk_recode_t *xrt, xitk_recode_string_t *s) {
  (void)xrt;

  if (!s)
    return;

  if (s->res) {
    const int borrowed = (s->res == s->src) || (s->res == (const char *)s->buf);

    if (!borrowed)
      free ((char *)s->res);
  }
  s->res = NULL;
}
