/*
 * AdabasUtil.c --
 *
 *      This module contains utility procedures.
 *      Mostly these are procedures for converting from one encoding style
 *      into another, but (for 6.2) there is also some unicode stuff.
 *
 * Copyright (c) 1996-1997 Christian Krone. All rights reserved.
 * This program is free software; you can redistribute it and/or
 * modify it under the same terms as Tcl itself.
 * See also licence.terms
 *
 * $Id: AdabasUtil.c,v 1.32 1997/06/30 21:46:05 adabas Exp $
 */


/*
 * Import interface (should move into AdabasTclInt.h someday)
 */

#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <tcl.h>

#include "adabas.h"
#include "sqlrte.h"
#include "AdabasPort.h"
#include "AdabasUtil.h"

/*
 * The following array is used for conversion into a base64 decoded string.
 * The algorithm takes 6 bits (with its value range from 0 to 63) and
 * puts the character at the corresponding index into the result.
 */

#define BASE64  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
#define ISO8859 "ISO8859_1"

/*
 * Local variables, used for unicode conversions.
 */

#if ADABAS_VERSION >= 62
static encodings_ptr encodings;		/* Table of encodings. */
static int2          encodingCnt;	/* Number of entries in encodings. */
static int2          extCodeset;	/* Code for external encoding. */
static int2          ucsCodeset = -1;	/* Code for ucs2 encoding. Also used
					 * as flag, if encoding initialized. */
#endif

/*
 * Prototypes for procedures referenced only in this file.
 */

#if ADABAS_VERSION >= 62
static int initUnicodeEncodings _ANSI_ARGS_((Tcl_Interp *interp));
#endif

/*
 *----------------------------------------------------------------------
 *
 * p2c --
 *
 *      Translates a pascal string (with fixed length) into a C string
 *      (with null termination, right side trimmed of blanks).
 *
 * Results:
 *      The Buffer pointed to by c_out will contain the transformed string.
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */

void
p2c (c_out, p_in, len)
     char *c_out;			/* Resulting null terminated string. */
     char *p_in;			/* Incoming fixed width string. */
     int   len;				/* Length of p_in. */
{
  char *pptr = p_in+len-1;		/* pascal string ptr */
  char *cptr = c_out+len;		/* C      string ptr */
  
  *cptr -- = '\0';

  if (!len) return;
  
  while (pptr != p_in && * pptr == ' ') {
    *cptr-- = '\0';
    pptr--;
  }
  
  if (pptr == p_in && *pptr == ' ') {
    *cptr-- = '\0';
  } else {
    while (pptr != p_in) {
      *cptr-- = *pptr--;
    }
    *cptr = *pptr; /* don't forget last char */
  }
}

/*
 *----------------------------------------------------------------------
 *
 * c2p --
 *
 *      Translates a C string (variable length, null terminated) into a
 *      pascal string (fixed length, filled with blanks).
 *
 * Results:
 *      The Buffer pointed to by p_out will contain the transformed string.
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */

void
c2p (p_out, c_in, len)
     char *p_out;			/* Resulting fixed width string. */
     char *c_in;			/* Incoming null terminated string. */
     int   len;				/* Length of p_out. */
{
  while (len-- > 0) {
    if (*c_in == '\0') {
      *p_out++ = ' ';
    } else {
      *p_out++ = *c_in++;
    }
  }
}

/*
 *----------------------------------------------------------------------
 *
 * scanEncoding --
 *
 *      Inspects the given string and returns the corresponding encoding.
 *
 * Results:
 *      Returns the scanned encoding or encNil. In this last case the
 *      result of interp (if not NULL) will contain an error message.
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */

DataEncoding
scanEncoding (interp, encoding)
     Tcl_Interp *interp;		/* Current interpreter. */
     char       *encoding;		/* String to scan for long encoding. */
{
  int c      = encoding[0];		/* Length (used for option parsing). */
  int length = strlen (encoding);	/* Char   (used for option parsing). */

  switch (c) {
  case 'b':
    if (!strncmp (encoding, "base64", length)) {
      return encBase64;
    }
  case 'e':
    if (!strncmp (encoding, "escape", length)) {
      return encEscape;
    }
  case 'h':
    if (!strncmp (encoding, "hex", length)) {
      return encEscape;
    }
  default:
    if (interp) {
      AppendResult (interp, "invalid encoding \"");
      AppendResult (interp, encoding);
      AppendResult (interp, "\", must be base64, escape or hex");
    }
    return encNil;
  }
}

/*
 *----------------------------------------------------------------------
 *
 * decodeSequence --
 *
 *      Decodes the given string or object and puts the corresponding decoded
 *      string into the result of the interpreter. The decoding depends on
 *      the value of the parameter encoding; this may be:
 *      - encNil:    no decoding at all, the input is just copied;
 *      - encBase64: the encoding, e.g. Tk8.0 uses for its data images;
 *      - encEscape: all non printable characters are decoded as \xxx
 *                   (i.e. in octal with leading backslash).
 *      - encHex:    every character is decoded in two hex characters;
 *      - (others):  nothing is put into the result.
 *
 * Results:
 *      Puts the decoded string into the object outObj (if not NULL),
 *      or into the buffer out points to.
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */

void
decodeSequence (interp, encoding, outObj, out, in, length)
     Tcl_Interp  *interp;		/* Current interpreter. */
     int          encoding;		/* Wanted encoding. */
     Tcl_Obj     *outObj;		/* Resulting string object (or NULL). */
     Tcl_DString *out;			/* Resulting decoded string (or NULL). */
     char        *in;			/* Incoming data. */
     int          length;		/* Length of in. */
{
  char  x64[73];			/* temp buffer to fill (for base64). */
  int   i;				/* Loop index for src (for base64). */
  int   j;				/* Loop index for dest (for base64). */
  char *base64;				/* Translation table (for base64). */
  unsigned char *x;			/* Loop pointer on src. */
  unsigned char *p;			/* Also loop pointer on src. */
  char oneEscape[5];			/* Escaped form of one char (e.g. \123). */
  char oneHex[3];			/* Hex form of one char (e.g. 7F). */

  switch (encoding) {

  case encNil:
    if (outObj) {
#ifdef HAS_TCL_OBJECTS
      Tcl_AppendToObj (outObj, in, length);
#endif
    } else {
      Tcl_DStringAppend (out, in, length);
    }
    return;

  case encBase64:
    j      = 0;
    base64 = BASE64;
    x      = (unsigned char *) in;

    for (i = 0; i < length; i+=3) {
      x64[j++] = base64[                     (x[i  ] >> 2)];
      x64[j++] = base64[((x[i  ]& 3) << 4) + (x[i+1] >> 4)];
      if (i == length-1) break;
      x64[j++] = base64[((x[i+1]&15) << 2) + (x[i+2] >> 6)];
      if (i == length-2) break;
      x64[j++] = base64[ (x[i+2]&63)                       ];

      if (j == sizeof (x64) - 1) {
	x64[j] = 0;
	if (outObj) {
#ifdef HAS_TCL_OBJECTS
	  Tcl_ListObjAppendElement (interp, outObj, Tcl_NewStringObj (x64, -1));
#endif
	} else {
	  Tcl_DStringAppendElement (out, x64);
	}
	j = 0;
      }
    }
    if (j) {
      x64[j] = 0;
      if (outObj) {
#ifdef HAS_TCL_OBJECTS
	Tcl_ListObjAppendElement (interp, outObj, Tcl_NewStringObj (x64, -1));
#endif
      } else {
	Tcl_DStringAppendElement (out, x64);
      }
    }
    return;

  case encEscape:
    x = (unsigned char *) in;
    p = (unsigned char *) in;

    while (length--) {
      if (isprint (*x) && *x != '\\') {
	x++;
      } else {
	if (p != x) {
	  if (outObj) {
#ifdef HAS_TCL_OBJECTS
	    Tcl_AppendToObj (outObj, (char *) p, x-p);
#endif
	  } else {
	    Tcl_DStringAppend (out, (char *) p, x-p);
	  }
	}
	sprintf (oneEscape, "\\%03o", *(x++));
	if (outObj) {
#ifdef HAS_TCL_OBJECTS
	  Tcl_AppendToObj (outObj, oneEscape, 4);
#endif
	} else {
	  Tcl_DStringAppend (out, oneEscape, 4);
	}
	p = x;
      }
    }
    if (p != x) {
      if (outObj) {
#ifdef HAS_TCL_OBJECTS
	Tcl_AppendToObj (outObj, (char *) p, x-p);
#endif
      } else {
	Tcl_DStringAppend (out, (char *) p, x-p);
      }
    }
    return;

  case encHex:
    x = (unsigned char *) in;

    while (length-- > 0) {
      sprintf (oneHex, "%02x", *(x++));
      if (outObj) {
#ifdef HAS_TCL_OBJECTS
	Tcl_AppendToObj (outObj, oneHex, 2);
#endif
      } else {
	Tcl_DStringAppend (out, oneHex, 2);
      }
    }
    return;
  }
}

/*
 *----------------------------------------------------------------------
 *
 * decodeSequenceAppend --
 *
 *      If we append to a base64 encoded string, it may be neccessary to
 *      delete the last 2 or 3 bytes of the old string and to prepend
 *      them with the first decoded 1 or 2 characters of the new string
 *      to form 4 valid bytes in the target.
 *      For all other encodings this function just calls decodeSequence.
 *
 * Results:
 *      Appends the decoded string to the object outObj (if not NULL),
 *      or to the buffer out points to. If an error occurs, TCL_ERROR
 *      is returned, else TCL_OK.
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */

int
decodeSequenceAppend (interp, encoding, appObj, app, in, oldLength, length)
     Tcl_Interp  *interp;		/* Current interpreter. */
     int          encoding;		/* Wanted encoding. */
     Tcl_Obj     *appObj;		/* Resulting object (if with Tcl_Obj). */
     Tcl_DString *app;			/* Resulting decoded string. */
     char        *in;			/* Incoming data. */
     int          oldLength;		/* Start length of appObj (or app). */
     int          length;		/* Length of in. */
{
  int   old64Length;			/* Length of encoded string in app. */
  char *old64Value;			/* Encoded string in app. */
  int   corrCnt;			/* Count of odd chars in app.*/
  char *base64;				/* Translation table (for base64). */
  char *modChars;			/* Characters to replace inline. */
  char  addChars[3];			/* up to 2 characters to add at end. */
  int   addLength;			/* String length of addChars. */
  unsigned char *x;			/* Loop pointer on in. */
  char *oldPos64;			/* Pointer into base64 of old value. */
  int   oldInd;				/* offset of old value. */
#ifdef HAS_TCL_OBJECTS
  Tcl_Obj *lastRowObj;			/* Object representing the last string. */
  int      rowCnt;			/* Number of rows so far. */
#endif
  

  if (encoding == encBase64) {
#ifdef HAS_TCL_OBJECTS
    if (Tcl_ListObjLength (interp, appObj, &rowCnt) == TCL_ERROR
	|| Tcl_ListObjIndex (interp, appObj, rowCnt-1,
			     &lastRowObj) == TCL_ERROR) {
      SetResult (interp, "??? dsa: Invalid list object");
      return TCL_ERROR;
    }
    old64Value = Tcl_GetStringFromObj (lastRowObj, &old64Length);
#else
    old64Value  = Tcl_DStringValue  (app);
    old64Length = Tcl_DStringLength (app);
#endif

    if ((corrCnt = oldLength%3)) {
      base64 = BASE64;
      x      = (unsigned char *) in;

      modChars = old64Value + (old64Length - 1);
      if (!(oldPos64 = strchr (base64, *modChars))) {
	SetResult (interp, "??? dsa: Invalid base64 string");
	return TCL_ERROR;
      }
      oldInd = oldPos64 - base64;

      if (corrCnt == 1 && length >= 2) {
	modChars[0] = base64[ oldInd          + (x[0] >> 4)];
	addChars[0] = base64[((x[0]&15) << 2) + (x[1] >> 6)];
	addChars[1] = base64[ (x[1]&63)                    ];
	addChars[2] = 0;
      } else if (corrCnt == 1) {
	modChars[0] = base64[ oldInd          +  (x[0] >> 4)];
	addChars[0] = base64[((x[0]&15) << 2)               ];
	addChars[1] = 0;
      } else {
	modChars[0] = base64[ oldInd          +  (x[0] >> 6)];
	addChars[0] = base64[ (x[0]&63)                     ];
	addChars[1] = 0;
      }
      addLength = strlen (addChars);
      in       += addLength;
      length   -= addLength;

#ifdef HAS_TCL_OBJECTS
      Tcl_AppendToObj (lastRowObj, addChars, addLength);
#else
      Tcl_DStringAppend (app, addChars, addLength);
#endif
    }
  }

  decodeSequence (interp, encoding, appObj, app, in, length);
  return TCL_OK;
}

/*
 *----------------------------------------------------------------------
 *
 * encodeSequence --
 *
 *      undocumented.
 *
 * Results:
 *      Returns the length of the encoded string, which is stored in the
 *      buffer, out points to, or -1, if an error occured. In this last
 *      case the result of interp (if not NULL) will contain an error message.
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */

/* -------------------------------------------------------------- */

int4
encodeSequence (interp, encoding, what, out, in, inPos, length)
     Tcl_Interp  *interp;		/* Interpreter for errors (or NULL). */
     int          encoding;		/* Encoding, data is decoded. */
     char        *what;			/* specification (for errormsg). */
     Tcl_DString *out;			/* Resulting encoded string. */
     char        *in;			/* incoming decoded string. */
     int          inPos;		/* Start position (counts from 1). */
     int          length;		/* length of in. */
{
  char *p;				/* Loop pointer on in. */
  char *old;				/* Another pointer into in. */
  int   count;				/* How many chars are encoded? */
  int4  resLength;			/* length of out (value to return). */
  char  c;				/* encoded character. */
  int   numb;				/* Intermediate number (for hex). */
  unsigned char u;			/* For type coercion (for hex). */

  Tcl_DStringInit (out);

  switch (encoding) {

  case encNil:
    Tcl_DStringAppend (out, in+inPos-1, length);
    return length;

  case encBase64:
    Tcl_DStringAppend (out, "base64 not yet implemented", 26);
    return 26;

  case encEscape:
    p         = in;
    resLength = 0;

    /* First find the start position. This can't be done by pointer
     * arithmetic, since all the special symbols are escaped yet.
     */

    while (*p != 0 && inPos-- > 1) {
      if (*p == '\\') {
	c  = Tcl_Backslash (p, &count);
	p += count;
      } else {
	p++;
      }
    }

    old = p;
    while (*p != 0 && (!length || resLength < length)) {
      if (*p == '\\') {
	if (p != old) {
	  Tcl_DStringAppend (out, old, p-old);
	}
	c   = Tcl_Backslash (p, &count);
	Tcl_DStringAppend (out, &c, 1);
	resLength++;
	p  += count;
	old = p;
      } else {
	p++; resLength++;
      }
    }
    if (p != old) {
      Tcl_DStringAppend(out, old, p-old);    
    }
    return resLength;

  case encHex:
    resLength = (length /= 2);
    p         = in;

    while (length-- > 0) {
      if (sscanf (p, "%2x", &numb) != 1) {
	if (interp) {
	  AppendResult (interp, "Expected ");
	  AppendResult (interp, what);
	  AppendResult (interp, ", but got \"");
	  AppendResult (interp, in);
	  AppendResult (interp, "\"");
	}
	return -1;
      }
      u = (unsigned char) numb;
      Tcl_DStringAppend (out, (char *) &u, 1);
      p += 2;
    }
    return resLength;

  default:
    if (interp) {
      SetResult (interp, "Invalid encoding ???");
    }
    return -1;
  }
}

/*
 *----------------------------------------------------------------------
 *
 * AdabasFromToUnicode --
 *
 *      undocumented.
 *      Note, that this function is only needed for 6.2 or above.
 *
 * Results:
 *      Returns a valid Tcl result. In case of an error the
 *      result of interp (if not NULL) will contain an error message.
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */

#if ADABAS_VERSION >= 62

int
AdabasFromToUnicode (interp, src, srcLen, dest, destLen, trunc, dir)
     Tcl_Interp  *interp;		/* Interpreter for errors (or NULL). */
     char        *src;			/* internal encoded (unicode) string. */
     int4         srcLen;		/* length of src (in bytes). */
     char        *dest;			/* external encoded (e.g. ascii) string. */
     int4        *destLen;		/* in:  size of buffer, dest points to;
					 * out: length of dest (in bytes). */
     int         *trunc;		/* out: dest too short? */
     int          dir;			/* direction (FROM_UNICODE/TO_UNICODE). */
{
  uni_error   rc;			/* Returncode of uni_trans(). */
  int4        errCharNo;		/* Error position (here ignored). */
  c40         msg;			/* Pascal error text. */
  char        c_msg[41];		/* C style error text. */
  int         srcCodeset;		/* Encoding of source data. */
  int         destCodeset;		/* Encoding of destination data. */
  int         srcEqDest;		/* Are src and data identical? */
  Tcl_DString copy;			/* Copy of src, if srcEqDest. */

  if (ucsCodeset < 0 && initUnicodeEncodings (interp) == TCL_ERROR) {
    return TCL_ERROR;
  }
  switch (dir) {
  case FROM_UNICODE:
    srcCodeset  = ucsCodeset;
    destCodeset = extCodeset;
    break;
  case TO_UNICODE:
    srcCodeset  = extCodeset;
    destCodeset = ucsCodeset;
    break;
  default:
    if (interp) {
      SetResult (interp, "Unknwon direction???");
    }
    return TCL_ERROR;
  }

  if (srcEqDest = (src == dest)) {
    Tcl_DStringInit (&copy);
    Tcl_DStringAppend (&copy, src, srcLen);
    src = Tcl_DStringValue (&copy);
  }
  s80uni_trans (encodings, encodingCnt,
		src,  srcLen,  srcCodeset,
		dest, destLen, destCodeset,
		0, &rc, &errCharNo);
  if (srcEqDest) {
    Tcl_DStringFree (&copy);
  }
  if (*trunc = (rc == UNI_DEST_TOO_SHORT)) {
    rc = UNI_OK;
  }
  if (rc != UNI_OK) {
    if (interp) {
      s80uni_error (rc, msg);
      p2c (c_msg, msg, sizeof (msg));
      Tcl_ResetResult (interp);
      AppendResult (interp, c_msg);
    }
    return TCL_ERROR;
  }
  return TCL_OK;
}

#endif /* ADABAS_VERSION >= 62 */

/*
 *----------------------------------------------------------------------
 *
 * initUnicodeEncodings --
 *
 *      Initializes the unicode encoding tables dependend of the value
 *      of the environment variable DBLANG.
 *      Note, that this function is only needed for 6.2 or above.
 *
 * Results:
 *      Normally returns TCL_OK. If an error occured, TCL_ERROR will be
 *      returned; In this case the result of interp (if not NULL) will
 *      contain an error message.
 *
 * Side effects:
 *      The unicode translation tables are loaded and can be used. There
 *      are encodings (e.g. iso8859_1), where no table is needed, so that
 *      this function will return TCL_OK, but encodingsCnt is still 0.
 *
 *----------------------------------------------------------------------
 */

#if ADABAS_VERSION >= 62

static int
initUnicodeEncodings (interp)
     Tcl_Interp  *interp;		/* Interpreter for errors (or NULL). */
{
  uni_load_error loadRc;		/* Returncode of uni_fload(). */
  c40            msg;			/* Pascal error text. */
  char           c_msg[41];		/* C style error text. */
  char           iso8859[IDENTIFIER+1];	/* Default encoding if DBLANG isn't
					 * set (as pascal string). */
  int2  swapIt   = 1;			/* Is the machine big endian? */
  char *pointIt  = (char *) &swapIt;	/* Pointer to test the byte sex. */

  ucsCodeset = pointIt[0] ? UNICODE_SWAP : UNICODE;
    sprintf (iso8859, "%-*s", IDENTIFIER, ISO8859);

  s82uni_fload (&encodings, &encodingCnt, 1,
		iso8859, &extCodeset, &loadRc);
  if (loadRc == UNI_NO_DBLANG_FOUND) {
    s82uni_fload (&encodings, &encodingCnt, 0,
		  iso8859, &extCodeset, &loadRc);
  }
  if (loadRc != UNI_LOAD_OK) {
    if (interp) {
      s82uni_load_error (loadRc, msg);
      p2c (c_msg, msg, sizeof (msg));
      Tcl_ResetResult (interp);
      AppendResult (interp, c_msg);
    }
    return TCL_ERROR;
  }
  return TCL_OK;
}
#endif /* ADABAS_VERSION >= 62 */
