| ↓ | idna_to_ascii_4i | 37 | 92 | 191 | lib/idna.c | 
| 
/*
 * Apply the ToASCII operation (numbered steps below) to a single label.
 *
 * in:    the label as an array of code points.  It need not be zero
 *        terminated; only the first inlen entries are read.
 * inlen: number of code points in the label.
 * out:   receives the zero terminated ASCII form.  Up to 63 characters
 *        plus the terminator are written, so the caller must provide
 *        room for at least 64 bytes.
 * flags: IDNA_ALLOW_UNASSIGNED selects the nameprep variant that accepts
 *        unassigned code points; IDNA_USE_STD3_ASCII_RULES enables the
 *        checks of step 3.
 *
 * Returns IDNA_SUCCESS, or one of IDNA_MALLOC_ERROR,
 * IDNA_STRINGPREP_ERROR, IDNA_CONTAINS_NON_LDH, IDNA_CONTAINS_MINUS,
 * IDNA_CONTAINS_ACE_PREFIX, IDNA_PUNYCODE_ERROR or IDNA_INVALID_LENGTH.
 */
int
idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags)
{
  const size_t prefixlen = strlen (IDNA_ACE_PREFIX);
  size_t len, outlen;
  uint32_t *src;		/* XXX don't need to copy data? */
  int rc;

  /*
   * ToASCII consists of the following steps:
   *
   * 1. If all code points in the sequence are in the ASCII range (0..7F)
   * then skip to step 3.
   */
  {
    size_t i;
    int inasciirange = 1;

    for (i = 0; i < inlen; i++)
      if (in[i] > 0x7F)
        inasciirange = 0;

    if (inasciirange)
      {
        /* Work on a zero terminated private copy of the input. */
        src = malloc (sizeof (in[0]) * (inlen + 1));
        if (src == NULL)
          return IDNA_MALLOC_ERROR;
        memcpy (src, in, sizeof (in[0]) * inlen);
        src[inlen] = 0;
        goto step3;
      }
  }

  /*
   * 2. Perform the steps specified in [NAMEPREP] and fail if there is
   * an error. The AllowUnassigned flag is used in [NAMEPREP].
   */
  {
    char *p;

    p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL);
    if (p == NULL)
      return IDNA_MALLOC_ERROR;

    len = strlen (p);
    do
      {
        char *newp;

        /* Grow the buffer and retry until nameprep has enough room. */
        len = 2 * len + 10;	/* XXX better guess? */
        newp = realloc (p, len);
        if (newp == NULL)
          {
            free (p);
            return IDNA_MALLOC_ERROR;
          }
        p = newp;

        if (flags & IDNA_ALLOW_UNASSIGNED)
          rc = stringprep_nameprep (p, len);
        else
          rc = stringprep_nameprep_no_unassigned (p, len);
      }
    while (rc == STRINGPREP_TOO_SMALL_BUFFER);

    if (rc != STRINGPREP_OK)
      {
        free (p);
        return IDNA_STRINGPREP_ERROR;
      }

    src = stringprep_utf8_to_ucs4 (p, -1, NULL);
    free (p);
    /* The conversion can fail; every later step dereferences src. */
    if (src == NULL)
      return IDNA_MALLOC_ERROR;
  }

step3:
  /*
   * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks:
   *
   * (a) Verify the absence of non-LDH ASCII code points; that is,
   * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
   *
   * (b) Verify the absence of leading and trailing hyphen-minus;
   * that is, the absence of U+002D at the beginning and end of
   * the sequence.
   */
  if (flags & IDNA_USE_STD3_ASCII_RULES)
    {
      size_t i;

      for (i = 0; src[i]; i++)
        if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F ||
            (src[i] >= 0x3A && src[i] <= 0x40) ||
            (src[i] >= 0x5B && src[i] <= 0x60) ||
            (src[i] >= 0x7B && src[i] <= 0x7F))
          {
            free (src);
            return IDNA_CONTAINS_NON_LDH;
          }

      if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
        {
          free (src);
          return IDNA_CONTAINS_MINUS;
        }
    }

  /*
   * 4. If all code points in the sequence are in the ASCII range
   * (0..7F), then skip to step 8.
   */
  {
    size_t i;
    int inasciirange = 1;

    for (i = 0; src[i]; i++)
      {
        if (src[i] > 0x7F)
          inasciirange = 0;
        /* copy string to output buffer if we are about to skip to step8 */
        if (i < 64)
          out[i] = (char) src[i];
      }
    if (i < 64)
      out[i] = '\0';

    if (inasciirange)
      {
        /* With 64 or more code points no terminator was stored in
           out, so step 8 must not run strlen on it; the label is too
           long in any case. */
        if (i >= 64)
          {
            free (src);
            return IDNA_INVALID_LENGTH;
          }
        goto step8;
      }
  }

  /*
   * 5. Verify that the sequence does NOT begin with the ACE prefix.
   *
   */
  {
    size_t i;
    int match = 1;

    /* A src shorter than the prefix stops at its terminating zero,
       which never equals a prefix character. */
    for (i = 0; match && i < prefixlen; i++)
      if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
        match = 0;

    if (match)
      {
        free (src);
        return IDNA_CONTAINS_ACE_PREFIX;
      }
  }

  /*
   * 6. Encode the sequence using the encoding algorithm in [PUNYCODE]
   * and fail if there is an error.
   */
  for (len = 0; src[len]; len++)
    ;
  /* Leave room for the ACE prefix within the 63 character limit. */
  outlen = 63 - prefixlen;
  rc = punycode_encode (len, src, NULL, &outlen, &out[prefixlen]);
  if (rc != PUNYCODE_SUCCESS)
    {
      free (src);
      return IDNA_PUNYCODE_ERROR;
    }
  out[prefixlen + outlen] = '\0';

  /*
   * 7. Prepend the ACE prefix.
   */
  memcpy (out, IDNA_ACE_PREFIX, prefixlen);

  /*
   * 8. Verify that the number of code points is in the range 1 to 63
   * inclusive (0 is excluded).
   */
step8:
  free (src);
  len = strlen (out);
  if (len < 1 || len > 63)
    return IDNA_INVALID_LENGTH;

  return IDNA_SUCCESS;
}
 | 
| ↓ | stringprep_4i | 37 | 76 | 137 | lib/stringprep.c | 
| 
/*
 * Run each step of a stringprep profile over a UCS-4 string, in place.
 *
 * ucs4:       buffer holding the string; modified in place.
 * len:        on entry the number of code points in ucs4; on success
 *             updated to the length after processing.  It is left
 *             untouched when an error is returned.
 * maxucs4len: capacity of ucs4, in code points.
 * flags:      caller flags; steps whose profile flags do not apply
 *             (per UNAPPLICAPLEFLAGS) are skipped.
 * profile:    array of steps, terminated by an entry whose operation
 *             is zero.
 *
 * Returns STRINGPREP_OK, or the STRINGPREP_* error of the first step
 * that fails.
 */
int
stringprep_4i (uint32_t * ucs4, size_t * len, size_t maxucs4len,
	       Stringprep_profile_flags flags,
	       const Stringprep_profile * profile)
{
  size_t cur_len = *len;
  size_t step;
  int rc;

  for (step = 0; profile[step].operation; step++)
    {
      const Stringprep_profile *op = &profile[step];

      switch (op->operation)
        {
        case STRINGPREP_NFKC:
          {
            uint32_t *normalized;
            size_t n;

            if (UNAPPLICAPLEFLAGS (flags, op->flags))
              break;

            /* Profile requires NFKC, but callee asked for no NFKC. */
            if ((flags & STRINGPREP_NO_NFKC) && !op->flags)
              return STRINGPREP_FLAG_ERROR;

            normalized = stringprep_ucs4_nfkc_normalize (ucs4, cur_len);
            if (normalized == NULL)
              return STRINGPREP_NFKC_FAILED;

            /* Measure the zero terminated normalized string. */
            n = 0;
            while (normalized[n])
              n++;

            if (n >= maxucs4len)
              {
                free (normalized);
                return STRINGPREP_TOO_SMALL_BUFFER;
              }

            memcpy (ucs4, normalized, n * sizeof (ucs4[0]));
            free (normalized);
            cur_len = n;
          }
          break;

        case STRINGPREP_PROHIBIT_TABLE:
          if (stringprep_find_string_in_table (ucs4, cur_len,
                                               NULL, op->table) != -1)
            return STRINGPREP_CONTAINS_PROHIBITED;
          break;

        case STRINGPREP_UNASSIGNED_TABLE:
          if (UNAPPLICAPLEFLAGS (flags, op->flags))
            break;
          if ((flags & STRINGPREP_NO_UNASSIGNED)
              && stringprep_find_string_in_table (ucs4, cur_len,
                                                  NULL, op->table) != -1)
            return STRINGPREP_CONTAINS_UNASSIGNED;
          break;

        case STRINGPREP_MAP_TABLE:
          if (UNAPPLICAPLEFLAGS (flags, op->flags))
            break;
          rc = stringprep_apply_table_to_string (ucs4, &cur_len,
                                                 maxucs4len, op->table);
          if (rc != STRINGPREP_OK)
            return rc;
          break;

        case STRINGPREP_BIDI_PROHIBIT_TABLE:
        case STRINGPREP_BIDI_RAL_TABLE:
        case STRINGPREP_BIDI_L_TABLE:
          /* These tables are only consulted from the BIDI step. */
          break;

        case STRINGPREP_BIDI:
          {
            int seen_prohibit = 0;
            int seen_ral = 0;
            int seen_l = 0;
            /* Profile index of the table that matched, or SIZE_MAX. */
            size_t ral_idx = SIZE_MAX;
            size_t l_idx = SIZE_MAX;
            size_t t;

            /* Scan the whole profile for the three bidi tables. */
            for (t = 0; profile[t].operation; t++)
              switch (profile[t].operation)
                {
                case STRINGPREP_BIDI_PROHIBIT_TABLE:
                  seen_prohibit = 1;
                  if (stringprep_find_string_in_table
                      (ucs4, cur_len, NULL, profile[t].table) != -1)
                    return STRINGPREP_BIDI_CONTAINS_PROHIBITED;
                  break;

                case STRINGPREP_BIDI_RAL_TABLE:
                  seen_ral = 1;
                  if (stringprep_find_string_in_table
                      (ucs4, cur_len, NULL, profile[t].table) != -1)
                    ral_idx = t;
                  break;

                case STRINGPREP_BIDI_L_TABLE:
                  seen_l = 1;
                  if (stringprep_find_string_in_table
                      (ucs4, cur_len, NULL, profile[t].table) != -1)
                    l_idx = t;
                  break;

                default:
                  break;
                }

            /* A BIDI step needs all three tables in the profile. */
            if (!(seen_prohibit && seen_ral && seen_l))
              return STRINGPREP_PROFILE_ERROR;

            if (ral_idx != SIZE_MAX && l_idx != SIZE_MAX)
              return STRINGPREP_BIDI_BOTH_L_AND_RAL;

            /* With RAL characters present, both the first and the
               last character must come from the RAL table. */
            if (ral_idx != SIZE_MAX
                && (stringprep_find_character_in_table
                    (ucs4[0], profile[ral_idx].table) == -1
                    || stringprep_find_character_in_table
                    (ucs4[cur_len - 1], profile[ral_idx].table) == -1))
              return STRINGPREP_BIDI_LEADTRAIL_NOT_RAL;
          }
          break;

        default:
          return STRINGPREP_PROFILE_ERROR;
        }
    }

  *len = cur_len;
  return STRINGPREP_OK;
}
 | 
| ↓ | _g_utf8_normalize_wc | 30 | 77 | 135 | lib/nfkc.c | 
| 
/*
 * Normalize a UTF-8 string into an array of code points.
 *
 * str:     UTF-8 input.
 * max_len: number of bytes of str to consider; when negative, str is
 *          read up to its terminating zero byte.  Scanning also stops
 *          at a zero byte found before max_len.
 * mode:    the compatibility decompositions are used for
 *          G_NORMALIZE_NFKC and G_NORMALIZE_NFKD; the composition pass
 *          runs for G_NORMALIZE_NFC and G_NORMALIZE_NFKC.
 *
 * Returns a zero terminated buffer obtained with g_new, or NULL when
 * that allocation fails.
 */
static gunichar *
_g_utf8_normalize_wc (const gchar * str, gssize max_len, GNormalizeMode mode)
{
  gsize n_wc;
  gunichar *wc_buffer;
  const char *p;
  gsize last_start;
  gboolean do_compat = (mode == G_NORMALIZE_NFKC || mode == G_NORMALIZE_NFKD);
  gboolean do_compose = (mode == G_NORMALIZE_NFC || mode == G_NORMALIZE_NFKC);
  /* First pass: count the code points of the decomposed string so the
     output buffer can be sized. */
  n_wc = 0;
  p = str;
  while ((max_len < 0 || p < str + max_len) && *p)
    {
      const gchar *decomp;
      gunichar wc = g_utf8_get_char (p);
      /* This range is handed to decompose_hangul instead of the
         decomposition table. */
      if (wc >= 0xac00 && wc <= 0xd7af)
	{
	  gsize result_len;
	  /* A NULL destination only reports the length. */
	  decompose_hangul (wc, NULL, &result_len);
	  n_wc += result_len;
	}
      else
	{
	  decomp = find_decomposition (wc, do_compat);
	  if (decomp)
	    n_wc += g_utf8_strlen (decomp, -1);
	  else
	    n_wc++;
	}
      p = g_utf8_next_char (p);
    }
  /* One extra slot for the terminating zero. */
  wc_buffer = g_new (gunichar, n_wc + 1);
  if (!wc_buffer)
    return NULL;
  /* Second pass: write the decomposition of each character and put
     the combining marks into canonical order as runs are completed. */
  last_start = 0;
  n_wc = 0;
  p = str;
  while ((max_len < 0 || p < str + max_len) && *p)
    {
      gunichar wc = g_utf8_get_char (p);
      const gchar *decomp;
      int cc;
      gsize old_n_wc = n_wc;
      if (wc >= 0xac00 && wc <= 0xd7af)
	{
	  gsize result_len;
	  decompose_hangul (wc, wc_buffer + n_wc, &result_len);
	  n_wc += result_len;
	}
      else
	{
	  decomp = find_decomposition (wc, do_compat);
	  if (decomp)
	    {
	      const char *pd;
	      for (pd = decomp; *pd != '\0'; pd = g_utf8_next_char (pd))
		wc_buffer[n_wc++] = g_utf8_get_char (pd);
	    }
	  else
	    wc_buffer[n_wc++] = wc;
	}
      if (n_wc > 0)
	{
	  /* When the first code point just written has combining
	     class 0, order everything from last_start onwards and
	     start the next run at that code point. */
	  cc = COMBINING_CLASS (wc_buffer[old_n_wc]);
	  if (cc == 0)
	    {
	      g_unicode_canonical_ordering (wc_buffer + last_start,
					    n_wc - last_start);
	      last_start = old_n_wc;
	    }
	}
      p = g_utf8_next_char (p);
    }
  /* Order the final run, which the loop above leaves pending. */
  if (n_wc > 0)
    {
      g_unicode_canonical_ordering (wc_buffer + last_start,
				    n_wc - last_start);
      last_start = n_wc;
    }
  wc_buffer[n_wc] = 0;
  /* All decomposed and reordered */
  if (do_compose && n_wc > 0)
    {
      gsize i, j;
      int last_cc = 0;
      /* From here on last_start is the index of the character that
         later ones are combined into. */
      last_start = 0;
      for (i = 0; i < n_wc; i++)
	{
	  int cc = COMBINING_CLASS (wc_buffer[i]);
	  if (i > 0 &&
	      (last_cc == 0 || last_cc != cc) &&
	      combine (wc_buffer[last_start], wc_buffer[i],
		       &wc_buffer[last_start]))
	    {
	      /* wc_buffer[i] was merged into wc_buffer[last_start]:
	         close the gap and look at the same index again. */
	      for (j = i + 1; j < n_wc; j++)
		wc_buffer[j - 1] = wc_buffer[j];
	      n_wc--;
	      i--;
	      if (i == last_start)
		last_cc = 0;
	      else
		last_cc = COMBINING_CLASS (wc_buffer[i - 1]);
	      continue;
	    }
	  if (cc == 0)
	    last_start = i;
	  last_cc = cc;
	}
    }
  /* Terminate again: composition may have shortened the string. */
  wc_buffer[n_wc] = 0;
  return wc_buffer;
}
 | 
| ↓ | punycode_encode | 22 | 57 | 111 | lib/punycode.c | 
| 
/*
 * Encode a sequence of code points as Punycode.
 *
 * input_length:  number of code points in input.
 * input:         the code points to encode.
 * case_flags:    either NULL, or an array of input_length flags; when
 *                given, each flag is passed to encode_basic or
 *                encode_digit together with the matching code point.
 * output_length: on entry the capacity of output, in characters; on
 *                success the number of characters written.
 * output:        receives the encoded characters.  No terminating
 *                zero is written.
 *
 * Returns punycode_success; punycode_overflow when input_length
 * exceeds maxint or delta would overflow; punycode_big_output when
 * output is too small.
 */
int
punycode_encode (size_t input_length,
		 const punycode_uint input[],
		 const unsigned char case_flags[],
		 size_t * output_length, char output[])
{
  punycode_uint input_len, n, delta, h, b, bias, j, m, q, k, t;
  size_t out, max_out;
  /* The Punycode spec assumes that the input length is the same type */
  /* of integer as a code point, so we need to convert the size_t to  */
  /* a punycode_uint, which could overflow.                           */
  if (input_length > maxint)
    return punycode_overflow;
  input_len = (punycode_uint) input_length;
  /* Initialize the state: */
  n = initial_n;
  delta = 0;
  out = 0;
  max_out = *output_length;
  bias = initial_bias;
  /* Handle the basic code points: */
  for (j = 0; j < input_len; ++j)
    {
      if (basic (input[j]))
	{
	  /* Requiring two free slots keeps one in reserve for the */
	  /* delimiter written after this loop.                    */
	  if (max_out - out < 2)
	    return punycode_big_output;
	  output[out++] = case_flags ?
	    encode_basic (input[j], case_flags[j]) : (char) input[j];
	}
      /* else if (input[j] < n) return punycode_bad_input; */
      /* (not needed for Punycode with unsigned code points) */
    }
  h = b = (punycode_uint) out;
  /* cannot overflow because out <= input_len <= maxint */
  /* h is the number of code points that have been handled, b is the  */
  /* number of basic code points, and out is the number of ASCII code */
  /* points that have been output.                                    */
  if (b > 0)
    output[out++] = delimiter;
  /* Main encoding loop: */
  while (h < input_len)
    {
      /* All non-basic code points < n have been     */
      /* handled already.  Find the next larger one: */
      for (m = maxint, j = 0; j < input_len; ++j)
	{
	  /* if (basic(input[j])) continue; */
	  /* (not needed for Punycode) */
	  if (input[j] >= n && input[j] < m)
	    m = input[j];
	}
      /* Increase delta enough to advance the decoder's    */
      /* <n,i> state to <m,0>, but guard against overflow: */
      if (m - n > (maxint - delta) / (h + 1))
	return punycode_overflow;
      delta += (m - n) * (h + 1);
      n = m;
      for (j = 0; j < input_len; ++j)
	{
	  /* Punycode does not need to check whether input[j] is basic: */
	  if (input[j] < n /* || basic(input[j]) */ )
	    {
	      if (++delta == 0)
		return punycode_overflow;
	    }
	  if (input[j] == n)
	    {
	      /* Represent delta as a generalized variable-length integer: */
	      for (q = delta, k = base;; k += base)
		{
		  /* Checked on every pass, so the final digit written */
		  /* after the loop also has room.                     */
		  if (out >= max_out)
		    return punycode_big_output;
		  t = k <= bias /* + tmin */ ? tmin :	/* +tmin not needed */
		    k >= bias + tmax ? tmax : k - bias;
		  if (q < t)
		    break;
		  output[out++] = encode_digit (t + (q - t) % (base - t), 0);
		  q = (q - t) / (base - t);
		}
	      output[out++] = encode_digit (q, case_flags && case_flags[j]);
	      bias = adapt (delta, h + 1, h == b);
	      delta = 0;
	      ++h;
	    }
	}
      ++delta, ++n;
    }
  *output_length = out;
  return punycode_success;
}
 | 
| ↓ | punycode_decode | 21 | 59 | 101 | lib/punycode.c | 
| 
/*
 * Decode a Punycode string into a sequence of code points.
 *
 * input_length:  number of characters in input.
 * input:         the Punycode characters.
 * output_length: on entry the capacity of output, in code points
 *                (values above maxint are treated as maxint); on
 *                success the number of code points written.
 * output:        receives the decoded code points.
 * case_flags:    either NULL, or an array with the same capacity as
 *                output that receives one flag per code point, as
 *                computed by flagged.
 *
 * Returns punycode_success; punycode_bad_input when the input is not
 * well formed; punycode_overflow when a value would exceed maxint;
 * punycode_big_output when output is too small.
 */
int
punycode_decode (size_t input_length,
		 const char input[],
		 size_t * output_length,
		 punycode_uint output[], unsigned char case_flags[])
{
  punycode_uint n, out, i, max_out, bias, oldi, w, k, digit, t;
  size_t b, j, in;
  /* Initialize the state: */
  n = initial_n;
  out = i = 0;
  max_out = *output_length > maxint ? maxint
    : (punycode_uint) * output_length;
  bias = initial_bias;
  /* Handle the basic code points:  Let b be the number of input code */
  /* points before the last delimiter, or 0 if there is none, then    */
  /* copy the first b code points to the output.                      */
  for (b = j = 0; j < input_length; ++j)
    if (delim (input[j]))
      b = j;
  if (b > max_out)
    return punycode_big_output;
  for (j = 0; j < b; ++j)
    {
      if (case_flags)
	case_flags[out] = flagged (input[j]);
      if (!basic (input[j]))
	return punycode_bad_input;
      output[out++] = input[j];
    }
  /* Main decoding loop:  Start just after the last delimiter if any  */
  /* basic code points were copied; start at the beginning otherwise. */
  for (in = b > 0 ? b + 1 : 0; in < input_length; ++out)
    {
      /* in is the index of the next ASCII code point to be consumed, */
      /* and out is the number of code points in the output array.    */
      /* Decode a generalized variable-length integer into delta,  */
      /* which gets added to i.  The overflow checking is easier   */
      /* if we increase i as we go, then subtract off its starting */
      /* value at the end to obtain delta.                         */
      for (oldi = i, w = 1, k = base;; k += base)
	{
	  /* The input ended in the middle of a variable-length integer. */
	  if (in >= input_length)
	    return punycode_bad_input;
	  digit = decode_digit (input[in++]);
	  if (digit >= base)
	    return punycode_bad_input;
	  if (digit > (maxint - i) / w)
	    return punycode_overflow;
	  i += digit * w;
	  t = k <= bias /* + tmin */ ? tmin :	/* +tmin not needed */
	    k >= bias + tmax ? tmax : k - bias;
	  if (digit < t)
	    break;
	  if (w > maxint / (base - t))
	    return punycode_overflow;
	  w *= (base - t);
	}
      bias = adapt (i - oldi, out + 1, oldi == 0);
      /* i was supposed to wrap around from out+1 to 0,   */
      /* incrementing n each time, so we'll fix that now: */
      if (i / (out + 1) > maxint - n)
	return punycode_overflow;
      n += i / (out + 1);
      i %= (out + 1);
      /* Insert n at position i of the output: */
      /* not needed for Punycode: */
      /* if (basic(n)) return punycode_invalid_input; */
      if (out >= max_out)
	return punycode_big_output;
      if (case_flags)
	{
	  /* Shift the flags up by one to mirror the insertion below. */
	  memmove (case_flags + i + 1, case_flags + i, out - i);
	  /* Case of last ASCII code point determines case flag: */
	  case_flags[i] = flagged (input[in - 1]);
	}
      memmove (output + i + 1, output + i, (out - i) * sizeof *output);
      output[i++] = n;
    }
  *output_length = (size_t) out;
  /* cannot overflow because out <= old value of *output_length */
  return punycode_success;
}
 | 
| ↓ | idna_to_ascii_4z | 15 | 45 | 82 | lib/idna.c | 
| 
/*
 * Convert a zero terminated domain name, given as code points, to its
 * ASCII form by applying idna_to_ascii_4i to each label.
 *
 * input:  zero terminated array of code points.
 * output: receives a newly allocated, zero terminated string that the
 *         caller must free.  On failure *output is NULL.
 * flags:  passed through to idna_to_ascii_4i.
 *
 * Returns IDNA_SUCCESS, IDNA_MALLOC_ERROR, or the error reported by
 * idna_to_ascii_4i for the first label that fails.
 */
int
idna_to_ascii_4z (const uint32_t * input, char **output, int flags)
{
  const uint32_t *start = input;
  const uint32_t *end;
  char buf[64];
  char *out = NULL;
  int rc;

  /* 1) Whenever dots are used as label separators, the following
     characters MUST be recognized as dots: U+002E (full stop),
     U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
     U+FF61 (halfwidth ideographic full stop). */

  if (input[0] == 0)
    {
      /* Handle implicit zero-length root label. */
      *output = malloc (1);
      if (!*output)
        return IDNA_MALLOC_ERROR;
      strcpy (*output, "");
      return IDNA_SUCCESS;
    }

  if (DOTP (input[0]) && input[1] == 0)
    {
      /* Handle explicit zero-length root label. */
      *output = malloc (2);
      if (!*output)
        return IDNA_MALLOC_ERROR;
      strcpy (*output, ".");
      return IDNA_SUCCESS;
    }

  *output = NULL;
  do
    {
      /* Find the end of the current label. */
      end = start;
      for (; *end && !DOTP (*end); end++)
        ;

      if (*end == '\0' && start == end)
        {
          /* Handle explicit zero-length root label. */
          buf[0] = '\0';
        }
      else
        {
          rc = idna_to_ascii_4i (start, (size_t) (end - start), buf, flags);
          if (rc != IDNA_SUCCESS)
            {
              /* Release the labels collected so far; free (NULL) is
                 harmless when the very first label fails. */
              free (out);
              return rc;
            }
        }

      if (out)
        {
          /* Room for the old text, a dot, the new label and the
             terminating zero. */
          char *newp = realloc (out, strlen (out) + 1 + strlen (buf) + 1);
          if (!newp)
            {
              free (out);
              return IDNA_MALLOC_ERROR;
            }
          out = newp;
          strcat (out, ".");
          strcat (out, buf);
        }
      else
        {
          out = malloc (strlen (buf) + 1);
          if (!out)
            return IDNA_MALLOC_ERROR;
          strcpy (out, buf);
        }

      start = end + 1;
    }
  while (*end);

  *output = out;
  return IDNA_SUCCESS;
}
 | 
| ↓ | g_utf8_to_ucs4_fast | 14 | 50 | 88 | lib/nfkc.c | 
| 
/*
 * Convert UTF-8 text to an array of code points without validating
 * the input.
 *
 * str:           UTF-8 input; NULL is rejected by g_return_val_if_fail.
 * len:           number of bytes of str to consider; when negative,
 *                str is read up to its terminating zero byte.
 * items_written: if not NULL, receives the number of code points
 *                stored, not counting the terminating zero.
 *
 * Returns a zero terminated buffer obtained with g_new, or NULL when
 * that allocation fails.
 */
static gunichar *
g_utf8_to_ucs4_fast (const gchar * str, glong len, glong * items_written)
{
  const gchar *cursor;
  gunichar *result;
  gint n_chars;
  gint idx;

  g_return_val_if_fail (str != NULL, NULL);

  /* Count the characters so the result can be sized up front. */
  n_chars = 0;
  for (cursor = str;
       (len < 0 || cursor < str + len) && *cursor;
       cursor = g_utf8_next_char (cursor))
    ++n_chars;

  result = g_new (gunichar, n_chars + 1);
  if (!result)
    return NULL;

  cursor = str;
  for (idx = 0; idx < n_chars; idx++)
    {
      const unsigned char *bytes = (const unsigned char *) cursor;
      gunichar wc = bytes[0];
      gint charlen;
      gint k;

      /* The lead byte gives the sequence length; keep only its
         payload bits. */
      if (wc < 0x80)
        charlen = 1;
      else if (wc < 0xe0)
        {
          charlen = 2;
          wc &= 0x1f;
        }
      else if (wc < 0xf0)
        {
          charlen = 3;
          wc &= 0x0f;
        }
      else if (wc < 0xf8)
        {
          charlen = 4;
          wc &= 0x07;
        }
      else if (wc < 0xfc)
        {
          charlen = 5;
          wc &= 0x03;
        }
      else
        {
          charlen = 6;
          wc &= 0x01;
        }

      /* Each following byte contributes its low six bits. */
      for (k = 1; k < charlen; k++)
        wc = (wc << 6) | (bytes[k] & 0x3f);

      result[idx] = wc;
      cursor += charlen;
    }
  result[n_chars] = 0;

  if (items_written)
    *items_written = n_chars;

  return result;
}
 | 
| ↓ | tld_get_4 | 13 | 24 | 36 | lib/tld.c | 
| 
/*
 * Extract the top level domain of a domain name as a lower case
 * ASCII string.
 *
 * in:    the domain name as an array of code points.
 * inlen: number of code points in in.
 * out:   set to NULL on entry; on success receives a newly allocated,
 *        zero terminated string that the caller must free.
 *
 * A TLD is recognized only when the name ends in one or more Latin
 * letters (A-Z, a-z) directly preceded by a dot (per DOTP).
 *
 * Returns TLD_SUCCESS, TLD_NODATA when in is NULL or inlen is 0,
 * TLD_NO_TLD when no TLD is recognized, or TLD_MALLOC_ERROR.
 */
int
tld_get_4 (const uint32_t * in, size_t inlen, char **out)
{
  size_t start;			/* index of the first letter of the TLD */
  size_t olen;
  size_t i;
  char *out_s;

  *out = NULL;
  if (!in || inlen == 0)
    return TLD_NODATA;

  /* Scan backwards for non(latin)letters.  Working with an index
     rather than a pointer means nothing before in[0] is ever formed
     or read, even when the whole input consists of letters. */
  start = inlen;
  while (start > 0 && ((in[start - 1] >= 0x41 && in[start - 1] <= 0x5A) ||
                       (in[start - 1] >= 0x61 && in[start - 1] <= 0x7A)))
    start--;
  olen = inlen - start;

  /* There must be at least one letter, and a dot in front of it. */
  if (olen == 0 || start == 0 || !DOTP (in[start - 1]))
    return TLD_NO_TLD;

  /* Found something that appears a TLD. */
  out_s = malloc (olen + 1);
  if (!out_s)
    return TLD_MALLOC_ERROR;

  /* Transcribe to lowercase ascii string. */
  for (i = 0; i < olen; i++)
    {
      uint32_t ch = in[start + i];

      out_s[i] = (char) (ch > 0x5A ? ch : ch + 0x20);
    }
  out_s[olen] = 0;

  *out = out_s;
  return TLD_SUCCESS;
}
 | 
| ↓ | idna_to_unicode_internal | 12 | 47 | 116 | lib/idna.c | 
| 
/*
 * Apply the ToUnicode operation (numbered steps below) to one label.
 *
 * utf8in: zero terminated UTF-8 label.  The buffer may be passed to
 *         realloc and is freed on every return path, so it must be
 *         heap allocated and the caller must not use it afterwards.
 * out:    receives the decoded code points followed by a zero.
 * outlen: on entry the capacity of out, in code points, including the
 *         slot for the terminating zero; on success the number of
 *         code points stored, not counting that zero.
 * flags:  IDNA_ALLOW_UNASSIGNED selects the nameprep variant; the
 *         flags are also passed to idna_to_ascii_4i in step 6.
 *
 * Returns IDNA_SUCCESS, or IDNA_MALLOC_ERROR, IDNA_STRINGPREP_ERROR,
 * IDNA_NO_ACE_PREFIX, IDNA_PUNYCODE_ERROR,
 * IDNA_ROUNDTRIP_VERIFY_ERROR, or an error from idna_to_ascii_4i.
 */
static int
idna_to_unicode_internal (char *utf8in,
			  uint32_t * out, size_t * outlen, int flags)
{
  const size_t prefixlen = strlen (IDNA_ACE_PREFIX);
  int rc;
  char tmpout[64];
  size_t utf8len = strlen (utf8in) + 1;
  size_t addlen = 0;

  /*
   * ToUnicode consists of the following steps:
   *
   * 1. If the sequence contains any code points outside the ASCII range
   * (0..7F) then proceed to step 2, otherwise skip to step 3.
   */
  {
    size_t i;
    int inasciirange = 1;

    for (i = 0; utf8in[i]; i++)
      if (utf8in[i] & ~0x7F)
        inasciirange = 0;
    if (inasciirange)
      goto step3;
  }

  /*
   * 2. Perform the steps specified in [NAMEPREP] and fail if there is an
   * error. (If step 3 of ToASCII is also performed here, it will not
   * affect the overall behavior of ToUnicode, but it is not
   * necessary.) The AllowUnassigned flag is used in [NAMEPREP].
   */
  do
    {
      /* Retry with a buffer one byte larger each time nameprep
         reports that it ran out of room. */
      char *newp = realloc (utf8in, utf8len + addlen);

      if (newp == NULL)
        {
          free (utf8in);
          return IDNA_MALLOC_ERROR;
        }
      utf8in = newp;

      if (flags & IDNA_ALLOW_UNASSIGNED)
        rc = stringprep_nameprep (utf8in, utf8len + addlen);
      else
        rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
      addlen += 1;
    }
  while (rc == STRINGPREP_TOO_SMALL_BUFFER);

  if (rc != STRINGPREP_OK)
    {
      free (utf8in);
      return IDNA_STRINGPREP_ERROR;
    }

  /* 3. Verify that the sequence begins with the ACE prefix, and save a
   * copy of the sequence.
   */
step3:
  /* strncmp stops at the terminating zero, so a label shorter than
     the prefix is rejected without reading past its allocation. */
  if (strncmp (utf8in, IDNA_ACE_PREFIX, prefixlen) != 0)
    {
      free (utf8in);
      return IDNA_NO_ACE_PREFIX;
    }

  /* 4. Remove the ACE prefix.
   */
  memmove (utf8in, &utf8in[prefixlen], strlen (utf8in) - prefixlen + 1);

  /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE]
   * and fail if there is an error. Save a copy of the result of
   * this step.
   */
  if (*outlen == 0)
    {
      /* No room even for the terminating zero.  Decrementing would
         wrap around and remove the bound on the decoder, so report
         this like any other too-small output buffer. */
      free (utf8in);
      return IDNA_PUNYCODE_ERROR;
    }
  (*outlen)--;			/* reserve one for the zero */

  rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL);
  if (rc != PUNYCODE_SUCCESS)
    {
      free (utf8in);
      return IDNA_PUNYCODE_ERROR;
    }
  out[*outlen] = 0;		/* add zero */

  /* 6. Apply ToASCII.
   */
  rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
  if (rc != IDNA_SUCCESS)
    {
      free (utf8in);
      return rc;
    }

  /* 7. Verify that the result of step 6 matches the saved copy from
   * step 3, using a case-insensitive ASCII comparison.
   *
   * utf8in no longer carries the prefix, so the prefix of tmpout is
   * skipped.  A result shorter than the prefix cannot match, and
   * skipping past its end would read uninitialized bytes of tmpout.
   */
  if (strlen (tmpout) < prefixlen
      || c_strcasecmp (utf8in, tmpout + prefixlen) != 0)
    {
      free (utf8in);
      return IDNA_ROUNDTRIP_VERIFY_ERROR;
    }

  /* 8. Return the saved copy from step 5.
   */
  free (utf8in);
  return IDNA_SUCCESS;
}
 | 
| ↓ | combine | 12 | 21 | 54 | lib/nfkc.c | 
| 
/*
 * Try to compose the pair of code points (a, b) into one.
 *
 * a, b:   the first and the second code point of the pair.
 * result: receives the composed code point; written only when TRUE
 *         is returned by the table lookups below (combine_hangul is
 *         handed the same pointer).
 *
 * Returns TRUE if the pair composes, FALSE otherwise.
 */
static gboolean
combine (gunichar a, gunichar b, gunichar * result)
{
  gushort idx_first, idx_second;
  gunichar composed;

  if (combine_hangul (a, b, result))
    return TRUE;

  /* First code points that have exactly one possible partner. */
  idx_first = COMPOSE_INDEX (a);
  if (idx_first >= COMPOSE_FIRST_SINGLE_START
      && idx_first < COMPOSE_SECOND_START)
    {
      const int row = idx_first - COMPOSE_FIRST_SINGLE_START;

      if (b != compose_first_single[row][0])
        return FALSE;
      *result = compose_first_single[row][1];
      return TRUE;
    }

  /* Second code points that have exactly one possible partner. */
  idx_second = COMPOSE_INDEX (b);
  if (idx_second >= COMPOSE_SECOND_SINGLE_START)
    {
      const int row = idx_second - COMPOSE_SECOND_SINGLE_START;

      if (a != compose_second_single[row][0])
        return FALSE;
      *result = compose_second_single[row][1];
      return TRUE;
    }

  /* General case: both indices must fall inside the two-dimensional
     table. */
  if (idx_first < COMPOSE_FIRST_START
      || idx_first >= COMPOSE_FIRST_SINGLE_START
      || idx_second < COMPOSE_SECOND_START
      || idx_second >= COMPOSE_SECOND_SINGLE_START)
    return FALSE;

  composed = compose_array[idx_first - COMPOSE_FIRST_START]
                          [idx_second - COMPOSE_SECOND_START];
  if (!composed)
    return FALSE;		/* a zero entry means no composition */

  *result = composed;
  return TRUE;
}
 | 
| ↓ | _tld_checkchar | 11 | 15 | 28 | lib/tld.c | 
| 
/*
 * Check whether a code point is acceptable for a TLD.
 *
 * ch:  the code point to check.
 * tld: the table for the TLD, or NULL, in which case everything is
 *      accepted.
 *
 * Returns TLD_SUCCESS if ch is one of [-a-z0-9], a dot (per DOTP), or
 * lies within one of the start..end intervals of tld->valid;
 * TLD_INVALID otherwise.
 */
static int
_tld_checkchar (uint32_t ch, const Tld_table * tld)
{
  const Tld_table_element *lo, *hi;

  if (!tld)
    return TLD_SUCCESS;

  /* Check for [-a-z0-9.]. */
  if (ch >= 0x61 && ch <= 0x7A)
    return TLD_SUCCESS;
  if (ch >= 0x30 && ch <= 0x39)
    return TLD_SUCCESS;
  if (ch == 0x2D || DOTP (ch))
    return TLD_SUCCESS;

  /* Binary search over the half-open range [lo, hi) of intervals. */
  lo = tld->valid;
  hi = lo + tld->nvalid;
  while (lo < hi)
    {
      const Tld_table_element *mid = lo + (hi - lo) / 2;

      if (ch < mid->start)
        hi = mid;
      else if (ch > mid->end)
        lo = mid + 1;
      else
        return TLD_SUCCESS;
    }

  return TLD_INVALID;
}
 | 
|  | g_ucs4_to_utf8 | 10 | 28 | 47 | lib/nfkc.c | 
|  | find_decomposition | 10 | 21 | 41 | lib/nfkc.c | 
|  | combine_hangul | 10 | 11 | 23 | lib/nfkc.c | 
|  | stringprep_profile | 9 | 24 | 38 | lib/stringprep.c | 
|  | g_utf8_strlen | 9 | 16 | 37 | lib/nfkc.c | 
|  | g_unichar_to_utf8 | 8 | 28 | 50 | lib/nfkc.c | 
|  | idna_to_unicode_4z4z | 7 | 38 | 62 | lib/idna.c | 
|  | g_unicode_canonical_ordering | 7 | 24 | 36 | lib/nfkc.c | 
|  | decompose_hangul | 7 | 16 | 34 | lib/nfkc.c | 
|  | pr29_4 | 7 | 15 | 27 | lib/pr29.c | 
|  | stringprep_find_character_in_table | 6 | 8 | 21 | lib/stringprep.c | 
|  | stringprep | 6 | 30 | 53 | lib/stringprep.c | 
|  | tld_get_table | 5 | 9 | 14 | lib/tld.c | 
|  | stringprep_locale_charset | 5 | 7 | 17 | lib/toutf8.c | 
|  | stringprep_apply_table_to_string | 5 | 13 | 27 | lib/stringprep.c | 
|  | tld_check_4t | 5 | 13 | 24 | lib/tld.c | 
|  | tld_default_table | 4 | 8 | 16 | lib/tld.c | 
|  | decode_digit | 4 | 4 | 6 | lib/punycode.c | 
|  | tld_check_4 | 4 | 13 | 28 | lib/tld.c | 
|  | idna_to_unicode_44i | 4 | 12 | 24 | lib/idna.c | 
|  | stringprep_find_string_in_table | 4 | 10 | 19 | lib/stringprep.c | 
|  | first_column | 4 | 10 | 12 | lib/pr29.c | 
|  | adapt | 3 | 9 | 16 | lib/punycode.c | 
|  | stringprep_4zi_1 | 3 | 8 | 18 | lib/stringprep.c | 
|  | combinationclass | 3 | 7 | 11 | lib/pr29.c | 
|  | in_last_column_row | 3 | 7 | 11 | lib/pr29.c | 
|  | tld_check_4z | 3 | 6 | 14 | lib/tld.c | 
|  | stringprep_4zi | 3 | 6 | 12 | lib/stringprep.c | 
|  | tld_check_4tz | 3 | 6 | 13 | lib/tld.c | 
|  | tld_get_4z | 3 | 6 | 13 | lib/tld.c | 
|  | stringprep_check_version | 3 | 3 | 8 | lib/version.c | 
|  | tld_get_z | 3 | 14 | 22 | lib/tld.c | 
|  | tld_check_8z | 3 | 11 | 21 | lib/tld.c | 
|  | tld_check_lz | 3 | 10 | 20 | lib/tld.c | 
|  | tld_strerror | 7 | 17 | 40 | lib/strerror-tld.c | 
|  | punycode_strerror | 5 | 13 | 32 | lib/strerror-punycode.c | 
|  | pr29_strerror | 4 | 11 | 28 | lib/strerror-pr29.c | 
|  | idna_to_unicode_8z4z | 2 | 9 | 16 | lib/idna.c | 
|  | idna_to_ascii_8z | 2 | 9 | 18 | lib/idna.c | 
|  | idna_to_unicode_lzlz | 2 | 8 | 15 | lib/idna.c | 
|  | idna_to_unicode_8zlz | 2 | 8 | 15 | lib/idna.c | 
|  | idna_to_unicode_8z8z | 2 | 8 | 15 | lib/idna.c | 
|  | idna_to_ascii_lz | 2 | 8 | 16 | lib/idna.c | 
|  | g_utf8_get_char | 2 | 8 | 14 | lib/nfkc.c | 
|  | pr29_8z | 2 | 8 | 16 | lib/pr29.c | 
|  | stringprep_convert | 2 | 7 | 16 | lib/toutf8.c | 
|  | pr29_4z | 2 | 6 | 10 | lib/pr29.c | 
|  | encode_basic | 2 | 2 | 6 | lib/punycode.c | 
|  | stringprep_strerror | 13 | 29 | 64 | lib/strerror-stringprep.c | 
|  | idna_strerror | 13 | 29 | 64 | lib/strerror-idna.c | 
|  | stringprep_ucs4_nfkc_normalize | 1 | 6 | 12 | lib/nfkc.c | 
|  | g_utf8_normalize | 1 | 5 | 11 | lib/nfkc.c | 
|  | stringprep_ucs4_to_utf8 | 1 | 1 | 7 | lib/nfkc.c | 
|  | encode_digit | 1 | 1 | 7 | lib/punycode.c | 
|  | stringprep_utf8_to_ucs4 | 1 | 1 | 5 | lib/nfkc.c | 
|  | stringprep_unichar_to_utf8 | 1 | 1 | 5 | lib/nfkc.c | 
|  | stringprep_utf8_to_unichar | 1 | 1 | 5 | lib/nfkc.c | 
|  | idn_free | 1 | 1 | 5 | lib/idn-free.c | 
|  | stringprep_utf8_to_locale | 1 | 1 | 5 | lib/toutf8.c | 
|  | stringprep_locale_to_utf8 | 1 | 1 | 5 | lib/toutf8.c |