hotspot/src/share/vm/utilities/utf8.cpp
author jrose
Sat, 09 Apr 2011 21:16:12 -0700
changeset 9124 f60dee480d49
parent 8921 14bfe81f2a9d
child 14477 95e66ea71f71
permissions -rw-r--r--
Merge
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
1
489c9b5090e2 Initial load
duke
parents:
diff changeset
     1
/*
8921
14bfe81f2a9d 7010070: Update all 2010 Oracle-changed OpenJDK files to have the proper copyright dates - second pass
trims
parents: 8076
diff changeset
     2
 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
1
489c9b5090e2 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
489c9b5090e2 Initial load
duke
parents:
diff changeset
     4
 *
489c9b5090e2 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
489c9b5090e2 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
489c9b5090e2 Initial load
duke
parents:
diff changeset
     7
 * published by the Free Software Foundation.
489c9b5090e2 Initial load
duke
parents:
diff changeset
     8
 *
489c9b5090e2 Initial load
duke
parents:
diff changeset
     9
 * This code is distributed in the hope that it will be useful, but WITHOUT
489c9b5090e2 Initial load
duke
parents:
diff changeset
    10
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
489c9b5090e2 Initial load
duke
parents:
diff changeset
    11
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
489c9b5090e2 Initial load
duke
parents:
diff changeset
    12
 * version 2 for more details (a copy is included in the LICENSE file that
489c9b5090e2 Initial load
duke
parents:
diff changeset
    13
 * accompanied this code).
489c9b5090e2 Initial load
duke
parents:
diff changeset
    14
 *
489c9b5090e2 Initial load
duke
parents:
diff changeset
    15
 * You should have received a copy of the GNU General Public License version
489c9b5090e2 Initial load
duke
parents:
diff changeset
    16
 * 2 along with this work; if not, write to the Free Software Foundation,
489c9b5090e2 Initial load
duke
parents:
diff changeset
    17
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
489c9b5090e2 Initial load
duke
parents:
diff changeset
    18
 *
5547
f4b087cbb361 6941466: Oracle rebranding changes for Hotspot repositories
trims
parents: 1
diff changeset
    19
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
f4b087cbb361 6941466: Oracle rebranding changes for Hotspot repositories
trims
parents: 1
diff changeset
    20
 * or visit www.oracle.com if you need additional information or have any
f4b087cbb361 6941466: Oracle rebranding changes for Hotspot repositories
trims
parents: 1
diff changeset
    21
 * questions.
1
489c9b5090e2 Initial load
duke
parents:
diff changeset
    22
 *
489c9b5090e2 Initial load
duke
parents:
diff changeset
    23
 */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    24
7397
5b173b4ca846 6989984: Use standard include model for Hospot
stefank
parents: 5547
diff changeset
    25
#include "precompiled.hpp"
5b173b4ca846 6989984: Use standard include model for Hospot
stefank
parents: 5547
diff changeset
    26
#include "utilities/utf8.hpp"
1
489c9b5090e2 Initial load
duke
parents:
diff changeset
    27
489c9b5090e2 Initial load
duke
parents:
diff changeset
    28
// Assume the utf8 string is in legal form and has been
489c9b5090e2 Initial load
duke
parents:
diff changeset
    29
// checked in the class file parser/format checker.
489c9b5090e2 Initial load
duke
parents:
diff changeset
    30
char* UTF8::next(const char* str, jchar* value) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    31
  unsigned const char *ptr = (const unsigned char *)str;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    32
  unsigned char ch, ch2, ch3;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    33
  int length = -1;              /* bad length */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    34
  jchar result;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    35
  switch ((ch = ptr[0]) >> 4) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    36
    default:
489c9b5090e2 Initial load
duke
parents:
diff changeset
    37
    result = ch;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    38
    length = 1;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    39
    break;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    40
489c9b5090e2 Initial load
duke
parents:
diff changeset
    41
  case 0x8: case 0x9: case 0xA: case 0xB: case 0xF:
489c9b5090e2 Initial load
duke
parents:
diff changeset
    42
    /* Shouldn't happen. */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    43
    break;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    44
489c9b5090e2 Initial load
duke
parents:
diff changeset
    45
  case 0xC: case 0xD:
489c9b5090e2 Initial load
duke
parents:
diff changeset
    46
    /* 110xxxxx  10xxxxxx */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    47
    if (((ch2 = ptr[1]) & 0xC0) == 0x80) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    48
      unsigned char high_five = ch & 0x1F;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    49
      unsigned char low_six = ch2 & 0x3F;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    50
      result = (high_five << 6) + low_six;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    51
      length = 2;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    52
      break;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    53
    }
489c9b5090e2 Initial load
duke
parents:
diff changeset
    54
    break;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    55
489c9b5090e2 Initial load
duke
parents:
diff changeset
    56
  case 0xE:
489c9b5090e2 Initial load
duke
parents:
diff changeset
    57
    /* 1110xxxx 10xxxxxx 10xxxxxx */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    58
    if (((ch2 = ptr[1]) & 0xC0) == 0x80) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    59
      if (((ch3 = ptr[2]) & 0xC0) == 0x80) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    60
        unsigned char high_four = ch & 0x0f;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    61
        unsigned char mid_six = ch2 & 0x3f;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    62
        unsigned char low_six = ch3 & 0x3f;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    63
        result = (((high_four << 6) + mid_six) << 6) + low_six;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    64
        length = 3;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    65
      }
489c9b5090e2 Initial load
duke
parents:
diff changeset
    66
    }
489c9b5090e2 Initial load
duke
parents:
diff changeset
    67
    break;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    68
  } /* end of switch */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    69
489c9b5090e2 Initial load
duke
parents:
diff changeset
    70
  if (length <= 0) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    71
    *value = ptr[0];    /* default bad result; */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    72
    return (char*)(ptr + 1); // make progress somehow
489c9b5090e2 Initial load
duke
parents:
diff changeset
    73
  }
489c9b5090e2 Initial load
duke
parents:
diff changeset
    74
489c9b5090e2 Initial load
duke
parents:
diff changeset
    75
  *value = result;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    76
489c9b5090e2 Initial load
duke
parents:
diff changeset
    77
  // The assert is correct but the .class file is wrong
489c9b5090e2 Initial load
duke
parents:
diff changeset
    78
  // assert(UNICODE::utf8_size(result) == length, "checking reverse computation");
489c9b5090e2 Initial load
duke
parents:
diff changeset
    79
  return (char *)(ptr + length);
489c9b5090e2 Initial load
duke
parents:
diff changeset
    80
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
    81
489c9b5090e2 Initial load
duke
parents:
diff changeset
    82
char* UTF8::next_character(const char* str, jint* value) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    83
  unsigned const char *ptr = (const unsigned char *)str;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    84
  /* See if it's legal supplementary character:
489c9b5090e2 Initial load
duke
parents:
diff changeset
    85
     11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    86
  if (is_supplementary_character(ptr)) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    87
    *value = get_supplementary_character(ptr);
489c9b5090e2 Initial load
duke
parents:
diff changeset
    88
    return (char *)(ptr + 6);
489c9b5090e2 Initial load
duke
parents:
diff changeset
    89
  }
489c9b5090e2 Initial load
duke
parents:
diff changeset
    90
  jchar result;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    91
  char* next_ch = next(str, &result);
489c9b5090e2 Initial load
duke
parents:
diff changeset
    92
  *value = result;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    93
  return next_ch;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    94
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
    95
489c9b5090e2 Initial load
duke
parents:
diff changeset
    96
// Count bytes of the form 10xxxxxx and deduct this count
489c9b5090e2 Initial load
duke
parents:
diff changeset
    97
// from the total byte count.  The utf8 string must be in
489c9b5090e2 Initial load
duke
parents:
diff changeset
    98
// legal form which has been verified in the format checker.
489c9b5090e2 Initial load
duke
parents:
diff changeset
    99
int UTF8::unicode_length(const char* str, int len) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   100
  int num_chars = len;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   101
  for (int i = 0; i < len; i++) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   102
    if ((str[i] & 0xC0) == 0x80) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   103
      --num_chars;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   104
    }
489c9b5090e2 Initial load
duke
parents:
diff changeset
   105
  }
489c9b5090e2 Initial load
duke
parents:
diff changeset
   106
  return num_chars;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   107
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   108
489c9b5090e2 Initial load
duke
parents:
diff changeset
   109
// Count bytes of the utf8 string except those in form
489c9b5090e2 Initial load
duke
parents:
diff changeset
   110
// 10xxxxxx which only appear in multibyte characters.
489c9b5090e2 Initial load
duke
parents:
diff changeset
   111
// The utf8 string must be in legal form and has been
489c9b5090e2 Initial load
duke
parents:
diff changeset
   112
// verified in the format checker.
489c9b5090e2 Initial load
duke
parents:
diff changeset
   113
int UTF8::unicode_length(const char* str) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   114
  int num_chars = 0;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   115
  for (const char* p = str; *p; p++) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   116
    if (((*p) & 0xC0) != 0x80) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   117
      num_chars++;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   118
    }
489c9b5090e2 Initial load
duke
parents:
diff changeset
   119
  }
489c9b5090e2 Initial load
duke
parents:
diff changeset
   120
  return num_chars;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   121
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   122
489c9b5090e2 Initial load
duke
parents:
diff changeset
   123
// Writes a jchar a utf8 and returns the end
489c9b5090e2 Initial load
duke
parents:
diff changeset
   124
static u_char* utf8_write(u_char* base, jchar ch) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   125
  if ((ch != 0) && (ch <=0x7f)) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   126
    base[0] = (u_char) ch;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   127
    return base + 1;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   128
  }
489c9b5090e2 Initial load
duke
parents:
diff changeset
   129
489c9b5090e2 Initial load
duke
parents:
diff changeset
   130
  if (ch <= 0x7FF) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   131
    /* 11 bits or less. */
489c9b5090e2 Initial load
duke
parents:
diff changeset
   132
    unsigned char high_five = ch >> 6;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   133
    unsigned char low_six = ch & 0x3F;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   134
    base[0] = high_five | 0xC0; /* 110xxxxx */
489c9b5090e2 Initial load
duke
parents:
diff changeset
   135
    base[1] = low_six | 0x80;   /* 10xxxxxx */
489c9b5090e2 Initial load
duke
parents:
diff changeset
   136
    return base + 2;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   137
  }
489c9b5090e2 Initial load
duke
parents:
diff changeset
   138
  /* possibly full 16 bits. */
489c9b5090e2 Initial load
duke
parents:
diff changeset
   139
  char high_four = ch >> 12;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   140
  char mid_six = (ch >> 6) & 0x3F;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   141
  char low_six = ch & 0x3f;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   142
  base[0] = high_four | 0xE0; /* 1110xxxx */
489c9b5090e2 Initial load
duke
parents:
diff changeset
   143
  base[1] = mid_six | 0x80;   /* 10xxxxxx */
489c9b5090e2 Initial load
duke
parents:
diff changeset
   144
  base[2] = low_six | 0x80;   /* 10xxxxxx */
489c9b5090e2 Initial load
duke
parents:
diff changeset
   145
  return base + 3;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   146
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   147
489c9b5090e2 Initial load
duke
parents:
diff changeset
   148
// Decodes 'utf8_str' into 'unicode_str', producing exactly
// 'unicode_length' jchars.  The input is not checked for a terminator:
// decoding is driven purely by the requested output count, so the caller
// must pass a count that matches the encoded string.
void UTF8::convert_to_unicode(const char* utf8_str, jchar* unicode_str, int unicode_length) {
  const char* cursor = utf8_str;
  int out = 0;

  /* ASCII case loop optimization: copy single-byte values directly
     until the first byte above 0x7F shows up. */
  while (out < unicode_length) {
    const unsigned char byte = (unsigned char)cursor[0];
    if (byte > 0x7F) {
      break;
    }
    unicode_str[out] = byte;
    out++;
    cursor++;
  }

  /* General case: everything that is left goes through the full decoder. */
  while (out < unicode_length) {
    cursor = UTF8::next(cursor, &unicode_str[out]);
    out++;
  }
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   164
489c9b5090e2 Initial load
duke
parents:
diff changeset
   165
// Returns NULL if 'c' it not found. This only works as long
489c9b5090e2 Initial load
duke
parents:
diff changeset
   166
// as 'c' is an ASCII character
8076
96d498ec7ae1 6990754: Use native memory and reference counting to implement SymbolTable
coleenp
parents: 7397
diff changeset
   167
const jbyte* UTF8::strrchr(const jbyte* base, int length, jbyte c) {
1
489c9b5090e2 Initial load
duke
parents:
diff changeset
   168
  assert(length >= 0, "sanity check");
489c9b5090e2 Initial load
duke
parents:
diff changeset
   169
  assert(c >= 0, "does not work for non-ASCII characters");
489c9b5090e2 Initial load
duke
parents:
diff changeset
   170
  // Skip backwards in string until 'c' is found or end is reached
489c9b5090e2 Initial load
duke
parents:
diff changeset
   171
  while(--length >= 0 && base[length] != c);
489c9b5090e2 Initial load
duke
parents:
diff changeset
   172
  return (length < 0) ? NULL : &base[length];
489c9b5090e2 Initial load
duke
parents:
diff changeset
   173
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   174
8076
96d498ec7ae1 6990754: Use native memory and reference counting to implement SymbolTable
coleenp
parents: 7397
diff changeset
   175
bool UTF8::equal(const jbyte* base1, int length1, const jbyte* base2, int length2) {
1
489c9b5090e2 Initial load
duke
parents:
diff changeset
   176
  // Length must be the same
489c9b5090e2 Initial load
duke
parents:
diff changeset
   177
  if (length1 != length2) return false;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   178
  for (int i = 0; i < length1; i++) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   179
    if (base1[i] != base2[i]) return false;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   180
  }
489c9b5090e2 Initial load
duke
parents:
diff changeset
   181
  return true;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   182
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   183
489c9b5090e2 Initial load
duke
parents:
diff changeset
   184
// Tests whether 'str' starts with the six-byte encoding of a
// supplementary character, i.e. two consecutive three-byte groups shaped
//   11101101 1010xxxx 10xxxxxx   11101101 1011xxxx 10xxxxxx
// Bytes are inspected strictly left to right and the scan stops at the
// first mismatch, so nothing beyond the first non-matching byte is read.
bool UTF8::is_supplementary_character(const unsigned char* str) {
  // First group: 0xED, 1010xxxx, 10xxxxxx
  if (str[0] != 0xED)          return false;
  if ((str[1] & 0xF0) != 0xA0) return false;
  if ((str[2] & 0xC0) != 0x80) return false;
  // Second group: 0xED, 1011xxxx, 10xxxxxx
  if (str[3] != 0xED)          return false;
  if ((str[4] & 0xF0) != 0xB0) return false;
  return (str[5] & 0xC0) == 0x80;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   188
489c9b5090e2 Initial load
duke
parents:
diff changeset
   189
// Assembles the value of a six-byte supplementary character from its
// payload bits and adds the 0x10000 offset.  Only str[1], str[2], str[4]
// and str[5] carry payload; str[0] and str[3] are not examined.  No
// validation is done here, so the caller is expected to have checked the
// byte pattern first (see is_supplementary_character()).
jint UTF8::get_supplementary_character(const unsigned char* str) {
  const jint bits_19_16 = (str[1] & 0x0f) << 16;
  const jint bits_15_10 = (str[2] & 0x3f) << 10;
  const jint bits_9_6   = (str[4] & 0x0f) << 6;
  const jint bits_5_0   =  str[5] & 0x3f;
  return 0x10000 + bits_19_16 + bits_15_10 + bits_9_6 + bits_5_0;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   193
489c9b5090e2 Initial load
duke
parents:
diff changeset
   194
489c9b5090e2 Initial load
duke
parents:
diff changeset
   195
//-------------------------------------------------------------------------------------
489c9b5090e2 Initial load
duke
parents:
diff changeset
   196
489c9b5090e2 Initial load
duke
parents:
diff changeset
   197
489c9b5090e2 Initial load
duke
parents:
diff changeset
   198
// Number of bytes needed to encode 'c' as utf8:
//   0x0001 .. 0x007F  -> 1
//   0x0000, 0x0080 .. 0x07FF -> 2   (zero does not get the one-byte form)
//   0x0800 .. 0xFFFF  -> 3
int UNICODE::utf8_size(jchar c) {
  if (c > 0x07FF) {
    return 3;
  }
  if (c == 0 || c > 0x007F) {
    return 2;
  }
  return 1;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   203
489c9b5090e2 Initial load
duke
parents:
diff changeset
   204
// Returns the total number of bytes needed to encode the 'length' jchars
// at 'base' as utf8, not counting any terminating zero byte.  The
// per-character size comes from utf8_size() so that the two functions
// cannot drift apart.
int UNICODE::utf8_length(jchar* base, int length) {
  int total = 0;
  for (int index = 0; index < length; index++) {
    total += utf8_size(base[index]);
  }
  return total;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   214
489c9b5090e2 Initial load
duke
parents:
diff changeset
   215
// Encodes the 'length' jchars at 'base' as a zero-terminated utf8 string
// in a freshly allocated buffer and returns it.  The buffer is obtained
// from NEW_RESOURCE_ARRAY and sized exactly (utf8_length() bytes plus the
// terminator); its lifetime follows the rules of that allocator.
char* UNICODE::as_utf8(jchar* base, int length) {
  const int utf8_len = utf8_length(base, length);
  u_char* result = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);

  u_char* p = result;
  int index = 0;
  while (index < length) {
    p = utf8_write(p, base[index]);
    index++;
  }
  *p = '\0';

  // The write cursor has to land exactly on the slot reserved for the
  // terminator, otherwise utf8_length() and utf8_write() disagree.
  assert(p == &result[utf8_len], "length prediction must be correct");
  return (char*) result;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   226
489c9b5090e2 Initial load
duke
parents:
diff changeset
   227
// Encodes the 'length' jchars at 'base' as utf8 into the caller-supplied
// buffer 'buf' of 'buflen' bytes and returns 'buf'.
//
// The result is always zero-terminated when buflen > 0.  If the encoded
// form does not fit, the string is truncated at a character boundary: a
// character is only written when all of its bytes plus the terminator
// still fit.  When buflen <= 0 there is no room even for the terminator,
// so the buffer is left untouched.
char* UNICODE::as_utf8(jchar* base, int length, char* buf, int buflen) {
  if (buflen <= 0) {
    // Nothing can be stored, not even '\0'; writing here would overrun.
    return buf;
  }

  u_char* p = (u_char*)buf;
  // Bytes still available at 'p', including the one kept for the terminator.
  // Tracking a count avoids forming a pointer beyond the end of the buffer.
  int remaining = buflen;
  for (int index = 0; index < length; index++) {
    const jchar c = base[index];
    const int size = utf8_size(c);
    if (size >= remaining) break;      // string is truncated
    p = utf8_write(p, c);
    remaining -= size;
  }
  *p = '\0';
  return buf;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   238
489c9b5090e2 Initial load
duke
parents:
diff changeset
   239
void UNICODE::convert_to_utf8(const jchar* base, int length, char* utf8_buffer) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   240
  for(int index = 0; index < length; index++) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   241
    utf8_buffer = (char*)utf8_write((u_char*)utf8_buffer, base[index]);
489c9b5090e2 Initial load
duke
parents:
diff changeset
   242
  }
489c9b5090e2 Initial load
duke
parents:
diff changeset
   243
  *utf8_buffer = '\0';
489c9b5090e2 Initial load
duke
parents:
diff changeset
   244
}