hotspot/src/share/vm/utilities/utf8.cpp
author tonyp
Wed, 19 Jan 2011 19:30:42 -0500
changeset 7923 fc200fcd4e05
parent 7397 5b173b4ca846
child 8076 96d498ec7ae1
permissions -rw-r--r--
6977804: G1: remove the zero-filling thread Summary: This changeset removes the zero-filling thread from G1 and collapses the two free region lists we had before (the "free" and "unclean" lists) into one. The new free list uses the new heap region sets / lists abstractions that we'll ultimately use it to keep track of all regions in the heap. A heap region set was also introduced for the humongous regions. Finally, this change increases the concurrency between the thread that completes freeing regions (after a cleanup pause) and the rest of the system (before we'd have to wait for said thread to complete before allocating a new region). The changest also includes a lot of refactoring and code simplification. Reviewed-by: jcoomes, johnc
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
1
489c9b5090e2 Initial load
duke
parents:
diff changeset
     1
/*
7397
5b173b4ca846 6989984: Use standard include model for Hospot
stefank
parents: 5547
diff changeset
     2
 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
1
489c9b5090e2 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
489c9b5090e2 Initial load
duke
parents:
diff changeset
     4
 *
489c9b5090e2 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
489c9b5090e2 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
489c9b5090e2 Initial load
duke
parents:
diff changeset
     7
 * published by the Free Software Foundation.
489c9b5090e2 Initial load
duke
parents:
diff changeset
     8
 *
489c9b5090e2 Initial load
duke
parents:
diff changeset
     9
 * This code is distributed in the hope that it will be useful, but WITHOUT
489c9b5090e2 Initial load
duke
parents:
diff changeset
    10
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
489c9b5090e2 Initial load
duke
parents:
diff changeset
    11
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
489c9b5090e2 Initial load
duke
parents:
diff changeset
    12
 * version 2 for more details (a copy is included in the LICENSE file that
489c9b5090e2 Initial load
duke
parents:
diff changeset
    13
 * accompanied this code).
489c9b5090e2 Initial load
duke
parents:
diff changeset
    14
 *
489c9b5090e2 Initial load
duke
parents:
diff changeset
    15
 * You should have received a copy of the GNU General Public License version
489c9b5090e2 Initial load
duke
parents:
diff changeset
    16
 * 2 along with this work; if not, write to the Free Software Foundation,
489c9b5090e2 Initial load
duke
parents:
diff changeset
    17
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
489c9b5090e2 Initial load
duke
parents:
diff changeset
    18
 *
5547
f4b087cbb361 6941466: Oracle rebranding changes for Hotspot repositories
trims
parents: 1
diff changeset
    19
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
f4b087cbb361 6941466: Oracle rebranding changes for Hotspot repositories
trims
parents: 1
diff changeset
    20
 * or visit www.oracle.com if you need additional information or have any
f4b087cbb361 6941466: Oracle rebranding changes for Hotspot repositories
trims
parents: 1
diff changeset
    21
 * questions.
1
489c9b5090e2 Initial load
duke
parents:
diff changeset
    22
 *
489c9b5090e2 Initial load
duke
parents:
diff changeset
    23
 */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    24
7397
5b173b4ca846 6989984: Use standard include model for Hospot
stefank
parents: 5547
diff changeset
    25
#include "precompiled.hpp"
5b173b4ca846 6989984: Use standard include model for Hospot
stefank
parents: 5547
diff changeset
    26
#include "utilities/utf8.hpp"
1
489c9b5090e2 Initial load
duke
parents:
diff changeset
    27
489c9b5090e2 Initial load
duke
parents:
diff changeset
    28
// Assume the utf8 string is in legal form and has been
489c9b5090e2 Initial load
duke
parents:
diff changeset
    29
// checked in the class file parser/format checker.
489c9b5090e2 Initial load
duke
parents:
diff changeset
    30
char* UTF8::next(const char* str, jchar* value) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    31
  unsigned const char *ptr = (const unsigned char *)str;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    32
  unsigned char ch, ch2, ch3;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    33
  int length = -1;              /* bad length */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    34
  jchar result;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    35
  switch ((ch = ptr[0]) >> 4) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    36
    default:
489c9b5090e2 Initial load
duke
parents:
diff changeset
    37
    result = ch;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    38
    length = 1;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    39
    break;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    40
489c9b5090e2 Initial load
duke
parents:
diff changeset
    41
  case 0x8: case 0x9: case 0xA: case 0xB: case 0xF:
489c9b5090e2 Initial load
duke
parents:
diff changeset
    42
    /* Shouldn't happen. */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    43
    break;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    44
489c9b5090e2 Initial load
duke
parents:
diff changeset
    45
  case 0xC: case 0xD:
489c9b5090e2 Initial load
duke
parents:
diff changeset
    46
    /* 110xxxxx  10xxxxxx */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    47
    if (((ch2 = ptr[1]) & 0xC0) == 0x80) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    48
      unsigned char high_five = ch & 0x1F;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    49
      unsigned char low_six = ch2 & 0x3F;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    50
      result = (high_five << 6) + low_six;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    51
      length = 2;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    52
      break;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    53
    }
489c9b5090e2 Initial load
duke
parents:
diff changeset
    54
    break;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    55
489c9b5090e2 Initial load
duke
parents:
diff changeset
    56
  case 0xE:
489c9b5090e2 Initial load
duke
parents:
diff changeset
    57
    /* 1110xxxx 10xxxxxx 10xxxxxx */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    58
    if (((ch2 = ptr[1]) & 0xC0) == 0x80) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    59
      if (((ch3 = ptr[2]) & 0xC0) == 0x80) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    60
        unsigned char high_four = ch & 0x0f;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    61
        unsigned char mid_six = ch2 & 0x3f;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    62
        unsigned char low_six = ch3 & 0x3f;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    63
        result = (((high_four << 6) + mid_six) << 6) + low_six;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    64
        length = 3;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    65
      }
489c9b5090e2 Initial load
duke
parents:
diff changeset
    66
    }
489c9b5090e2 Initial load
duke
parents:
diff changeset
    67
    break;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    68
  } /* end of switch */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    69
489c9b5090e2 Initial load
duke
parents:
diff changeset
    70
  if (length <= 0) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    71
    *value = ptr[0];    /* default bad result; */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    72
    return (char*)(ptr + 1); // make progress somehow
489c9b5090e2 Initial load
duke
parents:
diff changeset
    73
  }
489c9b5090e2 Initial load
duke
parents:
diff changeset
    74
489c9b5090e2 Initial load
duke
parents:
diff changeset
    75
  *value = result;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    76
489c9b5090e2 Initial load
duke
parents:
diff changeset
    77
  // The assert is correct but the .class file is wrong
489c9b5090e2 Initial load
duke
parents:
diff changeset
    78
  // assert(UNICODE::utf8_size(result) == length, "checking reverse computation");
489c9b5090e2 Initial load
duke
parents:
diff changeset
    79
  return (char *)(ptr + length);
489c9b5090e2 Initial load
duke
parents:
diff changeset
    80
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
    81
489c9b5090e2 Initial load
duke
parents:
diff changeset
    82
char* UTF8::next_character(const char* str, jint* value) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    83
  unsigned const char *ptr = (const unsigned char *)str;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    84
  /* See if it's legal supplementary character:
489c9b5090e2 Initial load
duke
parents:
diff changeset
    85
     11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    86
  if (is_supplementary_character(ptr)) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    87
    *value = get_supplementary_character(ptr);
489c9b5090e2 Initial load
duke
parents:
diff changeset
    88
    return (char *)(ptr + 6);
489c9b5090e2 Initial load
duke
parents:
diff changeset
    89
  }
489c9b5090e2 Initial load
duke
parents:
diff changeset
    90
  jchar result;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    91
  char* next_ch = next(str, &result);
489c9b5090e2 Initial load
duke
parents:
diff changeset
    92
  *value = result;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    93
  return next_ch;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    94
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
    95
489c9b5090e2 Initial load
duke
parents:
diff changeset
    96
// Count bytes of the form 10xxxxxx and deduct this count
489c9b5090e2 Initial load
duke
parents:
diff changeset
    97
// from the total byte count.  The utf8 string must be in
489c9b5090e2 Initial load
duke
parents:
diff changeset
    98
// legal form which has been verified in the format checker.
489c9b5090e2 Initial load
duke
parents:
diff changeset
    99
// Returns the number of bytes among the first 'len' bytes of 'str' that
// are not continuation bytes (10xxxxxx), i.e. 'len' minus the number of
// continuation bytes. The caller is expected to pass a string that has
// already been verified to be legal UTF-8.
int UTF8::unicode_length(const char* str, int len) {
  int continuation_bytes = 0;
  int pos = 0;
  while (pos < len) {
    if ((str[pos] & 0xC0) == 0x80) {
      continuation_bytes++;
    }
    pos++;
  }
  return len - continuation_bytes;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   108
489c9b5090e2 Initial load
duke
parents:
diff changeset
   109
// Count bytes of the utf8 string except those in form
489c9b5090e2 Initial load
duke
parents:
diff changeset
   110
// 10xxxxxx which only appear in multibyte characters.
489c9b5090e2 Initial load
duke
parents:
diff changeset
   111
// The utf8 string must be in legal form and has been
489c9b5090e2 Initial load
duke
parents:
diff changeset
   112
// verified in the format checker.
489c9b5090e2 Initial load
duke
parents:
diff changeset
   113
// Returns the number of bytes in the NUL-terminated string 'str' that are
// not continuation bytes (10xxxxxx). The caller is expected to pass a
// string that has already been verified to be legal UTF-8.
int UTF8::unicode_length(const char* str) {
  int count = 0;
  const char* cursor = str;
  while (*cursor) {
    const bool is_continuation = ((*cursor & 0xC0) == 0x80);
    if (!is_continuation) {
      count++;
    }
    cursor++;
  }
  return count;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   122
489c9b5090e2 Initial load
duke
parents:
diff changeset
   123
// Writes a jchar as utf8 and returns the end
489c9b5090e2 Initial load
duke
parents:
diff changeset
   124
// Encodes 'ch' as UTF-8 at 'base' and returns a pointer just past the
// bytes written (one, two or three bytes). The value 0 is not written as
// a single zero byte; it takes the two-byte form (0xC0 0x80), so the
// output never contains an embedded NUL.
static u_char* utf8_write(u_char* base, jchar ch) {
  if (ch > 0x7FF) {
    // Possibly full 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
    base[0] = (u_char)(0xE0 | (ch >> 12));
    base[1] = (u_char)(0x80 | ((ch >> 6) & 0x3F));
    base[2] = (u_char)(0x80 | (ch & 0x3F));
    return base + 3;
  }

  if (ch == 0 || ch > 0x7F) {
    // 11 bits or less (and the special case 0): 110xxxxx 10xxxxxx
    base[0] = (u_char)(0xC0 | (ch >> 6));
    base[1] = (u_char)(0x80 | (ch & 0x3F));
    return base + 2;
  }

  // 0x01..0x7F: the value is its own encoding.
  base[0] = (u_char) ch;
  return base + 1;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   147
489c9b5090e2 Initial load
duke
parents:
diff changeset
   148
// Decodes 'utf8_str' into exactly 'unicode_length' jchars stored in
// 'unicode_str'. The input is consumed until that many output elements
// have been produced; no terminator is looked for or written.
void UTF8::convert_to_unicode(const char* utf8_str, jchar* unicode_str, int unicode_length) {
  const char* cursor = utf8_str;
  int out = 0;

  // Fast path: copy the leading run of single-byte (<= 0x7F) characters.
  while (out < unicode_length) {
    const unsigned char byte = (unsigned char)cursor[0];
    if (byte > 0x7F) {
      break;
    }
    unicode_str[out] = byte;
    out++;
    cursor++;
  }

  // General path: decode whatever remains one sequence at a time.
  while (out < unicode_length) {
    cursor = UTF8::next(cursor, &unicode_str[out]);
    out++;
  }
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   164
489c9b5090e2 Initial load
duke
parents:
diff changeset
   165
// Returns NULL if 'c' is not found. This only works as long
489c9b5090e2 Initial load
duke
parents:
diff changeset
   166
// as 'c' is an ASCII character
489c9b5090e2 Initial load
duke
parents:
diff changeset
   167
// Returns a pointer to the last occurrence of 'c' within the first
// 'length' bytes of 'base', or NULL if 'c' does not occur. Only valid
// for ASCII 'c' (asserted to be non-negative).
jbyte* UTF8::strrchr(jbyte* base, int length, jbyte c) {
  assert(length >= 0, "sanity check");
  assert(c >= 0, "does not work for non-ASCII characters");
  // Walk backwards from the last byte; the first hit is the answer.
  for (int pos = length - 1; pos >= 0; pos--) {
    if (base[pos] == c) {
      return &base[pos];
    }
  }
  return NULL;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   174
489c9b5090e2 Initial load
duke
parents:
diff changeset
   175
// Returns true when the two byte sequences have the same length and the
// same contents, false otherwise.
bool UTF8::equal(jbyte* base1, int length1, jbyte* base2, int length2) {
  if (length1 == length2) {
    // Advance over the common prefix; equality means it spans everything.
    int pos = 0;
    while (pos < length1 && base1[pos] == base2[pos]) {
      pos++;
    }
    return pos >= length1;
  }
  return false;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   183
489c9b5090e2 Initial load
duke
parents:
diff changeset
   184
// Returns true when the six bytes at 'str' match the surrogate-pair
// pattern
//   11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx
// Bytes are examined strictly in order and the check stops at the first
// mismatch, so later bytes are not read once an earlier one fails.
bool UTF8::is_supplementary_character(const unsigned char* str) {
  // First half: 0xED, 1010xxxx, 10xxxxxx
  if (str[0] != 0xED)          return false;
  if ((str[1] & 0xF0) != 0xA0) return false;
  if ((str[2] & 0xC0) != 0x80) return false;
  // Second half: 0xED, 1011xxxx, 10xxxxxx
  if (str[3] != 0xED)          return false;
  if ((str[4] & 0xF0) != 0xB0) return false;
  return (str[5] & 0xC0) == 0x80;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   188
489c9b5090e2 Initial load
duke
parents:
diff changeset
   189
// Combines the six-byte surrogate-pair encoding at 'str' into a single
// value. Bytes 1-2 contribute the upper ten payload bits, bytes 4-5 the
// lower ten, and 0x10000 is added to the result. Bytes 0 and 3 carry no
// payload and are not read. No validation is done here; callers check
// the pattern with is_supplementary_character() first.
jint UTF8::get_supplementary_character(const unsigned char* str) {
  const jint upper_ten = ((str[1] & 0x0f) << 6) | (str[2] & 0x3f);
  const jint lower_ten = ((str[4] & 0x0f) << 6) | (str[5] & 0x3f);
  return 0x10000 + (upper_ten << 10) + lower_ten;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   193
489c9b5090e2 Initial load
duke
parents:
diff changeset
   194
489c9b5090e2 Initial load
duke
parents:
diff changeset
   195
//-------------------------------------------------------------------------------------
489c9b5090e2 Initial load
duke
parents:
diff changeset
   196
489c9b5090e2 Initial load
duke
parents:
diff changeset
   197
489c9b5090e2 Initial load
duke
parents:
diff changeset
   198
// Returns the number of bytes used to encode 'c' as UTF-8:
//   0x0001..0x007F -> 1
//   0x0000 and 0x0080..0x07FF -> 2
//   everything above 0x07FF -> 3
int UNICODE::utf8_size(jchar c) {
  if (c > 0x07FF) {
    return 3;
  }
  if (c == 0 || c > 0x007F) {
    return 2;
  }
  return 1;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   203
489c9b5090e2 Initial load
duke
parents:
diff changeset
   204
// Returns the total number of bytes needed to encode the 'length' jchars
// at 'base' as UTF-8 (not counting any terminator).
int UNICODE::utf8_length(jchar* base, int length) {
  int total = 0;
  int pos = 0;
  while (pos < length) {
    // Per-character size: 1, 2 or 3 bytes, as computed by utf8_size().
    total += utf8_size(base[pos]);
    pos++;
  }
  return total;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   214
489c9b5090e2 Initial load
duke
parents:
diff changeset
   215
// Encodes the 'length' jchars at 'base' as a NUL-terminated UTF-8 string
// in a freshly obtained buffer (NEW_RESOURCE_ARRAY) sized to the exact
// encoded length plus one byte for the terminator, and returns it.
char* UNICODE::as_utf8(jchar* base, int length) {
  const int utf8_len = utf8_length(base, length);
  u_char* result = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);

  u_char* p = result;
  int index = 0;
  while (index < length) {
    p = utf8_write(p, base[index]);
    index++;
  }
  *p = '\0';

  // The bytes written must match what utf8_length() predicted.
  assert(p == &result[utf8_len], "length prediction must be correct");
  return (char*) result;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   226
489c9b5090e2 Initial load
duke
parents:
diff changeset
   227
// Encodes the 'length' jchars at 'base' as UTF-8 into the caller-supplied
// buffer 'buf' of 'buflen' bytes and returns 'buf'.
//
// The output is always NUL-terminated when 'buflen' is positive. If the
// whole input does not fit, encoding stops before the first character
// whose bytes plus the terminator would not fit, so the result is
// truncated on a character boundary.
//
// If 'buflen' is zero or negative there is no room even for the
// terminator; 'buf' is returned untouched instead of being written past
// its end.
char* UNICODE::as_utf8(jchar* base, int length, char* buf, int buflen) {
  if (buflen <= 0) {
    return buf;
  }

  u_char* p = (u_char*)buf;
  // Bytes still free in 'buf'. Tracked as a count rather than comparing
  // against an end pointer, so no pointer beyond the end of the buffer is
  // ever formed.
  int remaining = buflen;
  for (int index = 0; index < length; index++) {
    const jchar c = base[index];
    const int needed = utf8_size(c);
    if (needed >= remaining) break;      // string is truncated; keep one byte for the NUL
    p = utf8_write(p, c);
    remaining -= needed;
  }
  *p = '\0';
  return buf;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   238
489c9b5090e2 Initial load
duke
parents:
diff changeset
   239
void UNICODE::convert_to_utf8(const jchar* base, int length, char* utf8_buffer) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   240
  for(int index = 0; index < length; index++) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   241
    utf8_buffer = (char*)utf8_write((u_char*)utf8_buffer, base[index]);
489c9b5090e2 Initial load
duke
parents:
diff changeset
   242
  }
489c9b5090e2 Initial load
duke
parents:
diff changeset
   243
  *utf8_buffer = '\0';
489c9b5090e2 Initial load
duke
parents:
diff changeset
   244
}