hotspot/src/share/vm/utilities/utf8.cpp
author mgerdin
Thu, 03 Apr 2014 14:54:42 +0200
changeset 24237 7b210ef8c830
parent 16602 5df51d3bc550
child 33628 09241459a8b8
permissions -rw-r--r--
6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion Summary: Added support for target buffer in as_utf8_string(), minor refactoring of as_utf8 and added some internal VM testing Reviewed-by: coleenp, dsimms, sla, dholmes Contributed-by: marcus.larsson@oracle.com
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
1
489c9b5090e2 Initial load
duke
parents:
diff changeset
     1
/*
16602
5df51d3bc550 8011048: Possible reading from unmapped memory in UTF8::as_quoted_ascii()
iklam
parents: 14477
diff changeset
     2
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
1
489c9b5090e2 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
489c9b5090e2 Initial load
duke
parents:
diff changeset
     4
 *
489c9b5090e2 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
489c9b5090e2 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
489c9b5090e2 Initial load
duke
parents:
diff changeset
     7
 * published by the Free Software Foundation.
489c9b5090e2 Initial load
duke
parents:
diff changeset
     8
 *
489c9b5090e2 Initial load
duke
parents:
diff changeset
     9
 * This code is distributed in the hope that it will be useful, but WITHOUT
489c9b5090e2 Initial load
duke
parents:
diff changeset
    10
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
489c9b5090e2 Initial load
duke
parents:
diff changeset
    11
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
489c9b5090e2 Initial load
duke
parents:
diff changeset
    12
 * version 2 for more details (a copy is included in the LICENSE file that
489c9b5090e2 Initial load
duke
parents:
diff changeset
    13
 * accompanied this code).
489c9b5090e2 Initial load
duke
parents:
diff changeset
    14
 *
489c9b5090e2 Initial load
duke
parents:
diff changeset
    15
 * You should have received a copy of the GNU General Public License version
489c9b5090e2 Initial load
duke
parents:
diff changeset
    16
 * 2 along with this work; if not, write to the Free Software Foundation,
489c9b5090e2 Initial load
duke
parents:
diff changeset
    17
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
489c9b5090e2 Initial load
duke
parents:
diff changeset
    18
 *
5547
f4b087cbb361 6941466: Oracle rebranding changes for Hotspot repositories
trims
parents: 1
diff changeset
    19
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
f4b087cbb361 6941466: Oracle rebranding changes for Hotspot repositories
trims
parents: 1
diff changeset
    20
 * or visit www.oracle.com if you need additional information or have any
f4b087cbb361 6941466: Oracle rebranding changes for Hotspot repositories
trims
parents: 1
diff changeset
    21
 * questions.
1
489c9b5090e2 Initial load
duke
parents:
diff changeset
    22
 *
489c9b5090e2 Initial load
duke
parents:
diff changeset
    23
 */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    24
7397
5b173b4ca846 6989984: Use standard include model for Hospot
stefank
parents: 5547
diff changeset
    25
#include "precompiled.hpp"
5b173b4ca846 6989984: Use standard include model for Hospot
stefank
parents: 5547
diff changeset
    26
#include "utilities/utf8.hpp"
1
489c9b5090e2 Initial load
duke
parents:
diff changeset
    27
489c9b5090e2 Initial load
duke
parents:
diff changeset
    28
// Assume the utf8 string is in legal form and has been
489c9b5090e2 Initial load
duke
parents:
diff changeset
    29
// checked in the class file parser/format checker.
489c9b5090e2 Initial load
duke
parents:
diff changeset
    30
char* UTF8::next(const char* str, jchar* value) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    31
  unsigned const char *ptr = (const unsigned char *)str;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    32
  unsigned char ch, ch2, ch3;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    33
  int length = -1;              /* bad length */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    34
  jchar result;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    35
  switch ((ch = ptr[0]) >> 4) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    36
    default:
489c9b5090e2 Initial load
duke
parents:
diff changeset
    37
    result = ch;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    38
    length = 1;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    39
    break;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    40
489c9b5090e2 Initial load
duke
parents:
diff changeset
    41
  case 0x8: case 0x9: case 0xA: case 0xB: case 0xF:
489c9b5090e2 Initial load
duke
parents:
diff changeset
    42
    /* Shouldn't happen. */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    43
    break;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    44
489c9b5090e2 Initial load
duke
parents:
diff changeset
    45
  case 0xC: case 0xD:
489c9b5090e2 Initial load
duke
parents:
diff changeset
    46
    /* 110xxxxx  10xxxxxx */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    47
    if (((ch2 = ptr[1]) & 0xC0) == 0x80) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    48
      unsigned char high_five = ch & 0x1F;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    49
      unsigned char low_six = ch2 & 0x3F;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    50
      result = (high_five << 6) + low_six;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    51
      length = 2;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    52
      break;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    53
    }
489c9b5090e2 Initial load
duke
parents:
diff changeset
    54
    break;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    55
489c9b5090e2 Initial load
duke
parents:
diff changeset
    56
  case 0xE:
489c9b5090e2 Initial load
duke
parents:
diff changeset
    57
    /* 1110xxxx 10xxxxxx 10xxxxxx */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    58
    if (((ch2 = ptr[1]) & 0xC0) == 0x80) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    59
      if (((ch3 = ptr[2]) & 0xC0) == 0x80) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    60
        unsigned char high_four = ch & 0x0f;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    61
        unsigned char mid_six = ch2 & 0x3f;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    62
        unsigned char low_six = ch3 & 0x3f;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    63
        result = (((high_four << 6) + mid_six) << 6) + low_six;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    64
        length = 3;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    65
      }
489c9b5090e2 Initial load
duke
parents:
diff changeset
    66
    }
489c9b5090e2 Initial load
duke
parents:
diff changeset
    67
    break;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    68
  } /* end of switch */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    69
489c9b5090e2 Initial load
duke
parents:
diff changeset
    70
  if (length <= 0) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    71
    *value = ptr[0];    /* default bad result; */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    72
    return (char*)(ptr + 1); // make progress somehow
489c9b5090e2 Initial load
duke
parents:
diff changeset
    73
  }
489c9b5090e2 Initial load
duke
parents:
diff changeset
    74
489c9b5090e2 Initial load
duke
parents:
diff changeset
    75
  *value = result;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    76
489c9b5090e2 Initial load
duke
parents:
diff changeset
    77
  // The assert is correct but the .class file is wrong
489c9b5090e2 Initial load
duke
parents:
diff changeset
    78
  // assert(UNICODE::utf8_size(result) == length, "checking reverse computation");
489c9b5090e2 Initial load
duke
parents:
diff changeset
    79
  return (char *)(ptr + length);
489c9b5090e2 Initial load
duke
parents:
diff changeset
    80
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
    81
489c9b5090e2 Initial load
duke
parents:
diff changeset
    82
char* UTF8::next_character(const char* str, jint* value) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    83
  unsigned const char *ptr = (const unsigned char *)str;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    84
  /* See if it's legal supplementary character:
489c9b5090e2 Initial load
duke
parents:
diff changeset
    85
     11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx */
489c9b5090e2 Initial load
duke
parents:
diff changeset
    86
  if (is_supplementary_character(ptr)) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
    87
    *value = get_supplementary_character(ptr);
489c9b5090e2 Initial load
duke
parents:
diff changeset
    88
    return (char *)(ptr + 6);
489c9b5090e2 Initial load
duke
parents:
diff changeset
    89
  }
489c9b5090e2 Initial load
duke
parents:
diff changeset
    90
  jchar result;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    91
  char* next_ch = next(str, &result);
489c9b5090e2 Initial load
duke
parents:
diff changeset
    92
  *value = result;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    93
  return next_ch;
489c9b5090e2 Initial load
duke
parents:
diff changeset
    94
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
    95
489c9b5090e2 Initial load
duke
parents:
diff changeset
    96
// Count bytes of the form 10xxxxxx and deduct this count
489c9b5090e2 Initial load
duke
parents:
diff changeset
    97
// from the total byte count.  The utf8 string must be in
489c9b5090e2 Initial load
duke
parents:
diff changeset
    98
// legal form which has been verified in the format checker.
489c9b5090e2 Initial load
duke
parents:
diff changeset
    99
int UTF8::unicode_length(const char* str, int len) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   100
  int num_chars = len;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   101
  for (int i = 0; i < len; i++) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   102
    if ((str[i] & 0xC0) == 0x80) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   103
      --num_chars;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   104
    }
489c9b5090e2 Initial load
duke
parents:
diff changeset
   105
  }
489c9b5090e2 Initial load
duke
parents:
diff changeset
   106
  return num_chars;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   107
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   108
489c9b5090e2 Initial load
duke
parents:
diff changeset
   109
// Count bytes of the utf8 string except those in form
489c9b5090e2 Initial load
duke
parents:
diff changeset
   110
// 10xxxxxx which only appear in multibyte characters.
489c9b5090e2 Initial load
duke
parents:
diff changeset
   111
// The utf8 string must be in legal form and has been
489c9b5090e2 Initial load
duke
parents:
diff changeset
   112
// verified in the format checker.
489c9b5090e2 Initial load
duke
parents:
diff changeset
   113
int UTF8::unicode_length(const char* str) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   114
  int num_chars = 0;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   115
  for (const char* p = str; *p; p++) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   116
    if (((*p) & 0xC0) != 0x80) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   117
      num_chars++;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   118
    }
489c9b5090e2 Initial load
duke
parents:
diff changeset
   119
  }
489c9b5090e2 Initial load
duke
parents:
diff changeset
   120
  return num_chars;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   121
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   122
489c9b5090e2 Initial load
duke
parents:
diff changeset
   123
// Writes a jchar a utf8 and returns the end
489c9b5090e2 Initial load
duke
parents:
diff changeset
   124
static u_char* utf8_write(u_char* base, jchar ch) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   125
  if ((ch != 0) && (ch <=0x7f)) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   126
    base[0] = (u_char) ch;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   127
    return base + 1;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   128
  }
489c9b5090e2 Initial load
duke
parents:
diff changeset
   129
489c9b5090e2 Initial load
duke
parents:
diff changeset
   130
  if (ch <= 0x7FF) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   131
    /* 11 bits or less. */
489c9b5090e2 Initial load
duke
parents:
diff changeset
   132
    unsigned char high_five = ch >> 6;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   133
    unsigned char low_six = ch & 0x3F;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   134
    base[0] = high_five | 0xC0; /* 110xxxxx */
489c9b5090e2 Initial load
duke
parents:
diff changeset
   135
    base[1] = low_six | 0x80;   /* 10xxxxxx */
489c9b5090e2 Initial load
duke
parents:
diff changeset
   136
    return base + 2;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   137
  }
489c9b5090e2 Initial load
duke
parents:
diff changeset
   138
  /* possibly full 16 bits. */
489c9b5090e2 Initial load
duke
parents:
diff changeset
   139
  char high_four = ch >> 12;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   140
  char mid_six = (ch >> 6) & 0x3F;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   141
  char low_six = ch & 0x3f;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   142
  base[0] = high_four | 0xE0; /* 1110xxxx */
489c9b5090e2 Initial load
duke
parents:
diff changeset
   143
  base[1] = mid_six | 0x80;   /* 10xxxxxx */
489c9b5090e2 Initial load
duke
parents:
diff changeset
   144
  base[2] = low_six | 0x80;   /* 10xxxxxx */
489c9b5090e2 Initial load
duke
parents:
diff changeset
   145
  return base + 3;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   146
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   147
489c9b5090e2 Initial load
duke
parents:
diff changeset
   148
// Decodes 'unicode_length' characters from 'utf8_str' into 'unicode_str'.
// The caller provides room for 'unicode_length' jchars; decoding stops after
// that many characters have been produced, not at a terminating zero byte.
void UTF8::convert_to_unicode(const char* utf8_str, jchar* unicode_str, int unicode_length) {
  const char* cursor = utf8_str;
  int filled = 0;

  /* ASCII case loop optimization: copy leading 7-bit bytes directly */
  while (filled < unicode_length) {
    const unsigned char byte = (unsigned char)cursor[0];
    if (byte > 0x7F) {
      break;
    }
    unicode_str[filled] = byte;
    filled++;
    cursor++;
  }

  /* General case for whatever is left after the first non-ASCII byte */
  while (filled < unicode_length) {
    cursor = UTF8::next(cursor, &unicode_str[filled]);
    filled++;
  }
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   164
14477
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   165
// returns the quoted ascii length of a 0-terminated utf8 string
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   166
int UTF8::quoted_ascii_length(const char* utf8_str, int utf8_length) {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   167
  const char *ptr = utf8_str;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   168
  const char* end = ptr + utf8_length;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   169
  int result = 0;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   170
  while (ptr < end) {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   171
    jchar c;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   172
    ptr = UTF8::next(ptr, &c);
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   173
    if (c >= 32 && c < 127) {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   174
      result++;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   175
    } else {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   176
      result += 6;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   177
    }
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   178
  }
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   179
  return result;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   180
}
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   181
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   182
// converts a utf8 string to quoted ascii
16602
5df51d3bc550 8011048: Possible reading from unmapped memory in UTF8::as_quoted_ascii()
iklam
parents: 14477
diff changeset
   183
void UTF8::as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen) {
14477
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   184
  const char *ptr = utf8_str;
16602
5df51d3bc550 8011048: Possible reading from unmapped memory in UTF8::as_quoted_ascii()
iklam
parents: 14477
diff changeset
   185
  const char *utf8_end = ptr + utf8_length;
14477
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   186
  char* p = buf;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   187
  char* end = buf + buflen;
16602
5df51d3bc550 8011048: Possible reading from unmapped memory in UTF8::as_quoted_ascii()
iklam
parents: 14477
diff changeset
   188
  while (ptr < utf8_end) {
14477
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   189
    jchar c;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   190
    ptr = UTF8::next(ptr, &c);
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   191
    if (c >= 32 && c < 127) {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   192
      if (p + 1 >= end) break;      // string is truncated
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   193
      *p++ = (char)c;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   194
    } else {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   195
      if (p + 6 >= end) break;      // string is truncated
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   196
      sprintf(p, "\\u%04x", c);
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   197
      p += 6;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   198
    }
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   199
  }
16602
5df51d3bc550 8011048: Possible reading from unmapped memory in UTF8::as_quoted_ascii()
iklam
parents: 14477
diff changeset
   200
  assert(p < end, "sanity");
14477
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   201
  *p = '\0';
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   202
}
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   203
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   204
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   205
const char* UTF8::from_quoted_ascii(const char* quoted_ascii_str) {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   206
  const char *ptr = quoted_ascii_str;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   207
  char* result = NULL;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   208
  while (*ptr != '\0') {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   209
    char c = *ptr;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   210
    if (c < 32 || c >= 127) break;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   211
  }
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   212
  if (*ptr == '\0') {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   213
    // nothing to do so return original string
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   214
    return quoted_ascii_str;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   215
  }
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   216
  // everything up to this point was ok.
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   217
  int length = ptr - quoted_ascii_str;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   218
  char* buffer = NULL;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   219
  for (int round = 0; round < 2; round++) {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   220
    while (*ptr != '\0') {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   221
      if (*ptr != '\\') {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   222
        if (buffer != NULL) {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   223
          buffer[length] = *ptr;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   224
        }
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   225
        length++;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   226
      } else {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   227
        switch (ptr[1]) {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   228
          case 'u': {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   229
            ptr += 2;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   230
            jchar value=0;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   231
            for (int i=0; i<4; i++) {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   232
              char c = *ptr++;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   233
              switch (c) {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   234
                case '0': case '1': case '2': case '3': case '4':
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   235
                case '5': case '6': case '7': case '8': case '9':
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   236
                  value = (value << 4) + c - '0';
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   237
                  break;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   238
                case 'a': case 'b': case 'c':
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   239
                case 'd': case 'e': case 'f':
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   240
                  value = (value << 4) + 10 + c - 'a';
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   241
                  break;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   242
                case 'A': case 'B': case 'C':
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   243
                case 'D': case 'E': case 'F':
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   244
                  value = (value << 4) + 10 + c - 'A';
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   245
                  break;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   246
                default:
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   247
                  ShouldNotReachHere();
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   248
              }
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   249
            }
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   250
            if (buffer == NULL) {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   251
              char utf8_buffer[4];
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   252
              char* next = (char*)utf8_write((u_char*)utf8_buffer, value);
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   253
              length += next - utf8_buffer;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   254
            } else {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   255
              char* next = (char*)utf8_write((u_char*)&buffer[length], value);
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   256
              length += next - &buffer[length];
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   257
            }
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   258
            break;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   259
          }
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   260
          case 't': if (buffer != NULL) buffer[length] = '\t'; ptr += 2; length++; break;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   261
          case 'n': if (buffer != NULL) buffer[length] = '\n'; ptr += 2; length++; break;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   262
          case 'r': if (buffer != NULL) buffer[length] = '\r'; ptr += 2; length++; break;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   263
          case 'f': if (buffer != NULL) buffer[length] = '\f'; ptr += 2; length++; break;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   264
          default:
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   265
            ShouldNotReachHere();
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   266
        }
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   267
      }
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   268
    }
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   269
    if (round == 0) {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   270
      buffer = NEW_RESOURCE_ARRAY(char, length + 1);
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   271
      ptr = quoted_ascii_str;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   272
    } else {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   273
      buffer[length] = '\0';
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   274
    }
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   275
  }
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   276
  return buffer;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   277
}
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   278
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   279
1
489c9b5090e2 Initial load
duke
parents:
diff changeset
   280
// Returns NULL if 'c' it not found. This only works as long
489c9b5090e2 Initial load
duke
parents:
diff changeset
   281
// as 'c' is an ASCII character
8076
96d498ec7ae1 6990754: Use native memory and reference counting to implement SymbolTable
coleenp
parents: 7397
diff changeset
   282
const jbyte* UTF8::strrchr(const jbyte* base, int length, jbyte c) {
1
489c9b5090e2 Initial load
duke
parents:
diff changeset
   283
  assert(length >= 0, "sanity check");
489c9b5090e2 Initial load
duke
parents:
diff changeset
   284
  assert(c >= 0, "does not work for non-ASCII characters");
489c9b5090e2 Initial load
duke
parents:
diff changeset
   285
  // Skip backwards in string until 'c' is found or end is reached
489c9b5090e2 Initial load
duke
parents:
diff changeset
   286
  while(--length >= 0 && base[length] != c);
489c9b5090e2 Initial load
duke
parents:
diff changeset
   287
  return (length < 0) ? NULL : &base[length];
489c9b5090e2 Initial load
duke
parents:
diff changeset
   288
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   289
8076
96d498ec7ae1 6990754: Use native memory and reference counting to implement SymbolTable
coleenp
parents: 7397
diff changeset
   290
bool UTF8::equal(const jbyte* base1, int length1, const jbyte* base2, int length2) {
1
489c9b5090e2 Initial load
duke
parents:
diff changeset
   291
  // Length must be the same
489c9b5090e2 Initial load
duke
parents:
diff changeset
   292
  if (length1 != length2) return false;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   293
  for (int i = 0; i < length1; i++) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   294
    if (base1[i] != base2[i]) return false;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   295
  }
489c9b5090e2 Initial load
duke
parents:
diff changeset
   296
  return true;
489c9b5090e2 Initial load
duke
parents:
diff changeset
   297
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   298
489c9b5090e2 Initial load
duke
parents:
diff changeset
   299
// Returns true if the six bytes starting at 'str' match the pattern
//   0xED 0xAx 10xxxxxx  0xED 0xBx 10xxxxxx
// i.e. two consecutive 3-byte sequences. Bytes are examined in order and
// the check stops at the first byte that does not match.
bool UTF8::is_supplementary_character(const unsigned char* str) {
  // First 3-byte sequence: 0xED 0xAx 10xxxxxx
  if ((str[0] & 0xFF) != 0xED) return false;
  if ((str[1] & 0xF0) != 0xA0) return false;
  if ((str[2] & 0xC0) != 0x80) return false;
  // Second 3-byte sequence: 0xED 0xBx 10xxxxxx
  if ((str[3] & 0xFF) != 0xED) return false;
  if ((str[4] & 0xF0) != 0xB0) return false;
  return (str[5] & 0xC0) == 0x80;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   303
489c9b5090e2 Initial load
duke
parents:
diff changeset
   304
// Assembles a code point from the six bytes at 'str': the payload bits of
// str[1], str[2], str[4] and str[5] are combined and offset by 0x10000.
// str[0] and str[3] are not read and no validation is performed here.
jint UTF8::get_supplementary_character(const unsigned char* str) {
  // Contribution of the first 3-byte sequence (4 + 6 payload bits)
  const jint upper = ((str[1] & 0x0f) << 16) + ((str[2] & 0x3f) << 10);
  // Contribution of the second 3-byte sequence (4 + 6 payload bits)
  const jint lower = ((str[4] & 0x0f) << 6) + (str[5] & 0x3f);
  return 0x10000 + upper + lower;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   308
489c9b5090e2 Initial load
duke
parents:
diff changeset
   309
489c9b5090e2 Initial load
duke
parents:
diff changeset
   310
//-------------------------------------------------------------------------------------
489c9b5090e2 Initial load
duke
parents:
diff changeset
   311
489c9b5090e2 Initial load
duke
parents:
diff changeset
   312
489c9b5090e2 Initial load
duke
parents:
diff changeset
   313
// Returns the number of bytes used to encode 'c':
//   0x0001..0x007F -> 1 byte
//   0x0000 and 0x0080..0x07FF -> 2 bytes
//   everything else -> 3 bytes
// Note that 0 is reported as two bytes, not one.
int UNICODE::utf8_size(jchar c) {
  if (c == 0 || c > 0x007F) {
    return (c <= 0x07FF) ? 2 : 3;
  }
  return 1;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   318
489c9b5090e2 Initial load
duke
parents:
diff changeset
   319
// Returns the total number of bytes needed to encode the 'length' chars at
// 'base', not counting any terminating NUL. Each char contributes
// utf8_size(c) bytes.
int UNICODE::utf8_length(jchar* base, int length) {
  int total = 0;
  int index = 0;
  while (index < length) {
    total += utf8_size(base[index]);
    index++;
  }
  return total;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   329
489c9b5090e2 Initial load
duke
parents:
diff changeset
   330
// Encodes the 'length' chars at 'base' into a NUL-terminated string stored
// in a buffer obtained from NEW_RESOURCE_ARRAY. The buffer is sized from
// utf8_length() plus one byte for the terminator, so nothing is truncated.
char* UNICODE::as_utf8(jchar* base, int length) {
  int utf8_len = utf8_length(base, length);
  const int capacity = utf8_len + 1;  // encoded bytes + terminating NUL
  u_char* storage = NEW_RESOURCE_ARRAY(u_char, capacity);
  // Delegate the actual encoding to the bounded variant
  char* result = as_utf8(base, length, (char*) storage, capacity);
  assert((int) strlen(result) == utf8_len, "length prediction must be correct");
  return result;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   337
489c9b5090e2 Initial load
duke
parents:
diff changeset
   338
// Encodes the 'length' chars at 'base' into the caller-supplied buffer
// 'buf' of 'buflen' bytes and NUL-terminates the result.
//
// If the complete encoding plus terminator does not fit, the output is
// truncated at a character boundary: a char is only written when, after
// writing it, at least one byte remains for the terminating NUL.
//
// 'buflen' must be at least 1, because the terminator is always stored;
// a zero-sized buffer would be written out of bounds.
//
// Returns 'buf'.
char* UNICODE::as_utf8(jchar* base, int length, char* buf, int buflen) {
  assert(buflen > 0, "zero length output buffer");
  u_char* p = (u_char*)buf;
  for (int index = 0; index < length; index++) {
    jchar c = base[index];
    // Account for this char up front; stop if it would leave no room
    // for the terminator.
    buflen -= utf8_size(c);
    if (buflen <= 0) break; // string is truncated
    p = utf8_write(p, c);
  }
  *p = '\0';
  return buf;
}
489c9b5090e2 Initial load
duke
parents:
diff changeset
   349
489c9b5090e2 Initial load
duke
parents:
diff changeset
   350
void UNICODE::convert_to_utf8(const jchar* base, int length, char* utf8_buffer) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   351
  for(int index = 0; index < length; index++) {
489c9b5090e2 Initial load
duke
parents:
diff changeset
   352
    utf8_buffer = (char*)utf8_write((u_char*)utf8_buffer, base[index]);
489c9b5090e2 Initial load
duke
parents:
diff changeset
   353
  }
489c9b5090e2 Initial load
duke
parents:
diff changeset
   354
  *utf8_buffer = '\0';
489c9b5090e2 Initial load
duke
parents:
diff changeset
   355
}
14477
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   356
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   357
// returns the quoted ascii length of a unicode string
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   358
int UNICODE::quoted_ascii_length(jchar* base, int length) {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   359
  int result = 0;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   360
  for (int i = 0; i < length; i++) {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   361
    jchar c = base[i];
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   362
    if (c >= 32 && c < 127) {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   363
      result++;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   364
    } else {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   365
      result += 6;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   366
    }
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   367
  }
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   368
  return result;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   369
}
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   370
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   371
// converts a utf8 string to quoted ascii
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   372
void UNICODE::as_quoted_ascii(const jchar* base, int length, char* buf, int buflen) {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   373
  char* p = buf;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   374
  char* end = buf + buflen;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   375
  for (int index = 0; index < length; index++) {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   376
    jchar c = base[index];
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   377
    if (c >= 32 && c < 127) {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   378
      if (p + 1 >= end) break;      // string is truncated
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   379
      *p++ = (char)c;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   380
    } else {
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   381
      if (p + 6 >= end) break;      // string is truncated
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   382
      sprintf(p, "\\u%04x", c);
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   383
      p += 6;
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   384
    }
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   385
  }
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   386
  *p = '\0';
95e66ea71f71 6830717: replay of compilations would help with debugging
minqi
parents: 8921
diff changeset
   387
}
24237
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   388
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   389
#ifndef PRODUCT
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   390
void TestAsUtf8() {
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   391
  char res[60];
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   392
  jchar str[20];
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   393
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   394
  for (int i = 0; i < 20; i++) {
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   395
    str[i] = 0x0800; // char that is 2B in UTF-16 but 3B in UTF-8
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   396
  }
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   397
  str[19] = (jchar)'\0';
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   398
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   399
  // The resulting string in UTF-8 is 3*19 bytes long, but should be truncated
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   400
  UNICODE::as_utf8(str, 19, res, 10);
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   401
  assert(strlen(res) == 9, "string should be truncated here");
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   402
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   403
  UNICODE::as_utf8(str, 19, res, 18);
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   404
  assert(strlen(res) == 15, "string should be truncated here");
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   405
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   406
  UNICODE::as_utf8(str, 19, res, 20);
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   407
  assert(strlen(res) == 18, "string should be truncated here");
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   408
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   409
  // Test with an "unbounded" buffer
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   410
  UNICODE::as_utf8(str, 19, res, INT_MAX);
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   411
  assert(strlen(res) == 3*19, "string should end here");
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   412
}
7b210ef8c830 6664815: Eliminate redundant memcpy operation in jni_GetStringUTFRegion
mgerdin
parents: 16602
diff changeset
   413
#endif