jdk/src/share/npt/utf.c
author kvn
Fri, 08 Jan 2010 09:51:24 -0800 (2010-01-08)
changeset 4583 b36d52bd2d19
parent 2 90ce3da70b43
child 5506 202f599c92aa
permissions -rw-r--r--
6910484: incorrect integer optimization (loosing and op-r in a given example) Summary: Remove AND operation only if mask is equal to shift. Reviewed-by: never
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
 * Copyright 2004-2005 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
/* Misc functions for conversion of Unicode and UTF-8 and platform encoding */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    27
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
#include <stdio.h>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    29
#include <stddef.h>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    30
#include <stdlib.h>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    31
#include <stdarg.h>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    32
#include <string.h>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    33
#include <ctype.h>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    34
90ce3da70b43 Initial load
duke
parents:
diff changeset
    35
#include "jni.h"
90ce3da70b43 Initial load
duke
parents:
diff changeset
    36
90ce3da70b43 Initial load
duke
parents:
diff changeset
    37
#include "utf.h"
90ce3da70b43 Initial load
duke
parents:
diff changeset
    38
90ce3da70b43 Initial load
duke
parents:
diff changeset
    39
/*
 * Fatal error reporter.
 *    Writes one line to stderr naming the source file, the line number
 *    and the message, then terminates the process with abort().
 *    This function never returns to its caller.
 */
void
utfError(char *file, int line, char *message)
{
    /* The result of fprintf is deliberately ignored; we abort regardless. */
    fprintf(stderr, "UTF ERROR [\"%s\":%d]: %s\n", file, line, message);
    abort();
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
    48
90ce3da70b43 Initial load
duke
parents:
diff changeset
    49
/*
90ce3da70b43 Initial load
duke
parents:
diff changeset
    50
 * Convert UTF-8 to UTF-16
90ce3da70b43 Initial load
duke
parents:
diff changeset
    51
 *    Returns length or -1 if output overflows.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    52
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    53
int JNICALL
90ce3da70b43 Initial load
duke
parents:
diff changeset
    54
utf8ToUtf16(struct UtfInst *ui, jbyte *utf8, int len, unsigned short *output, int outputMaxLen)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    55
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
    56
    int outputLen;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    57
    int i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    58
90ce3da70b43 Initial load
duke
parents:
diff changeset
    59
    UTF_ASSERT(utf8);
90ce3da70b43 Initial load
duke
parents:
diff changeset
    60
    UTF_ASSERT(len>=0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
    61
    UTF_ASSERT(output);
90ce3da70b43 Initial load
duke
parents:
diff changeset
    62
    UTF_ASSERT(outputMaxLen>0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
    63
90ce3da70b43 Initial load
duke
parents:
diff changeset
    64
    i = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    65
    outputLen = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    66
    while ( i<len ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    67
        unsigned code, x, y, z;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    68
90ce3da70b43 Initial load
duke
parents:
diff changeset
    69
        if ( outputLen >= outputMaxLen ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    70
            return -1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    71
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    72
        x = (unsigned char)utf8[i++];
90ce3da70b43 Initial load
duke
parents:
diff changeset
    73
        code = x;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    74
        if ( (x & 0xE0)==0xE0 ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    75
            y = (unsigned char)utf8[i++];
90ce3da70b43 Initial load
duke
parents:
diff changeset
    76
            z = (unsigned char)utf8[i++];
90ce3da70b43 Initial load
duke
parents:
diff changeset
    77
            code = ((x & 0xF)<<12) + ((y & 0x3F)<<6) + (z & 0x3F);
90ce3da70b43 Initial load
duke
parents:
diff changeset
    78
        } else if ( (x & 0xC0)==0xC0 ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    79
            y = (unsigned char)utf8[i++];
90ce3da70b43 Initial load
duke
parents:
diff changeset
    80
            code = ((x & 0x1F)<<6) + (y & 0x3F);
90ce3da70b43 Initial load
duke
parents:
diff changeset
    81
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    82
        output[outputLen++] = code;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    83
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    84
    return outputLen;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    85
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
    86
90ce3da70b43 Initial load
duke
parents:
diff changeset
    87
/*
90ce3da70b43 Initial load
duke
parents:
diff changeset
    88
 * Convert UTF-16 to UTF-8 Modified
90ce3da70b43 Initial load
duke
parents:
diff changeset
    89
 *    Returns length or -1 if output overflows.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    90
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    91
int JNICALL
90ce3da70b43 Initial load
duke
parents:
diff changeset
    92
utf16ToUtf8m(struct UtfInst *ui, unsigned short *utf16, int len, jbyte *output, int outputMaxLen)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    93
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
    94
    int i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    95
    int outputLen;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    96
90ce3da70b43 Initial load
duke
parents:
diff changeset
    97
    UTF_ASSERT(utf16);
90ce3da70b43 Initial load
duke
parents:
diff changeset
    98
    UTF_ASSERT(len>=0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
    99
    UTF_ASSERT(output);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   100
    UTF_ASSERT(outputMaxLen>0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   101
90ce3da70b43 Initial load
duke
parents:
diff changeset
   102
    outputLen = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   103
    for (i = 0; i < len; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   104
        unsigned code;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   105
90ce3da70b43 Initial load
duke
parents:
diff changeset
   106
        code = utf16[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   107
        if ( code >= 0x0001 && code <= 0x007F ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   108
            output[outputLen++] = code;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   109
        } else if ( code == 0 || ( code >= 0x0080 && code <= 0x07FF ) ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   110
            output[outputLen++] = ((code>>6) & 0x1F) | 0xC0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   111
            output[outputLen++] = (code & 0x3F) | 0x80;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   112
        } else if ( code >= 0x0800 && code <= 0xFFFF ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   113
            output[outputLen++] = ((code>>12) & 0x0F) | 0xE0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   114
            output[outputLen++] = ((code>>6) & 0x3F) | 0x80;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   115
            output[outputLen++] = (code & 0x3F) | 0x80;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   116
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   117
        if ( outputLen > outputMaxLen ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   118
            return -1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   119
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   120
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   121
    output[outputLen] = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   122
    return outputLen;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   123
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
   124
90ce3da70b43 Initial load
duke
parents:
diff changeset
   125
int JNICALL
90ce3da70b43 Initial load
duke
parents:
diff changeset
   126
utf16ToUtf8s(struct UtfInst *ui, unsigned short *utf16, int len, jbyte *output, int outputMaxLen)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   127
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
   128
    return -1; /* FIXUP */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   129
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
   130
90ce3da70b43 Initial load
duke
parents:
diff changeset
   131
/* Determine length of this Standard UTF-8 in Modified UTF-8.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   132
 *    Validation is done of the basic UTF encoding rules, returns
90ce3da70b43 Initial load
duke
parents:
diff changeset
   133
 *    length (no change) when errors are detected in the UTF encoding.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   134
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   135
 *    Note: Accepts Modified UTF-8 also, no verification on the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   136
 *          correctness of Standard UTF-8 is done. e,g, 0xC080 input is ok.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   137
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   138
int JNICALL
90ce3da70b43 Initial load
duke
parents:
diff changeset
   139
utf8sToUtf8mLength(struct UtfInst *ui, jbyte *string, int length)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   140
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
   141
    int newLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   142
    int i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   143
90ce3da70b43 Initial load
duke
parents:
diff changeset
   144
    newLength = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   145
    for ( i = 0 ; i < length ; i++ ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   146
        unsigned byte;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   147
90ce3da70b43 Initial load
duke
parents:
diff changeset
   148
        byte = (unsigned char)string[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   149
        if ( (byte & 0x80) == 0 ) { /* 1byte encoding */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   150
            newLength++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   151
            if ( byte == 0 ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   152
                newLength++; /* We gain one byte in length on NULL bytes */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   153
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   154
        } else if ( (byte & 0xE0) == 0xC0 ) { /* 2byte encoding */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   155
            /* Check encoding of following bytes */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   156
            if ( (i+1) >= length || (string[i+1] & 0xC0) != 0x80 ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   157
                break; /* Error condition */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   158
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   159
            i++; /* Skip next byte */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   160
            newLength += 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   161
        } else if ( (byte & 0xF0) == 0xE0 ) { /* 3byte encoding */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   162
            /* Check encoding of following bytes */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   163
            if ( (i+2) >= length || (string[i+1] & 0xC0) != 0x80
90ce3da70b43 Initial load
duke
parents:
diff changeset
   164
                                 || (string[i+2] & 0xC0) != 0x80 ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   165
                break; /* Error condition */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   166
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   167
            i += 2; /* Skip next two bytes */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   168
            newLength += 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   169
        } else if ( (byte & 0xF8) == 0xF0 ) { /* 4byte encoding */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   170
            /* Check encoding of following bytes */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   171
            if ( (i+3) >= length || (string[i+1] & 0xC0) != 0x80
90ce3da70b43 Initial load
duke
parents:
diff changeset
   172
                                 || (string[i+2] & 0xC0) != 0x80
90ce3da70b43 Initial load
duke
parents:
diff changeset
   173
                                 || (string[i+3] & 0xC0) != 0x80 ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   174
                break; /* Error condition */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   175
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   176
            i += 3; /* Skip next 3 bytes */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   177
            newLength += 6; /* 4byte encoding turns into 2 3byte ones */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   178
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   179
            break; /* Error condition */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   180
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   181
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   182
    if ( i != length ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   183
        /* Error in finding new length, return old length so no conversion */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   184
        /* FIXUP: ERROR_MESSAGE? */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   185
        return length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   186
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   187
    return newLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   188
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
   189
90ce3da70b43 Initial load
duke
parents:
diff changeset
   190
/* Convert Standard UTF-8 to Modified UTF-8.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   191
 *    Assumes the UTF-8 encoding was validated by utf8mLength() above.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   192
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   193
 *    Note: Accepts Modified UTF-8 also, no verification on the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   194
 *          correctness of Standard UTF-8 is done. e,g, 0xC080 input is ok.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   195
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   196
void JNICALL
90ce3da70b43 Initial load
duke
parents:
diff changeset
   197
utf8sToUtf8m(struct UtfInst *ui, jbyte *string, int length, jbyte *newString, int newLength)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   198
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
   199
    int i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   200
    int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   201
90ce3da70b43 Initial load
duke
parents:
diff changeset
   202
    j = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   203
    for ( i = 0 ; i < length ; i++ ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   204
        unsigned byte1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   205
90ce3da70b43 Initial load
duke
parents:
diff changeset
   206
        byte1 = (unsigned char)string[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   207
90ce3da70b43 Initial load
duke
parents:
diff changeset
   208
        /* NULL bytes and bytes starting with 11110xxx are special */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   209
        if ( (byte1 & 0x80) == 0 ) { /* 1byte encoding */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   210
            if ( byte1 == 0 ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   211
                /* Bits out: 11000000 10000000 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   212
                newString[j++] = (jbyte)0xC0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   213
                newString[j++] = (jbyte)0x80;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   214
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   215
                /* Single byte */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   216
                newString[j++] = byte1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   217
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   218
        } else if ( (byte1 & 0xE0) == 0xC0 ) { /* 2byte encoding */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   219
            newString[j++] = byte1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   220
            newString[j++] = string[++i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   221
        } else if ( (byte1 & 0xF0) == 0xE0 ) { /* 3byte encoding */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   222
            newString[j++] = byte1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   223
            newString[j++] = string[++i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   224
            newString[j++] = string[++i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   225
        } else if ( (byte1 & 0xF8) == 0xF0 ) { /* 4byte encoding */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   226
            /* Beginning of 4byte encoding, turn into 2 3byte encodings */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   227
            unsigned byte2, byte3, byte4, u21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   228
90ce3da70b43 Initial load
duke
parents:
diff changeset
   229
            /* Bits in: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   230
            byte2 = (unsigned char)string[++i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   231
            byte3 = (unsigned char)string[++i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   232
            byte4 = (unsigned char)string[++i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   233
            /* Reconstruct full 21bit value */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   234
            u21  = (byte1 & 0x07) << 18;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   235
            u21 += (byte2 & 0x3F) << 12;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   236
            u21 += (byte3 & 0x3F) << 6;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   237
            u21 += (byte4 & 0x3F);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   238
            /* Bits out: 11101101 1010xxxx 10xxxxxx */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   239
            newString[j++] = (jbyte)0xED;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   240
            newString[j++] = (jbyte)(0xA0 + (((u21 >> 16) - 1) & 0x0F));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   241
            newString[j++] = (jbyte)(0x80 + ((u21 >> 10) & 0x3F));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   242
            /* Bits out: 11101101 1011xxxx 10xxxxxx */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   243
            newString[j++] = (jbyte)0xED;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   244
            newString[j++] = (jbyte)(0xB0 + ((u21 >>  6) & 0x0F));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   245
            newString[j++] = byte4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   246
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   247
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   248
    UTF_ASSERT(i==length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   249
    UTF_ASSERT(j==newLength);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   250
    newString[j] = (jbyte)0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   251
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
   252
90ce3da70b43 Initial load
duke
parents:
diff changeset
   253
/* Given a Modified UTF-8 string, calculate the Standard UTF-8 length.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   254
 *   Basic validation of the UTF encoding rules is done, and length is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   255
 *   returned (no change) when errors are detected.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   256
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   257
 *   Note: No validation is made that this is indeed Modified UTF-8 coming in.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   258
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   259
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   260
int JNICALL
90ce3da70b43 Initial load
duke
parents:
diff changeset
   261
utf8mToUtf8sLength(struct UtfInst *ui, jbyte *string, int length)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   262
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
   263
    int newLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   264
    int i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   265
90ce3da70b43 Initial load
duke
parents:
diff changeset
   266
    newLength = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   267
    for ( i = 0 ; i < length ; i++ ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   268
        unsigned byte1, byte2, byte3, byte4, byte5, byte6;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   269
90ce3da70b43 Initial load
duke
parents:
diff changeset
   270
        byte1 = (unsigned char)string[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   271
        if ( (byte1 & 0x80) == 0 ) { /* 1byte encoding */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   272
            newLength++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   273
        } else if ( (byte1 & 0xE0) == 0xC0 ) { /* 2byte encoding */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   274
            /* Check encoding of following bytes */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   275
            if ( (i+1) >= length || (string[i+1] & 0xC0) != 0x80 ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   276
                break; /* Error condition */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   277
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   278
            byte2 = (unsigned char)string[++i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   279
            if ( byte1 != 0xC0 || byte2 != 0x80 ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   280
                newLength += 2; /* Normal 2byte encoding, not 0xC080 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   281
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   282
                newLength++;    /* We will turn 0xC080 into 0 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   283
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   284
        } else if ( (byte1 & 0xF0) == 0xE0 ) { /* 3byte encoding */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   285
            /* Check encoding of following bytes */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   286
            if ( (i+2) >= length || (string[i+1] & 0xC0) != 0x80
90ce3da70b43 Initial load
duke
parents:
diff changeset
   287
                                 || (string[i+2] & 0xC0) != 0x80 ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   288
                break; /* Error condition */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   289
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   290
            byte2 = (unsigned char)string[++i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   291
            byte3 = (unsigned char)string[++i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   292
            newLength += 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   293
            /* Possible process a second 3byte encoding */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   294
            if ( (i+3) < length && byte1 == 0xED && (byte2 & 0xF0) == 0xA0 ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   295
                /* See if this is a pair of 3byte encodings */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   296
                byte4 = (unsigned char)string[i+1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   297
                byte5 = (unsigned char)string[i+2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   298
                byte6 = (unsigned char)string[i+3];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   299
                if ( byte4 == 0xED && (byte5 & 0xF0) == 0xB0 ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   300
                    /* Check encoding of 3rd byte */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   301
                    if ( (byte6 & 0xC0) != 0x80 ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   302
                        break; /* Error condition */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   303
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   304
                    newLength++; /* New string will have 4byte encoding */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   305
                    i += 3;       /* Skip next 3 bytes */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   306
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   307
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   308
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   309
            break; /* Error condition */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   310
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   311
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   312
    if ( i != length ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   313
        /* Error in UTF encoding */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   314
        /*  FIXUP: ERROR_MESSAGE()? */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   315
        return length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   316
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   317
    return newLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   318
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
   319
90ce3da70b43 Initial load
duke
parents:
diff changeset
   320
/* Convert a Modified UTF-8 string into a Standard UTF-8 string
90ce3da70b43 Initial load
duke
parents:
diff changeset
   321
 *   It is assumed that this string has been validated in terms of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   322
 *   basic UTF encoding rules by utf8Length() above.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   323
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   324
 *   Note: No validation is made that this is indeed Modified UTF-8 coming in.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   325
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   326
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   327
void JNICALL
90ce3da70b43 Initial load
duke
parents:
diff changeset
   328
utf8mToUtf8s(struct UtfInst *ui, jbyte *string, int length, jbyte *newString, int newLength)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   329
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
   330
    int i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   331
    int j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   332
90ce3da70b43 Initial load
duke
parents:
diff changeset
   333
    j = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   334
    for ( i = 0 ; i < length ; i++ ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   335
        unsigned byte1, byte2, byte3, byte4, byte5, byte6;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   336
90ce3da70b43 Initial load
duke
parents:
diff changeset
   337
        byte1 = (unsigned char)string[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   338
        if ( (byte1 & 0x80) == 0 ) { /* 1byte encoding */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   339
            /* Single byte */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   340
            newString[j++] = byte1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   341
        } else if ( (byte1 & 0xE0) == 0xC0 ) { /* 2byte encoding */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   342
            byte2 = (unsigned char)string[++i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   343
            if ( byte1 != 0xC0 || byte2 != 0x80 ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   344
                newString[j++] = byte1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   345
                newString[j++] = byte2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   346
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   347
                newString[j++] = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   348
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   349
        } else if ( (byte1 & 0xF0) == 0xE0 ) { /* 3byte encoding */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   350
            byte2 = (unsigned char)string[++i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   351
            byte3 = (unsigned char)string[++i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   352
            if ( i+3 < length && byte1 == 0xED && (byte2 & 0xF0) == 0xA0 ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   353
                /* See if this is a pair of 3byte encodings */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   354
                byte4 = (unsigned char)string[i+1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   355
                byte5 = (unsigned char)string[i+2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   356
                byte6 = (unsigned char)string[i+3];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   357
                if ( byte4 == 0xED && (byte5 & 0xF0) == 0xB0 ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   358
                    unsigned u21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   359
90ce3da70b43 Initial load
duke
parents:
diff changeset
   360
                    /* Bits in: 11101101 1010xxxx 10xxxxxx */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   361
                    /* Bits in: 11101101 1011xxxx 10xxxxxx */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   362
                    i += 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   363
90ce3da70b43 Initial load
duke
parents:
diff changeset
   364
                    /* Reconstruct 21 bit code */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   365
                    u21  = ((byte2 & 0x0F) + 1) << 16;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   366
                    u21 += (byte3 & 0x3F) << 10;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   367
                    u21 += (byte5 & 0x0F) << 6;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   368
                    u21 += (byte6 & 0x3F);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   369
90ce3da70b43 Initial load
duke
parents:
diff changeset
   370
                    /* Bits out: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   371
90ce3da70b43 Initial load
duke
parents:
diff changeset
   372
                    /* Convert to 4byte encoding */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   373
                    newString[j++] = 0xF0 + ((u21 >> 18) & 0x07);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   374
                    newString[j++] = 0x80 + ((u21 >> 12) & 0x3F);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   375
                    newString[j++] = 0x80 + ((u21 >>  6) & 0x3F);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   376
                    newString[j++] = 0x80 + (u21 & 0x3F);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   377
                    continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   378
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   379
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   380
            /* Normal 3byte encoding */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   381
            newString[j++] = byte1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   382
            newString[j++] = byte2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   383
            newString[j++] = byte3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   384
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   385
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   386
    UTF_ASSERT(i==length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   387
    UTF_ASSERT(j==newLength);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   388
    newString[j] = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   389
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
   390
90ce3da70b43 Initial load
duke
parents:
diff changeset
   391
/* ================================================================= */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   392
90ce3da70b43 Initial load
duke
parents:
diff changeset
   393
#if 1  /* Test program */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   394
90ce3da70b43 Initial load
duke
parents:
diff changeset
   395
/*
 * Convert any byte array into a printable, NUL terminated string.
 *    Printable 7-bit ASCII bytes are copied as is, every other byte is
 *    written as a 4 character "\xNN" escape (two lowercase hex digits).
 *
 *    ui            Not used by this function.
 *    bytes         Input bytes, need not be NUL terminated.
 *    len           Number of input bytes.
 *    output        Destination buffer.
 *    outputMaxLen  Size of the destination buffer in bytes, this
 *                  includes the byte needed for the terminating NUL.
 *
 *    Returns length of the string written (the NUL is not counted) or
 *    -1 if output overflows. On overflow nothing is written beyond
 *    outputMaxLen bytes, but the contents of output are incomplete and
 *    not NUL terminated.
 */
static int
bytesToPrintable(struct UtfInst *ui, char *bytes, int len, char *output, int outputMaxLen)
{
    static const char hexDigits[] = "0123456789abcdef";
    int outputLen;
    int i;

    UTF_ASSERT(bytes);
    UTF_ASSERT(len>=0);
    UTF_ASSERT(output);
    UTF_ASSERT(outputMaxLen>=0);

    outputLen = 0;
    for ( i=0; i<len ; i++ ) {
        unsigned byte;
        int      isPlain;
        int      needed;

        /* Go through unsigned char so that bytes >= 0x80 do not sign
         *   extend into a huge value when char is a signed type.
         */
        byte    = (unsigned char)bytes[i];
        isPlain = ( byte <= 0x7f && isprint(byte) && !iscntrl(byte) );
        needed  = isPlain ? 1 : 4;

        /* Always keep one byte in reserve for the terminating NUL */
        if ( outputMaxLen - outputLen <= needed ) {
            return -1;
        }
        if ( isPlain ) {
            output[outputLen++] = (char)byte;
        } else {
            /* Escape is built by hand, exactly 4 characters are written */
            output[outputLen++] = '\\';
            output[outputLen++] = 'x';
            output[outputLen++] = hexDigits[(byte >> 4) & 0x0F];
            output[outputLen++] = hexDigits[byte & 0x0F];
        }
    }
    if ( outputLen >= outputMaxLen ) {
        /* Only reached with no input and a zero sized buffer: no room for NUL */
        return -1;
    }
    output[outputLen] = 0;
    return outputLen;
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
   428
90ce3da70b43 Initial load
duke
parents:
diff changeset
   429
static void
90ce3da70b43 Initial load
duke
parents:
diff changeset
   430
test(void)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   431
{
90ce3da70b43 Initial load
duke
parents:
diff changeset
   432
    static char *strings[] = {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   433
                "characters",
90ce3da70b43 Initial load
duke
parents:
diff changeset
   434
                "abcdefghijklmnopqrstuvwxyz",
90ce3da70b43 Initial load
duke
parents:
diff changeset
   435
                "0123456789",
90ce3da70b43 Initial load
duke
parents:
diff changeset
   436
                "!@#$%^&*()_+=-{}[]:;",
90ce3da70b43 Initial load
duke
parents:
diff changeset
   437
                NULL };
90ce3da70b43 Initial load
duke
parents:
diff changeset
   438
    int i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   439
    struct UtfInst *ui;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   440
90ce3da70b43 Initial load
duke
parents:
diff changeset
   441
    ui = utfInitialize(NULL);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   442
90ce3da70b43 Initial load
duke
parents:
diff changeset
   443
    i = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   444
    while ( strings[i] != NULL ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   445
        char *str;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   446
        #define MAX 1024
90ce3da70b43 Initial load
duke
parents:
diff changeset
   447
        char buf0[MAX];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   448
        char buf1[MAX];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   449
        char buf2[MAX];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   450
        unsigned short buf3[MAX];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   451
        int len1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   452
        int len2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   453
        int len3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   454
90ce3da70b43 Initial load
duke
parents:
diff changeset
   455
        str = strings[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   456
90ce3da70b43 Initial load
duke
parents:
diff changeset
   457
        (void)bytesToPrintable(ui, str, (int)strlen(str), buf0, 1024);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   458
90ce3da70b43 Initial load
duke
parents:
diff changeset
   459
        len1 = utf8FromPlatform(ui, str, (int)strlen(str), (jbyte*)buf1, 1024);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   460
90ce3da70b43 Initial load
duke
parents:
diff changeset
   461
        UTF_ASSERT(len1==(int)strlen(str));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   462
90ce3da70b43 Initial load
duke
parents:
diff changeset
   463
        len3 = utf8ToUtf16(ui, (jbyte*)buf1, len1, (jchar*)buf3, 1024);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   464
90ce3da70b43 Initial load
duke
parents:
diff changeset
   465
        UTF_ASSERT(len3==len1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   466
90ce3da70b43 Initial load
duke
parents:
diff changeset
   467
        len1 = utf16ToUtf8m(ui, (jchar*)buf3, len3, (jbyte*)buf1, 1024);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   468
90ce3da70b43 Initial load
duke
parents:
diff changeset
   469
        UTF_ASSERT(len1==len3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   470
        UTF_ASSERT(strcmp(str, buf1) == 0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   471
90ce3da70b43 Initial load
duke
parents:
diff changeset
   472
        len2 = utf8ToPlatform(ui, (jbyte*)buf1, len1, buf2, 1024);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   473
90ce3da70b43 Initial load
duke
parents:
diff changeset
   474
        UTF_ASSERT(len2==len1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   475
        UTF_ASSERT(strcmp(str, buf2) == 0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   476
90ce3da70b43 Initial load
duke
parents:
diff changeset
   477
        i++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   478
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   479
90ce3da70b43 Initial load
duke
parents:
diff changeset
   480
    utfTerminate(ui, NULL);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   481
90ce3da70b43 Initial load
duke
parents:
diff changeset
   482
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
   483
90ce3da70b43 Initial load
duke
parents:
diff changeset
   484
/* Entry point of the test program: runs test() once and returns 0. */
int
main(int argc, char **argv)
{
    /* The command line is not looked at */
    (void)argc;
    (void)argv;

    test();
    return 0;
}
90ce3da70b43 Initial load
duke
parents:
diff changeset
   490
90ce3da70b43 Initial load
duke
parents:
diff changeset
   491
#endif