jdk/src/share/classes/java/lang/CharacterName.java
author sherman
Tue, 18 May 2010 15:36:47 -0700
changeset 5610 fd2427610c7f
child 6374 e214162c907e
permissions -rw-r--r--
6945564: Unicode script support in Character class 6948903: Make Unicode scripts available for use in regular expressions Summary: added Unicode script support Reviewed-by: martin
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
5610
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
     1
/*
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
     2
 * Copyright 2010 Sun Microsystems, Inc.  All Rights Reserved.
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
     4
 *
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
     7
 * published by the Free Software Foundation.  Sun designates this
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
     9
 * by Sun in the LICENSE file that accompanied this code.
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    10
 *
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    15
 * accompanied this code).
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    16
 *
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    20
 *
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    21
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    22
 * CA 95054 USA or visit www.sun.com if you need additional information or
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    23
 * have any questions.
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    24
 */
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    25
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    26
package java.lang;
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    27
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    28
import java.io.DataInputStream;
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    29
import java.io.InputStream;
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    30
import java.lang.ref.SoftReference;
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    31
import java.util.Arrays;
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    32
import java.util.zip.InflaterInputStream;
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    33
import java.security.AccessController;
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    34
import java.security.PrivilegedAction;
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    35
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents:
diff changeset
    36
/**
 * Package-private lookup of character names by code point, backed by the
 * deflate-compressed resource {@code uniName.dat}.
 *
 * <p>The resource is decoded into two structures: a two-level index table
 * ({@link #lookup}) that is strongly held once built, and a pool holding the
 * bytes of all names, which is only softly referenced and is reloaded on
 * demand after the garbage collector clears it.
 */
class CharacterName {

    /**
     * Soft reference to the byte pool containing all names back to back.
     * Written only by {@link #initNamePool()}, always after {@link #lookup}
     * has been published, so a reader that sees a live pool also sees a
     * complete table.
     */
    private static volatile SoftReference<byte[]> refStrPool;

    /**
     * Two-level table indexed as {@code lookup[cp >> 8][cp & 0xff]}.
     * A row is {@code null} when no code point in that 256-wide page has a
     * name. A non-zero entry packs {@code (offset into pool << 8) | length};
     * zero means "no name".
     */
    private static volatile int[][] lookup;

    /**
     * Loads (or reloads) the name pool and the index table from
     * {@code uniName.dat}.
     *
     * <p>Layout of the inflated stream, as consumed here: an {@code int}
     * total size, an {@code int} index size, the index bytes, then
     * {@code total - index size} bytes of name pool. Each index record starts
     * with a length byte. If that byte is zero, the record continues with the
     * real length byte followed by a 3-byte big-endian code point; otherwise
     * the record applies to the previous code point plus one.
     *
     * @return the name pool, never {@code null}
     * @throws InternalError if the resource is missing or cannot be decoded;
     *         the underlying exception, if any, is attached as the cause
     */
    private static synchronized byte[] initNamePool() {
        // Re-check under the lock: another thread may have loaded the pool
        // between the caller's unsynchronized check and acquiring the monitor.
        SoftReference<byte[]> ref = refStrPool;
        byte[] strPool = (ref != null) ? ref.get() : null;
        if (strPool != null)
            return strPool;

        DataInputStream dis = null;
        try {
            InputStream in = AccessController.doPrivileged(
                new PrivilegedAction<InputStream>() {
                    public InputStream run() {
                        return CharacterName.class.getResourceAsStream("uniName.dat");
                    }
                });
            if (in == null) {
                // Fail with a meaningful message instead of a wrapped
                // NullPointerException that has no message at all.
                throw new InternalError("Missing resource: uniName.dat");
            }
            dis = new DataInputStream(new InflaterInputStream(in));

            // Build the table locally and publish it only once it is complete,
            // so concurrent readers in get() never see a half-filled table.
            int[][] table = new int[(Character.MAX_CODE_POINT + 1) >> 8][];
            int total = dis.readInt();
            int cpEnd = dis.readInt();
            byte[] ba = new byte[cpEnd];
            dis.readFully(ba);

            int nameOff = 0;
            int cpOff = 0;
            int cp = 0;
            do {
                int len = ba[cpOff++] & 0xff;
                if (len == 0) {
                    len = ba[cpOff++] & 0xff;
                    // always big-endian
                    cp = ((ba[cpOff++] & 0xff) << 16) |
                         ((ba[cpOff++] & 0xff) <<  8) |
                         ((ba[cpOff++] & 0xff));
                } else {
                    cp++;
                }
                int hi = cp >> 8;
                if (table[hi] == null) {
                    table[hi] = new int[0x100];
                }
                table[hi][cp & 0xff] = (nameOff << 8) | len;
                nameOff += len;
            } while (cpOff < cpEnd);

            strPool = new byte[total - cpEnd];
            dis.readFully(strPool);

            // Publish the table first, then the pool reference (see field docs).
            lookup = table;
            refStrPool = new SoftReference<byte[]>(strPool);
        } catch (Exception x) {
            // Keep the original failure reachable for diagnosis.
            InternalError err = new InternalError(x.getMessage());
            err.initCause(x);
            throw err;
        } finally {
            try {
                if (dis != null)
                    dis.close();
            } catch (Exception ignored) {
                // Best effort: the data has already been read (or a more
                // relevant error is already propagating).
            }
        }
        return strPool;
    }

    /**
     * Returns the name recorded for the given code point.
     *
     * @param cp the code point; must be in the range
     *        {@code 0..Character.MAX_CODE_POINT}, otherwise indexing the
     *        table throws {@link ArrayIndexOutOfBoundsException}
     * @return the name, or {@code null} if the table has no entry for
     *         {@code cp}
     * @throws InternalError if the name data cannot be loaded
     */
    @SuppressWarnings("deprecation")  // String(byte[], hibyte, off, len): names are ASCII
    public static String get(int cp) {
        SoftReference<byte[]> ref = refStrPool;
        byte[] strPool = (ref != null) ? ref.get() : null;
        if (strPool == null)
            strPool = initNamePool();
        // Read the table only after the pool has been obtained; at this point
        // a fully populated table has been published.
        int[] row = lookup[cp >> 8];
        int off;
        if (row == null || (off = row[cp & 0xff]) == 0)
            return null;
        return new String(strPool, 0, off >>> 8, off & 0xff);  // ASCII
    }
}