src/java.base/share/classes/java/lang/CharacterName.java
changeset 47216 71c04702a3d5
parent 35783 2690535d72cc
equal deleted inserted replaced
47215:4ebc2e2fb97c 47216:71c04702a3d5
       
     1 /*
       
     2  * Copyright (c) 2010, 2016, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 
       
    26 package java.lang;
       
    27 
       
    28 import java.io.DataInputStream;
       
    29 import java.io.InputStream;
       
    30 import java.lang.ref.SoftReference;
       
    31 import java.util.Arrays;
       
    32 import java.util.Locale;
       
    33 import java.util.zip.InflaterInputStream;
       
    34 import java.security.AccessController;
       
    35 import java.security.PrivilegedAction;
       
    36 
       
    37 class CharacterName {
       
    38 
       
    39     private static SoftReference<CharacterName> refCharName;
       
    40 
       
    41     // codepoint -> bkIndex -> lookup -> offset/len
       
    42     private final byte[] strPool;
       
    43     private final int[] lookup;      // code point -> offset/len in strPool
       
    44     private final int[] bkIndices;   // code point -> lookup index
       
    45 
       
    46     // name -> hash -> hsIndices -> cpEntries -> code point
       
    47     private final int[] cpEntries;   // code points that have name in strPool
       
    48     private final int[] hsIndices;   // chain heads, hash indices into "cps"
       
    49 
       
    50     private CharacterName()  {
       
    51         try (DataInputStream dis = new DataInputStream(new InflaterInputStream(
       
    52             AccessController.doPrivileged(new PrivilegedAction<>() {
       
    53                 public InputStream run() {
       
    54                     return getClass().getResourceAsStream("uniName.dat");
       
    55                 }
       
    56             })))) {
       
    57 
       
    58             int total = dis.readInt();
       
    59             int bkNum = dis.readInt();
       
    60             int cpNum = dis.readInt();
       
    61             int cpEnd = dis.readInt();
       
    62             byte ba[] = new byte[cpEnd];
       
    63             lookup = new int[bkNum * 256];
       
    64             bkIndices = new int[(Character.MAX_CODE_POINT + 1) >> 8];
       
    65             strPool = new byte[total - cpEnd];
       
    66             cpEntries = new int[cpNum * 3];
       
    67             hsIndices = new int[(cpNum / 2) | 1];
       
    68             Arrays.fill(bkIndices, -1);
       
    69             Arrays.fill(hsIndices, -1);
       
    70             dis.readFully(ba);
       
    71             dis.readFully(strPool);
       
    72 
       
    73             int nameOff = 0;
       
    74             int cpOff = 0;
       
    75             int cp = 0;
       
    76             int bk = -1;
       
    77             int prevBk = -1;   // prev bkNo;
       
    78             int idx = 0;
       
    79             int next = -1;
       
    80             int hash = 0;
       
    81             int hsh = 0;
       
    82             do {
       
    83                 int len = ba[cpOff++] & 0xff;
       
    84                 if (len == 0) {
       
    85                     len = ba[cpOff++] & 0xff;
       
    86                     // always big-endian
       
    87                     cp = ((ba[cpOff++] & 0xff) << 16) |
       
    88                          ((ba[cpOff++] & 0xff) <<  8) |
       
    89                          ((ba[cpOff++] & 0xff));
       
    90                 }  else {
       
    91                     cp++;
       
    92                 }
       
    93                 // cp -> name
       
    94                 int hi = cp >> 8;
       
    95                 if (prevBk != hi) {
       
    96                     bk++;
       
    97                     bkIndices[hi] = bk;
       
    98                     prevBk = hi;
       
    99                 }
       
   100                 lookup[(bk << 8) + (cp & 0xff)] = (nameOff << 8) | len;
       
   101                 // name -> cp
       
   102                 hash = hashN(strPool, nameOff, len);
       
   103                 hsh = (hash & 0x7fffffff) % hsIndices.length;
       
   104                 next = hsIndices[hsh];
       
   105                 hsIndices[hsh] = idx;
       
   106                 idx = addCp(idx, hash, next, cp);
       
   107                 nameOff += len;
       
   108             } while (cpOff < cpEnd);
       
   109         } catch (Exception x) {
       
   110             throw new InternalError(x.getMessage(), x);
       
   111         }
       
   112     }
       
   113 
       
   114     private static final int hashN(byte[] a, int off, int len) {
       
   115         int h = 1;
       
   116         while (len-- > 0) {
       
   117             h = 31 * h + a[off++];
       
   118         }
       
   119         return h;
       
   120     }
       
   121 
       
   122     private int addCp(int idx, int hash, int next, int cp) {
       
   123         cpEntries[idx++] = hash;
       
   124         cpEntries[idx++] = next;
       
   125         cpEntries[idx++] = cp;
       
   126         return idx;
       
   127     }
       
   128 
       
   129     private int getCpHash(int idx) { return cpEntries[idx]; }
       
   130     private int getCpNext(int idx) { return cpEntries[idx + 1]; }
       
   131     private int getCp(int idx)  { return cpEntries[idx + 2]; }
       
   132 
       
   133     public static CharacterName getInstance() {
       
   134         SoftReference<CharacterName> ref = refCharName;
       
   135         CharacterName cname = null;
       
   136         if (ref == null || (cname = ref.get()) == null) {
       
   137             cname = new CharacterName();
       
   138             refCharName = new SoftReference<>(cname);
       
   139         }
       
   140         return cname;
       
   141     }
       
   142 
       
   143     public String getName(int cp) {
       
   144         int off = 0;
       
   145         int bk = bkIndices[cp >> 8];
       
   146         if (bk == -1 || (off = lookup[(bk << 8) + (cp & 0xff)]) == 0)
       
   147             return null;
       
   148         @SuppressWarnings("deprecation")
       
   149         String result = new String(strPool, 0, off >>> 8, off & 0xff);  // ASCII
       
   150         return result;
       
   151     }
       
   152 
       
   153     public int getCodePoint(String name) {
       
   154         byte[] bname = name.getBytes(java.nio.charset.StandardCharsets.ISO_8859_1);
       
   155         int hsh = hashN(bname, 0, bname.length);
       
   156         int idx = hsIndices[(hsh & 0x7fffffff) % hsIndices.length];
       
   157         while (idx != -1) {
       
   158             if (getCpHash(idx) == hsh) {
       
   159                 int cp = getCp(idx);
       
   160                 int off = -1;
       
   161                 int bk = bkIndices[cp >> 8];
       
   162                 if (bk != -1 && (off = lookup[(bk << 8) + (cp & 0xff)]) != 0) {
       
   163                     int len = off & 0xff;
       
   164                     off = off >>> 8;
       
   165                     if (bname.length == len) {
       
   166                         int i = 0;
       
   167                         while (i < len && bname[i] == strPool[off++]) {
       
   168                             i++;
       
   169                         }
       
   170                         if (i == len) {
       
   171                             return cp;
       
   172                         }
       
   173                     }
       
   174                  }
       
   175             }
       
   176             idx = getCpNext(idx);
       
   177         }
       
   178         return -1;
       
   179     }
       
   180 }