jdk/make/src/classes/build/tools/generatecharacter/GenerateCharacter.java
author lana
Thu, 26 Dec 2013 12:04:16 -0800
changeset 23010 6dadb192ad81
parent 21805 c7d7946239de
child 31680 88c53c2293b4
permissions -rw-r--r--
8029235: Update copyright year to match last edit in jdk8 jdk repository for 2013 Summary: updated files with 2011, 2012 and 2013 years according to the file's last updated date Reviewed-by: tbell, lancea, chegar
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
23010
6dadb192ad81 8029235: Update copyright year to match last edit in jdk8 jdk repository for 2013
lana
parents: 21805
diff changeset
     2
 * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     4
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    10
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
90ce3da70b43 Initial load
duke
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    15
 * accompanied this code).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    16
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load
duke
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    20
 *
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    23
 * questions.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    24
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
package build.tools.generatecharacter;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    27
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
import java.io.IOException;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    29
import java.io.FileNotFoundException;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    30
import java.io.BufferedReader;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    31
import java.io.FileReader;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    32
import java.io.PrintWriter;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    33
import java.io.BufferedWriter;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    34
import java.io.FileWriter;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    35
import java.io.File;
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
    36
import java.util.List;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    37
5610
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents: 2
diff changeset
    38
import build.tools.generatecharacter.CharacterName;
fd2427610c7f 6945564: Unicode script support in Character class
sherman
parents: 2
diff changeset
    39
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    40
/**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    41
 * This program generates the source code for the class java.lang.Character.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    42
 * It also generates native C code that can perform the same operations.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    43
 * It requires two external input data files:
90ce3da70b43 Initial load
duke
parents:
diff changeset
    44
 * <ul>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    45
 * <li> Unicode specification file
90ce3da70b43 Initial load
duke
parents:
diff changeset
    46
 * <li> Character class template file
90ce3da70b43 Initial load
duke
parents:
diff changeset
    47
 * </ul>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    48
 * The Unicode specification file is available from the Unicode consortium.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    49
 * It has character specification lines that look like this:
90ce3da70b43 Initial load
duke
parents:
diff changeset
    50
 * <listing>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    51
 * 0041;LATIN CAPITAL LETTER A;Lu;0;L;;;;;N;;;;0061;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    52
 * </listing>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    53
 * The Character class template file is filled in with additional
90ce3da70b43 Initial load
duke
parents:
diff changeset
    54
 * information to produce the file Character.java, which can then be
90ce3da70b43 Initial load
duke
parents:
diff changeset
    55
 * compiled by a Java compiler.  The template file contains certain
90ce3da70b43 Initial load
duke
parents:
diff changeset
    56
 * markers consisting of an alphabetic name string preceded by "$$".
90ce3da70b43 Initial load
duke
parents:
diff changeset
    57
 * Such markers are replaced with generated program text.  As a special
90ce3da70b43 Initial load
duke
parents:
diff changeset
    58
 * case, the marker "Lookup(xxx)" is recognized, where "xxx" consists of
90ce3da70b43 Initial load
duke
parents:
diff changeset
    59
 * alphabetic characters constituting a variable name.  The character "_"
90ce3da70b43 Initial load
duke
parents:
diff changeset
    60
 * is considered alphabetic for these purposes.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    61
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    62
 * @author  Guy Steele
90ce3da70b43 Initial load
duke
parents:
diff changeset
    63
 * @author  Alan Liu
90ce3da70b43 Initial load
duke
parents:
diff changeset
    64
 * @author  John O'Conner
90ce3da70b43 Initial load
duke
parents:
diff changeset
    65
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    66
90ce3da70b43 Initial load
duke
parents:
diff changeset
    67
public class GenerateCharacter {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    68
90ce3da70b43 Initial load
duke
parents:
diff changeset
    69
    final static boolean DEBUG = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    70
90ce3da70b43 Initial load
duke
parents:
diff changeset
    71
    final static String commandMarker = "$$";
90ce3da70b43 Initial load
duke
parents:
diff changeset
    72
    static String ROOT                        = "";
90ce3da70b43 Initial load
duke
parents:
diff changeset
    73
    static String DefaultUnicodeSpecFileName  = ROOT + "UnicodeData.txt";
90ce3da70b43 Initial load
duke
parents:
diff changeset
    74
    static String DefaultSpecialCasingFileName = ROOT + "SpecialCasing.txt";
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
    75
    static String DefaultPropListFileName     = ROOT + "PropList.txt";
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    76
    static String DefaultJavaTemplateFileName = ROOT + "Character.java.template";
90ce3da70b43 Initial load
duke
parents:
diff changeset
    77
    static String DefaultJavaOutputFileName   = ROOT + "Character.java";
90ce3da70b43 Initial load
duke
parents:
diff changeset
    78
    static String DefaultCTemplateFileName    = ROOT + "Character.c.template";
90ce3da70b43 Initial load
duke
parents:
diff changeset
    79
    static String DefaultCOutputFileName      = ROOT + "Character.c";
90ce3da70b43 Initial load
duke
parents:
diff changeset
    80
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
    81
    static int plane = 0;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    82
90ce3da70b43 Initial load
duke
parents:
diff changeset
    83
    /* The overall idea is that, in the generated Character class source code,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    84
    most character property data is stored in a special multi-level table whose
90ce3da70b43 Initial load
duke
parents:
diff changeset
    85
    structure is defined by a sequence of nonnegative integers [k1, k2, ..., kn].
90ce3da70b43 Initial load
duke
parents:
diff changeset
    86
    The integers must sum to 16 (the number of bits in a character).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    87
    The first table is indexed by the k1 high-order bits of the character code.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    88
    The result is concatenated to the next k2 bits of the character code to index
90ce3da70b43 Initial load
duke
parents:
diff changeset
    89
    the second table, and so on.  Eventually the kn low-order bits of the character
90ce3da70b43 Initial load
duke
parents:
diff changeset
    90
    code are concatenated and used to index one of two tables A and B; A contains
90ce3da70b43 Initial load
duke
parents:
diff changeset
    91
    32-bit integer entries and B contains 16-bit short entries.  The 48 bits that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    92
    can be thus obtained encode the properties for the character.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    93
90ce3da70b43 Initial load
duke
parents:
diff changeset
    94
    The default specification is [9, 4, 3, 0].  This particular table format was
90ce3da70b43 Initial load
duke
parents:
diff changeset
    95
    designed by conducting an exhaustive search of table formats to minimize the
90ce3da70b43 Initial load
duke
parents:
diff changeset
    96
    space consumed by the tables: the first and third tables need have only byte
90ce3da70b43 Initial load
duke
parents:
diff changeset
    97
    values (the second table must have short values).  Another good choice is
90ce3da70b43 Initial load
duke
parents:
diff changeset
    98
    [10, 6, 0], which produces a larger table but allows particularly fast table
90ce3da70b43 Initial load
duke
parents:
diff changeset
    99
    lookup code.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   100
90ce3da70b43 Initial load
duke
parents:
diff changeset
   101
    In each case, where the word "concatenated" is used, this may imply
90ce3da70b43 Initial load
duke
parents:
diff changeset
   102
    first a << and then a | operation, or perhaps just a | operation if
90ce3da70b43 Initial load
duke
parents:
diff changeset
   103
    the values in the table can be preshifted (generally possible if the table
90ce3da70b43 Initial load
duke
parents:
diff changeset
   104
    entries are short rather than byte).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   105
    */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   106
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   107
    /* The character properties are currently encoded into A (32 bits) and B (16 bits)
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   108
       two parts.
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   109
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   110
    A: the low 32 bits are defined  in the following manner:
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   111
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   112
    1 bit Mirrored property.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   113
    4 bits      Bidirectional category (see below) (unused if -nobidi switch specified)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   114
    9 bits      A signed offset used for converting case.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   115
    1 bit       If 1, adding the signed offset converts the character to lowercase.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   116
    1 bit       If 1, subtracting the signed offset converts the character to uppercase.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   117
        Note: for a titlecase character, both of the preceding bits will be 1
90ce3da70b43 Initial load
duke
parents:
diff changeset
   118
        and the signed offset will be 1.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   119
    1 bit   If 1, this character has a titlecase equivalent (possibly itself);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   120
        in this case, the two bits before this bit can be used to decide
90ce3da70b43 Initial load
duke
parents:
diff changeset
   121
        whether this character is in fact uppercase, lowercase, or titlecase.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   122
    3 bits      This field provides a quick way to lex identifiers.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   123
        The eight possible values for this field are as follows:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   124
        0  May not be part of an identifier
90ce3da70b43 Initial load
duke
parents:
diff changeset
   125
        1  Ignorable control; may continue a Unicode identifier or Java identifier
90ce3da70b43 Initial load
duke
parents:
diff changeset
   126
        2  May continue a Java identifier but not a Unicode identifier (unused)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   127
        3  May continue a Unicode identifier or Java identifier
90ce3da70b43 Initial load
duke
parents:
diff changeset
   128
        4  Is a Java whitespace character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   129
        5  May start or continue a Java identifier;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   130
           may continue but not start a Unicode identifier
90ce3da70b43 Initial load
duke
parents:
diff changeset
   131
           (this value is used for connector punctuation such as _)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   132
        6  May start or continue a Java identifier;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   133
           may not occur in a Unicode identifier
90ce3da70b43 Initial load
duke
parents:
diff changeset
   134
           (this value is used for currency symbols such as $)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   135
        7  May start or continue a Unicode identifier or Java identifier
90ce3da70b43 Initial load
duke
parents:
diff changeset
   136
        Thus:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   137
           5, 6, 7 may start a Java identifier
90ce3da70b43 Initial load
duke
parents:
diff changeset
   138
           1, 2, 3, 5, 6, 7 may continue a Java identifier
90ce3da70b43 Initial load
duke
parents:
diff changeset
   139
           7 may start a Unicode identifier
90ce3da70b43 Initial load
duke
parents:
diff changeset
   140
           1, 3, 5, 7 may continue a Unicode identifier
90ce3da70b43 Initial load
duke
parents:
diff changeset
   141
           1 is ignorable within an identifier
90ce3da70b43 Initial load
duke
parents:
diff changeset
   142
           4 is Java whitespace
90ce3da70b43 Initial load
duke
parents:
diff changeset
   143
    2 bits      This field indicates whether the character has a numeric property.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   144
        The four possible values for this field are as follows:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   145
        0  This character has no numeric property.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   146
        1  Adding the digit offset to the character code and then
90ce3da70b43 Initial load
duke
parents:
diff changeset
   147
           masking with 0x1F will produce the desired numeric value.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   148
        2  This character has a "strange" numeric value.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   149
        3  A Java supradecimal digit: adding the digit offset to the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   150
           character code, then masking with 0x1F, then adding 10
90ce3da70b43 Initial load
duke
parents:
diff changeset
   151
           will produce the desired numeric value.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   152
    5 bits  The digit offset (see description of previous field)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   153
    5 bits      Character type (see below)
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   154
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   155
    B: the high 16 bits are defined as:
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   156
    1 bit Other_Lowercase property
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   157
    1 bit Other_Uppercase property
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   158
    1 bit Other_Alphabetic property
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   159
    1 bit Other_Math property
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   160
    1 bit Ideographic property
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   161
    1 bit Noncharacter codepoint property
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   162
    */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   163
90ce3da70b43 Initial load
duke
parents:
diff changeset
   164
90ce3da70b43 Initial load
duke
parents:
diff changeset
   165
    // bit masks identify each component of a 32-bit property field described
90ce3da70b43 Initial load
duke
parents:
diff changeset
   166
    // above.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   167
    // shift* indicates how many shifts right must happen to get the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   168
    // indicated property value in the lowest bits of the 32-bit space.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   169
    private static final int
90ce3da70b43 Initial load
duke
parents:
diff changeset
   170
        shiftType           = 0,        maskType            =       0x001F,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   171
        shiftDigitOffset    = 5,        maskDigitOffset     =       0x03E0,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   172
        shiftNumericType    = 10,       maskNumericType     =       0x0C00,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   173
        shiftIdentifierInfo = 12,       maskIdentifierInfo  =       0x7000,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   174
                                        maskUnicodePart     =       0x1000,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   175
        shiftCaseInfo       = 15,       maskCaseInfo        =      0x38000,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   176
                                        maskLowerCase       =      0x20000,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   177
                                        maskUpperCase       =      0x10000,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   178
                                        maskTitleCase       =      0x08000,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   179
        shiftCaseOffset     = 18,       maskCaseOffset      =   0x07FC0000,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   180
        shiftCaseOffsetSign = 5,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   181
                                        // used only when calculating and
90ce3da70b43 Initial load
duke
parents:
diff changeset
   182
                                        // storing digit offsets from char values
90ce3da70b43 Initial load
duke
parents:
diff changeset
   183
                                        maskDigit               =   0x001F,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   184
                                        // case offset are 9 bits
90ce3da70b43 Initial load
duke
parents:
diff changeset
   185
                                        maskCase                =   0x01FF,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   186
        shiftBidi           = 27,       maskBidi              = 0x78000000,
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   187
        shiftMirrored       = 31,       //maskMirrored          = 0x80000000,
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   188
        shiftPlane          = 16,       maskPlane = 0xFF0000;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   189
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   190
    // maskMirrored needs to be long, if up 16-bit
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   191
    private static final long maskMirrored          = 0x80000000L;
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   192
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   193
    // bit masks identify the 16-bit priperty field described above, in B
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   194
    // table
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   195
    private static final long
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   196
        maskOtherLowercase  = 0x100000000L,
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   197
        maskOtherUppercase  = 0x200000000L,
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   198
        maskOtherAlphabetic = 0x400000000L,
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   199
        maskOtherMath       = 0x800000000L,
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   200
        maskIdeographic     = 0x1000000000L,
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   201
        maskNoncharacterCP  = 0x2000000000L;
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   202
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   203
    // Can compare masked values with these to determine
90ce3da70b43 Initial load
duke
parents:
diff changeset
   204
    // numeric or lexical types.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   205
    public static int
90ce3da70b43 Initial load
duke
parents:
diff changeset
   206
        valueNotNumeric             = 0x0000,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   207
        valueDigit                  = 0x0400,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   208
        valueStrangeNumeric         = 0x0800,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   209
        valueJavaSupradecimal       = 0x0C00,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   210
        valueIgnorable              = 0x1000,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   211
        valueJavaOnlyPart           = 0x2000,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   212
        valueJavaUnicodePart        = 0x3000,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   213
        valueJavaWhitespace         = 0x4000,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   214
        valueJavaStartUnicodePart   = 0x5000,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   215
        valueJavaOnlyStart          = 0x6000,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   216
        valueJavaUnicodeStart       = 0x7000,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   217
        lowJavaStart                = 0x5000,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   218
        nonzeroJavaPart             = 0x3000,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   219
        valueUnicodeStart           = 0x7000;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   220
90ce3da70b43 Initial load
duke
parents:
diff changeset
   221
    // these values are used when only identifier properties are generated
90ce3da70b43 Initial load
duke
parents:
diff changeset
   222
    // for use in verifier code. Shortens the property down to a single byte.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   223
    private static final int
90ce3da70b43 Initial load
duke
parents:
diff changeset
   224
        bitJavaStart            = 0x02,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   225
        bitJavaPart             = 0x01,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   226
        maskIsJavaIdentifierPart = bitJavaPart,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   227
        maskIsJavaIdentifierStart = bitJavaStart;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   228
90ce3da70b43 Initial load
duke
parents:
diff changeset
   229
    static int maxOffset = maskCase/2 ;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   230
    static int minOffset = -maxOffset;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   231
90ce3da70b43 Initial load
duke
parents:
diff changeset
   232
    /* The following routines provide simple, concise formatting of long integer values.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   233
     The number in the name of the method indicates the desired number of characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
   234
     to be produced.  If the number of digits required to represent the integer value
90ce3da70b43 Initial load
duke
parents:
diff changeset
   235
     is less than that number, then the output is padded on the left  with zeros
90ce3da70b43 Initial load
duke
parents:
diff changeset
   236
     (for hex) or with spaces (for decimal).  If the number of digits required to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   237
     represent the integer value is greater than the desired number, then all the digits
90ce3da70b43 Initial load
duke
parents:
diff changeset
   238
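     that are required are actually produced.  (Exception: hex2, hex4 and hex8
     first mask the value to its low 8, 16 and 32 bits, so their output never
     exceeds the named width.)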
90ce3da70b43 Initial load
duke
parents:
diff changeset
   239
    */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   240
90ce3da70b43 Initial load
duke
parents:
diff changeset
   241
    /**
     * Formats {@code n} as unsigned hexadecimal using uppercase digits and
     * no padding.
     *
     * @param n the value to format
     * @return the uppercase hexadecimal digits of {@code n}
     */
    static String hex(long n) {
        final String lowerCaseDigits = Long.toHexString(n);
        return lowerCaseDigits.toUpperCase();
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   242
90ce3da70b43 Initial load
duke
parents:
diff changeset
   243
    static String hex2(long n) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   244
        String q = Long.toHexString(n & 0xFF).toUpperCase();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   245
        return "00".substring(Math.min(2, q.length())) + q;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   246
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   247
90ce3da70b43 Initial load
duke
parents:
diff changeset
   248
    static String hex4(long n) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   249
        String q = Long.toHexString(n & 0xFFFF).toUpperCase();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   250
        return "0000".substring(Math.min(4, q.length())) + q;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   251
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   252
90ce3da70b43 Initial load
duke
parents:
diff changeset
   253
    static String hex8(long n) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   254
        String q = Long.toHexString(n & 0xFFFFFFFFL).toUpperCase();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   255
        return "00000000".substring(Math.min(8, q.length())) + q;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   256
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   257
90ce3da70b43 Initial load
duke
parents:
diff changeset
   258
    static String hex16(long n) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   259
        String q = Long.toHexString(n).toUpperCase();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   260
        return "0000000000000000".substring(Math.min(16, q.length())) + q;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   261
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   262
90ce3da70b43 Initial load
duke
parents:
diff changeset
   263
    static String dec3(long n) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   264
        String q = Long.toString(n);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   265
        return "   ".substring(Math.min(3, q.length())) + q;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   266
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   267
90ce3da70b43 Initial load
duke
parents:
diff changeset
   268
    static String dec5(long n) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   269
        String q = Long.toString(n);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   270
        return "     ".substring(Math.min(5, q.length())) + q;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   271
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   272
90ce3da70b43 Initial load
duke
parents:
diff changeset
   273
    /* This routine is called when some failure occurs. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   274
90ce3da70b43 Initial load
duke
parents:
diff changeset
   275
    static void FAIL(String s) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   276
        System.out.println("** " + s);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   277
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   278
90ce3da70b43 Initial load
duke
parents:
diff changeset
   279
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   280
    * Given the data from the Unicode specification file, this routine builds a map.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   281
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   282
    * The specification file is assumed to contain its data in sorted order by
90ce3da70b43 Initial load
duke
parents:
diff changeset
   283
    * character code; as a result, the array passed as an argument to this method
90ce3da70b43 Initial load
duke
parents:
diff changeset
   284
    * has its components in the same sorted order, with one entry for each defined
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   285
    * Unicode character or character range.  (A range is indicated by two consecutive
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   286
    * entries, such that the name of the first entry begins with "<" and ends with
90ce3da70b43 Initial load
duke
parents:
diff changeset
   287
    * "First>" and the second entry begins with "<" and ends with "Last>".)  This is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   288
    * therefore a sparse representation of the character property data.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   289
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   290
    * The resulting map is dense representation of the character data.  It contains
90ce3da70b43 Initial load
duke
parents:
diff changeset
   291
    * 2^16 = 65536 entries, each of which is a long integer.  (Right now only 32 bits
90ce3da70b43 Initial load
duke
parents:
diff changeset
   292
    * of this long value are used, but type long is used rather than int to facilitate
90ce3da70b43 Initial load
duke
parents:
diff changeset
   293
    * future extensions of this source code generator that might require more than
90ce3da70b43 Initial load
duke
parents:
diff changeset
   294
    * 32 bits to encode relevant character properties.)  Entry k holds the encoded
90ce3da70b43 Initial load
duke
parents:
diff changeset
   295
    * properties for character k.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   296
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   297
    * Method buildMap manages the transformation from the sparse representation to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   298
    * the dense representation.  It calls method buildOne to handle the encoding
90ce3da70b43 Initial load
duke
parents:
diff changeset
   299
    * of character property data from a single UnicodeSpec object into 32 bits.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   300
    * For undefined characters, method buildOne is not called and the map entry for
90ce3da70b43 Initial load
duke
parents:
diff changeset
   301
    * that character is set to UnicodeSpec.UNASSIGNED.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   302
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   303
    * @param data       character property data from the Unicode specification file
90ce3da70b43 Initial load
duke
parents:
diff changeset
   304
    * @return   an array of length 65536 with one entry for every possible char value
90ce3da70b43 Initial load
duke
parents:
diff changeset
   305
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   306
    * @see GenerateCharacter#buildOne
90ce3da70b43 Initial load
duke
parents:
diff changeset
   307
    */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   308
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   309
    static long[] buildMap(UnicodeSpec[] data, SpecialCaseMap[] specialMaps, PropList propList)
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   310
    {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   311
        long[] result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   312
        if (bLatin1 == true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   313
            result = new long[256];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   314
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   315
            result = new long[1<<16];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   316
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   317
        int k=0;
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   318
        int codePoint = plane<<16;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   319
        UnicodeSpec nonCharSpec = new UnicodeSpec();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   320
        for (int j = 0; j < data.length && k < result.length; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   321
            if (data[j].codePoint == codePoint) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   322
                result[k] = buildOne(codePoint, data[j], specialMaps);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   323
                ++k;
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   324
                ++codePoint;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   325
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   326
            else if(data[j].codePoint > codePoint) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   327
                if (data[j].name.endsWith("Last>")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   328
                    // build map data for all chars except last in range
90ce3da70b43 Initial load
duke
parents:
diff changeset
   329
                    while (codePoint < data[j].codePoint && k < result.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   330
                        result[k] = buildOne(codePoint, data[j], specialMaps);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   331
                        ++k;
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   332
                        ++codePoint;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   333
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   334
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   335
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   336
                    // we have a few unassigned chars before data[j].codePoint
90ce3da70b43 Initial load
duke
parents:
diff changeset
   337
                    while (codePoint < data[j].codePoint && k < result.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   338
                        result[k] = buildOne(codePoint, nonCharSpec, specialMaps);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   339
                        ++k;
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   340
                        ++codePoint;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   341
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   342
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   343
                k = data[j].codePoint & 0xFFFF;
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   344
                codePoint = data[j].codePoint;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   345
                result[k] = buildOne(codePoint, data[j], specialMaps);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   346
                ++k;
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   347
                ++codePoint;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   348
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   349
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   350
                System.out.println("An error has occured during spec mapping.");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   351
                System.exit(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   352
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   353
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   354
        // if there are still unprocessed chars, process them
90ce3da70b43 Initial load
duke
parents:
diff changeset
   355
        // as unassigned/undefined.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   356
        codePoint = (plane<<16) | k;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   357
        while (k < result.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   358
            result[k] = buildOne(codePoint, nonCharSpec, specialMaps);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   359
            ++k;
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   360
            ++codePoint;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   361
        }
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   362
        // now add all extra supported properties from PropList, to the
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   363
        // upper 16-bit
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   364
        addExProp(result, propList, "Other_Lowercase", maskOtherLowercase);
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   365
        addExProp(result, propList, "Other_Uppercase", maskOtherUppercase);
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   366
        addExProp(result, propList, "Other_Alphabetic", maskOtherAlphabetic);
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   367
        addExProp(result, propList, "Ideographic", maskIdeographic);
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   368
        //addExProp(result, propList, "Other_Math", maskOtherMath);
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   369
        //addExProp(result, propList, "Noncharacter_CodePoint", maskNoncharacterCP);
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   370
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   371
        return result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   372
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   373
90ce3da70b43 Initial load
duke
parents:
diff changeset
   374
    // The maximum and minimum offsets found while scanning the database
90ce3da70b43 Initial load
duke
parents:
diff changeset
   375
    // Largest case offset encountered so far; starts at 0 and is raised by
    // buildOne whenever a character's offset exceeds it.
    static int maxOffsetSeen = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   376
    // Smallest case offset encountered so far; starts at 0 and is lowered by
    // buildOne whenever a character's offset falls below it.
    static int minOffsetSeen = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   377
90ce3da70b43 Initial load
duke
parents:
diff changeset
   378
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   379
     * Some Unicode separator characters are not considered Java whitespace.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   380
     * @param c character to test
90ce3da70b43 Initial load
duke
parents:
diff changeset
   381
     * @return true if c in an invalid Java whitespace character, false otherwise.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   382
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   383
    static boolean isInvalidJavaWhiteSpace(int c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   384
        int[] exceptions = {0x00A0, 0x2007, 0x202F, 0xFEFF};
90ce3da70b43 Initial load
duke
parents:
diff changeset
   385
        boolean retValue = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   386
        for(int x=0;x<exceptions.length;x++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   387
            if(c == exceptions[x]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   388
                retValue = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   389
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   390
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   391
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   392
        return retValue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   393
90ce3da70b43 Initial load
duke
parents:
diff changeset
   394
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   395
90ce3da70b43 Initial load
duke
parents:
diff changeset
   396
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   397
    * Given the character property data for one Unicode character, encode the data
90ce3da70b43 Initial load
duke
parents:
diff changeset
   398
    * of interest into a single long integer value.  (Right now only 32 bits
90ce3da70b43 Initial load
duke
parents:
diff changeset
   399
    * of this long value are used, but type long is used rather than int to facilitate
90ce3da70b43 Initial load
duke
parents:
diff changeset
   400
    * future extensions of this source code generator that might require more than
90ce3da70b43 Initial load
duke
parents:
diff changeset
   401
    * 32 bits to encode relevant character properties.)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   402
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   403
    * @param c   the character code for which to encode property data
90ce3da70b43 Initial load
duke
parents:
diff changeset
   404
    * @param us  property data record from the Unicode specification file
90ce3da70b43 Initial load
duke
parents:
diff changeset
   405
    *            (its character code might not be equal to c if it specifies data
90ce3da70b43 Initial load
duke
parents:
diff changeset
   406
    *            for a range of characters)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   407
    * @return   an encoded long value that contains the properties for a single char
90ce3da70b43 Initial load
duke
parents:
diff changeset
   408
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   409
    * @see GenerateCharacter#buildMap
90ce3da70b43 Initial load
duke
parents:
diff changeset
   410
    */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   411
90ce3da70b43 Initial load
duke
parents:
diff changeset
   412
    static long buildOne(int c, UnicodeSpec us, SpecialCaseMap[] specialMaps) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   413
        long resultA = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   414
        // record the general category
90ce3da70b43 Initial load
duke
parents:
diff changeset
   415
        resultA |= us.generalCategory;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   416
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   417
        // record the numeric properties
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   418
        NUMERIC: {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   419
        STRANGE: {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   420
            int val = 0;
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   421
            // c is A-Z
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   422
            if ((c >= 0x0041) && (c <= 0x005A)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   423
                val = c - 0x0041;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   424
                resultA |= valueJavaSupradecimal;
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   425
            // c is a-z
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   426
            } else if ((c >= 0x0061) && (c <= 0x007A)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   427
                val = c - 0x0061;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   428
                resultA |= valueJavaSupradecimal;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   429
            // c is a full-width A-Z
90ce3da70b43 Initial load
duke
parents:
diff changeset
   430
            } else if ((c >= 0xFF21) && (c <= 0xFF3A)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   431
                val = c - 0xFF21;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   432
                resultA |= valueJavaSupradecimal;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   433
            // c is a full-width a-z
90ce3da70b43 Initial load
duke
parents:
diff changeset
   434
            } else if ((c >= 0xFF41) && (c <= 0xFF5A)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   435
                val = c - 0xFF41;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   436
                resultA |= valueJavaSupradecimal;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   437
            } else if (us.isDecimalValue()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   438
                val = us.decimalValue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   439
                resultA |= valueDigit;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   440
            } else if (us.isDigitValue()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   441
                val = us.digitValue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   442
                resultA |= valueDigit;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   443
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   444
                if (us.numericValue.length() == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   445
                    break NUMERIC;                      // no numeric value at all
90ce3da70b43 Initial load
duke
parents:
diff changeset
   446
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   447
                    try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   448
                        val = Integer.parseInt(us.numericValue);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   449
                        if (val >= 32 || val < 0) break STRANGE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   450
                        if (c == 0x215F) break STRANGE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   451
                    } catch(NumberFormatException e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   452
                        break STRANGE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   453
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   454
                    resultA |= valueDigit;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   455
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   456
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   457
            if (val >= 32 || val < 0) break STRANGE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   458
            resultA |= ((val - c & maskDigit) << shiftDigitOffset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   459
            break NUMERIC;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   460
        } // end STRANGE
90ce3da70b43 Initial load
duke
parents:
diff changeset
   461
        resultA |= valueStrangeNumeric;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   462
        } // end NUMERIC
90ce3da70b43 Initial load
duke
parents:
diff changeset
   463
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   464
        // record case mapping
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   465
        int offset = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   466
        // might have a 1:M mapping
90ce3da70b43 Initial load
duke
parents:
diff changeset
   467
        int specialMap = SpecialCaseMap.find(c, specialCaseMaps);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   468
        boolean bHasUpper = (us.hasUpperMap()) || (specialMap != -1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   469
        if (bHasUpper) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   470
            resultA |= maskUpperCase;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   471
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   472
        if (specialMap != -1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   473
            // has mapping, but cannot record the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   474
            // proper offset; can only flag it and provide special case
90ce3da70b43 Initial load
duke
parents:
diff changeset
   475
            // code in Character.java
90ce3da70b43 Initial load
duke
parents:
diff changeset
   476
            offset = -1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   477
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   478
        else if (us.hasUpperMap())  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   479
            offset = c - us.upperMap;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   480
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   481
90ce3da70b43 Initial load
duke
parents:
diff changeset
   482
        if (us.hasLowerMap()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   483
            resultA |= maskLowerCase;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   484
            if (offset == 0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   485
                offset = us.lowerMap - c;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   486
            else if (offset != (us.lowerMap - c)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   487
                if (DEBUG) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   488
                FAIL("Character " + hex(c) +
90ce3da70b43 Initial load
duke
parents:
diff changeset
   489
                " has incompatible lowercase and uppercase mappings");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   490
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   491
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   492
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   493
        if ((us.hasTitleMap() && us.titleMap != us.upperMap) ||
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   494
            (bHasUpper && us.hasLowerMap())) {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   495
            resultA |= maskTitleCase;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   496
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   497
        if (bHasUpper && !us.hasLowerMap() && !us.hasTitleMap() && verbose) {
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   498
            System.out.println("Warning: Character " + hex4(c) + " has upper but " +
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   499
                               "no title case; Java won't know this");
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   500
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   501
        if (offset < minOffsetSeen) minOffsetSeen = offset;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   502
        if (offset > maxOffsetSeen) maxOffsetSeen = offset;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   503
        if (offset > maxOffset || offset < minOffset) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   504
            if (DEBUG) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   505
            FAIL("Case offset " + offset + " for character " + hex4(c) + " must be handled as a special case");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   506
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   507
            offset = maskCase;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   508
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   509
        resultA |= ((offset & maskCase) << shiftCaseOffset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   510
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   511
        // record lexical info about this character
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   512
        if (us.generalCategory == UnicodeSpec.LOWERCASE_LETTER
90ce3da70b43 Initial load
duke
parents:
diff changeset
   513
                || us.generalCategory == UnicodeSpec.UPPERCASE_LETTER
90ce3da70b43 Initial load
duke
parents:
diff changeset
   514
                || us.generalCategory == UnicodeSpec.TITLECASE_LETTER
90ce3da70b43 Initial load
duke
parents:
diff changeset
   515
                || us.generalCategory == UnicodeSpec.MODIFIER_LETTER
90ce3da70b43 Initial load
duke
parents:
diff changeset
   516
                || us.generalCategory == UnicodeSpec.OTHER_LETTER
90ce3da70b43 Initial load
duke
parents:
diff changeset
   517
                || us.generalCategory == UnicodeSpec.LETTER_NUMBER) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   518
            resultA |= valueJavaUnicodeStart;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   519
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   520
        else if (us.generalCategory == UnicodeSpec.COMBINING_SPACING_MARK
90ce3da70b43 Initial load
duke
parents:
diff changeset
   521
                || us.generalCategory == UnicodeSpec.NON_SPACING_MARK
90ce3da70b43 Initial load
duke
parents:
diff changeset
   522
                || us.generalCategory == UnicodeSpec.DECIMAL_DIGIT_NUMBER) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   523
            resultA |= valueJavaUnicodePart;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   524
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   525
        else if (us.generalCategory == UnicodeSpec.CONNECTOR_PUNCTUATION) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   526
            resultA |= valueJavaStartUnicodePart;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   527
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   528
        else if (us.generalCategory == UnicodeSpec.CURRENCY_SYMBOL) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   529
            resultA |= valueJavaOnlyStart;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   530
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   531
        else if (((c >= 0x0000) && (c <= 0x0008))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   532
                || ((c >= 0x000E) && (c <= 0x001B))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   533
                || ((c >= 0x007F) && (c <= 0x009F))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   534
                || us.generalCategory == UnicodeSpec.FORMAT) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   535
            resultA |= valueIgnorable;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   536
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   537
        else if (us.generalCategory == UnicodeSpec.SPACE_SEPARATOR
90ce3da70b43 Initial load
duke
parents:
diff changeset
   538
                || us.generalCategory == UnicodeSpec.LINE_SEPARATOR
90ce3da70b43 Initial load
duke
parents:
diff changeset
   539
                || us.generalCategory == UnicodeSpec.PARAGRAPH_SEPARATOR) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   540
            if (!isInvalidJavaWhiteSpace(c)) resultA |= valueJavaWhitespace;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   541
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   542
        else if (((c >= 0x0009) && (c <= 0x000D))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   543
                || ((c >= 0x001C) && (c <= 0x001F))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   544
            resultA |= valueJavaWhitespace;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   545
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   546
90ce3da70b43 Initial load
duke
parents:
diff changeset
   547
        // record bidi category
90ce3da70b43 Initial load
duke
parents:
diff changeset
   548
        if (!nobidi) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   549
            int tmpBidi =
90ce3da70b43 Initial load
duke
parents:
diff changeset
   550
                (us.bidiCategory > UnicodeSpec.DIRECTIONALITY_OTHER_NEUTRALS ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   551
                    us.bidiCategory == -1) ? maskBidi : (us.bidiCategory << shiftBidi);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   552
            resultA |= tmpBidi;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   553
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   554
90ce3da70b43 Initial load
duke
parents:
diff changeset
   555
        // record mirrored property
90ce3da70b43 Initial load
duke
parents:
diff changeset
   556
        if (!nomirror) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   557
            resultA |= us.mirrored ? maskMirrored : 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   558
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   559
90ce3da70b43 Initial load
duke
parents:
diff changeset
   560
        if (identifiers) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   561
            long replacement = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   562
            if ((resultA & maskIdentifierInfo) >= lowJavaStart) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   563
                replacement |= bitJavaStart;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   564
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   565
            if ( ((resultA & nonzeroJavaPart) != 0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   566
                    && ((resultA & maskIdentifierInfo) != valueIgnorable)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   567
                replacement |= bitJavaPart;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   568
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   569
            resultA = replacement;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   570
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   571
        return resultA;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   572
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   573
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   574
    static void addExProp(long[] map, PropList propList, String prop, long mask) {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   575
        List<Integer> cps = propList.codepoints(prop);
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   576
        if (cps != null) {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   577
            for (Integer cp : cps) {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   578
                if (cp < map.length)
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   579
                    map[cp] |= mask;
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   580
            }
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   581
        }
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   582
    }
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   583
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   584
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   585
    * This is the heart of the table compression strategy.  The inputs are a map
90ce3da70b43 Initial load
duke
parents:
diff changeset
   586
    * and a number of bits (size).  The map is simply an array of long integer values;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   587
    * the number of bits indicates how index values for that map are to be split.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   588
    * The length of the given map must be a multiple of (1 << size).  The result is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   589
    * a new map z and a compressed table t such that for every valid index value k
90ce3da70b43 Initial load
duke
parents:
diff changeset
   590
    * for the original map, t[(z[k>>size]<<size)|(k & ((1<<size)-1))] == map[k].
90ce3da70b43 Initial load
duke
parents:
diff changeset
   591
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   592
    * In other words, the index k can be split into two parts, namely the "size"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   593
    * low-order bits and all the remaining high-order bits; the high-order bits are then
90ce3da70b43 Initial load
duke
parents:
diff changeset
   594
    * remapped by map z to produce an index into table t.  In effect, the data of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   595
    * original map m is broken up into blocks of size (1<<size); the compression relies
90ce3da70b43 Initial load
duke
parents:
diff changeset
   596
    * on the expectation that many of these blocks will be identical and therefore need
90ce3da70b43 Initial load
duke
parents:
diff changeset
   597
    * be represented only once in the compressed table t.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   598
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   599
    * This method is intended to be used iteratively.  The first map to be handed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   600
    * to it is the one constructed by method buildMap.  After that, the first of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   601
    * two arrays returned by this method is fed back into it for further compression.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   602
    * At the end of the iteration, one has a starter map and a sequence of tables.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   603
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   604
    * The algorithm used to implement this computation is straightforward and not
90ce3da70b43 Initial load
duke
parents:
diff changeset
   605
    * especially clever.  It uses brute-force linear search (the loop labeled MIDDLE)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   606
    * to locate identical blocks, so overall the time complexity of the algorithm
90ce3da70b43 Initial load
duke
parents:
diff changeset
   607
    * is quadratic in the length of the input map.  Fortunately, speed is not crucial
90ce3da70b43 Initial load
duke
parents:
diff changeset
   608
    * to this application.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   609
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   610
    * @param map                a map to be compressed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   611
    * @param size       the number of index bits to be split off by the compression
90ce3da70b43 Initial load
duke
parents:
diff changeset
   612
    * @return   an array of length 2 containing two arrays; the first is a new map
90ce3da70b43 Initial load
duke
parents:
diff changeset
   613
    *           and the second is a compressed data table
90ce3da70b43 Initial load
duke
parents:
diff changeset
   614
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   615
    * @see GenerateCharacter#buildMap
90ce3da70b43 Initial load
duke
parents:
diff changeset
   616
    */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   617
90ce3da70b43 Initial load
duke
parents:
diff changeset
   618
    static long[][] buildTable(long[] map, int size) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   619
        int n = map.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   620
        if (((n >> size) << size) != n) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   621
            FAIL("Length " + n + " is not a multiple of " + (1 << size));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   622
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   623
        int m = 1 << size;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   624
        // We know the final length of the new map up front.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   625
        long[] newmap = new long[n >> size];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   626
        // The buffer is used temporarily to hold data for the compressed table
90ce3da70b43 Initial load
duke
parents:
diff changeset
   627
        // because we don't know its final length yet.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   628
        long[] buffer = new long[n];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   629
        int ptr = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   630
OUTER:  for (int i = 0; i < n; i += m) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   631
            // For every block of size m in the original map...
90ce3da70b43 Initial load
duke
parents:
diff changeset
   632
    MIDDLE: for (int j = 0; j < ptr; j += m) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   633
            // Find out whether there is already a block just like it in the buffer.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   634
                for (int k = 0; k < m; k++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   635
                    if (buffer[j+k] != map[i+k])
90ce3da70b43 Initial load
duke
parents:
diff changeset
   636
                        continue MIDDLE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   637
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   638
                // There is a block just like it at position j, so just
90ce3da70b43 Initial load
duke
parents:
diff changeset
   639
                // put its index into the new map (thereby sharing it).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   640
                newmap[i >> size] = (j >> size);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   641
                continue OUTER;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   642
            } // end MIDDLE
90ce3da70b43 Initial load
duke
parents:
diff changeset
   643
            // There is no block just like it already, so add it to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   644
            // the buffer and put its index into the new map.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   645
            for (int k = 0; k < m; k++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   646
                buffer[ptr+k] = map[i+k];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   647
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   648
            newmap[i >> size] = (ptr >> size);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   649
            ptr += m;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   650
        } // end OUTER
90ce3da70b43 Initial load
duke
parents:
diff changeset
   651
        // Now we know how long the compressed table should be,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   652
        // so create a new array and copy data from the temporary buffer.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   653
        long[] newdata = new long[ptr];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   654
        for (int j = 0; j < ptr; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   655
            newdata[j] = buffer[j];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   656
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   657
        // Return the new map and the new data table.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   658
        long[][] result = { newmap, newdata };
90ce3da70b43 Initial load
duke
parents:
diff changeset
   659
        return result;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   660
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   661
90ce3da70b43 Initial load
duke
parents:
diff changeset
   662
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   663
    * Once the compressed tables have been computed, this method reads in a
90ce3da70b43 Initial load
duke
parents:
diff changeset
   664
    * template file for the source code to be generated and writes out the final
90ce3da70b43 Initial load
duke
parents:
diff changeset
   665
    * source code by acting as a sort of specialized macro processor.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   666
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   667
    * The first output line is a comment saying that the file was automatically
90ce3da70b43 Initial load
duke
parents:
diff changeset
   668
    * generated; it includes a timestamp.  All other output is generated by
90ce3da70b43 Initial load
duke
parents:
diff changeset
   669
    * reading a line from the template file, performing macro replacements,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   670
    * and then writing the resulting line or lines of code to the output file.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   671
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   672
    * This method handles the I/O, the timestamp comment, and the locating of
90ce3da70b43 Initial load
duke
parents:
diff changeset
   673
    * macro calls within each input line.  The method replaceCommand is called
90ce3da70b43 Initial load
duke
parents:
diff changeset
   674
    * to generate replacement text for each macro call.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   675
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   676
    * Macro calls to be replaced are indicated in the template file by
90ce3da70b43 Initial load
duke
parents:
diff changeset
   677
    * occurrences of the commandMarker "$$".  The rest of the call may consist
90ce3da70b43 Initial load
duke
parents:
diff changeset
   678
    * of Java letters (including the underscore "_") and also of balanced
90ce3da70b43 Initial load
duke
parents:
diff changeset
   679
    * parentheses.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   680
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   681
    * @param theTemplateFileName
90ce3da70b43 Initial load
duke
parents:
diff changeset
   682
    *           the file name for the template input file
90ce3da70b43 Initial load
duke
parents:
diff changeset
   683
    * @param theOutputFileName
90ce3da70b43 Initial load
duke
parents:
diff changeset
   684
    *           the file name for the source code output file
90ce3da70b43 Initial load
duke
parents:
diff changeset
   685
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   686
    *     @see GenerateCharacter#replaceCommand
90ce3da70b43 Initial load
duke
parents:
diff changeset
   687
    */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   688
90ce3da70b43 Initial load
duke
parents:
diff changeset
   689
    static void generateCharacterClass(String theTemplateFileName,
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   690
                                       String theOutputFileName)
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   691
        throws FileNotFoundException, IOException {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   692
        BufferedReader in = new BufferedReader(new FileReader(theTemplateFileName));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   693
        PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(theOutputFileName)));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   694
        out.println(commentStart +
90ce3da70b43 Initial load
duke
parents:
diff changeset
   695
            " This file was generated AUTOMATICALLY from a template file " +
90ce3da70b43 Initial load
duke
parents:
diff changeset
   696
            new java.util.Date() + commentEnd);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   697
        int marklen = commandMarker.length();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   698
        LOOP: while(true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   699
            try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   700
                String line = in.readLine();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   701
                if (line == null) break LOOP;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   702
                int pos = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   703
                int depth = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   704
                while ((pos = line.indexOf(commandMarker, pos)) >= 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   705
                    int newpos = pos + marklen;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   706
                    char ch = 'x';
90ce3da70b43 Initial load
duke
parents:
diff changeset
   707
                    SCAN: while (newpos < line.length() &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   708
                            (Character.isJavaIdentifierStart(ch = line.charAt(newpos))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   709
                            || ch == '(' || (ch == ')' && depth > 0))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   710
                        ++newpos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   711
                        if (ch == '(') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   712
                            ++depth;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   713
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   714
                        else if (ch == ')') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   715
                            --depth;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   716
                            if (depth == 0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   717
                                break SCAN;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   718
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   719
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   720
                    String replacement = replaceCommand(line.substring(pos + marklen, newpos));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   721
                    line = line.substring(0, pos) + replacement + line.substring(newpos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   722
                    pos += replacement.length();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   723
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   724
                out.println(line);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   725
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   726
            catch (IOException e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   727
                break LOOP;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   728
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   729
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   730
        in.close();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   731
        out.close();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   732
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   733
90ce3da70b43 Initial load
duke
parents:
diff changeset
   734
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   735
    * The replaceCommand method takes a command (a macro call without the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   736
    * leading marker "$$") and computes replacement text for it.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   737
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   738
    * Most of the commands are simply names of integer constants that are defined
90ce3da70b43 Initial load
duke
parents:
diff changeset
   739
    * in the source code of this GenerateCharacter class.  The replacement text is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   740
    * simply the value of the constant as an appropriately formatted integer literal.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   741
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   742
    * Two cases are more complicated, however.  The command "Tables" causes the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   743
    * final map and compressed tables to be emitted, with elaborate comments
90ce3da70b43 Initial load
duke
parents:
diff changeset
   744
    * describing their contents.  (This is actually handled by method genTables.)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   745
    * The command "Lookup(xxx)", where "xxx" is the name of a variable, generates
90ce3da70b43 Initial load
duke
parents:
diff changeset
   746
    * an expression that will return the character property data for the character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   747
    * whose code is the value of the variable "xxx".  (This is handled by method
90ce3da70b43 Initial load
duke
parents:
diff changeset
   748
    * "genAccess".)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   749
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   750
    * @param x  a command from the template file to be replaced
90ce3da70b43 Initial load
duke
parents:
diff changeset
   751
    * @return   the replacement text, as a String
90ce3da70b43 Initial load
duke
parents:
diff changeset
   752
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   753
    * @see GenerateCharacter#genTables
90ce3da70b43 Initial load
duke
parents:
diff changeset
   754
    * @see GenerateCharacter#genAccess
90ce3da70b43 Initial load
duke
parents:
diff changeset
   755
    * @see GenerateCharacter#generateCharacterClass
90ce3da70b43 Initial load
duke
parents:
diff changeset
   756
    */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   757
90ce3da70b43 Initial load
duke
parents:
diff changeset
   758
    static String replaceCommand(String x) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   759
        if (x.equals("Tables")) return genTables();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   760
        if (x.equals("Initializers")) return genInitializers();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   761
        if (x.length() >= 9 && x.substring(0, 7).equals("Lookup(") &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   762
                x.substring(x.length()-1).equals(")") )
90ce3da70b43 Initial load
duke
parents:
diff changeset
   763
            return genAccess("A", x.substring(7, x.length()-1), (identifiers ? 2 : 32));
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   764
        if (x.length() >= 11 && x.substring(0, 9).equals("LookupEx(") &&
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   765
                x.substring(x.length()-1).equals(")") )
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   766
            return genAccess("B", x.substring(9, x.length()-1), 16);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   767
        if (x.equals("shiftType")) return Long.toString(shiftType);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   768
        if (x.equals("shiftIdentifierInfo")) return Long.toString(shiftIdentifierInfo);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   769
        if (x.equals("maskIdentifierInfo")) return "0x" + hex8(maskIdentifierInfo);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   770
        if (x.equals("maskUnicodePart")) return "0x" + hex8(maskUnicodePart);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   771
        if (x.equals("shiftCaseOffset")) return Long.toString(shiftCaseOffset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   772
        if (x.equals("shiftCaseInfo")) return Long.toString(shiftCaseInfo);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   773
        if (x.equals("shiftCaseOffsetSign")) return Long.toString(shiftCaseOffsetSign);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   774
        if (x.equals("maskCase")) return "0x" + hex8(maskCase);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   775
        if (x.equals("maskCaseOffset")) return "0x" + hex8(maskCaseOffset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   776
        if (x.equals("maskLowerCase")) return "0x" + hex8(maskLowerCase);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   777
        if (x.equals("maskUpperCase")) return "0x" + hex8(maskUpperCase);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   778
        if (x.equals("maskTitleCase")) return "0x" + hex8(maskTitleCase);
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   779
        if (x.equals("maskOtherLowercase")) return "0x" + hex4(maskOtherLowercase >> 32);
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   780
        if (x.equals("maskOtherUppercase")) return "0x" + hex4(maskOtherUppercase >> 32);
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   781
        if (x.equals("maskOtherAlphabetic")) return "0x" + hex4(maskOtherAlphabetic >> 32);
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   782
        if (x.equals("maskIdeographic")) return "0x" + hex4(maskIdeographic >> 32);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   783
        if (x.equals("valueIgnorable")) return "0x" + hex8(valueIgnorable);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   784
        if (x.equals("valueJavaUnicodeStart")) return "0x" + hex8(valueJavaUnicodeStart);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   785
        if (x.equals("valueJavaOnlyStart")) return "0x" + hex8(valueJavaOnlyStart);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   786
        if (x.equals("valueJavaUnicodePart")) return "0x" + hex8(valueJavaUnicodePart);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   787
        if (x.equals("valueJavaOnlyPart")) return "0x" + hex8(valueJavaOnlyPart);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   788
        if (x.equals("valueJavaWhitespace")) return "0x" + hex8(valueJavaWhitespace);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   789
        if (x.equals("lowJavaStart")) return "0x" + hex8(lowJavaStart);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   790
        if (x.equals("nonzeroJavaPart")) return "0x" + hex8(nonzeroJavaPart);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   791
        if (x.equals("bitJavaStart")) return "0x" + hex8(bitJavaStart);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   792
        if (x.equals("bitJavaPart")) return Long.toString(bitJavaPart);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   793
        if (x.equals("valueUnicodeStart")) return "0x" + hex8(valueUnicodeStart);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   794
        if (x.equals("maskIsJavaIdentifierStart")) return "0x" + hex(maskIsJavaIdentifierStart);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   795
        if (x.equals("maskIsJavaIdentifierPart")) return "0x" + hex(maskIsJavaIdentifierPart);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   796
        if (x.equals("shiftDigitOffset")) return Long.toString(shiftDigitOffset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   797
        if (x.equals("maskDigitOffset")) return "0x" + hex(maskDigitOffset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   798
        if (x.equals("maskDigit")) return "0x" + hex(maskDigit);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   799
        if (x.equals("shiftNumericType")) return Long.toString(shiftNumericType);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   800
        if (x.equals("maskNumericType")) return "0x" + hex(maskNumericType);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   801
        if (x.equals("valueNotNumeric")) return "0x" + hex8(valueNotNumeric);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   802
        if (x.equals("valueDigit")) return "0x" + hex8(valueDigit);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   803
        if (x.equals("valueStrangeNumeric")) return "0x" + hex8(valueStrangeNumeric);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   804
        if (x.equals("valueJavaSupradecimal")) return "0x" + hex8(valueJavaSupradecimal);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   805
        if (x.equals("valueDigit")) return "0x" + hex8(valueDigit);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   806
        if (x.equals("valueStrangeNumeric")) return "0x" + hex8(valueStrangeNumeric);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   807
        if (x.equals("maskType")) return "0x" + hex(maskType);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   808
        if (x.equals("shiftBidi")) return Long.toString(shiftBidi);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   809
        if (x.equals("maskBidi")) return "0x" + hex(maskBidi);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   810
        if (x.equals("maskMirrored")) return "0x" + hex8(maskMirrored);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   811
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.UNASSIGNED][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   812
            return Integer.toString(UnicodeSpec.UNASSIGNED);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   813
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.UPPERCASE_LETTER][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   814
            return Integer.toString(UnicodeSpec.UPPERCASE_LETTER);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   815
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.LOWERCASE_LETTER][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   816
            return Integer.toString(UnicodeSpec.LOWERCASE_LETTER);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   817
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.TITLECASE_LETTER][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   818
            return Integer.toString(UnicodeSpec.TITLECASE_LETTER);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   819
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.MODIFIER_LETTER][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   820
             return Integer.toString(UnicodeSpec.MODIFIER_LETTER);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   821
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.OTHER_LETTER][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   822
             return Integer.toString(UnicodeSpec.OTHER_LETTER);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   823
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.NON_SPACING_MARK][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   824
             return Integer.toString(UnicodeSpec.NON_SPACING_MARK);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   825
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.ENCLOSING_MARK][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   826
             return Integer.toString(UnicodeSpec.ENCLOSING_MARK);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   827
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.COMBINING_SPACING_MARK][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   828
             return Integer.toString(UnicodeSpec.COMBINING_SPACING_MARK);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   829
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.DECIMAL_DIGIT_NUMBER][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   830
             return Integer.toString(UnicodeSpec.DECIMAL_DIGIT_NUMBER);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   831
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.OTHER_NUMBER][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   832
             return Integer.toString(UnicodeSpec.OTHER_NUMBER);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   833
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.SPACE_SEPARATOR][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   834
             return Integer.toString(UnicodeSpec.SPACE_SEPARATOR);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   835
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.LINE_SEPARATOR][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   836
             return Integer.toString(UnicodeSpec.LINE_SEPARATOR);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   837
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.PARAGRAPH_SEPARATOR][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   838
             return Integer.toString(UnicodeSpec.PARAGRAPH_SEPARATOR);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   839
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.CONTROL][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   840
            return Integer.toString(UnicodeSpec.CONTROL);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   841
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.FORMAT][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   842
            return Integer.toString(UnicodeSpec.FORMAT);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   843
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.PRIVATE_USE][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   844
            return Integer.toString(UnicodeSpec.PRIVATE_USE);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   845
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.SURROGATE][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   846
            return Integer.toString(UnicodeSpec.SURROGATE);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   847
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.DASH_PUNCTUATION][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   848
            return Integer.toString(UnicodeSpec.DASH_PUNCTUATION);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   849
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.START_PUNCTUATION][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   850
            return Integer.toString(UnicodeSpec.START_PUNCTUATION);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   851
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.END_PUNCTUATION][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   852
            return Integer.toString(UnicodeSpec.END_PUNCTUATION);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   853
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.INITIAL_QUOTE_PUNCTUATION][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   854
            return Integer.toString(UnicodeSpec.INITIAL_QUOTE_PUNCTUATION);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   855
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.FINAL_QUOTE_PUNCTUATION][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   856
            return Integer.toString(UnicodeSpec.FINAL_QUOTE_PUNCTUATION);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   857
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.CONNECTOR_PUNCTUATION][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   858
            return Integer.toString(UnicodeSpec.CONNECTOR_PUNCTUATION);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   859
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.OTHER_PUNCTUATION][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   860
            return Integer.toString(UnicodeSpec.OTHER_PUNCTUATION);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   861
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.LETTER_NUMBER][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   862
            return Integer.toString(UnicodeSpec.LETTER_NUMBER);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   863
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.MATH_SYMBOL][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   864
            return Integer.toString(UnicodeSpec.MATH_SYMBOL);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   865
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.CURRENCY_SYMBOL][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   866
            return Integer.toString(UnicodeSpec.CURRENCY_SYMBOL);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   867
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.MODIFIER_SYMBOL][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   868
            return Integer.toString(UnicodeSpec.MODIFIER_SYMBOL);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   869
        if (x.equals(UnicodeSpec.generalCategoryList[UnicodeSpec.OTHER_SYMBOL][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   870
            return Integer.toString(UnicodeSpec.OTHER_SYMBOL);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   871
        if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_LEFT_TO_RIGHT][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   872
            return Integer.toString(UnicodeSpec.DIRECTIONALITY_LEFT_TO_RIGHT);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   873
        if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   874
            return Integer.toString(UnicodeSpec.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   875
        if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   876
            return Integer.toString(UnicodeSpec.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   877
        if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_RIGHT_TO_LEFT][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   878
            return Integer.toString(UnicodeSpec.DIRECTIONALITY_RIGHT_TO_LEFT);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   879
        if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   880
            return Integer.toString(UnicodeSpec.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   881
        if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   882
            return Integer.toString(UnicodeSpec.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   883
        if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   884
            return Integer.toString(UnicodeSpec.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   885
        if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   886
            return Integer.toString(UnicodeSpec.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   887
        if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   888
            return Integer.toString(UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   889
        if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   890
            return Integer.toString(UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   891
        if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   892
            return Integer.toString(UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   893
        if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_ARABIC_NUMBER][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   894
            return Integer.toString(UnicodeSpec.DIRECTIONALITY_ARABIC_NUMBER);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   895
        if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   896
            return Integer.toString(UnicodeSpec.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   897
        if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_NONSPACING_MARK][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   898
            return Integer.toString(UnicodeSpec.DIRECTIONALITY_NONSPACING_MARK);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   899
         if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_BOUNDARY_NEUTRAL][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   900
            return Integer.toString(UnicodeSpec.DIRECTIONALITY_BOUNDARY_NEUTRAL);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   901
        if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_PARAGRAPH_SEPARATOR][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   902
            return Integer.toString(UnicodeSpec.DIRECTIONALITY_PARAGRAPH_SEPARATOR);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   903
        if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_SEGMENT_SEPARATOR][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   904
            return Integer.toString(UnicodeSpec.DIRECTIONALITY_SEGMENT_SEPARATOR);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   905
        if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_WHITESPACE][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   906
            return Integer.toString(UnicodeSpec.DIRECTIONALITY_WHITESPACE);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   907
        if (x.equals(UnicodeSpec.bidiCategoryList[UnicodeSpec.DIRECTIONALITY_OTHER_NEUTRALS][UnicodeSpec.LONG]))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   908
            return Integer.toString(UnicodeSpec.DIRECTIONALITY_OTHER_NEUTRALS);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   909
        FAIL("Unknown text substitution marker " + commandMarker + x);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   910
        return commandMarker + x;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   911
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   912
90ce3da70b43 Initial load
duke
parents:
diff changeset
   913
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   914
    * The genTables method generates source code for all the lookup tables
90ce3da70b43 Initial load
duke
parents:
diff changeset
   915
    * needed to represent the various Unicode character properties.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   916
    * It simply calls the method genTable once for each table to be generated
90ce3da70b43 Initial load
duke
parents:
diff changeset
   917
    * and then generates a summary comment.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   918
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   919
    * @return   the replacement text for the "Tables" command, as a String
90ce3da70b43 Initial load
duke
parents:
diff changeset
   920
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   921
    * @see GenerateCharacter#genTable
90ce3da70b43 Initial load
duke
parents:
diff changeset
   922
    * @see GenerateCharacter#replaceCommand
90ce3da70b43 Initial load
duke
parents:
diff changeset
   923
    */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   924
    static String genTables() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   925
        int n = sizes.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   926
        StringBuffer result = new StringBuffer();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   927
        // liu : Add a comment showing the source of this table
90ce3da70b43 Initial load
duke
parents:
diff changeset
   928
        result.append(commentStart + " The following tables and code generated using:" +
90ce3da70b43 Initial load
duke
parents:
diff changeset
   929
                  commentEnd + "\n  ");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   930
        result.append(commentStart + ' ' + commandLineDescription + commentEnd + "\n  ");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   931
90ce3da70b43 Initial load
duke
parents:
diff changeset
   932
                if (plane == 0 && bLatin1 == false) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   933
            genCaseMapTableDeclaration(result);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   934
            genCaseMapTable(initializers, specialCaseMaps);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   935
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   936
        int totalBytes = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   937
        for (int k = 0; k < n - 1; k++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   938
            genTable(result, tableNames[k], tables[k], 0, bytes[k]<<3, sizes[k], preshifted[k],
90ce3da70b43 Initial load
duke
parents:
diff changeset
   939
                sizes[k+1], false, false, k==0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   940
            int s = bytes[k];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   941
            if (s == 1 && useCharForByte) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   942
                s = 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   943
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   944
            totalBytes += tables[k].length * s;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   945
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   946
        genTable(result, "A", tables[n - 1], 0, (identifiers ? 2 : 32),
90ce3da70b43 Initial load
duke
parents:
diff changeset
   947
            sizes[n - 1], false, 0, true, !(identifiers), false);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   948
90ce3da70b43 Initial load
duke
parents:
diff changeset
   949
        // If we ever need more than 32 bits to represent the character properties,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   950
        // then a table "B" may be needed as well.
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
   951
        genTable(result, "B", tables[n - 1], 32, 16, sizes[n - 1], false, 0, true, true, false);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   952
90ce3da70b43 Initial load
duke
parents:
diff changeset
   953
        totalBytes += ((((tables[n - 1].length * (identifiers ? 2 : 32)) + 31) >> 5) << 2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   954
        result.append(commentStart);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   955
        result.append(" In all, the character property tables require ");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   956
        result.append(totalBytes).append(" bytes.").append(commentEnd);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   957
        if (verbose) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   958
            System.out.println("The character property tables require "
90ce3da70b43 Initial load
duke
parents:
diff changeset
   959
                 + totalBytes + " bytes.");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   960
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   961
        return result.toString();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   962
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   963
90ce3da70b43 Initial load
duke
parents:
diff changeset
   964
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   965
     * The genInitializers method generates the body of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   966
     * ensureInitted() method, which enables lazy initialization of
90ce3da70b43 Initial load
duke
parents:
diff changeset
   967
     * the case map table and other tables.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   968
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   969
    static String genInitializers() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   970
        return initializers.toString();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   971
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   972
90ce3da70b43 Initial load
duke
parents:
diff changeset
   973
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   974
     * Return the total number of bytes needed by all tables.  This is a stripped-
90ce3da70b43 Initial load
duke
parents:
diff changeset
   975
     * down copy of genTables().
90ce3da70b43 Initial load
duke
parents:
diff changeset
   976
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   977
    static int getTotalBytes() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   978
        int n = sizes.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   979
        int totalBytes = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   980
        for (int k = 0; k < n - 1; k++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   981
            totalBytes += tables[k].length * bytes[k];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   982
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   983
        totalBytes += ((((tables[n - 1].length * (identifiers ? 2 : 32))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   984
                         + 31) >> 5) << 2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   985
        return totalBytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   986
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   987
90ce3da70b43 Initial load
duke
parents:
diff changeset
   988
    static void appendEscapedStringFragment(StringBuffer result,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   989
                                            char[] line,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   990
                                            int length,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   991
                                            boolean lastFragment) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   992
        result.append("    \"");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   993
        for (int k=0; k<length; ++k) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   994
            result.append("\\u");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   995
            result.append(hex4(line[k]));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   996
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   997
        result.append("\"");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   998
        result.append(lastFragment ? ";" : "+");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   999
        result.append("\n");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1000
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1001
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1002
    static String SMALL_INITIALIZER =
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1003
        "        { // THIS CODE WAS AUTOMATICALLY CREATED BY GenerateCharacter:\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1004
        // "            $$name = new $$type[$$size];\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1005
        "            int len = $$name_DATA.length();\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1006
        "            int j=0;\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1007
        "            for (int i=0; i<len; ++i) {\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1008
        "                int c = $$name_DATA.charAt(i);\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1009
        "                for (int k=0; k<$$entriesPerChar; ++k) {\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1010
        "                    $$name[j++] = ($$type)c;\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1011
        "                    c >>= $$bits;\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1012
        "                }\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1013
        "            }\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1014
        "            assert (j == $$size);\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1015
        "        }\n";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1016
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1017
    static String SAME_SIZE_INITIALIZER =
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1018
        "        { // THIS CODE WAS AUTOMATICALLY CREATED BY GenerateCharacter:\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1019
        "            assert ($$name_DATA.length() == $$size);\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1020
        // "            $$name = new $$type[$$size];\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1021
        "            for (int i=0; i<$$size; ++i)\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1022
        "                $$name[i] = ($$type)$$name_DATA.charAt(i);\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1023
        "        }\n";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1024
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1025
    static String BIG_INITIALIZER =
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1026
        "        { // THIS CODE WAS AUTOMATICALLY CREATED BY GenerateCharacter:\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1027
        // "            $$name = new $$type[$$size];\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1028
        "            int len = $$name_DATA.length();\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1029
        "            int j=0;\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1030
        "            int charsInEntry=0;\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1031
        "            $$type entry=0;\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1032
        "            for (int i=0; i<len; ++i) {\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1033
        "                entry |= $$name_DATA.charAt(i);\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1034
        "                if (++charsInEntry == $$charsPerEntry) {\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1035
        "                    $$name[j++] = entry;\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1036
        "                    entry = 0;\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1037
        "                    charsInEntry = 0;\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1038
        "                }\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1039
        "                else {\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1040
        "                    entry <<= 16;\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1041
        "                }\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1042
        "            }\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1043
        "            assert (j == $$size);\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1044
        "        }\n";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1045
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1046
    static String INT32_INITIALIZER =
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1047
        "        { // THIS CODE WAS AUTOMATICALLY CREATED BY GenerateCharacter:\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1048
        "            char[] data = $$name_DATA.toCharArray();\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1049
        "            assert (data.length == ($$size * 2));\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1050
        "            int i = 0, j = 0;\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1051
        "            while (i < ($$size * 2)) {\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1052
        "                int entry = data[i++] << 16;\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1053
        "                $$name[j++] = entry | data[i++];\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1054
        "            }\n"+
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1055
        "        }\n";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1056
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1057
    static void addInitializer(String name, String type, int entriesPerChar,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1058
                               int bits, int size) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1059
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1060
        String template = (entriesPerChar == 1) ? SAME_SIZE_INITIALIZER :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1061
                          ((entriesPerChar > 0) ? SMALL_INITIALIZER : BIG_INITIALIZER);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1062
        if (entriesPerChar == -2) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1063
            template = INT32_INITIALIZER;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1064
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1065
        int marklen = commandMarker.length();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1066
        int pos = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1067
        while ((pos = template.indexOf(commandMarker, pos)) >= 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1068
            int newpos = pos + marklen;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1069
            char ch = 'x';
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1070
            while (newpos < template.length() &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1071
                   Character.isJavaIdentifierStart(ch = template.charAt(newpos)) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1072
                   ch != '_') // Don't allow this in token names
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1073
                ++newpos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1074
            String token = template.substring(pos+marklen, newpos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1075
            String replacement = "ERROR";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1076
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1077
            if (token.equals("name")) replacement = name;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1078
            else if (token.equals("type")) replacement = type;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1079
            else if (token.equals("bits")) replacement = ""+bits;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1080
            else if (token.equals("size")) replacement = ""+size;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1081
            else if (token.equals("entriesPerChar")) replacement = ""+entriesPerChar;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1082
            else if (token.equals("charsPerEntry")) replacement = ""+(-entriesPerChar);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1083
            else FAIL("Unrecognized token: " + token);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1084
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1085
            template = template.substring(0, pos) + replacement + template.substring(newpos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1086
            pos += replacement.length();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1087
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1088
        initializers.append(template);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1089
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1090
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1091
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1092
    * The genTable method generates source code for one lookup table.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1093
    * Most of the complexity stems from handling various options as to
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1094
    * the type of the array components, the precise representation of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1095
    * values, the format in which to render each value, the number of values
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1096
    * to emit on each line of source code, and the kinds of useful comments
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1097
    * to be generated.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1098
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1099
    * @param result     a StringBuffer, to which the generated source code
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1100
    *                   text is to be appended
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1101
    * @param name       the name of the table
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1102
    * @param table      the table data (an array of long values)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1103
    * @param extract    a distance, in bits, by which each entry of the table
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1104
    *                   is to be right-shifted before it is processed
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1105
    * @param bits       the number of bits (not bytes) to be used to represent
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1106
    *                   each table entry
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1107
    * @param size       the table data is divided up into blocks of size (1<<size);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1108
    *                   in this method, this information is used only to affect
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1109
    *                   how many table values are to be generated per line
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1110
    * @param preshifted if this flag is true, then the table entries are to be
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1111
    *                   emitted in a preshifted form; that is, each value should
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1112
    *                   be left-shifted by the amount "shift", so that this work
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1113
    *                   is built into the table and need not be performed by an
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1114
    *                   explicit shift operator at run time
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1115
    * @param shift      this is the shift amount for preshifting of table entries
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1116
    * @param hexFormat  if this flag is true, table entries should be emitted as
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1117
    *                   hexadecimal literals; otherwise decimal literals are used
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1118
    * @param properties if this flag is true, the table entries are encoded
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1119
    *                   character properties rather than indexes into yet other tables;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1120
    *                   therefore comments describing the encoded properties should
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1121
    *                   be generated
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1122
    * @param hexComment if this flag is true, each line of output is labelled with
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1123
    *                   a hexadecimal comment indicating the character values to
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1124
    *                   which that line applies; otherwise, decimal values indicating
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1125
    *                   table indices are generated
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1126
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1127
    * @see GenerateCharacter#genTables
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1128
    * @see GenerateCharacter#replaceCommand
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1129
    */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1130
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1131
    static void genTable(StringBuffer result, String name,
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1132
                         long[] table, int extract, int bits, int size,
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1133
                         boolean preshifted, int shift, boolean hexFormat,
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1134
                         boolean properties, boolean hexComment) {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1135
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1136
        String atype = bits == 1 ? (Csyntax ? "unsigned long" : "int") :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1137
            bits == 2 ? (Csyntax ? "unsigned long" : "int") :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1138
            bits == 4 ? (Csyntax ? "unsigned long" : "int") :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1139
            bits == 8 ? (Csyntax ? "unsigned char" : "byte") :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1140
            bits == 16 ? (Csyntax ? "unsigned short" : "char") :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1141
            bits == 32 ? (Csyntax ? "unsigned long" : "int") :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1142
            (Csyntax ? "int64" : "long");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1143
        long maxPosEntry = bits == 1 ? Integer.MAX_VALUE : // liu
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1144
            bits == 2 ? Integer.MAX_VALUE :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1145
            bits == 4 ? Integer.MAX_VALUE :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1146
            bits == 8 ? Byte.MAX_VALUE :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1147
            bits == 16 ? Short.MAX_VALUE :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1148
            bits == 32 ? Integer.MAX_VALUE :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1149
            Long.MAX_VALUE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1150
        int entriesPerChar = bits <= 16 ? (16 / bits) : -(bits / 16);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1151
        boolean shiftEntries = preshifted && shift != 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1152
        if (bits == 8 && tableAsString && useCharForByte) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1153
            atype = "char";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1154
            maxPosEntry = Character.MAX_VALUE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1155
            entriesPerChar = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1156
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1157
        boolean noConversion = atype.equals("char");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1158
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1159
        result.append(commentStart);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1160
        result.append(" The ").append(name).append(" table has ").append(table.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1161
        result.append(" entries for a total of ");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1162
        int sizeOfTable = ((table.length * bits + 31) >> 5) << 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1163
        if (bits == 8 && useCharForByte) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1164
            sizeOfTable *= 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1165
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1166
        result.append(sizeOfTable);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1167
        result.append(" bytes.").append(commentEnd).append("\n\n");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1168
        if (Csyntax)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1169
            result.append("  static ");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1170
        else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1171
            result.append("  static final ");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1172
        result.append(atype);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1173
        result.append(" ").append(name).append("[");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1174
        if (Csyntax)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1175
            result.append(table.length >> (bits == 1 ? 5 : bits == 2 ? 4 : bits == 4 ? 3 : 0));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1176
        if (tableAsString) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1177
            if (noConversion) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1178
                result.append("] = (\n");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1179
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1180
                result.append("] = new ").append(atype).append("["+table.length+"];\n  ");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1181
                result.append("static final String ").append(name).append("_DATA =\n");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1182
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1183
            int CHARS_PER_LINE = 8;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1184
            StringBuffer theString = new StringBuffer();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1185
            int entriesInCharSoFar = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1186
            char ch = '\u0000';
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1187
            int charsPerEntry = -entriesPerChar;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1188
            for (int j=0; j<table.length; ++j) {
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1189
                //long entry = table[j] >> extract;
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1190
                long entry;
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1191
                if ("A".equals(name))
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1192
                    entry = (table[j] & 0xffffffffL) >> extract;
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1193
                else
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1194
                    entry = (table[j] >> extract);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1195
                if (shiftEntries) entry <<= shift;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1196
                if (entry >= (1L << bits)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1197
                    FAIL("Entry too big");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1198
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1199
                if (entriesPerChar > 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1200
                    // Pack multiple entries into a character
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1201
                    ch = (char)(((int)ch >> bits) | (entry << (entriesPerChar-1)*bits));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1202
                    ++entriesInCharSoFar;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1203
                    if (entriesInCharSoFar == entriesPerChar) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1204
                        // Character is full
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1205
                        theString.append(ch);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1206
                        entriesInCharSoFar = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1207
                        ch = '\u0000';
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1208
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1209
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1210
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1211
                    // Use multiple characters per entry
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1212
                    for (int k=0; k<charsPerEntry; ++k) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1213
                        ch = (char)(entry >> ((charsPerEntry-1)*16));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1214
                        entry <<= 16;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1215
                        theString.append(ch);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1216
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1217
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1218
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1219
            if (entriesInCharSoFar > 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1220
                while (entriesInCharSoFar < entriesPerChar) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1221
                    ch = (char)((int)ch >> bits);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1222
                    ++entriesInCharSoFar;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1223
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1224
                theString.append(ch);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1225
                entriesInCharSoFar = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1226
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1227
            result.append(Utility.formatForSource(theString.toString(), "    "));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1228
            if (noConversion) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1229
                result.append(").toCharArray()");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1230
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1231
            result.append(";\n\n  ");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1232
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1233
            if (!noConversion) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1234
                addInitializer(name, atype, entriesPerChar, bits, table.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1235
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1236
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1237
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1238
            result.append("] = {");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1239
            boolean castEntries = shiftEntries && (bits < 32);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1240
            int printPerLine = hexFormat ? (bits == 1 ? 32*4 :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1241
                bits == 2 ? 16*4 :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1242
                bits == 4 ? 8*4 :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1243
                bits == 8 ? 8 :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1244
                bits == 16 ? 8 :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1245
                bits == 32 ? 4 : 2) :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1246
                (bits == 8 ? 8 :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1247
                bits == 16 ? 8 : 4);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1248
            int printMask = properties ? 0 :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1249
            Math.min(1 << size,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1250
                printPerLine >> (castEntries ? (Csyntax ? 2 : 1) : 0)) - 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1251
            int commentShift = ((1 << size) == table.length) ? 0 : size;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1252
            int commentMask = ((1 << size) == table.length) ? printMask : (1 << size) - 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1253
            long val = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1254
            for (int j = 0; j < table.length; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1255
                if ((j & printMask) == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1256
                    while (result.charAt(result.length() - 1) == ' ')
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1257
                        result.setLength(result.length() - 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1258
                    result.append("\n    ");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1259
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1260
        PRINT:  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1261
                if (castEntries)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1262
                    result.append("(").append(atype).append(")(");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1263
                long entry = table[j] >> extract;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1264
                int packMask = ((1 << (bits == 1 ? 5 : bits == 2 ? 4 : bits == 4 ? 3 : 2)) - 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1265
                int k = j & packMask;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1266
                if (bits >= 8)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1267
                    val = entry;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1268
                else if (k == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1269
                    val = entry;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1270
                    break PRINT;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1271
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1272
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1273
                    val |= (entry << (k*bits));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1274
                    if (k != packMask)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1275
                        break PRINT;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1276
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1277
                if (val > maxPosEntry && !Csyntax) { // liu
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1278
                // For values that are out of range, convert them to in-range negative values.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1279
                // Actually, output the '-' and convert them to the negative of the corresponding
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1280
                // in-range negative values.  E.g., convert 130 == -126 (in 8 bits) -> 126.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1281
                    result.append('-');
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1282
                    val = maxPosEntry + maxPosEntry + 2 - val;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1283
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1284
                if (hexFormat) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1285
                    result.append("0x");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1286
                    if (bits == 8)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1287
                        result.append(hex2((byte)val));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1288
                    else if (bits == 16)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1289
                        result.append(hex4((short)val));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1290
                    else if (bits == 32 || bits < 8)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1291
                        result.append(hex8((int)val));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1292
                    else {
10110
75674d930b1f 7058708: Eliminate JDK build tools build warnings
jjg
parents: 9535
diff changeset
  1293
                        result.append(hex16(val));
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1294
                        if (!Csyntax)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1295
                            result.append("L");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1296
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1297
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1298
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1299
                    if (bits == 8)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1300
                        result.append(dec3(val));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1301
                    else if (bits == 64) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1302
                        result.append(dec5(val));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1303
                        if (!Csyntax)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1304
                            result.append("L");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1305
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1306
                    else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1307
                        result.append(dec5(val));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1308
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1309
                if (shiftEntries)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1310
                    result.append("<<").append(shift);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1311
                if (castEntries) result.append(")");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1312
                if (j < (table.length - 1))
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1313
                    result.append(", ");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1314
                else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1315
                    result.append("  ");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1316
                if ((j & printMask) == printMask) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1317
                    result.append(" ").append(commentStart).append(" ");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1318
                    if (hexComment)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1319
                        result.append("0x").append(hex4((j & ~commentMask) << (16 - size)));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1320
                    else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1321
                        result.append(dec3((j & ~commentMask) >> commentShift));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1322
                    if (properties) propertiesComments(result, val);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1323
                    result.append(commentEnd);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1324
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1325
                } // end PRINT
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1326
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1327
            result.append("\n  };\n\n  ");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1328
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1329
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1330
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1331
    static void genCaseMapTableDeclaration(StringBuffer result) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1332
        String myTab = "    ";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1333
        result.append(myTab + "static final char[][][] charMap;\n");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1334
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1335
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1336
    static void genCaseMapTable(StringBuffer result, SpecialCaseMap[] specialCaseMaps){
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1337
        String myTab = "    ";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1338
        int ch;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1339
        char[] map;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1340
        result.append(myTab + "charMap = new char[][][] {\n");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1341
        for (int x = 0; x < specialCaseMaps.length; x++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1342
            ch = specialCaseMaps[x].getCharSource();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1343
            map = specialCaseMaps[x].getUpperCaseMap();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1344
            result.append(myTab + myTab);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1345
            result.append("{ ");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1346
            result.append("{\'\\u"+hex4(ch)+"\'}, {");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1347
            for (int y = 0; y < map.length; y++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1348
                result.append("\'\\u"+hex4(map[y])+"\', ");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1349
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1350
            result.append("} },\n");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1351
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1352
        result.append(myTab + "};\n");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1353
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1354
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1355
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1356
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1357
    * The propertiesComments method generates comments describing encoded
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1358
    * character properties.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1359
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1360
    * @param result     a StringBuffer, to which the generated source code
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1361
    *                   text is to be appended
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1362
    * @param val                encoded character properties
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1363
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1364
    * @see GenerateCharacter#genTable
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1365
    */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1366
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1367
    static void propertiesComments(StringBuffer result, long val) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1368
        result.append("   ");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1369
        switch ((int)(val & maskType)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1370
            case UnicodeSpec.CONTROL:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1371
                result.append("Cc");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1372
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1373
            case UnicodeSpec.FORMAT:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1374
                result.append("Cf");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1375
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1376
            case UnicodeSpec.PRIVATE_USE:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1377
                result.append("Co");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1378
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1379
            case UnicodeSpec.SURROGATE:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1380
                result.append("Cs");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1381
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1382
            case UnicodeSpec.LOWERCASE_LETTER:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1383
                result.append("Ll");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1384
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1385
            case UnicodeSpec.MODIFIER_LETTER:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1386
                result.append("Lm");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1387
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1388
            case UnicodeSpec.OTHER_LETTER:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1389
                result.append("Lo");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1390
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1391
            case UnicodeSpec.TITLECASE_LETTER:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1392
                result.append("Lt");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1393
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1394
            case UnicodeSpec.UPPERCASE_LETTER:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1395
                result.append("Lu");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1396
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1397
            case UnicodeSpec.COMBINING_SPACING_MARK:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1398
                result.append("Mc");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1399
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1400
            case UnicodeSpec.ENCLOSING_MARK:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1401
                result.append("Me");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1402
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1403
            case UnicodeSpec.NON_SPACING_MARK:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1404
                result.append("Mn");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1405
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1406
            case UnicodeSpec.DECIMAL_DIGIT_NUMBER:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1407
                result.append("Nd");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1408
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1409
            case UnicodeSpec.LETTER_NUMBER:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1410
                result.append("Nl");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1411
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1412
            case UnicodeSpec.OTHER_NUMBER:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1413
                result.append("No");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1414
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1415
            case UnicodeSpec.CONNECTOR_PUNCTUATION:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1416
                result.append("Pc");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1417
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1418
            case UnicodeSpec.DASH_PUNCTUATION:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1419
                result.append("Pd");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1420
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1421
            case UnicodeSpec.END_PUNCTUATION:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1422
                result.append("Pe");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1423
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1424
            case UnicodeSpec.OTHER_PUNCTUATION:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1425
                result.append("Po");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1426
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1427
            case UnicodeSpec.START_PUNCTUATION:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1428
                result.append("Ps");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1429
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1430
            case UnicodeSpec.CURRENCY_SYMBOL:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1431
                result.append("Sc");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1432
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1433
            case UnicodeSpec.MODIFIER_SYMBOL:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1434
                result.append("Sk");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1435
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1436
            case UnicodeSpec.MATH_SYMBOL:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1437
                result.append("Sm");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1438
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1439
            case UnicodeSpec.OTHER_SYMBOL:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1440
                result.append("So");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1441
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1442
            case UnicodeSpec.LINE_SEPARATOR:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1443
                result.append("Zl"); break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1444
            case UnicodeSpec.PARAGRAPH_SEPARATOR:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1445
                result.append("Zp");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1446
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1447
            case UnicodeSpec.SPACE_SEPARATOR:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1448
                result.append("Zs");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1449
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1450
            case UnicodeSpec.UNASSIGNED:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1451
                result.append("unassigned");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1452
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1453
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1454
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1455
        switch ((int)((val & maskBidi) >> shiftBidi)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1456
            case UnicodeSpec.DIRECTIONALITY_LEFT_TO_RIGHT:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1457
                result.append(", L");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1458
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1459
            case UnicodeSpec.DIRECTIONALITY_RIGHT_TO_LEFT:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1460
                result.append(", R");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1461
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1462
            case UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1463
                result.append(", EN");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1464
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1465
            case UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1466
                result.append(", ES");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1467
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1468
            case UnicodeSpec.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1469
                result.append(", ET");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1470
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1471
            case UnicodeSpec.DIRECTIONALITY_ARABIC_NUMBER:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1472
                result.append(", AN");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1473
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1474
            case UnicodeSpec.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1475
                result.append(", CS");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1476
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1477
            case UnicodeSpec.DIRECTIONALITY_PARAGRAPH_SEPARATOR:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1478
                result.append(", B");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1479
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1480
            case UnicodeSpec.DIRECTIONALITY_SEGMENT_SEPARATOR:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1481
                result.append(", S");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1482
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1483
            case UnicodeSpec.DIRECTIONALITY_WHITESPACE:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1484
                result.append(", WS");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1485
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1486
            case UnicodeSpec.DIRECTIONALITY_OTHER_NEUTRALS:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1487
                result.append(", ON");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1488
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1489
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1490
        if ((val & maskUpperCase) != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1491
            result.append(", hasUpper (subtract ");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1492
            result.append((val & maskCaseOffset) >> shiftCaseOffset).append(")");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1493
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1494
        if ((val & maskLowerCase) != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1495
            result.append(", hasLower (add ");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1496
            result.append((val & maskCaseOffset) >> shiftCaseOffset).append(")");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1497
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1498
        if ((val & maskTitleCase) != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1499
            result.append(", hasTitle");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1500
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1501
        if ((val & maskIdentifierInfo) == valueIgnorable) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1502
            result.append(", ignorable");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1503
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1504
        if ((val & maskIdentifierInfo) == valueJavaUnicodePart) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1505
            result.append(", identifier part");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1506
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1507
        if ((val & maskIdentifierInfo) == valueJavaStartUnicodePart) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1508
            result.append(", underscore");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1509
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1510
        if ((val & maskIdentifierInfo) == valueJavaWhitespace) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1511
            result.append(", whitespace");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1512
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1513
        if ((val & maskIdentifierInfo) == valueJavaOnlyStart) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1514
            result.append(", currency");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1515
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1516
        if ((val & maskIdentifierInfo) == valueJavaUnicodeStart) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1517
            result.append(", identifier start");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1518
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1519
        if ((val & maskNumericType) == valueDigit) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1520
            result.append(", decimal ");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1521
            result.append((val & maskDigitOffset) >> shiftDigitOffset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1522
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1523
        if ((val & maskNumericType) == valueStrangeNumeric) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1524
            result.append(", strange");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1525
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1526
        if ((val & maskNumericType) == valueJavaSupradecimal) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1527
            result.append(", supradecimal ");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1528
            result.append((val & maskDigitOffset) >> shiftDigitOffset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1529
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1530
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1531
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1532
    /**
     * Names of the intermediate lookup tables, in lookup order;
     * {@code tableName(j)} returns element {@code j} of this array.
     */
    static String[] tableNames = { "X", "Y", "Z", "P", "Q", "R", "S", "T", "U", "V", "W" };
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1533
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1534
    /**
     * Returns the name of the intermediate lookup table at the given position.
     *
     * @param j  index into {@code tableNames}; an index outside the array
     *           results in an ArrayIndexOutOfBoundsException
     * @return   the table name stored at index {@code j}
     */
    static String tableName(int j) {
        final String name = tableNames[j];
        return name;
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1535
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1536
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1537
    * The genAccess method generates source code for one table access expression.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1538
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1539
    * Most of the complexity stems from handling various options as to
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1540
    * table representation, such as whether it contains values so large that
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1541
    * they are represented as negative values and whether the table values are
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1542
    * preshifted.  This method also avoids such "ugly" expressions as shifting
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1543
    * by distance zero, masking when no masking is necessary, and so on.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1544
    * For clarity, it generates expressions that do not rely on operator
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1545
    * precedence, but otherwise it avoids generating redundant parentheses.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1546
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1547
    * A generated expression might look like A[Y[(X[ch>>6]<<6)|(ch&0x3F)]]
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1548
    * or A[Z[Y[(X[ch>>7]<<4)|((ch>>3)&0xF)]|(ch&0x7)]], for example.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1549
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1550
    * @param tbl                the name of the final table to be accessed
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1551
    * @param var                the variable name that appeared in parentheses in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1552
    *                           "Lookup" command
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1553
    * @param bits       the number of bits (not bytes) to be used to represent
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1554
    *                   the final table entry
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1555
    * @return   the replacement text for the "Lookup(xxx)" command, as a String
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1556
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1557
    * @see GenerateCharacter#replaceCommand
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1558
    */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1559
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1560
    static String genAccess(String tbl, String var, int bits) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1561
        String access = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1562
        int bitoffset = bits == 1 ? 5 : bits == 2 ? 4 : bits == 4 ? 3 : 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1563
        for (int k = 0; k < sizes.length; k++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1564
            int offset = ((k < sizes.length - 1) ? 0 : bitoffset);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1565
            int shift = shifts[k] + offset;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1566
            String shifted = (shift == 0) ? var : "(" + var + ">>" + shift + ")";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1567
            int mask = (1 << (sizes[k] - offset)) - 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1568
            String masked = (k == 0) ? shifted :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1569
              "(" + shifted + "&0x" + hex(mask) + ")";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1570
            String index = (k == 0) ? masked :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1571
             (mask == 0) ? access : "(" + access + "|" + masked + ")";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1572
            String indexNoParens = (index.charAt(0) != '(') ? index :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1573
                 index.substring(1, index.length() - 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1574
            String tblname = (k == sizes.length - 1) ? tbl : tableName(k);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1575
            String fetched = tblname + "[" + indexNoParens + "]";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1576
            String zeroextended = (zeroextend[k] == 0) ? fetched :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1577
                "(" + fetched + "&0x" + hex(zeroextend[k]) + ")";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1578
            int adjustment = preshifted[k] ? 0 :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1579
               sizes[k+1] - ((k == sizes.length - 2) ? bitoffset : 0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1580
            String adjusted = (preshifted[k] || adjustment == 0) ? zeroextended :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1581
                "(" + zeroextended + "<<" + adjustment + ")";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1582
            String bitshift = (bits == 1) ? "(" + var + "&0x1F)" :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1583
                (bits == 2) ? "((" + var + "&0xF)<<1)" :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1584
                (bits == 4) ? "((" + var + "&7)<<2)" : null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1585
            String extracted = ((k < sizes.length - 1) || (bits >= 8)) ? adjusted :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1586
                "((" + adjusted + ">>" + bitshift + ")&" +
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1587
                (bits == 4 ? "0xF" : "" + ((1 << bits) - 1)) + ")";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1588
            access = extracted;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1589
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1590
        return access;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1591
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1592
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1593
    /* The command line arguments are decoded and used to set the following
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1594
     global variables.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1595
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1596
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1597
    static boolean verbose = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1598
    static boolean nobidi = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1599
    static boolean nomirror = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1600
    static boolean identifiers = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1601
    static boolean Csyntax = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1602
    static String TemplateFileName = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1603
    static String OutputFileName = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1604
    static String UnicodeSpecFileName = null; // liu
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1605
    static String SpecialCasingFileName = null;
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1606
    static String PropListFileName = null;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1607
    static boolean useCharForByte = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1608
    static int[] sizes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1609
    static int bins = 0; // liu; if > 0, then perform search
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1610
    static boolean tableAsString = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1611
    static boolean bLatin1 = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1612
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1613
    static String commandLineDescription;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1614
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1615
    /* Other global variables, equal in length to the "sizes" array. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1616
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1617
    static int[] shifts;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1618
    static int[] zeroextend;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1619
    static int[] bytes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1620
    static boolean[] preshifted;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1621
    static long[][] tables;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1622
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1623
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1624
    /* Other global variables */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1625
    static String commentStart;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1626
    static String commentEnd;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1627
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1628
    static StringBuffer initializers = new StringBuffer();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1629
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1630
    /* special casing rules for 1:M toUpperCase mappings */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1631
    static SpecialCaseMap[] specialCaseMaps;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1632
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1633
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1634
    * Process the command line arguments.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1635
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1636
    * The allowed flags in command line are:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1637
    * <dl>
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1638
    * <dt> -verbose             <dd> Emit comments to standard output describing
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1639
    *                                   what's going on during the processing.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1640
    * <dt> -nobidi              <dd> Do not include bidi categories in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1641
    *                                   encoded character properties.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1642
    * <dt> -nomirror    <dd> Do not include mirror property in the encoded
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1643
    *                        character properties.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1644
    * <dt> -identifiers         <dd> Generate tables for scanning identifiers only.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1645
    * <dt> -c                   <dd> Output code in C syntax instead of Java syntax.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1646
    * <dt> -o filename          <dd> Specify output file name.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1647
    * <dt> -template filename   <dd> Specify template input file name.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1648
    * <dt> -spec filename        <dd> Specify Unicode spec file name.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1649
    * <dt> -specialcasing filename <dd> Specify Unicode special casing file name.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1650
    * <dt> -search bins          <dd> Try different partitions into the specified
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1651
    *                                    number of bins.  E.g., for 2 bins, try
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1652
    *                                    16 0, 15 1,..., 0 16.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1653
    * <dt> -string               <dd> Create table as string.  Only valid with Java
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1654
    *                                    syntax.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1655
    * <dt> -latin1          <dd> Create a latin 1 only property table.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1656
    * </dl>
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1657
    * In addition, decimal literals may appear as command line arguments;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1658
    * each one represents the number of bits of the character to be broken
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1659
    * off at each lookup step.  If present, they must add up to 16 (the number
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1660
    * of bits in a char value).  For smaller tables, the last value should
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1661
    * be 0; values other than the last one may not be zero.  If no such
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1662
    * numeric values are provided, default values are used.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1663
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1664
    * @param args       the command line arguments, as an array of String
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1665
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1666
    * @see GenerateCharacter#main
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1667
    */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1668
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1669
    static void processArgs(String[] args) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1670
        StringBuffer desc = new StringBuffer("java GenerateCharacter");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1671
        for (int j=0; j<args.length; ++j) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1672
            desc.append(" " + args[j]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1673
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1674
        for (int j = 0; j < args.length; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1675
            if (args[j].equals("-verbose") || args[j].equals("-v"))
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1676
                verbose = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1677
            else if (args[j].equals("-nobidi"))
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1678
                nobidi = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1679
            else if (args[j].equals("-nomirror"))
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1680
                nomirror = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1681
            else if (args[j].equals("-identifiers"))
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1682
                identifiers = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1683
            else if (args[j].equals("-c"))
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1684
                Csyntax = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1685
            else if (args[j].equals("-string"))
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1686
                tableAsString = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1687
            else if (args[j].equals("-o")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1688
                if (j == args.length - 1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1689
                    FAIL("File name missing after -o");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1690
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1691
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1692
                    OutputFileName = args[++j];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1693
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1694
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1695
            else if (args[j].equals("-search")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1696
                if (j == args.length - 1)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1697
                    FAIL("Bin count missing after -search");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1698
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1699
                    bins = Integer.parseInt(args[++j]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1700
                    if (bins < 1 || bins > 10)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1701
                        FAIL("Bin count must be >= 1 and <= 10");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1702
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1703
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1704
            else if (args[j].equals("-template")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1705
                if (j == args.length - 1)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1706
                    FAIL("File name missing after -template");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1707
                else
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1708
                    TemplateFileName = args[++j];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1709
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1710
            else if (args[j].equals("-spec")) { // liu
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1711
                if (j == args.length - 1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1712
                    FAIL("File name missing after -spec");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1713
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1714
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1715
                    UnicodeSpecFileName = args[++j];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1716
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1717
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1718
            else if (args[j].equals("-specialcasing")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1719
                if (j == args.length -1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1720
                    FAIL("File name missing after -specialcasing");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1721
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1722
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1723
                    SpecialCasingFileName = args[++j];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1724
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1725
            }
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1726
            else if (args[j].equals("-proplist")) {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1727
                if (j == args.length -1) {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1728
                    FAIL("File name missing after -proplist");
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1729
                }
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1730
                else {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1731
                    PropListFileName = args[++j];
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1732
                }
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1733
            }
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1734
            else if (args[j].equals("-plane")) {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1735
                if (j == args.length -1) {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1736
                    FAIL("Plane number missing after -plane");
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1737
                }
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1738
                else {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1739
                    plane = Integer.parseInt(args[++j]);
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1740
                }
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1741
                if (plane > 0) {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1742
                    bLatin1 = false;
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1743
                }
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1744
            }
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1745
            else if ("-usecharforbyte".equals(args[j])) {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1746
                useCharForByte = true;
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1747
            }
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1748
            else if (args[j].equals("-latin1")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1749
                bLatin1 = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1750
                plane = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1751
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1752
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1753
                try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1754
                    int val = Integer.parseInt(args[j]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1755
                    if (val < 0 || val > 32) FAIL("Incorrect bit field width: " + args[j]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1756
                    if (sizes == null)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1757
                        sizes = new int[1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1758
                    else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1759
                        int[] newsizes = new int[sizes.length + 1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1760
                        System.arraycopy(sizes, 0, newsizes, 0, sizes.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1761
                        sizes = newsizes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1762
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1763
                    sizes[sizes.length - 1] = val;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1764
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1765
                catch(NumberFormatException e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1766
                    FAIL("Unknown switch: " + args[j]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1767
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1768
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1769
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1770
        if (Csyntax && tableAsString) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1771
            FAIL("Can't specify table as string with C syntax");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1772
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1773
        if (sizes == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1774
            desc.append(" [");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1775
            if (identifiers) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1776
                int[] newsizes = { 8, 4, 4 };           // Good default values
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1777
                desc.append("8 4 4]");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1778
                sizes = newsizes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1779
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1780
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1781
                int[] newsizes = { 10, 5, 1 }; // Guy's old defaults for 2.0.14: { 9, 4, 3, 0 }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1782
                desc.append("10 5 1]");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1783
                sizes = newsizes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1784
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1785
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1786
        if (UnicodeSpecFileName == null) { // liu
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1787
            UnicodeSpecFileName = DefaultUnicodeSpecFileName;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1788
            desc.append(" [-spec " + UnicodeSpecFileName + ']');
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1789
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1790
        if (SpecialCasingFileName == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1791
            SpecialCasingFileName = DefaultSpecialCasingFileName;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1792
            desc.append(" [-specialcasing " + SpecialCasingFileName + ']');
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1793
        }
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1794
        if (PropListFileName == null) {
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1795
            PropListFileName = DefaultPropListFileName;
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1796
            desc.append(" [-proplist " + PropListFileName + ']');
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1797
        }
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1798
        if (TemplateFileName == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1799
            TemplateFileName = (Csyntax ? DefaultCTemplateFileName
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1800
                  : DefaultJavaTemplateFileName);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1801
            desc.append(" [-template " + TemplateFileName + ']');
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1802
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1803
        if (OutputFileName == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1804
            OutputFileName = (Csyntax ? DefaultCOutputFileName
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1805
                    : DefaultJavaOutputFileName);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1806
            desc.append(" [-o " + OutputFileName + ']');
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1807
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1808
        commentStart = (Csyntax ? "/*" : "//");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1809
        commentEnd = (Csyntax ? " */" : "");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1810
        commandLineDescription = desc.toString();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1811
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1812
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1813
    private static void searchBins(long[] map, int binsOccupied) throws Exception {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1814
        int bitsFree = 16;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1815
        for (int i=0; i<binsOccupied; ++i) bitsFree -= sizes[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1816
        if (binsOccupied == (bins-1)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1817
            sizes[binsOccupied] = bitsFree;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1818
            generateForSizes(map);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1819
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1820
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1821
            for (int i=1; i<bitsFree; ++i) { // Don't allow bins of 0 except for last one
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1822
                sizes[binsOccupied] = i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1823
                searchBins(map, binsOccupied+1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1824
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1825
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1826
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1827
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1828
    private static void generateForSizes(long[] map) throws Exception {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1829
        int sum = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1830
        shifts = new int[sizes.length];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1831
        for (int k = sizes.length - 1; k >= 0; k--) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1832
            shifts[k] = sum;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1833
            sum += sizes[k];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1834
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1835
        if ((1 << sum) < map.length || (1 << (sum - 1)) >= map.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1836
            FAIL("Bit field widths total to " + sum +
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1837
             ": wrong total for map of size " + map.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1838
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1839
        // need a table for each set of lookup bits in char
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1840
        tables = new long[sizes.length][];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1841
        // the last table is the map
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1842
        tables[sizes.length - 1] = map;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1843
        for (int j = sizes.length - 1; j > 0; j--) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1844
            if (verbose && bins==0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1845
                System.err.println("Building map " + (j+1) + " of bit width " + sizes[j]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1846
            long[][] temp = buildTable(tables[j], sizes[j]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1847
            tables[j-1] = temp[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1848
            tables[j] = temp[1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1849
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1850
        preshifted = new boolean[sizes.length];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1851
        zeroextend = new int[sizes.length];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1852
        bytes = new int[sizes.length];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1853
        for (int j = 0; j < sizes.length - 1; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1854
            int len = tables[j+1].length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1855
            int size = sizes[j+1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1856
            if (len > 0x100 && (len >> size) <= 0x100) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1857
                len >>= size;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1858
                preshifted[j] = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1859
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1860
            else if (len > 0x10000 && (len >> size) <= 0x10000) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1861
                len >>= size;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1862
                preshifted[j] = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1863
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1864
            else preshifted[j] = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1865
            if (Csyntax)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1866
                zeroextend[j] = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1867
            else if (len > 0x7F && len <= 0xFF) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1868
                if (!useCharForByte) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1869
                    zeroextend[j] = 0xFF;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1870
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1871
            } else if (len > 0x7FFF && len <= 0xFFFF)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1872
                zeroextend[j] = 0xFFFF;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1873
            else zeroextend[j] = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1874
            if (len <= 0x100) bytes[j] = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1875
            else if (len <= 0x10000) bytes[j] = 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1876
            else bytes[j] = 4;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1877
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1878
        preshifted[sizes.length - 1] = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1879
        zeroextend[sizes.length - 1] = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1880
        bytes[sizes.length - 1] = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1881
        if (bins > 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1882
            int totalBytes = getTotalBytes();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1883
            String access = genAccess("A", "ch", (identifiers ? 2 : 32));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1884
            int accessComplexity = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1885
            for (int j=0; j<access.length(); ++j) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1886
                char ch = access.charAt(j);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1887
                if ("[&|><".indexOf(ch) >= 0) ++accessComplexity;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1888
                if (ch == '<' || ch == '>') ++j;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1889
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1890
            System.out.print("(");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1891
            for (int j=0; j<sizes.length; ++j) System.out.print(" " + sizes[j]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1892
            System.out.println(" ) " + totalBytes + " " + accessComplexity + " " + access);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1893
            return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1894
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1895
        if (verbose) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1896
            System.out.println("    n\t size\tlength\tshift\tzeroext\tbytes\tpreshifted");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1897
            for (int j = 0; j < sizes.length; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1898
                System.out.println(dec5(j) + "\t" +
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1899
                    dec5(sizes[j]) + "\t" +
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1900
                    dec5(tables[j].length) + "\t" +
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1901
                    dec5(shifts[j]) + "\t" +
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1902
                    dec5(zeroextend[j]) + "\t" +
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1903
                    dec5(bytes[j]) + "\t " +
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1904
                    preshifted[j]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1905
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1906
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1907
        if (verbose) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1908
            System.out.println("Generating source code for class Character");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1909
            System.out.println("A table access looks like " +
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1910
                         genAccess("A", "ch", (identifiers ? 2 : 32)));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1911
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1912
        generateCharacterClass(TemplateFileName, OutputFileName);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1913
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1914
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1915
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1916
    * The main program for generating source code for the Character class.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1917
    * The basic outline of its operation is:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1918
    * <ol>
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1919
    * <li> Process the command line arguments.  One result of this process
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1920
    *           is a list of sizes (measured in bits and summing to 16).
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1921
    * <li> Get the Unicode character property data from the specification file.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1922
    * <li> From that, build a map that has, for each character code, its
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1923
    *           relevant properties encoded as a long integer value.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1924
    * <li> Repeatedly compress the map, producing a compressed table and a
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1925
    *           new map.  This is done once for each size value in the list.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1926
    *           When this is done, we have a set of tables.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1927
    * <li> Make some decisions about table representation; record these
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1928
    *           decisions in arrays named preshifted, zeroextend, and bytes.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1929
    * <li> Generate the source code for the class Character by performing
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1930
    *           macro processing on a template file.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1931
    * </ol>
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1932
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1933
    * @param args       the command line arguments, as an array of String
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1934
    *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1935
    * @see GenerateCharacter#processArgs
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1936
    * @see UnicodeSpec@readSpecFile
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1937
    * @see GenerateCharacter#buildMap
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1938
    * @see GenerateCharacter#buildTable
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1939
    * @see GenerateCharacter#generateCharacterClass
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1940
    */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1941
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1942
    public static void main(String[] args) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1943
        processArgs(args);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1944
        try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1945
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1946
            UnicodeSpec[] data = UnicodeSpec.readSpecFile(new File(UnicodeSpecFileName), plane);
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1947
            specialCaseMaps = SpecialCaseMap.readSpecFile(new File(SpecialCasingFileName), plane);
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1948
            PropList propList = PropList.readSpecFile(new File(PropListFileName), plane);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1949
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1950
            if (verbose) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1951
                System.out.println(data.length + " items read from Unicode spec file " + UnicodeSpecFileName); // liu
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1952
            }
9535
d930011fd275 7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard
sherman
parents: 7668
diff changeset
  1953
            long[] map = buildMap(data, specialCaseMaps, propList);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1954
            if (verbose) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1955
                System.err.println("Completed building of initial map");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1956
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1957
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1958
            if (bins == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1959
                generateForSizes(map);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1960
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1961
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1962
                while (bins > 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1963
                    sizes = new int[bins];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1964
                    searchBins(map, 0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1965
                    --bins;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1966
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1967
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1968
            if (verbose && false) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1969
                System.out.println("Offset range seen: -" + hex8(-minOffsetSeen) + "..+" +
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1970
                             hex8(maxOffsetSeen));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1971
                System.out.println("          allowed: -" + hex8(-minOffset) + "..+" +
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1972
                             hex8(maxOffset));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1973
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1974
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1975
        catch (FileNotFoundException e) { FAIL(e.toString()); }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1976
        catch (IOException e) { FAIL(e.toString()); }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1977
        catch (Throwable e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1978
            System.out.println("Unexpected exception:");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1979
            e.printStackTrace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1980
            FAIL("Unexpected exception!");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1981
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1982
        if (verbose) { System.out.println("Done!");}
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1983
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1984
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1985
}   // end class