jdk/make/tools/GenerateCharacter/CharacterData02.java.template
changeset 2 90ce3da70b43
child 5506 202f599c92aa
equal deleted inserted replaced
0:fd16c54261b3 2:90ce3da70b43
       
     1 /*
       
     2  * Copyright 2003-2006 Sun Microsystems, Inc.  All Rights Reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Sun designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Sun in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
       
    22  * CA 95054 USA or visit www.sun.com if you need additional information or
       
    23  * have any questions.
       
    24  */
       
    25 
       
    26 package java.lang;
       
    27 
       
     28 /** The CharacterData class encapsulates the large tables found in
       
     29     java.lang.Character. */
       
    30 
       
    31 class CharacterData02 extends CharacterData {
       
    32     /* The character properties are currently encoded into 32 bits in the following manner:
       
    33         1 bit   mirrored property
       
    34         4 bits  directionality property
       
    35         9 bits  signed offset used for converting case
       
    36         1 bit   if 1, adding the signed offset converts the character to lowercase
       
    37         1 bit   if 1, subtracting the signed offset converts the character to uppercase
       
    38         1 bit   if 1, this character has a titlecase equivalent (possibly itself)
       
    39         3 bits  0  may not be part of an identifier
       
    40                 1  ignorable control; may continue a Unicode identifier or Java identifier
       
    41                 2  may continue a Java identifier but not a Unicode identifier (unused)
       
    42                 3  may continue a Unicode identifier or Java identifier
       
    43                 4  is a Java whitespace character
       
    44                 5  may start or continue a Java identifier;
       
    45                    may continue but not start a Unicode identifier (underscores)
       
    46                 6  may start or continue a Java identifier but not a Unicode identifier ($)
       
    47                 7  may start or continue a Unicode identifier or Java identifier
       
    48                 Thus:
       
    49                    5, 6, 7 may start a Java identifier
       
    50                    1, 2, 3, 5, 6, 7 may continue a Java identifier
       
    51                    7 may start a Unicode identifier
       
    52                    1, 3, 5, 7 may continue a Unicode identifier
       
    53                    1 is ignorable within an identifier
       
    54                    4 is Java whitespace
       
    55         2 bits  0  this character has no numeric property
       
    56                 1  adding the digit offset to the character code and then
       
    57                    masking with 0x1F will produce the desired numeric value
       
    58                 2  this character has a "strange" numeric value
       
    59                 3  a Java supradecimal digit: adding the digit offset to the
       
    60                    character code, then masking with 0x1F, then adding 10
       
    61                    will produce the desired numeric value
       
    62         5 bits  digit offset
       
    63         5 bits  character type
       
    64 
       
    65         The encoding of character properties is subject to change at any time.
       
    66      */
       
    67 
       
    68     int getProperties(int ch) {
       
    69 		char offset = (char)ch;
       
    70         int props = $$Lookup(offset);
       
    71         return props;
       
    72     }
       
    73 
       
    74     int getType(int ch) {
       
    75         int props = getProperties(ch);
       
    76         return (props & $$maskType);
       
    77     }
       
    78 
       
    79     boolean isJavaIdentifierStart(int ch) {
       
    80         int props = getProperties(ch);
       
    81         return ((props & $$maskIdentifierInfo) >= $$lowJavaStart);
       
    82     }
       
    83 
       
    84     boolean isJavaIdentifierPart(int ch) {
       
    85         int props = getProperties(ch);
       
    86         return ((props & $$nonzeroJavaPart) != 0);
       
    87     }
       
    88 
       
    89     boolean isUnicodeIdentifierStart(int ch) {
       
    90         int props = getProperties(ch);
       
    91         return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
       
    92     }
       
    93 
       
    94     boolean isUnicodeIdentifierPart(int ch) {
       
    95         int props = getProperties(ch);
       
    96         return ((props & $$maskUnicodePart) != 0);
       
    97     }
       
    98 
       
    99     boolean isIdentifierIgnorable(int ch) {
       
   100         int props = getProperties(ch);
       
   101         return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
       
   102     }
       
   103 
       
   104     int toLowerCase(int ch) {
       
   105         int mapChar = ch;
       
   106         int val = getProperties(ch);
       
   107 
       
   108         if ((val & $$maskLowerCase) != 0) {
       
   109             int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
       
   110             mapChar = ch + offset;
       
   111         }
       
   112         return mapChar;
       
   113     }
       
   114 
       
   115     int toUpperCase(int ch) {
       
   116         int mapChar = ch;
       
   117         int val = getProperties(ch);
       
   118 
       
   119         if ((val & $$maskUpperCase) != 0) {
       
   120             int offset = val  << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
       
   121             mapChar =  ch - offset;
       
   122         }
       
   123         return mapChar;
       
   124     }
       
   125 
       
   126     int toTitleCase(int ch) {
       
   127         int mapChar = ch;
       
   128         int val = getProperties(ch);
       
   129 
       
   130         if ((val & $$maskTitleCase) != 0) {
       
   131             // There is a titlecase equivalent.  Perform further checks:
       
   132             if ((val & $$maskUpperCase) == 0) {
       
   133                 // The character does not have an uppercase equivalent, so it must
       
   134                 // already be uppercase; so add 1 to get the titlecase form.
       
   135                 mapChar = ch + 1;
       
   136             }
       
   137             else if ((val & $$maskLowerCase) == 0) {
       
   138                 // The character does not have a lowercase equivalent, so it must
       
   139                 // already be lowercase; so subtract 1 to get the titlecase form.
       
   140                 mapChar = ch - 1;
       
   141             }
       
   142             // else {
       
   143             // The character has both an uppercase equivalent and a lowercase
       
   144             // equivalent, so it must itself be a titlecase form; return it.
       
   145             // return ch;
       
   146             //}
       
   147         }
       
   148         else if ((val & $$maskUpperCase) != 0) {
       
   149             // This character has no titlecase equivalent but it does have an
       
   150             // uppercase equivalent, so use that (subtract the signed case offset).
       
   151             mapChar = toUpperCase(ch);
       
   152         }
       
   153         return mapChar;
       
   154     }
       
   155 
       
   156     int digit(int ch, int radix) {
       
   157         int value = -1;
       
   158         if (radix >= Character.MIN_RADIX && radix <= Character.MAX_RADIX) {
       
   159             int val = getProperties(ch);
       
   160             int kind = val & $$maskType;
       
   161             if (kind == Character.DECIMAL_DIGIT_NUMBER) {
       
   162                 value = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
       
   163             }
       
   164             else if ((val & $$maskNumericType) == $$valueJavaSupradecimal) {
       
   165                 // Java supradecimal digit
       
   166                 value = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
       
   167             }
       
   168         }
       
   169         return (value < radix) ? value : -1;
       
   170     }
       
   171 
       
   172     int getNumericValue(int ch) {
       
   173         int val = getProperties(ch);
       
   174         int retval = -1;
       
   175 
       
   176         switch (val & $$maskNumericType) {
       
   177         default: // cannot occur
       
   178         case ($$valueNotNumeric):         // not numeric
       
   179             retval = -1;
       
   180             break;
       
   181         case ($$valueDigit):              // simple numeric
       
   182             retval = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
       
   183             break;
       
   184         case ($$valueStrangeNumeric)      :       // "strange" numeric
       
   185             retval = -2;
       
   186             break;
       
   187         case ($$valueJavaSupradecimal):           // Java supradecimal
       
   188             retval = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
       
   189             break;
       
   190         }
       
   191         return retval;
       
   192     }
       
   193 
       
   194     boolean isWhitespace(int ch) {
       
   195         return (getProperties(ch) & $$maskIdentifierInfo) == $$valueJavaWhitespace;
       
   196     }
       
   197 
       
   198     byte getDirectionality(int ch) {
       
   199         int val = getProperties(ch);
       
   200         byte directionality = (byte)((val & $$maskBidi) >> $$shiftBidi);
       
   201         if (directionality == 0xF ) {
       
   202 	        directionality = Character.DIRECTIONALITY_UNDEFINED;
       
   203         }
       
   204         return directionality;
       
   205     }
       
   206 
       
   207     boolean isMirrored(int ch) {
       
   208         return (getProperties(ch) & $$maskMirrored) != 0;
       
   209     }
       
   210 
       
   211     static final CharacterData instance = new CharacterData02();
       
   212     private CharacterData02() {};
       
   213 
       
   214     $$Tables
       
   215 
       
   216     static {
       
   217         $$Initializers
       
   218     }        
       
   219 }