jdk/make/tools/GenerateCharacter/CharacterDataLatin1.java.template
changeset 2 90ce3da70b43
child 5506 202f599c92aa
equal deleted inserted replaced
0:fd16c54261b3 2:90ce3da70b43
       
     1 /*
       
     2  * Copyright 2002-2006 Sun Microsystems, Inc.  All Rights Reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Sun designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Sun in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
       
    22  * CA 95054 USA or visit www.sun.com if you need additional information or
       
    23  * have any questions.
       
    24  */
       
    25 
       
    26 package java.lang;
       
    27 
       
    28 /** The CharacterData class encapsulates the large tables found in
       
    29     Java.lang.Character. */
       
    30 
       
    31 class CharacterDataLatin1 extends CharacterData {
       
    32 
       
    33     /* The character properties are currently encoded into 32 bits in the following manner:
       
    34         1 bit   mirrored property
       
    35         4 bits  directionality property
       
    36         9 bits  signed offset used for converting case
       
    37         1 bit   if 1, adding the signed offset converts the character to lowercase
       
    38         1 bit   if 1, subtracting the signed offset converts the character to uppercase
       
    39         1 bit   if 1, this character has a titlecase equivalent (possibly itself)
       
    40         3 bits  0  may not be part of an identifier
       
    41                 1  ignorable control; may continue a Unicode identifier or Java identifier
       
    42                 2  may continue a Java identifier but not a Unicode identifier (unused)
       
    43                 3  may continue a Unicode identifier or Java identifier
       
    44                 4  is a Java whitespace character
       
    45                 5  may start or continue a Java identifier;
       
    46                    may continue but not start a Unicode identifier (underscores)
       
    47                 6  may start or continue a Java identifier but not a Unicode identifier ($)
       
    48                 7  may start or continue a Unicode identifier or Java identifier
       
    49                 Thus:
       
    50                    5, 6, 7 may start a Java identifier
       
    51                    1, 2, 3, 5, 6, 7 may continue a Java identifier
       
    52                    7 may start a Unicode identifier
       
    53                    1, 3, 5, 7 may continue a Unicode identifier
       
    54                    1 is ignorable within an identifier
       
    55                    4 is Java whitespace
       
    56         2 bits  0  this character has no numeric property
       
    57                 1  adding the digit offset to the character code and then
       
    58                    masking with 0x1F will produce the desired numeric value
       
    59                 2  this character has a "strange" numeric value
       
    60                 3  a Java supradecimal digit: adding the digit offset to the
       
    61                    character code, then masking with 0x1F, then adding 10
       
    62                    will produce the desired numeric value
       
    63         5 bits  digit offset
       
    64         5 bits  character type
       
    65 
       
    66         The encoding of character properties is subject to change at any time.
       
    67      */
       
    68 
       
    69     int getProperties(int ch) {
       
    70 		char offset = (char)ch;
       
    71         int props = $$Lookup(offset);
       
    72         return props;
       
    73     }
       
    74 
       
    75     int getType(int ch) {
       
    76         int props = getProperties(ch);
       
    77         return (props & $$maskType);
       
    78     }
       
    79 
       
    80     boolean isJavaIdentifierStart(int ch) {
       
    81         int props = getProperties(ch);
       
    82         return ((props & $$maskIdentifierInfo) >= $$lowJavaStart);
       
    83     }
       
    84 
       
    85     boolean isJavaIdentifierPart(int ch) {
       
    86         int props = getProperties(ch);
       
    87         return ((props & $$nonzeroJavaPart) != 0);
       
    88     }
       
    89 
       
    90     boolean isUnicodeIdentifierStart(int ch) {
       
    91         int props = getProperties(ch);
       
    92         return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
       
    93     }
       
    94 
       
    95     boolean isUnicodeIdentifierPart(int ch) {
       
    96         int props = getProperties(ch);
       
    97         return ((props & $$maskUnicodePart) != 0);
       
    98     }
       
    99 
       
   100     boolean isIdentifierIgnorable(int ch) {
       
   101         int props = getProperties(ch);
       
   102         return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
       
   103     }
       
   104 
       
   105     int toLowerCase(int ch) {
       
   106         int mapChar = ch;
       
   107         int val = getProperties(ch);
       
   108 
       
   109         if (((val & $$maskLowerCase) != 0) && 
       
   110                 ((val & $$maskCaseOffset) != $$maskCaseOffset)) { 
       
   111             int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
       
   112             mapChar = ch + offset;
       
   113         }
       
   114         return mapChar;
       
   115     }
       
   116 
       
   117     int toUpperCase(int ch) {
       
   118         int mapChar = ch;
       
   119         int val = getProperties(ch);
       
   120 
       
   121         if ((val & $$maskUpperCase) != 0) {
       
   122             if ((val & $$maskCaseOffset) != $$maskCaseOffset) {
       
   123                 int offset = val  << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
       
   124                 mapChar =  ch - offset;
       
   125             } else if (ch == 0x00B5) {
       
   126                 mapChar = 0x039C;
       
   127             }
       
   128         }
       
   129         return mapChar;
       
   130     }
       
   131 
       
   132     int toTitleCase(int ch) {
       
   133         return toUpperCase(ch);
       
   134     }
       
   135 
       
   136     int digit(int ch, int radix) {
       
   137         int value = -1;
       
   138         if (radix >= Character.MIN_RADIX && radix <= Character.MAX_RADIX) {
       
   139             int val = getProperties(ch);
       
   140             int kind = val & $$maskType;
       
   141             if (kind == Character.DECIMAL_DIGIT_NUMBER) {
       
   142                 value = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
       
   143             }
       
   144             else if ((val & $$maskNumericType) == $$valueJavaSupradecimal) {
       
   145                 // Java supradecimal digit
       
   146                 value = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
       
   147             }
       
   148         }
       
   149         return (value < radix) ? value : -1;
       
   150     }
       
   151 
       
   152     int getNumericValue(int ch) {
       
   153         int val = getProperties(ch);
       
   154         int retval = -1;
       
   155 
       
   156         switch (val & $$maskNumericType) {
       
   157             default: // cannot occur
       
   158             case ($$valueNotNumeric):         // not numeric
       
   159                 retval = -1;
       
   160                 break;
       
   161             case ($$valueDigit):              // simple numeric
       
   162                 retval = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
       
   163                 break;
       
   164             case ($$valueStrangeNumeric)      :       // "strange" numeric
       
   165                  retval = -2; 
       
   166                  break;
       
   167             case ($$valueJavaSupradecimal):           // Java supradecimal
       
   168                 retval = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
       
   169                 break;
       
   170         }
       
   171         return retval;
       
   172     }
       
   173 
       
   174     boolean isWhitespace(int ch) {
       
   175         int props = getProperties(ch);
       
   176         return ((props & $$maskIdentifierInfo) == $$valueJavaWhitespace);
       
   177     }
       
   178 
       
   179     byte getDirectionality(int ch) {
       
   180         int val = getProperties(ch);
       
   181         byte directionality = (byte)((val & $$maskBidi) >> $$shiftBidi);
       
   182 
       
   183         if (directionality == 0xF ) {
       
   184             directionality = -1;
       
   185         }
       
   186         return directionality;
       
   187     }
       
   188 
       
   189     boolean isMirrored(int ch) {
       
   190         int props = getProperties(ch);
       
   191         return ((props & $$maskMirrored) != 0);
       
   192     }
       
   193 
       
   194     int toUpperCaseEx(int ch) {
       
   195         int mapChar = ch;
       
   196         int val = getProperties(ch);
       
   197 
       
   198         if ((val & $$maskUpperCase) != 0) {
       
   199             if ((val & $$maskCaseOffset) != $$maskCaseOffset) {
       
   200                 int offset = val  << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
       
   201                 mapChar =  ch - offset;
       
   202             }
       
   203             else {
       
   204                 switch(ch) {
       
   205                     // map overflow characters
       
   206                     case 0x00B5 : mapChar = 0x039C; break;
       
   207                     default       : mapChar = Character.ERROR; break;
       
   208                 }
       
   209             }
       
   210         }
       
   211         return mapChar;
       
   212     }
       
   213 
       
   214     static char[] sharpsMap = new char[] {'S', 'S'};
       
   215 
       
   216     char[] toUpperCaseCharArray(int ch) {
       
   217         char[] upperMap = {(char)ch};
       
   218         if (ch == 0x00DF) {
       
   219             upperMap = sharpsMap;
       
   220         }
       
   221         return upperMap;
       
   222     }
       
   223 
       
   224     static final CharacterDataLatin1 instance = new CharacterDataLatin1();
       
   225     private CharacterDataLatin1() {};
       
   226 
       
   227     $$Tables
       
   228 
       
   229     static {
       
   230         $$Initializers
       
   231     }        
       
   232 }
       
   233