make/data/characterdata/CharacterData0E.java.template
changeset 47216 71c04702a3d5
parent 23010 6dadb192ad81
child 52979 7384e00d5860
equal deleted inserted replaced
47215:4ebc2e2fb97c 47216:71c04702a3d5
       
     1 /*
       
     2  * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 
       
    26 package java.lang;
       
    27 
       
    28 /** The CharacterData class encapsulates the large tables found in
       
     29     java.lang.Character. */
       
    30 
       
    31 class CharacterData0E extends CharacterData {
       
    32     /* The character properties are currently encoded into 32 bits in the following manner:
       
    33         1 bit   mirrored property
       
    34         4 bits  directionality property
       
    35         9 bits  signed offset used for converting case
       
    36         1 bit   if 1, adding the signed offset converts the character to lowercase
       
    37         1 bit   if 1, subtracting the signed offset converts the character to uppercase
       
    38         1 bit   if 1, this character has a titlecase equivalent (possibly itself)
       
    39         3 bits  0  may not be part of an identifier
       
    40                 1  ignorable control; may continue a Unicode identifier or Java identifier
       
    41                 2  may continue a Java identifier but not a Unicode identifier (unused)
       
    42                 3  may continue a Unicode identifier or Java identifier
       
    43                 4  is a Java whitespace character
       
    44                 5  may start or continue a Java identifier;
       
    45                    may continue but not start a Unicode identifier (underscores)
       
    46                 6  may start or continue a Java identifier but not a Unicode identifier ($)
       
    47                 7  may start or continue a Unicode identifier or Java identifier
       
    48                 Thus:
       
    49                    5, 6, 7 may start a Java identifier
       
    50                    1, 2, 3, 5, 6, 7 may continue a Java identifier
       
    51                    7 may start a Unicode identifier
       
    52                    1, 3, 5, 7 may continue a Unicode identifier
       
    53                    1 is ignorable within an identifier
       
    54                    4 is Java whitespace
       
    55         2 bits  0  this character has no numeric property
       
    56                 1  adding the digit offset to the character code and then
       
    57                    masking with 0x1F will produce the desired numeric value
       
    58                 2  this character has a "strange" numeric value
       
    59                 3  a Java supradecimal digit: adding the digit offset to the
       
    60                    character code, then masking with 0x1F, then adding 10
       
    61                    will produce the desired numeric value
       
    62         5 bits  digit offset
       
    63         5 bits  character type
       
    64 
       
    65         The encoding of character properties is subject to change at any time.
       
    66      */
       
    67 
       
    68     int getProperties(int ch) {
       
    69         char offset = (char)ch;
       
    70         int props = $$Lookup(offset);
       
    71         return props;
       
    72     }
       
    73 
       
    74     int getPropertiesEx(int ch) {
       
    75         char offset = (char)ch;
       
    76         int props = $$LookupEx(offset);
       
    77         return props;
       
    78     }
       
    79 
       
    80     boolean isOtherLowercase(int ch) {
       
    81         int props = getPropertiesEx(ch);
       
    82         return (props & $$maskOtherLowercase) != 0;
       
    83     }
       
    84 
       
    85     boolean isOtherUppercase(int ch) {
       
    86         int props = getPropertiesEx(ch);
       
    87         return (props & $$maskOtherUppercase) != 0;
       
    88     }
       
    89 
       
    90     boolean isOtherAlphabetic(int ch) {
       
    91         int props = getPropertiesEx(ch);
       
    92         return (props & $$maskOtherAlphabetic) != 0;
       
    93     }
       
    94 
       
    95     boolean isIdeographic(int ch) {
       
    96         int props = getPropertiesEx(ch);
       
    97         return (props & $$maskIdeographic) != 0;
       
    98     }
       
    99 
       
   100     int getType(int ch) {
       
   101         int props = getProperties(ch);
       
   102         return (props & $$maskType);
       
   103     }
       
   104 
       
   105     boolean isJavaIdentifierStart(int ch) {
       
   106         int props = getProperties(ch);
       
   107         return ((props & $$maskIdentifierInfo) >= $$lowJavaStart);
       
   108     }
       
   109 
       
   110     boolean isJavaIdentifierPart(int ch) {
       
   111         int props = getProperties(ch);
       
   112         return ((props & $$nonzeroJavaPart) != 0);
       
   113     }
       
   114 
       
   115     boolean isUnicodeIdentifierStart(int ch) {
       
   116         int props = getProperties(ch);
       
   117         return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
       
   118     }
       
   119 
       
   120     boolean isUnicodeIdentifierPart(int ch) {
       
   121         int props = getProperties(ch);
       
   122         return ((props & $$maskUnicodePart) != 0);
       
   123     }
       
   124 
       
   125     boolean isIdentifierIgnorable(int ch) {
       
   126         int props = getProperties(ch);
       
   127         return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
       
   128     }
       
   129 
       
   130     int toLowerCase(int ch) {
       
   131         int mapChar = ch;
       
   132         int val = getProperties(ch);
       
   133 
       
   134         if ((val & $$maskLowerCase) != 0) {
       
   135             int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
       
   136             mapChar = ch + offset;
       
   137         }
       
   138         return mapChar;
       
   139     }
       
   140 
       
   141     int toUpperCase(int ch) {
       
   142         int mapChar = ch;
       
   143         int val = getProperties(ch);
       
   144 
       
   145         if ((val & $$maskUpperCase) != 0) {
       
   146             int offset = val  << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
       
   147             mapChar =  ch - offset;
       
   148         }
       
   149         return mapChar;
       
   150     }
       
   151 
       
   152     int toTitleCase(int ch) {
       
   153         int mapChar = ch;
       
   154         int val = getProperties(ch);
       
   155 
       
   156         if ((val & $$maskTitleCase) != 0) {
       
   157             // There is a titlecase equivalent.  Perform further checks:
       
   158             if ((val & $$maskUpperCase) == 0) {
       
   159                 // The character does not have an uppercase equivalent, so it must
       
   160                 // already be uppercase; so add 1 to get the titlecase form.
       
   161                 mapChar = ch + 1;
       
   162             }
       
   163             else if ((val & $$maskLowerCase) == 0) {
       
   164                 // The character does not have a lowercase equivalent, so it must
       
   165                 // already be lowercase; so subtract 1 to get the titlecase form.
       
   166                 mapChar = ch - 1;
       
   167             }
       
   168             // else {
       
   169             // The character has both an uppercase equivalent and a lowercase
       
   170             // equivalent, so it must itself be a titlecase form; return it.
       
   171             // return ch;
       
   172             //}
       
   173         }
       
   174         else if ((val & $$maskUpperCase) != 0) {
       
   175             // This character has no titlecase equivalent but it does have an
       
   176             // uppercase equivalent, so use that (subtract the signed case offset).
       
   177             mapChar = toUpperCase(ch);
       
   178         }
       
   179         return mapChar;
       
   180     }
       
   181 
       
   182     int digit(int ch, int radix) {
       
   183         int value = -1;
       
   184         if (radix >= Character.MIN_RADIX && radix <= Character.MAX_RADIX) {
       
   185             int val = getProperties(ch);
       
   186             int kind = val & $$maskType;
       
   187             if (kind == Character.DECIMAL_DIGIT_NUMBER) {
       
   188                 value = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
       
   189             }
       
   190             else if ((val & $$maskNumericType) == $$valueJavaSupradecimal) {
       
   191                 // Java supradecimal digit
       
   192                 value = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
       
   193             }
       
   194         }
       
   195         return (value < radix) ? value : -1;
       
   196     }
       
   197 
       
   198     int getNumericValue(int ch) {
       
   199         int val = getProperties(ch);
       
   200         int retval = -1;
       
   201 
       
   202         switch (val & $$maskNumericType) {
       
   203         default: // cannot occur
       
   204         case ($$valueNotNumeric):         // not numeric
       
   205             retval = -1;
       
   206             break;
       
   207         case ($$valueDigit):              // simple numeric
       
   208             retval = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
       
   209             break;
       
   210         case ($$valueStrangeNumeric)      :       // "strange" numeric
       
   211             retval = -2;
       
   212             break;
       
   213         case ($$valueJavaSupradecimal):           // Java supradecimal
       
   214             retval = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
       
   215             break;
       
   216         }
       
   217         return retval;
       
   218     }
       
   219 
       
   220     boolean isWhitespace(int ch) {
       
   221         int props = getProperties(ch);
       
   222         return ((props & $$maskIdentifierInfo) == $$valueJavaWhitespace);
       
   223     }
       
   224 
       
   225     byte getDirectionality(int ch) {
       
   226         int val = getProperties(ch);
       
   227         byte directionality = (byte)((val & $$maskBidi) >> $$shiftBidi);
       
   228         if (directionality == 0xF ) {
       
   229 	        directionality = Character.DIRECTIONALITY_UNDEFINED;
       
   230         }
       
   231         return directionality;
       
   232     }
       
   233 
       
   234     boolean isMirrored(int ch) {
       
   235         int props = getProperties(ch);
       
   236         return ((props & $$maskMirrored) != 0);
       
   237     }
       
   238 
       
   239     static final CharacterData instance = new CharacterData0E();
       
   240     private CharacterData0E() {};
       
   241 
       
   242     $$Tables
       
   243 
       
   244     static {
       
   245         $$Initializers
       
   246     }        
       
   247 }