jdk/src/share/classes/java/lang/Character.java
changeset 5986 04eb44085c00
parent 5985 f98ac682b34c
child 5989 05ad44c7b244
equal deleted inserted replaced
5985:f98ac682b34c 5986:04eb44085c00
    65  * (Refer to the <a
    65  * (Refer to the <a
    66  * href="http://www.unicode.org/reports/tr27/#notation"><i>
    66  * href="http://www.unicode.org/reports/tr27/#notation"><i>
    67  * definition</i></a> of the U+<i>n</i> notation in the Unicode
    67  * definition</i></a> of the U+<i>n</i> notation in the Unicode
    68  * standard.)
    68  * standard.)
    69  *
    69  *
    70  * <p>The set of characters from U+0000 to U+FFFF is sometimes
    70  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
    71  * referred to as the <em>Basic Multilingual Plane (BMP)</em>. <a
    71  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
    72  * name="supplementary">Characters</a> whose code points are greater
    72  * <a name="supplementary">Characters</a> whose code points are greater
    73  * than U+FFFF are called <em>supplementary character</em>s.  The Java
    73  * than U+FFFF are called <em>supplementary character</em>s.  The Java
    74  * 2 platform uses the UTF-16 representation in <code>char</code>
    74  * platform uses the UTF-16 representation in <code>char</code> arrays and
    75  * arrays and in the <code>String</code> and <code>StringBuffer</code>
    75  * in the <code>String</code> and <code>StringBuffer</code> classes. In
    76  * classes. In this representation, supplementary characters are
    76  * this representation, supplementary characters are represented as a pair
    77  * represented as a pair of <code>char</code> values, the first from
    77  * of <code>char</code> values, the first from the <em>high-surrogates</em>
    78  * the <em>high-surrogates</em> range, (&#92;uD800-&#92;uDBFF), the
    78  * range (&#92;uD800-&#92;uDBFF), the second from the
    79  * second from the <em>low-surrogates</em> range
    79  * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
    80  * (&#92;uDC00-&#92;uDFFF).
       
    81  *
    80  *
    82  * <p>A <code>char</code> value, therefore, represents Basic
    81  * <p>A <code>char</code> value, therefore, represents Basic
    83  * Multilingual Plane (BMP) code points, including the surrogate
    82  * Multilingual Plane (BMP) code points, including the surrogate
    84  * code points, or code units of the UTF-16 encoding. An
    83  * code points, or code units of the UTF-16 encoding. An
    85  * <code>int</code> value represents all Unicode code points,
    84  * <code>int</code> value represents all Unicode code points,
  3922         return plane < ((MAX_CODE_POINT + 1) >>> 16);
  3921         return plane < ((MAX_CODE_POINT + 1) >>> 16);
  3923     }
  3922     }
  3924 
  3923 
  3925     /**
  3924     /**
  3926      * Determines whether the specified character (Unicode code point)
  3925      * Determines whether the specified character (Unicode code point)
       
  3926      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
       
  3927      * Such code points can be represented using a single {@code char}.
       
  3928      *
       
  3929      * @param  codePoint the character (Unicode code point) to be tested
       
  3930      * @return {@code true} if the specified code point is between
       
  3931      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
       
  3932      *         {@code false} otherwise.
       
  3933      * @since  1.7
       
  3934      */
       
  3935     public static boolean isBmpCodePoint(int codePoint) {
       
  3936         return codePoint >>> 16 == 0;
       
  3937         // Optimized form of:
       
  3938         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
       
  3939         // We consistently use logical shift (>>>) to facilitate
       
  3940         // additional runtime optimizations.
       
  3941     }
       
  3942 
       
  3943     /**
       
  3944      * Determines whether the specified character (Unicode code point)
  3927      * is in the <a href="#supplementary">supplementary character</a> range.
  3945      * is in the <a href="#supplementary">supplementary character</a> range.
  3928      *
  3946      *
  3929      * @param  codePoint the character (Unicode code point) to be tested
  3947      * @param  codePoint the character (Unicode code point) to be tested
  3930      * @return {@code true} if the specified code point is between
  3948      * @return {@code true} if the specified code point is between
  3931      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
  3949      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
  4317      * high-surrogate value is not stored in
  4335      * high-surrogate value is not stored in
  4318      * <code>dst[dstIndex]</code>.)
  4336      * <code>dst[dstIndex]</code>.)
  4319      * @since  1.5
  4337      * @since  1.5
  4320      */
  4338      */
  4321     public static int toChars(int codePoint, char[] dst, int dstIndex) {
  4339     public static int toChars(int codePoint, char[] dst, int dstIndex) {
  4322         if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
  4340         if (isBmpCodePoint(codePoint)) {
       
  4341             dst[dstIndex] = (char) codePoint;
       
  4342             return 1;
       
  4343         } else if (isValidCodePoint(codePoint)) {
       
  4344             toSurrogates(codePoint, dst, dstIndex);
       
  4345             return 2;
       
  4346         } else {
  4323             throw new IllegalArgumentException();
  4347             throw new IllegalArgumentException();
  4324         }
  4348         }
  4325         if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
       
  4326             dst[dstIndex] = (char) codePoint;
       
  4327             return 1;
       
  4328         }
       
  4329         toSurrogates(codePoint, dst, dstIndex);
       
  4330         return 2;
       
  4331     }
  4349     }
  4332 
  4350 
  4333     /**
  4351     /**
  4334      * Converts the specified character (Unicode code point) to its
  4352      * Converts the specified character (Unicode code point) to its
  4335      * UTF-16 representation stored in a <code>char</code> array. If
  4353      * UTF-16 representation stored in a <code>char</code> array. If
  4345      * @exception IllegalArgumentException if the specified
  4363      * @exception IllegalArgumentException if the specified
  4346      * <code>codePoint</code> is not a valid Unicode code point.
  4364      * <code>codePoint</code> is not a valid Unicode code point.
  4347      * @since  1.5
  4365      * @since  1.5
  4348      */
  4366      */
  4349     public static char[] toChars(int codePoint) {
  4367     public static char[] toChars(int codePoint) {
  4350         if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
  4368         if (isBmpCodePoint(codePoint)) {
       
  4369             return new char[] { (char) codePoint };
       
  4370         } else if (isValidCodePoint(codePoint)) {
       
  4371             char[] result = new char[2];
       
  4372             toSurrogates(codePoint, result, 0);
       
  4373             return result;
       
  4374         } else {
  4351             throw new IllegalArgumentException();
  4375             throw new IllegalArgumentException();
  4352         }
  4376         }
  4353         if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
       
  4354                 return new char[] { (char) codePoint };
       
  4355         }
       
  4356         char[] result = new char[2];
       
  4357         toSurrogates(codePoint, result, 0);
       
  4358         return result;
       
  4359     }
  4377     }
  4360 
  4378 
  4361     static void toSurrogates(int codePoint, char[] dst, int index) {
  4379     static void toSurrogates(int codePoint, char[] dst, int index) {
  4362         // We write elements "backwards" to guarantee all-or-nothing
  4380         // We write elements "backwards" to guarantee all-or-nothing
  4363         dst[index+1] = (char)((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
  4381         dst[index+1] = (char)((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
  6257      * @return a <code>char[]</code> with the uppercased character.
  6275      * @return a <code>char[]</code> with the uppercased character.
  6258      * @since 1.4
  6276      * @since 1.4
  6259      */
  6277      */
  6260     static char[] toUpperCaseCharArray(int codePoint) {
  6278     static char[] toUpperCaseCharArray(int codePoint) {
  6261         // As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
  6279         // As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
  6262         assert isValidCodePoint(codePoint) &&
  6280         assert isBmpCodePoint(codePoint);
  6263                !isSupplementaryCodePoint(codePoint);
       
  6264         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
  6281         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
  6265     }
  6282     }
  6266 
  6283 
  6267     /**
  6284     /**
  6268      * The number of bits used to represent a <tt>char</tt> value in unsigned
  6285      * The number of bits used to represent a <tt>char</tt> value in unsigned