jdk-sandbox: comparison jdk/src/share/classes/java/lang/Character.java

equal deleted inserted replaced

-:f98ac682b34c
+:04eb44085c00
 * (Refer to the <a
 * href="http://www.unicode.org/reports/tr27/#notation"><i>
 * definition</i></a> of the U+<i>n</i> notation in the Unicode
 * standard.)
 *
-* <p>The set of characters from U+0000 to U+FFFF is sometimes
+* <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
-* referred to as the <em>Basic Multilingual Plane (BMP)</em>. <a
+* sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
-* name="supplementary">Characters</a> whose code points are greater
+* <a name="supplementary">Characters</a> whose code points are greater
 * than U+FFFF are called <em>supplementary character</em>s.  The Java
-* 2 platform uses the UTF-16 representation in <code>char</code>
+* platform uses the UTF-16 representation in <code>char</code> arrays and
-* arrays and in the <code>String</code> and <code>StringBuffer</code>
+* in the <code>String</code> and <code>StringBuffer</code> classes. In
-* classes. In this representation, supplementary characters are
+* this representation, supplementary characters are represented as a pair
-* represented as a pair of <code>char</code> values, the first from
+* of <code>char</code> values, the first from the <em>high-surrogates</em>
-* the <em>high-surrogates</em> range, (&#92;uD800-&#92;uDBFF), the
+* range (&#92;uD800-&#92;uDBFF), the second from the
-* second from the <em>low-surrogates</em> range
+* <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
-* (&#92;uDC00-&#92;uDFFF).
 *
 * <p>A <code>char</code> value, therefore, represents Basic
 * Multilingual Plane (BMP) code points, including the surrogate
 * code points, or code units of the UTF-16 encoding. An
 * <code>int</code> value represents all Unicode code points,
 return plane < ((MAX_CODE_POINT + 1) >>> 16);
 }
 /**
 * Determines whether the specified character (Unicode code point)
+* is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
+* Such code points can be represented using a single {@code char}.
+*
+* @param  codePoint the character (Unicode code point) to be tested
+* @return {@code true} if the specified code point is between
+*         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
+*         {@code false} otherwise.
+* @since  1.7
+*/
+public static boolean isBmpCodePoint(int codePoint) {
+return codePoint >>> 16 == 0;
+// Optimized form of:
+//     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
+// We consistently use logical shift (>>>) to facilitate
+// additional runtime optimizations.
+}
+/**
+* Determines whether the specified character (Unicode code point)
 * is in the <a href="#supplementary">supplementary character</a> range.
 *
 * @param  codePoint the character (Unicode code point) to be tested
 * @return {@code true} if the specified code point is between
 *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
 * high-surrogate value is not stored in
 * <code>dst[dstIndex]</code>.)
 * @since  1.5
 */
 public static int toChars(int codePoint, char[] dst, int dstIndex) {
-if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
+if (isBmpCodePoint(codePoint)) {
+dst[dstIndex] = (char) codePoint;
+return 1;
+} else if (isValidCodePoint(codePoint)) {
+toSurrogates(codePoint, dst, dstIndex);
+return 2;
+} else {
 throw new IllegalArgumentException();
 }
-if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
-dst[dstIndex] = (char) codePoint;
-return 1;
-}
-toSurrogates(codePoint, dst, dstIndex);
-return 2;
 }
 /**
 * Converts the specified character (Unicode code point) to its
 * UTF-16 representation stored in a <code>char</code> array. If
 * @exception IllegalArgumentException if the specified
 * <code>codePoint</code> is not a valid Unicode code point.
 * @since  1.5
 */
 public static char[] toChars(int codePoint) {
-if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
+if (isBmpCodePoint(codePoint)) {
+return new char[] { (char) codePoint };
+} else if (isValidCodePoint(codePoint)) {
+char[] result = new char[2];
+toSurrogates(codePoint, result, 0);
+return result;
+} else {
 throw new IllegalArgumentException();
 }
-if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
-return new char[] { (char) codePoint };
-}
-char[] result = new char[2];
-toSurrogates(codePoint, result, 0);
-return result;
 }
 static void toSurrogates(int codePoint, char[] dst, int index) {
 // We write elements "backwards" to guarantee all-or-nothing
 dst[index+1] = (char)((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
 * @return a <code>char[]</code> with the uppercased character.
 * @since 1.4
 */
 static char[] toUpperCaseCharArray(int codePoint) {
 // As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
-assert isValidCodePoint(codePoint) &&
+assert isBmpCodePoint(codePoint);
-!isSupplementaryCodePoint(codePoint);
 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
 }
 /**
 * The number of bits used to represent a <tt>char</tt> value in unsigned

changeset 5986	04eb44085c00
parent 5985	f98ac682b34c
child 5989	05ad44c7b244