6934265: Add public method Character.isBmpCodePoint
Summary: Move isBmpCodePoint from sun.nio.cs.Surrogate to Character
Reviewed-by: sherman
Contributed-by: Ulf Zibis <ulf.zibis@gmx.de>
--- a/jdk/src/share/classes/java/lang/AbstractStringBuilder.java Wed Jun 30 16:11:31 2010 -0700
+++ b/jdk/src/share/classes/java/lang/AbstractStringBuilder.java Wed Jun 30 16:11:32 2010 -0700
@@ -721,20 +721,19 @@
* {@code codePoint} isn't a valid Unicode code point
*/
public AbstractStringBuilder appendCodePoint(int codePoint) {
- if (!Character.isValidCodePoint(codePoint)) {
+ final int count = this.count;
+
+ if (Character.isBmpCodePoint(codePoint)) {
+ ensureCapacityInternal(count + 1);
+ value[count] = (char) codePoint;
+ this.count = count + 1;
+ } else if (Character.isValidCodePoint(codePoint)) {
+ ensureCapacityInternal(count + 2);
+ Character.toSurrogates(codePoint, value, count);
+ this.count = count + 2;
+ } else {
throw new IllegalArgumentException();
}
- int n = 1;
- if (codePoint >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
- n++;
- }
- ensureCapacityInternal(count + n);
- if (n == 1) {
- value[count++] = (char) codePoint;
- } else {
- Character.toSurrogates(codePoint, value, count);
- count += n;
- }
return this;
}
--- a/jdk/src/share/classes/java/lang/Character.java Wed Jun 30 16:11:31 2010 -0700
+++ b/jdk/src/share/classes/java/lang/Character.java Wed Jun 30 16:11:32 2010 -0700
@@ -67,17 +67,16 @@
* definition</i></a> of the U+<i>n</i> notation in the Unicode
* standard.)
*
- * <p>The set of characters from U+0000 to U+FFFF is sometimes
- * referred to as the <em>Basic Multilingual Plane (BMP)</em>. <a
- * name="supplementary">Characters</a> whose code points are greater
+ * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
+ * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
+ * <a name="supplementary">Characters</a> whose code points are greater
* than U+FFFF are called <em>supplementary character</em>s. The Java
- * 2 platform uses the UTF-16 representation in <code>char</code>
- * arrays and in the <code>String</code> and <code>StringBuffer</code>
- * classes. In this representation, supplementary characters are
- * represented as a pair of <code>char</code> values, the first from
- * the <em>high-surrogates</em> range, (\uD800-\uDBFF), the
- * second from the <em>low-surrogates</em> range
- * (\uDC00-\uDFFF).
+ * platform uses the UTF-16 representation in <code>char</code> arrays and
+ * in the <code>String</code> and <code>StringBuffer</code> classes. In
+ * this representation, supplementary characters are represented as a pair
+ * of <code>char</code> values, the first from the <em>high-surrogates</em>
+ * range, (\uD800-\uDBFF), the second from the
+ * <em>low-surrogates</em> range (\uDC00-\uDFFF).
*
* <p>A <code>char</code> value, therefore, represents Basic
* Multilingual Plane (BMP) code points, including the surrogate
@@ -3924,6 +3923,25 @@
/**
* Determines whether the specified character (Unicode code point)
+ * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
+ * Such code points can be represented using a single {@code char}.
+ *
+ * @param codePoint the character (Unicode code point) to be tested
+ * @return {@code true} if the specified code point is between
+ * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
+ * {@code false} otherwise.
+ * @since 1.7
+ */
+ public static boolean isBmpCodePoint(int codePoint) {
+ return codePoint >>> 16 == 0;
+ // Optimized form of:
+ // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
+ // We consistently use logical shift (>>>) to facilitate
+ // additional runtime optimizations.
+ }
+
+ /**
+ * Determines whether the specified character (Unicode code point)
* is in the <a href="#supplementary">supplementary character</a> range.
*
* @param codePoint the character (Unicode code point) to be tested
@@ -4319,15 +4337,15 @@
* @since 1.5
*/
public static int toChars(int codePoint, char[] dst, int dstIndex) {
- if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
+ if (isBmpCodePoint(codePoint)) {
+ dst[dstIndex] = (char) codePoint;
+ return 1;
+ } else if (isValidCodePoint(codePoint)) {
+ toSurrogates(codePoint, dst, dstIndex);
+ return 2;
+ } else {
throw new IllegalArgumentException();
}
- if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
- dst[dstIndex] = (char) codePoint;
- return 1;
- }
- toSurrogates(codePoint, dst, dstIndex);
- return 2;
}
/**
@@ -4347,15 +4365,15 @@
* @since 1.5
*/
public static char[] toChars(int codePoint) {
- if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
+ if (isBmpCodePoint(codePoint)) {
+ return new char[] { (char) codePoint };
+ } else if (isValidCodePoint(codePoint)) {
+ char[] result = new char[2];
+ toSurrogates(codePoint, result, 0);
+ return result;
+ } else {
throw new IllegalArgumentException();
}
- if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
- return new char[] { (char) codePoint };
- }
- char[] result = new char[2];
- toSurrogates(codePoint, result, 0);
- return result;
}
static void toSurrogates(int codePoint, char[] dst, int index) {
@@ -6259,8 +6277,7 @@
*/
static char[] toUpperCaseCharArray(int codePoint) {
// As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
- assert isValidCodePoint(codePoint) &&
- !isSupplementaryCodePoint(codePoint);
+ assert isBmpCodePoint(codePoint);
return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
}
--- a/jdk/src/share/classes/java/lang/String.java Wed Jun 30 16:11:31 2010 -0700
+++ b/jdk/src/share/classes/java/lang/String.java Wed Jun 30 16:11:32 2010 -0700
@@ -99,6 +99,8 @@
*
* @author Lee Boynton
* @author Arthur van Hoff
+ * @author Martin Buchholz
+ * @author Ulf Zibis
* @see java.lang.Object#toString()
* @see java.lang.StringBuffer
* @see java.lang.StringBuilder
@@ -273,32 +275,32 @@
throw new StringIndexOutOfBoundsException(offset + count);
}
+ final int end = offset + count;
+
// Pass 1: Compute precise size of char[]
- int n = 0;
- for (int i = offset; i < offset + count; i++) {
+ int n = count;
+ for (int i = offset; i < end; i++) {
int c = codePoints[i];
- if (c >= Character.MIN_CODE_POINT &&
- c < Character.MIN_SUPPLEMENTARY_CODE_POINT)
- n += 1;
- else if (Character.isSupplementaryCodePoint(c))
- n += 2;
+ if (Character.isBmpCodePoint(c))
+ continue;
+ else if (Character.isValidCodePoint(c))
+ n++;
else throw new IllegalArgumentException(Integer.toString(c));
}
// Pass 2: Allocate and fill in char[]
- char[] v = new char[n];
- for (int i = offset, j = 0; i < offset + count; i++) {
+ final char[] v = new char[n];
+
+ for (int i = offset, j = 0; i < end; i++, j++) {
int c = codePoints[i];
- if (c < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
- v[j++] = (char) c;
- } else {
- Character.toSurrogates(c, v, j);
- j += 2;
- }
+ if (Character.isBmpCodePoint(c))
+ v[j] = (char) c;
+ else
+ Character.toSurrogates(c, v, j++);
}
this.value = v;
- this.count = v.length;
+ this.count = n;
this.offset = 0;
}
--- a/jdk/src/share/classes/sun/io/CharToByteDBCS_ASCII.java Wed Jun 30 16:11:31 2010 -0700
+++ b/jdk/src/share/classes/sun/io/CharToByteDBCS_ASCII.java Wed Jun 30 16:11:32 2010 -0700
@@ -24,7 +24,6 @@
*/
package sun.io;
-import sun.nio.cs.Surrogate;
import sun.nio.cs.ext.DoubleByte;
import static sun.nio.cs.CharsetMapping.*;
--- a/jdk/src/share/classes/sun/io/CharToByteDBCS_EBCDIC.java Wed Jun 30 16:11:31 2010 -0700
+++ b/jdk/src/share/classes/sun/io/CharToByteDBCS_EBCDIC.java Wed Jun 30 16:11:32 2010 -0700
@@ -24,7 +24,6 @@
*/
package sun.io;
-import sun.nio.cs.Surrogate;
import sun.nio.cs.ext.DoubleByte;
import static sun.nio.cs.CharsetMapping.*;
--- a/jdk/src/share/classes/sun/nio/cs/Surrogate.java Wed Jun 30 16:11:31 2010 -0700
+++ b/jdk/src/share/classes/sun/nio/cs/Surrogate.java Wed Jun 30 16:11:32 2010 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2001, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -34,8 +34,9 @@
* Utility class for dealing with surrogates.
*
* @author Mark Reinhold
+ * @author Martin Buchholz
+ * @author Ulf Zibis
*/
-
public class Surrogate {
private Surrogate() { }
@@ -75,16 +76,9 @@
}
/**
- * Tells whether or not the given UCS-4 character is in the Basic
- * Multilingual Plane, and can be represented using a single char.
- */
- public static boolean isBMPCodePoint(int uc) {
- return uc >> 16 == 0;
- }
-
- /**
* Tells whether or not the given UCS-4 character must be represented as a
* surrogate pair in UTF-16.
+ * Use of {@link Character#isSupplementaryCodePoint} is generally preferred.
*/
public static boolean neededFor(int uc) {
return Character.isSupplementaryCodePoint(uc);
@@ -110,6 +104,7 @@
/**
* Converts the given surrogate pair into a 32-bit UCS-4 character.
+ * Use of {@link Character#toCodePoint} is generally preferred.
*/
public static int toUCS4(char c, char d) {
assert Character.isHighSurrogate(c) && Character.isLowSurrogate(d);
@@ -290,8 +285,9 @@
* error() will return a descriptive result object
*/
public int generate(int uc, int len, CharBuffer dst) {
- if (Surrogate.isBMPCodePoint(uc)) {
- if (Surrogate.is(uc)) {
+ if (Character.isBmpCodePoint(uc)) {
+ char c = (char) uc;
+ if (Character.isSurrogate(c)) {
error = CoderResult.malformedForLength(len);
return -1;
}
@@ -299,10 +295,10 @@
error = CoderResult.OVERFLOW;
return -1;
}
- dst.put((char)uc);
+ dst.put(c);
error = null;
return 1;
- } else if (Character.isSupplementaryCodePoint(uc)) {
+ } else if (Character.isValidCodePoint(uc)) {
if (dst.remaining() < 2) {
error = CoderResult.OVERFLOW;
return -1;
@@ -334,8 +330,9 @@
* error() will return a descriptive result object
*/
public int generate(int uc, int len, char[] da, int dp, int dl) {
- if (Surrogate.isBMPCodePoint(uc)) {
- if (Surrogate.is(uc)) {
+ if (Character.isBmpCodePoint(uc)) {
+ char c = (char) uc;
+ if (Character.isSurrogate(c)) {
error = CoderResult.malformedForLength(len);
return -1;
}
@@ -343,10 +340,10 @@
error = CoderResult.OVERFLOW;
return -1;
}
- da[dp] = (char)uc;
+ da[dp] = c;
error = null;
return 1;
- } else if (Character.isSupplementaryCodePoint(uc)) {
+ } else if (Character.isValidCodePoint(uc)) {
if (dl - dp < 2) {
error = CoderResult.OVERFLOW;
return -1;
--- a/jdk/src/share/classes/sun/nio/cs/UTF_32Coder.java Wed Jun 30 16:11:31 2010 -0700
+++ b/jdk/src/share/classes/sun/nio/cs/UTF_32Coder.java Wed Jun 30 16:11:32 2010 -0700
@@ -86,22 +86,21 @@
src.position(mark);
}
}
- while (src.remaining() > 3) {
+ while (src.remaining() >= 4) {
cp = getCP(src);
- if (cp < 0 || cp > Surrogate.UCS4_MAX) {
- return CoderResult.malformedForLength(4);
- }
- if (cp < Surrogate.UCS4_MIN) {
+ if (Character.isBmpCodePoint(cp)) {
if (!dst.hasRemaining())
return CoderResult.OVERFLOW;
mark += 4;
- dst.put((char)cp);
- } else {
+ dst.put((char) cp);
+ } else if (Character.isValidCodePoint(cp)) {
if (dst.remaining() < 2)
return CoderResult.OVERFLOW;
mark += 4;
dst.put(Surrogate.high(cp));
dst.put(Surrogate.low(cp));
+ } else {
+ return CoderResult.malformedForLength(4);
}
}
return CoderResult.UNDERFLOW;
@@ -154,7 +153,12 @@
try {
while (src.hasRemaining()) {
char c = src.get();
- if (Character.isHighSurrogate(c)) {
+ if (!Character.isSurrogate(c)) {
+ if (dst.remaining() < 4)
+ return CoderResult.OVERFLOW;
+ mark++;
+ put(c, dst);
+ } else if (Character.isHighSurrogate(c)) {
if (!src.hasRemaining())
return CoderResult.UNDERFLOW;
char low = src.get();
@@ -162,17 +166,13 @@
if (dst.remaining() < 4)
return CoderResult.OVERFLOW;
mark += 2;
- put(Surrogate.toUCS4(c, low), dst);
+ put(Character.toCodePoint(c, low), dst);
} else {
return CoderResult.malformedForLength(1);
}
- } else if (Character.isLowSurrogate(c)) {
+ } else {
+ // assert Character.isLowSurrogate(c);
return CoderResult.malformedForLength(1);
- } else {
- if (dst.remaining() < 4)
- return CoderResult.OVERFLOW;
- mark++;
- put(c, dst);
}
}
return CoderResult.UNDERFLOW;
--- a/jdk/src/share/classes/sun/nio/cs/UTF_8.java Wed Jun 30 16:11:31 2010 -0700
+++ b/jdk/src/share/classes/sun/nio/cs/UTF_8.java Wed Jun 30 16:11:32 2010 -0700
@@ -102,7 +102,7 @@
// [F1..F3] [80..BF] [80..BF] [80..BF]
// [F4] [80..8F] [80..BF] [80..BF]
// only check 80-be range here, the [0xf0,0x80...] and [0xf4,0x90-...]
- // will be checked by Surrogate.neededFor(uc)
+ // will be checked by Character.isSupplementaryCodePoint(uc)
private static boolean isMalformed4(int b2, int b3, int b4) {
return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
(b4 & 0xc0) != 0x80;
@@ -248,7 +248,8 @@
((b3 & 0x3f) << 06) |
(b4 & 0x3f);
if (isMalformed4(b2, b3, b4) ||
- !Surrogate.neededFor(uc)) {
+ // shortest form check
+ !Character.isSupplementaryCodePoint(uc)) {
return malformed(src, sp, dst, dp, 4);
}
da[dp++] = Surrogate.high(uc);
@@ -304,7 +305,8 @@
((b3 & 0x3f) << 06) |
(b4 & 0x3f);
if (isMalformed4(b2, b3, b4) ||
- !Surrogate.neededFor(uc)) { // shortest form check
+ // shortest form check
+ !Character.isSupplementaryCodePoint(uc)) {
return malformed(src, mark, 4);
}
dst.put(Surrogate.high(uc));
--- a/jdk/src/share/classes/sun/nio/cs/ext/EUC_TW.java Wed Jun 30 16:11:31 2010 -0700
+++ b/jdk/src/share/classes/sun/nio/cs/ext/EUC_TW.java Wed Jun 30 16:11:32 2010 -0700
@@ -441,7 +441,7 @@
}
static int encode(char hi, char low, byte[] bb) {
- int c = Surrogate.toUCS4(hi, low);
+ int c = Character.toCodePoint(hi, low);
if ((c & 0xf0000) != 0x20000)
return -1;
c -= 0x20000;
--- a/jdk/src/share/classes/sun/nio/cs/ext/GB18030.java Wed Jun 30 16:11:31 2010 -0700
+++ b/jdk/src/share/classes/sun/nio/cs/ext/GB18030.java Wed Jun 30 16:11:32 2010 -0700
@@ -12628,7 +12628,7 @@
if (Character.isSurrogate(c)) {
if ((condensedKey=sgp.parse(c, sa, sp, sl)) < 0)
return sgp.error();
- // Surogate.toUCS4 looks like
+ // Character.toCodePoint looks like
// (((high & 0x3ff) << 10) | (low & 0x3ff)) + 0x10000;
// so we add (0x2e248 - 0x10000) to get the "key".
condensedKey += 0x1E248;
--- a/jdk/src/share/classes/sun/nio/cs/ext/IBM33722.java Wed Jun 30 16:11:31 2010 -0700
+++ b/jdk/src/share/classes/sun/nio/cs/ext/IBM33722.java Wed Jun 30 16:11:32 2010 -0700
@@ -36,7 +36,6 @@
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import sun.nio.cs.HistoricallyNamedCharset;
-import sun.nio.cs.Surrogate;
public class IBM33722
extends Charset
--- a/jdk/src/share/classes/sun/nio/cs/ext/IBM964.java Wed Jun 30 16:11:31 2010 -0700
+++ b/jdk/src/share/classes/sun/nio/cs/ext/IBM964.java Wed Jun 30 16:11:32 2010 -0700
@@ -36,7 +36,6 @@
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import sun.nio.cs.HistoricallyNamedCharset;
-import sun.nio.cs.Surrogate;
public class IBM964
extends Charset
--- a/jdk/test/java/nio/charset/coders/BashStreams.java Wed Jun 30 16:11:31 2010 -0700
+++ b/jdk/test/java/nio/charset/coders/BashStreams.java Wed Jun 30 16:11:32 2010 -0700
@@ -46,7 +46,7 @@
CharacterGenerator(long seed, String csn, int limit) {
rand = new Random(seed);
- this.max = Surrogate.UCS4_MAX + 1;
+ this.max = Character.MAX_CODE_POINT + 1;
this.limit = limit;
}
@@ -77,17 +77,20 @@
int c;
for (;;) {
c = rand.nextInt(max);
- if (Surrogate.is(c) || (c == 0xfffe) || (c == 0xffff))
+ if ((Character.isBmpCodePoint(c)
+ && (Character.isSurrogate((char) c)
+ || (c == 0xfffe) || (c == 0xffff))))
continue;
- if (Surrogate.neededFor(c) && (count == limit - 1))
+ if (Character.isSupplementaryCodePoint(c)
+ && (count == limit - 1))
continue;
break;
}
count++;
- if (Surrogate.neededFor(c)) {
+ if (Character.isSupplementaryCodePoint(c)) {
count++;
- push(Surrogate.low(c));
- return Surrogate.high(c);
+ push(sun.nio.cs.Surrogate.low(c));
+ return sun.nio.cs.Surrogate.high(c);
}
return (char)c;
}
@@ -137,7 +140,7 @@
char d = cg.next();
if (c != d) {
if (c == '?') {
- if (Surrogate.isHigh(d))
+ if (Character.isHighSurrogate(d))
cg.next();
continue;
}
@@ -187,7 +190,7 @@
w.write(ca, 0, n);
count += n;
}
- if (Surrogate.isHigh(ca[n - 1]))
+ if (Character.isHighSurrogate(ca[n - 1]))
w.write(cg.next());
w.close();
}
@@ -253,7 +256,8 @@
if (!cg.hasNext())
break;
char c = cg.next();
- if (Surrogate.isHigh(c) && (cb.remaining() == 1)) {
+ if (Character.isHighSurrogate(c)
+ && cb.remaining() == 1) {
cg.push(c);
break;
}
@@ -311,7 +315,7 @@
mismatchedEOF(csn, count + i, cg.count());
char d = cg.next();
if (c == '?') {
- if (Surrogate.isHigh(d)) {
+ if (Character.isHighSurrogate(d)) {
cg.next();
continue;
}
--- a/jdk/test/java/nio/charset/coders/Surrogate.java Wed Jun 30 16:11:31 2010 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-public class Surrogate {
-
- public static final int UCS4_SURROGATE_MIN = 0x10000;
- public static final int UCS4_MAX = (1 << 20) + UCS4_SURROGATE_MIN - 1;
-
- // UTF-16 surrogate-character ranges
- //
- public static final char MIN_HIGH = '\uD800';
- public static final char MAX_HIGH = '\uDBFF';
- public static final char MIN_LOW = '\uDC00';
- public static final char MAX_LOW = '\uDFFF';
- public static final char MIN = MIN_HIGH;
- public static final char MAX = MAX_LOW;
-
- public static boolean neededFor(int uc) {
- return (uc >= UCS4_SURROGATE_MIN) && (uc <= UCS4_MAX);
- }
-
- public static boolean isHigh(int c) {
- return (MIN_HIGH <= c) && (c <= MAX_HIGH);
- }
-
- static char high(int uc) {
- return (char)(0xd800 | (((uc - UCS4_SURROGATE_MIN) >> 10) & 0x3ff));
- }
-
- public static boolean isLow(int c) {
- return (MIN_LOW <= c) && (c <= MAX_LOW);
- }
-
- static char low(int uc) {
- return (char)(0xdc00 | ((uc - UCS4_SURROGATE_MIN) & 0x3ff));
- }
-
- public static boolean is(int c) {
- return (MIN <= c) && (c <= MAX);
- }
-
- static int toUCS4(char c, char d) {
- return (((c & 0x3ff) << 10) | (d & 0x3ff)) + 0x10000;
- }
-
-}
--- a/jdk/test/java/nio/charset/coders/Surrogates.java Wed Jun 30 16:11:31 2010 -0700
+++ b/jdk/test/java/nio/charset/coders/Surrogates.java Wed Jun 30 16:11:32 2010 -0700
@@ -42,9 +42,8 @@
static void initData() throws IOException {
StringBuffer sb = new StringBuffer();
for (int i = 0; i < LEN; i++) {
- int c = Surrogate.UCS4_SURROGATE_MIN + 1;
- sb.append(Surrogate.high(c));
- sb.append(Surrogate.low(c));
+ int c = Character.MIN_SUPPLEMENTARY_CODE_POINT + 1;
+ sb.append(Character.toChars(c));
}
input = sb.toString().toCharArray();
ByteArrayOutputStream bos = new ByteArrayOutputStream();