6941948: NumaricShaper.shape() doesn't work with NumericShaper.Range.EASTERN_ARABIC
author okutsu
Wed, 14 Apr 2010 13:53:17 +0900
changeset 5281 fb279b232508
parent 5280 eb7aefd114b4
child 5282 0dd428e93294
6941948: NumaricShaper.shape() doesn't work with NumericShaper.Range.EASTERN_ARABIC Reviewed-by: peytoia
jdk/src/share/classes/java/awt/font/NumericShaper.java
jdk/test/java/awt/font/NumericShaper/EasternArabicTest.java
--- a/jdk/src/share/classes/java/awt/font/NumericShaper.java	Tue Apr 13 13:47:54 2010 +0900
+++ b/jdk/src/share/classes/java/awt/font/NumericShaper.java	Wed Apr 14 13:53:17 2010 +0900
@@ -129,6 +129,8 @@
      * @since 1.7
      */
     public static enum Range {
+        // The order of EUROPEAN to MONGOLIAN must be consistent
+        // with the bitmask-based constants.
         /**
          * The Latin (European) range with the Latin (ASCII) digits.
          */
@@ -210,6 +212,9 @@
          * The Mongolian range with the Mongolian digits.
          */
         MONGOLIAN       ('\u1810', '\u1800', '\u1900'),
+        // The order of EUROPEAN to MONGOLIAN must be consistent
+        // with the bitmask-based constants.
+
         /**
          * The N'Ko range with the N'Ko digits.
          */
@@ -259,17 +264,6 @@
          */
         CHAM            ('\uaa50', '\uaa00', '\uaa60');
 
-        private static final Range[] ranges = Range.class.getEnumConstants();
-        static {
-            // sort ranges[] by base for binary search
-            Arrays.sort(ranges,
-                        new Comparator<Range>() {
-                            public int compare(Range s1, Range s2) {
-                                return s1.base > s2.base ? 1 : s1.base == s2.base ? 0 : -1;
-                            }
-                        });
-        }
-
         private static int toRangeIndex(Range script) {
             int index = script.ordinal();
             return index < NUM_KEYS ? index : -1;
@@ -346,11 +340,20 @@
     /**
      * {@code Set<Range>} indicating which Unicode ranges to
      * shape. {@code null} for the bit mask-based API.
-     *
-     * @since 1.7
      */
     private transient Set<Range> rangeSet;
 
+    /**
+     * rangeSet.toArray() value. Sorted by Range.base when the number
+     * of elements is greater than BSEARCH_THRESHOLD.
+     */
+    private transient Range[] rangeArray;
+
+    /**
+     * If more than BSEARCH_THRESHOLD ranges are specified, binary search is used.
+     */
+    private static final int BSEARCH_THRESHOLD = 3;
+
     private static final long serialVersionUID = -8022764705923730308L;
 
     /** Identifies the Latin-1 (European) and extended range, and
@@ -513,25 +516,32 @@
     // cache for the NumericShaper.Range version
     private transient volatile Range currentRange = Range.EUROPEAN;
 
-    private Range rangeForCodePoint(int codepoint) {
-        Range range = currentRange;
-        if (range.inRange(codepoint)) {
-            return range;
+    private Range rangeForCodePoint(final int codepoint) {
+        if (currentRange.inRange(codepoint)) {
+            return currentRange;
         }
 
-        final Range[] ranges = Range.ranges;
-        int lo = 0;
-        int hi = ranges.length - 1;
-        while (lo <= hi) {
-            int mid = (lo + hi) / 2;
-            range = ranges[mid];
-            if (codepoint < range.start) {
-                hi = mid - 1;
-            } else if (codepoint >= range.end) {
-                lo = mid + 1;
-            } else {
-                currentRange = range;
-                return range;
+        final Range[] ranges = rangeArray;
+        if (ranges.length > BSEARCH_THRESHOLD) {
+            int lo = 0;
+            int hi = ranges.length - 1;
+            while (lo <= hi) {
+                int mid = (lo + hi) / 2;
+                Range range = ranges[mid];
+                if (codepoint < range.start) {
+                    hi = mid - 1;
+                } else if (codepoint >= range.end) {
+                    lo = mid + 1;
+                } else {
+                    currentRange = range;
+                    return range;
+                }
+            }
+        } else {
+            for (int i = 0; i < ranges.length; i++) {
+                if (ranges[i].inRange(codepoint)) {
+                    return ranges[i];
+                }
             }
         }
         return Range.EUROPEAN;
@@ -928,8 +938,25 @@
     }
 
     private NumericShaper(Range defaultContext, Set<Range> ranges) {
-        this.shapingRange = defaultContext;
-        this.rangeSet = EnumSet.copyOf(ranges); // throws NPE if ranges is null.
+        shapingRange = defaultContext;
+        rangeSet = EnumSet.copyOf(ranges); // throws NPE if ranges is null.
+
+        // Give precedence to EASTERN_ARABIC if both ARABIC and
+        // EASTERN_ARABIC are specified.
+        if (rangeSet.contains(Range.EASTERN_ARABIC)
+            && rangeSet.contains(Range.ARABIC)) {
+            rangeSet.remove(Range.ARABIC);
+        }
+        rangeArray = rangeSet.toArray(new Range[rangeSet.size()]);
+        if (rangeArray.length > BSEARCH_THRESHOLD) {
+            // sort rangeArray for binary search
+            Arrays.sort(rangeArray,
+                        new Comparator<Range>() {
+                            public int compare(Range s1, Range s2) {
+                                return s1.base > s2.base ? 1 : s1.base == s2.base ? 0 : -1;
+                            }
+                        });
+        }
     }
 
     /**
@@ -1152,31 +1179,25 @@
     }
 
     private void shapeContextually(char[] text, int start, int count, Range ctxKey) {
-        if (ctxKey == null) {
+        // if we don't support the specified context, then don't shape.
+        if (ctxKey == null || !rangeSet.contains(ctxKey)) {
             ctxKey = Range.EUROPEAN;
         }
 
         Range lastKey = ctxKey;
         int base = ctxKey.getDigitBase();
         char minDigit = (char)('0' + ctxKey.getNumericBase());
-        for (int i = start, end = start + count; i < end; ++i) {
+        final int end = start + count;
+        for (int i = start; i < end; ++i) {
             char c = text[i];
             if (c >= minDigit && c <= '9') {
                 text[i] = (char)(c + base);
                 continue;
             }
             if (isStrongDirectional(c)) {
-                Range newKey = rangeForCodePoint(c);
-                if (newKey != lastKey) {
-                    lastKey = newKey;
-                    ctxKey = newKey;
-                    if (rangeSet.contains(Range.EUROPEAN)
-                        && (ctxKey == Range.ARABIC || ctxKey == Range.EASTERN_ARABIC)) {
-                        ctxKey = Range.EASTERN_ARABIC;
-                    } else if (!rangeSet.contains(ctxKey)) {
-                        ctxKey = Range.EUROPEAN;
-                    }
-
+                ctxKey = rangeForCodePoint(c);
+                if (ctxKey != lastKey) {
+                    lastKey = ctxKey;
                     base = ctxKey.getDigitBase();
                     minDigit = (char)('0' + ctxKey.getNumericBase());
                 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/java/awt/font/NumericShaper/EasternArabicTest.java	Wed Apr 14 13:53:17 2010 +0900
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2010 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * @test
+ * @bug 6941948
+ * @summary Make sure that EASTERN_ARABIC works with the enum interface.
+ */
+
+import java.awt.font.NumericShaper;
+import java.util.EnumSet;
+import static java.awt.font.NumericShaper.*;
+
+public class EasternArabicTest {
+    static NumericShaper ns_old, ns_new;
+    static boolean err = false;
+
+    static String[][] testData = {
+        // Arabic "October 10"
+        {"\u0623\u0643\u062a\u0648\u0628\u0631 10",
+         "\u0623\u0643\u062a\u0648\u0628\u0631 \u06f1\u06f0"}, // EASTERN_ARABIC digits
+
+        // Tamil "Year 2009"
+        {"\u0b86\u0ba3\u0bcd\u0b9f\u0bc1 2009",
+         "\u0b86\u0ba3\u0bcd\u0b9f\u0bc1 \u0be8\u0be6\u0be6\u0bef"},
+        // "\u0be800\u0bef" is returned by pre-JDK7 because Tamil zero was not
+        //  included in Unicode 4.0.0.
+
+        // Ethiopic "Syllable<HA> 2009"
+        {"\u1200 2009",
+         "\u1200 \u136a00\u1371"},
+        // Ethiopic zero doesn't exist even in Unicode 5.1.0.
+    };
+
+    public static void main(String[] args) {
+        ns_old = getContextualShaper(TAMIL|ETHIOPIC|EASTERN_ARABIC|ARABIC|THAI|LAO,
+                                     EUROPEAN);
+        ns_new = getContextualShaper(EnumSet.of(Range.THAI,
+                                                Range.TAMIL,
+                                                Range.ETHIOPIC,
+                                                Range.EASTERN_ARABIC,
+                                                Range.ARABIC,
+                                                Range.LAO),
+                                     Range.EUROPEAN);
+
+
+        StringBuilder cData = new StringBuilder();
+        StringBuilder cExpected = new StringBuilder();
+        for (int i = 0; i < testData.length; i++) {
+            String data = testData[i][0];
+            String expected = testData[i][1];
+            test(data, expected);
+            cData.append(data).append(' ');
+            cExpected.append(expected).append(' ');
+        }
+        test(cData.toString(), cExpected.toString());
+
+        if (err) {
+            throw new RuntimeException("shape() returned unexpected value.");
+        }
+    }
+
+    private static void test(String data, String expected) {
+        char[] text = data.toCharArray();
+        ns_old.shape(text, 0, text.length);
+        String got = new String(text);
+
+        if (!expected.equals(got)) {
+            err = true;
+            System.err.println("Error with traditional range.");
+            System.err.println("  text = " + data);
+            System.err.println("  got = " + got);
+            System.err.println("  expected = " + expected);
+        } else {
+            System.err.println("OK with traditional range.");
+            System.err.println("  text = " + data);
+            System.err.println("  got = " + got);
+            System.err.println("  expected = " + expected);
+        }
+
+        text = data.toCharArray();
+        ns_new.shape(text, 0, text.length);
+        got = new String(text);
+
+        if (!expected.equals(got)) {
+            err = true;
+            System.err.println("Error with new Enum range.");
+            System.err.println("  text = " + data);
+            System.err.println("  got = " + got);
+            System.err.println("  expected = " + expected);
+        } else {
+            System.err.println("OK with new Enum range.");
+            System.err.println("  text = " + data);
+            System.err.println("  got = " + got);
+            System.err.println("  expected = " + expected);
+        }
+    }
+}