jdk-sandbox: test/jdk/java/text/Collator/Regression.java@28ab01c06755


/*
 * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/**
 * @test
 * @bug 4048446 4051866 4053636 4054238 4054734 4054736 4058613 4059820 4060154
 *      4062418 4065540 4066189 4066696 4076676 4078588 4079231 4081866 4087241
 *      4087243 4092260 4095316 4101940 4103436 4114076 4114077 4124632 4132736
 *      4133509 4139572 4141640 4179126 4179686 4244884 4663220
 * @library /java/text/testlib
 * @summary Regression tests for Collation and associated classes
 * @modules jdk.localedata
 */
/*
(C) Copyright Taligent, Inc. 1996 - All Rights Reserved
(C) Copyright IBM Corp. 1996 - All Rights Reserved

  The original version of this source code and documentation is copyrighted and
owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These materials are
provided under terms of a License Agreement between Taligent and Sun. This
technology is protected by multiple US and International patents. This notice and
attribution to Taligent may not be removed.
  Taligent is a registered trademark of Taligent, Inc.
*/

import java.text.*;
import java.util.Locale;
import java.util.Vector;


public class Regression extends CollatorTest {

    // Command-line entry point: creates the suite and hands the arguments
    // to its run() method.
    public static void main(String[] args) throws Exception {
        Regression suite = new Regression();
        suite.run(args);
    }

    // CollationElementIterator.reset() doesn't work
    //
    // Runs one iterator over test1 to exhaustion, resets it, and checks that
    // it then matches a second, untouched iterator over the same text.
    public void Test4048446() {
        CollationElementIterator exhausted = en_us.getCollationElementIterator(test1);
        CollationElementIterator untouched = en_us.getCollationElementIterator(test1);

        // Drain the first iterator completely before resetting it.
        int order;
        do {
            order = exhausted.next();
        } while (order != CollationElementIterator.NULLORDER);
        exhausted.reset();

        assertEqual(exhausted, untouched);
    }


    // Collator -> rules -> Collator round-trip broken for expanding characters
    //
    // Builds a collator from rules containing expanding characters, builds a
    // second one from the first one's getRules() output, and reports an error
    // if the two rule strings differ.
    public void Test4051866() throws ParseException {
        String expandingRules = "< o "
                              + "& oe ,o\u3080"
                              + "& oe ,\u1530 ,O"
                              + "& OE ,O\u3080"
                              + "& OE ,\u1520"
                              + "< p ,P";
        RuleBasedCollator original = new RuleBasedCollator(expandingRules);

        // Feed the reported rules back into a fresh collator.
        String reportedRules = original.getRules();
        RuleBasedCollator rebuilt = new RuleBasedCollator(reportedRules);

        boolean sameRules = reportedRules.equals(rebuilt.getRules());
        if (!sameRules) {
            errln("Rules are not equal");
        }
    }

    // Collator thinks "black-bird" == "black"
    //
    // Reports an error if en_us.equals() treats the two strings as equal.
    public void Test4053636() {
        boolean treatedAsEqual = en_us.equals("black-bird", "black");
        if (treatedAsEqual) {
            errln("black-bird == black");
        }
    }


    // CollationElementIterator will not work correctly if the associated
    // Collator object's mode is changed
    //
    // Clones en_us, obtains one iterator over test3 while the clone is in
    // CANONICAL_DECOMPOSITION mode and another after switching the clone to
    // NO_DECOMPOSITION, then checks that both yield the same elements.
    public void Test4054238() {
        RuleBasedCollator c = (RuleBasedCollator) en_us.clone();

        // The iterators must be created by the collator whose mode is being
        // changed (c). Creating them from en_us, whose mode is never touched
        // here, would leave the mode switch completely unobserved.
        c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
        CollationElementIterator i1 = c.getCollationElementIterator(test3);

        c.setDecomposition(Collator.NO_DECOMPOSITION);
        CollationElementIterator i2 = c.getCollationElementIterator(test3);

        // Expectation recorded with the original bug: at this point BOTH
        // iterators should behave as NO_DECOMPOSITION, since the collator
        // itself is in that mode
        assertEqual(i1, i2);
    }

    // Collator.IDENTICAL documented but not implemented
    //
    // Sets IDENTICAL strength on a clone of en_us, then runs one comparison
    // table with canonical decomposition and one with decomposition off.
    public void Test4054734() {
        RuleBasedCollator identical = (RuleBasedCollator) en_us.clone();
        try {
            identical.setStrength(Collator.IDENTICAL);
        } catch (Exception ex) {
            errln("Caught " + ex.toString() + " setting Collator.IDENTICAL");
        }

        identical.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
        compareArray(identical, new String[] {
            "\u0001",   "<",    "\u0002",
            "\u0001",   "=",    "\u0001",
            "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
            "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
        });

        identical.setDecomposition(Collator.NO_DECOMPOSITION);
        compareArray(identical, new String[] {
            "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
        });
    }

    // Full Decomposition mode not implemented
    //
    // With FULL_DECOMPOSITION set on a clone of en_us, the Alef-Lamed
    // ligature is expected to compare equal to the Alef, Lamed sequence.
    public void Test4054736() {
        RuleBasedCollator fullDecomp = (RuleBasedCollator) en_us.clone();
        fullDecomp.setDecomposition(Collator.FULL_DECOMPOSITION);

        compareArray(fullDecomp, new String[] {
            "\uFB4f", "=", "\u05D0\u05DC",  // Alef-Lamed vs. Alef, Lamed
        });
    }

    // Collator.getInstance() causes an ArrayIndexOutofBoundsException for Korean
    //
    // Temporarily makes Korean the default locale, creates the default
    // collator, and checks its decomposition mode. The previous default
    // locale is always restored.
    public void Test4058613() {
        final Locale savedDefault = Locale.getDefault();

        Locale.setDefault(Locale.KOREAN);
        try {
            // Creating a default collator used to fail when Korean was the
            // default locale.
            int mode = Collator.getInstance().getDecomposition();

            // Since the fix to this bug was to turn off decomposition for
            // Korean collators, ensure that's what we got
            if (mode != Collator.NO_DECOMPOSITION) {
                errln("Decomposition is not set to NO_DECOMPOSITION");
            }
        } finally {
            Locale.setDefault(savedDefault);
        }
    }

    // RuleBasedCollator.getRules does not return the exact pattern as input
    // for expanding character sequences
    //
    // Builds a collator whose rules contain the expansion "c/a" and checks
    // that the text "c/a" is still present in getRules().
    public void Test4059820() {
        final RuleBasedCollator built;
        try {
            built = new RuleBasedCollator("< a < b , c/a < d < z");
        } catch (ParseException e) {
            errln("Exception building collator: " + e.toString());
            return;
        }

        String rules = built.getRules();
        if (!rules.contains("c/a")) {
            errln("returned rules do not contain 'c/a'");
        }
    }

    // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
    //
    // Builds a collator from rules that reset at H and re-insert the dotless
    // and dotted i variants, then checks one comparison table at TERTIARY
    // strength and another at PRIMARY strength.
    public void Test4060154() {
        final RuleBasedCollator turkishI;
        try {
            turkishI = new RuleBasedCollator("< g, G < h, H < i, I < j, J"
                                      + " & H < \u0131, \u0130, i, I" );
        } catch (ParseException e) {
            errln("Exception building collator: " + e.toString());
            return;
        }
        turkishI.setDecomposition(Collator.CANONICAL_DECOMPOSITION);

        turkishI.setStrength(Collator.TERTIARY);
        String[] tertiaryCases = {
            "A",        "<",    "B",
            "H",        "<",    "\u0131",
            "H",        "<",    "I",
            "\u0131",   "<",    "\u0130",
            "\u0130",   "<",    "i",
            "\u0130",   ">",    "H",
        };
        compareArray(turkishI, tertiaryCases);

        // These expectations are evaluated at PRIMARY strength.
        turkishI.setStrength(Collator.PRIMARY);
        String[] primaryCases = {
            "H",        "<",    "I",
            "\u0131",   "=",    "\u0130",
        };
        compareArray(turkishI, primaryCases);
    }

    // Secondary/Tertiary comparison incorrect in French Secondary
    //
    // Runs a single accent-ordering comparison through the Locale.FRANCE
    // collator at SECONDARY strength.
    public void Test4062418() throws ParseException {
        RuleBasedCollator french = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
        french.setStrength(Collator.SECONDARY);

        compareArray(french, new String[] {
                "p\u00eache",    "<",    "p\u00e9ch\u00e9",    // Comparing accents from end, p\u00e9ch\u00e9 is greater
        });
    }

    // Collator.compare() method broken if either string contains spaces
    //
    // The two strings differ only after the space, so they must not compare
    // as equal.
    public void Test4065540() {
        int order = en_us.compare("abcd e", "abcd f");
        if (order == 0) {
            errln("'abcd e' == 'abcd f'");
        }
    }

    // Unicode characters need to be recursively decomposed to get the
    // correct result. For example,
    // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
    //
    // Iterates the precomposed character with a FULL_DECOMPOSITION collator
    // and the hand-decomposed sequence with a NO_DECOMPOSITION collator, and
    // checks that both iterators yield the same collation elements.
    public void Test4066189() {
        String composed = "\u1EB1";
        String decomposed = "a\u0306\u0300";

        // Each iterator must come from the clone configured for it. Taking
        // both from en_us would ignore the decomposition modes set here and
        // leave the recursive decomposition path untested.
        RuleBasedCollator c1 = (RuleBasedCollator) en_us.clone();
        c1.setDecomposition(Collator.FULL_DECOMPOSITION);
        CollationElementIterator i1 = c1.getCollationElementIterator(composed);

        RuleBasedCollator c2 = (RuleBasedCollator) en_us.clone();
        c2.setDecomposition(Collator.NO_DECOMPOSITION);
        CollationElementIterator i2 = c2.getCollationElementIterator(decomposed);

        assertEqual(i1, i2);
    }

    // French secondary collation checking at the end of compare iteration fails
    //
    // Runs one comparison through the Locale.FRANCE collator at SECONDARY
    // strength.
    public void Test4066696() {
        RuleBasedCollator french = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
        french.setStrength(Collator.SECONDARY);

        compareArray(french, new String[] {
            "\u00e0",   "<",     "\u01fa",       // a-grave <  A-ring-acute
        });
    }


    // Bad canonicalization of same-class combining characters
    //
    // Compares two strings carrying the same three combining marks in a
    // different order, using a TERTIARY clone of en_us.
    public void Test4076676() {
        RuleBasedCollator tertiary = (RuleBasedCollator) en_us.clone();
        tertiary.setStrength(Collator.TERTIARY);

        // These combining characters are all in the same class, so they should not
        // be reordered, and they should compare as unequal.
        final String acuteFirst = "A\u0301\u0302\u0300";
        final String circumflexFirst = "A\u0302\u0300\u0301";

        int order = tertiary.compare(acuteFirst, circumflexFirst);
        if (order == 0) {
            errln("Same-class combining chars were reordered");
        }
    }


    // RuleBasedCollator.equals(null) throws NullPointerException
    //
    // equals(null) must return false rather than return true or throw.
    public void Test4079231() {
        try {
            boolean equalsNull = en_us.equals(null);
            if (equalsNull) {
                errln("en_us.equals(null) returned true");
            }
        } catch (Exception e) {
            errln("en_us.equals(null) threw " + e.toString());
        }
    }

    // RuleBasedCollator breaks on "< a < bb" rule
    //
    // Builds a collator from that rule and checks that "a" sorts before "bb".
    public void Test4078588() throws ParseException {
        RuleBasedCollator rbc = new RuleBasedCollator("< a < bb");

        int result = rbc.compare("a", "bb");

        // Collator.compare only promises a value less than zero for "less
        // than", so test the sign instead of requiring exactly -1.
        if (result >= 0) {
            errln("Compare(a,bb) returned " + result + "; expected a negative value");
        }
    }

    // Combining characters in different classes not reordered properly.
    //
    // Compares two strings carrying the same four combining marks in a
    // different order, using a TERTIARY clone of en_us with canonical
    // decomposition switched on.
    public void Test4081866() throws ParseException {
        RuleBasedCollator reordering = (RuleBasedCollator) en_us.clone();
        reordering.setStrength(Collator.TERTIARY);

        // Now that the default collators are set to NO_DECOMPOSITION
        // (as a result of fixing bug 4114077), we must set it explicitly
        // when we're testing reordering behavior.  -- lwerner, 5/5/98
        reordering.setDecomposition(Collator.CANONICAL_DECOMPOSITION);

        // These combining characters are all in different classes,
        // so they should be reordered and the strings should compare as equal.
        final String firstOrder = "A\u0300\u0316\u0327\u0315";
        final String secondOrder = "A\u0327\u0316\u0315\u0300";

        int order = reordering.compare(firstOrder, secondOrder);
        if (order != 0) {
            errln("Combining chars were not reordered");
        }
    }

    // string comparison errors in Scandinavian collators
    //
    // Runs three comparisons through the da_DK collator at SECONDARY
    // strength.
    public void Test4087241() {
        Locale danish = new Locale("da", "DK");
        RuleBasedCollator danishCollator = (RuleBasedCollator) Collator.getInstance(danish);
        danishCollator.setStrength(Collator.SECONDARY);

        compareArray(danishCollator, new String[] {
            "\u007a",   "<",    "\u00e6",       // z        < ae
            "a\u0308",  "<",    "a\u030a",      // a-umlaut < a-ring
            "Y",        "<",    "u\u0308",      // Y        < u-umlaut
        });
    }

    // CollationKey takes ignorable strings into account when it shouldn't
    //
    // A trailing ctrl-A is expected not to affect the comparison at TERTIARY
    // strength.
    public void Test4087243() {
        RuleBasedCollator tertiary = (RuleBasedCollator) en_us.clone();
        tertiary.setStrength(Collator.TERTIARY);

        compareArray(tertiary, new String[] {
            "123",      "=",    "123\u0001",        // 1 2 3  =  1 2 3 ctrl-A
        });
    }

    // Mu/micro conflict
    // Micro symbol and greek lowercase letter Mu should sort identically
    //
    // Uses the collator for language "el" with FULL_DECOMPOSITION.
    public void Test4092260() {
        Collator greek = Collator.getInstance(new Locale("el", ""));

        // will only be equal when FULL_DECOMPOSITION is used
        greek.setDecomposition(Collator.FULL_DECOMPOSITION);

        compareArray(greek, new String[] {
            "\u00B5",      "=",    "\u03BC",
        });
    }

    // Expects \u03D4 and \u03AB to compare equal under the el_GR collator at
    // TERTIARY strength with FULL_DECOMPOSITION.
    //
    // NOTE(review): unlike every other Test* method in this class, this one
    // is not declared public -- confirm whether the harness is meant to
    // pick it up.
    void Test4095316() {
        Collator greek = Collator.getInstance(new Locale("el", "GR"));
        greek.setStrength(Collator.TERTIARY);
        // javadocs for RuleBasedCollator clearly specify that characters containing
        // compatibility chars MUST use FULL_DECOMPOSITION to get accurate comparisons.
        greek.setDecomposition(Collator.FULL_DECOMPOSITION);

        compareArray(greek, new String[] {
            "\u03D4",      "=",    "\u03AB",
        });
    }

    // Iterating over an empty string: after reset(), the first next() call
    // must return NULLORDER. Any exception along the way is reported as an
    // error.
    public void Test4101940() {
        try {
            RuleBasedCollator simple = new RuleBasedCollator("< a < b");
            CollationElementIterator emptyIter = simple.getCollationElementIterator("");
            emptyIter.reset();

            int first = emptyIter.next();
            if (first != CollationElementIterator.NULLORDER) {
                errln("next did not return NULLORDER");
            }
        } catch (Exception e) {
            errln("Caught " + e );
        }
    }

    // Collator.compare not handling spaces properly
    //
    // "file" must sort before both longer strings at TERTIARY strength.
    public void Test4103436() {
        RuleBasedCollator tertiary = (RuleBasedCollator) en_us.clone();
        tertiary.setStrength(Collator.TERTIARY);

        compareArray(tertiary, new String[] {
            "file",      "<",    "file access",
            "file",      "<",    "fileaccess",
        });
    }

    // Collation not Unicode conformant with Hangul syllables
    //
    // Runs the same Hangul comparison with canonical and then with full
    // decomposition on a TERTIARY clone of en_us.
    public void Test4114076() {
        RuleBasedCollator tertiary = (RuleBasedCollator) en_us.clone();
        tertiary.setStrength(Collator.TERTIARY);

        String[] hangulCases = {
            "\ud4db",   "=",    "\u1111\u1171\u11b6",
        };

        //
        // With Canonical decomposition, Hangul syllables should get decomposed
        // into Jamo, but Jamo characters should not be decomposed into
        // conjoining Jamo
        //
        tertiary.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
        compareArray(tertiary, hangulCases);

        // Full decomposition result should be the same as canonical decomposition
        // for all hangul.
        tertiary.setDecomposition(Collator.FULL_DECOMPOSITION);
        compareArray(tertiary, hangulCases);
    }


    // Collator.getCollationKey was hanging on certain character sequences
    //
    // Requests a collation key from the Locale.JAPAN collator; running out
    // of memory while doing so is reported as an error.
    public void Test4124632() throws Exception {
        Collator japanese = Collator.getInstance(Locale.JAPAN);

        try {
            japanese.getCollationKey("A\u0308bc");
        } catch (OutOfMemoryError oom) {
            errln("Ran out of memory -- probably an infinite loop");
        }
    }

    // sort order of french words with multiple accents has errors
    //
    // Runs two multi-accent comparisons through the Locale.FRANCE collator.
    public void Test4132736() {
        Collator french = Collator.getInstance(Locale.FRANCE);

        compareArray(french, new String[] {
            "e\u0300e\u0301",   "<",    "e\u0301e\u0300",
            "e\u0300\u0301",    ">",    "e\u0301\u0300",
        });
    }

    // The sorting using java.text.CollationKey is not in the exact order
    //
    // Each word must sort before a longer word that starts with it.
    public void Test4133509() {
        String[] prefixCases = {
            "Exception",    "<",    "ExceptionInInitializerError",
            "Graphics",     "<",    "GraphicsEnvironment",
            "String",       "<",    "StringBuffer",
        };
        compareArray(en_us, prefixCases);
    }

    // Collation with decomposition off doesn't work for Europe
    //
    // Ensure that we get the same results with decomposition off
    // as we do with it on....
    public void Test4114077() {
        RuleBasedCollator tertiary = (RuleBasedCollator) en_us.clone();
        tertiary.setStrength(Collator.TERTIARY);

        tertiary.setDecomposition(Collator.NO_DECOMPOSITION);
        compareArray(tertiary, new String[] {
            "\u00C0",        "=", "A\u0300",        // Should be equivalent
            "p\u00eache",         ">", "p\u00e9ch\u00e9",
            "\u0204",        "=", "E\u030F",
            "\u01fa",        "=", "A\u030a\u0301",  // a-ring-acute -> a-ring, acute
                                                    //   -> a, ring, acute
            "A\u0300\u0316", "<", "A\u0316\u0300",  // No reordering --> unequal
        });

        tertiary.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
        compareArray(tertiary, new String[] {
            "A\u0300\u0316", "=", "A\u0316\u0300",      // Reordering --> equal
        });
    }

    // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
    //
    // Rather than just creating a Swedish collator, we might as well
    // try to instantiate one for every locale available on the system
    // in order to prevent this sort of bug from cropping up in the future
    public void Test4141640() {
        for (Locale locale : Collator.getAvailableLocales()) {
            try {
                // Only the creation itself is under test; the result is discarded.
                Collator.getInstance(locale);
            } catch (Exception e) {
                errln("Caught " + e + " creating collator for " + locale);
            }
        }
    }

    // getCollationKey throws exception for spanish text
    // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
    //
    // The locale arguments and the phrase are taken from the bug report;
    // the test passes as long as getCollationKey() does not throw.
    public void Test4139572() {
        // create spanish locale and collator
        Collator spanish = Collator.getInstance(new Locale("es", "es"));

        // this spanish phrase kills it!
        spanish.getCollationKey("Nombre De Objeto");
    }

    // RuleBasedCollator doesn't use getCollationElementIterator internally
    //
    // Uses My4146160Collator, whose getCollationElementIterator overrides
    // increment a counter, and checks that both getCollationKey() and
    // compare() bump that counter at least once.
    public void Test4146160() throws ParseException {
        // Key generation
        My4146160Collator.count = 0;
        My4146160Collator keyMaker = new My4146160Collator();
        keyMaker.getCollationKey("1");
        if (My4146160Collator.count < 1) {
            errln("getCollationElementIterator not called");
        }

        // Direct comparison
        My4146160Collator.count = 0;
        My4146160Collator comparer = new My4146160Collator();
        comparer.compare("1", "2");
        if (My4146160Collator.count < 1) {
            errln("getCollationElementIterator not called");
        }
    }

    // RuleBasedCollator built from en_us's rules that counts how many times
    // either getCollationElementIterator overload is invoked.
    static class My4146160Collator extends RuleBasedCollator {
        // Incremented by both overrides below; reset by the test before use.
        public static int count = 0;

        public My4146160Collator() throws ParseException {
            super(Regression.en_us.getRules());
        }

        @Override
        public CollationElementIterator getCollationElementIterator(String text) {
            ++count;
            return super.getCollationElementIterator(text);
        }

        @Override
        public CollationElementIterator getCollationElementIterator(CharacterIterator text) {
            ++count;
            return super.getCollationElementIterator(text);
        }
    }

    // CollationElementIterator.previous broken for expanding char sequences
    //
    // Iterates forward over a text containing an expanding character,
    // records every element, then iterates backward and checks that the same
    // elements come back in reverse order and in the same number.
    public void Test4179686() throws ParseException {

        // Create a collator with a few expanding character sequences in it....
        RuleBasedCollator coll = new RuleBasedCollator(en_us.getRules()
                                                    + " & ae ; \u00e4 & AE ; \u00c4"
                                                    + " & oe ; \u00f6 & OE ; \u00d6"
                                                    + " & ue ; \u00fc & UE ; \u00dc");

        String text = "T\u00f6ne"; // o-umlaut

        CollationElementIterator iter = coll.getCollationElementIterator(text);
        Vector<Integer> elements = new Vector<>();
        int elem;

        // Iterate forward and collect all of the elements into a Vector
        while ((elem = iter.next()) != CollationElementIterator.NULLORDER) {
            elements.addElement(Integer.valueOf(elem));
        }

        // Now iterate backward and make sure they're the same
        int index = elements.size() - 1;
        while ((elem = iter.previous()) != CollationElementIterator.NULLORDER) {
            // Report surplus elements as a test error instead of failing
            // with an index-out-of-bounds exception on the Vector.
            if (index < 0) {
                errln("previous() returned more elements than next() did ("
                      + elements.size() + ")");
                return;
            }

            int expect = elements.elementAt(index).intValue();

            if (elem != expect) {
                errln("Mismatch at index " + index
                      + ": got " + Integer.toString(elem,16)
                      + ", expected " + Integer.toString(expect,16));
            }
            index--;
        }

        // A backward pass that stops early would otherwise go unnoticed.
        if (index >= 0) {
            errln("previous() returned only " + (elements.size() - 1 - index)
                  + " of the " + elements.size() + " elements returned by next()");
        }
    }

    // Extends the Locale.US rules with the contractions "ch" (in four case
    // variants), "cat" and "crunchy", all ordered after C, and checks that a
    // fixed word list is in strictly ascending order under those rules.
    public void Test4244884() throws ParseException {
        RuleBasedCollator usCollator = (RuleBasedCollator)Collator.getInstance(Locale.US);
        String extendedRules = usCollator.getRules()
                + " & C < ch , cH , Ch , CH < cat < crunchy";
        RuleBasedCollator coll = new RuleBasedCollator(extendedRules);

        String[] expectedOrder = {
            "car",
            "cave",
            "clamp",
            "cramp",
            "czar",
            "church",
            "catalogue",
            "crunchy",
            "dog"
        };

        // Every word must compare strictly less than its successor.
        for (int next = 1; next < expectedOrder.length; next++) {
            String earlier = expectedOrder[next - 1];
            String later = expectedOrder[next];
            if (coll.compare(earlier, later) >= 0) {
                errln("error: \"" + earlier
                    + "\" is greater than or equal to \"" + later
                    + "\".");
            }
        }
    }

    // Checks CollationElementIterator.setOffset() against contracting
    // sequences, using the Locale.US rules extended with the contractions
    // "ch" (four case variants), "cat" and "crunchy". Three things are
    // verified: offsets inside "ch" yield the same primary order as its
    // start, offsets inside "cat" do likewise, and walking the text with
    // setOffset() yields the same substrings as walking it with next().
    public void Test4179216() throws ParseException {
        // you can position a CollationElementIterator in the middle of
        // a contracting character sequence, yielding a bogus collation
        // element
        RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
        coll = new RuleBasedCollator(coll.getRules()
                + " & C < ch , cH , Ch , CH < cat < crunchy");
        String testText = "church church catcatcher runcrunchynchy";
        CollationElementIterator iter = coll.getCollationElementIterator(
                testText);

        // test that the "ch" combination works properly
        // (offset 4 is the 'c' of the second "ch" in the first "church",
        // offset 5 is the 'h' in the middle of it, offset 0 the leading "ch")
        iter.setOffset(4);
        int elt4 = CollationElementIterator.primaryOrder(iter.next());

        iter.reset();
        int elt0 = CollationElementIterator.primaryOrder(iter.next());

        iter.setOffset(5);
        int elt5 = CollationElementIterator.primaryOrder(iter.next());

        if (elt4 != elt0 || elt5 != elt0)
            errln("The collation elements at positions 0 (" + elt0 + "), 4 ("
                    + elt4 + "), and 5 (" + elt5 + ") don't match.");

        // test that the "cat" combination works properly
        // (offsets 14-16 cover the first "cat", 17-19 the second one)
        iter.setOffset(14);
        int elt14 = CollationElementIterator.primaryOrder(iter.next());

        iter.setOffset(15);
        int elt15 = CollationElementIterator.primaryOrder(iter.next());

        iter.setOffset(16);
        int elt16 = CollationElementIterator.primaryOrder(iter.next());

        iter.setOffset(17);
        int elt17 = CollationElementIterator.primaryOrder(iter.next());

        iter.setOffset(18);
        int elt18 = CollationElementIterator.primaryOrder(iter.next());

        iter.setOffset(19);
        int elt19 = CollationElementIterator.primaryOrder(iter.next());

        if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
                || elt14 != elt18 || elt14 != elt19)
            errln("\"cat\" elements don't match: elt14 = " + elt14 + ", elt15 = "
            + elt15 + ", elt16 = " + elt16 + ", elt17 = " + elt17
            + ", elt18 = " + elt18 + ", elt19 = " + elt19);

        // now generate a complete list of the collation elements,
        // first using next() and then using setOffset(), and
        // make sure both interfaces return the same set of elements
        iter.reset();

        // First pass: only count the elements so the arrays can be sized.
        int elt = iter.next();
        int count = 0;
        while (elt != CollationElementIterator.NULLORDER) {
            ++count;
            elt = iter.next();
        }

        String[] nextElements = new String[count];
        String[] setOffsetElements = new String[count];
        int lastPos = 0;

        // Second pass: record the piece of text consumed by each next() call.
        iter.reset();
        elt = iter.next();
        count = 0;
        while (elt != CollationElementIterator.NULLORDER) {
            nextElements[count++] = testText.substring(lastPos, iter.getOffset());
            lastPos = iter.getOffset();
            elt = iter.next();
        }
        // Third pass: reposition explicitly with setOffset() before every
        // next() call; the loop index advances to wherever next() stopped.
        count = 0;
        for (int i = 0; i < testText.length(); ) {
            iter.setOffset(i);
            lastPos = iter.getOffset();
            elt = iter.next();
            setOffsetElements[count++] = testText.substring(lastPos, iter.getOffset());
            i = iter.getOffset();
        }
        // Both walks must have split the text into the same pieces.
        for (int i = 0; i < nextElements.length; i++) {
            if (nextElements[i].equals(setOffsetElements[i])) {
                logln(nextElements[i]);
            } else {
                errln("Error: next() yielded " + nextElements[i] + ", but setOffset() yielded "
                    + setOffsetElements[i]);
            }
        }
    }

    // rule parser barfs on "<\u00e0=a\u0300", and on other cases
    // where the same token (after normalization) appears twice in a row
    //
    // First checks that the rule "\u00e0<a\u0300" is rejected with a
    // ParseException, then that "<\u00e0=a\u0300" is accepted and makes the
    // two forms compare equal in both directions.
    public void Test4216006() throws Exception {
        boolean accepted;
        try {
            new RuleBasedCollator("\u00e0<a\u0300");
            accepted = true;
        } catch (ParseException expected) {
            accepted = false;
        }
        if (accepted) {
            throw new Exception("\"a<a\" collation sequence didn't cause parse error!");
        }

        RuleBasedCollator collator = new RuleBasedCollator("<\u00e0=a\u0300");
        collator.setDecomposition(Collator.FULL_DECOMPOSITION);
        collator.setStrength(Collator.IDENTICAL);

        compareArray(collator, new String[] {
            "a\u0300", "=", "\u00e0",
            "\u00e0",  "=", "a\u0300"
        });
    }

    // Checks accent ordering on two-character strings built from u and four
    // accented forms of u. Each list below is written in the order expected
    // for its collator and is verified with checkListOrder(), once with the
    // collator as obtained and once after switching it to NO_DECOMPOSITION.
    public void Test4171974() {
        // test French accent ordering more thoroughly
        // (in this list the second character is constant within each group
        // of five entries and the first character cycles)
        String[] frenchList = {
            "\u0075\u0075",     // u u
            "\u00fc\u0075",     // u-umlaut u
            "\u01d6\u0075",     // u-umlaut-macron u
            "\u016b\u0075",     // u-macron u
            "\u1e7b\u0075",     // u-macron-umlaut u
            "\u0075\u00fc",     // u u-umlaut
            "\u00fc\u00fc",     // u-umlaut u-umlaut
            "\u01d6\u00fc",     // u-umlaut-macron u-umlaut
            "\u016b\u00fc",     // u-macron u-umlaut
            "\u1e7b\u00fc",     // u-macron-umlaut u-umlaut
            "\u0075\u01d6",     // u u-umlaut-macron
            "\u00fc\u01d6",     // u-umlaut u-umlaut-macron
            "\u01d6\u01d6",     // u-umlaut-macron u-umlaut-macron
            "\u016b\u01d6",     // u-macron u-umlaut-macron
            "\u1e7b\u01d6",     // u-macron-umlaut u-umlaut-macron
            "\u0075\u016b",     // u u-macron
            "\u00fc\u016b",     // u-umlaut u-macron
            "\u01d6\u016b",     // u-umlaut-macron u-macron
            "\u016b\u016b",     // u-macron u-macron
            "\u1e7b\u016b",     // u-macron-umlaut u-macron
            "\u0075\u1e7b",     // u u-macron-umlaut
            "\u00fc\u1e7b",     // u-umlaut u-macron-umlaut
            "\u01d6\u1e7b",     // u-umlaut-macron u-macron-umlaut
            "\u016b\u1e7b",     // u-macron u-macron-umlaut
            "\u1e7b\u1e7b"      // u-macron-umlaut u-macron-umlaut
        };
        Collator french = Collator.getInstance(Locale.FRENCH);

        logln("Testing French order...");
        checkListOrder(frenchList, french);

        logln("Testing French order without decomposition...");
        french.setDecomposition(Collator.NO_DECOMPOSITION);
        checkListOrder(frenchList, french);

        // Same strings, but here the first character is constant within each
        // group of five entries and the second character cycles.
        String[] englishList = {
            "\u0075\u0075",     // u u
            "\u0075\u00fc",     // u u-umlaut
            "\u0075\u01d6",     // u u-umlaut-macron
            "\u0075\u016b",     // u u-macron
            "\u0075\u1e7b",     // u u-macron-umlaut
            "\u00fc\u0075",     // u-umlaut u
            "\u00fc\u00fc",     // u-umlaut u-umlaut
            "\u00fc\u01d6",     // u-umlaut u-umlaut-macron
            "\u00fc\u016b",     // u-umlaut u-macron
            "\u00fc\u1e7b",     // u-umlaut u-macron-umlaut
            "\u01d6\u0075",     // u-umlaut-macron u
            "\u01d6\u00fc",     // u-umlaut-macron u-umlaut
            "\u01d6\u01d6",     // u-umlaut-macron u-umlaut-macron
            "\u01d6\u016b",     // u-umlaut-macron u-macron
            "\u01d6\u1e7b",     // u-umlaut-macron u-macron-umlaut
            "\u016b\u0075",     // u-macron u
            "\u016b\u00fc",     // u-macron u-umlaut
            "\u016b\u01d6",     // u-macron u-umlaut-macron
            "\u016b\u016b",     // u-macron u-macron
            "\u016b\u1e7b",     // u-macron u-macron-umlaut
            "\u1e7b\u0075",     // u-macron-umlaut u
            "\u1e7b\u00fc",     // u-macron-umlaut u-umlaut
            "\u1e7b\u01d6",     // u-macron-umlaut u-umlaut-macron
            "\u1e7b\u016b",     // u-macron-umlaut u-macron
            "\u1e7b\u1e7b"      // u-macron-umlaut u-macron-umlaut
        };
        Collator english = Collator.getInstance(Locale.ENGLISH);

        logln("Testing English order...");
        checkListOrder(englishList, english);

        logln("Testing English order without decomposition...");
        english.setDecomposition(Collator.NO_DECOMPOSITION);
        checkListOrder(englishList, english);
    }

    // Uses the specified Collator to make sure the passed-in list is already
    // sorted into strictly ascending order; every adjacent pair that is not
    // is reported through errln().
    private void checkListOrder(String[] sortedList, Collator c) {
        for (int next = 1; next < sortedList.length; next++) {
            String lower = sortedList[next - 1];
            String upper = sortedList[next];
            if (c.compare(lower, upper) >= 0) {
                errln("List out of order at element #" + (next - 1) + ": "
                        + prettify(lower) + " >= "
                        + prettify(upper));
            }
        }
    }

    // CollationElementIterator set doesn't work properly with next/prev
    //
    // Collects the three elements of "fox" once by calling next() repeatedly
    // and once by calling setOffset(i) before each next(), then checks that
    // both runs produced the same values.
    public void Test4663220() {
        RuleBasedCollator collator = (RuleBasedCollator)Collator.getInstance(Locale.US);
        CharacterIterator source = new StringCharacterIterator("fox");
        CollationElementIterator iter = collator.getCollationElementIterator(source);

        int[] sequential = new int[3];
        logln("calling next:");
        for (int pos = 0; pos < 3; ++pos) {
            sequential[pos] = iter.next();
            logln("[" + pos + "] " + sequential[pos]);
        }

        int[] positioned = new int[3];
        logln("calling set/next:");
        for (int pos = 0; pos < 3; ++pos) {
            iter.setOffset(pos);
            positioned[pos] = iter.next();
            logln("[" + pos + "] " + positioned[pos]);
        }

        for (int pos = 0; pos < 3; ++pos) {
            if (sequential[pos] != positioned[pos]) {
                errln("mismatch at position " + pos +
                      ": " + sequential[pos] +
                      " != " + positioned[pos]);
            }
        }
    }

    //------------------------------------------------------------------------
    // Internal utilities
    //
    // Runs a table of comparisons against a collator.
    //
    // The table is read in groups of three entries: source string, expected
    // relation, target string. The relation is "<", ">" or "=", or otherwise
    // any integer literal accepted by Integer.decode(); only its sign is
    // compared. Each triple is checked with Collator.compare() first and,
    // only if that agreed, with the two strings' CollationKeys.
    private void compareArray(Collator c, String[] tests) {
        for (int base = 0; base < tests.length; base += 3) {
            final String source = tests[base];
            final String relation = tests[base + 1];

            final int expect;
            if (relation.equals("<")) {
                expect = -1;
            } else if (relation.equals(">")) {
                expect = 1;
            } else if (relation.equals("=")) {
                expect = 0;
            } else {
                expect = Integer.decode(relation).intValue();
            }

            final String target = tests[base + 2];

            int result = c.compare(source, target);
            if (sign(result) != sign(expect)) {
                errln( base/3 + ": compare(" + prettify(source)
                                    + " , " + prettify(target)
                                    + ") got " + result + "; expected " + expect);
                continue;
            }

            // Collator.compare worked OK; now try the collation keys
            CollationKey sourceKey = c.getCollationKey(source);
            CollationKey targetKey = c.getCollationKey(target);

            result = sourceKey.compareTo(targetKey);
            if (sign(result) != sign(expect)) {
                errln( base/3 + ": key(" + prettify(source)
                                    + ").compareTo(key(" + prettify(target)
                                    + ")) got " + result + "; expected " + expect);

                errln("  " + prettify(sourceKey) + " vs. " + prettify(targetKey));
            }
        }
    }

    // Reduces an int to -1, 0 or 1 according to its sign.
    private static final int sign(int i) {
        return Integer.signum(i);
    }


    // Shared Locale.US collator used throughout this class. Tests that need
    // a different strength or decomposition mode clone it before changing
    // settings.
    static RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);

    // Sample text iterated by Test4048446.
    String test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
    // Not referenced by any test in this class.
    String test2 = "Xf ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
    // Sample text containing accented characters, iterated by Test4054238.
    String test3 = "a\u00FCbeck Gr\u00F6\u00DFe L\u00FCbeck";
}
author	tschatzl
	Wed, 24 Jul 2019 11:49:39 +0200
changeset 57508	28ab01c06755
parent 47216	71c04702a3d5
permissions	-rw-r--r--