--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/share/classes/java/text/Collator.java Sat Dec 01 00:00:00 2007 +0000
@@ -0,0 +1,553 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/*
+ * (C) Copyright Taligent, Inc. 1996-1998 - All Rights Reserved
+ * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
+ *
+ * The original version of this source code and documentation is copyrighted
+ * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
+ * materials are provided under terms of a License Agreement between Taligent
+ * and Sun. This technology is protected by multiple US and International
+ * patents. This notice and attribution to Taligent may not be removed.
+ * Taligent is a registered trademark of Taligent, Inc.
+ *
+ */
+
+package java.text;
+
+import java.text.spi.CollatorProvider;
+import java.util.Locale;
+import java.util.MissingResourceException;
+import java.util.ResourceBundle;
+import java.util.spi.LocaleServiceProvider;
+import sun.misc.SoftCache;
+import sun.util.resources.LocaleData;
+import sun.util.LocaleServiceProviderPool;
+
+
+/**
+ * The <code>Collator</code> class performs locale-sensitive
+ * <code>String</code> comparison. You use this class to build
+ * searching and sorting routines for natural language text.
+ *
+ * <p>
+ * <code>Collator</code> is an abstract base class. Subclasses
+ * implement specific collation strategies. One subclass,
+ * <code>RuleBasedCollator</code>, is currently provided with
+ * the Java Platform and is applicable to a wide set of languages. Other
+ * subclasses may be created to handle more specialized needs.
+ *
+ * <p>
+ * Like other locale-sensitive classes, you can use the static
+ * factory method, <code>getInstance</code>, to obtain the appropriate
+ * <code>Collator</code> object for a given locale. You will only need
+ * to look at the subclasses of <code>Collator</code> if you need
+ * to understand the details of a particular collation strategy or
+ * if you need to modify that strategy.
+ *
+ * <p>
+ * The following example shows how to compare two strings using
+ * the <code>Collator</code> for the default locale.
+ * <blockquote>
+ * <pre>
+ * // Compare two strings in the default locale
+ * Collator myCollator = Collator.getInstance();
+ * if( myCollator.compare("abc", "ABC") < 0 )
+ * System.out.println("abc is less than ABC");
+ * else
+ * System.out.println("abc is greater than or equal to ABC");
+ * </pre>
+ * </blockquote>
+ *
+ * <p>
+ * You can set a <code>Collator</code>'s <em>strength</em> property
+ * to determine the level of difference considered significant in
+ * comparisons. Four strengths are provided: <code>PRIMARY</code>,
+ * <code>SECONDARY</code>, <code>TERTIARY</code>, and <code>IDENTICAL</code>.
+ * The exact assignment of strengths to language features is
+ * locale dependent. For example, in Czech, "e" and "f" are considered
+ * primary differences, while "e" and "ě" are secondary differences,
+ * "e" and "E" are tertiary differences and "e" and "e" are identical.
+ * The following shows how both case and accents could be ignored for
+ * US English.
+ * <blockquote>
+ * <pre>
+ * //Get the Collator for US English and set its strength to PRIMARY
+ * Collator usCollator = Collator.getInstance(Locale.US);
+ * usCollator.setStrength(Collator.PRIMARY);
+ * if( usCollator.compare("abc", "ABC") == 0 ) {
+ * System.out.println("Strings are equivalent");
+ * }
+ * </pre>
+ * </blockquote>
+ * <p>
+ * For comparing <code>String</code>s exactly once, the <code>compare</code>
+ * method provides the best performance. When sorting a list of
+ * <code>String</code>s however, it is generally necessary to compare each
+ * <code>String</code> multiple times. In this case, <code>CollationKey</code>s
+ * provide better performance. The <code>CollationKey</code> class converts
+ * a <code>String</code> to a series of bits that can be compared bitwise
+ * against other <code>CollationKey</code>s. A <code>CollationKey</code> is
+ * created by a <code>Collator</code> object for a given <code>String</code>.
+ * <br>
+ * <strong>Note:</strong> <code>CollationKey</code>s from different
+ * <code>Collator</code>s cannot be compared. See the class description
+ * for {@link CollationKey}
+ * for an example using <code>CollationKey</code>s.
+ *
+ * @see RuleBasedCollator
+ * @see CollationKey
+ * @see CollationElementIterator
+ * @see Locale
+ * @author Helena Shih, Laura Werner, Richard Gillam
+ */
+
+public abstract class Collator
+ implements java.util.Comparator<Object>, Cloneable
+{
+ /**
+  * Collator strength value. When set, only PRIMARY differences are treated
+  * as significant during comparison. How strengths map onto language
+  * features is locale dependent; a common example is for different base
+  * letters ("a" vs "b") to be considered a PRIMARY difference.
+  * @see java.text.Collator#setStrength
+  * @see java.text.Collator#getStrength
+  */
+ public static final int PRIMARY = 0;
+ /**
+  * Collator strength value. When set, only SECONDARY and above differences
+  * are treated as significant during comparison. How strengths map onto
+  * language features is locale dependent; a common example is for
+  * different accented forms of the same base letter ("a" vs "\u00E4") to
+  * be considered a SECONDARY difference.
+  * @see java.text.Collator#setStrength
+  * @see java.text.Collator#getStrength
+  */
+ public static final int SECONDARY = 1;
+ /**
+  * Collator strength value. When set, only TERTIARY and above differences
+  * are treated as significant during comparison. How strengths map onto
+  * language features is locale dependent; a common example is for case
+  * differences ("a" vs "A") to be considered a TERTIARY difference.
+  * @see java.text.Collator#setStrength
+  * @see java.text.Collator#getStrength
+  */
+ public static final int TERTIARY = 2;
+
+ /**
+  * Collator strength value. When set, all differences are treated as
+  * significant during comparison. How strengths map onto language
+  * features is locale dependent; a common example is for control
+  * characters ("\u0001" vs "\u0002") to be considered equal at the
+  * PRIMARY, SECONDARY, and TERTIARY levels but different at the IDENTICAL
+  * level. Additionally, pre-composed accents such as "\u00C0" (A-grave)
+  * and combining accents such as "A\u0300" (A, combining-grave) are told
+  * apart at the IDENTICAL level if decomposition is set to
+  * NO_DECOMPOSITION.
+  */
+ public static final int IDENTICAL = 3;
+
+ /**
+  * Decomposition mode value. With NO_DECOMPOSITION set, accented characters
+  * are not decomposed for collation. This mode gives the fastest collation
+  * but only produces correct results for languages that do not use
+  * accents. <code>getInstance</code> sets it on the collators it builds.
+  * @see java.text.Collator#getDecomposition
+  * @see java.text.Collator#setDecomposition
+  */
+ public static final int NO_DECOMPOSITION = 0;
+
+ /**
+  * Decomposition mode value. With CANONICAL_DECOMPOSITION set, characters
+  * that are canonical variants according to the Unicode standard are
+  * decomposed for collation. Use this mode to get correct collation of
+  * accented characters.
+  * <p>
+  * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
+  * described in
+  * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode
+  * Technical Report #15</a>.
+  * @see java.text.Collator#getDecomposition
+  * @see java.text.Collator#setDecomposition
+  */
+ public static final int CANONICAL_DECOMPOSITION = 1;
+
+ /**
+  * Decomposition mode value. With FULL_DECOMPOSITION set, both Unicode
+  * canonical variants and Unicode compatibility variants are decomposed
+  * for collation. Besides accented characters, this also causes
+  * characters that have special formats to be collated with their
+  * nominal form. For example, the half-width and full-width ASCII and
+  * Katakana characters are then collated together. FULL_DECOMPOSITION
+  * is the most complete and therefore the slowest decomposition
+  * mode.
+  * <p>
+  * FULL_DECOMPOSITION corresponds to Normalization Form KD as
+  * described in
+  * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode
+  * Technical Report #15</a>.
+  * @see java.text.Collator#getDecomposition
+  * @see java.text.Collator#setDecomposition
+  */
+ public static final int FULL_DECOMPOSITION = 2;
+
+ /**
+  * Returns the Collator for the locale reported by java.util.Locale.getDefault.
+  * @return the Collator for the default locale (for example, en_US).
+  * @see java.util.Locale#getDefault
+  */
+ public static synchronized Collator getInstance() {
+     final Locale defaultLocale = Locale.getDefault();
+     return getInstance(defaultLocale);
+ }
+
+ /**
+ * Gets the Collator for the desired locale.
+ * @param desiredLocale the desired locale.
+ * @return the Collator for the desired locale.
+ * @see java.util.Locale
+ * @see java.util.ResourceBundle
+ */
+ public static synchronized
+ Collator getInstance(Locale desiredLocale)
+ {
+ Collator result = (Collator) cache.get(desiredLocale);
+ if (result != null) {
+ return (Collator)result.clone(); // make the world safe
+ }
+
+ // Check whether a provider can provide an implementation that's closer
+ // to the requested locale than what the Java runtime itself can provide.
+ LocaleServiceProviderPool pool =
+ LocaleServiceProviderPool.getPool(CollatorProvider.class);
+ if (pool.hasProviders()) {
+ Collator providersInstance = pool.getLocalizedObject(
+ CollatorGetter.INSTANCE,
+ desiredLocale,
+ desiredLocale);
+ if (providersInstance != null) {
+ return providersInstance;
+ }
+ }
+
+ // Load the resource of the desired locale from resource
+ // manager.
+ String colString = "";
+ try {
+ ResourceBundle resource = LocaleData.getCollationData(desiredLocale);
+
+ colString = resource.getString("Rule");
+ } catch (MissingResourceException e) {
+ // Use default values
+ }
+ try
+ {
+ result = new RuleBasedCollator( CollationRules.DEFAULTRULES +
+ colString,
+ CANONICAL_DECOMPOSITION );
+ }
+ catch(ParseException foo)
+ {
+ // predefined tables should contain correct grammar
+ try {
+ result = new RuleBasedCollator( CollationRules.DEFAULTRULES );
+ } catch (ParseException bar) {
+ // do nothing
+ }
+ }
+ // Now that RuleBasedCollator adds expansions for pre-composed characters
+ // into their decomposed equivalents, the default collators don't need
+ // to have decomposition turned on. Laura, 5/5/98, bug 4114077
+ result.setDecomposition(NO_DECOMPOSITION);
+
+ cache.put(desiredLocale,result);
+ return (Collator)result.clone();
+ }
+
+ /**
+  * Compares the source string to the target string according to the
+  * collation rules for this Collator. The result is an integer less than,
+  * equal to or greater than zero depending on whether the source String is
+  * less than, equal to or greater than the target string. See the Collator
+  * class description for an example of use.
+  * <p>
+  * For a one-time comparison, this method has the best performance. If a
+  * given String will be involved in multiple comparisons, CollationKey.compareTo
+  * has the best performance. See the class description of {@link CollationKey}
+  * for an example using CollationKeys.
+  * @param source the source string.
+  * @param target the target string.
+  * @return an integer value: less than zero if source is less than target,
+  * zero if source and target are equal, greater than zero if source is
+  * greater than target.
+  * @see java.text.CollationKey
+  * @see java.text.Collator#getCollationKey
+  */
+ public abstract int compare(String source, String target);
+
+ /**
+  * Compares its two arguments for order by casting both to String and
+  * delegating to <code>compare(String, String)</code>.
+  *
+  * @param o1 the first object to be compared; cast to String.
+  * @param o2 the second object to be compared; cast to String.
+  * @return a negative integer, zero, or a positive integer as the
+  *         first argument is less than, equal to, or greater than the
+  *         second.
+  * @exception ClassCastException the arguments cannot be cast to Strings.
+  * @see java.util.Comparator
+  * @since 1.2
+  */
+ public int compare(Object o1, Object o2) {
+     final String source = (String) o1;
+     final String target = (String) o2;
+     return compare(source, target);
+ }
+
+ /**
+  * Transforms the String into a series of bits that can be compared bitwise
+  * to other CollationKeys. CollationKeys provide better performance than
+  * Collator.compare when Strings are involved in multiple comparisons. See
+  * the class description of {@link CollationKey} for an example.
+  * @param source the string to be transformed into a collation key.
+  * @return the CollationKey for the given String based on this Collator's
+  * collation rules, or null if the source String is null.
+  * @see java.text.CollationKey
+  * @see java.text.Collator#compare
+  */
+ public abstract CollationKey getCollationKey(String source);
+
+ /**
+  * Convenience method that tests whether <code>compare</code> reports two
+  * strings as equal under this Collator's collation rules.
+  * @param source the source string to be compared with.
+  * @param target the target string to be compared with.
+  * @return true if the strings are equal according to the collation
+  * rules; false otherwise.
+  * @see java.text.Collator#compare
+  */
+ public boolean equals(String source, String target) {
+     final int order = compare(source, target);
+     return order == Collator.EQUAL;
+ }
+
+ /**
+  * Returns this Collator's strength property. The strength property
+  * determines the minimum level of difference considered significant
+  * during comparison. See the Collator class description for an example
+  * of use.
+  * @return this Collator's current strength property.
+  * @see java.text.Collator#setStrength
+  * @see java.text.Collator#PRIMARY
+  * @see java.text.Collator#SECONDARY
+  * @see java.text.Collator#TERTIARY
+  * @see java.text.Collator#IDENTICAL
+  */
+ public synchronized int getStrength() {
+     return this.strength;
+ }
+
+ /**
+ * Sets this Collator's strength property. The strength property determines
+ * the minimum level of difference considered significant during comparison.
+ * See the Collator class description for an example of use.
+ * @param newStrength the new strength value.
+ * @see java.text.Collator#getStrength
+ * @see java.text.Collator#PRIMARY
+ * @see java.text.Collator#SECONDARY
+ * @see java.text.Collator#TERTIARY
+ * @see java.text.Collator#IDENTICAL
+ * @exception IllegalArgumentException If the new strength value is not one of
+ * PRIMARY, SECONDARY, TERTIARY or IDENTICAL.
+ */
+ public synchronized void setStrength(int newStrength) {
+ if ((newStrength != PRIMARY) &&
+ (newStrength != SECONDARY) &&
+ (newStrength != TERTIARY) &&
+ (newStrength != IDENTICAL))
+ throw new IllegalArgumentException("Incorrect comparison level.");
+ strength = newStrength;
+ }
+
+ /**
+  * Returns the decomposition mode of this Collator. The decomposition mode
+  * determines how Unicode composed characters are handled; adjusting it
+  * lets the user choose between faster and more complete collation
+  * behavior.
+  * <p>The three values for decomposition mode are:
+  * <UL>
+  * <LI>NO_DECOMPOSITION,
+  * <LI>CANONICAL_DECOMPOSITION,
+  * <LI>FULL_DECOMPOSITION.
+  * </UL>
+  * See the documentation for these three constants for a description
+  * of their meaning.
+  * @return the decomposition mode currently set on this Collator, one of
+  * the three values listed above.
+  * @see java.text.Collator#setDecomposition
+  * @see java.text.Collator#NO_DECOMPOSITION
+  * @see java.text.Collator#CANONICAL_DECOMPOSITION
+  * @see java.text.Collator#FULL_DECOMPOSITION
+  */
+ public synchronized int getDecomposition() {
+     return this.decmp;
+ }
+ /**
+ * Set the decomposition mode of this Collator. See getDecomposition
+ * for a description of decomposition mode.
+ * @param decompositionMode the new decomposition mode.
+ * @see java.text.Collator#getDecomposition
+ * @see java.text.Collator#NO_DECOMPOSITION
+ * @see java.text.Collator#CANONICAL_DECOMPOSITION
+ * @see java.text.Collator#FULL_DECOMPOSITION
+ * @exception IllegalArgumentException If the given value is not a valid decomposition
+ * mode.
+ */
+ public synchronized void setDecomposition(int decompositionMode) {
+ if ((decompositionMode != NO_DECOMPOSITION) &&
+ (decompositionMode != CANONICAL_DECOMPOSITION) &&
+ (decompositionMode != FULL_DECOMPOSITION))
+ throw new IllegalArgumentException("Wrong decomposition mode.");
+ decmp = decompositionMode;
+ }
+
+ /**
+  * Returns an array of all locales for which the
+  * <code>getInstance</code> methods of this class can return
+  * localized instances. The array is obtained from the
+  * LocaleServiceProviderPool for CollatorProvider
+  * and represents the union of locales supported
+  * by the Java runtime and by installed
+  * {@link java.text.spi.CollatorProvider CollatorProvider} implementations.
+  * It must contain at least a Locale instance equal to
+  * {@link java.util.Locale#US Locale.US}.
+  *
+  * @return An array of locales for which localized
+  *         <code>Collator</code> instances are available.
+  */
+ public static synchronized Locale[] getAvailableLocales() {
+     return LocaleServiceProviderPool.getPool(CollatorProvider.class)
+                                     .getAvailableLocales();
+ }
+
+ /**
+  * Returns a copy of this Collator (overrides Cloneable).
+  */
+ public Object clone() {
+     try {
+         return super.clone();
+     } catch (CloneNotSupportedException e) {
+         // Not expected, since Collator implements Cloneable; keep the cause anyway.
+         throw (InternalError) new InternalError().initCause(e);
+     }
+ }
+
+ /**
+  * Compares two Collators: equal when same class, strength and decomposition mode.
+  * @param that the Collator to be compared with this.
+  * @return true if this Collator is the same as that Collator; false otherwise.
+  */
+ public boolean equals(Object that) {
+     if (this == that) {
+         return true;
+     }
+     if (that == null || getClass() != that.getClass()) {
+         return false;
+     }
+     final Collator other = (Collator) that;
+     return strength == other.strength && decmp == other.decmp;
+ }
+
+ /**
+  * Generates the hash code for this Collator; subclasses must supply it.
+  */
+ public abstract int hashCode();
+
+ /**
+  * Default constructor. It is protected so that subclasses can reach it;
+  * users typically obtain a Collator subclass instance through the
+  * factory method getInstance. A new Collator starts out with TERTIARY
+  * strength and CANONICAL_DECOMPOSITION.
+  * @see java.text.Collator#getInstance
+  */
+ protected Collator() {
+     this.decmp = CANONICAL_DECOMPOSITION;
+     this.strength = TERTIARY;
+ }
+
+ private int strength;   // current strength; assigned by the constructor and setStrength
+ private int decmp;      // current decomposition mode; assigned by the constructor and setDecomposition
+ private static SoftCache cache = new SoftCache();   // Collators keyed by Locale; getInstance hands out clones
+
+ //
+ // FIXME: These three constants should be removed.
+ //
+ /**
+  * Result value of the compare() method for a source string that is
+  * compared to be less than the target string.
+  * @see java.text.Collator#compare
+  */
+ static final int LESS = -1;
+ /**
+  * Result value of the compare() method for a source string that is
+  * compared to be equal to the target string.
+  * @see java.text.Collator#compare
+  */
+ static final int EQUAL = 0;
+ /**
+  * Result value of the compare() method for a source string that is
+  * compared to be greater than the target string.
+  * @see java.text.Collator#compare
+  */
+ static final int GREATER = 1;
+
+ /**
+  * Obtains a Collator instance from a CollatorProvider implementation
+  * and records it in the shared cache.
+  */
+ private static class CollatorGetter
+     implements LocaleServiceProviderPool.LocalizedObjectGetter<CollatorProvider, Collator> {
+     private static final CollatorGetter INSTANCE = new CollatorGetter();
+
+     public Collator getObject(CollatorProvider collatorProvider,
+                               Locale locale,
+                               String key,
+                               Object... params) {
+         assert params.length == 1;
+         final Collator provided = collatorProvider.getInstance(locale);
+         if (provided == null) {
+             return null;
+         }
+         // Cache the provider's Collator under two locales: the desired one
+         // (params[0]) and the one the provider was found for, which may be
+         // a fall back locale. The caller is handed a clone.
+         final Locale desiredLocale = (Locale) params[0];
+         cache.put(desiredLocale, provided);
+         cache.put(locale, provided);
+         return (Collator) provided.clone();
+     }
+ }
+ }