jdk/src/share/classes/java/text/RBCollationTables.java
changeset 2 90ce3da70b43
child 5506 202f599c92aa
equal deleted inserted replaced
0:fd16c54261b3 2:90ce3da70b43
       
     1 /*
       
     2  * Copyright 1999-2003 Sun Microsystems, Inc.  All Rights Reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Sun designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Sun in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
       
    22  * CA 95054 USA or visit www.sun.com if you need additional information or
       
    23  * have any questions.
       
    24  */
       
    25 
       
    26 /*
       
    27  * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
       
    28  * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
       
    29  *
       
    30  *   The original version of this source code and documentation is copyrighted
       
    31  * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
       
    32  * materials are provided under terms of a License Agreement between Taligent
       
    33  * and Sun. This technology is protected by multiple US and International
       
    34  * patents. This notice and attribution to Taligent may not be removed.
       
    35  *   Taligent is a registered trademark of Taligent, Inc.
       
    36  *
       
    37  */
       
    38 
       
    39 package java.text;
       
    40 
       
    41 import java.util.Vector;
       
    42 import sun.text.UCompactIntArray;
       
    43 import sun.text.IntHashtable;
       
    44 
       
    45 /**
       
    46  * This class contains the static state of a RuleBasedCollator: The various
       
    47  * tables that are used by the collation routines.  Several RuleBasedCollators
       
    48  * can share a single RBCollationTables object, easing memory requirements and
       
    49  * improving performance.
       
    50  */
       
    51 final class RBCollationTables {
       
    52     //===========================================================================================
       
    53     //  The following diagram shows the data structure of the RBCollationTables object.
       
    54     //  Suppose we have the rule, where 'o-umlaut' is the unicode char 0x00F6.
       
    55     //  "a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O; 'o-umlaut'/E, 'O-umlaut'/E ...".
       
     56     //  What the rule says is, sorts 'ch' ligatures and 'c' only with tertiary difference and
       
    57     //  sorts 'o-umlaut' as if it's always expanded with 'e'.
       
    58     //
       
    59     // mapping table                     contracting list           expanding list
       
    60     // (contains all unicode char
       
    61     //  entries)                   ___    ____________       _________________________
       
    62     //  ________                +>|_*_|->|'c' |v('c') |  +>|v('o')|v('umlaut')|v('e')|
       
    63     // |_\u0001_|-> v('\u0001') | |_:_|  |------------|  | |-------------------------|
       
    64     // |_\u0002_|-> v('\u0002') | |_:_|  |'ch'|v('ch')|  | |             :           |
       
    65     // |____:___|               | |_:_|  |------------|  | |-------------------------|
       
    66     // |____:___|               |        |'cH'|v('cH')|  | |             :           |
       
    67     // |__'a'___|-> v('a')      |        |------------|  | |-------------------------|
       
    68     // |__'b'___|-> v('b')      |        |'Ch'|v('Ch')|  | |             :           |
       
    69     // |____:___|               |        |------------|  | |-------------------------|
       
    70     // |____:___|               |        |'CH'|v('CH')|  | |             :           |
       
    71     // |___'c'__|----------------         ------------   | |-------------------------|
       
    72     // |____:___|                                        | |             :           |
       
    73     // |o-umlaut|----------------------------------------  |_________________________|
       
    74     // |____:___|
       
    75     //
       
    76     // Noted by Helena Shih on 6/23/97
       
    77     //============================================================================================
       
    78 
       
    79     public RBCollationTables(String rules, int decmp) throws ParseException {
       
    80         this.rules = rules;
       
    81 
       
    82         RBTableBuilder builder = new RBTableBuilder(new BuildAPI());
       
    83         builder.build(rules, decmp); // this object is filled in through
       
    84                                             // the BuildAPI object
       
    85     }
       
    86 
       
    87     final class BuildAPI {
       
    88         /**
       
    89          * Private constructor.  Prevents anyone else besides RBTableBuilder
       
    90          * from gaining direct access to the internals of this class.
       
    91          */
       
    92         private BuildAPI() {
       
    93         }
       
    94 
       
    95         /**
       
    96          * This function is used by RBTableBuilder to fill in all the members of this
       
    97          * object.  (Effectively, the builder class functions as a "friend" of this
       
    98          * class, but to avoid changing too much of the logic, it carries around "shadow"
       
    99          * copies of all these variables until the end of the build process and then
       
   100          * copies them en masse into the actual tables object once all the construction
       
   101          * logic is complete.  This function does that "copying en masse".
       
   102          * @param f2ary The value for frenchSec (the French-secondary flag)
       
   103          * @param swap The value for SE Asian swapping rule
       
   104          * @param map The collator's character-mapping table (the value for mapping)
       
   105          * @param cTbl The collator's contracting-character table (the value for contractTable)
       
   106          * @param eTbl The collator's expanding-character table (the value for expandTable)
       
   107          * @param cFlgs The hash table of characters that participate in contracting-
       
   108          *              character sequences (the value for contractFlags)
       
   109          * @param mso The value for maxSecOrder
       
   110          * @param mto The value for maxTerOrder
       
   111          */
       
   112         void fillInTables(boolean f2ary,
       
   113                           boolean swap,
       
   114                           UCompactIntArray map,
       
   115                           Vector cTbl,
       
   116                           Vector eTbl,
       
   117                           IntHashtable cFlgs,
       
   118                           short mso,
       
   119                           short mto) {
       
   120             frenchSec = f2ary;
       
   121             seAsianSwapping = swap;
       
   122             mapping = map;
       
   123             contractTable = cTbl;
       
   124             expandTable = eTbl;
       
   125             contractFlags = cFlgs;
       
   126             maxSecOrder = mso;
       
   127             maxTerOrder = mto;
       
   128         }
       
   129     }
       
   130 
       
   131     /**
       
   132      * Gets the table-based rules for the collation object.
       
   133      * @return returns the collation rules that the table collation object
       
   134      * was created from.
       
   135      */
       
   136     public String getRules()
       
   137     {
       
   138         return rules;
       
   139     }
       
   140 
       
   141     public boolean isFrenchSec() {
       
   142         return frenchSec;
       
   143     }
       
   144 
       
   145     public boolean isSEAsianSwapping() {
       
   146         return seAsianSwapping;
       
   147     }
       
   148 
       
   149     // ==============================================================
       
   150     // internal (for use by CollationElementIterator)
       
   151     // ==============================================================
       
   152 
       
   153     /**
       
   154      *  Get the entry of hash table of the contracting string in the collation
       
   155      *  table.
       
   156      *  @param ch the starting character of the contracting string
       
   157      */
       
   158     Vector getContractValues(int ch)
       
   159     {
       
   160         int index = mapping.elementAt(ch);
       
   161         return getContractValuesImpl(index - CONTRACTCHARINDEX);
       
   162     }
       
   163 
       
   164     //get contract values from contractTable by index
       
   165     private Vector getContractValuesImpl(int index)
       
   166     {
       
   167         if (index >= 0)
       
   168         {
       
   169             return (Vector)contractTable.elementAt(index);
       
   170         }
       
   171         else // not found
       
   172         {
       
   173             return null;
       
   174         }
       
   175     }
       
   176 
       
   177     /**
       
   178      * Returns true if this character appears anywhere in a contracting
       
   179      * character sequence.  (Used by CollationElementIterator.setOffset().)
       
   180      */
       
   181     boolean usedInContractSeq(int c) {
       
   182         return contractFlags.get(c) == 1;
       
   183     }
       
   184 
       
   185     /**
       
   186       * Return the maximum length of any expansion sequences that end
       
   187       * with the specified comparison order.
       
   188       *
       
   189       * @param order a collation order returned by previous or next.
       
   190       * @return the maximum length of any expansion seuences ending
       
   191       *         with the specified order.
       
   192       *
       
   193       * @see CollationElementIterator#getMaxExpansion
       
   194       */
       
   195     int getMaxExpansion(int order)
       
   196     {
       
   197         int result = 1;
       
   198 
       
   199         if (expandTable != null) {
       
   200             // Right now this does a linear search through the entire
       
   201             // expandsion table.  If a collator had a large number of expansions,
       
   202             // this could cause a performance problem, but in practise that
       
   203             // rarely happens
       
   204             for (int i = 0; i < expandTable.size(); i++) {
       
   205                 int[] valueList = (int [])expandTable.elementAt(i);
       
   206                 int length = valueList.length;
       
   207 
       
   208                 if (length > result && valueList[length-1] == order) {
       
   209                     result = length;
       
   210                 }
       
   211             }
       
   212         }
       
   213 
       
   214         return result;
       
   215     }
       
   216 
       
   217     /**
       
   218      *  Get the entry of hash table of the expanding string in the collation
       
   219      *  table.
       
   220      *  @param idx the index of the expanding string value list
       
   221      */
       
   222     final int[] getExpandValueList(int order) {
       
   223         return (int[])expandTable.elementAt(order - EXPANDCHARINDEX);
       
   224     }
       
   225 
       
   226     /**
       
   227      *  Get the comarison order of a character from the collation table.
       
   228      *  @return the comparison order of a character.
       
   229      */
       
   230     int getUnicodeOrder(int ch)
       
   231     {
       
   232         return mapping.elementAt(ch);
       
   233     }
       
   234 
       
   235     short getMaxSecOrder() {
       
   236         return maxSecOrder;
       
   237     }
       
   238 
       
   239     short getMaxTerOrder() {
       
   240         return maxTerOrder;
       
   241     }
       
   242 
       
   243     /**
       
   244      * Reverse a string.
       
   245      */
       
   246     //shemran/Note: this is used for secondary order value reverse, no
       
   247     //              need to consider supplementary pair.
       
   248     static void reverse (StringBuffer result, int from, int to)
       
   249     {
       
   250         int i = from;
       
   251         char swap;
       
   252 
       
   253         int j = to - 1;
       
   254         while (i < j) {
       
   255             swap =  result.charAt(i);
       
   256             result.setCharAt(i, result.charAt(j));
       
   257             result.setCharAt(j, swap);
       
   258             i++;
       
   259             j--;
       
   260         }
       
   261     }
       
   262 
       
   263     final static int getEntry(Vector list, String name, boolean fwd) {
       
   264         for (int i = 0; i < list.size(); i++) {
       
   265             EntryPair pair = (EntryPair)list.elementAt(i);
       
   266             if (pair.fwd == fwd && pair.entryName.equals(name)) {
       
   267                 return i;
       
   268             }
       
   269         }
       
   270         return UNMAPPED;
       
   271     }
       
   272 
       
   273     // ==============================================================
       
   274     // constants
       
   275     // ==============================================================
       
   276     //sherman/Todo: is the value big enough?????
       
   277     final static int EXPANDCHARINDEX = 0x7E000000; // Expand index follows
       
   278     final static int CONTRACTCHARINDEX = 0x7F000000;  // contract indexes follow
       
   279     final static int UNMAPPED = 0xFFFFFFFF;
       
   280 
       
   281     final static int PRIMARYORDERMASK = 0xffff0000;
       
   282     final static int SECONDARYORDERMASK = 0x0000ff00;
       
   283     final static int TERTIARYORDERMASK = 0x000000ff;
       
   284     final static int PRIMARYDIFFERENCEONLY = 0xffff0000;
       
   285     final static int SECONDARYDIFFERENCEONLY = 0xffffff00;
       
   286     final static int PRIMARYORDERSHIFT = 16;
       
   287     final static int SECONDARYORDERSHIFT = 8;
       
   288 
       
   289     // ==============================================================
       
   290     // instance variables
       
   291     // ==============================================================
       
   292     private String rules = null;
       
   293     private boolean frenchSec = false;
       
   294     private boolean seAsianSwapping = false;
       
   295 
       
   296     private UCompactIntArray mapping = null;
       
   297     private Vector contractTable = null;
       
   298     private Vector expandTable = null;
       
   299     private IntHashtable contractFlags = null;
       
   300 
       
   301     private short maxSecOrder = 0;
       
   302     private short maxTerOrder = 0;
       
   303 }