corba/src/share/classes/sun/rmi/rmic/iiop/StaticStringsHash.java
changeset 4 02bb8761fcce
child 5555 b2b5ed3f0d0d
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/corba/src/share/classes/sun/rmi/rmic/iiop/StaticStringsHash.java	Sat Dec 01 00:00:00 2007 +0000
@@ -0,0 +1,372 @@
+/*
+ * Portions Copyright 1999-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+/*
+ * Licensed Materials - Property of IBM
+ * RMI-IIOP v1.0
+ * Copyright IBM Corp. 1998 1999  All Rights Reserved
+ *
+ */
+
+package sun.rmi.rmic.iiop;
+
+/**
+ * StaticStringsHash takes an array of constant strings and
+ * uses several different hash methods to try to find the
+ * 'best' one for that set. The set of methods is currently
+ * fixed, but with a little work could be made extensible thru
+ * subclassing.
+ * <p>
+ * The current set of methods is:
+ * <ol>
+ * <li> length() - works well when all strings are different length.</li>
+ * <li> charAt(n) - works well when one offset into all strings is different.</li>
+ * <li> hashCode() - works well with larger arrays.</li>
+ * </ol>
+ * After constructing an instance over the set of strings, the
+ * <code>getKey(String)</code> method can be used to use the selected hash
+ * method to produce a key.  The <code>method</code> string will contain
+ * "length()", "charAt(n)", or "hashCode()", and is intended for use by
+ * code generators.
+ * <p>
+ * The <code>keys</code> array will contain the full set of unique keys.
+ * <p>
+ * The <code>buckets</code> array will contain a set of arrays, one for
+ * each key in the <code>keys</code>, where <code>buckets[x][y]</code>
+ * is an index into the <code>strings</code> array.
+ * @author      Bryan Atsatt
+ */
+public class StaticStringsHash {
+
+    /** The set of strings upon which the hash info is created */
+    public String[] strings = null;
+
+    /** Unique hash keys */
+    public int[] keys = null;
+
+    /** Buckets for each key, where buckets[x][y] is an index
+     * into the strings[] array. */
+    public int[][] buckets = null;
+
+    /** The method to invoke on String to produce the hash key */
+    public String method = null;
+
+    /** Get a key for the given string using the
+     * selected hash method.
+     * @param str the string to return a key for.
+     * @return the key.
+     */
+    public int getKey(String str) {
+        switch (keyKind) {
+        case LENGTH: return str.length();
+        case CHAR_AT: return str.charAt(charAt);
+        case HASH_CODE: return str.hashCode();
+        }
+        throw new Error("Bad keyKind");
+    }
+
+    /** Constructor
+     * @param strings the set of strings upon which to
+     * find an optimal hash method. Must not contain
+     * duplicates.
+     */
+    public StaticStringsHash(String[] strings) {
+        this.strings = strings;
+        length = strings.length;
+        tempKeys = new int[length];
+        bucketSizes = new int[length];
+        setMinStringLength();
+
+        // Decide on the best algorithm based on
+        // which one has the smallest maximum
+        // bucket depth. First, try length()...
+
+        int currentMaxDepth = getKeys(LENGTH);
+        int useCharAt = -1;
+        boolean useHashCode = false;
+
+        if (currentMaxDepth > 1) {
+
+            // At least one bucket had more than one
+            // entry, so try charAt(i).  If there
+            // are a lot of strings in the array,
+            // and minStringLength is large, limit
+            // the search to a smaller number of
+            // characters to avoid spending a lot
+            // of time here that is most likely to
+            // be pointless...
+
+            int minLength = minStringLength;
+            if (length > CHAR_AT_MAX_LINES &&
+                length * minLength > CHAR_AT_MAX_CHARS) {
+                minLength = length/CHAR_AT_MAX_CHARS;
+            }
+
+            charAt = 0;
+            for (int i = 0; i < minLength; i++) {
+                int charAtDepth = getKeys(CHAR_AT);
+                if (charAtDepth < currentMaxDepth) {
+                    currentMaxDepth = charAtDepth;
+                    useCharAt = i;
+                    if (currentMaxDepth == 1) {
+                        break;
+                    }
+                }
+                charAt++;
+            }
+            charAt = useCharAt;
+
+
+            if (currentMaxDepth > 1) {
+
+                // At least one bucket had more than one
+                // entry, try hashCode().
+                //
+                // Since the cost of computing a full hashCode
+                // (for the runtime target string) is much higher
+                // than the previous methods, use it only if it is
+                // substantially better. The definition of 'substantial'
+                // here is not very well founded, and could be improved
+                // with some further analysis ;^)
+
+                int hashCodeDepth = getKeys(HASH_CODE);
+                if (hashCodeDepth < currentMaxDepth-3) {
+
+                    // Using the full hashCode results in at least
+                    // 3 fewer entries in the worst bucket, so will
+                    // therefore avoid at least 3 calls to equals()
+                    // in the worst case.
+                    //
+                    // Note that using a number smaller than 3 could
+                    // result in using a hashCode when there are only
+                    // 2 strings in the array, and that would surely
+                    // be a poor performance choice.
+
+                    useHashCode = true;
+                }
+            }
+
+            // Reset keys if needed...
+
+            if (!useHashCode) {
+                if (useCharAt >= 0) {
+
+                    // Use the charAt(i) method...
+
+                    getKeys(CHAR_AT);
+
+                } else {
+
+                    // Use length method...
+
+                    getKeys(LENGTH);
+                }
+            }
+        }
+
+        // Now allocate and fill our real hashKeys array...
+
+        keys = new int[bucketCount];
+        System.arraycopy(tempKeys,0,keys,0,bucketCount);
+
+        // Sort keys and bucketSizes arrays...
+
+        boolean didSwap;
+        do {
+            didSwap = false;
+            for (int i = 0; i < bucketCount - 1; i++) {
+                if (keys[i] > keys[i+1]) {
+                    int temp = keys[i];
+                    keys[i] = keys[i+1];
+                    keys[i+1] = temp;
+                    temp = bucketSizes[i];
+                    bucketSizes[i] = bucketSizes[i+1];
+                    bucketSizes[i+1] = temp;
+                    didSwap = true;
+                }
+            }
+        }
+        while (didSwap == true);
+
+        // Allocate our buckets array. Fill the string
+        // index slot with an unused key so we can
+        // determine which are free...
+
+        int unused = findUnusedKey();
+        buckets = new int[bucketCount][];
+        for (int i = 0; i < bucketCount; i++) {
+            buckets[i] = new int[bucketSizes[i]];
+            for (int j = 0; j < bucketSizes[i]; j++) {
+                buckets[i][j] = unused;
+            }
+        }
+
+        // And fill it in...
+
+        for(int i = 0; i < strings.length; i++) {
+            int key = getKey(strings[i]);
+            for (int j = 0; j < bucketCount; j++) {
+                if (keys[j] == key) {
+                    int k = 0;
+                    while (buckets[j][k] != unused) {
+                        k++;
+                    }
+                    buckets[j][k] = i;
+                    break;
+                }
+            }
+        }
+    }
+
+    /** Print an optimized 'contains' method for the
+     * argument strings
+     */
+    public static void main (String[] args) {
+        StaticStringsHash hash = new StaticStringsHash(args);
+        System.out.println();
+        System.out.println("    public boolean contains(String key) {");
+        System.out.println("        switch (key."+hash.method+") {");
+        for (int i = 0; i < hash.buckets.length; i++) {
+            System.out.println("            case "+hash.keys[i]+": ");
+            for (int j = 0; j < hash.buckets[i].length; j++) {
+                if (j > 0) {
+                    System.out.print("                } else ");
+                } else {
+                    System.out.print("                ");
+                }
+                System.out.println("if (key.equals(\""+ hash.strings[hash.buckets[i][j]] +"\")) {");
+                System.out.println("                    return true;");
+            }
+            System.out.println("                }");
+        }
+        System.out.println("        }");
+        System.out.println("        return false;");
+        System.out.println("    }");
+    }
+
+    private int length;
+    private int[] tempKeys;
+    private int[] bucketSizes;
+    private int bucketCount;
+    private int maxDepth;
+    private int minStringLength = Integer.MAX_VALUE;
+    private int keyKind;
+    private int charAt;
+
+    private static final int LENGTH = 0;
+    private static final int CHAR_AT = 1;
+    private static final int HASH_CODE = 2;
+
+    /* Determines the maximum number of charAt(i)
+     * tests that will be done. The search is
+     * limited because if the number of characters
+     * is large enough, the likelyhood of finding
+     * a good hash key  based on this method is
+     * low. The CHAR_AT_MAX_CHARS limit only
+     * applies f there are more strings than
+     * CHAR_AT_MAX_LINES.
+     */
+    private static final int CHAR_AT_MAX_LINES = 50;
+    private static final int CHAR_AT_MAX_CHARS = 1000;
+
+    private void resetKeys(int keyKind) {
+        this.keyKind = keyKind;
+        switch (keyKind) {
+        case LENGTH: method = "length()"; break;
+        case CHAR_AT: method = "charAt("+charAt+")"; break;
+        case HASH_CODE: method = "hashCode()"; break;
+        }
+        maxDepth = 1;
+        bucketCount = 0;
+        for (int i = 0; i < length; i++) {
+            tempKeys[i] = 0;
+            bucketSizes[i] = 0;
+        }
+    }
+
+    private void setMinStringLength() {
+        for (int i = 0; i < length; i++) {
+            if (strings[i].length() < minStringLength) {
+                minStringLength = strings[i].length();
+            }
+        }
+    }
+
+    private int findUnusedKey() {
+        int unused = 0;
+        int keysLength = keys.length;
+
+        // Note that we just assume that resource
+        // exhaustion will occur rather than an
+        // infinite loop here if the set of keys
+        // is very large.
+
+        while (true) {
+            boolean match = false;
+            for (int i = 0; i < keysLength; i++) {
+                if (keys[i] == unused) {
+                    match = true;
+                    break;
+                }
+            }
+            if (match) {
+                unused--;
+            } else {
+                break;
+            }
+        }
+        return unused;
+    }
+
+    private int getKeys(int methodKind) {
+        resetKeys(methodKind);
+        for(int i = 0; i < strings.length; i++) {
+            addKey(getKey(strings[i]));
+        }
+        return maxDepth;
+    }
+
+    private void addKey(int key) {
+
+        // Have we seen this one before?
+
+        boolean addIt = true;
+        for (int j = 0; j < bucketCount; j++) {
+            if (tempKeys[j] == key) {
+                addIt = false;
+                bucketSizes[j]++;
+                if (bucketSizes[j] > maxDepth) {
+                    maxDepth = bucketSizes[j];
+                }
+                break;
+            }
+        }
+
+        if (addIt) {
+            tempKeys[bucketCount] = key;
+            bucketSizes[bucketCount] = 1;
+            bucketCount++;
+        }
+    }
+}