jdk/src/share/classes/com/sun/java/util/jar/pack/Histogram.java
author never
Mon, 12 Jul 2010 22:27:18 -0700
changeset 5926 a36f90d986b6
parent 5506 202f599c92aa
child 7192 445c518364c4
permissions -rw-r--r--
6968385: malformed xml in sweeper logging Reviewed-by: kvn
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
     2
 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     4
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    10
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
90ce3da70b43 Initial load
duke
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    15
 * accompanied this code).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    16
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load
duke
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    20
 *
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    23
 * questions.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    24
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
package com.sun.java.util.jar.pack;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    27
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
import java.util.*;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    29
import java.io.*;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    30
90ce3da70b43 Initial load
duke
parents:
diff changeset
    31
/**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    32
 * Histogram derived from an integer array of events (int[]).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    33
 * @author John Rose
90ce3da70b43 Initial load
duke
parents:
diff changeset
    34
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    35
class Histogram {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    36
    // Compact histogram representation:  4 bytes per distinct value,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    37
    // plus 5 words per distinct count.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    38
    protected final int[][] matrix;  // multi-row matrix {{counti,valueij...}}
90ce3da70b43 Initial load
duke
parents:
diff changeset
    39
    protected final int     totalWeight;  // sum of all counts
90ce3da70b43 Initial load
duke
parents:
diff changeset
    40
90ce3da70b43 Initial load
duke
parents:
diff changeset
    41
    // These are created eagerly also, since that saves work.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    42
    // They cost another 8 bytes per distinct value.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    43
    protected final int[]   values;  // unique values, sorted by value
90ce3da70b43 Initial load
duke
parents:
diff changeset
    44
    protected final int[]   counts;  // counts, same order as values
90ce3da70b43 Initial load
duke
parents:
diff changeset
    45
90ce3da70b43 Initial load
duke
parents:
diff changeset
    46
    private static final long LOW32 = (long)-1 >>> 32;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    47
90ce3da70b43 Initial load
duke
parents:
diff changeset
    48
    /** Build a histogram given a sequence of values.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    49
     *  To save work, the input should be sorted, but need not be.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    50
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    51
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
    52
    Histogram(int[] valueSequence) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    53
        long[] hist2col = computeHistogram2Col(maybeSort(valueSequence));
90ce3da70b43 Initial load
duke
parents:
diff changeset
    54
        int[][] table = makeTable(hist2col);
90ce3da70b43 Initial load
duke
parents:
diff changeset
    55
        values = table[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
    56
        counts = table[1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
    57
        this.matrix = makeMatrix(hist2col);
90ce3da70b43 Initial load
duke
parents:
diff changeset
    58
        this.totalWeight = valueSequence.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    59
        assert(assertWellFormed(valueSequence));
90ce3da70b43 Initial load
duke
parents:
diff changeset
    60
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    61
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
    62
    Histogram(int[] valueSequence, int start, int end) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    63
        this(sortedSlice(valueSequence, start, end));
90ce3da70b43 Initial load
duke
parents:
diff changeset
    64
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    65
90ce3da70b43 Initial load
duke
parents:
diff changeset
    66
    /** Build a histogram given a compact matrix of counts and values. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    67
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
    68
    Histogram(int[][] matrix) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    69
        // sort the rows
90ce3da70b43 Initial load
duke
parents:
diff changeset
    70
        matrix = normalizeMatrix(matrix);  // clone and sort
90ce3da70b43 Initial load
duke
parents:
diff changeset
    71
        this.matrix = matrix;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    72
        int length = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    73
        int weight = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    74
        for (int i = 0; i < matrix.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    75
            int rowLength = matrix[i].length-1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    76
            length += rowLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    77
            weight += matrix[i][0] * rowLength;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    78
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    79
        this.totalWeight = weight;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    80
        long[] hist2col = new long[length];
90ce3da70b43 Initial load
duke
parents:
diff changeset
    81
        int fillp = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    82
        for (int i = 0; i < matrix.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    83
            for (int j = 1; j < matrix[i].length; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    84
                // sort key is value, so put it in the high 32!
90ce3da70b43 Initial load
duke
parents:
diff changeset
    85
                hist2col[fillp++] = ((long) matrix[i][j] << 32)
90ce3da70b43 Initial load
duke
parents:
diff changeset
    86
                                  | (LOW32 & matrix[i][0]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
    87
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    88
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    89
        assert(fillp == hist2col.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
    90
        Arrays.sort(hist2col);
90ce3da70b43 Initial load
duke
parents:
diff changeset
    91
        int[][] table = makeTable(hist2col);
90ce3da70b43 Initial load
duke
parents:
diff changeset
    92
        values = table[1]; //backwards
90ce3da70b43 Initial load
duke
parents:
diff changeset
    93
        counts = table[0]; //backwards
90ce3da70b43 Initial load
duke
parents:
diff changeset
    94
        assert(assertWellFormed(null));
90ce3da70b43 Initial load
duke
parents:
diff changeset
    95
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
    96
90ce3da70b43 Initial load
duke
parents:
diff changeset
    97
    /** Histogram of int values, reported compactly as a ragged matrix,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    98
     *  indexed by descending frequency rank.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    99
     *  <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   100
     *  Format of matrix:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   101
     *  Each row in the matrix begins with an occurrence count,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   102
     *  and continues with all int values that occur at that frequency.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   103
     *  <pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   104
     *  int[][] matrix = {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   105
     *    { count1, value11, value12, value13, ...  },
90ce3da70b43 Initial load
duke
parents:
diff changeset
   106
     *    { count2, value21, value22, ... },
90ce3da70b43 Initial load
duke
parents:
diff changeset
   107
     *    ...
90ce3da70b43 Initial load
duke
parents:
diff changeset
   108
     *  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   109
     *  </pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   110
     *  The first column of the matrix { count1, count2, ... }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   111
     *  is sorted in descending order, and contains no duplicates.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   112
     *  Each row of the matrix (apart from its first element)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   113
     *  is sorted in ascending order, and contains no duplicates.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   114
     *  That is, each sequence { valuei1, valuei2, ... } is sorted.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   115
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   116
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
   117
    int[][] getMatrix() { return matrix; }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   118
90ce3da70b43 Initial load
duke
parents:
diff changeset
   119
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
   120
    int getRowCount() { return matrix.length; }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   121
90ce3da70b43 Initial load
duke
parents:
diff changeset
   122
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
   123
    int getRowFrequency(int rn) { return matrix[rn][0]; }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   124
90ce3da70b43 Initial load
duke
parents:
diff changeset
   125
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
   126
    int getRowLength(int rn) { return matrix[rn].length-1; }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   127
90ce3da70b43 Initial load
duke
parents:
diff changeset
   128
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
   129
    int getRowValue(int rn, int vn) { return matrix[rn][vn+1]; }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   130
90ce3da70b43 Initial load
duke
parents:
diff changeset
   131
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
   132
    int getRowWeight(int rn) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   133
        return getRowFrequency(rn) * getRowLength(rn);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   134
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   135
90ce3da70b43 Initial load
duke
parents:
diff changeset
   136
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
   137
    int getTotalWeight() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   138
        return totalWeight;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   139
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   140
90ce3da70b43 Initial load
duke
parents:
diff changeset
   141
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
   142
    int getTotalLength() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   143
        return values.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   144
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   145
90ce3da70b43 Initial load
duke
parents:
diff changeset
   146
    /** Returns an array of all values, sorted. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   147
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
   148
    int[] getAllValues() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   149
90ce3da70b43 Initial load
duke
parents:
diff changeset
   150
        return values;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   151
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   152
90ce3da70b43 Initial load
duke
parents:
diff changeset
   153
    /** Returns an array parallel with {@link #getValues},
90ce3da70b43 Initial load
duke
parents:
diff changeset
   154
     *  with a frequency for each value.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   155
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   156
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
   157
    int[] getAllFrequencies() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   158
        return counts;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   159
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   160
90ce3da70b43 Initial load
duke
parents:
diff changeset
   161
    private static double log2 = Math.log(2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   162
90ce3da70b43 Initial load
duke
parents:
diff changeset
   163
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
   164
    int getFrequency(int value) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   165
        int pos = Arrays.binarySearch(values, value);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   166
        if (pos < 0)  return 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   167
        assert(values[pos] == value);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   168
        return counts[pos];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   169
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   170
90ce3da70b43 Initial load
duke
parents:
diff changeset
   171
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
   172
    double getBitLength(int value) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   173
        double prob = (double) getFrequency(value) / getTotalWeight();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   174
        return - Math.log(prob) / log2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   175
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   176
90ce3da70b43 Initial load
duke
parents:
diff changeset
   177
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
   178
    double getRowBitLength(int rn) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   179
        double prob = (double) getRowFrequency(rn) / getTotalWeight();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   180
        return - Math.log(prob) / log2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   181
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   182
90ce3da70b43 Initial load
duke
parents:
diff changeset
   183
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
   184
    interface BitMetric {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   185
        public double getBitLength(int value);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   186
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   187
    private final BitMetric bitMetric = new BitMetric() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   188
        public double getBitLength(int value) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   189
            return Histogram.this.getBitLength(value);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   190
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   191
    };
90ce3da70b43 Initial load
duke
parents:
diff changeset
   192
    public BitMetric getBitMetric() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   193
        return bitMetric;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   194
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   195
90ce3da70b43 Initial load
duke
parents:
diff changeset
   196
    /** bit-length is negative entropy:  -H(matrix). */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   197
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
   198
    double getBitLength() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   199
        double sum = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   200
        for (int i = 0; i < matrix.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   201
            sum += getRowBitLength(i) * getRowWeight(i);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   202
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   203
        assert(0.1 > Math.abs(sum - getBitLength(bitMetric)));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   204
        return sum;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   205
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   206
90ce3da70b43 Initial load
duke
parents:
diff changeset
   207
    /** bit-length in to another coding (cross-entropy) */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   208
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
   209
    double getBitLength(BitMetric len) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   210
        double sum = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   211
        for (int i = 0; i < matrix.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   212
            for (int j = 1; j < matrix[i].length; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   213
                sum += matrix[i][0] * len.getBitLength(matrix[i][j]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   214
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   215
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   216
        return sum;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   217
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   218
90ce3da70b43 Initial load
duke
parents:
diff changeset
   219
    static private
90ce3da70b43 Initial load
duke
parents:
diff changeset
   220
    double round(double x, double scale) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   221
        return Math.round(x * scale) / scale;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   222
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   223
90ce3da70b43 Initial load
duke
parents:
diff changeset
   224
    /** Sort rows and columns.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   225
     *  Merge adjacent rows with the same key element [0].
90ce3da70b43 Initial load
duke
parents:
diff changeset
   226
     *  Make a fresh copy of all of it.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   227
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   228
    public int[][] normalizeMatrix(int[][] matrix) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   229
        long[] rowMap = new long[matrix.length];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   230
        for (int i = 0; i < matrix.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   231
            if (matrix[i].length <= 1)  continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   232
            int count = matrix[i][0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   233
            if (count <= 0)  continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   234
            rowMap[i] = (long) count << 32 | i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   235
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   236
        Arrays.sort(rowMap);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   237
        int[][] newMatrix = new int[matrix.length][];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   238
        int prevCount = -1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   239
        int fillp1 = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   240
        int fillp2 = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   241
        for (int i = 0; ; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   242
            int[] row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   243
            if (i < matrix.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   244
                long rowMapEntry = rowMap[rowMap.length-i-1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   245
                if (rowMapEntry == 0)  continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   246
                row = matrix[(int)rowMapEntry];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   247
                assert(rowMapEntry>>>32 == row[0]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   248
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   249
                row = new int[]{ -1 };  // close it off
90ce3da70b43 Initial load
duke
parents:
diff changeset
   250
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   251
            if (row[0] != prevCount && fillp2 > fillp1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   252
                // Close off previous run.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   253
                int length = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   254
                for (int p = fillp1; p < fillp2; p++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   255
                    int[] row0 = newMatrix[p];  // previously visited row
90ce3da70b43 Initial load
duke
parents:
diff changeset
   256
                    assert(row0[0] == prevCount);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   257
                    length += row0.length-1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   258
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   259
                int[] row1 = new int[1+length];  // cloned & consolidated row
90ce3da70b43 Initial load
duke
parents:
diff changeset
   260
                row1[0] = prevCount;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   261
                int rfillp = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   262
                for (int p = fillp1; p < fillp2; p++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   263
                    int[] row0 = newMatrix[p];  // previously visited row
90ce3da70b43 Initial load
duke
parents:
diff changeset
   264
                    assert(row0[0] == prevCount);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   265
                    System.arraycopy(row0, 1, row1, rfillp, row0.length-1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   266
                    rfillp += row0.length-1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   267
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   268
                if (!isSorted(row1, 1, true)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   269
                    Arrays.sort(row1, 1, row1.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   270
                    int jfillp = 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   271
                    // Detect and squeeze out duplicates.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   272
                    for (int j = 2; j < row1.length; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   273
                        if (row1[j] != row1[j-1])
90ce3da70b43 Initial load
duke
parents:
diff changeset
   274
                            row1[jfillp++] = row1[j];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   275
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   276
                    if (jfillp < row1.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   277
                        // Reallocate because of lost duplicates.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   278
                        int[] newRow1 = new int[jfillp];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   279
                        System.arraycopy(row1, 0, newRow1, 0, jfillp);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   280
                        row1 = newRow1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   281
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   282
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   283
                newMatrix[fillp1++] = row1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   284
                fillp2 = fillp1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   285
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   286
            if (i == matrix.length)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   287
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   288
            prevCount = row[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   289
            newMatrix[fillp2++] = row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   290
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   291
        assert(fillp1 == fillp2);  // no unfinished business
90ce3da70b43 Initial load
duke
parents:
diff changeset
   292
        // Now drop missing rows.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   293
        matrix = newMatrix;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   294
        if (fillp1 < matrix.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   295
            newMatrix = new int[fillp1][];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   296
            System.arraycopy(matrix, 0, newMatrix, 0, fillp1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   297
            matrix = newMatrix;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   298
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   299
        return matrix;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   300
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   301
90ce3da70b43 Initial load
duke
parents:
diff changeset
   302
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
   303
    String[] getRowTitles(String name) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   304
        int totalUnique = getTotalLength();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   305
        int totalWeight = getTotalWeight();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   306
        String[] histTitles = new String[matrix.length];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   307
        int cumWeight = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   308
        int cumUnique = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   309
        for (int i = 0; i < matrix.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   310
            int count  = getRowFrequency(i);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   311
            int unique = getRowLength(i);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   312
            int weight = getRowWeight(i);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   313
            cumWeight += weight;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   314
            cumUnique += unique;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   315
            long wpct = ((long)cumWeight * 100 + totalWeight/2) / totalWeight;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   316
            long upct = ((long)cumUnique * 100 + totalUnique/2) / totalUnique;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   317
            double len = getRowBitLength(i);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   318
            assert(0.1 > Math.abs(len - getBitLength(matrix[i][1])));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   319
            histTitles[i] = name+"["+i+"]"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   320
                +" len="+round(len,10)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   321
                +" ("+count+"*["+unique+"])"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   322
                +" ("+cumWeight+":"+wpct+"%)"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   323
                +" ["+cumUnique+":"+upct+"%]";
90ce3da70b43 Initial load
duke
parents:
diff changeset
   324
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   325
        return histTitles;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   326
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   327
90ce3da70b43 Initial load
duke
parents:
diff changeset
   328
    /** Print a report of this histogram.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   329
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   330
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
   331
    void print(PrintStream out) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   332
        print("hist", out);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   333
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   334
90ce3da70b43 Initial load
duke
parents:
diff changeset
   335
    /** Print a report of this histogram.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   336
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   337
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
   338
    void print(String name, PrintStream out) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   339
        print(name, getRowTitles(name), out);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   340
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   341
90ce3da70b43 Initial load
duke
parents:
diff changeset
   342
    /** Print a report of this histogram.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   343
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   344
    public
90ce3da70b43 Initial load
duke
parents:
diff changeset
   345
    void print(String name, String[] histTitles, PrintStream out) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   346
        int totalUnique = getTotalLength();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   347
        int totalWeight = getTotalWeight();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   348
        double tlen = getBitLength();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   349
        double avgLen = tlen / totalWeight;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   350
        double avg = (double) totalWeight / totalUnique;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   351
        String title = (name
90ce3da70b43 Initial load
duke
parents:
diff changeset
   352
                        +" len="+round(tlen,10)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   353
                        +" avgLen="+round(avgLen,10)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   354
                        +" weight("+totalWeight+")"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   355
                        +" unique["+totalUnique+"]"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   356
                        +" avgWeight("+round(avg,100)+")");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   357
        if (histTitles == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   358
            out.println(title);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   359
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   360
            out.println(title+" {");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   361
            StringBuffer buf = new StringBuffer();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   362
            for (int i = 0; i < matrix.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   363
                buf.setLength(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   364
                buf.append("  "+histTitles[i]+" {");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   365
                for (int j = 1; j < matrix[i].length; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   366
                    buf.append(" "+matrix[i][j]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   367
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   368
                buf.append(" }");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   369
                out.println(buf);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   370
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   371
            out.println("}");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   372
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   373
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   374
90ce3da70b43 Initial load
duke
parents:
diff changeset
   375
/*
90ce3da70b43 Initial load
duke
parents:
diff changeset
   376
    public static
90ce3da70b43 Initial load
duke
parents:
diff changeset
   377
    int[][] makeHistogramMatrix(int[] values) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   378
        // Make sure they are sorted.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   379
        values = maybeSort(values);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   380
        long[] hist2col = computeHistogram2Col(values);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   381
        int[][] matrix = makeMatrix(hist2col);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   382
        return matrix;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   383
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   384
*/
90ce3da70b43 Initial load
duke
parents:
diff changeset
   385
90ce3da70b43 Initial load
duke
parents:
diff changeset
   386
    private static
90ce3da70b43 Initial load
duke
parents:
diff changeset
   387
    int[][] makeMatrix(long[] hist2col) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   388
        // Sort by increasing count, then by increasing value.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   389
        Arrays.sort(hist2col);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   390
        int[] counts = new int[hist2col.length];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   391
        for (int i = 0; i < counts.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   392
            counts[i] = (int)( hist2col[i] >>> 32 );
90ce3da70b43 Initial load
duke
parents:
diff changeset
   393
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   394
        long[] countHist = computeHistogram2Col(counts);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   395
        int[][] matrix = new int[countHist.length][];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   396
        int histp = 0;  // cursor into hist2col (increasing count, value)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   397
        int countp = 0; // cursor into countHist (increasing count)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   398
        // Do a join between hist2col (resorted) and countHist.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   399
        for (int i = matrix.length; --i >= 0; ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   400
            long countAndRep = countHist[countp++];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   401
            int count  = (int) (countAndRep);  // what is the value count?
90ce3da70b43 Initial load
duke
parents:
diff changeset
   402
            int repeat = (int) (countAndRep >>> 32);  // # times repeated?
90ce3da70b43 Initial load
duke
parents:
diff changeset
   403
            int[] row = new int[1+repeat];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   404
            row[0] = count;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   405
            for (int j = 0; j < repeat; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   406
                long countAndValue = hist2col[histp++];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   407
                assert(countAndValue >>> 32 == count);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   408
                row[1+j] = (int) countAndValue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   409
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   410
            matrix[i] = row;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   411
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   412
        assert(histp == hist2col.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   413
        return matrix;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   414
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   415
90ce3da70b43 Initial load
duke
parents:
diff changeset
   416
    private static
90ce3da70b43 Initial load
duke
parents:
diff changeset
   417
    int[][] makeTable(long[] hist2col) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   418
        int[][] table = new int[2][hist2col.length];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   419
        // Break apart the entries in hist2col.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   420
        // table[0] gets values, table[1] gets entries.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   421
        for (int i = 0; i < hist2col.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   422
            table[0][i] = (int)( hist2col[i] );
90ce3da70b43 Initial load
duke
parents:
diff changeset
   423
            table[1][i] = (int)( hist2col[i] >>> 32 );
90ce3da70b43 Initial load
duke
parents:
diff changeset
   424
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   425
        return table;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   426
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   427
90ce3da70b43 Initial load
duke
parents:
diff changeset
   428
    /** Simple two-column histogram.  Contains repeated counts.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   429
     *  Assumes input is sorted.  Does not sort output columns.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   430
     *  <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   431
     *  Format of result:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   432
     *  <pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   433
     *  long[] hist = {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   434
     *    (count1 << 32) | (value1),
90ce3da70b43 Initial load
duke
parents:
diff changeset
   435
     *    (count2 << 32) | (value2),
90ce3da70b43 Initial load
duke
parents:
diff changeset
   436
     *    ...
90ce3da70b43 Initial load
duke
parents:
diff changeset
   437
     *  }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   438
     *  </pre>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   439
     *  In addition, the sequence {valuei...} is guaranteed to be sorted.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   440
     *  Note that resorting this using Arrays.sort() will reorder the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   441
     *  entries by increasing count.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   442
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   443
    private static
90ce3da70b43 Initial load
duke
parents:
diff changeset
   444
    long[] computeHistogram2Col(int[] sortedValues) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   445
        switch (sortedValues.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   446
        case 0:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   447
            return new long[]{ };
90ce3da70b43 Initial load
duke
parents:
diff changeset
   448
        case 1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   449
            return new long[]{ ((long)1 << 32) | (LOW32 & sortedValues[0]) };
90ce3da70b43 Initial load
duke
parents:
diff changeset
   450
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   451
        long[] hist = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   452
        for (boolean sizeOnly = true; ; sizeOnly = false) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   453
            int prevIndex = -1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   454
            int prevValue = sortedValues[0] ^ -1;  // force a difference
90ce3da70b43 Initial load
duke
parents:
diff changeset
   455
            int prevCount = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   456
            for (int i = 0; i <= sortedValues.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   457
                int thisValue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   458
                if (i < sortedValues.length)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   459
                    thisValue = sortedValues[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   460
                else
90ce3da70b43 Initial load
duke
parents:
diff changeset
   461
                    thisValue = prevValue ^ -1;  // force a difference at end
90ce3da70b43 Initial load
duke
parents:
diff changeset
   462
                if (thisValue == prevValue) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   463
                    prevCount += 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   464
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   465
                    // Found a new value.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   466
                    if (!sizeOnly && prevCount != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   467
                        // Save away previous value.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   468
                        hist[prevIndex] = ((long)prevCount << 32)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   469
                                        | (LOW32 & prevValue);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   470
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   471
                    prevValue = thisValue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   472
                    prevCount = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   473
                    prevIndex += 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   474
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   475
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   476
            if (sizeOnly) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   477
                // Finished the sizing pass.  Allocate the histogram.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   478
                hist = new long[prevIndex];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   479
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   480
                break;  // done
90ce3da70b43 Initial load
duke
parents:
diff changeset
   481
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   482
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   483
        return hist;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   484
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   485
90ce3da70b43 Initial load
duke
parents:
diff changeset
   486
    /** Regroup the histogram, so that it becomes an approximate histogram
90ce3da70b43 Initial load
duke
parents:
diff changeset
   487
     *  whose rows are of the given lengths.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   488
     *  If matrix rows must be split, the latter parts (larger values)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   489
     *  are placed earlier in the new matrix.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   490
     *  If matrix rows are joined, they are resorted into ascending order.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   491
     *  In the new histogram, the counts are averaged over row entries.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   492
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   493
    private static
90ce3da70b43 Initial load
duke
parents:
diff changeset
   494
    int[][] regroupHistogram(int[][] matrix, int[] groups) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   495
        long oldEntries = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   496
        for (int i = 0; i < matrix.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   497
            oldEntries += matrix[i].length-1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   498
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   499
        long newEntries = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   500
        for (int ni = 0; ni < groups.length; ni++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   501
            newEntries += groups[ni];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   502
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   503
        if (newEntries > oldEntries) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   504
            int newlen = groups.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   505
            long ok = oldEntries;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   506
            for (int ni = 0; ni < groups.length; ni++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   507
                if (ok < groups[ni]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   508
                    int[] newGroups = new int[ni+1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   509
                    System.arraycopy(groups, 0, newGroups, 0, ni+1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   510
                    groups = newGroups;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   511
                    groups[ni] = (int) ok;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   512
                    ok = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   513
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   514
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   515
                ok -= groups[ni];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   516
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   517
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   518
            long excess = oldEntries - newEntries;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   519
            int[] newGroups = new int[groups.length+1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   520
            System.arraycopy(groups, 0, newGroups, 0, groups.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   521
            newGroups[groups.length] = (int) excess;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   522
            groups = newGroups;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   523
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   524
        int[][] newMatrix = new int[groups.length][];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   525
        // Fill pointers.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   526
        int i = 0;  // into matrix
90ce3da70b43 Initial load
duke
parents:
diff changeset
   527
        int jMin = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   528
        int jMax = matrix[i].length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   529
        for (int ni = 0; ni < groups.length; ni++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   530
            int groupLength = groups[ni];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   531
            int[] group = new int[1+groupLength];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   532
            long groupWeight = 0;  // count of all in new group
90ce3da70b43 Initial load
duke
parents:
diff changeset
   533
            newMatrix[ni] = group;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   534
            int njFill = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   535
            while (njFill < group.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   536
                int len = group.length - njFill;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   537
                while (jMin == jMax) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   538
                    jMin = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   539
                    jMax = matrix[++i].length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   540
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   541
                if (len > jMax - jMin)  len = jMax - jMin;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   542
                groupWeight += (long) matrix[i][0] * len;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   543
                System.arraycopy(matrix[i], jMax - len, group, njFill, len);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   544
                jMax -= len;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   545
                njFill += len;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   546
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   547
            Arrays.sort(group, 1, group.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   548
            // compute average count of new group:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   549
            group[0] = (int) ((groupWeight + groupLength/2) / groupLength);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   550
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   551
        assert(jMin == jMax);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   552
        assert(i == matrix.length-1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   553
        return newMatrix;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   554
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   555
90ce3da70b43 Initial load
duke
parents:
diff changeset
   556
    public static
90ce3da70b43 Initial load
duke
parents:
diff changeset
   557
    Histogram makeByteHistogram(InputStream bytes) throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   558
        byte[] buf = new byte[1<<12];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   559
        int[] tally = new int[1<<8];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   560
        for (int nr; (nr = bytes.read(buf)) > 0; ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   561
            for (int i = 0; i < nr; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   562
                tally[buf[i] & 0xFF] += 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   563
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   564
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   565
        // Build a matrix.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   566
        int[][] matrix = new int[1<<8][2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   567
        for (int i = 0; i < tally.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   568
            matrix[i][0] = tally[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   569
            matrix[i][1] = i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   570
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   571
        return new Histogram(matrix);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   572
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   573
90ce3da70b43 Initial load
duke
parents:
diff changeset
   574
    /** Slice and sort the given input array. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   575
    private static
90ce3da70b43 Initial load
duke
parents:
diff changeset
   576
    int[] sortedSlice(int[] valueSequence, int start, int end) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   577
        if (start == 0 && end == valueSequence.length &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   578
            isSorted(valueSequence, 0, false)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   579
            return valueSequence;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   580
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   581
            int[] slice = new int[end-start];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   582
            System.arraycopy(valueSequence, start, slice, 0, slice.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   583
            Arrays.sort(slice);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   584
            return slice;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   585
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   586
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   587
90ce3da70b43 Initial load
duke
parents:
diff changeset
   588
    /** Tell if an array is sorted. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   589
    private static
90ce3da70b43 Initial load
duke
parents:
diff changeset
   590
    boolean isSorted(int[] values, int from, boolean strict) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   591
        for (int i = from+1; i < values.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   592
            if (strict ? !(values[i-1] < values[i])
90ce3da70b43 Initial load
duke
parents:
diff changeset
   593
                       : !(values[i-1] <= values[i])) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   594
                return false;  // found witness to disorder
90ce3da70b43 Initial load
duke
parents:
diff changeset
   595
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   596
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   597
        return true;  // no witness => sorted
90ce3da70b43 Initial load
duke
parents:
diff changeset
   598
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   599
90ce3da70b43 Initial load
duke
parents:
diff changeset
   600
    /** Clone and sort the array, if not already sorted. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   601
    private static
90ce3da70b43 Initial load
duke
parents:
diff changeset
   602
    int[] maybeSort(int[] values) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   603
        if (!isSorted(values, 0, false)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   604
            values = (int[]) values.clone();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   605
            Arrays.sort(values);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   606
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   607
        return values;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   608
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   609
90ce3da70b43 Initial load
duke
parents:
diff changeset
   610
90ce3da70b43 Initial load
duke
parents:
diff changeset
   611
    /// Debug stuff follows.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   612
90ce3da70b43 Initial load
duke
parents:
diff changeset
   613
    private boolean assertWellFormed(int[] valueSequence) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   614
/*
90ce3da70b43 Initial load
duke
parents:
diff changeset
   615
        // Sanity check.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   616
        int weight = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   617
        int vlength = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   618
        for (int i = 0; i < matrix.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   619
            int vlengthi = (matrix[i].length-1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   620
            int count = matrix[i][0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   621
            assert(vlengthi > 0);  // no empty rows
90ce3da70b43 Initial load
duke
parents:
diff changeset
   622
            assert(count > 0);  // no impossible rows
90ce3da70b43 Initial load
duke
parents:
diff changeset
   623
            vlength += vlengthi;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   624
            weight += count * vlengthi;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   625
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   626
        assert(isSorted(values, 0, true));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   627
        // make sure the counts all add up
90ce3da70b43 Initial load
duke
parents:
diff changeset
   628
        assert(totalWeight == weight);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   629
        assert(vlength == values.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   630
        assert(vlength == counts.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   631
        int weight2 = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   632
        for (int i = 0; i < counts.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   633
            weight2 += counts[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   634
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   635
        assert(weight2 == weight);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   636
        int[] revcol1 = new int[matrix.length];  //1st matrix colunm
90ce3da70b43 Initial load
duke
parents:
diff changeset
   637
        for (int i = 0; i < matrix.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   638
            // spot checking:  try a random query on each matrix row
90ce3da70b43 Initial load
duke
parents:
diff changeset
   639
            assert(matrix[i].length > 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   640
            revcol1[matrix.length-i-1] = matrix[i][0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   641
            assert(isSorted(matrix[i], 1, true));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   642
            int rand = (matrix[i].length+1) / 2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   643
            int val = matrix[i][rand];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   644
            int count = matrix[i][0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   645
            int pos = Arrays.binarySearch(values, val);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   646
            assert(values[pos] == val);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   647
            assert(counts[pos] == matrix[i][0]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   648
            if (valueSequence != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   649
                int count2 = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   650
                for (int j = 0; j < valueSequence.length; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   651
                    if (valueSequence[j] == val)  count2++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   652
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   653
                assert(count2 == count);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   654
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   655
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   656
        assert(isSorted(revcol1, 0, true));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   657
//*/
90ce3da70b43 Initial load
duke
parents:
diff changeset
   658
        return true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   659
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   660
90ce3da70b43 Initial load
duke
parents:
diff changeset
   661
/*
90ce3da70b43 Initial load
duke
parents:
diff changeset
   662
    public static
90ce3da70b43 Initial load
duke
parents:
diff changeset
   663
    int[] readValuesFrom(InputStream instr) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   664
        return readValuesFrom(new InputStreamReader(instr));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   665
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   666
    public static
90ce3da70b43 Initial load
duke
parents:
diff changeset
   667
    int[] readValuesFrom(Reader inrdr) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   668
        inrdr = new BufferedReader(inrdr);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   669
        final StreamTokenizer in = new StreamTokenizer(inrdr);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   670
        final int TT_NOTHING = -99;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   671
        in.commentChar('#');
90ce3da70b43 Initial load
duke
parents:
diff changeset
   672
        return readValuesFrom(new Iterator() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   673
            int token = TT_NOTHING;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   674
            private int getToken() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   675
                if (token == TT_NOTHING) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   676
                    try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   677
                        token = in.nextToken();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   678
                        assert(token != TT_NOTHING);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   679
                    } catch (IOException ee) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   680
                        throw new RuntimeException(ee);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   681
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   682
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   683
                return token;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   684
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   685
            public boolean hasNext() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   686
                return getToken() != StreamTokenizer.TT_EOF;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   687
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   688
            public Object next() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   689
                int ntok = getToken();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   690
                token = TT_NOTHING;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   691
                switch (ntok) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   692
                case StreamTokenizer.TT_EOF:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   693
                    throw new NoSuchElementException();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   694
                case StreamTokenizer.TT_NUMBER:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   695
                    return new Integer((int) in.nval);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   696
                default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   697
                    assert(false);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   698
                    return null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   699
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   700
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   701
            public void remove() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   702
                throw new UnsupportedOperationException();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   703
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   704
        });
90ce3da70b43 Initial load
duke
parents:
diff changeset
   705
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   706
    public static
90ce3da70b43 Initial load
duke
parents:
diff changeset
   707
    int[] readValuesFrom(Iterator iter) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   708
        return readValuesFrom(iter, 0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   709
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   710
    public static
90ce3da70b43 Initial load
duke
parents:
diff changeset
   711
    int[] readValuesFrom(Iterator iter, int initSize) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   712
        int[] na = new int[Math.max(10, initSize)];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   713
        int np = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   714
        while (iter.hasNext()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   715
            Integer val = (Integer) iter.next();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   716
            if (np == na.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   717
                int[] na2 = new int[np*2];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   718
                System.arraycopy(na, 0, na2, 0, np);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   719
                na = na2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   720
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   721
            na[np++] = val.intValue();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   722
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   723
        if (np != na.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   724
            int[] na2 = new int[np];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   725
            System.arraycopy(na, 0, na2, 0, np);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   726
            na = na2;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   727
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   728
        return na;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   729
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   730
90ce3da70b43 Initial load
duke
parents:
diff changeset
   731
    public static
90ce3da70b43 Initial load
duke
parents:
diff changeset
   732
    Histogram makeByteHistogram(byte[] bytes) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   733
        try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   734
            return makeByteHistogram(new ByteArrayInputStream(bytes));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   735
        } catch (IOException ee) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   736
            throw new RuntimeException(ee);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   737
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   738
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   739
90ce3da70b43 Initial load
duke
parents:
diff changeset
   740
    public static
90ce3da70b43 Initial load
duke
parents:
diff changeset
   741
    void main(String[] av) throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   742
        if (av.length > 0 && av[0].equals("-r")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   743
            int[] values = new int[Integer.parseInt(av[1])];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   744
            int limit = values.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   745
            if (av.length >= 3) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   746
                limit  = (int)( limit * Double.parseDouble(av[2]) );
90ce3da70b43 Initial load
duke
parents:
diff changeset
   747
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   748
            Random rnd = new Random();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   749
            for (int i = 0; i < values.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   750
                values[i] = rnd.nextInt(limit);;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   751
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   752
            Histogram rh = new Histogram(values);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   753
            rh.print("random", System.out);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   754
            return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   755
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   756
        if (av.length > 0 && av[0].equals("-s")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   757
            int[] values = readValuesFrom(System.in);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   758
            Random rnd = new Random();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   759
            for (int i = values.length; --i > 0; ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   760
                int j = rnd.nextInt(i+1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   761
                if (j < i) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   762
                    int tem = values[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   763
                    values[i] = values[j];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   764
                    values[j] = tem;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   765
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   766
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   767
            for (int i = 0; i < values.length; i++)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   768
                System.out.println(values[i]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   769
            return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   770
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   771
        if (av.length > 0 && av[0].equals("-e")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   772
            // edge cases
90ce3da70b43 Initial load
duke
parents:
diff changeset
   773
            new Histogram(new int[][] {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   774
                {1, 11, 111},
90ce3da70b43 Initial load
duke
parents:
diff changeset
   775
                {0, 123, 456},
90ce3da70b43 Initial load
duke
parents:
diff changeset
   776
                {1, 111, 1111},
90ce3da70b43 Initial load
duke
parents:
diff changeset
   777
                {0, 456, 123},
90ce3da70b43 Initial load
duke
parents:
diff changeset
   778
                {3},
90ce3da70b43 Initial load
duke
parents:
diff changeset
   779
                {},
90ce3da70b43 Initial load
duke
parents:
diff changeset
   780
                {3},
90ce3da70b43 Initial load
duke
parents:
diff changeset
   781
                {2, 22},
90ce3da70b43 Initial load
duke
parents:
diff changeset
   782
                {4}
90ce3da70b43 Initial load
duke
parents:
diff changeset
   783
            }).print(System.out);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   784
            return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   785
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   786
        if (av.length > 0 && av[0].equals("-b")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   787
            // edge cases
90ce3da70b43 Initial load
duke
parents:
diff changeset
   788
            Histogram bh = makeByteHistogram(System.in);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   789
            bh.print("bytes", System.out);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   790
            return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   791
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   792
        boolean regroup = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   793
        if (av.length > 0 && av[0].equals("-g")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   794
            regroup = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   795
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   796
90ce3da70b43 Initial load
duke
parents:
diff changeset
   797
        int[] values = readValuesFrom(System.in);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   798
        Histogram h = new Histogram(values);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   799
        if (!regroup)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   800
            h.print(System.out);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   801
        if (regroup) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   802
            int[] groups = new int[12];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   803
            for (int i = 0; i < groups.length; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   804
                groups[i] = 1<<i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   805
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   806
            int[][] gm = regroupHistogram(h.getMatrix(), groups);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   807
            Histogram g = new Histogram(gm);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   808
            System.out.println("h.getBitLength(g) = "+
90ce3da70b43 Initial load
duke
parents:
diff changeset
   809
                               h.getBitLength(g.getBitMetric()));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   810
            System.out.println("g.getBitLength(h) = "+
90ce3da70b43 Initial load
duke
parents:
diff changeset
   811
                               g.getBitLength(h.getBitMetric()));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   812
            g.print("regrouped", System.out);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   813
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   814
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   815
//*/
90ce3da70b43 Initial load
duke
parents:
diff changeset
   816
}