make/jdk/src/classes/build/tools/generatecharacter/SpecialCaseMap.java
author pliden
Thu, 26 Sep 2019 13:56:58 +0200
changeset 58355 de246fd65587
parent 47216 71c04702a3d5
permissions -rw-r--r--
8231294: ZGC: vmTestbase/nsk/jvmti/ResourceExhausted/resexhausted002 fails Reviewed-by: shade, dholmes

/*
 * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package build.tools.generatecharacter;

import java.io.*;
import java.util.*;
import java.lang.*;

/**
 * SpecialCaseMap stores the special (1:M) lower-, title- and upper-case
 * mappings of a single source code point, as read from a Unicode
 * SpecialCasing.txt-style data file. This class can read and parse such a
 * file.
 * <p>
 * A single SpecialCaseMap contains the mapping for one character.
 * <p>
 * Only lines whose text after the fourth {@code ';'} contains no further
 * {@code ';'} produce an entry (see {@link #parse(String)}); the locale and
 * context of an entry are currently always {@code null} because
 * {@link #parseLocale(String)} and {@link #parseContext(String)} are stubs.
 * <p>
 * Ordering, equality and hashing are all based solely on the source code
 * point.
 *
 * @author John O'Conner
 */
public class SpecialCaseMap implements Comparable<SpecialCaseMap> {

    /**
     * Fields that can be found in the SpecialCasing.txt file.
     */
    static final int REQUIRED_FIELDS = 5;
    static final int FIELD_SOURCE = 0;
    static final int FIELD_LOWERCASE = 1;
    static final int FIELD_TITLECASE = 2;
    static final int FIELD_UPPERCASE = 3;
    static final int FIELD_CONDITIONS = 4;

    /**
     * Context values
     */
    static final String CONTEXT_FINAL = "FINAL";
    static final String CONTEXT_NONFINAL = "NON_FINAL";
    static final String CONTEXT_MODERN = "MODERN";
    static final String CONTEXT_NONMODERN = "NON_MODERN";

    /** Message printed to standard output when a data line cannot be parsed. */
    private static final String PARSE_ERROR = "Error parsing spec line.";

    int chSource;
    Locale locale;
    char[] lowerCaseMap;
    char[] upperCaseMap;
    char[] titleCaseMap;
    String[] context;

    /**
     * Creates an entry whose source code point is 0xFFFF and whose mappings,
     * locale and context are all {@code null} until set.
     */
    SpecialCaseMap() {
        chSource = 0xFFFF;
    }

    /**
     * Read and parse a Unicode special case map file.
     * <p>
     * Each line is trimmed and handed to {@link #parse(String)}. Entries whose
     * source code point lies in a lower plane than {@code plane} are skipped,
     * and reading stops at the first entry from a higher plane, so the file is
     * expected to list source code points in ascending plane order. If an
     * I/O error occurs while reading, a warning is printed to standard error
     * and the entries read so far are returned.
     *
     * @param file   a file specifying the Unicode special case mappings
     * @param plane  the plane (source code point {@code >> 16}) to select
     * @return  an array of SpecialCaseMap objects, sorted by source code point,
     *          one for each line of the selected plane that could be
     *          successfully parsed
     * @throws FileNotFoundException if {@code file} cannot be opened for reading
     */
    public static SpecialCaseMap[] readSpecFile(File file, int plane) throws FileNotFoundException {
        ArrayList<SpecialCaseMap> caseMaps = new ArrayList<>(150);
        // Opened outside the try block so that FileNotFoundException still
        // propagates to the caller instead of being handled as a read error.
        FileReader in = new FileReader(file);
        try (BufferedReader reader = new BufferedReader(in)) {
            String line;
            while ((line = reader.readLine()) != null) {
                SpecialCaseMap item = parse(line.trim());
                if (item == null) {
                    continue;
                }
                int itemPlane = item.getCharSource() >> 16;
                if (itemPlane < plane) {
                    continue;
                }
                if (itemPlane > plane) {
                    break;
                }
                caseMaps.add(item);
            }
        } catch (IOException e) {
            // Best effort, as before: keep what was read, but no longer silently.
            System.err.println("Error reading " + file + ": " + e);
        }
        SpecialCaseMap[] result = caseMaps.toArray(new SpecialCaseMap[caseMaps.size()]);
        Arrays.sort(result);
        return result;
    }

    /**
     * Given one line of a Unicode special casing data file as a String, parse the line
     * and return a SpecialCaseMap object that contains the case mapping.
     * <p>
     * {@code null} is returned without any message for a {@code null} or empty
     * line, a line starting with {@code '#'}, and a line whose text after the
     * fourth {@code ';'} contains another {@code ';'}. A line with fewer than
     * four {@code ';'} or with a field that is not valid hexadecimal prints
     * "Error parsing spec line." to standard output and yields {@code null}.
     *
     * @param s a line of the Unicode special case map data file to be parsed
     * @return a SpecialCaseMap object, or null if the line was skipped or could not be parsed
     */
    public static SpecialCaseMap parse(String s) {
        if (s == null || s.length() == 0 || s.charAt(0) == '#') {
            return null;
        }

        String[] tokens = new String[REQUIRED_FIELDS];
        int tokenStart = 0;
        for (int x = 0; x < REQUIRED_FIELDS - 1; x++) {
            int tokenEnd = s.indexOf(';', tokenStart);
            if (tokenEnd < 0) {
                // Fewer separators than required fields: malformed line.
                System.out.println(PARSE_ERROR);
                return null;
            }
            tokens[x] = s.substring(tokenStart, tokenEnd);
            tokenStart = tokenEnd + 1;
        }
        // The last token is everything after the fourth separator.
        tokens[REQUIRED_FIELDS - 1] = s.substring(tokenStart);

        if (tokens[FIELD_CONDITIONS].indexOf(';') != -1) {
            // A further ';' means the line carries an additional field
            // (presumably a condition list); such lines are not handled here.
            return null;
        }

        try {
            SpecialCaseMap spec = new SpecialCaseMap();
            spec.setCharSource(parseChar(tokens[FIELD_SOURCE]));
            spec.setUpperCaseMap(parseCaseMap(tokens[FIELD_UPPERCASE]));
            spec.setLowerCaseMap(parseCaseMap(tokens[FIELD_LOWERCASE]));
            spec.setTitleCaseMap(parseCaseMap(tokens[FIELD_TITLECASE]));
            spec.setLocale(parseLocale(tokens[FIELD_CONDITIONS]));
            spec.setContext(parseContext(tokens[FIELD_CONDITIONS]));
            return spec;
        } catch (NumberFormatException e) {
            System.out.println(PARSE_ERROR);
            return null;
        }
    }

    /**
     * Parses a hexadecimal code point value.
     *
     * @param token the hexadecimal digits, without surrounding whitespace
     * @return the parsed value
     * @throws NumberFormatException if {@code token} is not valid hexadecimal
     */
    static int parseChar(String token) throws NumberFormatException {
        return Integer.parseInt(token, 16);
    }

    /**
     * Parses a space-separated list of hexadecimal code points into UTF-16
     * chars. Values above 0xFFFF are stored as a surrogate pair. Leading,
     * trailing and repeated space characters (as defined by
     * {@link Character#isSpaceChar(char)}) are ignored, so an empty or
     * all-space token yields an empty array.
     *
     * @param token the field text to parse
     * @return the mapped chars, possibly empty
     * @throws NumberFormatException if an element is not valid hexadecimal
     */
    static char[] parseCaseMap(String token) throws NumberFormatException {
        StringBuilder buff = new StringBuilder();
        int length = token.length();
        int pos = 0;
        while (pos < length) {
            // Skip separators; bounded so trailing spaces cannot overrun the string.
            while (pos < length && Character.isSpaceChar(token.charAt(pos))) {
                pos++;
            }
            if (pos == length) {
                break;
            }
            int start = pos;
            while (pos < length && !Character.isSpaceChar(token.charAt(pos))) {
                pos++;
            }
            int ch = parseChar(token.substring(start, pos));
            if (ch > 0xFFFF) {
                buff.append(getHighSurrogate(ch));
                buff.append(getLowSurrogate(ch));
            } else {
                buff.append((char) ch);
            }
        }
        return buff.toString().toCharArray();
    }

    /**
     * Placeholder: locale parsing is not implemented.
     *
     * @param token the conditions field text (ignored)
     * @return always {@code null}
     */
    static Locale parseLocale(String token) {
        return null;
    }

    /**
     * Placeholder: context parsing is not implemented.
     *
     * @param token the conditions field text (ignored)
     * @return always {@code null}
     */
    static String[] parseContext(String token) {
        return null;
    }

    /**
     * Binary-searches {@code map} for an entry whose source code point is
     * {@code ch}.
     *
     * @param ch  the source code point to look up
     * @param map entries sorted by ascending source code point (as returned
     *            by {@link #readSpecFile(File, int)}); may be null or empty
     * @return the index of a matching entry, or -1 if there is none
     */
    static int find(int ch, SpecialCaseMap[] map) {
        if ((map == null) || (map.length == 0)) {
            return -1;
        }
        int bottom = 0;
        int top = map.length;
        int current = top / 2;
        // Halve the half-open range [bottom, top) until one candidate remains.
        while (top - bottom > 1) {
            if (ch >= map[current].getCharSource()) {
                bottom = current;
            } else {
                top = current;
            }
            current = (top + bottom) >>> 1;
        }
        return (ch == map[current].getCharSource()) ? current : -1;
    }

    /**
     * Extracts and returns the high surrogate value from a UTF-32 code point.
     * If argument is a BMP character, then it is converted to a char and returned;
     * otherwise the high surrogate value is extracted.
     *
     * @param codePoint a UTF-32 codePoint with value greater than 0xFFFF.
     * @return the high surrogate value that helps create <code>codePoint</code>; else
     *         the char representation of <code>codePoint</code> if it is a BMP character.
     * @since 1.5
     */
    static char getHighSurrogate(int codePoint) {
        char high = (char) codePoint;
        if (codePoint > 0xFFFF) {
            high = (char) ((codePoint - 0x10000) / 0x0400 + 0xD800);
        }
        return high;
    }

    /**
     * Extracts and returns the low surrogate value from a UTF-32 code point.
     * If argument is a BMP character, then it is converted to a char and returned;
     * otherwise the low surrogate value is extracted.
     *
     * @param codePoint a UTF-32 codePoint with value greater than 0xFFFF.
     * @return the low surrogate value that helps create <code>codePoint</code>; else
     *         the char representation of <code>codePoint</code> if it is a BMP character.
     * @since 1.5
     */
    static char getLowSurrogate(int codePoint) {
        char low = (char) codePoint;
        if (codePoint > 0xFFFF) {
            low = (char) ((codePoint - 0x10000) % 0x0400 + 0xDC00);
        }
        return low;
    }

    /**
     * Formats the low 24 bits of {@code n} as six upper-case hexadecimal digits.
     *
     * @param n the value to format
     * @return a six-character, zero-padded hexadecimal string
     */
    static String hex6(int n) {
        String str = Integer.toHexString(n & 0xFFFFFF).toUpperCase();
        return "000000".substring(Math.min(6, str.length())) + str;
    }

    /**
     * Formats each char of {@code map} with {@link #hex6(int)}, separated by
     * single spaces.
     *
     * @param map the chars to format; may be null or empty
     * @return the formatted chars, or an empty string for a null or empty array
     */
    static String hex6(char[] map) {
        if (map == null || map.length == 0) {
            return "";
        }
        StringBuilder buff = new StringBuilder(hex6(map[0]));
        for (int x = 1; x < map.length; x++) {
            buff.append(' ').append(hex6(map[x]));
        }
        return buff.toString();
    }

    void setCharSource(int ch) {
        chSource = ch;
    }

    void setLowerCaseMap(char[] map) {
        lowerCaseMap = map;
    }

    void setUpperCaseMap(char[] map) {
        upperCaseMap = map;
    }

    void setTitleCaseMap(char[] map) {
        titleCaseMap = map;
    }

    void setLocale(Locale locale) {
        this.locale = locale;
    }

    void setContext(String[] context) {
        this.context = context;
    }

    /** @return the source code point of this mapping */
    public int getCharSource() {
        return chSource;
    }

    /** @return the lower-case mapping array held by this entry (not a copy) */
    public char[] getLowerCaseMap() {
        return lowerCaseMap;
    }

    /** @return the upper-case mapping array held by this entry (not a copy) */
    public char[] getUpperCaseMap() {
        return upperCaseMap;
    }

    /** @return the title-case mapping array held by this entry (not a copy) */
    public char[] getTitleCaseMap() {
        return titleCaseMap;
    }

    /** @return the locale of this mapping, or {@code null} if none was set */
    public Locale getLocale() {
        return locale;
    }

    /** @return the context array held by this entry (not a copy), or {@code null} */
    public String[] getContext() {
        return context;
    }

    /**
     * Orders entries by source code point only.
     *
     * @param otherObject the entry to compare with
     * @return -1, 0 or 1 as this source code point is less than, equal to,
     *         or greater than the other one
     */
    @Override
    public int compareTo(SpecialCaseMap otherObject) {
        return Integer.compare(chSource, otherObject.chSource);
    }

    /**
     * Two entries are equal when they have the same source code point; the
     * mappings themselves are not compared, consistent with
     * {@link #compareTo(SpecialCaseMap)}.
     */
    @Override
    public boolean equals(Object o1) {
        if (this == o1) {
            return true;
        }
        if (!(o1 instanceof SpecialCaseMap)) {
            return false;
        }
        return chSource == ((SpecialCaseMap) o1).chSource;
    }

    /** @return the source code point, matching {@link #equals(Object)} */
    @Override
    public int hashCode() {
        return chSource;
    }

    /**
     * Returns {@code source|lower|upper|title|context}, with code units
     * formatted by {@code hex6} and the context printed via
     * {@link Arrays#toString(Object[])}.
     */
    @Override
    public String toString() {
        StringBuilder buff = new StringBuilder();
        buff.append(hex6(getCharSource()));
        buff.append('|').append(hex6(lowerCaseMap));
        buff.append('|').append(hex6(upperCaseMap));
        buff.append('|').append(hex6(titleCaseMap));
        // Arrays.toString prints the elements rather than the array identity.
        buff.append('|').append(Arrays.toString(context));
        return buff.toString();
    }

    /**
     * Debugging entry point: prints every entry of the given plane.
     *
     * @param args the data file path followed by the plane number
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("Usage: SpecialCaseMap <file> <plane>");
            return;
        }
        try {
            File file = new File(args[0]);
            int plane = Integer.parseInt(args[1]);
            SpecialCaseMap[] spec = SpecialCaseMap.readSpecFile(file, plane);
            System.out.println("SpecialCaseMap[" + spec.length + "]:");
            for (SpecialCaseMap entry : spec) {
                System.out.println(entry.toString());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}