/*
 * make/jdk/src/classes/build/tools/generatecharacter/CharacterName.java
 * author pliden
 * Thu, 26 Sep 2019 13:56:58 +0200
 * changeset 58355 de246fd65587
 * parent 47216 71c04702a3d5
 * permissions -rw-r--r--
 * 8231294: ZGC: vmTestbase/nsk/jvmti/ResourceExhausted/resexhausted002 fails
 * Reviewed-by: shade, dholmes
 */

package build.tools.generatecharacter;

import java.io.*;
import java.nio.*;
import java.util.*;
import java.util.zip.*;

/**
 * Build-time tool that extracts character names from a UnicodeData.txt
 * style file and writes them to a compact, deflate-compressed data file.
 *
 * <p>Layout of the data handed to the deflater, in order:
 * <pre>
 *   int          total   cpLen + number of bytes in the name pool
 *   int          bkNum   number of 256-code-point blocks that contain at
 *                        least one emitted character
 *   int          cpNum   number of emitted characters
 *   int          cpLen   length of the code point pool, i.e. the offset of
 *                        the name pool within the payload
 *   byte[cpLen]          code point pool
 *   byte[]               name pool: all emitted names concatenated, ASCII
 * </pre>
 *
 * <p>Each emitted character contributes one entry to the code point pool:
 * <ul>
 *   <li>if its code point is exactly one greater than the previously
 *       emitted one: a single byte holding the name length;</li>
 *   <li>otherwise: a zero byte (segment start flag) followed by a
 *       big-endian int whose top 8 bits are the name length and whose low
 *       24 bits are the code point.</li>
 * </ul>
 * Because a zero byte marks a segment start and lengths are stored in one
 * byte, every emitted name must be between 1 and 255 characters long.
 */
public class CharacterName {

    /** Size of the scratch buffer backing the code point pool. */
    private static final int CP_POOL_CAPACITY = 0x100000;

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the UnicodeData.txt input file,
     *             {@code args[1]} is the data file to create. The process
     *             exits with status 1 if the argument count is wrong or if
     *             generation fails for any reason.
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("Usage: java CharacterName UnicodeData.txt uniName.dat");
            System.exit(1);
            return;
        }
        try {
            generate(args[0], args[1]);
        } catch (Throwable e) {
            System.out.println("Unexpected exception:");
            e.printStackTrace();
            // Report the failure to the caller (normally the build) rather
            // than returning normally with a missing or truncated data file.
            System.exit(1);
        }
    }

    /**
     * Reads {@code unicodeData}, builds the code point and name pools, and
     * writes the compressed result to {@code outFile}. The output file is
     * opened only after the whole input has been processed successfully.
     */
    private static void generate(String unicodeData, String outFile) throws Exception {
        StringBuilder namePool = new StringBuilder();
        byte[] cpPoolBytes = new byte[CP_POOL_CAPACITY];
        ByteBuffer cpBB = ByteBuffer.wrap(cpPoolBytes);
        // One flag per block of 256 code points; set once the block has
        // contributed at least one character.
        boolean[] cpBlocks = new boolean[(Character.MAX_CODE_POINT + 1) >> 8];
        int bkNum = 0;
        int cpNum = 0;
        int lastCp = 0;

        try (BufferedReader bfr = new BufferedReader(new FileReader(unicodeData))) {
            String line;
            while ((line = bfr.readLine()) != null) {
                if (line.startsWith("#")) {
                    continue;
                }
                UnicodeSpec spec = UnicodeSpec.parse(line);
                if (spec == null) {
                    continue;
                }
                String name = resolveName(spec);
                if (name == null) {
                    continue;
                }
                int cp = spec.getCodePoint();
                int nameLen = name.length();
                if (nameLen < 1 || nameLen > 0xff) {
                    // 0 would be read back as a segment start flag and
                    // anything above 255 would be truncated by the byte
                    // cast; either would silently corrupt the table.
                    throw new IllegalStateException(
                        "Name length " + nameLen + " out of range [1, 255] for U+"
                        + Integer.toHexString(cp).toUpperCase(Locale.ROOT));
                }

                cpNum++;
                int block = cp >> 8;
                if (!cpBlocks[block]) {
                    cpBlocks[block] = true;
                    bkNum++;
                }
                if (cp == lastCp + 1) {
                    // Continues the current run: the length alone suffices.
                    cpBB.put((byte) nameLen);
                } else {
                    cpBB.put((byte) 0);  // segment start flag
                    cpBB.putInt((nameLen << 24) | (cp & 0xffffff));
                }
                namePool.append(name);
                lastCp = cp;
            }
        }

        byte[] namePoolBytes = namePool.toString().getBytes("ASCII");
        int cpLen = cpBB.position();
        int total = cpLen + namePoolBytes.length;

        // Closing dos finishes the deflater and closes the file; declaring
        // the file stream separately guarantees it is closed even if one of
        // the wrappers cannot be constructed.
        try (FileOutputStream fos = new FileOutputStream(outFile);
             DataOutputStream dos = new DataOutputStream(new DeflaterOutputStream(fos))) {
            dos.writeInt(total);  // total
            dos.writeInt(bkNum);  // bkNum
            dos.writeInt(cpNum);  // cpNum
            dos.writeInt(cpLen);  // nameOff
            dos.write(cpPoolBytes, 0, cpLen);
            dos.write(namePoolBytes);
        }
    }

    /**
     * Picks the name to record for {@code spec}.
     *
     * @return the name to emit, or {@code null} if the entry is to be skipped
     */
    private static String resolveName(UnicodeSpec spec) {
        int cp = spec.getCodePoint();
        String name = spec.getName();
        if (name.equals("<control>") && spec.getOldName() != null) {
            if (cp == 0x7) {
                // <control>BELL -> BEL; u+1f514 <-> BELL
                return "BEL";
            }
            String oldName = spec.getOldName();
            if (oldName.length() != 0) {
                return oldName;
            }
            /*
               3 "figment" characters from NameAliases.txt
               Several documented labels for C1 control code points which
               were never actually approved in any standard...but were
               implemented in Perl regex.
               0080;PADDING CHARACTER;figment
               0081;HIGH OCTET PRESET;figment
               0099;SINGLE GRAPHIC CHARACTER INTRODUCER;figment
            */
            switch (cp) {
                case 0x80: return "PADDING CHARACTER";
                case 0x81: return "HIGH OCTET PRESET";
                case 0x99: return "SINGLE GRAPHIC CHARACTER INTRODUCER";
                default:   return null;
            }
        }
        if (name.startsWith("<")) {
            /*
              Remaining bracketed names are skipped, e.g. range markers:
              3400    <CJK Ideograph Extension A, First>
              4db5    <CJK Ideograph Extension A, Last>
              4e00    <CJK Ideograph, First>
              9fc3    <CJK Ideograph, Last>
              ac00    <Hangul Syllable, First>
              d7a3    <Hangul Syllable, Last>
              d800    <Non Private Use High Surrogate, First>
              db7f    <Non Private Use High Surrogate, Last>
              db80    <Private Use High Surrogate, First>
              dbff    <Private Use High Surrogate, Last>
              dc00    <Low Surrogate, First>
              dfff    <Low Surrogate, Last>
              e000    <Private Use, First>
              f8ff    <Private Use, Last>
             20000    <CJK Ideograph Extension B, First>
             2a6d6    <CJK Ideograph Extension B, Last>
             f0000    <Plane 15 Private Use, First>
             ffffd    <Plane 15 Private Use, Last>
            */
            return null;
        }
        return name;
    }
}