jdk-sandbox: langtools/test/tools/javac/4846262/Native2Ascii.java@f34e7e8b4eac


/*
 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FilterReader;
import java.io.FilterWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.file.Files;
import java.nio.file.Path;
import static java.nio.charset.StandardCharsets.*;

/**
 * Simple utility to convert a text file from a native encoding to ASCII, or
 * the reverse, representing non-ASCII characters as a backslash, the letter
 * 'u' and four hexadecimal digits (the Java Unicode escape notation).
 *
 * <p>Both conversions copy the input line by line, so every output line,
 * including the last one, is terminated with the platform line separator.
 */
public class Native2Ascii {
    final Charset cs;
    final CharsetEncoder encoder;

    /**
     * Creates a converter for the given native charset.
     *
     * @param cs the native charset; must support encoding, because an
     *           encoder is obtained from it to decide which escapes can be
     *           represented natively
     */
    public Native2Ascii(Charset cs) {
        this.cs = cs;
        this.encoder = cs.newEncoder();
    }

    /**
     * ASCII to Native conversion.
     *
     * @param infile  US-ASCII input file, possibly containing Unicode escapes
     * @param outfile output file, written in the native charset
     * @throws IOException if reading or writing fails
     */
    public void asciiToNative(Path infile, Path outfile) throws IOException {
        try (BufferedReader in = Files.newBufferedReader(infile, US_ASCII);
             BufferedReader reader = new BufferedReader(new A2NFilter(in));
             BufferedWriter writer = Files.newBufferedWriter(outfile, cs)) {
            String line;
            while ((line = reader.readLine()) != null) {
                writer.write(line);
                writer.newLine();
            }
        }
    }

    /**
     * Native to ASCII conversion.
     *
     * @param infile  input file, read in the native charset
     * @param outfile US-ASCII output file; chars above U+007F are escaped
     * @throws IOException if reading or writing fails
     */
    public void nativeToAscii(Path infile, Path outfile) throws IOException {
        try (BufferedReader reader = Files.newBufferedReader(infile, cs);
             BufferedWriter out = Files.newBufferedWriter(outfile, US_ASCII);
             BufferedWriter writer = new BufferedWriter(new N2AFilter(out))) {
            String line;
            while ((line = reader.readLine()) != null) {
                writer.write(line);
                writer.newLine();
            }
        }
    }

    // A copy of native2ascii N2AFilter.
    // Writes chars up to U+007F unchanged and every other char as a
    // six-character lower-case Unicode escape.
    class N2AFilter extends FilterWriter {
        public N2AFilter(Writer out) { super(out); }

        // Kept for source compatibility with callers passing a char.
        public void write(char b) throws IOException {
            writeEscaped(b);
        }

        // Writer.write(int) must be overridden as well; otherwise
        // FilterWriter forwards the char to the target unescaped.
        @Override
        public void write(int c) throws IOException {
            writeEscaped((char) c);
        }

        @Override
        public void write(char[] buf, int off, int len) throws IOException {
            // Honor the requested region rather than always starting at 0
            int end = off + len;
            for (int i = off; i < end; i++) {
                writeEscaped(buf[i]);
            }
        }

        // FilterWriter passes strings straight to the target, so this
        // overload has to be filtered explicitly too.
        @Override
        public void write(String str, int off, int len) throws IOException {
            int end = off + len;
            for (int i = off; i < end; i++) {
                writeEscaped(str.charAt(i));
            }
        }

        // Emits one char, escaped when it lies outside the ASCII range.
        private void writeEscaped(char c) throws IOException {
            if (c > '\u007f') {
                out.write('\\');
                out.write('u');
                // Four zero-padded hex digits, most significant first
                for (int shift = 12; shift >= 0; shift -= 4) {
                    out.write(Character.forDigit((c >> shift) & 0xf, 16));
                }
            } else {
                out.write(c);
            }
        }
    }

    // A copy of native2ascii A2NFilter.
    // Replaces Unicode escapes by the char they denote when the target
    // charset can encode that char; everything else is passed through.
    // A backslash followed by anything other than 'u' is copied together
    // with the following char, so a doubled backslash never starts an escape.
    class A2NFilter extends FilterReader {
        // Input chars read from the underlying reader but not yet consumed:
        // either an incomplete escape sequence or input that did not fit
        // into the caller's buffer. Never longer than five chars.
        private char[] trailChars = null;

        // True when the previous char was a backslash that was not followed
        // by 'u'; the next input char is then copied verbatim.
        private boolean literalNext = false;

        public A2NFilter(Reader in) {
            super(in);
        }

        @Override
        public int read(char[] buf, int off, int len) throws IOException {
            if (off < 0 || len < 0 || len > buf.length - off) {
                throw new IndexOutOfBoundsException();
            }
            if (len == 0) {
                return 0;
            }

            while (true) {
                // Scratch buffer always has room for the carried chars
                // plus up to len freshly read ones.
                int carried = (trailChars == null) ? 0 : trailChars.length;
                char[] cBuf = new char[carried + len];
                if (carried > 0) {
                    System.arraycopy(trailChars, 0, cBuf, 0, carried);
                    trailChars = null;
                }

                int numChars = carried;  // how many input chars we hold
                int n = in.read(cBuf, carried, len);
                boolean eof = n < 0;
                if (eof) {
                    if (numChars == 0)
                        return -1;          // EOF
                } else {
                    numChars += n;
                }

                int i = 0;               // next input char to consume
                int retChars = 0;        // how many characters we'll return
                while (i < numChars && retChars < len) {
                    char c = cBuf[i];

                    if (literalNext || c != '\\') {
                        // Ordinary char, or the char right after a
                        // non-escape backslash: copy it straight through
                        literalNext = false;
                        buf[off + retChars++] = c;
                        i++;
                        continue;
                    }

                    int remaining = numChars - i - 1; // chars after backslash
                    if (remaining >= 1 && cBuf[i + 1] != 'u') {
                        // Not a unicode escape; the following char is
                        // copied verbatim on the next iteration or call
                        buf[off + retChars++] = '\\';
                        literalNext = true;
                        i++;
                        continue;
                    }

                    if (remaining < 5) {
                        if (!eof) {
                            // Might be the start of a unicode escape, but
                            // we cannot tell yet; keep it for the next round
                            break;
                        }
                        // The input ends before the escape could complete,
                        // so this is a literal occurrence of U+005C
                        buf[off + retChars++] = '\\';
                        i++;
                        continue;
                    }

                    // A backslash, a 'u' and at least four more chars
                    int value = parseHex4(cBuf, i + 2);
                    if (value >= 0 && encoder.canEncode((char) value)) {
                        // We'll be able to convert this
                        buf[off + retChars++] = (char) value;
                        i += 6;
                    } else {
                        // We won't, so retain the original sequence; the
                        // chars after the backslash are handled as
                        // ordinary input
                        buf[off + retChars++] = '\\';
                        i++;
                    }
                }

                if (i < numChars) {
                    trailChars = new char[numChars - i];
                    System.arraycopy(cBuf, i, trailChars, 0, numChars - i);
                }

                // Only hand back zero chars if the underlying reader did so
                // itself; otherwise keep reading until the pending escape
                // is resolved.
                if (retChars > 0 || n <= 0) {
                    return retChars;
                }
            }
        }

        @Override
        public int read() throws IOException {
            char[] buf = new char[1];
            int n;
            do {
                n = read(buf, 0, 1);
            } while (n == 0);
            return (n < 0) ? -1 : buf[0];
        }

        // Parses exactly four ASCII hex digits starting at index start.
        // Returns the value, or -1 if any of the chars is not a hex digit
        // (signs, which Integer.parseInt would accept, are rejected).
        private int parseHex4(char[] chars, int start) {
            int value = 0;
            for (int k = 0; k < 4; k++) {
                char h = chars[start + k];
                int digit;
                if (h >= '0' && h <= '9') {
                    digit = h - '0';
                } else if (h >= 'a' && h <= 'f') {
                    digit = h - 'a' + 10;
                } else if (h >= 'A' && h <= 'F') {
                    digit = h - 'A' + 10;
                } else {
                    return -1;
                }
                value = (value << 4) | digit;
            }
            return value;
        }
    }
}
author	shade
	Tue, 02 Feb 2016 18:15:15 +0300
changeset 35639	f34e7e8b4eac
parent 30844	a33cd63c54ef
permissions	-rw-r--r--