langtools/test/tools/javac/4846262/Native2Ascii.java
changeset 30844 a33cd63c54ef
equal deleted inserted replaced
30843:6e378856c5a2 30844:a33cd63c54ef
       
     1 /*
       
     2  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.
       
     8  *
       
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    12  * version 2 for more details (a copy is included in the LICENSE file that
       
    13  * accompanied this code).
       
    14  *
       
    15  * You should have received a copy of the GNU General Public License version
       
    16  * 2 along with this work; if not, write to the Free Software Foundation,
       
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    18  *
       
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    20  * or visit www.oracle.com if you need additional information or have any
       
    21  * questions.
       
    22  */
       
    23 
       
    24 import java.io.BufferedReader;
       
    25 import java.io.BufferedWriter;
       
    26 import java.io.FilterReader;
       
    27 import java.io.FilterWriter;
       
    28 import java.io.IOException;
       
    29 import java.io.Reader;
       
    30 import java.io.Writer;
       
    31 import java.nio.charset.Charset;
       
    32 import java.nio.charset.CharsetEncoder;
       
    33 import java.nio.file.Files;
       
    34 import java.nio.file.Path;
       
    35 import static java.nio.charset.StandardCharsets.*;
       
    36 
       
    37 /**
       
    38  * Simple utility to convert from native encoding file to ascii or reverse
       
    39  * including \udddd Unicode notation.
       
    40  */
       
    41 public class Native2Ascii {
       
    42     final Charset cs;
       
    43     final CharsetEncoder encoder;
       
    44     public Native2Ascii(Charset cs) {
       
    45         this.cs = cs;
       
    46         this.encoder = cs.newEncoder();
       
    47     }
       
    48 
       
    49     /**
       
    50      * ASCII to Native conversion
       
    51      */
       
    52     public void asciiToNative(Path infile, Path outfile) throws IOException {
       
    53         try (BufferedReader in = Files.newBufferedReader(infile, US_ASCII);
       
    54              BufferedReader reader = new BufferedReader(new A2NFilter(in));
       
    55              BufferedWriter writer = Files.newBufferedWriter(outfile, cs)) {
       
    56             String line;
       
    57             while ((line = reader.readLine()) != null) {
       
    58                 writer.write(line.toCharArray());
       
    59                 writer.newLine();
       
    60             }
       
    61         }
       
    62     }
       
    63 
       
    64     /**
       
    65      * Native to ASCII conversion
       
    66      */
       
    67     public void nativeToAscii(Path infile, Path outfile) throws IOException {
       
    68         try (BufferedReader reader = Files.newBufferedReader(infile, cs);
       
    69              BufferedWriter out = Files.newBufferedWriter(outfile, US_ASCII);
       
    70              BufferedWriter writer = new BufferedWriter(new N2AFilter(out))) {
       
    71             String line;
       
    72             while ((line = reader.readLine()) != null) {
       
    73                 writer.write(line.toCharArray());
       
    74                 writer.newLine();
       
    75             }
       
    76         }
       
    77     }
       
    78 
       
    79     // A copy of native2ascii N2AFilter
       
    80     class N2AFilter extends FilterWriter {
       
    81         public N2AFilter(Writer out) { super(out); }
       
    82         public void write(char b) throws IOException {
       
    83             char[] buf = new char[1];
       
    84             buf[0] = b;
       
    85             write(buf, 0, 1);
       
    86         }
       
    87 
       
    88         public void write(char[] buf, int off, int len) throws IOException {
       
    89             for (int i = 0; i < len; i++) {
       
    90                 if ((buf[i] > '\u007f')) {
       
    91                     // write \udddd
       
    92                     out.write('\\');
       
    93                     out.write('u');
       
    94                     String hex = Integer.toHexString(buf[i]);
       
    95                     StringBuilder hex4 = new StringBuilder(hex);
       
    96                     hex4.reverse();
       
    97                     int length = 4 - hex4.length();
       
    98                     for (int j = 0; j < length; j++) {
       
    99                         hex4.append('0');
       
   100                     }
       
   101                     for (int j = 0; j < 4; j++) {
       
   102                         out.write(hex4.charAt(3 - j));
       
   103                     }
       
   104                 } else
       
   105                     out.write(buf[i]);
       
   106             }
       
   107         }
       
   108     }
       
   109 
       
   110     // A copy of native2ascii A2NFilter
       
   111     class A2NFilter extends FilterReader {
       
   112         // maintain a trailing buffer to hold any incompleted
       
   113         // unicode escaped sequences
       
   114         private char[] trailChars = null;
       
   115 
       
   116         public A2NFilter(Reader in) {
       
   117             super(in);
       
   118         }
       
   119 
       
   120         public int read(char[] buf, int off, int len) throws IOException {
       
   121             int numChars = 0;        // how many characters have been read
       
   122             int retChars = 0;        // how many characters we'll return
       
   123 
       
   124             char[] cBuf = new char[len];
       
   125             int cOffset = 0;         // offset at which we'll start reading
       
   126             boolean eof = false;
       
   127 
       
   128             // copy trailing chars from previous invocation to input buffer
       
   129             if (trailChars != null) {
       
   130                 for (int i = 0; i < trailChars.length; i++)
       
   131                     cBuf[i] = trailChars[i];
       
   132                 numChars = trailChars.length;
       
   133                 trailChars = null;
       
   134             }
       
   135 
       
   136             int n = in.read(cBuf, numChars, len - numChars);
       
   137             if (n < 0) {
       
   138                 eof = true;
       
   139                 if (numChars == 0)
       
   140                     return -1;              // EOF;
       
   141             } else {
       
   142                 numChars += n;
       
   143             }
       
   144 
       
   145             for (int i = 0; i < numChars; ) {
       
   146                 char c = cBuf[i++];
       
   147 
       
   148                 if (c != '\\' || (eof && numChars <= 5)) {
       
   149                     // Not a backslash, so copy and continue
       
   150                     // Always pass non backslash chars straight thru
       
   151                     // for regular encoding. If backslash occurs in
       
   152                     // input stream at the final 5 chars then don't
       
   153                     // attempt to read-ahead and de-escape since these
       
   154                     // are literal occurrences of U+005C which need to
       
   155                     // be encoded verbatim in the target encoding.
       
   156                     buf[retChars++] = c;
       
   157                     continue;
       
   158                 }
       
   159 
       
   160                 int remaining = numChars - i;
       
   161                 if (remaining < 5) {
       
   162                     // Might be the first character of a unicode escape, but we
       
   163                     // don't have enough characters to tell, so save it and finish
       
   164                     trailChars = new char[1 + remaining];
       
   165                     trailChars[0] = c;
       
   166                     for (int j = 0; j < remaining; j++)
       
   167                         trailChars[1 + j] = cBuf[i + j];
       
   168                     break;
       
   169                 }
       
   170                 // At this point we have at least five characters remaining
       
   171 
       
   172                 c = cBuf[i++];
       
   173                 if (c != 'u') {
       
   174                     // Not a unicode escape, so copy and continue
       
   175                     buf[retChars++] = '\\';
       
   176                     buf[retChars++] = c;
       
   177                     continue;
       
   178                 }
       
   179 
       
   180                 // The next four characters are the hex part of a unicode escape
       
   181                 char rc = 0;
       
   182                 boolean isUE = true;
       
   183                 try {
       
   184                     rc = (char) Integer.parseInt(new String(cBuf, i, 4), 16);
       
   185                 } catch (NumberFormatException x) {
       
   186                     isUE = false;
       
   187                 }
       
   188                 if (isUE && encoder.canEncode(rc)) {
       
   189                     // We'll be able to convert this
       
   190                     buf[retChars++] = rc;
       
   191                     i += 4; // Align beyond the current uXXXX sequence
       
   192                 } else {
       
   193                     // We won't, so just retain the original sequence
       
   194                     buf[retChars++] = '\\';
       
   195                     buf[retChars++] = 'u';
       
   196                     continue;
       
   197                 }
       
   198 
       
   199             }
       
   200 
       
   201             return retChars;
       
   202         }
       
   203 
       
   204         public int read() throws IOException {
       
   205             char[] buf = new char[1];
       
   206 
       
   207             if (read(buf, 0, 1) == -1)
       
   208                 return -1;
       
   209             else
       
   210                 return (int) buf[0];
       
   211         }
       
   212     }
       
   213 }