jdk/src/jdk.dev/share/classes/sun/tools/native2ascii/A2NFilter.java
changeset 30789 9eca83469588
parent 30788 ea573d35531a
child 30790 f81f9725a1c6
equal deleted inserted replaced
30788:ea573d35531a 30789:9eca83469588
     1 /*
       
     2  * Copyright (c) 2001, 2005, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 
       
    26 /**
       
    27  * This FilterReader class processes a sequence of characters from
       
    28  * a source stream containing a mixture of 7-bit ASCII data and
       
    29  * 'backslash-u' escaped sequences representing characters which have
       
    30  * the possibility of being encoded in a user-specified encoding.
       
    31  * The filter relies on knowing the target encoding and makes a
       
    32  * determination as to whether a given supplied character in its
       
    33  * source character stream is encodeable in the target encoding.
       
    34  * If not, it remains in its backslash-u escaped form.
       
    35  */
       
    36 
       
    37 package sun.tools.native2ascii;
       
    38 import java.io.*;
       
    39 
       
    40 
       
    41 class A2NFilter extends FilterReader {
       
    42 
       
    43     // maintain a trailing buffer to hold any incompleted
       
    44     // unicode escaped sequences
       
    45     private char[] trailChars = null;
       
    46 
       
    47     public A2NFilter(Reader in) {
       
    48         super(in);
       
    49     }
       
    50 
       
    51     public int read(char[] buf, int off, int len) throws IOException {
       
    52         int numChars = 0;        // how many characters have been read
       
    53         int retChars = 0;        // how many characters we'll return
       
    54 
       
    55         char[] cBuf = new char[len];
       
    56         int cOffset = 0;         // offset at which we'll start reading
       
    57         boolean eof = false;
       
    58 
       
    59         // copy trailing chars from previous invocation to input buffer
       
    60         if (trailChars != null) {
       
    61             for (int i = 0; i < trailChars.length; i++)
       
    62                 cBuf[i] = trailChars[i];
       
    63             numChars = trailChars.length;
       
    64             trailChars = null;
       
    65         }
       
    66 
       
    67         int n = in.read(cBuf, numChars, len - numChars);
       
    68         if (n < 0) {
       
    69             eof = true;
       
    70             if (numChars == 0)
       
    71                 return -1;              // EOF;
       
    72         } else {
       
    73             numChars += n;
       
    74         }
       
    75 
       
    76         for (int i = 0; i < numChars;) {
       
    77             char c = cBuf[i++];
       
    78 
       
    79             if (c != '\\' || (eof && numChars <= 5)) {
       
    80                 // Not a backslash, so copy and continue
       
    81                 // Always pass non backslash chars straight thru
       
    82                 // for regular encoding. If backslash occurs in
       
    83                 // input stream at the final 5 chars then don't
       
    84                 // attempt to read-ahead and de-escape since these
       
    85                 // are literal occurrences of U+005C which need to
       
    86                 // be encoded verbatim in the target encoding.
       
    87                 buf[retChars++] = c;
       
    88                 continue;
       
    89             }
       
    90 
       
    91             int remaining = numChars - i;
       
    92             if (remaining < 5) {
       
    93                 // Might be the first character of a unicode escape, but we
       
    94                 // don't have enough characters to tell, so save it and finish
       
    95                 trailChars = new char[1 + remaining];
       
    96                 trailChars[0] = c;
       
    97                 for (int j = 0; j < remaining; j++)
       
    98                     trailChars[1 + j] = cBuf[i + j];
       
    99                 break;
       
   100             }
       
   101             // At this point we have at least five characters remaining
       
   102 
       
   103             c = cBuf[i++];
       
   104             if (c != 'u') {
       
   105                 // Not a unicode escape, so copy and continue
       
   106                 buf[retChars++] = '\\';
       
   107                 buf[retChars++] = c;
       
   108                 continue;
       
   109             }
       
   110 
       
   111             // The next four characters are the hex part of a unicode escape
       
   112             char rc = 0;
       
   113             boolean isUE = true;
       
   114             try {
       
   115                 rc = (char)Integer.parseInt(new String(cBuf, i, 4), 16);
       
   116             } catch (NumberFormatException x) {
       
   117                 isUE = false;
       
   118             }
       
   119             if (isUE && Main.canConvert(rc)) {
       
   120                 // We'll be able to convert this
       
   121                 buf[retChars++] = rc;
       
   122                 i += 4; // Align beyond the current uXXXX sequence
       
   123             } else {
       
   124                 // We won't, so just retain the original sequence
       
   125                 buf[retChars++] = '\\';
       
   126                 buf[retChars++] = 'u';
       
   127                 continue;
       
   128             }
       
   129 
       
   130         }
       
   131 
       
   132         return retChars;
       
   133     }
       
   134 
       
   135     public int read() throws IOException {
       
   136         char[] buf = new char[1];
       
   137 
       
   138         if (read(buf, 0, 1) == -1)
       
   139             return -1;
       
   140         else
       
   141             return (int)buf[0];
       
   142     }
       
   143 
       
   144 }