jaxws/src/share/jaxws_classes/com/sun/codemodel/internal/util/Surrogate.java
changeset 23782 953bfc3fbe31
parent 23403 85dbdc227c5e
child 23783 57704b80bf4d
equal deleted inserted replaced
23403:85dbdc227c5e 23782:953bfc3fbe31
     1 /*
       
     2  * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 
       
    26 package com.sun.codemodel.internal.util;
       
    27 
       
    28 import java.nio.CharBuffer;
       
    29 import java.nio.charset.CoderResult;
       
    30 
       
    31 
       
    32 /**
       
    33  * Utility class for dealing with surrogates.
       
    34  *
       
    35  * @author Mark Reinhold
       
    36  * @version 1.11, 03/01/23
       
    37  */
       
    38 
       
    39 class Surrogate {
       
    40 
       
    41     private Surrogate() { }
       
    42 
       
    43     // UTF-16 surrogate-character ranges
       
    44     //
       
    45     public static final char MIN_HIGH = '\uD800';
       
    46     public static final char MAX_HIGH = '\uDBFF';
       
    47     public static final char MIN_LOW  = '\uDC00';
       
    48     public static final char MAX_LOW  = '\uDFFF';
       
    49     public static final char MIN = MIN_HIGH;
       
    50     public static final char MAX = MAX_LOW;
       
    51 
       
    52     // Range of UCS-4 values that need surrogates in UTF-16
       
    53     //
       
    54     public static final int UCS4_MIN = 0x10000;
       
    55     public static final int UCS4_MAX = (1 << 20) + UCS4_MIN - 1;
       
    56 
       
    57     /**
       
    58      * Tells whether or not the given UTF-16 value is a high surrogate.
       
    59      */
       
    60     public static boolean isHigh(int c) {
       
    61         return (MIN_HIGH <= c) && (c <= MAX_HIGH);
       
    62     }
       
    63 
       
    64     /**
       
    65      * Tells whether or not the given UTF-16 value is a low surrogate.
       
    66      */
       
    67     public static boolean isLow(int c) {
       
    68         return (MIN_LOW <= c) && (c <= MAX_LOW);
       
    69     }
       
    70 
       
    71     /**
       
    72      * Tells whether or not the given UTF-16 value is a surrogate character,
       
    73      */
       
    74     public static boolean is(int c) {
       
    75         return (MIN <= c) && (c <= MAX);
       
    76     }
       
    77 
       
    78     /**
       
    79      * Tells whether or not the given UCS-4 character must be represented as a
       
    80      * surrogate pair in UTF-16.
       
    81      */
       
    82     public static boolean neededFor(int uc) {
       
    83         return (uc >= UCS4_MIN) && (uc <= UCS4_MAX);
       
    84     }
       
    85 
       
    86     /**
       
    87      * Returns the high UTF-16 surrogate for the given UCS-4 character.
       
    88      */
       
    89     public static char high(int uc) {
       
    90         return (char)(0xd800 | (((uc - UCS4_MIN) >> 10) & 0x3ff));
       
    91     }
       
    92 
       
    93     /**
       
    94      * Returns the low UTF-16 surrogate for the given UCS-4 character.
       
    95      */
       
    96     public static char low(int uc) {
       
    97         return (char)(0xdc00 | ((uc - UCS4_MIN) & 0x3ff));
       
    98     }
       
    99 
       
   100     /**
       
   101      * Converts the given surrogate pair into a 32-bit UCS-4 character.
       
   102      */
       
   103     public static int toUCS4(char c, char d) {
       
   104         return (((c & 0x3ff) << 10) | (d & 0x3ff)) + 0x10000;
       
   105     }
       
   106 
       
   107     /**
       
   108      * Surrogate parsing support.  Charset implementations may use instances of
       
   109      * this class to handle the details of parsing UTF-16 surrogate pairs.
       
   110      */
       
   111     public static class Parser {
       
   112 
       
   113         public Parser() { }
       
   114 
       
   115         private int character;          // UCS-4
       
   116         private CoderResult error = CoderResult.UNDERFLOW;
       
   117         private boolean isPair;
       
   118 
       
   119         /**
       
   120          * Returns the UCS-4 character previously parsed.
       
   121          */
       
   122         public int character() {
       
   123             return character;
       
   124         }
       
   125 
       
   126         /**
       
   127          * Tells whether or not the previously-parsed UCS-4 character was
       
   128          * originally represented by a surrogate pair.
       
   129          */
       
   130         public boolean isPair() {
       
   131             return isPair;
       
   132         }
       
   133 
       
   134         /**
       
   135          * Returns the number of UTF-16 characters consumed by the previous
       
   136          * parse.
       
   137          */
       
   138         public int increment() {
       
   139             return isPair ? 2 : 1;
       
   140         }
       
   141 
       
   142         /**
       
   143          * If the previous parse operation detected an error, return the object
       
   144          * describing that error.
       
   145          */
       
   146         public CoderResult error() {
       
   147             return error;
       
   148         }
       
   149 
       
   150         /**
       
   151          * Returns an unmappable-input result object, with the appropriate
       
   152          * input length, for the previously-parsed character.
       
   153          */
       
   154         public CoderResult unmappableResult() {
       
   155             return CoderResult.unmappableForLength(isPair ? 2 : 1);
       
   156         }
       
   157 
       
   158         /**
       
   159          * Parses a UCS-4 character from the given source buffer, handling
       
   160          * surrogates.
       
   161          *
       
   162          * @param  c    The first character
       
   163          * @param  in   The source buffer, from which one more character
       
   164          *              will be consumed if c is a high surrogate
       
   165          *
       
   166          * @return   Either a parsed UCS-4 character, in which case the isPair()
       
   167          *           and increment() methods will return meaningful values, or
       
   168          *           -1, in which case error() will return a descriptive result
       
   169          *           object
       
   170          */
       
   171         public int parse(char c, CharBuffer in) {
       
   172             if (isHigh(c)) {
       
   173                 if (!in.hasRemaining()) {
       
   174                     error = CoderResult.UNDERFLOW;
       
   175                     return -1;
       
   176                 }
       
   177                 char d = in.get();
       
   178                 if (isLow(d)) {
       
   179                     character = toUCS4(c, d);
       
   180                     isPair = true;
       
   181                     error = null;
       
   182                     return character;
       
   183                 }
       
   184                 error = CoderResult.malformedForLength(1);
       
   185                 return -1;
       
   186             }
       
   187             if (isLow(c)) {
       
   188                 error = CoderResult.malformedForLength(1);
       
   189                 return -1;
       
   190             }
       
   191             character = c;
       
   192             isPair = false;
       
   193             error = null;
       
   194             return character;
       
   195         }
       
   196 
       
   197         /**
       
   198          * Parses a UCS-4 character from the given source buffer, handling
       
   199          * surrogates.
       
   200          *
       
   201          * @param  c    The first character
       
   202          * @param  ia   The input array, from which one more character
       
   203          *              will be consumed if c is a high surrogate
       
   204          * @param  ip   The input index
       
   205          * @param  il   The input limit
       
   206          *
       
   207          * @return   Either a parsed UCS-4 character, in which case the isPair()
       
   208          *           and increment() methods will return meaningful values, or
       
   209          *           -1, in which case error() will return a descriptive result
       
   210          *           object
       
   211          */
       
   212         public int parse(char c, char[] ia, int ip, int il) {
       
   213             if (isHigh(c)) {
       
   214                 if (il - ip < 2) {
       
   215                     error = CoderResult.UNDERFLOW;
       
   216                     return -1;
       
   217                 }
       
   218                 char d = ia[ip + 1];
       
   219                 if (isLow(d)) {
       
   220                     character = toUCS4(c, d);
       
   221                     isPair = true;
       
   222                     error = null;
       
   223                     return character;
       
   224                 }
       
   225                 error = CoderResult.malformedForLength(1);
       
   226                 return -1;
       
   227             }
       
   228             if (isLow(c)) {
       
   229                 error = CoderResult.malformedForLength(1);
       
   230                 return -1;
       
   231             }
       
   232             character = c;
       
   233             isPair = false;
       
   234             error = null;
       
   235             return character;
       
   236         }
       
   237 
       
   238     }
       
   239 
       
   240     /**
       
   241      * Surrogate generation support.  Charset implementations may use instances
       
   242      * of this class to handle the details of generating UTF-16 surrogate
       
   243      * pairs.
       
   244      */
       
   245     public static class Generator {
       
   246 
       
   247         public Generator() { }
       
   248 
       
   249         private CoderResult error = CoderResult.OVERFLOW;
       
   250 
       
   251         /**
       
   252          * If the previous generation operation detected an error, return the
       
   253          * object describing that error.
       
   254          */
       
   255         public CoderResult error() {
       
   256             return error;
       
   257         }
       
   258 
       
   259         /**
       
   260          * Generates one or two UTF-16 characters to represent the given UCS-4
       
   261          * character.
       
   262          *
       
   263          * @param  uc   The UCS-4 character
       
   264          * @param  len  The number of input bytes from which the UCS-4 value
       
   265          *              was constructed (used when creating result objects)
       
   266          * @param  dst  The destination buffer, to which one or two UTF-16
       
   267          *              characters will be written
       
   268          *
       
   269          * @return   Either a positive count of the number of UTF-16 characters
       
   270          *           written to the destination buffer, or -1, in which case
       
   271          *           error() will return a descriptive result object
       
   272          */
       
   273         public int generate(int uc, int len, CharBuffer dst) {
       
   274             if (uc <= 0xffff) {
       
   275                 if (is(uc)) {
       
   276                     error = CoderResult.malformedForLength(len);
       
   277                     return -1;
       
   278                 }
       
   279                 if (dst.remaining() < 1) {
       
   280                     error = CoderResult.OVERFLOW;
       
   281                     return -1;
       
   282                 }
       
   283                 dst.put((char)uc);
       
   284                 error = null;
       
   285                 return 1;
       
   286             }
       
   287             if (uc < UCS4_MIN) {
       
   288                 error = CoderResult.malformedForLength(len);
       
   289                 return -1;
       
   290             }
       
   291             if (uc <= UCS4_MAX) {
       
   292                 if (dst.remaining() < 2) {
       
   293                     error = CoderResult.OVERFLOW;
       
   294                     return -1;
       
   295                 }
       
   296                 dst.put(high(uc));
       
   297                 dst.put(low(uc));
       
   298                 error = null;
       
   299                 return 2;
       
   300             }
       
   301             error = CoderResult.unmappableForLength(len);
       
   302             return -1;
       
   303         }
       
   304 
       
   305         /**
       
   306          * Generates one or two UTF-16 characters to represent the given UCS-4
       
   307          * character.
       
   308          *
       
   309          * @param  uc   The UCS-4 character
       
   310          * @param  len  The number of input bytes from which the UCS-4 value
       
   311          *              was constructed (used when creating result objects)
       
   312          * @param  da   The destination array, to which one or two UTF-16
       
   313          *              characters will be written
       
   314          * @param  dp   The destination position
       
   315          * @param  dl   The destination limit
       
   316          *
       
   317          * @return   Either a positive count of the number of UTF-16 characters
       
   318          *           written to the destination buffer, or -1, in which case
       
   319          *           error() will return a descriptive result object
       
   320          */
       
   321         public int generate(int uc, int len, char[] da, int dp, int dl) {
       
   322             if (uc <= 0xffff) {
       
   323                 if (is(uc)) {
       
   324                     error = CoderResult.malformedForLength(len);
       
   325                     return -1;
       
   326                 }
       
   327                 if (dl - dp < 1) {
       
   328                     error = CoderResult.OVERFLOW;
       
   329                     return -1;
       
   330                 }
       
   331                 da[dp] = (char)uc;
       
   332                 error = null;
       
   333                 return 1;
       
   334             }
       
   335             if (uc < UCS4_MIN) {
       
   336                 error = CoderResult.malformedForLength(len);
       
   337                 return -1;
       
   338             }
       
   339             if (uc <= UCS4_MAX) {
       
   340                 if (dl - dp < 2) {
       
   341                     error = CoderResult.OVERFLOW;
       
   342                     return -1;
       
   343                 }
       
   344                 da[dp] = high(uc);
       
   345                 da[dp + 1] = low(uc);
       
   346                 error = null;
       
   347                 return 2;
       
   348             }
       
   349             error = CoderResult.unmappableForLength(len);
       
   350             return -1;
       
   351         }
       
   352 
       
   353     }
       
   354 
       
   355 }