src/java.xml.crypto/share/classes/com/sun/org/apache/xml/internal/security/c14n/implementations/UtfHelpper.java
changeset 50614 3810c9a2efa1
parent 47216 71c04702a3d5
equal deleted inserted replaced
50613:0f93a75b9213 50614:3810c9a2efa1
    22  */
    22  */
    23 package com.sun.org.apache.xml.internal.security.c14n.implementations;
    23 package com.sun.org.apache.xml.internal.security.c14n.implementations;
    24 
    24 
    25 import java.io.IOException;
    25 import java.io.IOException;
    26 import java.io.OutputStream;
    26 import java.io.OutputStream;
       
    27 import java.security.AccessController;
       
    28 import java.security.PrivilegedAction;
    27 import java.util.Map;
    29 import java.util.Map;
    28 
    30 
    29 public class UtfHelpper {
    31 public final class UtfHelpper {
    30 
    32 
    31     static final void writeByte(
    33     /**
       
    34      * Revert to the old behavior (version 2 or before), i.e. surrogate pair characters become
       
    35      * '??' in output. Set system property com.sun.org.apache.xml.internal.security.c14n.oldUtf8=true if you want
       
    36      * to verify signatures generated by version 2 or before that contain 32 bit chars in the
       
    37      * XML document.
       
    38      */
       
    39     private static final boolean OLD_UTF8 =
       
    40         AccessController.doPrivileged((PrivilegedAction<Boolean>)
       
    41             () -> Boolean.getBoolean("com.sun.org.apache.xml.internal.security.c14n.oldUtf8"));
       
    42 
       
    43     private UtfHelpper() {
       
    44         // utility class with static methods only; not meant to be instantiated
       
    45     }
       
    46 
       
    47     public static void writeByte(
    32         final String str,
    48         final String str,
    33         final OutputStream out,
    49         final OutputStream out,
    34         Map<String, byte[]> cache
    50         Map<String, byte[]> cache
    35     ) throws IOException {
    51     ) throws IOException {
    36         byte[] result = cache.get(str);
    52         byte[] result = cache.get(str);
    40         }
    56         }
    41 
    57 
    42         out.write(result);
    58         out.write(result);
    43     }
    59     }
    44 
    60 
    45     static final void writeCharToUtf8(final char c, final OutputStream out) throws IOException {
    61     public static void writeCodePointToUtf8(final int c, final OutputStream out) throws IOException {
       
    62         if (!Character.isValidCodePoint(c) || c >= 0xD800 && c <= 0xDBFF || c >= 0xDC00 && c <= 0xDFFF) {
       
    63             // valid code point: c >= 0x0000 && c <= 0x10FFFF
       
    64             out.write(0x3f);
       
    65             return;
       
    66         }
       
    67         if (OLD_UTF8 && c >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
       
    68             // version 2 or before output 2 question mark characters for 32 bit chars
       
    69             out.write(0x3f);
       
    70             out.write(0x3f);
       
    71             return;
       
    72         }
       
    73 
       
    74         if (c < 0x80) {
       
    75             // 0x00000000 - 0x0000007F
       
    76             // 0xxxxxxx
       
    77             out.write(c);
       
    78             return;
       
    79         }
       
    80         byte extraByte = 0;
       
    81         if (c < 0x800) {
       
    82             // 0x00000080 - 0x000007FF
       
    83             // 110xxxxx 10xxxxxx
       
    84             extraByte = 1;
       
    85         } else if (c < 0x10000) {
       
    86             // 0x00000800 - 0x0000FFFF
       
    87             // 1110xxxx 10xxxxxx 10xxxxxx
       
    88             extraByte = 2;
       
    89         } else if (c < 0x200000) {
       
    90             // 0x00010000 - 0x001FFFFF
       
    91             // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
       
    92             extraByte = 3;
       
    93         } else if (c < 0x4000000) {
       
    94             // 0x00200000 - 0x03FFFFFF
       
    95             // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
       
    96             // already outside valid Character range, just for completeness
       
    97             extraByte = 4;
       
    98         } else if (c <= 0x7FFFFFFF) {
       
    99             // 0x04000000 - 0x7FFFFFFF
       
   100             // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
       
   101             // already outside valid Character range, just for completeness
       
   102             extraByte = 5;
       
   103         } else {
       
   104             // 0x80000000 - 0xFFFFFFFF
       
   105             // case not possible as java has no unsigned int
       
   106             out.write(0x3f);
       
   107             return;
       
   108         }
       
   109 
       
   110         byte write;
       
   111         int shift = 6 * extraByte;
       
   112         write = (byte)((0xFE << (6 - extraByte)) | (c >>> shift));
       
   113         out.write(write);
       
   114         for (int i = extraByte - 1; i >= 0; i--) {
       
   115             shift -= 6;
       
   116             write = (byte)(0x80 | ((c >>> shift) & 0x3F));
       
   117             out.write(write);
       
   118         }
       
   119     }
       
   120 
       
   121     @Deprecated
       
   122     public static void writeCharToUtf8(final char c, final OutputStream out) throws IOException {
    46         if (c < 0x80) {
   123         if (c < 0x80) {
    47             out.write(c);
   124             out.write(c);
    48             return;
   125             return;
    49         }
   126         }
    50         if ((c >= 0xD800 && c <= 0xDBFF) || (c >= 0xDC00 && c <= 0xDFFF)) {
   127         if (c >= 0xD800 && c <= 0xDBFF || c >= 0xDC00 && c <= 0xDFFF) {
    51             //No Surrogates in sun java
   128             //No Surrogates in sun java
    52             out.write(0x3f);
   129             out.write(0x3f);
    53             return;
   130             return;
    54         }
   131         }
    55         int bias;
   132         int bias;
    57         char ch;
   134         char ch;
    58         if (c > 0x07FF) {
   135         if (c > 0x07FF) {
    59             ch = (char)(c>>>12);
   136             ch = (char)(c>>>12);
    60             write = 0xE0;
   137             write = 0xE0;
    61             if (ch > 0) {
   138             if (ch > 0) {
    62                 write |= (ch & 0x0F);
   139                 write |= ch & 0x0F;
    63             }
   140             }
    64             out.write(write);
   141             out.write(write);
    65             write = 0x80;
   142             write = 0x80;
    66             bias = 0x3F;
   143             bias = 0x3F;
    67         } else {
   144         } else {
    68             write = 0xC0;
   145             write = 0xC0;
    69             bias = 0x1F;
   146             bias = 0x1F;
    70         }
   147         }
    71         ch = (char)(c>>>6);
   148         ch = (char)(c>>>6);
    72         if (ch > 0) {
   149         if (ch > 0) {
    73             write |= (ch & bias);
   150             write |= ch & bias;
    74         }
   151         }
    75         out.write(write);
   152         out.write(write);
    76         out.write(0x80 | ((c) & 0x3F));
   153         out.write(0x80 | ((c) & 0x3F));
    77 
   154 
    78     }
   155     }
    79 
   156 
    80     static final void writeStringToUtf8(
   157     public static void writeStringToUtf8(
    81         final String str,
   158         final String str, final OutputStream out
    82         final OutputStream out
   159     ) throws IOException {
    83     ) throws IOException{
       
    84         final int length = str.length();
   160         final int length = str.length();
    85         int i = 0;
   161         int i = 0;
    86         char c;
   162         int c;
    87         while (i < length) {
   163         while (i < length) {
    88             c = str.charAt(i++);
   164             c = str.codePointAt(i);
       
   165             i += Character.charCount(c);
       
   166             if (!Character.isValidCodePoint(c) || c >= 0xD800 && c <= 0xDBFF || c >= 0xDC00 && c <= 0xDFFF) {
       
   167                 // valid code point: c >= 0x0000 && c <= 0x10FFFF
       
   168                 out.write(0x3f);
       
   169                 continue;
       
   170             }
       
   171             if (OLD_UTF8 && c >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
       
   172                 // version 2 or before output 2 question mark characters for 32 bit chars
       
   173                 out.write(0x3f);
       
   174                 out.write(0x3f);
       
   175                 continue;
       
   176             }
    89             if (c < 0x80)  {
   177             if (c < 0x80)  {
    90                 out.write(c);
   178                 out.write(c);
    91                 continue;
   179                 continue;
    92             }
   180             }
    93             if ((c >= 0xD800 && c <= 0xDBFF) || (c >= 0xDC00 && c <= 0xDFFF)) {
   181             byte extraByte = 0;
    94                 //No Surrogates in sun java
   182             if (c < 0x800) {
       
   183                 // 0x00000080 - 0x000007FF
       
   184                 // 110xxxxx 10xxxxxx
       
   185                 extraByte = 1;
       
   186             } else if (c < 0x10000) {
       
   187                 // 0x00000800 - 0x0000FFFF
       
   188                 // 1110xxxx 10xxxxxx 10xxxxxx
       
   189                 extraByte = 2;
       
   190             } else if (c < 0x200000) {
       
   191                 // 0x00010000 - 0x001FFFFF
       
   192                 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
       
   193                 extraByte = 3;
       
   194             } else if (c < 0x4000000) {
       
   195                 // 0x00200000 - 0x03FFFFFF
       
   196                 // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
       
   197                 // already outside valid Character range, just for completeness
       
   198                 extraByte = 4;
       
   199             } else if (c <= 0x7FFFFFFF) {
       
   200                 // 0x04000000 - 0x7FFFFFFF
       
   201                 // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
       
   202                 // already outside valid Character range, just for completeness
       
   203                 extraByte = 5;
       
   204             } else {
       
   205                 // 0x80000000 - 0xFFFFFFFF
       
   206                 // case not possible as java has no unsigned int
    95                 out.write(0x3f);
   207                 out.write(0x3f);
    96                 continue;
   208                 continue;
    97             }
   209             }
    98             char ch;
   210             byte write;
    99             int bias;
   211             int shift = 6 * extraByte;
   100             int write;
   212             write = (byte)((0xFE << (6 - extraByte)) | (c >>> shift));
   101             if (c > 0x07FF) {
   213             out.write(write);
   102                 ch = (char)(c>>>12);
   214             for (int j = extraByte - 1; j >= 0; j--) {
   103                 write = 0xE0;
   215                 shift -= 6;
   104                 if (ch > 0) {
   216                 write = (byte)(0x80 | ((c >>> shift) & 0x3F));
   105                     write |= (ch & 0x0F);
       
   106                 }
       
   107                 out.write(write);
   217                 out.write(write);
   108                 write = 0x80;
   218             }
   109                 bias = 0x3F;
   219 
   110             } else {
   220         }
   111                 write = 0xC0;
   221 
   112                 bias = 0x1F;
   222     }
   113             }
   223 
   114             ch = (char)(c>>>6);
   224     public static byte[] getStringInUtf8(final String str) {
   115             if (ch > 0) {
       
   116                 write |= (ch & bias);
       
   117             }
       
   118             out.write(write);
       
   119             out.write(0x80 | ((c) & 0x3F));
       
   120 
       
   121         }
       
   122 
       
   123     }
       
   124 
       
   125     public static final byte[] getStringInUtf8(final String str) {
       
   126         final int length = str.length();
   225         final int length = str.length();
   127         boolean expanded = false;
   226         boolean expanded = false;
   128         byte[] result = new byte[length];
   227         byte[] result = new byte[length];
   129         int i = 0;
   228         int i = 0;
   130         int out = 0;
   229         int out = 0;
   131         char c;
   230         int c;
   132         while (i < length) {
   231         while (i < length) {
   133             c = str.charAt(i++);
   232             c = str.codePointAt(i);
       
   233             i += Character.charCount(c);
       
   234             if (!Character.isValidCodePoint(c) || c >= 0xD800 && c <= 0xDBFF || c >= 0xDC00 && c <= 0xDFFF) {
       
   235                 // valid code point: c >= 0x0000 && c <= 0x10FFFF
       
   236                 result[out++] = (byte)0x3f;
       
   237                 continue;
       
   238             }
       
   239             if (OLD_UTF8 && c >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
       
   240                 // version 2 or before output 2 question mark characters for 32 bit chars
       
   241                 result[out++] = (byte)0x3f;
       
   242                 result[out++] = (byte)0x3f;
       
   243                 continue;
       
   244             }
   134             if (c < 0x80) {
   245             if (c < 0x80) {
   135                 result[out++] = (byte)c;
   246                 result[out++] = (byte)c;
   136                 continue;
   247                 continue;
   137             }
   248             }
   138             if ((c >= 0xD800 && c <= 0xDBFF) || (c >= 0xDC00 && c <= 0xDFFF)) {
       
   139                 //No Surrogates in sun java
       
   140                 result[out++] = 0x3f;
       
   141                 continue;
       
   142             }
       
   143             if (!expanded) {
   249             if (!expanded) {
   144                 byte newResult[] = new byte[3*length];
   250                 byte newResult[] = new byte[6*length];
   145                 System.arraycopy(result, 0, newResult, 0, out);
   251                 System.arraycopy(result, 0, newResult, 0, out);
   146                 result = newResult;
   252                 result = newResult;
   147                 expanded = true;
   253                 expanded = true;
   148             }
   254             }
   149             char ch;
   255             byte extraByte = 0;
   150             int bias;
   256             if (c < 0x800) {
       
   257                 // 0x00000080 - 0x000007FF
       
   258                 // 110xxxxx 10xxxxxx
       
   259                 extraByte = 1;
       
   260             } else if (c < 0x10000) {
       
   261                 // 0x00000800 - 0x0000FFFF
       
   262                 // 1110xxxx 10xxxxxx 10xxxxxx
       
   263                 extraByte = 2;
       
   264             } else if (c < 0x200000) {
       
   265                 // 0x00010000 - 0x001FFFFF
       
   266                 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
       
   267                 extraByte = 3;
       
   268             } else if (c < 0x4000000) {
       
   269                 // 0x00200000 - 0x03FFFFFF
       
   270                 // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
       
   271                 // already outside valid Character range, just for completeness
       
   272                 extraByte = 4;
       
   273             } else if (c <= 0x7FFFFFFF) {
       
   274                 // 0x04000000 - 0x7FFFFFFF
       
   275                 // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
       
   276                 // already outside valid Character range, just for completeness
       
   277                 extraByte = 5;
       
   278             } else {
       
   279                 // 0x80000000 - 0xFFFFFFFF
       
   280                 // case not possible as java has no unsigned int
       
   281                 result[out++] = 0x3f;
       
   282                 continue;
       
   283             }
   151             byte write;
   284             byte write;
   152             if (c > 0x07FF) {
   285             int shift = 6 * extraByte;
   153                 ch = (char)(c>>>12);
   286             write = (byte)((0xFE << (6 - extraByte)) | (c >>> shift));
   154                 write = (byte)0xE0;
   287             result[out++] = write;
   155                 if (ch > 0) {
   288             for (int j = extraByte - 1; j >= 0; j--) {
   156                     write |= (ch & 0x0F);
   289                 shift -= 6;
   157                 }
   290                 write = (byte)(0x80 | ((c >>> shift) & 0x3F));
   158                 result[out++] = write;
   291                 result[out++] = write;
   159                 write = (byte)0x80;
   292             }
   160                 bias = 0x3F;
       
   161             } else {
       
   162                 write = (byte)0xC0;
       
   163                 bias = 0x1F;
       
   164             }
       
   165             ch = (char)(c>>>6);
       
   166             if (ch > 0) {
       
   167                 write |= (ch & bias);
       
   168             }
       
   169             result[out++] = write;
       
   170             result[out++] = (byte)(0x80 | ((c) & 0x3F));
       
   171         }
   293         }
   172         if (expanded) {
   294         if (expanded) {
   173             byte newResult[] = new byte[out];
   295             byte newResult[] = new byte[out];
   174             System.arraycopy(result, 0, newResult, 0, out);
   296             System.arraycopy(result, 0, newResult, 0, out);
   175             result = newResult;
   297             result = newResult;
   176         }
   298         }
   177         return result;
   299         return result;
   178     }
   300     }
   179 
       
   180 }
   301 }