test/jdk/java/util/jar/Manifest/ValueUtf8Coding.java
changeset 53095 33a51275fee0
equal deleted inserted replaced
53094:9e590041fcd4 53095:33a51275fee0
       
     1 /*
       
     2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.
       
     8  *
       
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    12  * version 2 for more details (a copy is included in the LICENSE file that
       
    13  * accompanied this code).
       
    14  *
       
    15  * You should have received a copy of the GNU General Public License version
       
    16  * 2 along with this work; if not, write to the Free Software Foundation,
       
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    18  *
       
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    20  * or visit www.oracle.com if you need additional information or have any
       
    21  * questions.
       
    22  */
       
    23 
       
    24 import static java.nio.charset.StandardCharsets.UTF_8;
       
    25 
       
    26 import java.io.ByteArrayInputStream;
       
    27 import java.io.ByteArrayOutputStream;
       
    28 import java.io.IOException;
       
    29 import java.util.jar.Attributes;
       
    30 import java.util.jar.Attributes.Name;
       
    31 import java.util.jar.Manifest;
       
    32 import java.util.List;
       
    33 import java.util.ArrayList;
       
    34 
       
    35 import org.testng.annotations.Test;
       
    36 import static org.testng.Assert.*;
       
    37 
       
    38 /**
       
    39  * @test
       
    40  * @bug 8066619
       
    41  * @run testng ValueUtf8Coding
       
    42  * @summary Tests encoding and decoding manifest header values to and from
       
    43  * UTF-8 with the complete Unicode character set.
       
    44  */ /*
       
    45  * see also "../tools/launcher/UnicodeTest.java" for manifest attributes
       
    46  * parsed during launch
       
    47  */
       
    48 public class ValueUtf8Coding {
       
    49 
       
    50     /**
       
    51      * Maximum number of bytes of UTF-8 encoded characters in one header value.
       
    52      * <p>
       
    53      * There are too many different Unicode code points (more than one million)
       
    54      * to fit all into one manifest value. The specifications state:
       
    55      * <q>Implementations should support 65535-byte (not character) header
       
    56      * values, and 65535 headers per file. They might run out of memory,
       
    57      * but there should not be hard-coded limits below these values.</q>
       
    58      *
       
    59      * @see <a
       
    60      * href="{@docRoot}/../specs/jar/jar.html#Notes_on_Manifest_and_Signature_Files">
       
    61      * Notes on Manifest and Signature Files</a>
       
    62      */
       
    63     static final int SUPPORTED_VALUE_LENGTH = 65535;
       
    64 
       
    65     /**
       
    66      * Returns {@code true} if {@code codePoint} is known not to be a supported
       
    67      * character in manifest header values. Explicitly forbidden in manifest
       
    68      * header values are according to a statement from the specifications:
       
    69      * <q>otherchar: any UTF-8 character except NUL, CR and LF</q>.
       
    70      * {@code NUL} ({@code 0x0}), however, works just fine and might have been
       
    71      * used and might still be.
       
    72      *
       
    73      * @see <a href="{@docRoot}/../specs/jar/jar.html#Section-Specification">
       
    74      * Jar File Specification</a>
       
    75      */
       
    76     static boolean isUnsupportedManifestValueCharacter(int codePoint) {
       
    77         return codePoint == '\r' /* CR */ || codePoint == '\n' /* LF */;
       
    78     };
       
    79 
       
    80     /**
       
    81      * Produces a list of strings with all Unicode characters except those
       
    82      * explicitly invalid in manifest header values.
       
    83      * Each string is filled with as many characters as fit into
       
    84      * {@link #SUPPORTED_VALUE_LENGTH} bytes with UTF-8 encoding except the
       
    85      * last string which contains the remaining characters. Each of those
       
    86      * strings becomes a header value the number of which 65535 should be
       
    87      * supported per file.
       
    88      *
       
    89      * @see <a
       
    90      * href="{@docRoot}/../specs/jar/jar.html#Notes_on_Manifest_and_Signature_Files">
       
    91      * Notes on Manifest and Signature Files</a>
       
    92      */
       
    93     static List<String> produceValuesWithAllUnicodeCharacters() {
       
    94         ArrayList<String> values = new ArrayList<>();
       
    95         byte[] valueBuf = new byte[SUPPORTED_VALUE_LENGTH];
       
    96         int pos = 0;
       
    97         for (int codePoint = Character.MIN_CODE_POINT;
       
    98                 codePoint <= Character.MAX_CODE_POINT; codePoint++) {
       
    99             if (isUnsupportedManifestValueCharacter(codePoint)) {
       
   100                 continue;
       
   101             }
       
   102 
       
   103             byte[] charBuf = Character.toString(codePoint).getBytes(UTF_8);
       
   104             if (pos + charBuf.length > valueBuf.length) {
       
   105                 values.add(new String(valueBuf, 0, pos, UTF_8));
       
   106                 pos = 0;
       
   107             }
       
   108             System.arraycopy(charBuf, 0, valueBuf, pos, charBuf.length);
       
   109             pos += charBuf.length;
       
   110         }
       
   111         if (pos > 0) {
       
   112             values.add(new String(valueBuf, 0, pos, UTF_8));
       
   113         }
       
   114         // minimum number of headers supported is the same as the minimum size
       
   115         // of each header value in bytes
       
   116         assertTrue(values.size() <= SUPPORTED_VALUE_LENGTH);
       
   117         return values;
       
   118     }
       
   119 
       
   120     /**
       
   121      * Returns simple, valid, short, and distinct manifest header names.
       
   122      * The returned name cannot collide with "{@code Manifest-Version}" because
       
   123      * the returned string does not contain "{@code -}".
       
   124      */
       
   125     static Name azName(int seed) {
       
   126         StringBuffer name = new StringBuffer();
       
   127         do {
       
   128             name.insert(0, (char) (seed % 26 + (seed < 26 ? 'A' : 'a')));
       
   129             seed = seed / 26 - 1;
       
   130         } while (seed >= 0);
       
   131         return new Name(name.toString());
       
   132     }
       
   133 
       
   134     /**
       
   135      * Writes and reads a manifest with the complete Unicode character set.
       
   136      * The characters are grouped into manifest header values with about as
       
   137      * many bytes as allowed each, utilizing a single big manifest.
       
   138      * <p>
       
   139      * This test assumes that a manifest is encoded and decoded correctly if
       
   140      * writing and then reading it again results in a manifest with identical
       
   141      * values as the original. The test is not about other aspects of writing
       
   142      * and reading manifests than only that, given the fact and the way it
       
   143      * works for some characters such as the most widely and often used ones,
       
   144      * it also works for the complete Unicode character set just the same.
       
   145      * <p>
       
   146      * Only header values are tested. The set of allowed characters for header
       
   147      * names are much more limited and are a different topic entirely and most
       
   148      * simple ones are used here as necessary just to get valid and different
       
   149      * ones (see {@link #azName}).
       
   150      * <p>
       
   151      * Because the current implementation under test uses different portions
       
   152      * of code depending on where the value occurs to read or write, each
       
   153      * character is tested in each of the three positions:<ul>
       
   154      * <li>main attribute header,</li>
       
   155      * <li>named section name, and</li>
       
   156      * <li>named sections header values</li>
       
   157      * </ul>
       
   158      * Implementation of writing the main section headers in
       
   159      * {@link Attributes#writeMain(java.io.DataOutputStream)} differs from the
       
   160      * one writing named section headers in
       
   161      * {@link Attributes#write(java.io.DataOutputStream)} regarding the special
       
   162      * order of {@link Name#MANIFEST_VERSION} and
       
   163      * {@link Name#SIGNATURE_VERSION} and also
       
   164      * {@link Manifest#read(java.io.InputStream)} at least potentially reads
       
   165      * main sections differently than reading named sections names headers in
       
   166      * {@link Attributes#read(Manifest.FastInputStream, byte[])}.
       
   167      */
       
   168     @Test
       
   169     public void testCompleteUnicodeCharacterSet() throws IOException {
       
   170         Manifest mf = new Manifest();
       
   171         mf.getMainAttributes().put(Name.MANIFEST_VERSION, "1.0");
       
   172 
       
   173         List<String> values = produceValuesWithAllUnicodeCharacters();
       
   174         for (int i = 0; i < values.size(); i++) {
       
   175             Name name = azName(i);
       
   176             String value = values.get(i);
       
   177 
       
   178             mf.getMainAttributes().put(name, value);
       
   179             Attributes attributes = new Attributes();
       
   180             mf.getEntries().put(value, attributes);
       
   181             attributes.put(name, value);
       
   182         }
       
   183 
       
   184         mf = writeAndRead(mf);
       
   185 
       
   186         for (int i = 0; i < values.size(); i++) {
       
   187             String value = values.get(i);
       
   188             Name name = azName(i);
       
   189 
       
   190             assertEquals(mf.getMainAttributes().getValue(name), value,
       
   191                     "main attributes header value");
       
   192             Attributes attributes = mf.getAttributes(value);
       
   193             assertNotNull(attributes, "named section");
       
   194             assertEquals(attributes.getValue(name), value,
       
   195                     "named section attributes value");
       
   196         }
       
   197     }
       
   198 
       
   199     static Manifest writeAndRead(Manifest mf) throws IOException {
       
   200         ByteArrayOutputStream out = new ByteArrayOutputStream();
       
   201         mf.write(out);
       
   202         byte[] mfBytes = out.toByteArray();
       
   203 
       
   204         System.out.println("-".repeat(72));
       
   205         System.out.print(new String(mfBytes, UTF_8));
       
   206         System.out.println("-".repeat(72));
       
   207 
       
   208         ByteArrayInputStream in = new ByteArrayInputStream(mfBytes);
       
   209         return new Manifest(in);
       
   210     }
       
   211 
       
   212 }