# HG changeset patch # User lana # Date 1449768289 28800 # Node ID 65254ce59909cc81bf1e611bf87c43532b25cdce # Parent a8eb69059254efeb380880240e7968b2fb75ad9c# Parent 20d95817d066930d4a3449508c1701ed92739f19 Merge diff -r a8eb69059254 -r 65254ce59909 jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentFragmentScannerImpl.java --- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentFragmentScannerImpl.java Thu Dec 10 08:17:06 2015 -0800 +++ b/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentFragmentScannerImpl.java Thu Dec 10 09:24:49 2015 -0800 @@ -1394,7 +1394,12 @@ fEmptyElement = true; return true; } else if (!isValidNameStartChar(c) || !sawSpace) { - reportFatalError("ElementUnterminated", new Object[]{fElementQName.rawname}); + // Second chance. Check if this character is a high + // surrogate of a valid name start character. + if (!isValidNameStartHighSurrogate(c) || !sawSpace) { + reportFatalError("ElementUnterminated", + new Object[]{fElementQName.rawname}); + } } return false; @@ -2606,40 +2611,38 @@ private void startOfMarkup() throws IOException { fMarkupDepth++; final int ch = fEntityScanner.peekChar(); - - switch(ch){ - case '?' :{ - setScannerState(SCANNER_STATE_PI); - fEntityScanner.skipChar(ch); - break; - } - case '!' :{ - fEntityScanner.skipChar(ch); - if (fEntityScanner.skipChar('-')) { - if (!fEntityScanner.skipChar('-')) { - reportFatalError("InvalidCommentStart", + if (isValidNameStartChar(ch) || isValidNameStartHighSurrogate(ch)) { + setScannerState(SCANNER_STATE_START_ELEMENT_TAG); + } else { + switch(ch){ + case '?' :{ + setScannerState(SCANNER_STATE_PI); + fEntityScanner.skipChar(ch); + break; + } + case '!' 
:{ + fEntityScanner.skipChar(ch); + if (fEntityScanner.skipChar('-')) { + if (!fEntityScanner.skipChar('-')) { + reportFatalError("InvalidCommentStart", + null); + } + setScannerState(SCANNER_STATE_COMMENT); + } else if (fEntityScanner.skipString(cdata)) { + setScannerState(SCANNER_STATE_CDATA ); + } else if (!scanForDoctypeHook()) { + reportFatalError("MarkupNotRecognizedInContent", null); } - setScannerState(SCANNER_STATE_COMMENT); - } else if (fEntityScanner.skipString(cdata)) { - setScannerState(SCANNER_STATE_CDATA ); - } else if (!scanForDoctypeHook()) { - reportFatalError("MarkupNotRecognizedInContent", - null); + break; } - break; - } - case '/' :{ - setScannerState(SCANNER_STATE_END_ELEMENT_TAG); - fEntityScanner.skipChar(ch); - break; - } - default :{ - if (isValidNameStartChar(ch)) { - setScannerState(SCANNER_STATE_START_ELEMENT_TAG); - } else { - reportFatalError("MarkupNotRecognizedInContent", - null); + case '/' :{ + setScannerState(SCANNER_STATE_END_ELEMENT_TAG); + fEntityScanner.skipChar(ch); + break; + } + default :{ + reportFatalError("MarkupNotRecognizedInContent", null); } } } diff -r a8eb69059254 -r 65254ce59909 jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentScannerImpl.java --- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentScannerImpl.java Thu Dec 10 08:17:06 2015 -0800 +++ b/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentScannerImpl.java Thu Dec 10 09:24:49 2015 -0800 @@ -847,9 +847,12 @@ case SCANNER_STATE_START_OF_MARKUP: { fMarkupDepth++; - - if (fEntityScanner.skipChar('?')) { - setScannerState(SCANNER_STATE_PI); + if (isValidNameStartChar(fEntityScanner.peekChar()) || + isValidNameStartHighSurrogate(fEntityScanner.peekChar())) { + setScannerState(SCANNER_STATE_ROOT_ELEMENT); + setDriver(fContentDriver); + //from now onwards this would be handled by fContentDriver,in the same next() call + return fContentDriver.next(); } else if 
(fEntityScanner.skipChar('!')) { if (fEntityScanner.skipChar('-')) { if (!fEntityScanner.skipChar('-')) { @@ -872,12 +875,8 @@ reportFatalError("MarkupNotRecognizedInProlog", null); } - } else if (XMLChar.isNameStart(fEntityScanner.peekChar())) { - setScannerState(SCANNER_STATE_ROOT_ELEMENT); - setDriver(fContentDriver); - //from now onwards this would be handled by fContentDriver,in the same next() call - return fContentDriver.next(); - + } else if (fEntityScanner.skipChar('?')) { + setScannerState(SCANNER_STATE_PI); } else { reportFatalError("MarkupNotRecognizedInProlog", null); @@ -1395,7 +1394,8 @@ } else if (fEntityScanner.skipChar('/')) { reportFatalError("MarkupNotRecognizedInMisc", null); - } else if (XMLChar.isNameStart(fEntityScanner.peekChar())) { + } else if (isValidNameStartChar(fEntityScanner.peekChar()) || + isValidNameStartHighSurrogate(fEntityScanner.peekChar())) { reportFatalError("MarkupNotRecognizedInMisc", null); scanStartElement(); diff -r a8eb69059254 -r 65254ce59909 jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLScanner.java --- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLScanner.java Thu Dec 10 08:17:06 2015 -0800 +++ b/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLScanner.java Thu Dec 10 09:24:49 2015 -0800 @@ -784,7 +784,7 @@ if (XMLChar.isHighSurrogate(c)) { scanSurrogates(text); } - if (isInvalidLiteral(c)) { + else if (isInvalidLiteral(c)) { reportFatalError("InvalidCharInComment", new Object[] { Integer.toHexString(c) }); fEntityScanner.scanChar(); @@ -1385,6 +1385,14 @@ return (XMLChar.isNameStart(value)); } // isValidNameStartChar(int): boolean + // returns true if the given character is + // a valid high surrogate for a nameStartChar + // with respect to the version of XML understood + // by this scanner. 
+    protected boolean isValidNameStartHighSurrogate(int value) {
+        return false;
+    } // isValidNameStartHighSurrogate(int): boolean
+
     protected boolean versionSupported(String version ) {
         return version.equals("1.0") || version.equals("1.1");
     } // version Supported
diff -r a8eb69059254 -r 65254ce59909 jaxp/test/javax/xml/jaxp/unittest/parsers/SupplementaryChars.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jaxp/test/javax/xml/jaxp/unittest/parsers/SupplementaryChars.java	Thu Dec 10 09:24:49 2015 -0800
@@ -0,0 +1,99 @@
+package parsers;
+
+import java.io.ByteArrayInputStream;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import org.xml.sax.SAXParseException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * @bug 8072081
+ * @summary verifies that supplementary characters are supported as character
+ * data in xml 1.0, and also names in xml 1.1.
+ *
+ * Joe Wang (huizhe.wang@oracle.com)
+ */
+
+public class SupplementaryChars {
+
+    /**
+     * Parses a document that the parser is expected to accept.
+     *
+     * @param xml document text; it is encoded as UTF-8 before parsing
+     * @throws Exception if the parser rejects the document
+     */
+    @Test(dataProvider = "supported")
+    public void test(String xml) throws Exception {
+        ByteArrayInputStream stream = new ByteArrayInputStream(xml.getBytes("UTF-8"));
+        getParser().parse(stream, new DefaultHandler());
+        stream.close();
+    }
+
+    /**
+     * Parses a document that the parser is expected to reject; the test
+     * passes only when a SAXParseException is thrown.
+     *
+     * @param xml document text; it is encoded as UTF-8 before parsing
+     * @throws Exception the expected SAXParseException
+     */
+    @Test(dataProvider = "unsupported", expectedExceptions = SAXParseException.class)
+    public void testInvalid(String xml) throws Exception {
+        ByteArrayInputStream stream = new ByteArrayInputStream(xml.getBytes("UTF-8"));
+        getParser().parse(stream, new DefaultHandler());
+        stream.close();
+    }
+
+    /**
+     * Documents that must parse: U+2000B as character data and inside a
+     * comment for XML 1.0 and 1.1, and in element and attribute names for
+     * XML 1.1.
+     *
+     * NOTE(review): the markup in these literals was reconstructed from the
+     * test summary; confirm against changeset 65254ce59909.
+     */
+    @DataProvider(name = "supported")
+    private Object[][] supported() {
+
+        return new Object[][] {
+            {"<?xml version=\"1.0\"?><tag>\uD840\uDC0B</tag>"},
+            {"<?xml version=\"1.0\"?><!-- \uD840\uDC0B --><tag/>"},
+            {"<?xml version=\"1.1\"?><tag\uD840\uDC0B>in tag name</tag\uD840\uDC0B>"},
+            {"<?xml version=\"1.1\"?><tag attr\uD840\uDC0B=\"in attribute\">in attribute name</tag>"},
+            {"<?xml version=\"1.1\"?><tag>\uD840\uDC0B</tag>"},
+            {"<?xml version=\"1.1\"?><!-- \uD840\uDC0B --><tag/>"}
+        };
+    }
+
+    /**
+     * Documents that must be rejected: U+2000B in element and attribute
+     * names of an XML 1.0 document.
+     */
+    @DataProvider(name = "unsupported")
+    private Object[][] unsupported() {
+        return new Object[][] {
+            {"<?xml version=\"1.0\"?><tag\uD840\uDC0B>in tag name</tag\uD840\uDC0B>"},
+            {"<?xml version=\"1.0\"?><tag attr\uD840\uDC0B=\"in attribute\">in attribute name</tag>"}
+        };
+    }
+
+    /**
+     * Creates a SAX parser from a newly obtained SAXParserFactory.
+     *
+     * @return the new parser
+     * @throws RuntimeException if the factory or parser cannot be created;
+     *         the original exception is kept as the cause
+     */
+    private SAXParser getParser() {
+        SAXParser parser = null;
+        try {
+            SAXParserFactory factory = SAXParserFactory.newInstance();
+            parser = factory.newSAXParser();
+        } catch (Exception e) {
+            throw new RuntimeException(e.getMessage(), e);
+        }
+        return parser;
+    }
+}