8012180: Speed up removeNonInlineHtmlTags
authorjjg
Tue, 14 May 2013 10:14:56 -0700
changeset 17571 50895ba58e0d
parent 17570 78512b2899db
child 17572 1081a023532f
8012180: Speed up removeNonInlineHtmlTags Reviewed-by: darcy
langtools/src/share/classes/com/sun/tools/doclets/formats/html/HtmlDocletWriter.java
langtools/src/share/classes/com/sun/tools/doclets/formats/html/SourceToHTMLConverter.java
langtools/src/share/classes/com/sun/tools/doclets/formats/html/markup/HtmlTag.java
langtools/src/share/classes/com/sun/tools/doclets/internal/toolkit/util/Util.java
--- a/langtools/src/share/classes/com/sun/tools/doclets/formats/html/HtmlDocletWriter.java	Tue May 14 10:14:55 2013 -0700
+++ b/langtools/src/share/classes/com/sun/tools/doclets/formats/html/HtmlDocletWriter.java	Tue May 14 10:14:56 2013 -0700
@@ -1620,14 +1620,8 @@
                 if (isFirstSentence) {
                     text = removeNonInlineHtmlTags(text);
                 }
-                StringTokenizer lines = new StringTokenizer(text, "\r\n", true);
-                StringBuilder textBuff = new StringBuilder();
-                while (lines.hasMoreTokens()) {
-                    StringBuilder line = new StringBuilder(lines.nextToken());
-                    Util.replaceTabs(configuration, line);
-                    textBuff.append(line.toString());
-                }
-                result.addContent(new RawHtml(textBuff.toString()));
+                text = Util.replaceTabs(configuration, text);
+                result.addContent(new RawHtml(text));
             }
         }
         return result;
@@ -1732,60 +1726,55 @@
         return text;
     }
 
-    public String removeNonInlineHtmlTags(String text) {
-        if (text.indexOf('<') < 0) {
+    static Set<String> blockTags = new HashSet<>();
+    static {
+        for (HtmlTag t: HtmlTag.values()) {
+            if (t.blockType == HtmlTag.BlockType.BLOCK)
+                blockTags.add(t.value);
+        }
+    }
+
+    public static String removeNonInlineHtmlTags(String text) {
+        final int len = text.length();
+
+        int startPos = 0;                     // start of text to copy
+        int lessThanPos = text.indexOf('<');  // position of latest '<'
+        if (lessThanPos < 0) {
             return text;
         }
-        String noninlinetags[] = { "<ul>", "</ul>", "<ol>", "</ol>",
-                "<dl>", "</dl>", "<table>", "</table>",
-                "<tr>", "</tr>", "<td>", "</td>",
-                "<th>", "</th>", "<p>", "</p>",
-                "<li>", "</li>", "<dd>", "</dd>",
-                "<dir>", "</dir>", "<dt>", "</dt>",
-                "<h1>", "</h1>", "<h2>", "</h2>",
-                "<h3>", "</h3>", "<h4>", "</h4>",
-                "<h5>", "</h5>", "<h6>", "</h6>",
-                "<pre>", "</pre>", "<menu>", "</menu>",
-                "<listing>", "</listing>", "<hr>",
-                "<blockquote>", "</blockquote>",
-                "<center>", "</center>",
-                "<UL>", "</UL>", "<OL>", "</OL>",
-                "<DL>", "</DL>", "<TABLE>", "</TABLE>",
-                "<TR>", "</TR>", "<TD>", "</TD>",
-                "<TH>", "</TH>", "<P>", "</P>",
-                "<LI>", "</LI>", "<DD>", "</DD>",
-                "<DIR>", "</DIR>", "<DT>", "</DT>",
-                "<H1>", "</H1>", "<H2>", "</H2>",
-                "<H3>", "</H3>", "<H4>", "</H4>",
-                "<H5>", "</H5>", "<H6>", "</H6>",
-                "<PRE>", "</PRE>", "<MENU>", "</MENU>",
-                "<LISTING>", "</LISTING>", "<HR>",
-                "<BLOCKQUOTE>", "</BLOCKQUOTE>",
-                "<CENTER>", "</CENTER>"
-        };
-        for (int i = 0; i < noninlinetags.length; i++) {
-            text = replace(text, noninlinetags[i], "");
+
+        StringBuilder result = new StringBuilder();
+    main: while (lessThanPos != -1) {
+            int currPos = lessThanPos + 1;
+            if (currPos == len)
+                break;
+            char ch = text.charAt(currPos);
+            if (ch == '/') {
+                if (++currPos == len)
+                    break;
+                ch = text.charAt(currPos);
+            }
+            int tagPos = currPos;
+            while (isHtmlTagLetterOrDigit(ch)) {
+                if (++currPos == len)
+                    break main;
+                ch = text.charAt(currPos);
+            }
+            if (ch == '>' && blockTags.contains(text.substring(tagPos, currPos).toLowerCase())) {
+                result.append(text, startPos, lessThanPos);
+                startPos = currPos + 1;
+            }
+            lessThanPos = text.indexOf('<', currPos);
         }
-        return text;
+        result.append(text.substring(startPos));
+
+        return result.toString();
     }
 
-    public String replace(String text, String tobe, String by) {
-        while (true) {
-            int startindex = text.indexOf(tobe);
-            if (startindex < 0) {
-                return text;
-            }
-            int endindex = startindex + tobe.length();
-            StringBuilder replaced = new StringBuilder();
-            if (startindex > 0) {
-                replaced.append(text.substring(0, startindex));
-            }
-            replaced.append(by);
-            if (text.length() > endindex) {
-                replaced.append(text.substring(endindex));
-            }
-            text = replaced.toString();
-        }
+    private static boolean isHtmlTagLetterOrDigit(char ch) {
+        return ('a' <= ch && ch <= 'z') ||
+                ('A' <= ch && ch <= 'Z') ||
+                ('1' <= ch && ch <= '6');
     }
 
     /**
--- a/langtools/src/share/classes/com/sun/tools/doclets/formats/html/SourceToHTMLConverter.java	Tue May 14 10:14:55 2013 -0700
+++ b/langtools/src/share/classes/com/sun/tools/doclets/formats/html/SourceToHTMLConverter.java	Tue May 14 10:14:56 2013 -0700
@@ -269,9 +269,7 @@
      */
     private void addLine(Content pre, String line, int currentLineNo) {
         if (line != null) {
-            StringBuilder lineBuffer = new StringBuilder(line);
-            Util.replaceTabs(configuration, lineBuffer);
-            pre.addContent(lineBuffer.toString());
+            pre.addContent(Util.replaceTabs(configuration, line));
             Content anchor = HtmlTree.A_NAME("line." + Integer.toString(currentLineNo));
             pre.addContent(anchor);
             pre.addContent(NEW_LINE);
--- a/langtools/src/share/classes/com/sun/tools/doclets/formats/html/markup/HtmlTag.java	Tue May 14 10:14:55 2013 -0700
+++ b/langtools/src/share/classes/com/sun/tools/doclets/formats/html/markup/HtmlTag.java	Tue May 14 10:14:56 2013 -0700
@@ -44,6 +44,7 @@
     CENTER,
     CODE(BlockType.INLINE, EndTag.END),
     DD,
+    DIR,
     DIV,
     DL,
     DT,
@@ -63,6 +64,7 @@
     I(BlockType.INLINE, EndTag.END),
     IMG(BlockType.INLINE, EndTag.NOEND),
     LI,
+    LISTING,
     LINK(BlockType.OTHER, EndTag.NOEND),
     MENU,
     META(BlockType.OTHER, EndTag.NOEND),
@@ -85,14 +87,14 @@
     TT(BlockType.INLINE, EndTag.END),
     UL;
 
-    protected final BlockType blockType;
-    protected final EndTag endTag;
-    private final String value;
+    public final BlockType blockType;
+    public final EndTag endTag;
+    public final String value;
 
     /**
      * Enum representing the type of HTML element.
      */
-    protected static enum BlockType {
+    public static enum BlockType {
         BLOCK,
         INLINE,
         OTHER;
@@ -101,7 +103,7 @@
     /**
      * Enum representing HTML end tag requirement.
      */
-    protected static enum EndTag {
+    public static enum EndTag {
         END,
         NOEND;
     }
--- a/langtools/src/share/classes/com/sun/tools/doclets/internal/toolkit/util/Util.java	Tue May 14 10:14:55 2013 -0700
+++ b/langtools/src/share/classes/com/sun/tools/doclets/internal/toolkit/util/Util.java	Tue May 14 10:14:56 2013 -0700
@@ -593,20 +593,42 @@
     }
 
     /**
-     * Replace all tabs with the appropriate number of spaces.
+     * Replace all tabs in a string with the appropriate number of spaces.
+     * The string may be a multi-line string.
      * @param configuration the doclet configuration defining the setting for the
      *                      tab length.
-     * @param sb the StringBuilder in which to replace the tabs
+     * @param text the text for which the tabs should be expanded
+     * @return the text with all tabs expanded
      */
-    public static void replaceTabs(Configuration configuration, StringBuilder sb) {
-        int tabLength = configuration.sourcetab;
-        String whitespace = configuration.tabSpaces;
-        int index = 0;
-        while ((index = sb.indexOf("\t", index)) != -1) {
-            int spaceCount = tabLength - index % tabLength;
-            sb.replace(index, index+1, whitespace.substring(0, spaceCount));
-            index += spaceCount;
+    public static String replaceTabs(Configuration configuration, String text) {
+        if (text.indexOf("\t") == -1)
+            return text;
+
+        final int tabLength = configuration.sourcetab;
+        final String whitespace = configuration.tabSpaces;
+        final int textLength = text.length();
+        StringBuilder result = new StringBuilder(textLength);
+        int pos = 0;
+        int lineLength = 0;
+        for (int i = 0; i < textLength; i++) {
+            char ch = text.charAt(i);
+            switch (ch) {
+                case '\n': case '\r':
+                    lineLength = 0;
+                    break;
+                case '\t':
+                    result.append(text, pos, i);
+                    int spaceCount = tabLength - lineLength % tabLength;
+                    result.append(whitespace, 0, spaceCount);
+                    lineLength += spaceCount;
+                    pos = i + 1;
+                    break;
+                default:
+                    lineLength++;
+            }
         }
+        result.append(text, pos, textLength);
+        return result.toString();
     }
 
     /**