langtools/src/jdk.compiler/share/classes/com/sun/tools/javac/tree/DocTreeMaker.java
changeset 33424 e6bd5406e2cf
parent 33360 d8ef08003d35
child 34567 c74f68484156
--- a/langtools/src/jdk.compiler/share/classes/com/sun/tools/javac/tree/DocTreeMaker.java	Wed Jul 05 20:56:54 2017 +0200
+++ b/langtools/src/jdk.compiler/share/classes/com/sun/tools/javac/tree/DocTreeMaker.java	Wed Oct 28 10:41:30 2015 -0700
@@ -30,15 +30,15 @@
 import java.util.Collection;
 import java.util.EnumSet;
 import java.util.ListIterator;
-import java.util.Locale;
 
 import com.sun.source.doctree.AttributeTree.ValueKind;
 import com.sun.source.doctree.DocTree;
 import com.sun.source.doctree.DocTree.Kind;
 import com.sun.source.doctree.EndElementTree;
 import com.sun.source.doctree.StartElementTree;
+import com.sun.source.doctree.TextTree;
 import com.sun.tools.doclint.HtmlTag;
-
+import com.sun.tools.javac.api.JavacTrees;
 import com.sun.tools.javac.parser.Tokens.Comment;
 import com.sun.tools.javac.tree.DCTree.DCAttribute;
 import com.sun.tools.javac.tree.DCTree.DCAuthor;
@@ -75,7 +75,6 @@
 import com.sun.tools.javac.util.List;
 import com.sun.tools.javac.util.ListBuffer;
 import com.sun.tools.javac.util.Name;
-import com.sun.tools.javac.util.Options;
 import com.sun.tools.javac.util.Pair;
 import com.sun.tools.javac.util.Position;
 
@@ -97,8 +96,6 @@
     // anywhere but the zero'th position in the first sentence.
     final EnumSet<HtmlTag> sentenceBreakTags;
 
-    private final BreakIterator sentenceBreaker;
-
     /** Get the TreeMaker instance. */
     public static DocTreeMaker instance(Context context) {
         DocTreeMaker instance = context.get(treeMakerKey);
@@ -114,21 +111,16 @@
     /** Access to diag factory for ErroneousTrees. */
     private final JCDiagnostic.Factory diags;
 
+    private final JavacTrees trees;
+
     /** Create a tree maker with NOPOS as initial position.
      */
     protected DocTreeMaker(Context context) {
         context.put(treeMakerKey, this);
         diags = JCDiagnostic.Factory.instance(context);
         this.pos = Position.NOPOS;
+        trees = JavacTrees.instance(context);
         sentenceBreakTags = EnumSet.of(H1, H2, H3, H4, H5, H6, PRE, P);
-        Locale locale = (context.get(Locale.class) != null)
-                ? context.get(Locale.class)
-                : Locale.getDefault();
-        Options options = Options.instance(context);
-        boolean useBreakIterator = options.isSet("breakiterator");
-        sentenceBreaker = (useBreakIterator || !locale.getLanguage().equals(Locale.ENGLISH.getLanguage()))
-                ? BreakIterator.getSentenceInstance(locale)
-                : null;
     }
 
     /** Reassign current position.
@@ -176,10 +168,23 @@
     }
 
     public DCDocComment DocComment(Comment comment, List<DCTree> fullBody, List<DCTree> tags) {
-        final int savepos = pos;
         Pair<List<DCTree>, List<DCTree>> pair = splitBody(fullBody);
         DCDocComment tree = new DCDocComment(comment, fullBody, pair.fst, pair.snd, tags);
-        this.pos = tree.pos = savepos;
+        tree.pos = pos;
+        return tree;
+    }
+
+    /*
+     * Primarily to produce a DocCommenTree when given a
+     * first sentence and a body, this is useful, in cases
+     * where the trees are being synthesized by a tool.
+     */
+    public DCDocComment DocComment(List<DCTree> firstSentence, List<DCTree> body, List<DCTree> tags) {
+        ListBuffer<DCTree> lb = new ListBuffer<>();
+        lb.addAll(firstSentence);
+        lb.addAll(body);
+        List<DCTree> fullBody = lb.toList();
+        DCDocComment tree = new DCDocComment(null, fullBody, firstSentence, body, tags);
         return tree;
     }
 
@@ -341,74 +346,88 @@
 
     /*
      * Breaks up the body tags into the first sentence and its successors.
-     * The first sentence is determined with the presence of a period, block tag,
-     * or a sentence break, as returned by the BreakIterator. Trailing
-     * whitespaces are trimmed.
+     * The first sentence is determined with the presence of a period,
+     * block tag, or a sentence break, as returned by the BreakIterator.
+     * Trailing whitespaces are trimmed.
      */
-    Pair<List<DCTree>, List<DCTree>> splitBody(Collection<? extends DocTree> list) {
-        ListBuffer<DCTree> body = new ListBuffer<>();
-        // split body into first sentence and body
-        ListBuffer<DCTree> fs = new ListBuffer<>();
-        if (list.isEmpty()) {
-            return new Pair<>(fs.toList(), body.toList());
-        }
-        boolean foundFirstSentence = false;
-        ArrayList<DocTree> alist = new ArrayList<>(list);
-        ListIterator<DocTree> itr = alist.listIterator();
-        while (itr.hasNext()) {
-            boolean isFirst = itr.previousIndex() == -1;
-            DocTree dt = itr.next();
-            int spos = ((DCTree)dt).pos;
-            if (foundFirstSentence) {
-                body.add((DCTree) dt);
-                continue;
+    private Pair<List<DCTree>, List<DCTree>> splitBody(Collection<? extends DocTree> list) {
+        // pos is modified as we create trees, therefore
+        // we save the pos and restore it later.
+        final int savedpos = this.pos;
+        try {
+            ListBuffer<DCTree> body = new ListBuffer<>();
+            // split body into first sentence and body
+            ListBuffer<DCTree> fs = new ListBuffer<>();
+            if (list.isEmpty()) {
+                return new Pair<>(fs.toList(), body.toList());
             }
-            switch (dt.getKind()) {
-                case TEXT:
-                    DCText tt = (DCText)dt;
-                    String s = tt.getBody();
-                    int sbreak = getSentenceBreak(s);
-                    if (sbreak > 0) {
-                        s = removeTrailingWhitespace(s.substring(0, sbreak));
-                        DCText text = this.at(spos).Text(s);
-                        fs.add(text);
-                        foundFirstSentence = true;
-                        int nwPos = skipWhiteSpace(tt.getBody(), sbreak);
-                        if (nwPos > 0) {
-                            DCText text2 = this.at(spos + nwPos).Text(tt.getBody().substring(nwPos));
-                            body.add(text2);
-                        }
-                        continue;
-                    } else if (itr.hasNext()) {
-                        // if the next doctree is a break, remove trailing spaces
-                        DocTree next = itr.next();
-                        boolean sbrk = isSentenceBreak(next, false);
-                        if (sbrk) {
-                            s = removeTrailingWhitespace(s);
+            boolean foundFirstSentence = false;
+            ArrayList<DocTree> alist = new ArrayList<>(list);
+            ListIterator<DocTree> itr = alist.listIterator();
+            while (itr.hasNext()) {
+                boolean isFirst = !itr.hasPrevious();
+                DocTree dt = itr.next();
+                int spos = ((DCTree) dt).pos;
+                if (foundFirstSentence) {
+                    body.add((DCTree) dt);
+                    continue;
+                }
+                switch (dt.getKind()) {
+                    case TEXT:
+                        DCText tt = (DCText) dt;
+                        String s = tt.getBody();
+                        DocTree peekedNext = itr.hasNext()
+                                ? alist.get(itr.nextIndex())
+                                : null;
+                        int sbreak = getSentenceBreak(s, peekedNext);
+                        if (sbreak > 0) {
+                            s = removeTrailingWhitespace(s.substring(0, sbreak));
                             DCText text = this.at(spos).Text(s);
                             fs.add(text);
-                            body.add((DCTree)next);
+                            foundFirstSentence = true;
+                            int nwPos = skipWhiteSpace(tt.getBody(), sbreak);
+                            if (nwPos > 0) {
+                                DCText text2 = this.at(spos + nwPos).Text(tt.getBody().substring(nwPos));
+                                body.add(text2);
+                            }
+                            continue;
+                        } else if (itr.hasNext()) {
+                            // if the next doctree is a break, remove trailing spaces
+                            peekedNext = alist.get(itr.nextIndex());
+                            boolean sbrk = isSentenceBreak(peekedNext, false);
+                            if (sbrk) {
+                                DocTree next = itr.next();
+                                s = removeTrailingWhitespace(s);
+                                DCText text = this.at(spos).Text(s);
+                                fs.add(text);
+                                body.add((DCTree) next);
+                                foundFirstSentence = true;
+                                continue;
+                            }
+                        }
+                        break;
+                    default:
+                        if (isSentenceBreak(dt, isFirst)) {
+                            body.add((DCTree) dt);
                             foundFirstSentence = true;
                             continue;
                         }
-                        // reset to previous for further processing
-                        itr.previous();
-                    }
-                    break;
-                default:
-                    if (isSentenceBreak(dt, isFirst)) {
-                        body.add((DCTree)dt);
-                        foundFirstSentence = true;
-                        continue;
-                    }
+                        break;
+                }
+                fs.add((DCTree) dt);
             }
-            fs.add((DCTree)dt);
+            return new Pair<>(fs.toList(), body.toList());
+        } finally {
+            this.pos = savedpos;
         }
-        return new Pair<>(fs.toList(), body.toList());
+    }
+
+    private boolean isTextTree(DocTree tree) {
+        return tree.getKind() == Kind.TEXT;
     }
 
     /*
-     * Computes the first sentence break.
+     * Computes the first sentence break, a simple dot-space algorithm.
      */
     int defaultSentenceBreak(String s) {
         // scan for period followed by whitespace
@@ -437,12 +456,74 @@
         return -1;
     }
 
-    int getSentenceBreak(String s) {
-        if (sentenceBreaker == null) {
+    /*
+     * Computes the first sentence, if using a default breaker,
+     * the break is returned, if not then a -1, indicating that
+     * more doctree elements are required to be examined.
+     *
+     * BreakIterator.next points to the the start of the following sentence,
+     * and does not provide an easy way to disambiguate between "sentence break",
+     * "possible sentence break" and "not a sentence break" at the end of the input.
+     * For example, BreakIterator.next returns the index for the end
+     * of the string for all of these examples,
+     * using vertical bars to delimit the bounds of the example text
+     * |Abc|        (not a valid end of sentence break, if followed by more text)
+     * |Abc.|       (maybe a valid end of sentence break, depending on the following text)
+     * |Abc. |      (maybe a valid end of sentence break, depending on the following text)
+     * |"Abc." |    (maybe a valid end of sentence break, depending on the following text)
+     * |Abc.  |     (definitely a valid end of sentence break)
+     * |"Abc."  |   (definitely a valid end of sentence break)
+     * Therefore, we have to probe further to determine whether
+     * there really is a sentence break or not at the end of this run of text.
+     */
+    int getSentenceBreak(String s, DocTree dt) {
+        BreakIterator breakIterator = trees.getBreakIterator();
+        if (breakIterator == null) {
             return defaultSentenceBreak(s);
         }
-        sentenceBreaker.setText(s);
-        return sentenceBreaker.first();
+        breakIterator.setText(s);
+        final int sbrk = breakIterator.next();
+        // This is the last doctree, found the droid we are looking for
+        if (dt == null) {
+            return sbrk;
+        }
+
+        // If the break is well within the span of the string ie. not
+        // at EOL, then we have a clear break.
+        if (sbrk < s.length() - 1) {
+            return sbrk;
+        }
+
+        if (isTextTree(dt)) {
+            // Two adjacent text trees, a corner case, perhaps
+            // produced by a tool synthesizing a doctree. In
+            // this case, does the break lie within the first span,
+            // then we have the droid, otherwise allow the callers
+            // logic to handle the break in the adjacent doctree.
+            TextTree ttnext = (TextTree) dt;
+            String combined = s + ttnext.getBody();
+            breakIterator.setText(combined);
+            int sbrk2 = breakIterator.next();
+            if (sbrk < sbrk2) {
+                return sbrk;
+            }
+        }
+
+        // Is the adjacent tree a sentence breaker ?
+        if (isSentenceBreak(dt, false)) {
+            return sbrk;
+        }
+
+        // At this point the adjacent tree is either a javadoc tag ({@..),
+        // html tag (<..) or an entity (&..). Perform a litmus test, by
+        // concatenating a sentence, to validate the break earlier identified.
+        String combined = s + "Dummy Sentence.";
+        breakIterator.setText(combined);
+        int sbrk2 = breakIterator.next();
+        if (sbrk2 <= sbrk) {
+            return sbrk2;
+        }
+        return -1; // indeterminate at this time
     }
 
     boolean isSentenceBreak(javax.lang.model.element.Name tagName) {
@@ -476,7 +557,7 @@
     }
 
     String removeTrailingWhitespace(String s) {
-        for (int i = s.length() - 1 ; i > 0 ; i--) {
+        for (int i = s.length() - 1 ; i >= 0 ; i--) {
             char ch = s.charAt(i);
             if (!Character.isWhitespace(ch)) {
                 return s.substring(0, i + 1);