src/java.base/share/classes/java/util/regex/Matcher.java
changeset 50340 d52bba1f19aa
parent 47216 71c04702a3d5
child 58242 94bb65cb37d3
equal deleted inserted replaced
50339:ede65c4fb6da 50340:d52bba1f19aa
     1 /*
     1 /*
     2  * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
     2  * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     7  * published by the Free Software Foundation.  Oracle designates this
    35 import java.util.function.Function;
    35 import java.util.function.Function;
    36 import java.util.stream.Stream;
    36 import java.util.stream.Stream;
    37 import java.util.stream.StreamSupport;
    37 import java.util.stream.StreamSupport;
    38 
    38 
    39 /**
    39 /**
    40  * An engine that performs match operations on a {@linkplain java.lang.CharSequence
    40  * An engine that performs match operations on a {@linkplain
    41  * character sequence} by interpreting a {@link Pattern}.
    41  * java.lang.CharSequence character sequence} by interpreting a {@link Pattern}.
    42  *
    42  *
    43  * <p> A matcher is created from a pattern by invoking the pattern's {@link
    43  * <p> A matcher is created from a pattern by invoking the pattern's {@link
    44  * Pattern#matcher matcher} method.  Once created, a matcher can be used to
    44  * Pattern#matcher matcher} method.  Once created, a matcher can be used to
    45  * perform three different kinds of match operations:
    45  * perform three different kinds of match operations:
    46  *
    46  *
    50  *   input sequence against the pattern.  </p></li>
    50  *   input sequence against the pattern.  </p></li>
    51  *
    51  *
    52  *   <li><p> The {@link #lookingAt lookingAt} method attempts to match the
    52  *   <li><p> The {@link #lookingAt lookingAt} method attempts to match the
    53  *   input sequence, starting at the beginning, against the pattern.  </p></li>
    53  *   input sequence, starting at the beginning, against the pattern.  </p></li>
    54  *
    54  *
    55  *   <li><p> The {@link #find find} method scans the input sequence looking for
    55  *   <li><p> The {@link #find find} method scans the input sequence looking
    56  *   the next subsequence that matches the pattern.  </p></li>
    56  *   for the next subsequence that matches the pattern.  </p></li>
    57  *
    57  *
    58  * </ul>
    58  * </ul>
    59  *
    59  *
    60  * <p> Each of these methods returns a boolean indicating success or failure.
    60  * <p> Each of these methods returns a boolean indicating success or failure.
    61  * More information about a successful match can be obtained by querying the
    61  * More information about a successful match can be obtained by querying the
    62  * state of the matcher.
    62  * state of the matcher.
    63  *
    63  *
    64  * <p> A matcher finds matches in a subset of its input called the
    64  * <p> A matcher finds matches in a subset of its input called the
    65  * <i>region</i>. By default, the region contains all of the matcher's input.
    65  * <i>region</i>. By default, the region contains all of the matcher's input.
    66  * The region can be modified via the {@link #region region} method and queried
    66  * The region can be modified via the {@link #region(int, int) region} method
    67  * via the {@link #regionStart regionStart} and {@link #regionEnd regionEnd}
    67  * and queried via the {@link #regionStart() regionStart} and {@link
    68  * methods. The way that the region boundaries interact with some pattern
    68  * #regionEnd() regionEnd} methods. The way that the region boundaries interact
    69  * constructs can be changed. See {@link #useAnchoringBounds
    69  * with some pattern constructs can be changed. See {@link
    70  * useAnchoringBounds} and {@link #useTransparentBounds useTransparentBounds}
    70  * #useAnchoringBounds(boolean) useAnchoringBounds} and {@link
    71  * for more details.
    71  * #useTransparentBounds(boolean) useTransparentBounds} for more details.
    72  *
    72  *
    73  * <p> This class also defines methods for replacing matched subsequences with
    73  * <p> This class also defines methods for replacing matched subsequences with
    74  * new strings whose contents can, if desired, be computed from the match
    74  * new strings whose contents can, if desired, be computed from the match
    75  * result.  The {@link #appendReplacement appendReplacement} and {@link
    75  * result.  The {@link #appendReplacement appendReplacement} and {@link
    76  * #appendTail appendTail} methods can be used in tandem in order to collect
    76  * #appendTail appendTail} methods can be used in tandem in order to collect
   584     /**
   584     /**
   585      * Returns the input subsequence matched by the previous match.
   585      * Returns the input subsequence matched by the previous match.
   586      *
   586      *
   587      * <p> For a matcher <i>m</i> with input sequence <i>s</i>,
   587      * <p> For a matcher <i>m</i> with input sequence <i>s</i>,
   588      * the expressions <i>m.</i>{@code group()} and
   588      * the expressions <i>m.</i>{@code group()} and
   589      * <i>s.</i>{@code substring(}<i>m.</i>{@code start(),}&nbsp;<i>m.</i>{@code end())}
   589      * <i>s.</i>{@code substring(}<i>m.</i>{@code start(),}&nbsp;<i>m.</i>
   590      * are equivalent.  </p>
   590      * {@code end())} are equivalent.  </p>
   591      *
   591      *
   592      * <p> Note that some patterns, for example {@code a*}, match the empty
   592      * <p> Note that some patterns, for example {@code a*}, match the empty
   593      * string.  This method will return the empty string when the pattern
   593      * string.  This method will return the empty string when the pattern
   594      * successfully matches the empty string in the input.  </p>
   594      * successfully matches the empty string in the input.  </p>
   595      *
   595      *
   650         return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
   650         return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
   651     }
   651     }
   652 
   652 
   653     /**
   653     /**
   654      * Returns the input subsequence captured by the given
   654      * Returns the input subsequence captured by the given
   655      * <a href="Pattern.html#groupname">named-capturing group</a> during the previous
   655      * <a href="Pattern.html#groupname">named-capturing group</a> during the
   656      * match operation.
   656      * previous match operation.
   657      *
   657      *
   658      * <p> If the match was successful but the group specified failed to match
   658      * <p> If the match was successful but the group specified failed to match
   659      * any part of the input sequence, then {@code null} is returned. Note
   659      * any part of the input sequence, then {@code null} is returned. Note
   660      * that some groups, for example {@code (a*)}, match the empty string.
   660      * that some groups, for example {@code (a*)}, match the empty string.
   661      * This method will return the empty string when such a group successfully
   661      * This method will return the empty string when such a group successfully
   864      * treated as references to captured subsequences as described above, and
   864      * treated as references to captured subsequences as described above, and
   865      * backslashes are used to escape literal characters in the replacement
   865      * backslashes are used to escape literal characters in the replacement
   866      * string.
   866      * string.
   867      *
   867      *
   868      * <p> This method is intended to be used in a loop together with the
   868      * <p> This method is intended to be used in a loop together with the
   869      * {@link #appendTail appendTail} and {@link #find find} methods.  The
   869      * {@link #appendTail(StringBuffer) appendTail} and {@link #find() find}
   870      * following code, for example, writes {@code one dog two dogs in the
   870      * methods.  The following code, for example, writes {@code one dog two dogs
   871      * yard} to the standard-output stream: </p>
   871      * in the yard} to the standard-output stream: </p>
   872      *
   872      *
   873      * <blockquote><pre>
   873      * <blockquote><pre>
   874      * Pattern p = Pattern.compile("cat");
   874      * Pattern p = Pattern.compile("cat");
   875      * Matcher m = p.matcher("one cat two cats in the yard");
   875      * Matcher m = p.matcher("one cat two cats in the yard");
   876      * StringBuffer sb = new StringBuffer();
   876      * StringBuffer sb = new StringBuffer();
   957      * treated as references to captured subsequences as described above, and
   957      * treated as references to captured subsequences as described above, and
   958      * backslashes are used to escape literal characters in the replacement
   958      * backslashes are used to escape literal characters in the replacement
   959      * string.
   959      * string.
   960      *
   960      *
   961      * <p> This method is intended to be used in a loop together with the
   961      * <p> This method is intended to be used in a loop together with the
   962      * {@link #appendTail appendTail} and {@link #find find} methods.  The
   962      * {@link #appendTail(StringBuilder) appendTail} and
   963      * following code, for example, writes {@code one dog two dogs in the
   963      * {@link #find() find} methods. The following code, for example, writes
   964      * yard} to the standard-output stream: </p>
   964      * {@code one dog two dogs in the yard} to the standard-output stream: </p>
   965      *
   965      *
   966      * <blockquote><pre>
   966      * <blockquote><pre>
   967      * Pattern p = Pattern.compile("cat");
   967      * Pattern p = Pattern.compile("cat");
   968      * Matcher m = p.matcher("one cat two cats in the yard");
   968      * Matcher m = p.matcher("one cat two cats in the yard");
   969      * StringBuilder sb = new StringBuilder();
   969      * StringBuilder sb = new StringBuilder();
  1102      * Implements a terminal append-and-replace step.
  1102      * Implements a terminal append-and-replace step.
  1103      *
  1103      *
  1104      * <p> This method reads characters from the input sequence, starting at
  1104      * <p> This method reads characters from the input sequence, starting at
  1105      * the append position, and appends them to the given string buffer.  It is
  1105      * the append position, and appends them to the given string buffer.  It is
  1106      * intended to be invoked after one or more invocations of the {@link
  1106      * intended to be invoked after one or more invocations of the {@link
  1107      * #appendReplacement appendReplacement} method in order to copy the
  1107      * #appendReplacement(StringBuffer, String) appendReplacement} method in
  1108      * remainder of the input sequence.  </p>
  1108      * order to copy the remainder of the input sequence.  </p>
  1109      *
  1109      *
  1110      * @param  sb
  1110      * @param  sb
  1111      *         The target string buffer
  1111      *         The target string buffer
  1112      *
  1112      *
  1113      * @return  The target string buffer
  1113      * @return  The target string buffer
  1121      * Implements a terminal append-and-replace step.
  1121      * Implements a terminal append-and-replace step.
  1122      *
  1122      *
  1123      * <p> This method reads characters from the input sequence, starting at
  1123      * <p> This method reads characters from the input sequence, starting at
  1124      * the append position, and appends them to the given string builder.  It is
  1124      * the append position, and appends them to the given string builder.  It is
  1125      * intended to be invoked after one or more invocations of the {@link
  1125      * intended to be invoked after one or more invocations of the {@link
  1126      * #appendReplacement appendReplacement} method in order to copy the
  1126      * #appendReplacement(StringBuilder, String)
  1127      * remainder of the input sequence.  </p>
  1127      * appendReplacement} method in order to copy the remainder of the input
       
  1128      * sequence.  </p>
  1128      *
  1129      *
  1129      * @param  sb
  1130      * @param  sb
  1130      *         The target string builder
  1131      *         The target string builder
  1131      *
  1132      *
  1132      * @return  The target string builder
  1133      * @return  The target string builder
  1488      * method resets the matcher, and then sets the region to start at the
  1489      * method resets the matcher, and then sets the region to start at the
  1489      * index specified by the {@code start} parameter and end at the
  1490      * index specified by the {@code start} parameter and end at the
  1490      * index specified by the {@code end} parameter.
  1491      * index specified by the {@code end} parameter.
  1491      *
  1492      *
  1492      * <p>Depending on the transparency and anchoring being used (see
  1493      * <p>Depending on the transparency and anchoring being used (see
  1493      * {@link #useTransparentBounds useTransparentBounds} and
  1494      * {@link #useTransparentBounds(boolean) useTransparentBounds} and
  1494      * {@link #useAnchoringBounds useAnchoringBounds}), certain constructs such
  1495      * {@link #useAnchoringBounds(boolean) useAnchoringBounds}), certain
  1495      * as anchors may behave differently at or around the boundaries of the
  1496      * constructs such as anchors may behave differently at or around the
  1496      * region.
  1497      * boundaries of the region.
  1497      *
  1498      *
  1498      * @param  start
  1499      * @param  start
  1499      *         The index to start searching at (inclusive)
  1500      *         The index to start searching at (inclusive)
  1500      * @param  end
  1501      * @param  end
  1501      *         The index to end searching at (exclusive)
  1502      *         The index to end searching at (exclusive)
  1521     }
  1522     }
  1522 
  1523 
  1523     /**
  1524     /**
  1524      * Reports the start index of this matcher's region. The
  1525      * Reports the start index of this matcher's region. The
  1525      * searches this matcher conducts are limited to finding matches
  1526      * searches this matcher conducts are limited to finding matches
  1526      * within {@link #regionStart regionStart} (inclusive) and
  1527      * within {@link #regionStart() regionStart} (inclusive) and
  1527      * {@link #regionEnd regionEnd} (exclusive).
  1528      * {@link #regionEnd() regionEnd} (exclusive).
  1528      *
  1529      *
  1529      * @return  The starting point of this matcher's region
  1530      * @return  The starting point of this matcher's region
  1530      * @since 1.5
  1531      * @since 1.5
  1531      */
  1532      */
  1532     public int regionStart() {
  1533     public int regionStart() {
  1534     }
  1535     }
  1535 
  1536 
  1536     /**
  1537     /**
  1537      * Reports the end index (exclusive) of this matcher's region.
  1538      * Reports the end index (exclusive) of this matcher's region.
  1538      * The searches this matcher conducts are limited to finding matches
  1539      * The searches this matcher conducts are limited to finding matches
  1539      * within {@link #regionStart regionStart} (inclusive) and
  1540      * within {@link #regionStart() regionStart} (inclusive) and
  1540      * {@link #regionEnd regionEnd} (exclusive).
  1541      * {@link #regionEnd() regionEnd} (exclusive).
  1541      *
  1542      *
  1542      * @return  the ending point of this matcher's region
  1543      * @return  the ending point of this matcher's region
  1543      * @since 1.5
  1544      * @since 1.5
  1544      */
  1545      */
  1545     public int regionEnd() {
  1546     public int regionEnd() {
  1551      *
  1552      *
  1552      * <p> This method returns {@code true} if this matcher uses
  1553      * <p> This method returns {@code true} if this matcher uses
  1553      * <i>transparent</i> bounds, {@code false} if it uses <i>opaque</i>
  1554      * <i>transparent</i> bounds, {@code false} if it uses <i>opaque</i>
  1554      * bounds.
  1555      * bounds.
  1555      *
  1556      *
  1556      * <p> See {@link #useTransparentBounds useTransparentBounds} for a
  1557      * <p> See {@link #useTransparentBounds(boolean) useTransparentBounds} for a
  1557      * description of transparent and opaque bounds.
  1558      * description of transparent and opaque bounds.
  1558      *
  1559      *
  1559      * <p> By default, a matcher uses opaque region boundaries.
  1560      * <p> By default, a matcher uses opaque region boundaries.
  1560      *
  1561      *
  1561      * @return {@code true} iff this matcher is using transparent bounds,
  1562      * @return {@code true} iff this matcher is using transparent bounds,
  1602      * Queries the anchoring of region bounds for this matcher.
  1603      * Queries the anchoring of region bounds for this matcher.
  1603      *
  1604      *
  1604      * <p> This method returns {@code true} if this matcher uses
  1605      * <p> This method returns {@code true} if this matcher uses
  1605      * <i>anchoring</i> bounds, {@code false} otherwise.
  1606      * <i>anchoring</i> bounds, {@code false} otherwise.
  1606      *
  1607      *
  1607      * <p> See {@link #useAnchoringBounds useAnchoringBounds} for a
  1608      * <p> See {@link #useAnchoringBounds(boolean) useAnchoringBounds} for a
  1608      * description of anchoring bounds.
  1609      * description of anchoring bounds.
  1609      *
  1610      *
  1610      * <p> By default, a matcher uses anchoring region boundaries.
  1611      * <p> By default, a matcher uses anchoring region boundaries.
  1611      *
  1612      *
  1612      * @return {@code true} iff this matcher is using anchoring bounds,
  1613      * @return {@code true} iff this matcher is using anchoring bounds,
  1768     int getTextLength() {
  1769     int getTextLength() {
  1769         return text.length();
  1770         return text.length();
  1770     }
  1771     }
  1771 
  1772 
  1772     /**
  1773     /**
  1773      * Generates a String from this Matcher's input in the specified range.
  1774      * Returns a subsequence of this matcher's input in the specified range.
  1774      *
  1775      *
  1775      * @param  beginIndex   the beginning index, inclusive
  1776      * @param  beginIndex   the beginning index, inclusive
  1776      * @param  endIndex     the ending index, exclusive
  1777      * @param  endIndex     the ending index, exclusive
  1777      * @return A String generated from this Matcher's input
  1778      * @return The subsequence of this matcher's input in the specified range
  1778      */
  1779      */
  1779     CharSequence getSubSequence(int beginIndex, int endIndex) {
  1780     CharSequence getSubSequence(int beginIndex, int endIndex) {
  1780         return text.subSequence(beginIndex, endIndex);
  1781         return text.subSequence(beginIndex, endIndex);
  1781     }
  1782     }
  1782 
  1783 
  1783     /**
  1784     /**
  1784      * Returns this Matcher's input character at index i.
  1785      * Returns this matcher's input character at index i.
  1785      *
  1786      *
  1786      * @return A char from the specified index
  1787      * @return A char from the specified index
  1787      */
  1788      */
  1788     char charAt(int i) {
  1789     char charAt(int i) {
  1789         return text.charAt(i);
  1790         return text.charAt(i);