1 /* |
1 /* |
2 * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. |
2 * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. |
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * |
4 * |
5 * This code is free software; you can redistribute it and/or modify it |
5 * This code is free software; you can redistribute it and/or modify it |
6 * under the terms of the GNU General Public License version 2 only, as |
6 * under the terms of the GNU General Public License version 2 only, as |
7 * published by the Free Software Foundation. Oracle designates this |
7 * published by the Free Software Foundation. Oracle designates this |
35 import java.util.function.Function; |
35 import java.util.function.Function; |
36 import java.util.stream.Stream; |
36 import java.util.stream.Stream; |
37 import java.util.stream.StreamSupport; |
37 import java.util.stream.StreamSupport; |
38 |
38 |
39 /** |
39 /** |
40 * An engine that performs match operations on a {@linkplain java.lang.CharSequence |
40 * An engine that performs match operations on a {@linkplain |
41 * character sequence} by interpreting a {@link Pattern}. |
41 * java.lang.CharSequence character sequence} by interpreting a {@link Pattern}. |
42 * |
42 * |
43 * <p> A matcher is created from a pattern by invoking the pattern's {@link |
43 * <p> A matcher is created from a pattern by invoking the pattern's {@link |
44 * Pattern#matcher matcher} method. Once created, a matcher can be used to |
44 * Pattern#matcher matcher} method. Once created, a matcher can be used to |
45 * perform three different kinds of match operations: |
45 * perform three different kinds of match operations: |
46 * |
46 * |
50 * input sequence against the pattern. </p></li> |
50 * input sequence against the pattern. </p></li> |
51 * |
51 * |
52 * <li><p> The {@link #lookingAt lookingAt} method attempts to match the |
52 * <li><p> The {@link #lookingAt lookingAt} method attempts to match the |
53 * input sequence, starting at the beginning, against the pattern. </p></li> |
53 * input sequence, starting at the beginning, against the pattern. </p></li> |
54 * |
54 * |
55 * <li><p> The {@link #find find} method scans the input sequence looking for |
55 * <li><p> The {@link #find find} method scans the input sequence looking |
56 * the next subsequence that matches the pattern. </p></li> |
56 * for the next subsequence that matches the pattern. </p></li> |
57 * |
57 * |
58 * </ul> |
58 * </ul> |
59 * |
59 * |
60 * <p> Each of these methods returns a boolean indicating success or failure. |
60 * <p> Each of these methods returns a boolean indicating success or failure. |
61 * More information about a successful match can be obtained by querying the |
61 * More information about a successful match can be obtained by querying the |
62 * state of the matcher. |
62 * state of the matcher. |
63 * |
63 * |
64 * <p> A matcher finds matches in a subset of its input called the |
64 * <p> A matcher finds matches in a subset of its input called the |
65 * <i>region</i>. By default, the region contains all of the matcher's input. |
65 * <i>region</i>. By default, the region contains all of the matcher's input. |
66 * The region can be modified via the {@link #region region} method and queried |
66 * The region can be modified via the {@link #region(int, int) region} method |
67 * via the {@link #regionStart regionStart} and {@link #regionEnd regionEnd} |
67 * and queried via the {@link #regionStart() regionStart} and {@link |
68 * methods. The way that the region boundaries interact with some pattern |
68 * #regionEnd() regionEnd} methods. The way that the region boundaries interact |
69 * constructs can be changed. See {@link #useAnchoringBounds |
69 * with some pattern constructs can be changed. See {@link |
70 * useAnchoringBounds} and {@link #useTransparentBounds useTransparentBounds} |
70 * #useAnchoringBounds(boolean) useAnchoringBounds} and {@link |
71 * for more details. |
71 * #useTransparentBounds(boolean) useTransparentBounds} for more details. |
72 * |
72 * |
73 * <p> This class also defines methods for replacing matched subsequences with |
73 * <p> This class also defines methods for replacing matched subsequences with |
74 * new strings whose contents can, if desired, be computed from the match |
74 * new strings whose contents can, if desired, be computed from the match |
75 * result. The {@link #appendReplacement appendReplacement} and {@link |
75 * result. The {@link #appendReplacement appendReplacement} and {@link |
76 * #appendTail appendTail} methods can be used in tandem in order to collect |
76 * #appendTail appendTail} methods can be used in tandem in order to collect |
584 /** |
584 /** |
585 * Returns the input subsequence matched by the previous match. |
585 * Returns the input subsequence matched by the previous match. |
586 * |
586 * |
587 * <p> For a matcher <i>m</i> with input sequence <i>s</i>, |
587 * <p> For a matcher <i>m</i> with input sequence <i>s</i>, |
588 * the expressions <i>m.</i>{@code group()} and |
588 * the expressions <i>m.</i>{@code group()} and |
589 * <i>s.</i>{@code substring(}<i>m.</i>{@code start(),} <i>m.</i>{@code end())} |
589 * <i>s.</i>{@code substring(}<i>m.</i>{@code start(),} <i>m.</i> |
590 * are equivalent. </p> |
590 * {@code end())} are equivalent. </p> |
591 * |
591 * |
592 * <p> Note that some patterns, for example {@code a*}, match the empty |
592 * <p> Note that some patterns, for example {@code a*}, match the empty |
593 * string. This method will return the empty string when the pattern |
593 * string. This method will return the empty string when the pattern |
594 * successfully matches the empty string in the input. </p> |
594 * successfully matches the empty string in the input. </p> |
595 * |
595 * |
650 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString(); |
650 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString(); |
651 } |
651 } |
652 |
652 |
653 /** |
653 /** |
654 * Returns the input subsequence captured by the given |
654 * Returns the input subsequence captured by the given |
655 * <a href="Pattern.html#groupname">named-capturing group</a> during the previous |
655 * <a href="Pattern.html#groupname">named-capturing group</a> during the |
656 * match operation. |
656 * previous match operation. |
657 * |
657 * |
658 * <p> If the match was successful but the group specified failed to match |
658 * <p> If the match was successful but the group specified failed to match |
659 * any part of the input sequence, then {@code null} is returned. Note |
659 * any part of the input sequence, then {@code null} is returned. Note |
660 * that some groups, for example {@code (a*)}, match the empty string. |
660 * that some groups, for example {@code (a*)}, match the empty string. |
661 * This method will return the empty string when such a group successfully |
661 * This method will return the empty string when such a group successfully |
864 * treated as references to captured subsequences as described above, and |
864 * treated as references to captured subsequences as described above, and |
865 * backslashes are used to escape literal characters in the replacement |
865 * backslashes are used to escape literal characters in the replacement |
866 * string. |
866 * string. |
867 * |
867 * |
868 * <p> This method is intended to be used in a loop together with the |
868 * <p> This method is intended to be used in a loop together with the |
869 * {@link #appendTail appendTail} and {@link #find find} methods. The |
869 * {@link #appendTail(StringBuffer) appendTail} and {@link #find() find} |
870 * following code, for example, writes {@code one dog two dogs in the |
870 * methods. The following code, for example, writes {@code one dog two dogs |
871 * yard} to the standard-output stream: </p> |
871 * in the yard} to the standard-output stream: </p> |
872 * |
872 * |
873 * <blockquote><pre> |
873 * <blockquote><pre> |
874 * Pattern p = Pattern.compile("cat"); |
874 * Pattern p = Pattern.compile("cat"); |
875 * Matcher m = p.matcher("one cat two cats in the yard"); |
875 * Matcher m = p.matcher("one cat two cats in the yard"); |
876 * StringBuffer sb = new StringBuffer(); |
876 * StringBuffer sb = new StringBuffer(); |
957 * treated as references to captured subsequences as described above, and |
957 * treated as references to captured subsequences as described above, and |
958 * backslashes are used to escape literal characters in the replacement |
958 * backslashes are used to escape literal characters in the replacement |
959 * string. |
959 * string. |
960 * |
960 * |
961 * <p> This method is intended to be used in a loop together with the |
961 * <p> This method is intended to be used in a loop together with the |
962 * {@link #appendTail appendTail} and {@link #find find} methods. The |
962 * {@link #appendTail(StringBuilder) appendTail} and |
963 * following code, for example, writes {@code one dog two dogs in the |
963 * {@link #find() find} methods. The following code, for example, writes |
964 * yard} to the standard-output stream: </p> |
964 * {@code one dog two dogs in the yard} to the standard-output stream: </p> |
965 * |
965 * |
966 * <blockquote><pre> |
966 * <blockquote><pre> |
967 * Pattern p = Pattern.compile("cat"); |
967 * Pattern p = Pattern.compile("cat"); |
968 * Matcher m = p.matcher("one cat two cats in the yard"); |
968 * Matcher m = p.matcher("one cat two cats in the yard"); |
969 * StringBuilder sb = new StringBuilder(); |
969 * StringBuilder sb = new StringBuilder(); |
1102 * Implements a terminal append-and-replace step. |
1102 * Implements a terminal append-and-replace step. |
1103 * |
1103 * |
1104 * <p> This method reads characters from the input sequence, starting at |
1104 * <p> This method reads characters from the input sequence, starting at |
1105 * the append position, and appends them to the given string buffer. It is |
1105 * the append position, and appends them to the given string buffer. It is |
1106 * intended to be invoked after one or more invocations of the {@link |
1106 * intended to be invoked after one or more invocations of the {@link |
1107 * #appendReplacement appendReplacement} method in order to copy the |
1107 * #appendReplacement(StringBuffer, String) appendReplacement} method in |
1108 * remainder of the input sequence. </p> |
1108 * order to copy the remainder of the input sequence. </p> |
1109 * |
1109 * |
1110 * @param sb |
1110 * @param sb |
1111 * The target string buffer |
1111 * The target string buffer |
1112 * |
1112 * |
1113 * @return The target string buffer |
1113 * @return The target string buffer |
1121 * Implements a terminal append-and-replace step. |
1121 * Implements a terminal append-and-replace step. |
1122 * |
1122 * |
1123 * <p> This method reads characters from the input sequence, starting at |
1123 * <p> This method reads characters from the input sequence, starting at |
1124 * the append position, and appends them to the given string builder. It is |
1124 * the append position, and appends them to the given string builder. It is |
1125 * intended to be invoked after one or more invocations of the {@link |
1125 * intended to be invoked after one or more invocations of the {@link |
1126 * #appendReplacement appendReplacement} method in order to copy the |
1126 * #appendReplacement(StringBuilder, String) |
1127 * remainder of the input sequence. </p> |
1127 * appendReplacement} method in order to copy the remainder of the input |
|
1128 * sequence. </p> |
1128 * |
1129 * |
1129 * @param sb |
1130 * @param sb |
1130 * The target string builder |
1131 * The target string builder |
1131 * |
1132 * |
1132 * @return The target string builder |
1133 * @return The target string builder |
1488 * method resets the matcher, and then sets the region to start at the |
1489 * method resets the matcher, and then sets the region to start at the |
1489 * index specified by the {@code start} parameter and end at the |
1490 * index specified by the {@code start} parameter and end at the |
1490 * index specified by the {@code end} parameter. |
1491 * index specified by the {@code end} parameter. |
1491 * |
1492 * |
1492 * <p>Depending on the transparency and anchoring being used (see |
1493 * <p>Depending on the transparency and anchoring being used (see |
1493 * {@link #useTransparentBounds useTransparentBounds} and |
1494 * {@link #useTransparentBounds(boolean) useTransparentBounds} and |
1494 * {@link #useAnchoringBounds useAnchoringBounds}), certain constructs such |
1495 * {@link #useAnchoringBounds(boolean) useAnchoringBounds}), certain |
1495 * as anchors may behave differently at or around the boundaries of the |
1496 * constructs such as anchors may behave differently at or around the |
1496 * region. |
1497 * boundaries of the region. |
1497 * |
1498 * |
1498 * @param start |
1499 * @param start |
1499 * The index to start searching at (inclusive) |
1500 * The index to start searching at (inclusive) |
1500 * @param end |
1501 * @param end |
1501 * The index to end searching at (exclusive) |
1502 * The index to end searching at (exclusive) |
1521 } |
1522 } |
1522 |
1523 |
1523 /** |
1524 /** |
1524 * Reports the start index of this matcher's region. The |
1525 * Reports the start index of this matcher's region. The |
1525 * searches this matcher conducts are limited to finding matches |
1526 * searches this matcher conducts are limited to finding matches |
1526 * within {@link #regionStart regionStart} (inclusive) and |
1527 * within {@link #regionStart() regionStart} (inclusive) and |
1527 * {@link #regionEnd regionEnd} (exclusive). |
1528 * {@link #regionEnd() regionEnd} (exclusive). |
1528 * |
1529 * |
1529 * @return The starting point of this matcher's region |
1530 * @return The starting point of this matcher's region |
1530 * @since 1.5 |
1531 * @since 1.5 |
1531 */ |
1532 */ |
1532 public int regionStart() { |
1533 public int regionStart() { |
1534 } |
1535 } |
1535 |
1536 |
1536 /** |
1537 /** |
1537 * Reports the end index (exclusive) of this matcher's region. |
1538 * Reports the end index (exclusive) of this matcher's region. |
1538 * The searches this matcher conducts are limited to finding matches |
1539 * The searches this matcher conducts are limited to finding matches |
1539 * within {@link #regionStart regionStart} (inclusive) and |
1540 * within {@link #regionStart() regionStart} (inclusive) and |
1540 * {@link #regionEnd regionEnd} (exclusive). |
1541 * {@link #regionEnd() regionEnd} (exclusive). |
1541 * |
1542 * |
1542 * @return the ending point of this matcher's region |
1543 * @return the ending point of this matcher's region |
1543 * @since 1.5 |
1544 * @since 1.5 |
1544 */ |
1545 */ |
1545 public int regionEnd() { |
1546 public int regionEnd() { |
1551 * |
1552 * |
1552 * <p> This method returns {@code true} if this matcher uses |
1553 * <p> This method returns {@code true} if this matcher uses |
1553 * <i>transparent</i> bounds, {@code false} if it uses <i>opaque</i> |
1554 * <i>transparent</i> bounds, {@code false} if it uses <i>opaque</i> |
1554 * bounds. |
1555 * bounds. |
1555 * |
1556 * |
1556 * <p> See {@link #useTransparentBounds useTransparentBounds} for a |
1557 * <p> See {@link #useTransparentBounds(boolean) useTransparentBounds} for a |
1557 * description of transparent and opaque bounds. |
1558 * description of transparent and opaque bounds. |
1558 * |
1559 * |
1559 * <p> By default, a matcher uses opaque region boundaries. |
1560 * <p> By default, a matcher uses opaque region boundaries. |
1560 * |
1561 * |
1561 * @return {@code true} iff this matcher is using transparent bounds, |
1562 * @return {@code true} iff this matcher is using transparent bounds, |
1602 * Queries the anchoring of region bounds for this matcher. |
1603 * Queries the anchoring of region bounds for this matcher. |
1603 * |
1604 * |
1604 * <p> This method returns {@code true} if this matcher uses |
1605 * <p> This method returns {@code true} if this matcher uses |
1605 * <i>anchoring</i> bounds, {@code false} otherwise. |
1606 * <i>anchoring</i> bounds, {@code false} otherwise. |
1606 * |
1607 * |
1607 * <p> See {@link #useAnchoringBounds useAnchoringBounds} for a |
1608 * <p> See {@link #useAnchoringBounds(boolean) useAnchoringBounds} for a |
1608 * description of anchoring bounds. |
1609 * description of anchoring bounds. |
1609 * |
1610 * |
1610 * <p> By default, a matcher uses anchoring region boundaries. |
1611 * <p> By default, a matcher uses anchoring region boundaries. |
1611 * |
1612 * |
1612 * @return {@code true} iff this matcher is using anchoring bounds, |
1613 * @return {@code true} iff this matcher is using anchoring bounds, |
1768 int getTextLength() { |
1769 int getTextLength() { |
1769 return text.length(); |
1770 return text.length(); |
1770 } |
1771 } |
1771 |
1772 |
1772 /** |
1773 /** |
1773 * Returns the subsequence of this Matcher's input in the specified range. |
1774 * Returns the subsequence of this matcher's input in the specified range. |
1774 * |
1775 * |
1775 * @param beginIndex the beginning index, inclusive |
1776 * @param beginIndex the beginning index, inclusive |
1776 * @param endIndex the ending index, exclusive |
1777 * @param endIndex the ending index, exclusive |
1777 * @return A CharSequence view of the specified range of this Matcher's input |
1778 * @return A CharSequence view of the specified range of this matcher's input |
1778 */ |
1779 */ |
1779 CharSequence getSubSequence(int beginIndex, int endIndex) { |
1780 CharSequence getSubSequence(int beginIndex, int endIndex) { |
1780 return text.subSequence(beginIndex, endIndex); |
1781 return text.subSequence(beginIndex, endIndex); |
1781 } |
1782 } |
1782 |
1783 |
1783 /** |
1784 /** |
1784 * Returns this Matcher's input character at index i. |
1785 * Returns this matcher's input character at index i. |
1785 * |
1786 * |
1786 * @return A char from the specified index |
1787 * @return A char from the specified index |
1787 */ |
1788 */ |
1788 char charAt(int i) { |
1789 char charAt(int i) { |
1789 return text.charAt(i); |
1790 return text.charAt(i); |