jdk/src/java.base/share/classes/java/util/regex/Matcher.java
changeset 29243 80ea8d3d39d0
parent 25859 3317bb8137f4
child 29380 c18777f9b6b9
equal deleted inserted replaced
29242:33423ec519fd 29243:80ea8d3d39d0
     1 /*
     1 /*
     2  * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
     2  * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     7  * published by the Free Software Foundation.  Oracle designates this
    23  * questions.
    23  * questions.
    24  */
    24  */
    25 
    25 
    26 package java.util.regex;
    26 package java.util.regex;
    27 
    27 
       
    28 import java.util.ConcurrentModificationException;
       
    29 import java.util.Iterator;
       
    30 import java.util.NoSuchElementException;
    28 import java.util.Objects;
    31 import java.util.Objects;
       
    32 import java.util.Spliterator;
       
    33 import java.util.Spliterators;
       
    34 import java.util.function.Consumer;
       
    35 import java.util.function.Function;
       
    36 import java.util.stream.Stream;
       
    37 import java.util.stream.StreamSupport;
    29 
    38 
    30 /**
    39 /**
    31  * An engine that performs match operations on a {@linkplain java.lang.CharSequence
    40  * An engine that performs match operations on a {@linkplain java.lang.CharSequence
    32  * character sequence} by interpreting a {@link Pattern}.
    41  * character sequence} by interpreting a {@link Pattern}.
    33  *
    42  *
   207      * matcher's region match anchors such as ^ and $.
   216      * matcher's region match anchors such as ^ and $.
   208      */
   217      */
   209     boolean anchoringBounds = true;
   218     boolean anchoringBounds = true;
   210 
   219 
   211     /**
   220     /**
       
   221      * Number of times this matcher's state has been modified
       
   222      */
       
   223     int modCount;
       
   224 
       
   225     /**
   212      * No default constructor.
   226      * No default constructor.
   213      */
   227      */
   214     Matcher() {
   228     Matcher() {
   215     }
   229     }
   216 
   230 
   246      *
   260      *
   247      * @return  a <code>MatchResult</code> with the state of this matcher
   261      * @return  a <code>MatchResult</code> with the state of this matcher
   248      * @since 1.5
   262      * @since 1.5
   249      */
   263      */
   250     public MatchResult toMatchResult() {
   264     public MatchResult toMatchResult() {
   251         Matcher result = new Matcher(this.parentPattern, text.toString());
   265         return toMatchResult(text.toString());
   252         result.first = this.first;
   266     }
   253         result.last = this.last;
   267 
   254         result.groups = this.groups.clone();
   268     private MatchResult toMatchResult(String text) {
   255         return result;
   269         return new ImmutableMatchResult(this.first,
       
   270                                         this.last,
       
   271                                         groupCount(),
       
   272                                         this.groups.clone(),
       
   273                                         text);
       
   274     }
       
   275 
       
   276     private static class ImmutableMatchResult implements MatchResult {
       
   277         private final int first;
       
   278         private final int last;
       
   279         private final int[] groups;
       
   280         private final int groupCount;
       
   281         private final String text;
       
   282 
       
   283         ImmutableMatchResult(int first, int last, int groupCount,
       
   284                              int groups[], String text)
       
   285         {
       
   286             this.first = first;
       
   287             this.last = last;
       
   288             this.groupCount = groupCount;
       
   289             this.groups = groups;
       
   290             this.text = text;
       
   291         }
       
   292 
       
   293         @Override
       
   294         public int start() {
       
   295             return first;
       
   296         }
       
   297 
       
   298         @Override
       
   299         public int start(int group) {
       
   300             if (group < 0 || group > groupCount)
       
   301                 throw new IndexOutOfBoundsException("No group " + group);
       
   302             return groups[group * 2];
       
   303         }
       
   304 
       
   305         @Override
       
   306         public int end() {
       
   307             return last;
       
   308         }
       
   309 
       
   310         @Override
       
   311         public int end(int group) {
       
   312             if (group < 0 || group > groupCount)
       
   313                 throw new IndexOutOfBoundsException("No group " + group);
       
   314             return groups[group * 2 + 1];
       
   315         }
       
   316 
       
   317         @Override
       
   318         public int groupCount() {
       
   319             return groupCount;
       
   320         }
       
   321 
       
   322         @Override
       
   323         public String group() {
       
   324             return group(0);
       
   325         }
       
   326 
       
   327         @Override
       
   328         public String group(int group) {
       
   329             if (group < 0 || group > groupCount)
       
   330                 throw new IndexOutOfBoundsException("No group " + group);
       
   331             if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
       
   332                 return null;
       
   333             return text.subSequence(groups[group * 2], groups[group * 2 + 1]).toString();
       
   334         }
   256     }
   335     }
   257 
   336 
   258     /**
   337     /**
   259       * Changes the <tt>Pattern</tt> that this <tt>Matcher</tt> uses to
   338       * Changes the <tt>Pattern</tt> that this <tt>Matcher</tt> uses to
   260       * find matches with.
   339       * find matches with.
   282         locals = new int[newPattern.localCount];
   361         locals = new int[newPattern.localCount];
   283         for (int i = 0; i < groups.length; i++)
   362         for (int i = 0; i < groups.length; i++)
   284             groups[i] = -1;
   363             groups[i] = -1;
   285         for (int i = 0; i < locals.length; i++)
   364         for (int i = 0; i < locals.length; i++)
   286             locals[i] = -1;
   365             locals[i] = -1;
       
   366         modCount++;
   287         return this;
   367         return this;
   288     }
   368     }
   289 
   369 
   290     /**
   370     /**
   291      * Resets this matcher.
   371      * Resets this matcher.
   306         for(int i=0; i<locals.length; i++)
   386         for(int i=0; i<locals.length; i++)
   307             locals[i] = -1;
   387             locals[i] = -1;
   308         lastAppendPosition = 0;
   388         lastAppendPosition = 0;
   309         from = 0;
   389         from = 0;
   310         to = getTextLength();
   390         to = getTextLength();
       
   391         modCount++;
   311         return this;
   392         return this;
   312     }
   393     }
   313 
   394 
   314     /**
   395     /**
   315      * Resets this matcher with a new input sequence.
   396      * Resets this matcher with a new input sequence.
   801         // Append the intervening text
   882         // Append the intervening text
   802         sb.append(text, lastAppendPosition, first);
   883         sb.append(text, lastAppendPosition, first);
   803         // Append the match substitution
   884         // Append the match substitution
   804         sb.append(result);
   885         sb.append(result);
   805         lastAppendPosition = last;
   886         lastAppendPosition = last;
       
   887         modCount++;
   806         return this;
   888         return this;
   807     }
   889     }
   808 
   890 
   809     /**
   891     /**
   810      * Implements a non-terminal append-and-replace step.
   892      * Implements a non-terminal append-and-replace step.
   890         // Append the intervening text
   972         // Append the intervening text
   891         sb.append(text, lastAppendPosition, first);
   973         sb.append(text, lastAppendPosition, first);
   892         // Append the match substitution
   974         // Append the match substitution
   893         sb.append(result);
   975         sb.append(result);
   894         lastAppendPosition = last;
   976         lastAppendPosition = last;
       
   977         modCount++;
   895         return this;
   978         return this;
   896     }
   979     }
   897 
   980 
   898     /**
   981     /**
   899      * Processes replacement string to replace group references with
   982      * Processes replacement string to replace group references with
  1076         }
  1159         }
  1077         return text.toString();
  1160         return text.toString();
  1078     }
  1161     }
  1079 
  1162 
  1080     /**
  1163     /**
       
  1164      * Replaces every subsequence of the input sequence that matches the
       
  1165      * pattern with the result of applying the given replacer function to the
       
  1166      * match result of this matcher corresponding to that subsequence.
       
  1167      * Exceptions thrown by the function are relayed to the caller.
       
  1168      *
       
  1169      * <p> This method first resets this matcher.  It then scans the input
       
  1170      * sequence looking for matches of the pattern.  Characters that are not
       
  1171      * part of any match are appended directly to the result string; each match
       
  1172      * is replaced in the result by the applying the replacer function that
       
  1173      * returns a replacement string.  Each replacement string may contain
       
  1174      * references to captured subsequences as in the {@link #appendReplacement
       
  1175      * appendReplacement} method.
       
  1176      *
       
  1177      * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
       
  1178      * a replacement string may cause the results to be different than if it
       
  1179      * were being treated as a literal replacement string. Dollar signs may be
       
  1180      * treated as references to captured subsequences as described above, and
       
  1181      * backslashes are used to escape literal characters in the replacement
       
  1182      * string.
       
  1183      *
       
  1184      * <p> Given the regular expression <tt>dog</tt>, the input
       
  1185      * <tt>"zzzdogzzzdogzzz"</tt>, and the function
       
  1186      * <tt>mr -> mr.group().toUpperCase()</tt>, an invocation of this method on
       
  1187      * a matcher for that expression would yield the string
       
  1188      * <tt>"zzzDOGzzzDOGzzz"</tt>.
       
  1189      *
       
  1190      * <p> Invoking this method changes this matcher's state.  If the matcher
       
  1191      * is to be used in further matching operations then it should first be
       
  1192      * reset.  </p>
       
  1193      *
       
  1194      * <p> The replacer function should not modify this matcher's state during
       
  1195      * replacement.  This method will, on a best-effort basis, throw a
       
  1196      * {@link java.util.ConcurrentModificationException} if such modification is
       
  1197      * detected.
       
  1198      *
       
  1199      * <p> The state of each match result passed to the replacer function is
       
  1200      * guaranteed to be constant only for the duration of the replacer function
       
  1201      * call and only if the replacer function does not modify this matcher's
       
  1202      * state.
       
  1203      *
       
  1204      * @implNote
       
  1205      * This implementation applies the replacer function to this matcher, which
       
  1206      * is an instance of {@code MatchResult}.
       
  1207      *
       
  1208      * @param  replacer
       
  1209      *         The function to be applied to the match result of this matcher
       
  1210      *         that returns a replacement string.
       
  1211      * @return  The string constructed by replacing each matching subsequence
       
  1212      *          with the result of applying the replacer function to that
       
  1213      *          matched subsequence, substituting captured subsequences as
       
  1214      *          needed.
       
  1215      * @throws NullPointerException if the replacer function is null
       
  1216      * @throws ConcurrentModificationException if it is detected, on a
       
  1217      *         best-effort basis, that the replacer function modified this
       
  1218      *         matcher's state
       
  1219      * @since 1.9
       
  1220      */
       
  1221     public String replaceAll(Function<MatchResult, String> replacer) {
       
  1222         Objects.requireNonNull(replacer);
       
  1223         reset();
       
  1224         boolean result = find();
       
  1225         if (result) {
       
  1226             StringBuilder sb = new StringBuilder();
       
  1227             do {
       
  1228                 int ec = modCount;
       
  1229                 String replacement =  replacer.apply(this);
       
  1230                 if (ec != modCount)
       
  1231                     throw new ConcurrentModificationException();
       
  1232                 appendReplacement(sb, replacement);
       
  1233                 result = find();
       
  1234             } while (result);
       
  1235             appendTail(sb);
       
  1236             return sb.toString();
       
  1237         }
       
  1238         return text.toString();
       
  1239     }
       
  1240 
       
  1241     /**
       
  1242      * Returns a stream of match results for each subsequence of the input
       
  1243      * sequence that matches the pattern.  The match results occur in the
       
  1244      * same order as the matching subsequences in the input sequence.
       
  1245      *
       
  1246      * <p> Each match result is produced as if by {@link #toMatchResult()}.
       
  1247      *
       
  1248      * <p> This method does not reset this matcher.  Matching starts on
       
  1249      * initiation of the terminal stream operation either at the beginning of
       
  1250      * this matcher's region, or, if the matcher has not since been reset, at
       
  1251      * the first character not matched by a previous match.
       
  1252      *
       
  1253      * <p> If the matcher is to be used for further matching operations after
       
  1254      * the terminal stream operation completes then it should be first reset.
       
  1255      *
       
  1256      * <p> This matcher's state should not be modified during execution of the
       
  1257      * returned stream's pipeline.  The returned stream's source
       
  1258      * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort
       
  1259      * basis, throw a {@link java.util.ConcurrentModificationException} if such
       
  1260      * modification is detected.
       
  1261      *
       
  1262      * @return a sequential stream of match results.
       
  1263      * @since 1.9
       
  1264      */
       
  1265     public Stream<MatchResult> results() {
       
  1266         class MatchResultIterator implements Iterator<MatchResult> {
       
  1267             // -ve for call to find, 0 for not found, 1 for found
       
  1268             int state = -1;
       
  1269             // State for concurrent modification checking
       
  1270             // -1 for uninitialized
       
  1271             int expectedCount = -1;
       
  1272             // The input sequence as a string, set once only after first find
       
  1273             // Avoids repeated conversion from CharSequence for each match
       
  1274             String textAsString;
       
  1275 
       
  1276             @Override
       
  1277             public MatchResult next() {
       
  1278                 if (expectedCount >= 0 && expectedCount != modCount)
       
  1279                     throw new ConcurrentModificationException();
       
  1280 
       
  1281                 if (!hasNext())
       
  1282                     throw new NoSuchElementException();
       
  1283 
       
  1284                 state = -1;
       
  1285                 return toMatchResult(textAsString);
       
  1286             }
       
  1287 
       
  1288             @Override
       
  1289             public boolean hasNext() {
       
  1290                 if (state >= 0)
       
  1291                     return state == 1;
       
  1292 
       
  1293                 // Defer throwing ConcurrentModificationException to when next
       
  1294                 // or forEachRemaining is called.  The is consistent with other
       
  1295                 // fail-fast implementations.
       
  1296                 if (expectedCount >= 0 && expectedCount != modCount)
       
  1297                     return true;
       
  1298 
       
  1299                 boolean found = find();
       
  1300                 // Capture the input sequence as a string on first find
       
  1301                 if (found && state < 0)
       
  1302                     textAsString = text.toString();
       
  1303                 state = found ? 1 : 0;
       
  1304                 expectedCount = modCount;
       
  1305                 return found;
       
  1306             }
       
  1307 
       
  1308             @Override
       
  1309             public void forEachRemaining(Consumer<? super MatchResult> action) {
       
  1310                 if (expectedCount >= 0 && expectedCount != modCount)
       
  1311                     throw new ConcurrentModificationException();
       
  1312 
       
  1313                 int s = state;
       
  1314                 if (s == 0)
       
  1315                     return;
       
  1316 
       
  1317                 // Set state to report no more elements on further operations
       
  1318                 state = 0;
       
  1319                 expectedCount = -1;
       
  1320 
       
  1321                 // Perform a first find if required
       
  1322                 if (s < 0 && !find())
       
  1323                     return;
       
  1324 
       
  1325                 // Capture the input sequence as a string on first find
       
  1326                 textAsString = text.toString();
       
  1327 
       
  1328                 do {
       
  1329                     int ec = modCount;
       
  1330                     action.accept(toMatchResult(textAsString));
       
  1331                     if (ec != modCount)
       
  1332                         throw new ConcurrentModificationException();
       
  1333                 } while (find());
       
  1334             }
       
  1335         }
       
  1336         return StreamSupport.stream(Spliterators.spliteratorUnknownSize(
       
  1337                 new MatchResultIterator(), Spliterator.ORDERED | Spliterator.NONNULL), false);
       
  1338     }
       
  1339 
       
  1340     /**
  1081      * Replaces the first subsequence of the input sequence that matches the
  1341      * Replaces the first subsequence of the input sequence that matches the
  1082      * pattern with the given replacement string.
  1342      * pattern with the given replacement string.
  1083      *
  1343      *
  1084      * <p> This method first resets this matcher.  It then scans the input
  1344      * <p> This method first resets this matcher.  It then scans the input
  1085      * sequence looking for a match of the pattern.  Characters that are not
  1345      * sequence looking for a match of the pattern.  Characters that are not
  1115             throw new NullPointerException("replacement");
  1375             throw new NullPointerException("replacement");
  1116         reset();
  1376         reset();
  1117         if (!find())
  1377         if (!find())
  1118             return text.toString();
  1378             return text.toString();
  1119         StringBuilder sb = new StringBuilder();
  1379         StringBuilder sb = new StringBuilder();
       
  1380         appendReplacement(sb, replacement);
       
  1381         appendTail(sb);
       
  1382         return sb.toString();
       
  1383     }
       
  1384 
       
  1385     /**
       
  1386      * Replaces the first subsequence of the input sequence that matches the
       
  1387      * pattern with the result of applying the given replacer function to the
       
  1388      * match result of this matcher corresponding to that subsequence.
       
  1389      * Exceptions thrown by the replace function are relayed to the caller.
       
  1390      *
       
  1391      * <p> This method first resets this matcher.  It then scans the input
       
  1392      * sequence looking for a match of the pattern.  Characters that are not
       
  1393      * part of the match are appended directly to the result string; the match
       
  1394      * is replaced in the result by the applying the replacer function that
       
  1395      * returns a replacement string.  The replacement string may contain
       
  1396      * references to captured subsequences as in the {@link #appendReplacement
       
  1397      * appendReplacement} method.
       
  1398      *
       
  1399      * <p>Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
       
  1400      * the replacement string may cause the results to be different than if it
       
  1401      * were being treated as a literal replacement string. Dollar signs may be
       
  1402      * treated as references to captured subsequences as described above, and
       
  1403      * backslashes are used to escape literal characters in the replacement
       
  1404      * string.
       
  1405      *
       
  1406      * <p> Given the regular expression <tt>dog</tt>, the input
       
  1407      * <tt>"zzzdogzzzdogzzz"</tt>, and the function
       
  1408      * <tt>mr -> mr.group().toUpperCase()</tt>, an invocation of this method on
       
  1409      * a matcher for that expression would yield the string
       
  1410      * <tt>"zzzDOGzzzdogzzz"</tt>.
       
  1411      *
       
  1412      * <p> Invoking this method changes this matcher's state.  If the matcher
       
  1413      * is to be used in further matching operations then it should first be
       
  1414      * reset.
       
  1415      *
       
  1416      * <p> The replacer function should not modify this matcher's state during
       
  1417      * replacement.  This method will, on a best-effort basis, throw a
       
  1418      * {@link java.util.ConcurrentModificationException} if such modification is
       
  1419      * detected.
       
  1420      *
       
  1421      * <p> The state of the match result passed to the replacer function is
       
  1422      * guaranteed to be constant only for the duration of the replacer function
       
  1423      * call and only if the replacer function does not modify this matcher's
       
  1424      * state.
       
  1425      *
       
  1426      * @implNote
       
  1427      * This implementation applies the replacer function to this matcher, which
       
  1428      * is an instance of {@code MatchResult}.
       
  1429      *
       
  1430      * @param  replacer
       
  1431      *         The function to be applied to the match result of this matcher
       
  1432      *         that returns a replacement string.
       
  1433      * @return  The string constructed by replacing the first matching
       
  1434      *          subsequence with the result of applying the replacer function to
       
  1435      *          the matched subsequence, substituting captured subsequences as
       
  1436      *          needed.
       
  1437      * @throws NullPointerException if the replacer function is null
       
  1438      * @throws ConcurrentModificationException if it is detected, on a
       
  1439      *         best-effort basis, that the replacer function modified this
       
  1440      *         matcher's state
       
  1441      * @since 1.9
       
  1442      */
       
  1443     public String replaceFirst(Function<MatchResult, String> replacer) {
       
  1444         Objects.requireNonNull(replacer);
       
  1445         reset();
       
  1446         if (!find())
       
  1447             return text.toString();
       
  1448         StringBuilder sb = new StringBuilder();
       
  1449         int ec = modCount;
       
  1450         String replacement = replacer.apply(this);
       
  1451         if (ec != modCount)
       
  1452             throw new ConcurrentModificationException();
  1120         appendReplacement(sb, replacement);
  1453         appendReplacement(sb, replacement);
  1121         appendTail(sb);
  1454         appendTail(sb);
  1122         return sb.toString();
  1455         return sb.toString();
  1123     }
  1456     }
  1124 
  1457 
  1363         acceptMode = NOANCHOR;
  1696         acceptMode = NOANCHOR;
  1364         boolean result = parentPattern.root.match(this, from, text);
  1697         boolean result = parentPattern.root.match(this, from, text);
  1365         if (!result)
  1698         if (!result)
  1366             this.first = -1;
  1699             this.first = -1;
  1367         this.oldLast = this.last;
  1700         this.oldLast = this.last;
       
  1701         this.modCount++;
  1368         return result;
  1702         return result;
  1369     }
  1703     }
  1370 
  1704 
  1371     /**
  1705     /**
  1372      * Initiates a search for an anchored match to a Pattern within the given
  1706      * Initiates a search for an anchored match to a Pattern within the given
  1385         acceptMode = anchor;
  1719         acceptMode = anchor;
  1386         boolean result = parentPattern.matchRoot.match(this, from, text);
  1720         boolean result = parentPattern.matchRoot.match(this, from, text);
  1387         if (!result)
  1721         if (!result)
  1388             this.first = -1;
  1722             this.first = -1;
  1389         this.oldLast = this.last;
  1723         this.oldLast = this.last;
       
  1724         this.modCount++;
  1390         return result;
  1725         return result;
  1391     }
  1726     }
  1392 
  1727 
  1393     /**
  1728     /**
  1394      * Returns the end index of the text.
  1729      * Returns the end index of the text.