|
1 // |
|
2 // Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved. |
|
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 // |
|
5 // This code is free software; you can redistribute it and/or modify it |
|
6 // under the terms of the GNU General Public License version 2 only, as |
|
7 // published by the Free Software Foundation. |
|
8 // |
|
9 // This code is distributed in the hope that it will be useful, but WITHOUT |
|
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
12 // version 2 for more details (a copy is included in the LICENSE file that |
|
13 // accompanied this code). |
|
14 // |
|
15 // You should have received a copy of the GNU General Public License version |
|
16 // 2 along with this work; if not, write to the Free Software Foundation, |
|
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
18 // |
|
19 // Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
|
20 // CA 95054 USA or visit www.sun.com if you need additional information or |
|
21 // have any questions. |
|
22 // |
|
23 // |
|
24 // This file contains test cases with BMP characters for regular expressions. |
|
25 // A test case consists of three lines: |
|
26 // The first line is a pattern used in the test |
|
27 // The second line is the input to search for the pattern in |
|
28 // The third line is a concatenation of the match, the number of groups, |
|
29 // and the contents of the first four subexpressions. |
|
30 // Empty lines and lines beginning with comment slashes are ignored. |
|
31 |
|
32 // Test unsetting of backed off groups |
|
33 ^(\u3042)?\u3042 |
|
34 \u3042 |
|
35 true \u3042 1 |
|
36 |
|
37 ^(\u3042\u3042(\u3043\u3043)?)+$ |
|
38 \u3042\u3042\u3043\u3043\u3042\u3042 |
|
39 true \u3042\u3042\u3043\u3043\u3042\u3042 2 \u3042\u3042 \u3043\u3043 |
|
40 |
|
41 ((\u3042|\u3043)?\u3043)+ |
|
42 \u3043 |
|
43 true \u3043 2 \u3043 |
|
44 |
|
45 (\u3042\u3042\u3042)?\u3042\u3042\u3042 |
|
46 \u3042\u3042\u3042 |
|
47 true \u3042\u3042\u3042 1 |
|
48 |
|
49 ^(\u3042(\u3043)?)+$ |
|
50 \u3042\u3043\u3042 |
|
51 true \u3042\u3043\u3042 2 \u3042 \u3043 |
|
52 |
|
53 ^(\u3042(\u3043(\u3044)?)?)?\u3042\u3043\u3044 |
|
54 \u3042\u3043\u3044 |
|
55 true \u3042\u3043\u3044 3 |
|
56 |
|
57 ^(\u3042(\u3043(\u3044))).* |
|
58 \u3042\u3043\u3044 |
|
59 true \u3042\u3043\u3044 3 \u3042\u3043\u3044 \u3043\u3044 \u3044 |
|
60 |
|
61 // use of x modifier |
|
62 \u3042\u3043\u3044(?x)\u3043la\u3049 |
|
63 \u3042\u3043\u3044\u3043la\u3049 |
|
64 true \u3042\u3043\u3044\u3043la\u3049 0 |
|
65 |
|
66 \u3042\u3043\u3044(?x) bla\u3049 |
|
67 \u3042\u3043\u3044bla\u3049 |
|
68 true \u3042\u3043\u3044bla\u3049 0 |
|
69 |
|
70 \u3042\u3043\u3044(?x) bla\u3049 ble\u3044\u3049 |
|
71 \u3042\u3043\u3044bla\u3049ble\u3044\u3049 |
|
72 true \u3042\u3043\u3044bla\u3049ble\u3044\u3049 0 |
|
73 |
|
74 \u3042\u3043\u3044(?x) bla\u3049 # ignore comment |
|
75 \u3042\u3043\u3044bla\u3049 |
|
76 true \u3042\u3043\u3044bla\u3049 0 |
|
77 |
|
78 // Simple alternation |
|
79 \u3042|\u3043 |
|
80 \u3042 |
|
81 true \u3042 0 |
|
82 |
|
83 \u3042|\u3043 |
|
84 \u305B |
|
85 false 0 |
|
86 |
|
87 \u3042|\u3043 |
|
88 \u3043 |
|
89 true \u3043 0 |
|
90 |
|
91 \u3042|\u3043|\u3044\u3045 |
|
92 \u3044\u3045 |
|
93 true \u3044\u3045 0 |
|
94 |
|
95 \u3042|\u3042\u3045 |
|
96 \u3042\u3045 |
|
97 true \u3042 0 |
|
98 |
|
99 \u305B(\u3042|\u3042\u3044)\u3043 |
|
100 \u305B\u3042\u3044\u3043 |
|
101 true \u305B\u3042\u3044\u3043 1 \u3042\u3044 |
|
102 |
|
103 // Simple char class |
|
104 [\u3042\u3043\u3044]+ |
|
105 \u3042\u3043\u3042\u3043\u3042\u3043 |
|
106 true \u3042\u3043\u3042\u3043\u3042\u3043 0 |
|
107 |
|
108 [\u3042\u3043\u3044]+ |
|
109 \u3045\u3046\u3047\u3048 |
|
110 false 0 |
|
111 |
|
112 [\u3042\u3043\u3044]+[\u3045\u3046\u3047]+[\u3048\u3049\u304A]+ |
|
113 \u305B\u305B\u305B\u3042\u3042\u3045\u3045\u3048\u3048\u305B\u305B\u305B |
|
114 true \u3042\u3042\u3045\u3045\u3048\u3048 0 |
|
115 |
|
116 // Range char class |
|
117 [\u3042-\u3048]+ |
|
118 \u305B\u305B\u305B\u3048\u3048\u3048 |
|
119 true \u3048\u3048\u3048 0 |
|
120 |
|
121 [\u3042-\u3048]+ |
|
122 mmm |
|
123 false 0 |
|
124 |
|
125 [\u3042-]+ |
|
126 \u305B\u3042-9\u305B |
|
127 true \u3042- 0 |
|
128 |
|
129 [\u3042-\\u4444]+ |
|
130 \u305B\u3042-9\u305B |
|
131 true \u305B\u3042 0 |
|
132 |
|
133 // Negated char class |
|
134 [^\u3042\u3043\u3044]+ |
|
135 \u3042\u3043\u3042\u3043\u3042\u3043 |
|
136 false 0 |
|
137 |
|
138 [^\u3042\u3043\u3044]+ |
|
139 \u3042\u3042\u3042\u3043\u3043\u3043\u3044\u3044\u3044\u3045\u3046\u3047\u3048 |
|
140 true \u3045\u3046\u3047\u3048 0 |
|
141 |
|
142 // Making sure a ^ not in first position matches literal ^ |
|
143 [\u3042\u3043\u3044^\u3043] |
|
144 \u3043 |
|
145 true \u3043 0 |
|
146 |
|
147 [\u3042\u3043\u3044^\u3043] |
|
148 ^ |
|
149 true ^ 0 |
|
150 |
|
151 // Class union and intersection |
|
152 [\u3042\u3043\u3044[\u3045\u3046\u3047]] |
|
153 \u3043 |
|
154 true \u3043 0 |
|
155 |
|
156 [\u3042\u3043\u3044[\u3045\u3046\u3047]] |
|
157 \u3046 |
|
158 true \u3046 0 |
|
159 |
|
160 [\u3042-\u3045[0-9][\u304e-\u3051]] |
|
161 \u3042 |
|
162 true \u3042 0 |
|
163 |
|
164 [\u3042-\u3045[0-9][\u304e-\u3051]] |
|
165 \u3050 |
|
166 true \u3050 0 |
|
167 |
|
168 [\u3042-\u3045[0-9][\u304e-\u3051]] |
|
169 4 |
|
170 true 4 0 |
|
171 |
|
172 [\u3042-\u3045[0-9][\u304e-\u3051]] |
|
173 \u3046 |
|
174 false 0 |
|
175 |
|
176 [\u3042-\u3045[0-9][\u304e-\u3051]] |
|
177 \u3056 |
|
178 false 0 |
|
179 |
|
180 [[\u3042-\u3045][0-9][\u304e-\u3051]] |
|
181 \u3043 |
|
182 true \u3043 0 |
|
183 |
|
184 [[\u3042-\u3045][0-9][\u304e-\u3051]] |
|
185 \u305B |
|
186 false 0 |
|
187 |
|
188 [\u3042-\u3044[\u3045-\u3047[\u3048-\u304A]]] |
|
189 \u3042 |
|
190 true \u3042 0 |
|
191 |
|
192 [\u3042-\u3044[\u3045-\u3047[\u3048-\u304A]]] |
|
193 \u3046 |
|
194 true \u3046 0 |
|
195 |
|
196 [\u3042-\u3044[\u3045-\u3047[\u3048-\u304A]]] |
|
197 \u3049 |
|
198 true \u3049 0 |
|
199 |
|
200 [\u3042-\u3044[\u3045-\u3047[\u3048-\u304A]]] |
|
201 m |
|
202 false 0 |
|
203 |
|
204 [\u3042-\u3044[\u3045-\u3047[\u3048-\u304A]]m] |
|
205 m |
|
206 true m 0 |
|
207 |
|
208 [\u3042\u3043\u3044[\u3045\u3046\u3047]\u3048\u3049\u304A] |
|
209 \u3042 |
|
210 true \u3042 0 |
|
211 |
|
212 [\u3042\u3043\u3044[\u3045\u3046\u3047]\u3048\u3049\u304A] |
|
213 \u3045 |
|
214 true \u3045 0 |
|
215 |
|
216 [\u3042\u3043\u3044[\u3045\u3046\u3047]\u3048\u3049\u304A] |
|
217 \u3049 |
|
218 true \u3049 0 |
|
219 |
|
220 [\u3042\u3043\u3044[\u3045\u3046\u3047]\u3048\u3049\u304A] |
|
221 w |
|
222 false 0 |
|
223 |
|
224 [\u3042-\u3044&&[\u3045-\u3047]] |
|
225 \u3042 |
|
226 false 0 |
|
227 |
|
228 [\u3042-\u3044&&[\u3045-\u3047]] |
|
229 \u3046 |
|
230 false 0 |
|
231 |
|
232 [\u3042-\u3044&&[\u3045-\u3047]] |
|
233 \u305B |
|
234 false 0 |
|
235 |
|
236 [[\u3042-\u3044]&&[\u3045-\u3047]] |
|
237 \u3042 |
|
238 false 0 |
|
239 |
|
240 [[\u3042-\u3044]&&[\u3045-\u3047]] |
|
241 \u3046 |
|
242 false 0 |
|
243 |
|
244 [[\u3042-\u3044]&&[\u3045-\u3047]] |
|
245 \u305B |
|
246 false 0 |
|
247 |
|
248 [\u3042-\u3044&&\u3045-\u3047] |
|
249 \u3042 |
|
250 false 0 |
|
251 |
|
252 [\u3042-\u304e&&\u304e-\u305B] |
|
253 \u304e |
|
254 true \u304e 0 |
|
255 |
|
256 [\u3042-\u304e&&\u304e-\u305B&&\u3042-\u3044] |
|
257 \u304e |
|
258 false 0 |
|
259 |
|
260 [\u3042-\u304e&&\u304e-\u305B&&\u3042-\u305B] |
|
261 \u304e |
|
262 true \u304e 0 |
|
263 |
|
264 [[\u3042-\u304e]&&[\u304e-\u305B]] |
|
265 \u3042 |
|
266 false 0 |
|
267 |
|
268 [[\u3042-\u304e]&&[\u304e-\u305B]] |
|
269 \u304e |
|
270 true \u304e 0 |
|
271 |
|
272 [[\u3042-\u304e]&&[\u304e-\u305B]] |
|
273 \u305B |
|
274 false 0 |
|
275 |
|
276 [[\u3042-\u304e]&&[^\u3042-\u3044]] |
|
277 \u3042 |
|
278 false 0 |
|
279 |
|
280 [[\u3042-\u304e]&&[^\u3042-\u3044]] |
|
281 \u3045 |
|
282 true \u3045 0 |
|
283 |
|
284 [\u3042-\u304e&&[^\u3042-\u3044]] |
|
285 \u3042 |
|
286 false 0 |
|
287 |
|
288 [\u3042-\u304e&&[^\u3042-\u3044]] |
|
289 \u3045 |
|
290 true \u3045 0 |
|
291 |
|
292 [\u3042-\u3044\u3045-\u3047&&[\u3045-\u3047]] |
|
293 \u3042 |
|
294 false 0 |
|
295 |
|
296 [\u3042-\u3044\u3045-\u3047&&[\u3045-\u3047]] |
|
297 \u3046 |
|
298 true \u3046 0 |
|
299 |
|
300 [[\u3042-\u3044]&&\u3045-\u3047\u3042-\u3044] |
|
301 \u3042 |
|
302 true \u3042 0 |
|
303 |
|
304 [[\u3042-\u3044]&&[\u3045-\u3047][\u3042-\u3044]] |
|
305 \u3042 |
|
306 true \u3042 0 |
|
307 |
|
308 [[\u3042-\u3044][\u3045-\u3047]&&\u3042\u3043\u3044] |
|
309 \u3042 |
|
310 true \u3042 0 |
|
311 |
|
312 [[\u3042-\u3044][\u3045-\u3047]&&\u3042\u3043\u3044[\u3045\u3046\u3047]] |
|
313 \u3046 |
|
314 true \u3046 0 |
|
315 |
|
316 [[\u3042-\u3044]&&[\u3043-\u3045]&&[\u3044-\u3046]] |
|
317 \u3042 |
|
318 false 0 |
|
319 |
|
320 [[\u3042-\u3044]&&[\u3043-\u3045]&&[\u3044-\u3046]] |
|
321 \u3044 |
|
322 true \u3044 0 |
|
323 |
|
324 [[\u3042-\u3044]&&[\u3043-\u3045][\u3044-\u3046]&&[\u3056-\u305B]] |
|
325 \u3044 |
|
326 false 0 |
|
327 |
|
328 [\u3042\u3043\u3044[^\u3043\u3044\u3045]] |
|
329 \u3042 |
|
330 true \u3042 0 |
|
331 |
|
332 [\u3042\u3043\u3044[^\u3043\u3044\u3045]] |
|
333 \u3045 |
|
334 false 0 |
|
335 |
|
336 [\u3042-\u3044&&\u3042-\u3045&&\u3042-\u3046\u3048\u3049\u304A] |
|
337 \u3043 |
|
338 true \u3043 0 |
|
339 |
|
340 [\u3042-\u3044&&\u3042-\u3045&&\u3042-\u3046\u3048\u3049\u304A] |
|
341 \u3048 |
|
342 false 0 |
|
343 |
|
344 [[\u3042[\u3043]]&&[\u3043[\u3042]]] |
|
345 \u3042 |
|
346 true \u3042 0 |
|
347 |
|
348 [[\u3042]&&[\u3043][\u3044][\u3042]&&[^\u3045]] |
|
349 \u3042 |
|
350 true \u3042 0 |
|
351 |
|
352 [[\u3042]&&[b][c][\u3042]&&[^d]] |
|
353 \u3042 |
|
354 true \u3042 0 |
|
355 |
|
356 [[\u3042]&&[\u3043][\u3044][\u3042]&&[^\u3045]] |
|
357 \u3045 |
|
358 false 0 |
|
359 |
|
360 [[[\u3042-\u3045]&&[\u3044-\u3047]]] |
|
361 \u3042 |
|
362 false 0 |
|
363 |
|
364 [[[\u3042-\u3045]&&[\u3044-\u3047]]] |
|
365 \u3044 |
|
366 true \u3044 0 |
|
367 |
|
368 [[[\u3042-\u3045]&&[\u3044-\u3047]]&&[\u3044]] |
|
369 \u3044 |
|
370 true \u3044 0 |
|
371 |
|
372 [[[\u3042-\u3045]&&[\u3044-\u3047]]&&[\u3044]&&\u3044] |
|
373 \u3044 |
|
374 true \u3044 0 |
|
375 |
|
376 [[[\u3042-\u3045]&&[\u3044-\u3047]]&&[\u3044]&&\u3044&&\u3044] |
|
377 \u3044 |
|
378 true \u3044 0 |
|
379 |
|
380 [[[\u3042-\u3045]&&[\u3044-\u3047]]&&[\u3044]&&\u3044&&[\u3044\u3045\u3046]] |
|
381 \u3044 |
|
382 true \u3044 0 |
|
383 |
|
384 [\u305B[\u3042\u3043\u3044&&\u3043\u3044\u3045]] |
|
385 \u3044 |
|
386 true \u3044 0 |
|
387 |
|
388 [\u305B[\u3042\u3043\u3044&&\u3043\u3044\u3045]&&[\u3056-\u305B]] |
|
389 \u305B |
|
390 true \u305B 0 |
|
391 |
|
392 [\u3059[\u3042\u3043\u3044&&\u3043\u3044\u3045[\u305B]]&&[\u3056-\u305B]] |
|
393 \u305B |
|
394 false 0 |
|
395 |
|
396 [\u3059[[w\u305B]\u3042\u3043\u3044&&\u3043\u3044\u3045[\u305B]]&&[\u3056-\u305B]] |
|
397 \u305B |
|
398 true \u305B 0 |
|
399 |
|
400 [[\u3042\u3043\u3044]&&[\u3045\u3046\u3047]\u3042\u3043\u3044] |
|
401 \u3042 |
|
402 true \u3042 0 |
|
403 |
|
404 [[\u3042\u3043\u3044]&&[\u3045\u3046\u3047]\u3059\u305A\u305B[\u3042\u3043\u3044]] |
|
405 \u3042 |
|
406 true \u3042 0 |
|
407 |
|
408 \pL |
|
409 \u3042 |
|
410 true \u3042 0 |
|
411 |
|
412 \pL |
|
413 7 |
|
414 false 0 |
|
415 |
|
416 \p{L} |
|
417 \u3042 |
|
418 true \u3042 0 |
|
419 |
|
420 \p{IsL} |
|
421 \u3042 |
|
422 true \u3042 0 |
|
423 |
|
424 \p{InHiragana} |
|
425 \u3042 |
|
426 true \u3042 0 |
|
427 |
|
428 \p{InHiragana} |
|
429 \u0370 |
|
430 false 0 |
|
431 |
|
432 \pL\u3043\u3044 |
|
433 \u3042\u3043\u3044 |
|
434 true \u3042\u3043\u3044 0 |
|
435 |
|
436 \u3042[r\p{InGreek}]\u3044 |
|
437 \u3042\u0370\u3044 |
|
438 true \u3042\u0370\u3044 0 |
|
439 |
|
440 \u3042\p{InGreek} |
|
441 \u3042\u0370 |
|
442 true \u3042\u0370 0 |
|
443 |
|
444 \u3042\P{InGreek} |
|
445 \u3042\u0370 |
|
446 false 0 |
|
447 |
|
448 \u3042\P{InGreek} |
|
449 \u3042\u3043 |
|
450 true \u3042\u3043 0 |
|
451 |
|
452 \u3042{^InGreek} |
|
453 - |
|
454 error |
|
455 |
|
456 \u3042\p{^InGreek} |
|
457 - |
|
458 error |
|
459 |
|
460 \u3042\P{^InGreek} |
|
461 - |
|
462 error |
|
463 |
|
464 \u3042\p{InGreek} |
|
465 \u3042\u0370 |
|
466 true \u3042\u0370 0 |
|
467 |
|
468 \u3042[\p{InGreek}]\u3044 |
|
469 \u3042\u0370\u3044 |
|
470 true \u3042\u0370\u3044 0 |
|
471 |
|
472 \u3042[\P{InGreek}]\u3044 |
|
473 \u3042\u0370\u3044 |
|
474 false 0 |
|
475 |
|
476 \u3042[\P{InGreek}]\u3044 |
|
477 \u3042\u3043\u3044 |
|
478 true \u3042\u3043\u3044 0 |
|
479 |
|
480 \u3042[{^InGreek}]\u3044 |
|
481 \u3042n\u3044 |
|
482 true \u3042n\u3044 0 |
|
483 |
|
484 \u3042[{^InGreek}]\u3044 |
|
485 \u3042\u305B\u3044 |
|
486 false 0 |
|
487 |
|
488 \u3042[\p{^InGreek}]\u3044 |
|
489 - |
|
490 error |
|
491 |
|
492 \u3042[\P{^InGreek}]\u3044 |
|
493 - |
|
494 error |
|
495 |
|
496 \u3042[\p{InGreek}] |
|
497 \u3042\u0370 |
|
498 true \u3042\u0370 0 |
|
499 |
|
500 \u3042[r\p{InGreek}]\u3044 |
|
501 \u3042r\u3044 |
|
502 true \u3042r\u3044 0 |
|
503 |
|
504 \u3042[\p{InGreek}r]\u3044 |
|
505 \u3042r\u3044 |
|
506 true \u3042r\u3044 0 |
|
507 |
|
508 \u3042[r\p{InGreek}]\u3044 |
|
509 \u3042r\u3044 |
|
510 true \u3042r\u3044 0 |
|
511 |
|
512 \u3042[^\p{InGreek}]\u3044 |
|
513 \u3042\u0370\u3044 |
|
514 false 0 |
|
515 |
|
516 \u3042[^\P{InGreek}]\u3044 |
|
517 \u3042\u0370\u3044 |
|
518 true \u3042\u0370\u3044 0 |
|
519 |
|
520 \u3042[\p{InGreek}&&[^\u0370]]\u3044 |
|
521 \u3042\u0370\u3044 |
|
522 false 0 |
|
523 |
|
524 // Test the dot metacharacter |
|
525 \u3042.\u3044.+ |
|
526 \u3042#\u3044%& |
|
527 true \u3042#\u3044%& 0 |
|
528 |
|
529 \u3042\u3043. |
|
530 \u3042\u3043\n |
|
531 false 0 |
|
532 |
|
533 (?s)\u3042\u3043. |
|
534 \u3042\u3043\n |
|
535 true \u3042\u3043\n 0 |
|
536 |
|
537 \u3042[\p{L}&&[\P{InGreek}]]\u3044 |
|
538 \u3042\u6000\u3044 |
|
539 true \u3042\u6000\u3044 0 |
|
540 |
|
541 \u3042[\p{L}&&[\P{InGreek}]]\u3044 |
|
542 \u3042r\u3044 |
|
543 true \u3042r\u3044 0 |
|
544 |
|
545 \u3042[\p{L}&&[\P{InGreek}]]\u3044 |
|
546 \u3042\u0370\u3044 |
|
547 false 0 |
|
548 |
|
549 \u3042\p{InGreek}\u3044 |
|
550 \u3042\u0370\u3044 |
|
551 true \u3042\u0370\u3044 0 |
|
552 |
|
553 \u3042\p{Sc} |
|
554 \u3042$ |
|
555 true \u3042$ 0 |
|
556 |
|
557 \W\w\W |
|
558 rrrr#\u3048\u3048\u3048 |
|
559 false 0 |
|
560 |
|
561 \u3042\u3043\u3044[\s\u3045\u3046\u3047]* |
|
562 \u3042\u3043\u3044 \u3045\u3046\u3047 |
|
563 true \u3042\u3043\u3044 \u3045\u3046\u3047 0 |
|
564 |
|
565 \u3042\u3043\u3044[\s\u305A-\u305B]* |
|
566 \u3042\u3043\u3044 \u305A \u305B |
|
567 true \u3042\u3043\u3044 \u305A \u305B 0 |
|
568 |
|
569 \u3042\u3043\u3044[\u3042-\u3045\s\u304e-\u3051]* |
|
570 \u3042\u3043\u3044\u3042\u3042 \u304e\u304f \u3051 |
|
571 true \u3042\u3043\u3044\u3042\u3042 \u304e\u304f \u3051 0 |
|
572 |
|
573 // Test the whitespace escape sequence |
|
574 \u3042\u3043\s\u3044 |
|
575 \u3042\u3043 \u3044 |
|
576 true \u3042\u3043 \u3044 0 |
|
577 |
|
578 \s\s\s |
|
579 \u3043l\u3042\u3049 \u3046rr |
|
580 false 0 |
|
581 |
|
582 \S\S\s |
|
583 \u3043l\u3042\u3049 \u3046rr |
|
584 true \u3042\u3049 0 |
|
585 |
|
586 // Test the digit escape sequence |
|
587 \u3042\u3043\d\u3044 |
|
588 \u3042\u30439\u3044 |
|
589 true \u3042\u30439\u3044 0 |
|
590 |
|
591 \d\d\d |
|
592 \u3043l\u3042\u304945 |
|
593 false 0 |
|
594 |
|
595 // Test the caret metacharacter |
|
596 ^\u3042\u3043\u3044 |
|
597 \u3042\u3043\u3044\u3045\u3046\u3047 |
|
598 true \u3042\u3043\u3044 0 |
|
599 |
|
600 ^\u3042\u3043\u3044 |
|
601 \u3043\u3044\u3045\u3042\u3043\u3044 |
|
602 false 0 |
|
603 |
|
604 // Greedy ? metacharacter |
|
605 \u3042?\u3043 |
|
606 \u3042\u3042\u3042\u3042\u3043 |
|
607 true \u3042\u3043 0 |
|
608 |
|
609 \u3042?\u3043 |
|
610 \u3043 |
|
611 true \u3043 0 |
|
612 |
|
613 \u3042?\u3043 |
|
614 \u3042\u3042\u3042\u3044\u3044\u3044 |
|
615 false 0 |
|
616 |
|
617 .?\u3043 |
|
618 \u3042\u3042\u3042\u3042\u3043 |
|
619 true \u3042\u3043 0 |
|
620 |
|
621 // Reluctant ? metacharacter |
|
622 \u3042??\u3043 |
|
623 \u3042\u3042\u3042\u3042\u3043 |
|
624 true \u3042\u3043 0 |
|
625 |
|
626 \u3042??\u3043 |
|
627 \u3043 |
|
628 true \u3043 0 |
|
629 |
|
630 \u3042??\u3043 |
|
631 \u3042\u3042\u3042\u3044\u3044\u3044 |
|
632 false 0 |
|
633 |
|
634 .??\u3043 |
|
635 \u3042\u3042\u3042\u3042\u3043 |
|
636 true \u3042\u3043 0 |
|
637 |
|
638 // Possessive ? metacharacter |
|
639 \u3042?+\u3043 |
|
640 \u3042\u3042\u3042\u3042\u3043 |
|
641 true \u3042\u3043 0 |
|
642 |
|
643 \u3042?+\u3043 |
|
644 \u3043 |
|
645 true \u3043 0 |
|
646 |
|
647 \u3042?+\u3043 |
|
648 \u3042\u3042\u3042\u3044\u3044\u3044 |
|
649 false 0 |
|
650 |
|
651 .?+\u3043 |
|
652 \u3042\u3042\u3042\u3042\u3043 |
|
653 true \u3042\u3043 0 |
|
654 |
|
655 // Greedy + metacharacter |
|
656 \u3042+\u3043 |
|
657 \u3042\u3042\u3042\u3042\u3043 |
|
658 true \u3042\u3042\u3042\u3042\u3043 0 |
|
659 |
|
660 \u3042+\u3043 |
|
661 \u3043 |
|
662 false 0 |
|
663 |
|
664 \u3042+\u3043 |
|
665 \u3042\u3042\u3042\u3044\u3044\u3044 |
|
666 false 0 |
|
667 |
|
668 .+\u3043 |
|
669 \u3042\u3042\u3042\u3042\u3043 |
|
670 true \u3042\u3042\u3042\u3042\u3043 0 |
|
671 |
|
672 // Reluctant + metacharacter |
|
673 \u3042+?\u3043 |
|
674 \u3042\u3042\u3042\u3042\u3043 |
|
675 true \u3042\u3042\u3042\u3042\u3043 0 |
|
676 |
|
677 \u3042+?\u3043 |
|
678 \u3043 |
|
679 false 0 |
|
680 |
|
681 \u3042+?\u3043 |
|
682 \u3042\u3042\u3042\u3044\u3044\u3044 |
|
683 false 0 |
|
684 |
|
685 .+?\u3043 |
|
686 \u3042\u3042\u3042\u3042\u3043 |
|
687 true \u3042\u3042\u3042\u3042\u3043 0 |
|
688 |
|
689 // Possessive + metacharacter |
|
690 \u3042++\u3043 |
|
691 \u3042\u3042\u3042\u3042\u3043 |
|
692 true \u3042\u3042\u3042\u3042\u3043 0 |
|
693 |
|
694 \u3042++\u3043 |
|
695 \u3043 |
|
696 false 0 |
|
697 |
|
698 \u3042++\u3043 |
|
699 \u3042\u3042\u3042\u3044\u3044\u3044 |
|
700 false 0 |
|
701 |
|
702 .++\u3043 |
|
703 \u3042\u3042\u3042\u3042\u3043 |
|
704 false 0 |
|
705 |
|
706 // Greedy Repetition |
|
707 \u3042{2,3} |
|
708 \u3042 |
|
709 false 0 |
|
710 |
|
711 \u3042{2,3} |
|
712 \u3042\u3042 |
|
713 true \u3042\u3042 0 |
|
714 |
|
715 \u3042{2,3} |
|
716 \u3042\u3042\u3042 |
|
717 true \u3042\u3042\u3042 0 |
|
718 |
|
719 \u3042{2,3} |
|
720 \u3042\u3042\u3042\u3042 |
|
721 true \u3042\u3042\u3042 0 |
|
722 |
|
723 \u3042{3,} |
|
724 \u305B\u305B\u305B\u3042\u3042\u3042\u3042\u305B\u305B\u305B |
|
725 true \u3042\u3042\u3042\u3042 0 |
|
726 |
|
727 \u3042{3,} |
|
728 \u305B\u305B\u305B\u3042\u3042\u305B\u305B\u305B |
|
729 false 0 |
|
730 |
|
731 // Reluctant Repetition |
|
732 \u3042{2,3}? |
|
733 \u3042 |
|
734 false 0 |
|
735 |
|
736 \u3042{2,3}? |
|
737 \u3042\u3042 |
|
738 true \u3042\u3042 0 |
|
739 |
|
740 \u3042{2,3}? |
|
741 \u3042\u3042\u3042 |
|
742 true \u3042\u3042 0 |
|
743 |
|
744 \u3042{2,3}? |
|
745 \u3042\u3042\u3042\u3042 |
|
746 true \u3042\u3042 0 |
|
747 |
|
748 // Zero width Positive lookahead |
|
749 \u3042\u3043\u3044(?=\u3045) |
|
750 \u305B\u305B\u305B\u3042\u3043\u3044\u3045 |
|
751 true \u3042\u3043\u3044 0 |
|
752 |
|
753 \u3042\u3043\u3044(?=\u3045) |
|
754 \u305B\u305B\u305B\u3042\u3043\u3044\u3046\u3045 |
|
755 false 0 |
|
756 |
|
757 // Zero width Negative lookahead |
|
758 \u3042\u3043\u3044(?!\u3045) |
|
759 \u305B\u305B\u3042\u3043\u3044\u3045 |
|
760 false 0 |
|
761 |
|
762 \u3042\u3043\u3044(?!\u3045) |
|
763 \u305B\u305B\u3042\u3043\u3044\u3046\u3045 |
|
764 true \u3042\u3043\u3044 0 |
|
765 |
|
766 // Zero width Positive lookbehind |
|
767 \u3042(?<=\u3042) |
|
768 ###\u3042\u3043\u3044 |
|
769 true \u3042 0 |
|
770 |
|
771 \u3042(?<=\u3042) |
|
772 ###\u3043\u3044### |
|
773 false 0 |
|
774 |
|
775 // Zero width Negative lookbehind |
|
776 (?<!\u3042)\w |
|
777 ###\u3042\u3043\u3044a### |
|
778 true a 0 |
|
779 |
|
780 (?<!\u3042)\u3044 |
|
781 \u3043\u3044 |
|
782 true \u3044 0 |
|
783 |
|
784 (?<!\u3042)\u3044 |
|
785 \u3042\u3044 |
|
786 false 0 |
|
787 |
|
788 // Nondeterministic group |
|
789 (\u3042+\u3043)+ |
|
790 \u3042\u3043\u3042\u3043\u3042\u3043 |
|
791 true \u3042\u3043\u3042\u3043\u3042\u3043 1 \u3042\u3043 |
|
792 |
|
793 (\u3042|\u3043)+ |
|
794 \u3044\u3044\u3044\u3044\u3045 |
|
795 false 1 |
|
796 |
|
797 // Deterministic group |
|
798 (\u3042\u3043)+ |
|
799 \u3042\u3043\u3042\u3043\u3042\u3043 |
|
800 true \u3042\u3043\u3042\u3043\u3042\u3043 1 \u3042\u3043 |
|
801 |
|
802 (\u3042\u3043)+ |
|
803 \u3042\u3044\u3044\u3044\u3044\u3045 |
|
804 false 1 |
|
805 |
|
806 (\u3042\u3043)* |
|
807 \u3042\u3043\u3042\u3043\u3042\u3043 |
|
808 true \u3042\u3043\u3042\u3043\u3042\u3043 1 \u3042\u3043 |
|
809 |
|
810 (\u3042\u3043)(\u3044\u3045*) |
|
811 \u305B\u305B\u305B\u3042\u3043\u3044\u305B\u305B\u305B |
|
812 true \u3042\u3043\u3044 2 \u3042\u3043 \u3044 |
|
813 |
|
814 \u3042\u3043\u3044(\u3045)*\u3042\u3043\u3044 |
|
815 \u3042\u3043\u3044\u3045\u3045\u3045\u3045\u3045\u3042\u3043\u3044 |
|
816 true \u3042\u3043\u3044\u3045\u3045\u3045\u3045\u3045\u3042\u3043\u3044 1 \u3045 |
|
817 |
|
818 // Back references |
|
819 (\u3042*)\u3043\u3044\1 |
|
820 \u305B\u305B\u305B\u3042\u3042\u3043\u3044\u3042\u3042\u305B\u305B\u305B |
|
821 true \u3042\u3042\u3043\u3044\u3042\u3042 1 \u3042\u3042 |
|
822 |
|
823 (\u3042*)\u3043\u3044\1 |
|
824 \u305B\u305B\u305B\u3042\u3042\u3043\u3044\u3042\u305B\u305B\u305B |
|
825 true \u3042\u3043\u3044\u3042 1 \u3042 |
|
826 |
|
827 (\u3048t*)(\u3045\u3045\u3046)*(\u305A\u3056)\1\3(\u3057\u3057) |
|
828 \u305B\u305B\u305B\u3048tt\u3045\u3045\u3046\u3045\u3045\u3046\u305A\u3056\u3048tt\u305A\u3056\u3057\u3057\u305B\u305B\u305B |
|
829 true \u3048tt\u3045\u3045\u3046\u3045\u3045\u3046\u305A\u3056\u3048tt\u305A\u3056\u3057\u3057 4 \u3048tt \u3045\u3045\u3046 \u305A\u3056 \u3057\u3057 |
|
830 |
|
831 // Greedy * metacharacter |
|
832 \u3042*\u3043 |
|
833 \u3042\u3042\u3042\u3042\u3043 |
|
834 true \u3042\u3042\u3042\u3042\u3043 0 |
|
835 |
|
836 \u3042*\u3043 |
|
837 \u3043 |
|
838 true \u3043 0 |
|
839 |
|
840 \u3042*\u3043 |
|
841 \u3042\u3042\u3042\u3044\u3044\u3044 |
|
842 false 0 |
|
843 |
|
844 .*\u3043 |
|
845 \u3042\u3042\u3042\u3042\u3043 |
|
846 true \u3042\u3042\u3042\u3042\u3043 0 |
|
847 |
|
848 // Reluctant * metacharacter |
|
849 \u3042*?\u3043 |
|
850 \u3042\u3042\u3042\u3042\u3043 |
|
851 true \u3042\u3042\u3042\u3042\u3043 0 |
|
852 |
|
853 \u3042*?\u3043 |
|
854 \u3043 |
|
855 true \u3043 0 |
|
856 |
|
857 \u3042*?\u3043 |
|
858 \u3042\u3042\u3042\u3044\u3044\u3044 |
|
859 false 0 |
|
860 |
|
861 .*?\u3043 |
|
862 \u3042\u3042\u3042\u3042\u3043 |
|
863 true \u3042\u3042\u3042\u3042\u3043 0 |
|
864 |
|
865 // Possessive * metacharacter |
|
866 \u3042*+\u3043 |
|
867 \u3042\u3042\u3042\u3042\u3043 |
|
868 true \u3042\u3042\u3042\u3042\u3043 0 |
|
869 |
|
870 \u3042*+\u3043 |
|
871 \u3043 |
|
872 true \u3043 0 |
|
873 |
|
874 \u3042*+\u3043 |
|
875 \u3042\u3042\u3042\u3044\u3044\u3044 |
|
876 false 0 |
|
877 |
|
878 .*+\u3043 |
|
879 \u3042\u3042\u3042\u3042\u3043 |
|
880 false 0 |
|
881 |
|
882 // Case insensitivity |
|
883 (?iu)\uFF46\uFF4F\uFF4F\uFF42\uFF41\uFF52 |
|
884 \uFF46\uFF2F\uFF4F\uFF42\uFF21\uFF52 |
|
885 true \uFF46\uFF2F\uFF4F\uFF42\uFF21\uFF52 0 |
|
886 |
|
887 \uFF46(?iu)\uFF4F\uFF4F\uFF42\uFF41\uFF52 |
|
888 \uFF46\uFF2F\uFF4F\uFF42\uFF21\uFF52 |
|
889 true \uFF46\uFF2F\uFF4F\uFF42\uFF21\uFF52 0 |
|
890 |
|
891 \uFF46\uFF4F\uFF4F(?iu)\uFF42\uFF41\uFF52 |
|
892 \uFF46\uFF2F\uFF4F\uFF42\uFF21\uFF52 |
|
893 false 0 |
|
894 |
|
895 (?iu)\uFF46\uFF4F\uFF4F[\uFF42\uFF41\uFF52]+ |
|
896 \uFF46\uFF4F\uFF2F\uFF42\uFF21\uFF52 |
|
897 true \uFF46\uFF4F\uFF2F\uFF42\uFF21\uFF52 0 |
|
898 |
|
899 (?iu)\uFF46\uFF4F\uFF4F[\uFF41-\uFF52]+ |
|
900 \uFF46\uFF4F\uFF2F\uFF42\uFF21\uFF52 |
|
901 true \uFF46\uFF4F\uFF2F\uFF42\uFF21\uFF52 0 |
|
902 |
|
903 // Disable metacharacters - test both length <=3 and >3 |
|
904 // so that the BM (Boyer-Moore) optimization is part of the test |
|
905 \Q***\E\u3042\u3043\u3044 |
|
906 ***\u3042\u3043\u3044 |
|
907 true ***\u3042\u3043\u3044 0 |
|
908 |
|
909 \u3043l\Q***\E\u3042\u3043\u3044 |
|
910 \u3043l***\u3042\u3043\u3044 |
|
911 true \u3043l***\u3042\u3043\u3044 0 |
|
912 |
|
913 \Q***\u3042\u3043\u3044 |
|
914 ***\u3042\u3043\u3044 |
|
915 true ***\u3042\u3043\u3044 0 |
|
916 |
|
917 \u3043l\u3042\u3049\Q***\E\u3042\u3043\u3044 |
|
918 \u3043l\u3042\u3049***\u3042\u3043\u3044 |
|
919 true \u3043l\u3042\u3049***\u3042\u3043\u3044 0 |
|
920 |
|
921 \Q***\u3042\u3043\u3044 |
|
922 ***\u3042\u3043\u3044 |
|
923 true ***\u3042\u3043\u3044 0 |
|
924 |
|
925 \Q*\u3042\u3043 |
|
926 *\u3042\u3043 |
|
927 true *\u3042\u3043 0 |
|
928 |
|
929 \u3043l\u3042\u3049\Q***\u3042\u3043\u3044 |
|
930 \u3043l\u3042\u3049***\u3042\u3043\u3044 |
|
931 true \u3043l\u3042\u3049***\u3042\u3043\u3044 0 |
|
932 |
|
933 \u3043l\u3042\Q***\u3042\u3043\u3044 |
|
934 \u3043l\u3042***\u3042\u3043\u3044 |
|
935 true \u3043l\u3042***\u3042\u3043\u3044 0 |
|
936 |
|
937 [\043]+ |
|
938 \u3043l\u3042\u3049\u3043l\u3042\u3049#\u3043le\u3044\u3049 |
|
939 true # 0 |
|
940 |
|
941 [\042-\044]+ |
|
942 \u3043l\u3042\u3049\u3043l\u3042\u3049#\u3043le\u3044\u3049 |
|
943 true # 0 |
|
944 |
|
945 [\u1234-\u1236] |
|
946 \u3043l\u3042\u3049\u3043l\u3042\u3049\u1235\u3043le\u3044\u3049 |
|
947 true \u1235 0 |
|
948 |
|
949 [^\043]* |
|
950 \u3043l\u3042\u3049\u3043l\u3042\u3049#\u3043le\u3044\u3049 |
|
951 true \u3043l\u3042\u3049\u3043l\u3042\u3049 0 |