46 import sun.util.locale.provider.LocaleProviderAdapter; |
46 import sun.util.locale.provider.LocaleProviderAdapter; |
47 import sun.util.locale.provider.LocaleServiceProviderPool; |
47 import sun.util.locale.provider.LocaleServiceProviderPool; |
48 |
48 |
49 |
49 |
50 /** |
50 /** |
51 * The <code>BreakIterator</code> class implements methods for finding |
51 * The {@code BreakIterator} class implements methods for finding |
52 * the location of boundaries in text. Instances of <code>BreakIterator</code> |
52 * the location of boundaries in text. Instances of {@code BreakIterator} |
53 * maintain a current position and scan over text |
53 * maintain a current position and scan over text |
54 * returning the index of characters where boundaries occur. |
54 * returning the index of characters where boundaries occur. |
55 * Internally, <code>BreakIterator</code> scans text using a |
55 * Internally, {@code BreakIterator} scans text using a |
56 * <code>CharacterIterator</code>, and is thus able to scan text held |
56 * {@code CharacterIterator}, and is thus able to scan text held |
57 * by any object implementing that protocol. A <code>StringCharacterIterator</code> |
57 * by any object implementing that protocol. A {@code StringCharacterIterator} |
58 * is used to scan <code>String</code> objects passed to <code>setText</code>. |
58 * is used to scan {@code String} objects passed to {@code setText}. |
59 * |
59 * |
60 * <p> |
60 * <p> |
61 * You use the factory methods provided by this class to create |
61 * You use the factory methods provided by this class to create |
62 * instances of various types of break iterators. In particular, |
62 * instances of various types of break iterators. In particular, |
63 * use <code>getWordInstance</code>, <code>getLineInstance</code>, |
63 * use {@code getWordInstance}, {@code getLineInstance}, |
64 * <code>getSentenceInstance</code>, and <code>getCharacterInstance</code> |
64 * {@code getSentenceInstance}, and {@code getCharacterInstance} |
65 * to create <code>BreakIterator</code>s that perform |
65 * to create {@code BreakIterator}s that perform |
66 * word, line, sentence, and character boundary analysis respectively. |
66 * word, line, sentence, and character boundary analysis respectively. |
67 * A single <code>BreakIterator</code> can work only on one unit |
67 * A single {@code BreakIterator} can work only on one unit |
68 * (word, line, sentence, and so on). You must use a different iterator |
68 * (word, line, sentence, and so on). You must use a different iterator |
69 * for each unit boundary analysis you wish to perform. |
69 * for each unit boundary analysis you wish to perform. |
70 * |
70 * |
71 * <p><a id="line"></a> |
71 * <p><a id="line"></a> |
72 * Line boundary analysis determines where a text string can be |
72 * Line boundary analysis determines where a text string can be |
272 public abstract int last(); |
272 public abstract int last(); |
273 |
273 |
274 /** |
274 /** |
275 * Returns the nth boundary from the current boundary. If either |
275 * Returns the nth boundary from the current boundary. If either |
276 * the first or last text boundary has been reached, it returns |
276 * the first or last text boundary has been reached, it returns |
277 * <code>BreakIterator.DONE</code> and the current position is set to either |
277 * {@code BreakIterator.DONE} and the current position is set to either |
278 * the first or last text boundary depending on which one is reached. Otherwise, |
278 * the first or last text boundary depending on which one is reached. Otherwise, |
279 * the iterator's current position is set to the new boundary. |
279 * the iterator's current position is set to the new boundary. |
280 * For example, if the iterator's current position is the mth text boundary |
280 * For example, if the iterator's current position is the mth text boundary |
281 * and three more boundaries exist from the current boundary to the last text |
281 * and three more boundaries exist from the current boundary to the last text |
282 * boundary, the next(2) call will return m + 2. The new text position is set |
282 * boundary, the next(2) call will return m + 2. The new text position is set |
283 * to the (m + 2)th text boundary. A next(4) call would return |
283 * to the (m + 2)th text boundary. A next(4) call would return |
284 * <code>BreakIterator.DONE</code> and the last text boundary would become the |
284 * {@code BreakIterator.DONE} and the last text boundary would become the |
285 * new text position. |
285 * new text position. |
286 * @param n which boundary to return. A value of 0 |
286 * @param n which boundary to return. A value of 0 |
287 * does nothing. Negative values move to previous boundaries |
287 * does nothing. Negative values move to previous boundaries |
288 * and positive values move to later boundaries. |
288 * and positive values move to later boundaries. |
289 * @return The character index of the nth boundary from the current position |
289 * @return The character index of the nth boundary from the current position |
290 * or <code>BreakIterator.DONE</code> if either first or last text boundary |
290 * or {@code BreakIterator.DONE} if either first or last text boundary |
291 * has been reached. |
291 * has been reached. |
292 */ |
292 */ |
293 public abstract int next(int n); |
293 public abstract int next(int n); |
294 |
294 |
295 /** |
295 /** |
296 * Returns the boundary following the current boundary. If the current boundary |
296 * Returns the boundary following the current boundary. If the current boundary |
297 * is the last text boundary, it returns <code>BreakIterator.DONE</code> and |
297 * is the last text boundary, it returns {@code BreakIterator.DONE} and |
298 * the iterator's current position is unchanged. Otherwise, the iterator's |
298 * the iterator's current position is unchanged. Otherwise, the iterator's |
299 * current position is set to the boundary following the current boundary. |
299 * current position is set to the boundary following the current boundary. |
300 * @return The character index of the next text boundary or |
300 * @return The character index of the next text boundary or |
301 * <code>BreakIterator.DONE</code> if the current boundary is the last text |
301 * {@code BreakIterator.DONE} if the current boundary is the last text |
302 * boundary. |
302 * boundary. |
303 * Equivalent to next(1). |
303 * Equivalent to next(1). |
304 * @see #next(int) |
304 * @see #next(int) |
305 */ |
305 */ |
306 public abstract int next(); |
306 public abstract int next(); |
307 |
307 |
308 /** |
308 /** |
309 * Returns the boundary preceding the current boundary. If the current boundary |
309 * Returns the boundary preceding the current boundary. If the current boundary |
310 * is the first text boundary, it returns <code>BreakIterator.DONE</code> and |
310 * is the first text boundary, it returns {@code BreakIterator.DONE} and |
311 * the iterator's current position is unchanged. Otherwise, the iterator's |
311 * the iterator's current position is unchanged. Otherwise, the iterator's |
312 * current position is set to the boundary preceding the current boundary. |
312 * current position is set to the boundary preceding the current boundary. |
313 * @return The character index of the previous text boundary or |
313 * @return The character index of the previous text boundary or |
314 * <code>BreakIterator.DONE</code> if the current boundary is the first text |
314 * {@code BreakIterator.DONE} if the current boundary is the first text |
315 * boundary. |
315 * boundary. |
316 */ |
316 */ |
317 public abstract int previous(); |
317 public abstract int previous(); |
318 |
318 |
319 /** |
319 /** |
320 * Returns the first boundary following the specified character offset. If the |
320 * Returns the first boundary following the specified character offset. If the |
321 * specified offset equals to the last text boundary, it returns |
321 * specified offset equals to the last text boundary, it returns |
322 * <code>BreakIterator.DONE</code> and the iterator's current position is unchanged. |
322 * {@code BreakIterator.DONE} and the iterator's current position is unchanged. |
323 * Otherwise, the iterator's current position is set to the returned boundary. |
323 * Otherwise, the iterator's current position is set to the returned boundary. |
324 * The value returned is always greater than the offset or the value |
324 * The value returned is always greater than the offset or the value |
325 * <code>BreakIterator.DONE</code>. |
325 * {@code BreakIterator.DONE}. |
326 * @param offset the character offset to begin scanning. |
326 * @param offset the character offset to begin scanning. |
327 * @return The first boundary after the specified offset or |
327 * @return The first boundary after the specified offset or |
328 * <code>BreakIterator.DONE</code> if the last text boundary is passed in |
328 * {@code BreakIterator.DONE} if the last text boundary is passed in |
329 * as the offset. |
329 * as the offset. |
330 * @throws IllegalArgumentException if the specified offset is less than |
330 * @throws IllegalArgumentException if the specified offset is less than |
331 * the first text boundary or greater than the last text boundary. |
331 * the first text boundary or greater than the last text boundary. |
332 */ |
332 */ |
333 public abstract int following(int offset); |
333 public abstract int following(int offset); |
334 |
334 |
335 /** |
335 /** |
336 * Returns the last boundary preceding the specified character offset. If the |
336 * Returns the last boundary preceding the specified character offset. If the |
337 * specified offset equals to the first text boundary, it returns |
337 * specified offset equals to the first text boundary, it returns |
338 * <code>BreakIterator.DONE</code> and the iterator's current position is unchanged. |
338 * {@code BreakIterator.DONE} and the iterator's current position is unchanged. |
339 * Otherwise, the iterator's current position is set to the returned boundary. |
339 * Otherwise, the iterator's current position is set to the returned boundary. |
340 * The value returned is always less than the offset or the value |
340 * The value returned is always less than the offset or the value |
341 * <code>BreakIterator.DONE</code>. |
341 * {@code BreakIterator.DONE}. |
342 * @param offset the character offset to begin scanning. |
342 * @param offset the character offset to begin scanning. |
343 * @return The last boundary before the specified offset or |
343 * @return The last boundary before the specified offset or |
344 * <code>BreakIterator.DONE</code> if the first text boundary is passed in |
344 * {@code BreakIterator.DONE} if the first text boundary is passed in |
345 * as the offset. |
345 * as the offset. |
346 * @throws IllegalArgumentException if the specified offset is less than |
346 * @throws IllegalArgumentException if the specified offset is less than |
347 * the first text boundary or greater than the last text boundary. |
347 * the first text boundary or greater than the last text boundary. |
348 * @since 1.2 |
348 * @since 1.2 |
349 */ |
349 */ |
435 |
435 |
436 @SuppressWarnings("unchecked") |
436 @SuppressWarnings("unchecked") |
437 private static final SoftReference<BreakIteratorCache>[] iterCache = (SoftReference<BreakIteratorCache>[]) new SoftReference<?>[4]; |
437 private static final SoftReference<BreakIteratorCache>[] iterCache = (SoftReference<BreakIteratorCache>[]) new SoftReference<?>[4]; |
438 |
438 |
439 /** |
439 /** |
440 * Returns a new <code>BreakIterator</code> instance |
440 * Returns a new {@code BreakIterator} instance |
441 * for <a href="BreakIterator.html#word">word breaks</a> |
441 * for <a href="BreakIterator.html#word">word breaks</a> |
442 * for the {@linkplain Locale#getDefault() default locale}. |
442 * for the {@linkplain Locale#getDefault() default locale}. |
443 * @return A break iterator for word breaks |
443 * @return A break iterator for word breaks |
444 */ |
444 */ |
445 public static BreakIterator getWordInstance() |
445 public static BreakIterator getWordInstance() |
446 { |
446 { |
447 return getWordInstance(Locale.getDefault()); |
447 return getWordInstance(Locale.getDefault()); |
448 } |
448 } |
449 |
449 |
450 /** |
450 /** |
451 * Returns a new <code>BreakIterator</code> instance |
451 * Returns a new {@code BreakIterator} instance |
452 * for <a href="BreakIterator.html#word">word breaks</a> |
452 * for <a href="BreakIterator.html#word">word breaks</a> |
453 * for the given locale. |
453 * for the given locale. |
454 * @param locale the desired locale |
454 * @param locale the desired locale |
455 * @return A break iterator for word breaks |
455 * @return A break iterator for word breaks |
456 * @throws NullPointerException if <code>locale</code> is null |
456 * @throws NullPointerException if {@code locale} is null |
457 */ |
457 */ |
458 public static BreakIterator getWordInstance(Locale locale) |
458 public static BreakIterator getWordInstance(Locale locale) |
459 { |
459 { |
460 return getBreakInstance(locale, WORD_INDEX); |
460 return getBreakInstance(locale, WORD_INDEX); |
461 } |
461 } |
462 |
462 |
463 /** |
463 /** |
464 * Returns a new <code>BreakIterator</code> instance |
464 * Returns a new {@code BreakIterator} instance |
465 * for <a href="BreakIterator.html#line">line breaks</a> |
465 * for <a href="BreakIterator.html#line">line breaks</a> |
466 * for the {@linkplain Locale#getDefault() default locale}. |
466 * for the {@linkplain Locale#getDefault() default locale}. |
467 * @return A break iterator for line breaks |
467 * @return A break iterator for line breaks |
468 */ |
468 */ |
469 public static BreakIterator getLineInstance() |
469 public static BreakIterator getLineInstance() |
470 { |
470 { |
471 return getLineInstance(Locale.getDefault()); |
471 return getLineInstance(Locale.getDefault()); |
472 } |
472 } |
473 |
473 |
474 /** |
474 /** |
475 * Returns a new <code>BreakIterator</code> instance |
475 * Returns a new {@code BreakIterator} instance |
476 * for <a href="BreakIterator.html#line">line breaks</a> |
476 * for <a href="BreakIterator.html#line">line breaks</a> |
477 * for the given locale. |
477 * for the given locale. |
478 * @param locale the desired locale |
478 * @param locale the desired locale |
479 * @return A break iterator for line breaks |
479 * @return A break iterator for line breaks |
480 * @throws NullPointerException if <code>locale</code> is null |
480 * @throws NullPointerException if {@code locale} is null |
481 */ |
481 */ |
482 public static BreakIterator getLineInstance(Locale locale) |
482 public static BreakIterator getLineInstance(Locale locale) |
483 { |
483 { |
484 return getBreakInstance(locale, LINE_INDEX); |
484 return getBreakInstance(locale, LINE_INDEX); |
485 } |
485 } |
486 |
486 |
487 /** |
487 /** |
488 * Returns a new <code>BreakIterator</code> instance |
488 * Returns a new {@code BreakIterator} instance |
489 * for <a href="BreakIterator.html#character">character breaks</a> |
489 * for <a href="BreakIterator.html#character">character breaks</a> |
490 * for the {@linkplain Locale#getDefault() default locale}. |
490 * for the {@linkplain Locale#getDefault() default locale}. |
491 * @return A break iterator for character breaks |
491 * @return A break iterator for character breaks |
492 */ |
492 */ |
493 public static BreakIterator getCharacterInstance() |
493 public static BreakIterator getCharacterInstance() |
494 { |
494 { |
495 return getCharacterInstance(Locale.getDefault()); |
495 return getCharacterInstance(Locale.getDefault()); |
496 } |
496 } |
497 |
497 |
498 /** |
498 /** |
499 * Returns a new <code>BreakIterator</code> instance |
499 * Returns a new {@code BreakIterator} instance |
500 * for <a href="BreakIterator.html#character">character breaks</a> |
500 * for <a href="BreakIterator.html#character">character breaks</a> |
501 * for the given locale. |
501 * for the given locale. |
502 * @param locale the desired locale |
502 * @param locale the desired locale |
503 * @return A break iterator for character breaks |
503 * @return A break iterator for character breaks |
504 * @throws NullPointerException if <code>locale</code> is null |
504 * @throws NullPointerException if {@code locale} is null |
505 */ |
505 */ |
506 public static BreakIterator getCharacterInstance(Locale locale) |
506 public static BreakIterator getCharacterInstance(Locale locale) |
507 { |
507 { |
508 return getBreakInstance(locale, CHARACTER_INDEX); |
508 return getBreakInstance(locale, CHARACTER_INDEX); |
509 } |
509 } |
510 |
510 |
511 /** |
511 /** |
512 * Returns a new <code>BreakIterator</code> instance |
512 * Returns a new {@code BreakIterator} instance |
513 * for <a href="BreakIterator.html#sentence">sentence breaks</a> |
513 * for <a href="BreakIterator.html#sentence">sentence breaks</a> |
514 * for the {@linkplain Locale#getDefault() default locale}. |
514 * for the {@linkplain Locale#getDefault() default locale}. |
515 * @return A break iterator for sentence breaks |
515 * @return A break iterator for sentence breaks |
516 */ |
516 */ |
517 public static BreakIterator getSentenceInstance() |
517 public static BreakIterator getSentenceInstance() |
518 { |
518 { |
519 return getSentenceInstance(Locale.getDefault()); |
519 return getSentenceInstance(Locale.getDefault()); |
520 } |
520 } |
521 |
521 |
522 /** |
522 /** |
523 * Returns a new <code>BreakIterator</code> instance |
523 * Returns a new {@code BreakIterator} instance |
524 * for <a href="BreakIterator.html#sentence">sentence breaks</a> |
524 * for <a href="BreakIterator.html#sentence">sentence breaks</a> |
525 * for the given locale. |
525 * for the given locale. |
526 * @param locale the desired locale |
526 * @param locale the desired locale |
527 * @return A break iterator for sentence breaks |
527 * @return A break iterator for sentence breaks |
528 * @throws NullPointerException if <code>locale</code> is null |
528 * @throws NullPointerException if {@code locale} is null |
529 */ |
529 */ |
530 public static BreakIterator getSentenceInstance(Locale locale) |
530 public static BreakIterator getSentenceInstance(Locale locale) |
531 { |
531 { |
532 return getBreakInstance(locale, SENTENCE_INDEX); |
532 return getBreakInstance(locale, SENTENCE_INDEX); |
533 } |
533 } |
578 return iterator; |
578 return iterator; |
579 } |
579 } |
580 |
580 |
581 /** |
581 /** |
582 * Returns an array of all locales for which the |
582 * Returns an array of all locales for which the |
583 * <code>get*Instance</code> methods of this class can return |
583 * {@code get*Instance} methods of this class can return |
584 * localized instances. |
584 * localized instances. |
585 * The returned array represents the union of locales supported by the Java |
585 * The returned array represents the union of locales supported by the Java |
586 * runtime and by installed |
586 * runtime and by installed |
587 * {@link java.text.spi.BreakIteratorProvider BreakIteratorProvider} implementations. |
587 * {@link java.text.spi.BreakIteratorProvider BreakIteratorProvider} implementations. |
588 * It must contain at least a <code>Locale</code> |
588 * It must contain at least a {@code Locale} |
589 * instance equal to {@link java.util.Locale#US Locale.US}. |
589 * instance equal to {@link java.util.Locale#US Locale.US}. |
590 * |
590 * |
591 * @return An array of locales for which localized |
591 * @return An array of locales for which localized |
592 * <code>BreakIterator</code> instances are available. |
592 * {@code BreakIterator} instances are available. |
593 */ |
593 */ |
594 public static synchronized Locale[] getAvailableLocales() |
594 public static synchronized Locale[] getAvailableLocales() |
595 { |
595 { |
596 LocaleServiceProviderPool pool = |
596 LocaleServiceProviderPool pool = |
597 LocaleServiceProviderPool.getPool(BreakIteratorProvider.class); |
597 LocaleServiceProviderPool.getPool(BreakIteratorProvider.class); |